diff options
author | Kartik Agaram <vc@akkartik.com> | 2018-09-07 22:13:10 -0700 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2018-09-07 22:19:13 -0700 |
commit | e07a3f2886b117970b3cd58f7cd6806cbfe5cc4a (patch) | |
tree | 6fcddb741f3b15eaa7892b5ce0468a6f3695005a | |
parent | 608a7fa8d0faf9a3e3d182d9eabe969804443aab (diff) | |
download | mu-e07a3f2886b117970b3cd58f7cd6806cbfe5cc4a.tar.gz |
4537
Streamline the factorial function; we don't need to save a stack variable into a register before operating on it, since all instructions can take a stack variable directly. In the process we found two bugs: a) Opcode f7 was not implemented correctly. It was internally consistent, but I'd never validated it against a natively running program. It turns out f7 encodes multiple instructions (selected by the subop bits of the ModR/M byte), not just 'not'. b) The way we look up imm32 operands was sometimes reading them before disp8/disp32 operands, even though the displacement precedes the immediate in the instruction encoding.
-rw-r--r-- | subx/012elf.cc | 1 | ||||
-rw-r--r-- | subx/013direct_addressing.cc | 56 | ||||
-rw-r--r-- | subx/014indirect_addressing.cc | 9 | ||||
-rw-r--r-- | subx/015immediate_addressing.cc | 61 | ||||
-rw-r--r-- | subx/031check_operands.cc | 3 | ||||
-rwxr-xr-x | subx/apps/factorial | bin | 156 -> 157 bytes | |||
-rw-r--r-- | subx/apps/factorial.subx | 8 |
7 files changed, 97 insertions, 41 deletions
diff --git a/subx/012elf.cc b/subx/012elf.cc index f82b48aa..3042b6b6 100644 --- a/subx/012elf.cc +++ b/subx/012elf.cc @@ -71,7 +71,6 @@ void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[ // we wastefully use a separate page of memory for argv uint32_t argv_data = ARGV_DATA_SEGMENT; for (int i = argc-1; i >= /*skip 'subx_bin' and 'run'*/2; --i) { - dbg << "push " << argv_data << end(); push(argv_data); for (size_t j = 0; j <= strlen(argv[i]); ++j) { write_mem_u8(argv_data, argv[i][j]); diff --git a/subx/013direct_addressing.cc b/subx/013direct_addressing.cc index 2211a7a5..38224088 100644 --- a/subx/013direct_addressing.cc +++ b/subx/013direct_addressing.cc @@ -90,6 +90,47 @@ case 0x29: { // subtract r32 from r/m32 //:: multiply :(before "End Initialize Op Names(name)") +put(name, "f7", "test/negate/mul/div rm32 (with EAX if necessary) depending on subop"); + +:(scenario multiply_eax_by_r32) +% Reg[EAX].i = 4; +% Reg[ECX].i = 3; +== 0x1 +# op ModR/M SIB displacement immediate + f7 e1 # multiply EAX by ECX +# ModR/M in binary: 11 (direct mode) 100 (subop mul) 001 (src ECX) ++run: operate on r/m32 ++run: r/m32 is ECX ++run: subop: multiply EAX by r/m32 ++run: storing 0x0000000c + +:(before "End Single-Byte Opcodes") +case 0xf7: { // xor r32 with r/m32 + uint8_t modrm = next(); + trace(90, "run") << "operate on r/m32" << end(); + int32_t* arg1 = effective_address(modrm); + uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits + switch (subop) { + case 4: { // mul unsigned EAX by r/m32 + trace(90, "run") << "subop: multiply EAX by r/m32" << end(); + uint64_t result = Reg[EAX].u * static_cast<uint32_t>(*arg1); + Reg[EAX].u = result & 0xffffffff; + Reg[EDX].u = result >> 32; + OF = (Reg[EDX].u != 0); + trace(90, "run") << "storing 0x" << HEXWORD << Reg[EAX].u << end(); + break; + } + // End Op f7 Subops + default: + cerr << "unrecognized sub-opcode after f7: " << NUM(subop) << '\n'; + exit(1); + } + break; +} + +//: + +:(before "End 
Initialize Op Names(name)") put(name_0f, "af", "multiply rm32 into r32"); :(scenario multiply_r32_into_r32) @@ -200,17 +241,16 @@ put(name, "f7", "bitwise complement of rm32"); % Reg[EBX].i = 0x0f0f00ff; == 0x1 # op ModR/M SIB displacement immediate - f7 c3 # not EBX -# ModR/M in binary: 11 (direct mode) 000 (unused) 011 (dest EBX) -+run: 'not' of r/m32 + f7 d3 # not EBX +# ModR/M in binary: 11 (direct mode) 010 (subop not) 011 (dest EBX) ++run: operate on r/m32 +run: r/m32 is EBX ++run: subop: not +run: storing 0xf0f0ff00 -:(before "End Single-Byte Opcodes") -case 0xf7: { // xor r32 with r/m32 - uint8_t modrm = next(); - trace(90, "run") << "'not' of r/m32" << end(); - int32_t* arg1 = effective_address(modrm); +:(before "End Op f7 Subops") +case 2: { // not r/m32 + trace(90, "run") << "subop: not" << end(); *arg1 = ~(*arg1); trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end(); SF = (*arg1 >> 31); diff --git a/subx/014indirect_addressing.cc b/subx/014indirect_addressing.cc index 344da8de..22a3c179 100644 --- a/subx/014indirect_addressing.cc +++ b/subx/014indirect_addressing.cc @@ -227,16 +227,17 @@ case 0x33: { // xor r/m32 with r32 //:: not -:(scenario not_r32_with_mem_at_r32) +:(scenario not_of_mem_at_r32) % Reg[EBX].i = 0x60; == 0x1 # code segment # op ModR/M SIB displacement immediate - f7 03 # negate *EBX -# ModR/M in binary: 00 (indirect mode) 000 (unused) 011 (dest EBX) + f7 13 # negate *EBX +# ModR/M in binary: 00 (indirect mode) 010 (subop not) 011 (dest EBX) == 0x60 # data segment ff 00 0f 0f # 0x0f0f00ff -+run: 'not' of r/m32 ++run: operate on r/m32 +run: effective address is 0x60 (EBX) ++run: subop: not +run: storing 0xf0f0ff00 //:: compare (cmp) diff --git a/subx/015immediate_addressing.cc b/subx/015immediate_addressing.cc index c75c419e..b1d97f1e 100644 --- a/subx/015immediate_addressing.cc +++ b/subx/015immediate_addressing.cc @@ -9,17 +9,19 @@ put(name, "81", "combine rm32 with imm32 based on subop"); # op ModR/M SIB displacement immediate 
81 c3 0a 0b 0c 0d # add 0x0d0c0b0a to EBX # ModR/M in binary: 11 (direct mode) 000 (add imm32) 011 (dest EBX) -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: r/m32 is EBX ++run: imm32 is 0x0d0c0b0a +run: subop add +run: storing 0x0d0c0b0b :(before "End Single-Byte Opcodes") case 0x81: { // combine imm32 with r/m32 + trace(90, "run") << "combine imm32 with r/m32" << end(); uint8_t modrm = next(); - int32_t arg2 = imm32(); - trace(90, "run") << "combine imm32 0x" << HEXWORD << arg2 << " with r/m32" << end(); int32_t* arg1 = effective_address(modrm); + int32_t arg2 = imm32(); + trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end(); uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits switch (subop) { case 0: @@ -44,8 +46,9 @@ case 0x81: { // combine imm32 with r/m32 # ModR/M in binary: 00 (indirect mode) 000 (add imm32) 011 (dest EBX) == 0x60 # data segment 01 00 00 00 # 1 -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: effective address is 0x60 (EBX) ++run: imm32 is 0x0d0c0b0a +run: subop add +run: storing 0x0d0c0b0b @@ -80,8 +83,9 @@ case 0x2d: { // subtract imm32 from EAX # ModR/M in binary: 00 (indirect mode) 101 (subtract imm32) 011 (dest EBX) == 0x60 # data segment 0a 00 00 00 # 10 -+run: combine imm32 0x00000001 with r/m32 ++run: combine imm32 with r/m32 +run: effective address is 0x60 (EBX) ++run: imm32 is 0x00000001 +run: subop subtract +run: storing 0x00000009 @@ -100,8 +104,9 @@ case 5: { # op ModR/M SIB displacement immediate 81 eb 01 00 00 00 # subtract 1 from EBX # ModR/M in binary: 11 (direct mode) 101 (subtract imm32) 011 (dest EBX) -+run: combine imm32 0x00000001 with r/m32 ++run: combine imm32 with r/m32 +run: r/m32 is EBX ++run: imm32 is 0x00000001 +run: subop subtract +run: storing 0x00000009 @@ -136,8 +141,9 @@ case 0x25: { // and imm32 with EAX # ModR/M in binary: 00 (indirect mode) 100 (and imm32) 011 (dest EBX) == 0x60 # data segment ff 00 00 00 # 0xff -+run: 
combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: effective address is 0x60 (EBX) ++run: imm32 is 0x0d0c0b0a +run: subop and +run: storing 0x0000000a @@ -156,8 +162,9 @@ case 4: { # op ModR/M SIB displacement immediate 81 e3 0a 0b 0c 0d # and 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 100 (and imm32) 011 (dest EBX) -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: r/m32 is EBX ++run: imm32 is 0x0d0c0b0a +run: subop and +run: storing 0x0000000a @@ -192,8 +199,9 @@ case 0x0d: { // or imm32 with EAX # ModR/M in binary: 00 (indirect mode) 001 (or imm32) 011 (dest EBX) == 0x60 # data segment a0 b0 c0 d0 # 0xd0c0b0a0 -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: effective address is 0x60 (EBX) ++run: imm32 is 0x0d0c0b0a +run: subop or +run: storing 0xddccbbaa @@ -210,8 +218,9 @@ case 1: { # op ModR/M SIB displacement immediate 81 cb 0a 0b 0c 0d # or 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 001 (or imm32) 011 (dest EBX) -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: r/m32 is EBX ++run: imm32 is 0x0d0c0b0a +run: subop or +run: storing 0xddccbbaa @@ -246,8 +255,9 @@ case 0x35: { // xor imm32 with EAX # ModR/M in binary: 00 (indirect mode) 110 (xor imm32) 011 (dest EBX) == 0x60 # data segment a0 b0 c0 d0 # 0xd0c0b0a0 -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: effective address is 0x60 (EBX) ++run: imm32 is 0x0d0c0b0a +run: subop xor +run: storing 0xddccbbaa @@ -264,8 +274,9 @@ case 6: { # op ModR/M SIB displacement immediate 81 f3 0a 0b 0c 0d # xor 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 110 (xor imm32) 011 (dest EBX) -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: r/m32 is EBX ++run: imm32 is 0x0d0c0b0a +run: subop xor +run: storing 0xddccbbaa @@ -320,8 +331,9 @@ case 0x3d: { // subtract imm32 from EAX # op ModR/M SIB displacement 
immediate 81 fb 07 0b 0c 0d # compare 0x0d0c0b07 with EBX # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX) -+run: combine imm32 0x0d0c0b07 with r/m32 ++run: combine imm32 with r/m32 +run: r/m32 is EBX ++run: imm32 is 0x0d0c0b07 +run: SF=0; ZF=0; OF=0 :(before "End Op 81 Subops") @@ -342,8 +354,9 @@ case 7: { # op ModR/M SIB displacement immediate 81 fb 0a 0b 0c 0d # compare 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX) -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: r/m32 is EBX ++run: imm32 is 0x0d0c0b0a +run: SF=1; ZF=0; OF=0 :(scenario compare_imm32_with_r32_equal) @@ -352,8 +365,9 @@ case 7: { # op ModR/M SIB displacement immediate 81 fb 0a 0b 0c 0d # compare 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX) -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: r/m32 is EBX ++run: imm32 is 0x0d0c0b0a +run: SF=0; ZF=1; OF=0 :(scenario compare_imm32_with_mem_at_r32_greater) @@ -364,8 +378,9 @@ case 7: { # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX) == 0x60 # data segment 0a 0b 0c 0d # 0x0d0c0b0a -+run: combine imm32 0x0d0c0b07 with r/m32 ++run: combine imm32 with r/m32 +run: effective address is 0x60 (EBX) ++run: imm32 is 0x0d0c0b07 +run: SF=0; ZF=0; OF=0 :(scenario compare_imm32_with_mem_at_r32_lesser) @@ -376,8 +391,9 @@ case 7: { # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX) == 0x60 # data segment 07 0b 0c 0d # 0x0d0c0b07 -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine imm32 with r/m32 +run: effective address is 0x60 (EBX) ++run: imm32 is 0x0d0c0b0a +run: SF=1; ZF=0; OF=0 :(scenario compare_imm32_with_mem_at_r32_equal) @@ -389,8 +405,9 @@ case 7: { # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX) == 0x60 # data segment 0a 0b 0c 0d # 0x0d0c0b0a -+run: combine imm32 0x0d0c0b0a with r/m32 ++run: combine 
imm32 with r/m32 +run: effective address is 0x60 (EBX) ++run: imm32 is 0x0d0c0b0a +run: SF=0; ZF=1; OF=0 //:: copy (mov) @@ -438,15 +455,17 @@ put(name, "c7", "copy imm32 to rm32"); # op ModR/M SIB displacement immediate c7 03 0a 0b 0c 0d # copy 0x0d0c0b0a to *EBX # ModR/M in binary: 00 (indirect mode) 000 (unused) 011 (dest EBX) -+run: copy imm32 0x0d0c0b0a to r/m32 ++run: copy imm32 to r/m32 +run: effective address is 0x60 (EBX) ++run: imm32 is 0x0d0c0b0a :(before "End Single-Byte Opcodes") case 0xc7: { // copy imm32 to r32 uint8_t modrm = next(); - int32_t arg2 = imm32(); - trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to r/m32" << end(); + trace(90, "run") << "copy imm32 to r/m32" << end(); int32_t* arg1 = effective_address(modrm); + int32_t arg2 = imm32(); + trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end(); *arg1 = arg2; break; } diff --git a/subx/031check_operands.cc b/subx/031check_operands.cc index 12013287..934b1022 100644 --- a/subx/031check_operands.cc +++ b/subx/031check_operands.cc @@ -183,8 +183,6 @@ void init_permitted_operands() { // or put(Permitted_operands, "09", 0x01); put(Permitted_operands, "0b", 0x01); - // complement - put(Permitted_operands, "f7", 0x01); // xor put(Permitted_operands, "31", 0x01); put(Permitted_operands, "33", 0x01); @@ -204,6 +202,7 @@ void init_permitted_operands() { //// Class O: op, ModR/M and subop (not r32) // imm32 imm8 disp32 |disp16 disp8 subop modrm // 0 0 0 |0 0 1 1 + put(Permitted_operands, "f7", 0x03); // test/not/mul/div put(Permitted_operands, "ff", 0x03); // jump/push/call //// Class N: op, ModR/M and imm32 diff --git a/subx/apps/factorial b/subx/apps/factorial index daea2cf4..d9501e23 100755 --- a/subx/apps/factorial +++ b/subx/apps/factorial Binary files differdiff --git a/subx/apps/factorial.subx b/subx/apps/factorial.subx index c531041b..76594623 100644 --- a/subx/apps/factorial.subx +++ b/subx/apps/factorial.subx @@ -32,15 +32,13 @@ # factorial(n) factorial: - # initialize n - 
8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 2/r32/EDX 4/disp8 . # copy *(ESP+4) to EDX # initialize EAX to 1 (base case) b8/copy . . . . . . . 1/imm32 # copy 1 to EAX # if (n <= 1) jump exit - 81 7/subop/compare 3/mod/direct 2/rm32/EDX . . . . . 1/imm32 # compare EDX with 1 + 81 7/subop/compare 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none . . 4/disp8 1/imm32 # compare *(ESP+4) with 1 7e/jump-if-<= . . . . . . $factorial:exit/disp8 # jump if <= to $factorial:exit # EBX: n-1 - 89/copy 3/mod/direct 3/rm32/EBX . . . 2/r32/EDX . . # copy EDX to EBX + 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 3/r32/EBX 4/disp8 . # copy *(ESP+4) to EBX 81 5/subop/subtract 3/mod/direct 3/rm32/EBX . . . . . 1/imm32 # subtract 1 from EBX # prepare call 55/push . . . . . . . . # push EBP @@ -56,7 +54,7 @@ factorial: # refresh n 8b/copy 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 2/r32/EDX 4/disp8 . # copy *(ESP+4) to EDX # return n * factorial(n-1) - 0f af/multiply 3/mod/direct 2/rm32/EDX . . . 0/r32/EAX . . # multiply EDX (n) into EAX (factorial(n-1)) + f7 4/subop/multiply 1/mod/*+disp8 4/rm32/sib 4/base/ESP 4/index/none 4/disp8 . # multiply *(ESP+4) (n) into EAX (factorial(n-1)) # TODO: check for overflow $factorial:exit: c3/return |