about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorKartik Agaram <vc@akkartik.com>2018-09-07 22:13:10 -0700
committerKartik Agaram <vc@akkartik.com>2018-09-07 22:19:13 -0700
commite07a3f2886b117970b3cd58f7cd6806cbfe5cc4a (patch)
tree6fcddb741f3b15eaa7892b5ce0468a6f3695005a
parent608a7fa8d0faf9a3e3d182d9eabe969804443aab (diff)
downloadmu-e07a3f2886b117970b3cd58f7cd6806cbfe5cc4a.tar.gz
4537
Streamline the factorial function; we don't need to save a stack variable
into a register before operating on it. All instructions can take a stack
variable directly.

In the process we found two bugs:

a) Opcode f7 was not implemented correctly. It was internally consistent
but I'd never validated it against a natively running program. Turns out
it encodes multiple instructions, not just 'not'.

b) The way we look up imm32 operands was sometimes reading them before
disp8/disp32 operands.
-rw-r--r--subx/012elf.cc1
-rw-r--r--subx/013direct_addressing.cc56
-rw-r--r--subx/014indirect_addressing.cc9
-rw-r--r--subx/015immediate_addressing.cc61
-rw-r--r--subx/031check_operands.cc3
-rwxr-xr-xsubx/apps/factorialbin156 -> 157 bytes
-rw-r--r--subx/apps/factorial.subx8
7 files changed, 97 insertions, 41 deletions
diff --git a/subx/012elf.cc b/subx/012elf.cc
index f82b48aa..3042b6b6 100644
--- a/subx/012elf.cc
+++ b/subx/012elf.cc
@@ -71,7 +71,6 @@ void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[
   // we wastefully use a separate page of memory for argv
   uint32_t argv_data = ARGV_DATA_SEGMENT;
   for (int i = argc-1;  i >= /*skip 'subx_bin' and 'run'*/2;  --i) {
-    dbg << "push " << argv_data << end();
     push(argv_data);
     for (size_t j = 0;  j <= strlen(argv[i]);  ++j) {
       write_mem_u8(argv_data, argv[i][j]);
diff --git a/subx/013direct_addressing.cc b/subx/013direct_addressing.cc
index 2211a7a5..38224088 100644
--- a/subx/013direct_addressing.cc
+++ b/subx/013direct_addressing.cc
@@ -90,6 +90,47 @@ case 0x29: {  // subtract r32 from r/m32
 //:: multiply
 
 :(before "End Initialize Op Names(name)")
+put(name, "f7", "test/negate/mul/div rm32 (with EAX if necessary) depending on subop");
+
+:(scenario multiply_eax_by_r32)
+% Reg[EAX].i = 4;
+% Reg[ECX].i = 3;
+== 0x1
+# op      ModR/M  SIB   displacement  immediate
+  f7      e1                                      # multiply EAX by ECX
+# ModR/M in binary: 11 (direct mode) 100 (subop mul) 001 (src ECX)
++run: operate on r/m32
++run: r/m32 is ECX
++run: subop: multiply EAX by r/m32
++run: storing 0x0000000c
+
+:(before "End Single-Byte Opcodes")
+case 0xf7: {  // group of operations on r/m32; the ModR/M 'reg opcode' bits (subop) select which one
+  uint8_t modrm = next();
+  trace(90, "run") << "operate on r/m32" << end();
+  int32_t* arg1 = effective_address(modrm);
+  uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
+  switch (subop) {
+  case 4: {  // mul unsigned EAX by r/m32; 64-bit product goes to EDX:EAX
+    trace(90, "run") << "subop: multiply EAX by r/m32" << end();
+    uint64_t result = static_cast<uint64_t>(Reg[EAX].u) * static_cast<uint32_t>(*arg1);  // widen before multiplying so the high half isn't truncated away
+    Reg[EAX].u = result & 0xffffffff;  // low 32 bits of the product
+    Reg[EDX].u = result >> 32;  // high 32 bits of the product
+    OF = (Reg[EDX].u != 0);  // overflow iff the product didn't fit in 32 bits
+    trace(90, "run") << "storing 0x" << HEXWORD << Reg[EAX].u << end();
+    break;
+  }
+  // End Op f7 Subops
+  default:
+    cerr << "unrecognized sub-opcode after f7: " << NUM(subop) << '\n';
+    exit(1);
+  }
+  break;
+}
+
+//:
+
+:(before "End Initialize Op Names(name)")
 put(name_0f, "af", "multiply rm32 into r32");
 
 :(scenario multiply_r32_into_r32)
@@ -200,17 +241,16 @@ put(name, "f7", "bitwise complement of rm32");
 % Reg[EBX].i = 0x0f0f00ff;
 == 0x1
 # op  ModR/M  SIB   displacement  immediate
-  f7  c3                                      # not EBX
-# ModR/M in binary: 11 (direct mode) 000 (unused) 011 (dest EBX)
-+run: 'not' of r/m32
+  f7  d3                                      # not EBX
+# ModR/M in binary: 11 (direct mode) 010 (subop not) 011 (dest EBX)
++run: operate on r/m32
 +run: r/m32 is EBX
++run: subop: not
 +run: storing 0xf0f0ff00
 
-:(before "End Single-Byte Opcodes")
-case 0xf7: {  // xor r32 with r/m32
-  uint8_t modrm = next();
-  trace(90, "run") << "'not' of r/m32" << end();
-  int32_t* arg1 = effective_address(modrm);
+:(before "End Op f7 Subops")
+case 2: {  // not r/m32
+  trace(90, "run") << "subop: not" << end();
   *arg1 = ~(*arg1);
   trace(90, "run") << "storing 0x" << HEXWORD << *arg1 << end();
   SF = (*arg1 >> 31);
diff --git a/subx/014indirect_addressing.cc b/subx/014indirect_addressing.cc
index 344da8de..22a3c179 100644
--- a/subx/014indirect_addressing.cc
+++ b/subx/014indirect_addressing.cc
@@ -227,16 +227,17 @@ case 0x33: {  // xor r/m32 with r32
 
 //:: not
 
-:(scenario not_r32_with_mem_at_r32)
+:(scenario not_of_mem_at_r32)
 % Reg[EBX].i = 0x60;
 == 0x1  # code segment
 # op  ModR/M  SIB   displacement  immediate
-  f7  03                                      # negate *EBX
-# ModR/M in binary: 00 (indirect mode) 000 (unused) 011 (dest EBX)
+  f7  13                                      # not *EBX
+# ModR/M in binary: 00 (indirect mode) 010 (subop not) 011 (dest EBX)
 == 0x60  # data segment
 ff 00 0f 0f  # 0x0f0f00ff
-+run: 'not' of r/m32
++run: operate on r/m32
 +run: effective address is 0x60 (EBX)
++run: subop: not
 +run: storing 0xf0f0ff00
 
 //:: compare (cmp)
diff --git a/subx/015immediate_addressing.cc b/subx/015immediate_addressing.cc
index c75c419e..b1d97f1e 100644
--- a/subx/015immediate_addressing.cc
+++ b/subx/015immediate_addressing.cc
@@ -9,17 +9,19 @@ put(name, "81", "combine rm32 with imm32 based on subop");
 # op  ModR/M  SIB   displacement  immediate
   81  c3                          0a 0b 0c 0d  # add 0x0d0c0b0a to EBX
 # ModR/M in binary: 11 (direct mode) 000 (add imm32) 011 (dest EBX)
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: r/m32 is EBX
++run: imm32 is 0x0d0c0b0a
 +run: subop add
 +run: storing 0x0d0c0b0b
 
 :(before "End Single-Byte Opcodes")
 case 0x81: {  // combine imm32 with r/m32
+  trace(90, "run") << "combine imm32 with r/m32" << end();
   uint8_t modrm = next();
-  int32_t arg2 = imm32();
-  trace(90, "run") << "combine imm32 0x" << HEXWORD << arg2 << " with r/m32" << end();
   int32_t* arg1 = effective_address(modrm);
+  int32_t arg2 = imm32();
+  trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end();
   uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
   switch (subop) {
   case 0:
@@ -44,8 +46,9 @@ case 0x81: {  // combine imm32 with r/m32
 # ModR/M in binary: 00 (indirect mode) 000 (add imm32) 011 (dest EBX)
 == 0x60  # data segment
 01 00 00 00  # 1
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: effective address is 0x60 (EBX)
++run: imm32 is 0x0d0c0b0a
 +run: subop add
 +run: storing 0x0d0c0b0b
 
@@ -80,8 +83,9 @@ case 0x2d: {  // subtract imm32 from EAX
 # ModR/M in binary: 00 (indirect mode) 101 (subtract imm32) 011 (dest EBX)
 == 0x60  # data segment
 0a 00 00 00  # 10
-+run: combine imm32 0x00000001 with r/m32
++run: combine imm32 with r/m32
 +run: effective address is 0x60 (EBX)
++run: imm32 is 0x00000001
 +run: subop subtract
 +run: storing 0x00000009
 
@@ -100,8 +104,9 @@ case 5: {
 # op  ModR/M  SIB   displacement  immediate
   81  eb                          01 00 00 00  # subtract 1 from EBX
 # ModR/M in binary: 11 (direct mode) 101 (subtract imm32) 011 (dest EBX)
-+run: combine imm32 0x00000001 with r/m32
++run: combine imm32 with r/m32
 +run: r/m32 is EBX
++run: imm32 is 0x00000001
 +run: subop subtract
 +run: storing 0x00000009
 
@@ -136,8 +141,9 @@ case 0x25: {  // and imm32 with EAX
 # ModR/M in binary: 00 (indirect mode) 100 (and imm32) 011 (dest EBX)
 == 0x60  # data segment
 ff 00 00 00  # 0xff
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: effective address is 0x60 (EBX)
++run: imm32 is 0x0d0c0b0a
 +run: subop and
 +run: storing 0x0000000a
 
@@ -156,8 +162,9 @@ case 4: {
 # op  ModR/M  SIB   displacement  immediate
   81  e3                          0a 0b 0c 0d  # and 0x0d0c0b0a with EBX
 # ModR/M in binary: 11 (direct mode) 100 (and imm32) 011 (dest EBX)
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: r/m32 is EBX
++run: imm32 is 0x0d0c0b0a
 +run: subop and
 +run: storing 0x0000000a
 
@@ -192,8 +199,9 @@ case 0x0d: {  // or imm32 with EAX
 # ModR/M in binary: 00 (indirect mode) 001 (or imm32) 011 (dest EBX)
 == 0x60  # data segment
 a0 b0 c0 d0  # 0xd0c0b0a0
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: effective address is 0x60 (EBX)
++run: imm32 is 0x0d0c0b0a
 +run: subop or
 +run: storing 0xddccbbaa
 
@@ -210,8 +218,9 @@ case 1: {
 # op  ModR/M  SIB   displacement  immediate
   81  cb                          0a 0b 0c 0d  # or 0x0d0c0b0a with EBX
 # ModR/M in binary: 11 (direct mode) 001 (or imm32) 011 (dest EBX)
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: r/m32 is EBX
++run: imm32 is 0x0d0c0b0a
 +run: subop or
 +run: storing 0xddccbbaa
 
@@ -246,8 +255,9 @@ case 0x35: {  // xor imm32 with EAX
 # ModR/M in binary: 00 (indirect mode) 110 (xor imm32) 011 (dest EBX)
 == 0x60  # data segment
 a0 b0 c0 d0  # 0xd0c0b0a0
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: effective address is 0x60 (EBX)
++run: imm32 is 0x0d0c0b0a
 +run: subop xor
 +run: storing 0xddccbbaa
 
@@ -264,8 +274,9 @@ case 6: {
 # op  ModR/M  SIB   displacement  immediate
   81  f3                          0a 0b 0c 0d  # xor 0x0d0c0b0a with EBX
 # ModR/M in binary: 11 (direct mode) 110 (xor imm32) 011 (dest EBX)
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: r/m32 is EBX
++run: imm32 is 0x0d0c0b0a
 +run: subop xor
 +run: storing 0xddccbbaa
 
@@ -320,8 +331,9 @@ case 0x3d: {  // subtract imm32 from EAX
 # op  ModR/M  SIB   displacement  immediate
   81  fb                          07 0b 0c 0d  # compare 0x0d0c0b07 with EBX
 # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX)
-+run: combine imm32 0x0d0c0b07 with r/m32
++run: combine imm32 with r/m32
 +run: r/m32 is EBX
++run: imm32 is 0x0d0c0b07
 +run: SF=0; ZF=0; OF=0
 
 :(before "End Op 81 Subops")
@@ -342,8 +354,9 @@ case 7: {
 # op  ModR/M  SIB   displacement  immediate
   81  fb                          0a 0b 0c 0d  # compare 0x0d0c0b0a with EBX
 # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX)
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: r/m32 is EBX
++run: imm32 is 0x0d0c0b0a
 +run: SF=1; ZF=0; OF=0
 
 :(scenario compare_imm32_with_r32_equal)
@@ -352,8 +365,9 @@ case 7: {
 # op  ModR/M  SIB   displacement  immediate
   81  fb                          0a 0b 0c 0d  # compare 0x0d0c0b0a with EBX
 # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX)
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: r/m32 is EBX
++run: imm32 is 0x0d0c0b0a
 +run: SF=0; ZF=1; OF=0
 
 :(scenario compare_imm32_with_mem_at_r32_greater)
@@ -364,8 +378,9 @@ case 7: {
 # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX)
 == 0x60  # data segment
 0a 0b 0c 0d  # 0x0d0c0b0a
-+run: combine imm32 0x0d0c0b07 with r/m32
++run: combine imm32 with r/m32
 +run: effective address is 0x60 (EBX)
++run: imm32 is 0x0d0c0b07
 +run: SF=0; ZF=0; OF=0
 
 :(scenario compare_imm32_with_mem_at_r32_lesser)
@@ -376,8 +391,9 @@ case 7: {
 # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX)
 == 0x60  # data segment
 07 0b 0c 0d  # 0x0d0c0b07
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: effective address is 0x60 (EBX)
++run: imm32 is 0x0d0c0b0a
 +run: SF=1; ZF=0; OF=0
 
 :(scenario compare_imm32_with_mem_at_r32_equal)
@@ -389,8 +405,9 @@ case 7: {
 # ModR/M in binary: 00 (indirect mode) 111 (compare imm32) 011 (dest EBX)
 == 0x60  # data segment
 0a 0b 0c 0d  # 0x0d0c0b0a
-+run: combine imm32 0x0d0c0b0a with r/m32
++run: combine imm32 with r/m32
 +run: effective address is 0x60 (EBX)
++run: imm32 is 0x0d0c0b0a
 +run: SF=0; ZF=1; OF=0
 
 //:: copy (mov)
@@ -438,15 +455,17 @@ put(name, "c7", "copy imm32 to rm32");
 # op  ModR/M  SIB   displacement  immediate
   c7  03                          0a 0b 0c 0d  # copy 0x0d0c0b0a to *EBX
 # ModR/M in binary: 00 (indirect mode) 000 (unused) 011 (dest EBX)
-+run: copy imm32 0x0d0c0b0a to r/m32
++run: copy imm32 to r/m32
 +run: effective address is 0x60 (EBX)
++run: imm32 is 0x0d0c0b0a
 
 :(before "End Single-Byte Opcodes")
 case 0xc7: {  // copy imm32 to r32
   uint8_t modrm = next();
-  int32_t arg2 = imm32();
-  trace(90, "run") << "copy imm32 0x" << HEXWORD << arg2 << " to r/m32" << end();
+  trace(90, "run") << "copy imm32 to r/m32" << end();
   int32_t* arg1 = effective_address(modrm);
+  int32_t arg2 = imm32();  // read only after effective_address(): any disp8/disp32 bytes precede the imm32 in the instruction
+  trace(90, "run") << "imm32 is 0x" << HEXWORD << arg2 << end();
   *arg1 = arg2;
   break;
 }
diff --git a/subx/031check_operands.cc b/subx/031check_operands.cc
index 12013287..934b1022 100644
--- a/subx/031check_operands.cc
+++ b/subx/031check_operands.cc
@@ -183,8 +183,6 @@ void init_permitted_operands() {
   // or
   put(Permitted_operands, "09", 0x01);
   put(Permitted_operands, "0b", 0x01);
-  // complement
-  put(Permitted_operands, "f7", 0x01);
   // xor
   put(Permitted_operands, "31", 0x01);
   put(Permitted_operands, "33", 0x01);
@@ -204,6 +202,7 @@ void init_permitted_operands() {
   //// Class O: op, ModR/M and subop (not r32)
   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
   //  0     0     0      |0       0     1     1
+  put(Permitted_operands, "f7", 0x03);  // test/not/mul/div
   put(Permitted_operands, "ff", 0x03);  // jump/push/call
 
   //// Class N: op, ModR/M and imm32
diff --git a/subx/apps/factorial b/subx/apps/factorial
index daea2cf4..d9501e23 100755
--- a/subx/apps/factorial
+++ b/subx/apps/factorial
Binary files differ
diff --git a/subx/apps/factorial.subx b/subx/apps/factorial.subx
index c531041b..76594623 100644
--- a/subx/apps/factorial.subx
+++ b/subx/apps/factorial.subx
@@ -32,15 +32,13 @@
 
 # factorial(n)
 factorial:
-  # initialize n
-  8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              2/r32/EDX   4/disp8         .                 # copy *(ESP+4) to EDX
   # initialize EAX to 1 (base case)
   b8/copy                         .               .             .           .             .           .           .               1/imm32           # copy 1 to EAX
   # if (n <= 1) jump exit
-  81          7/subop/compare     3/mod/direct    2/rm32/EDX    .           .             .           .           .               1/imm32           # compare EDX with 1
+  81          7/subop/compare     1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none  .           .           4/disp8         1/imm32           # compare *(ESP+4) with 1
   7e/jump-if-<=                   .               .             .           .             .           .           $factorial:exit/disp8             # jump if <= to $factorial:exit
   # EBX: n-1
-  89/copy                         3/mod/direct    3/rm32/EBX    .           .             .           2/r32/EDX   .               .                 # copy EDX to EBX
+  8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              3/r32/EBX   4/disp8         .                 # copy *(ESP+4) to EBX
   81          5/subop/subtract    3/mod/direct    3/rm32/EBX    .           .             .           .           .               1/imm32           # subtract 1 from EBX
   # prepare call
   55/push                         .               .             .           .             .           .           .               .                 # push EBP
@@ -56,7 +54,7 @@ factorial:
   # refresh n
   8b/copy                         1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none              2/r32/EDX   4/disp8         .                 # copy *(ESP+4) to EDX
   # return n * factorial(n-1)
-  0f af/multiply                  3/mod/direct    2/rm32/EDX    .           .             .           0/r32/EAX   .               .                 # multiply EDX (n) into EAX (factorial(n-1))
+  f7          4/subop/multiply    1/mod/*+disp8   4/rm32/sib    4/base/ESP  4/index/none                          4/disp8         .                 # multiply *(ESP+4) (n) into EAX (factorial(n-1))
   # TODO: check for overflow
 $factorial:exit:
   c3/return