https://github.com/akkartik/mu/blob/master/subx/021byte_addressing.cc
  1 //: SubX mostly deals with instructions operating on 32-bit operands, but we
  2 //: still need to deal with raw bytes for strings and so on.
  3 
  4 //: Unfortunately the register encodings when dealing with bytes are a mess.
  5 //: We need a special case for them.
  6 :(code)
  7 string rname_8bit(uint8_t r) {
  8   switch (r) {
  9   case 0: return "AL";  // lowest byte of EAX
 10   case 1: return "CL";  // lowest byte of ECX
 11   case 2: return "DL";  // lowest byte of EDX
 12   case 3: return "BL";  // lowest byte of EBX
 13   case 4: return "AH";  // second lowest byte of EAX
 14   case 5: return "CH";  // second lowest byte of ECX
 15   case 6: return "DH";  // second lowest byte of EDX
 16   case 7: return "BH";  // second lowest byte of EBX
 17   default: raise << "invalid 8-bit register " << r << '\n' << end();  return "";
 18   }
 19 }
 20 
 21 uint8_t* effective_byte_address(uint8_t modrm) {
 22   uint8_t mod = (modrm>>6);
 23   uint8_t rm = modrm & 0x7;
 24   if (mod == 3) {
 25     // select an 8-bit register
 26     trace(Callstack_depth+1, "run") << "r/m8 is " << rname_8bit(rm) << end();
 27     return reg_8bit(rm);
 28   }
 29   // the rest is as usual
 30   return mem_addr_u8(effective_address_number(modrm));
 31 }
 32 
 33 uint8_t* reg_8bit(uint8_t rm) {
 34   uint8_t* result = reinterpret_cast<uint8_t*>(&Reg[rm & 0x3].i);  // _L register
 35   if (rm & 0x4)
 36     ++result;  // _H register;  assumes host is little-endian
 37   return result;
 38 }
 39 
 40 :(before "End Initialize Op Names")
 41 put_new(Name, "88", "copy r8 to r8/m8-at-r32");  // description stored in Name under key "88"; the opcode itself is handled by `case 0x88`
 42 
 43 :(scenario copy_r8_to_mem_at_r32)
 44 % Reg[EBX].i = 0x224488ab;
 45 % Reg[EAX].i = 0x2000;
 46 == 0x1
 47 # op  ModR/M  SIB   displacement  immediate
 48   88  18                                      # copy BL to the byte at *EAX
 49 # ModR/M in binary: 00 (indirect mode) 011 (src BL) 000 (dest EAX)
 50 == 0x2000
 51 f0 cc bb aa
 52 +run: copy BL to r8/m8-at-r32
 53 +run: effective address is 0x00002000 (EAX)
 54 +run: storing 0xab
 55 % CHECK_EQ(0xaabbccab, read_mem_u32(0x2000));
 56 
 57 :(before "End Single-Byte Opcodes")
 58 case 0x88: {  // copy r8 to r/m8
 59   const uint8_t modrm = next();
 60   const uint8_t rsrc = (modrm>>3)&0x7;
 61   trace(Callstack_depth+1, "run") << "copy " << rname_8bit(rsrc) << " to r8/m8-at-r32" << end();
 62   // use unsigned to zero-extend 8-bit value to 32 bits
 63   uint8_t* dest = reinterpret_cast<uint8_t*>(effective_byte_address(modrm));
 64   const uint8_t* src = reg_8bit(rsrc);
 65   *dest = *src;
 66   trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*dest) << end();
 67   break;
 68 }
 69 
 70 //:
 71 
 72 :(before "End Initialize Op Names")
 73 put_new(Name, "8a", "copy r8/m8-at-r32 to r8");  // description stored in Name under key "8a"; the opcode itself is handled by `case 0x8a`
 74 
 75 :(scenario copy_mem_at_r32_to_r8)
 76 % Reg[EBX].i = 0xaabbcc0f;  // one nibble each of lowest byte set to all 0s and all 1s, to maximize value of this test
 77 % Reg[EAX].i = 0x2000;
 78 == 0x1
 79 # op  ModR/M  SIB   displacement  immediate
 80   8a  18                                      # copy just the byte at *EAX to BL
 81 # ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
 82 == 0x2000  # data segment
 83 ab ff ff ff  # 0xab with more data in following bytes
 84 +run: copy r8/m8-at-r32 to BL
 85 +run: effective address is 0x00002000 (EAX)
 86 +run: storing 0xab
 87 # remaining bytes of EBX are *not* cleared
 88 +run: EBX now contains 0xaabbccab
 89 
 90 :(before "End Single-Byte Opcodes")
 91 case 0x8a: {  // copy r/m8 to r8
 92   const uint8_t modrm = next();
 93   const uint8_t rdest = (modrm>>3)&0x7;
 94   trace(Callstack_depth+1, "run") << "copy r8/m8-at-r32 to " << rname_8bit(rdest) << end();
 95   // use unsigned to zero-extend 8-bit value to 32 bits
 96   const uint8_t* src = reinterpret_cast<uint8_t*>(effective_byte_address(modrm));
 97   uint8_t* dest = reg_8bit(rdest);
 98   trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*src) << end();
 99   *dest = *src;
100   const uint8_t rdest_32bit = rdest & 0x3;
101   trace(Callstack_depth+1, "run") << rname(rdest_32bit) << " now contains 0x" << HEXWORD << Reg[rdest_32bit].u << end();
102   break;
103 }
104 
105 :(scenario cannot_copy_byte_to_ESP_EBP_ESI_EDI)
106 % Reg[ESI].u = 0xaabbccdd;
107 % Reg[EBX].u = 0x11223344;
108 == 0x1
109 # op  ModR/M  SIB   displacement  immediate
110   8a  f3                                      # copy just the lowest byte of EBX (BL) to 8-bit register '6'
111 # ModR/M in binary: 11 (direct mode) 110 (dest 8-bit 'register 6') 011 (src BL)
112 # ensure 8-bit register '6' is DH, not ESI
113 +run: copy r8/m8-at-r32 to DH
114 +run: storing 0x44
115 # ensure ESI is unchanged
116 % CHECK_EQ(Reg[ESI].u, 0xaabbccdd);
117 
118 //:
119 
120 :(before "End Initialize Op Names")
121 put_new(Name, "c6", "copy imm8 to r8/m8-at-r32 (mov)");  // description stored in Name under key "c6"; the opcode itself is handled by `case 0xc6`
122 
123 :(scenario copy_imm8_to_mem_at_r32)
124 % Reg[EAX].i = 0x2000;
125 == 0x1
126 # op  ModR/M  SIB   displacement  immediate
127   c6  00                          dd          # copy to the byte at *EAX
128 # ModR/M in binary: 00 (indirect mode) 000 (unused) 000 (dest EAX)
129 == 0x2000
130 f0 cc bb aa
131 +run: copy imm8 to r8/m8-at-r32
132 +run: effective address is 0x00002000 (EAX)
133 +run: storing 0xdd
134 % CHECK_EQ(0xaabbccdd, read_mem_u32(0x2000));
135 
136 :(before "End Single-Byte Opcodes")
137 case 0xc6: {  // copy imm8 to r/m8
138   const uint8_t modrm = next();
139   const uint8_t src = next();
140   trace(Callstack_depth+1, "run") << "copy imm8 to r8/m8-at-r32" << end();
141   trace(Callstack_depth+1, "run") << "imm8 is 0x" << HEXWORD << src << end();
142   const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
143   if (subop != 0) {
144     cerr << "unrecognized subop for opcode c6: " << NUM(subop) << " (only 0/copy currently implemented)\n";
145     exit(1);
146   }
147   // use unsigned to zero-extend 8-bit value to 32 bits
148   uint8_t* dest = reinterpret_cast<uint8_t*>(effective_byte_address(modrm));
149   *dest = src;
150   trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*dest) << end();
151   break;
152 }