https://github.com/akkartik/mu/blob/master/021byte_addressing.cc
  1 //: SubX mostly deals with instructions operating on 32-bit operands, but we
  2 //: still need to deal with raw bytes for strings and so on.
  3 
  4 //: Unfortunately the register encodings when dealing with bytes are a mess.
  5 //: We need a special case for them.
  6 :(code)
  7 string rname_8bit(uint8_t r) {
  8   switch (r) {
  9   case 0: return "AL";  // lowest byte of EAX
 10   case 1: return "CL";  // lowest byte of ECX
 11   case 2: return "DL";  // lowest byte of EDX
 12   case 3: return "BL";  // lowest byte of EBX
 13   case 4: return "AH";  // second lowest byte of EAX
 14   case 5: return "CH";  // second lowest byte of ECX
 15   case 6: return "DH";  // second lowest byte of EDX
 16   case 7: return "BH";  // second lowest byte of EBX
 17   default: raise << "invalid 8-bit register " << r << '\n' << end();  return "";
 18   }
 19 }
 20 
 21 uint8_t* effective_byte_address(uint8_t modrm) {
 22   uint8_t mod = (modrm>>6);
 23   uint8_t rm = modrm & 0x7;
 24   if (mod == 3) {
 25     // select an 8-bit register
 26     trace(Callstack_depth+1, "run") << "r/m8 is " << rname_8bit(rm) << end();
 27     return reg_8bit(rm);
 28   }
 29   // the rest is as usual
 30   return mem_addr_u8(effective_address_number(modrm));
 31 }
 32 
 33 uint8_t* reg_8bit(uint8_t rm) {
 34   uint8_t* result = reinterpret_cast<uint8_t*>(&Reg[rm & 0x3].i);  // _L register
 35   if (rm & 0x4)
 36     ++result;  // _H register;  assumes host is little-endian
 37   return result;
 38 }
 39 
 40 :(before "End Initialize Op Names")
 41 put_new(Name, "88", "copy r8 to r8/m8-at-r32");  // description text for opcode 88 (handled by `case 0x88` below)
 42 
 43 :(code)
 44 void test_copy_r8_to_mem_at_r32() {
 45   Reg[EBX].i = 0x224488ab;
 46   Reg[EAX].i = 0x2000;
 47   run(
 48       "== code 0x1\n"
 49       // op     ModR/M  SIB   displacement  immediate
 50       "  88     18                                      \n"  // copy BL to the byte at *EAX
 51       // ModR/M in binary: 00 (indirect mode) 011 (src BL) 000 (dest EAX)
 52       "== data 0x2000\n"
 53       "f0 cc bb aa\n"
 54   );
 55   CHECK_TRACE_CONTENTS(
 56       "run: copy BL to r8/m8-at-r32\n"
 57       "run: effective address is 0x00002000 (EAX)\n"
 58       "run: storing 0xab\n"
 59   );
 60   CHECK_EQ(0xaabbccab, read_mem_u32(0x2000));
 61 }
 62 
 63 :(before "End Single-Byte Opcodes")
 64 case 0x88: {  // copy r8 to r/m8
 65   const uint8_t modrm = next();
 66   const uint8_t rsrc = (modrm>>3)&0x7;
 67   trace(Callstack_depth+1, "run") << "copy " << rname_8bit(rsrc) << " to r8/m8-at-r32" << end();
 68   // use unsigned to zero-extend 8-bit value to 32 bits
 69   uint8_t* dest = effective_byte_address(modrm);
 70   const uint8_t* src = reg_8bit(rsrc);
 71   *dest = *src;  // Read/write multiple elements of vector<uint8_t> at once. Assumes sizeof(int) == 4 on the host as well.
 72   trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*dest) << end();
 73   break;
 74 }
 75 
 76 //:
 77 
 78 :(before "End Initialize Op Names")
 79 put_new(Name, "8a", "copy r8/m8-at-r32 to r8");  // description text for opcode 8a (handled by `case 0x8a` below)
 80 
 81 :(code)
 82 void test_copy_mem_at_r32_to_r8() {
 83   Reg[EBX].i = 0xaabbcc0f;  // one nibble each of lowest byte set to all 0s and all 1s, to maximize value of this test
 84   Reg[EAX].i = 0x2000;
 85   run(
 86       "== code 0x1\n"
 87       // op     ModR/M  SIB   displacement  immediate
 88       "  8a     18                                      \n"  // copy just the byte at *EAX to BL
 89       // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
 90       "== data 0x2000\n"
 91       "ab ff ff ff\n"  // 0xab with more data in following bytes
 92   );
 93   CHECK_TRACE_CONTENTS(
 94       "run: copy r8/m8-at-r32 to BL\n"
 95       "run: effective address is 0x00002000 (EAX)\n"
 96       "run: storing 0xab\n"
 97       // remaining bytes of EBX are *not* cleared
 98       "run: EBX now contains 0xaabbccab\n"
 99   );
100 }
101 
102 :(before "End Single-Byte Opcodes")
103 case 0x8a: {  // copy r/m8 to r8
104   const uint8_t modrm = next();
105   const uint8_t rdest = (modrm>>3)&0x7;
106   trace(Callstack_depth+1, "run") << "copy r8/m8-at-r32 to " << rname_8bit(rdest) << end();
107   // use unsigned to zero-extend 8-bit value to 32 bits
108   const uint8_t* src = effective_byte_address(modrm);
109   uint8_t* dest = reg_8bit(rdest);
110   trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*src) << end();
111   *dest = *src;  // Read/write multiple elements of vector<uint8_t> at once. Assumes sizeof(int) == 4 on the host as well.
112   const uint8_t rdest_32bit = rdest & 0x3;
113   trace(Callstack_depth+1, "run") << rname(rdest_32bit) << " now contains 0x" << HEXWORD << Reg[rdest_32bit].u << end();
114   break;
115 }
116 
117 :(code)
118 void test_cannot_copy_byte_to_ESP_EBP_ESI_EDI() {
119   Reg[ESI].u = 0xaabbccdd;
120   Reg[EBX].u = 0x11223344;
121   run(
122       "== code 0x1\n"
123       // op     ModR/M  SIB   displacement  immediate
124       "  8a     f3                                      \n"  // copy just the byte at *EBX to 8-bit register '6'
125       // ModR/M in binary: 11 (direct mode) 110 (dest 8-bit 'register 6') 011 (src EBX)
126   );
127   CHECK_TRACE_CONTENTS(
128       // ensure 8-bit register '6' is DH, not ESI
129       "run: copy r8/m8-at-r32 to DH\n"
130       "run: storing 0x44\n"
131   );
132   // ensure ESI is unchanged
133   CHECK_EQ(Reg[ESI].u, 0xaabbccdd);
134 }
135 
136 //:
137 
138 :(before "End Initialize Op Names")
139 put_new(Name, "c6", "copy imm8 to r8/m8-at-r32 (mov)");  // description text for opcode c6 (handled by `case 0xc6` below)
140 
141 :(code)
142 void test_copy_imm8_to_mem_at_r32() {
143   Reg[EAX].i = 0x2000;
144   run(
145       "== code 0x1\n"
146       // op     ModR/M  SIB   displacement  immediate
147       "  c6     00                          dd          \n"  // copy to the byte at *EAX
148       // ModR/M in binary: 00 (indirect mode) 000 (unused) 000 (dest EAX)
149       "== data 0x2000\n"
150       "f0 cc bb aa\n"
151   );
152   CHECK_TRACE_CONTENTS(
153       "run: copy imm8 to r8/m8-at-r32\n"
154       "run: effective address is 0x00002000 (EAX)\n"
155       "run: storing 0xdd\n"
156   );
157   CHECK_EQ(0xaabbccdd, read_mem_u32(0x2000));
158 }
159 
160 :(before "End Single-Byte Opcodes")
161 case 0xc6: {  // copy imm8 to r/m8
162   const uint8_t modrm = next();
163   const uint8_t src = next();
164   trace(Callstack_depth+1, "run") << "copy imm8 to r8/m8-at-r32" << end();
165   trace(Callstack_depth+1, "run") << "imm8 is 0x" << HEXWORD << NUM(src) << end();
166   const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
167   if (subop != 0) {
168     cerr << "unrecognized subop for opcode c6: " << NUM(subop) << " (only 0/copy currently implemented)\n";
169     exit(1);
170   }
171   // use unsigned to zero-extend 8-bit value to 32 bits
172   uint8_t* dest = effective_byte_address(modrm);
173   *dest = src;  // Write multiple elements of vector<uint8_t> at once. Assumes sizeof(int) == 4 on the host as well.
174   trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*dest) << end();
175   break;
176 }
177 
178 //:: set flags (setcc)
179 
180 :(before "End Initialize Op Names")
// Description text for the setcc opcodes handled under "Two-Byte Opcodes Starting With 0f" below.
// Each description names the flag condition under which the r/m8 operand is set to 1 (0 otherwise).
181 put_new(Name_0f, "94", "set r8/m8-at-rm32 to 1 if equal, if ZF is set, 0 otherwise (setcc/setz/sete)");
182 put_new(Name_0f, "95", "set r8/m8-at-rm32 to 1 if not equal, if ZF is not set, 0 otherwise (setcc/setnz/setne)");
183 put_new(Name_0f, "9f", "set r8/m8-at-rm32 to 1 if greater (signed), if ZF is unset and SF == OF, 0 otherwise (setcc/setg/setnle)");
184 put_new(Name_0f, "97", "set r8/m8-at-rm32 to 1 if greater (unsigned), if ZF is unset and CF is unset, 0 otherwise (setcc/seta/setnbe)");
185 put_new(Name_0f, "9d", "set r8/m8-at-rm32 to 1 if greater or equal (signed), if SF == OF, 0 otherwise (setcc/setge/setnl)");
186 put_new(Name_0f, "93", "set r8/m8-at-rm32 to 1 if greater or equal (unsigned), if CF is unset, 0 otherwise (setcc/setae/setnb)");
187 put_new(Name_0f, "9c", "set r8/m8-at-rm32 to 1 if lesser (signed), if SF != OF, 0 otherwise (setcc/setl/setnge)");
188 put_new(Name_0f, "92", "set r8/m8-at-rm32 to 1 if lesser (unsigned), if CF is set, 0 otherwise (setcc/setb/setnae)");
189 put_new(Name_0f, "9e", "set r8/m8-at-rm32 to 1 if lesser or equal (signed), if ZF is set or SF != OF, 0 otherwise (setcc/setle/setng)");
190 put_new(Name_0f, "96", "set r8/m8-at-rm32 to 1 if lesser or equal (unsigned), if ZF is set or CF is set, 0 otherwise (setcc/setbe/setna)");
191 
192 :(before "End Two-Byte Opcodes Starting With 0f")
193 case 0x94: {  // set r8/m8-at-rm32 if ZF
194   const uint8_t modrm = next();
195   trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
196   uint8_t* dest = effective_byte_address(modrm);
197   *dest = ZF;
198   trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
199   break;
200 }
201 case 0x95: {  // set r8/m8-at-rm32 if !ZF
202   const uint8_t modrm = next();
203   trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
204   uint8_t* dest = effective_byte_address(modrm);
205   *dest = !ZF;
206   trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
207   break;
208 }
209 case 0x9f: {  // set r8/m8-at-rm32 if !SF and !ZF
210   const uint8_t modrm = next();
211   trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
212   uint8_t* dest = effective_byte_address(modrm);
213   *dest = !ZF && SF == OF;
214   trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
215   break;
216 }
217 case 0x97: {  // set r8/m8-at-rm32 if !CF and !ZF
218   const uint8_t modrm = next();
219   trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
220   uint8_t* dest = effective_byte_address(modrm);
221   *dest = (!CF && !ZF);
222   trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
223   break;
224 }
225 case 0x9d: {  // set r8/m8-at-rm32 if !SF
226   const uint8_t modrm = next();
227   trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
228   uint8_t* dest = effective_byte_address(modrm);
229   *dest = (SF == OF);
230   trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
231   break;
232 }
233 case 0x93: {  // set r8/m8-at-rm32 if !CF
234   const uint8_t modrm = next();
235   trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
236   uint8_t* dest = effective_byte_address(modrm);
237   *dest = !CF;
238   trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
239   break;
240 }
241 case 0x9c: {  // set r8/m8-at-rm32 if SF and !ZF
242   const uint8_t modrm = next();
243   trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
244   uint8_t* dest = effective_byte_address(modrm);
245   *dest = (SF != OF);
246   trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
247   break;
248 }
249 case 0x92: {  // set r8/m8-at-rm32 if CF
250   const uint8_t modrm = next();
251   trace(Callstack_depth+1, "run") &8
sCHA@eYu䱮Xb^bi|̰
dII" XI{d?wc9F	W㔪j6I_-*&Tb0nq͕0/-vxrˉoL"	M߸Q΅s1͚1&xU0%^)vpg_Vc埝}T=e&{]E6zc?sp0^gT<pa#z$Å]q9y
pmK	6F.uwnLv HNJ5e]jw%)o&I;5ih Dm=]NGY}*7cr(KڞOҦ	uwhR.JYtse;£Xf~J^Ao;Y.1<鰞-RB'%ZtFd^3I=64<}2YEeUL}a0Ϟv:;ov0,yl{>ub(eQ-
tqC!B9<ޡoK
{
7:b;qsXRŤ(ljZ:.f>f0 ]h=,[>nƧKNqlTIi,"le~~vFuv翛ٍ9w3wfv<'CIʹD*$e~U Eʲ;ٴ-PU(ex6U#/gED͐ukFIfȺ@d#0YtԌty X>
O~ւZFYc:/N^]@Oxy#+hX<_<{q
n܇{=??I1v_Qg\_IS I_7O}<{G%dJׁZ	)s(\gNgG_CE}Rs=(ZO'c鳓guGχn:\$8FT3
d.΃e¾E?	F?nMtYF[]BcC_}%t\$38~OTi
PY8ch&==ƟKj!?Rh{.uW3tNĨvI>wF}6ٓ`0m~V|g'p/`C&j;"e'qax#I-J:Q/QC;xp{ǮiUG8t)/	o`@Mfu!ıZƔLǩskaSb AWw"Wv5^cOsI18NjJYn`QF0'{Zת}J䋵*ASUS	th5]FYpiLXb4SyWe<
4h
DgY
ape0D*KwsgcJ*(S0n!: ^v腣o*sJ^ӡIǵ*mB߻#AmU3rs$_s6"UE
cQStRI*אMR%cӹL,h#y}@֫2s,s(7{nu_R`pp^
sղl6jX=<47qu⏏Pm1vnwQ 0vB۪)ul:cw{TExuE-ixH.M2fMU<YN5:"Di{jե\zsB3轇hkTtNBGeɹkWm
\Z%Uldߥ¨ҽfp%]2R^ǹ
|̮4!uE
;-(/؋
կYD3.1nTS9u+W}yYQuvRRc;o8n0M}Zn]:0	aF2"k1AĵW X	lѪ=Dc
bH]ؠiS744eMi(`=#_bq+vW@(ё1Foպ Mxa֌kY}\Ԭp$Ԉ:"s;]en ]sWvjK)Q=kNVWz>SҨ@!)>ngjX-EV#^m1u3B=	^bv6q%W$8dzl}yC,*qS
`;.-Ȍ.D/v /gN1	@)SZ
;F|	bQb|s@HfUZg|CD| h5`~Jb[R]4-m$3jwi>=!̊LzT6t¤ϏKX3Y16_q-RL-s9Og'+۸]yܮej|9gK1HbfC\2&*nWS=o:hP`Eserǫ0{Fn`t>f\T	5O]Y<S
$OS46a
/^8xB-jt}
rF	@pяfR{#',ye+w͚n԰$4(eJ"zC[3bVjbqIBw3XD0A&n]^$gH蓯`K/ۮI^yBdCVOUXy!B3E\E>0<>)3%Q.0֥'	*kahw{6f(Α "x/:D~EJ]A6})EOXƛNx,-4ƄXㅑID"KDfmFS,2`0Jޔ