https://github.com/akkartik/mu/blob/master/subx/030---operands.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 :(before "End Help Texts")
16 put_new(Help, "instructions",
17 "Each x86 instruction consists of an instruction or opcode and some number\n"
18 "of operands.\n"
19 "Each operand has a type. An instruction won't have more than one operand of\n"
20 "any type.\n"
21 "Each instruction has some set of allowed operand types. It'll reject others.\n"
22 "The complete list of operand types: mod, subop, r32 (register), rm32\n"
23 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
24 "imm32.\n"
25 "Each of these has its own help page. Try reading 'subx help mod' next.\n"
26 );
27 :(before "End Help Contents")
28 cerr << " instructions\n";
29
30 :(scenario pack_immediate_constants)
31 == 0x1
32 bb 0x2a/imm32
33 +transform: packing instruction 'bb 0x2a/imm32'
34 +transform: instruction after packing: 'bb 2a 00 00 00'
35 +run: copy imm32 0x0000002a to EBX
36
37
38
39 :(before "End Globals")
40 set<string> Instruction_operands;
41 :(before "End One-time Setup")
42 Instruction_operands.insert("subop");
43 Instruction_operands.insert("mod");
44 Instruction_operands.insert("rm32");
45 Instruction_operands.insert("base");
46 Instruction_operands.insert("index");
47 Instruction_operands.insert("scale");
48 Instruction_operands.insert("r32");
49 Instruction_operands.insert("disp8");
50 Instruction_operands.insert("disp16");
51 Instruction_operands.insert("disp32");
52 Instruction_operands.insert("imm8");
53 Instruction_operands.insert("imm32");
54
55 :(before "End Help Texts")
56 init_operand_type_help();
57 :(code)
58 void init_operand_type_help() {
59 put(Help, "mod",
60 "2-bit operand controlling the _addressing mode_ of many instructions,\n"
61 "to determine how to compute the _effective address_ to look up memory at\n"
62 "based on the 'rm32' operand and potentially others.\n"
63 "\n"
64 "If mod = 3, just operate on the contents of the register specified by rm32\n"
65 " (direct mode).\n"
66 "If mod = 2, effective address is usually* rm32 + disp32\n"
67 " (indirect mode with displacement).\n"
68 "If mod = 1, effective address is usually* rm32 + disp8\n"
69 " (indirect mode with displacement).\n"
70 "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
71 "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
72 " Using it as an address gets more involved. For more details,\n"
73 " try reading the help pages for 'base', 'index' and 'scale'.)\n"
74 "\n"
75 "For complete details, spend some time with two tables in the IA-32 software\n"
76 "developer's manual that are also included in this repo:\n"
77 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
78 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
79 );
80 put(Help, "subop",
81 "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
82 "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
83 );
84 put(Help, "r32",
85 "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
86 );
87 put(Help, "rm32",
88 "32-bit value in register or memory. The precise details of its construction\n"
89 "depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n"
90 "potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n"
91 "('disp8' or 'disp32').\n"
92 "\n"
93 "For complete details, spend some time with two tables in the IA-32 software\n"
94 "developer's manual that are also included in this repo:\n"
95 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
96 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
97 );
98 put(Help, "base",
99 "Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
100 "register containing an address to look up.\n"
101 "This address may be further modified by 'index' and 'scale' operands.\n"
102 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
103 "For complete details, spend some time with the IA-32 software developer's manual,\n"
104 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
105 "It is included in this repository as 'sib.pdf'.\n"
106 );
107 put(Help, "index",
108 "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
109 "the 'base' operand to compute the 'effective address' at which to look up memory.\n"
110 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
111 "For complete details, spend some time with the IA-32 software developer's manual,\n"
112 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
113 "It is included in this repository as 'sib.pdf'.\n"
114 );
115 put(Help, "scale",
116 "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
117 "power of 2 to be multiplied to the 'index' operand before adding the result to\n"
118 "the 'base' operand to compute the _effective address_ to operate on.\n"
119 " effective address = base + index * scale + displacement (disp8 or disp32)\n"
120 "\n"
121 "When scale is 0, use index unmodified.\n"
122 "When scale is 1, multiply index by 2.\n"
123 "When scale is 2, multiply index by 4.\n"
124 "When scale is 3, multiply index by 8.\n"
125 "\n"
126 "For complete details, spend some time with the IA-32 software developer's manual,\n"
127 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
128 "It is included in this repository as 'sib.pdf'.\n"
129 );
130 put(Help, "disp8",
131 "8-bit value to be added in many instructions.\n"
132 );
133 put(Help, "disp16",
134 "16-bit value to be added in many instructions.\n"
135 );
136 put(Help, "disp32",
137 "32-bit value to be added in many instructions.\n"
138 );
139 put(Help, "imm8",
140 "8-bit value for many instructions.\n"
141 );
142 put(Help, "imm32",
143 "32-bit value for many instructions.\n"
144 );
145 }
146
147
148
149 :(after "Begin Transforms")
150
151 Transform.push_back(pack_operands);
152
153
154 :(code)
155 void pack_operands(program& p) {
156 if (p.segments.empty()) return;
157 segment& code = p.segments.at(0);
158
159 trace(99, "transform") << "-- pack operands" << end();
160 for (int i = 0; i < SIZE(code.lines); ++i) {
161 line& inst = code.lines.at(i);
162 if (all_hex_bytes(inst)) continue;
163 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
164 pack_operands(inst);
165 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
166 }
167 }
168
169 void pack_operands(line& inst) {
170 line new_inst;
171 add_opcodes(inst, new_inst);
172 add_modrm_byte(inst, new_inst);
173 add_sib_byte(inst, new_inst);
174 add_disp_bytes(inst, new_inst);
175 add_imm_bytes(inst, new_inst);
176 inst.words.swap(new_inst.words);
177 }
178
179 void add_opcodes(const line& in, line& out) {
180 out.words.push_back(in.words.at(0));
181 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
182 out.words.push_back(in.words.at(1));
183 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
184 out.words.push_back(in.words.at(2));
185 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
186 out.words.push_back(in.words.at(2));
187 }
188
189 void add_modrm_byte(const line& in, line& out) {
190 uint8_t mod=0, reg_subop=0, rm32=0;
191 bool emit = false;
192 for (int i = 0; i < SIZE(in.words); ++i) {
193 const word& curr = in.words.at(i);
194 if (has_operand_metadata(curr, "mod")) {
195 mod = hex_byte(curr.data);
196 emit = true;
197 }
198 else if (has_operand_metadata(curr, "rm32")) {
199 rm32 = hex_byte(curr.data);
200 emit = true;
201 }
202 else if (has_operand_metadata(curr, "r32")) {
203 reg_subop = hex_byte(curr.data);
204 emit = true;
205 }
206 else if (has_operand_metadata(curr, "subop")) {
207 reg_subop = hex_byte(curr.data);
208 emit = true;
209 }
210 }
211 if (emit)
212 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
213 }
214
215 void add_sib_byte(const line& in, line& out) {
216 uint8_t scale=0, index=0, base=0;
217 bool emit = false;
218 for (int i = 0; i < SIZE(in.words); ++i) {
219 const word& curr = in.words.at(i);
220 if (has_operand_metadata(curr, "scale")) {
221 scale = hex_byte(curr.data);
222 emit = true;
223 }
224 else if (has_operand_metadata(curr, "index")) {
225 index = hex_byte(curr.data);
226 emit = true;
227 }
228 else if (has_operand_metadata(curr, "base")) {
229 base = hex_byte(curr.data);
230 emit = true;
231 }
232 }
233 if (emit)
234 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
235 }
236
237 void add_disp_bytes(const line& in, line& out) {
238 for (int i = 0; i < SIZE(in.words); ++i) {
239 const word& curr = in.words.at(i);
240 if (has_operand_metadata(curr, "disp8"))
241 emit_hex_bytes(out, curr, 1);
242 if (has_operand_metadata(curr, "disp16"))
243 emit_hex_bytes(out, curr, 2);
244 else if (has_operand_metadata(curr, "disp32"))
245 emit_hex_bytes(out, curr, 4);
246 }
247 }
248
249 void add_imm_bytes(const line& in, line& out) {
250 for (int i = 0; i < SIZE(in.words); ++i) {
251 const word& curr = in.words.at(i);
252 if (has_operand_metadata(curr, "imm8"))
253 emit_hex_bytes(out, curr, 1);
254 else if (has_operand_metadata(curr, "imm32"))
255 emit_hex_bytes(out, curr, 4);
256 }
257 }
258
259 void emit_hex_bytes(line& out, const word& w, int num) {
260 assert(num <= 4);
261 bool is_number = looks_like_hex_int(w.data);
262 if (num == 1 || !is_number) {
263 out.words.push_back(w);
264 if (is_number)
265 out.words.back().data = hex_byte_to_string(parse_int(w.data));
266 return;
267 }
268 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
269 }
270
271 void emit_hex_bytes(line& out, uint32_t val, int num) {
272 assert(num <= 4);
273 for (int i = 0; i < num; ++i) {
274 out.words.push_back(hex_byte_text(val & 0xff));
275 val = val >> 8;
276 }
277 }
278
279 word hex_byte_text(uint8_t val) {
280 word result;
281 result.data = hex_byte_to_string(val);
282 result.original = result.data+"/auto";
283 return result;
284 }
285
286 string hex_byte_to_string(uint8_t val) {
287 ostringstream out;
288
289 out << HEXBYTE << NUM(val);
290 return out.str();
291 }
292
293 string to_string(const vector<word>& in) {
294 ostringstream out;
295 for (int i = 0; i < SIZE(in); ++i) {
296 if (i > 0) out << ' ';
297 out << in.at(i).data;
298 }
299 return out.str();
300 }
301
302 :(before "End Unit Tests")
303 void test_preserve_metadata_when_emitting_single_byte() {
304 word in;
305 in.data = "f0";
306 in.original = "f0/foo";
307 line out;
308 emit_hex_bytes(out, in, 1);
309 CHECK_EQ(out.words.at(0).data, "f0");
310 CHECK_EQ(out.words.at(0).original, "f0/foo");
311 }
312
313 :(scenario pack_disp8)
314 == 0x1
315 74 2/disp8
316 +transform: packing instruction '74 2/disp8'
317 +transform: instruction after packing: '74 02'
318
319 :(scenarios transform)
320 :(scenario pack_disp8_negative)
321 == 0x1
322
323 74 -1/disp8
324 +transform: packing instruction '74 -1/disp8'
325 +transform: instruction after packing: '74 ff'
326 :(scenarios run)
327
328
329 :(code)
330 void transform(const string& text_bytes) {
331 program p;
332 istringstream in(text_bytes);
333 parse(in, p);
334 if (trace_contains_errors()) return;
335 transform(p);
336 }
337
338 :(scenario pack_modrm_imm32)
339 == 0x1
340
341
342
343 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32
344 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
345 +transform: instruction after packing: '81 c3 01 00 00 00'
346
347 :(scenario pack_imm32_large)
348 == 0x1
349 b9 0x080490a7/imm32
350 +transform: packing instruction 'b9 0x080490a7/imm32'
351 +transform: instruction after packing: 'b9 a7 90 04 08'
352
353 :(scenario pack_immediate_constants_hex)
354 == 0x1
355 b9 0x2a/imm32
356 +transform: packing instruction 'b9 0x2a/imm32'
357 +transform: instruction after packing: 'b9 2a 00 00 00'
358 +run: copy imm32 0x0000002a to ECX
359
360 :(scenarios transform)
361 :(scenario pack_silently_ignores_non_hex)
362 % Hide_errors = true;
363 == 0x1
364 b9 foo/imm32
365 +transform: packing instruction 'b9 foo/imm32'
366
367 +transform: instruction after packing: 'b9 foo'
368 :(scenarios run)
369
370 :(scenario pack_flags_bad_hex)
371 % Hide_errors = true;
372 == 0x1
373 b9 0xfoo/imm32
374 +error: not a number: 0xfoo
375
376
377
378 :(code)
379 bool all_hex_bytes(const line& inst) {
380 for (int i = 0; i < SIZE(inst.words); ++i)
381 if (!is_hex_byte(inst.words.at(i)))
382 return false;
383 return true;
384 }
385
386 bool is_hex_byte(const word& curr) {
387 if (contains_any_operand_metadata(curr))
388 return false;
389 if (SIZE(curr.data) != 2)
390 return false;
391 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
392 return false;
393 return true;
394 }
395
396 bool contains_any_operand_metadata(const word& word) {
397 for (int i = 0; i < SIZE(word.metadata); ++i)
398 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
399 return true;
400 return false;
401 }
402
403 bool has_operand_metadata(const line& inst, const string& m) {
404 bool result = false;
405 for (int i = 0; i < SIZE(inst.words); ++i) {
406 if (!has_operand_metadata(inst.words.at(i), m)) continue;
407 if (result) {
408 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
409 return false;
410 }
411 result = true;
412 }
413 return result;
414 }
415
416 bool has_operand_metadata(const word& w, const string& m) {
417 bool result = false;
418 bool metadata_found = false;
419 for (int i = 0; i < SIZE(w.metadata); ++i) {
420 const string& curr = w.metadata.at(i);
421 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;
422 if (metadata_found) {
423 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
424 return false;
425 }
426 metadata_found = true;
427 result = (curr == m);
428 }
429 return result;
430 }
431
432 word metadata(const line& inst, const string& m) {
433 for (int i = 0; i < SIZE(inst.words); ++i)
434 if (has_operand_metadata(inst.words.at(i), m))
435 return inst.words.at(i);
436 assert(false);
437 }
438
// Cheap check on the first character only: true if 's' starts with a sign
// ('-' or '+') or a decimal digit (which also covers a leading '0x').
// Empty strings, and strings starting with anything else, return false.
bool looks_like_hex_int(const string& s) {
  if (s.empty()) return false;
  if (s.at(0) == '-' || s.at(0) == '+') return true;
  // isdigit() is only defined for EOF and values representable as unsigned
  // char; a plain char may be negative for non-ASCII bytes, so convert first.
  if (isdigit(static_cast<unsigned char>(s.at(0)))) return true;

  return false;
}
446
447 :(code)
448 string to_string(const line& inst) {
449 ostringstream out;
450 for (int i = 0; i < SIZE(inst.words); ++i) {
451 if (i > 0) out << ' ';
452 out << inst.words.at(i).original;
453 }
454 return out.str();
455 }