https://github.com/akkartik/mu/blob/master/030---operands.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 :(before "End Help Texts")
16 put_new(Help, "instructions",
17 "Each x86 instruction consists of an instruction or opcode and some number\n"
18 "of operands.\n"
19 "Each operand has a type. An instruction won't have more than one operand of\n"
20 "any type.\n"
21 "Each instruction has some set of allowed operand types. It'll reject others.\n"
22 "The complete list of operand types: mod, subop, r32 (register), rm32\n"
23 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
24 "imm32.\n"
25 "Each of these has its own help page. Try reading 'subx help mod' next.\n"
26 );
27 :(before "End Help Contents")
28 cerr << " instructions\n";
29
30 :(code)
31 void test_pack_immediate_constants() {
32 run(
33 "== code 0x1\n"
34 "bb 0x2a/imm32\n"
35 );
36 CHECK_TRACE_CONTENTS(
37 "transform: packing instruction 'bb 0x2a/imm32'\n"
38 "transform: instruction after packing: 'bb 2a 00 00 00'\n"
39 "run: copy imm32 0x0000002a to EBX\n"
40 );
41 }
42
43
44
45 :(before "End Globals")
46 set<string> Instruction_operands;
47 :(before "End One-time Setup")
48 Instruction_operands.insert("subop");
49 Instruction_operands.insert("mod");
50 Instruction_operands.insert("rm32");
51 Instruction_operands.insert("base");
52 Instruction_operands.insert("index");
53 Instruction_operands.insert("scale");
54 Instruction_operands.insert("r32");
55 Instruction_operands.insert("disp8");
56 Instruction_operands.insert("disp16");
57 Instruction_operands.insert("disp32");
58 Instruction_operands.insert("imm8");
59 Instruction_operands.insert("imm32");
60
61 :(before "End Help Texts")
62 init_operand_type_help();
63 :(code)
64 void init_operand_type_help() {
65 put(Help, "mod",
66 "2-bit operand controlling the _addressing mode_ of many instructions,\n"
67 "to determine how to compute the _effective address_ to look up memory at\n"
68 "based on the 'rm32' operand and potentially others.\n"
69 "\n"
70 "If mod = 3, just operate on the contents of the register specified by rm32\n"
71 " (direct mode).\n"
72 "If mod = 2, effective address is usually* rm32 + disp32\n"
73 " (indirect mode with displacement).\n"
74 "If mod = 1, effective address is usually* rm32 + disp8\n"
75 " (indirect mode with displacement).\n"
76 "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
77 "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
78 " Using it as an address gets more involved. For more details,\n"
79 " try reading the help pages for 'base', 'index' and 'scale'.)\n"
80 "\n"
81 "For complete details, spend some time with two tables in the IA-32 software\n"
82 "developer's manual that are also included in this repo:\n"
83 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
84 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
85 );
86 put(Help, "subop",
87 "Additional 3-bit operand for determining the instruction when the opcode\n"
88 "is 81, 8f, d3, f7 or ff.\n"
89 "Can't coexist with operand of type 'r32' in a single instruction, because\n"
90 "the two use the same bits.\n"
91 );
92 put(Help, "r32",
93 "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
94 );
95 put(Help, "rm32",
96 "32-bit value in register or memory. The precise details of its construction\n"
97 "depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n"
98 "potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n"
99 "('disp8' or 'disp32').\n"
100 "\n"
101 "For complete details, spend some time with two tables in the IA-32 software\n"
102 "developer's manual that are also included in this repo:\n"
103 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
104 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
105 );
106 put(Help, "base",
107 "Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
108 "register containing an address to look up.\n"
109 "This address may be further modified by 'index' and 'scale' operands.\n"
110 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
111 "For complete details, spend some time with the IA-32 software developer's manual,\n"
112 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
113 "It is included in this repository as 'sib.pdf'.\n"
114 );
115 put(Help, "index",
116 "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
117 "the 'base' operand to compute the 'effective address' at which to look up memory.\n"
118 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
119 "For complete details, spend some time with the IA-32 software developer's manual,\n"
120 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
121 "It is included in this repository as 'sib.pdf'.\n"
122 );
123 put(Help, "scale",
124 "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
125 "power of 2 to be multiplied to the 'index' operand before adding the result to\n"
126 "the 'base' operand to compute the _effective address_ to operate on.\n"
127 " effective address = base + index * scale + displacement (disp8 or disp32)\n"
128 "\n"
129 "When scale is 0, use index unmodified.\n"
130 "When scale is 1, multiply index by 2.\n"
131 "When scale is 2, multiply index by 4.\n"
132 "When scale is 3, multiply index by 8.\n"
133 "\n"
134 "For complete details, spend some time with the IA-32 software developer's manual,\n"
135 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
136 "It is included in this repository as 'sib.pdf'.\n"
137 );
138 put(Help, "disp8",
139 "8-bit value to be added in many instructions.\n"
140 );
141 put(Help, "disp16",
142 "16-bit value to be added in many instructions.\n"
143 "Currently not used in any SubX instructions.\n"
144 );
145 put(Help, "disp32",
146 "32-bit value to be added in many instructions.\n"
147 );
148 put(Help, "imm8",
149 "8-bit value for many instructions.\n"
150 );
151 put(Help, "imm32",
152 "32-bit value for many instructions.\n"
153 );
154 }
155
156
157
158 :(after "Begin Transforms")
159
160 Transform.push_back(pack_operands);
161
162
163 :(code)
164 void pack_operands(program& p) {
165 if (p.segments.empty()) return;
166 segment& code = *find(p, "code");
167
168 trace(3, "transform") << "-- pack operands" << end();
169 for (int i = 0; i < SIZE(code.lines); ++i) {
170 line& inst = code.lines.at(i);
171 if (all_hex_bytes(inst)) continue;
172 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
173 pack_operands(inst);
174 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
175 }
176 }
177
178 void pack_operands(line& inst) {
179 line new_inst;
180 add_opcodes(inst, new_inst);
181 add_modrm_byte(inst, new_inst);
182 add_sib_byte(inst, new_inst);
183 add_disp_bytes(inst, new_inst);
184 add_imm_bytes(inst, new_inst);
185 inst.words.swap(new_inst.words);
186 }
187
188 void add_opcodes(const line& in, line& out) {
189 out.words.push_back(in.words.at(0));
190 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
191 out.words.push_back(in.words.at(1));
192 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
193 out.words.push_back(in.words.at(2));
194 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
195 out.words.push_back(in.words.at(2));
196 }
197
198 void add_modrm_byte(const line& in, line& out) {
199 uint8_t mod=0, reg_subop=0, rm32=0;
200 bool emit = false;
201 for (int i = 0; i < SIZE(in.words); ++i) {
202 const word& curr = in.words.at(i);
203 if (has_operand_metadata(curr, "mod")) {
204 mod = hex_byte(curr.data);
205 emit = true;
206 }
207 else if (has_operand_metadata(curr, "rm32")) {
208 rm32 = hex_byte(curr.data);
209 emit = true;
210 }
211 else if (has_operand_metadata(curr, "r32")) {
212 reg_subop = hex_byte(curr.data);
213 emit = true;
214 }
215 else if (has_operand_metadata(curr, "subop")) {
216 reg_subop = hex_byte(curr.data);
217 emit = true;
218 }
219 }
220 if (emit)
221 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
222 }
223
224 void add_sib_byte(const line& in, line& out) {
225 uint8_t scale=0, index=0, base=0;
226 bool emit = false;
227 for (int i = 0; i < SIZE(in.words); ++i) {
228 const word& curr = in.words.at(i);
229 if (has_operand_metadata(curr, "scale")) {
230 scale = hex_byte(curr.data);
231 emit = true;
232 }
233 else if (has_operand_metadata(curr, "index")) {
234 index = hex_byte(curr.data);
235 emit = true;
236 }
237 else if (has_operand_metadata(curr, "base")) {
238 base = hex_byte(curr.data);
239 emit = true;
240 }
241 }
242 if (emit)
243 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
244 }
245
246 void add_disp_bytes(const line& in, line& out) {
247 for (int i = 0; i < SIZE(in.words); ++i) {
248 const word& curr = in.words.at(i);
249 if (has_operand_metadata(curr, "disp8"))
250 emit_hex_bytes(out, curr, 1);
251 if (has_operand_metadata(curr, "disp16"))
252 emit_hex_bytes(out, curr, 2);
253 else if (has_operand_metadata(curr, "disp32"))
254 emit_hex_bytes(out, curr, 4);
255 }
256 }
257
258 void add_imm_bytes(const line& in, line& out) {
259 for (int i = 0; i < SIZE(in.words); ++i) {
260 const word& curr = in.words.at(i);
261 if (has_operand_metadata(curr, "imm8"))
262 emit_hex_bytes(out, curr, 1);
263 else if (has_operand_metadata(curr, "imm32"))
264 emit_hex_bytes(out, curr, 4);
265 }
266 }
267
268 void emit_hex_bytes(line& out, const word& w, int num) {
269 assert(num <= 4);
270 bool is_number = looks_like_hex_int(w.data);
271 if (num == 1 || !is_number) {
272 out.words.push_back(w);
273 if (is_number)
274 out.words.back().data = hex_byte_to_string(parse_int(w.data));
275 return;
276 }
277 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
278 }
279
280 void emit_hex_bytes(line& out, uint32_t val, int num) {
281 assert(num <= 4);
282 for (int i = 0; i < num; ++i) {
283 out.words.push_back(hex_byte_text(val & 0xff));
284 val = val >> 8;
285 }
286 }
287
288 word hex_byte_text(uint8_t val) {
289 word result;
290 result.data = hex_byte_to_string(val);
291 result.original = result.data+"/auto";
292 return result;
293 }
294
295 string hex_byte_to_string(uint8_t val) {
296 ostringstream out;
297
298 out << HEXBYTE << NUM(val);
299 return out.str();
300 }
301
302 string to_string(const vector<word>& in) {
303 ostringstream out;
304 for (int i = 0; i < SIZE(in); ++i) {
305 if (i > 0) out << ' ';
306 out << in.at(i).data;
307 }
308 return out.str();
309 }
310
311 :(before "End Unit Tests")
312 void test_preserve_metadata_when_emitting_single_byte() {
313 word in;
314 in.data = "f0";
315 in.original = "f0/foo";
316 line out;
317 emit_hex_bytes(out, in, 1);
318 CHECK_EQ(out.words.at(0).data, "f0");
319 CHECK_EQ(out.words.at(0).original, "f0/foo");
320 }
321
322 :(code)
323 void test_pack_disp8() {
324 run(
325 "== code 0x1\n"
326 "74 2/disp8\n"
327 );
328 CHECK_TRACE_CONTENTS(
329 "transform: packing instruction '74 2/disp8'\n"
330 "transform: instruction after packing: '74 02'\n"
331 );
332 }
333
334 void test_pack_disp8_negative() {
335 transform(
336 "== code 0x1\n"
337
338 "74 -1/disp8\n"
339 );
340 CHECK_TRACE_CONTENTS(
341 "transform: packing instruction '74 -1/disp8'\n"
342 "transform: instruction after packing: '74 ff'\n"
343 );
344 }
345
346
347 void transform(const string& text_bytes) {
348 program p;
349 istringstream in(text_bytes);
350 parse(in, p);
351 if (trace_contains_errors()) return;
352 transform(p);
353 }
354
355 void test_pack_modrm_imm32() {
356 run(
357 "== code 0x1\n"
358
359
360
361 " 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 \n"
362 );
363 CHECK_TRACE_CONTENTS(
364 "transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'\n"
365 "transform: instruction after packing: '81 c3 01 00 00 00'\n"
366 );
367 }
368
369 void test_pack_imm32_large() {
370 run(
371 "== code 0x1\n"
372 "b9 0x080490a7/imm32\n"
373 );
374 CHECK_TRACE_CONTENTS(
375 "transform: packing instruction 'b9 0x080490a7/imm32'\n"
376 "transform: instruction after packing: 'b9 a7 90 04 08'\n"
377 );
378 }
379
380 void test_pack_immediate_constants_hex() {
381 run(
382 "== code 0x1\n"
383 "b9 0x2a/imm32\n"
384 );
385 CHECK_TRACE_CONTENTS(
386 "transform: packing instruction 'b9 0x2a/imm32'\n"
387 "transform: instruction after packing: 'b9 2a 00 00 00'\n"
388 "run: copy imm32 0x0000002a to ECX\n"
389 );
390 }
391
392 void test_pack_silently_ignores_non_hex() {
393 Hide_errors = true;
394 transform(
395 "== code 0x1\n"
396 "b9 foo/imm32\n"
397 );
398 CHECK_TRACE_CONTENTS(
399 "transform: packing instruction 'b9 foo/imm32'\n"
400
401 "transform: instruction after packing: 'b9 foo'\n"
402 );
403 }
404
405 void test_pack_flags_bad_hex() {
406 Hide_errors = true;
407 run(
408 "== code 0x1\n"
409 "b9 0xfoo/imm32\n"
410 );
411 CHECK_TRACE_CONTENTS(
412 "error: not a number: 0xfoo\n"
413 );
414 }
415
416 void test_pack_flags_uppercase_hex() {
417 Hide_errors = true;
418 run(
419 "== code 0x1\n"
420 "b9 0xAb/imm32\n"
421 );
422 CHECK_TRACE_CONTENTS(
423 "error: uppercase hex not allowed: 0xAb\n"
424 );
425 }
426
427
428
429 bool all_hex_bytes(const line& inst) {
430 for (int i = 0; i < SIZE(inst.words); ++i)
431 if (!is_hex_byte(inst.words.at(i)))
432 return false;
433 return true;
434 }
435
436 bool is_hex_byte(const word& curr) {
437 if (contains_any_operand_metadata(curr))
438 return false;
439 if (SIZE(curr.data) != 2)
440 return false;
441 if (curr.data.find_first_not_of("0123456789abcdef") != string::npos)
442 return false;
443 return true;
444 }
445
446 bool contains_any_operand_metadata(const word& word) {
447 for (int i = 0; i < SIZE(word.metadata); ++i)
448 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
449 return true;
450 return false;
451 }
452
453 bool has_operand_metadata(const line& inst, const string& m) {
454 bool result = false;
455 for (int i = 0; i < SIZE(inst.words); ++i) {
456 if (!has_operand_metadata(inst.words.at(i), m)) continue;
457 if (result) {
458 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
459 return false;
460 }
461 result = true;
462 }
463 return result;
464 }
465
466 bool has_operand_metadata(const word& w, const string& m) {
467 bool result = false;
468 bool metadata_found = false;
469 for (int i = 0; i < SIZE(w.metadata); ++i) {
470 const string& curr = w.metadata.at(i);
471 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;
472 if (metadata_found) {
473 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
474 return false;
475 }
476 metadata_found = true;
477 result = (curr == m);
478 }
479 return result;
480 }
481
482 word metadata(const line& inst, const string& m) {
483 for (int i = 0; i < SIZE(inst.words); ++i)
484 if (has_operand_metadata(inst.words.at(i), m))
485 return inst.words.at(i);
486 assert(false);
487 }
488
// Cheap syntactic check for whether 's' is meant to be a number: it must
// start with a sign or a decimal digit. Hex values beginning with a letter
// (e.g. "ff") are deliberately not treated as numbers here.
bool looks_like_hex_int(const string& s) {
  if (s.empty()) return false;
  const char first = s.at(0);
  if (first == '-' || first == '+') return true;
  // isdigit() is undefined for negative values other than EOF, so go through
  // unsigned char in case the input contains non-ASCII bytes.
  if (isdigit(static_cast<unsigned char>(first))) return true;
  return false;
}
496
497 string to_string(const line& inst) {
498 ostringstream out;
499 for (int i = 0; i < SIZE(inst.words); ++i) {
500 if (i > 0) out << ' ';
501 out << inst.words.at(i).original;
502 }
503 return out.str();
504 }
505
506 int32_t parse_int(const string& s) {
507 if (s.empty()) return 0;
508 if (contains_uppercase(s)) {
509 raise << "uppercase hex not allowed: " << s << '\n' << end();
510 return 0;
511 }
512 istringstream in(s);
513 in >> std::hex;
514 if (s.at(0) == '-') {
515 int32_t result = 0;
516 in >> result;
517 if (!in || !in.eof()) {
518 raise << "not a number: " << s << '\n' << end();
519 return 0;
520 }
521 return result;
522 }
523 uint32_t uresult = 0;
524 in >> uresult;
525 if (!in || !in.eof()) {
526 raise << "not a number: " << s << '\n' << end();
527 return 0;
528 }
529 return static_cast<int32_t>(uresult);
530 }
531 :(before "End Unit Tests")
532 void test_parse_int() {
533 CHECK_EQ(0, parse_int("0"));
534 CHECK_EQ(0, parse_int("0x0"));
535 CHECK_EQ(0, parse_int("0x0"));
536 CHECK_EQ(16, parse_int("10"));
537 CHECK_EQ(-1, parse_int("-1"));
538 CHECK_EQ(-1, parse_int("0xffffffff"));
539 }