https://github.com/akkartik/mu/blob/main/linux/bootstrap/032operands.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14 :(before "End Help Texts")
15 put_new(Help, "instructions",
16 "Each x86 instruction consists of an instruction or opcode and some number\n"
17 "of arguments.\n"
18 "Each argument has a type. An instruction won't have more than one argument of\n"
19 "any type.\n"
20 "Each instruction has some set of allowed argument types. It'll reject others.\n"
21 "The complete list of argument types: mod, subop, r32 (integer register),\n"
22 "rm32 (integer register or memory), x32 (floating point register),\n"
23 "xm32 (floating point register or memory), scale, index, base, disp8, disp16,\n"
24 "disp32,imm8,imm32.\n"
25 "Each of these has its own help page. Try reading 'bootstrap help mod' next.\n"
26 );
27 :(before "End Help Contents")
28 cerr << " instructions\n";
29
30 :(before "Running Test Program")
31 transform(p);
32 if (trace_contains_errors()) return;
33
34 :(code)
35 void test_pack_immediate_constants() {
36 run(
37 "== code 0x1\n"
38 "bb 0x2a/imm32\n"
39 );
40 CHECK_TRACE_CONTENTS(
41 "transform: packing instruction 'bb 0x2a/imm32'\n"
42 "transform: instruction after packing: 'bb 2a 00 00 00'\n"
43 "run: copy imm32 0x0000002a to EBX\n"
44 );
45 }
46
47
48
49 :(before "End Globals")
50 set<string> Instruction_arguments;
51 :(before "End One-time Setup")
52 Instruction_arguments.insert("subop");
53 Instruction_arguments.insert("mod");
54 Instruction_arguments.insert("rm32");
55 Instruction_arguments.insert("xm32");
56 Instruction_arguments.insert("base");
57 Instruction_arguments.insert("index");
58 Instruction_arguments.insert("scale");
59 Instruction_arguments.insert("r32");
60 Instruction_arguments.insert("x32");
61 Instruction_arguments.insert("disp8");
62 Instruction_arguments.insert("disp16");
63 Instruction_arguments.insert("disp32");
64 Instruction_arguments.insert("imm8");
65 Instruction_arguments.insert("imm32");
66
67 :(before "End Help Texts")
68 init_argument_type_help();
69 :(code)
70 void init_argument_type_help() {
71 put(Help, "mod",
72 "2-bit argument controlling the _addressing mode_ of many instructions,\n"
73 "to determine how to compute the _effective address_ to look up memory at\n"
74 "based on the 'rm32' argument and potentially others.\n"
75 "\n"
76 "If mod = 3, just operate on the contents of the register specified by rm32\n"
77 " (direct mode).\n"
78 "If mod = 2, effective address is usually* rm32 + disp32\n"
79 " (indirect mode with displacement).\n"
80 "If mod = 1, effective address is usually* rm32 + disp8\n"
81 " (indirect mode with displacement).\n"
82 "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
83 "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
84 " Using it as an address gets more involved. For more details,\n"
85 " try reading the help pages for 'base', 'index' and 'scale'.)\n"
86 "\n"
87 "For complete details, spend some time with two tables in the IA-32 software\n"
88 "developer's manual that are also included in this repo:\n"
89 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
90 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
91 );
92 put(Help, "subop",
93 "Additional 3-bit argument for determining the instruction when the opcode\n"
94 "is 81, 8f, d3, f7 or ff.\n"
95 "Can't coexist with argument of type 'r32' in a single instruction, because\n"
96 "the two use the same bits.\n"
97 );
98 put(Help, "r32",
99 "3-bit argument specifying an integer register argument used directly,\n"
100 "without any further addressing modes.\n"
101 );
102 put(Help, "x32",
103 "3-bit argument specifying a floating-point register argument used directly,\n"
104 "without any further addressing modes.\n"
105 );
106 put(Help, "rm32",
107 "32-bit value in an integer register or memory. The precise details of its\n"
108 "construction depend on the eponymous 3-bit 'rm32' argument, the 'mod' argument,\n"
109 "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
110 "and a displacement ('disp8' or 'disp32').\n"
111 "\n"
112 "For complete details, spend some time with two tables in the IA-32 software\n"
113 "developer's manual that are also included in this repo:\n"
114 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
115 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
116 );
117 put(Help, "xm32",
118 "32-bit value in a floating-point register or memory. The precise details of its\n"
119 "construction depend on the eponymous 3-bit 'xm32' argument, the 'mod' argument,\n"
120 "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
121 "and a displacement ('disp8' or 'disp32').\n"
122 "\n"
123 "For complete details, spend some time with two tables in the IA-32 software\n"
124 "developer's manual that are also included in this repo:\n"
125 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
126 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
127 "\n"
128 "One subtlety here: while /xm32 refers to floating-point registers in direct mode\n"
129 "(when /mod is 3), other addressing modes to construct memory addresses use integer registers\n"
130 "(just like /rm32). Other than direct mode, its behavior is identical to /rm32.\n"
131 );
132 put(Help, "base",
133 "Additional 3-bit argument (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
134 "register containing an address to look up.\n"
135 "This address may be further modified by 'index' and 'scale' arguments.\n"
136 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
137 "For complete details, spend some time with the IA-32 software developer's manual,\n"
138 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
139 "It is included in this repository as 'sib.pdf'.\n"
140 );
141 put(Help, "index",
142 "Optional 3-bit argument (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
143 "the 'base' argument to compute the 'effective address' at which to look up memory.\n"
144 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
145 "For complete details, spend some time with the IA-32 software developer's manual,\n"
146 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
147 "It is included in this repository as 'sib.pdf'.\n"
148 );
149 put(Help, "scale",
150 "Optional 2-bit argument (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
151 "power of 2 to be multiplied to the 'index' argument before adding the result to\n"
152 "the 'base' argument to compute the _effective address_ to operate on.\n"
153 " effective address = base + index * scale + displacement (disp8 or disp32)\n"
154 "\n"
155 "When scale is 0, use index unmodified.\n"
156 "When scale is 1, multiply index by 2.\n"
157 "When scale is 2, multiply index by 4.\n"
158 "When scale is 3, multiply index by 8.\n"
159 "\n"
160 "For complete details, spend some time with the IA-32 software developer's manual,\n"
161 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
162 "It is included in this repository as 'sib.pdf'.\n"
163 );
164 put(Help, "disp8",
165 "8-bit value to be added in many instructions.\n"
166 );
167 put(Help, "disp16",
168 "16-bit value to be added in many instructions.\n"
169 "Currently not used in any SubX instructions.\n"
170 );
171 put(Help, "disp32",
172 "32-bit value to be added in many instructions.\n"
173 );
174 put(Help, "imm8",
175 "8-bit value for many instructions.\n"
176 );
177 put(Help, "imm32",
178 "32-bit value for many instructions.\n"
179 );
180 }
181
182
183
184 :(after "Begin Transforms")
185 Transform.push_back(pack_arguments);
186
187 :(code)
188 void pack_arguments(program& p) {
189 if (p.segments.empty()) return;
190 segment& code = *find(p, "code");
191
192 trace(3, "transform") << "-- pack arguments" << end();
193 for (int i = 0; i < SIZE(code.lines); ++i) {
194 line& inst = code.lines.at(i);
195 if (all_hex_bytes(inst)) continue;
196 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
197 pack_arguments(inst);
198 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
199 }
200 }
201
202 void pack_arguments(line& inst) {
203 line new_inst;
204 add_opcodes(inst, new_inst);
205 add_modrm_byte(inst, new_inst);
206 add_sib_byte(inst, new_inst);
207 add_disp_bytes(inst, new_inst);
208 add_imm_bytes(inst, new_inst);
209 inst.words.swap(new_inst.words);
210 }
211
212 void add_opcodes(const line& in, line& out) {
213 out.words.push_back(in.words.at(0));
214 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
215 out.words.push_back(in.words.at(1));
216 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
217 out.words.push_back(in.words.at(2));
218 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
219 out.words.push_back(in.words.at(2));
220 }
221
222 void add_modrm_byte(const line& in, line& out) {
223 uint8_t mod=0, reg_subop=0, rm32=0;
224 bool emit = false;
225 for (int i = 0; i < SIZE(in.words); ++i) {
226 const word& curr = in.words.at(i);
227 if (has_argument_metadata(curr, "mod")) {
228 mod = hex_byte(curr.data);
229 emit = true;
230 }
231 else if (has_argument_metadata(curr, "rm32")) {
232 rm32 = hex_byte(curr.data);
233 emit = true;
234 }
235 else if (has_argument_metadata(curr, "r32")) {
236 reg_subop = hex_byte(curr.data);
237 emit = true;
238 }
239 else if (has_argument_metadata(curr, "xm32")) {
240 rm32 = hex_byte(curr.data);
241 emit = true;
242 }
243 else if (has_argument_metadata(curr, "x32")) {
244 reg_subop = hex_byte(curr.data);
245 emit = true;
246 }
247 else if (has_argument_metadata(curr, "subop")) {
248 reg_subop = hex_byte(curr.data);
249 emit = true;
250 }
251 }
252 if (emit)
253 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
254 }
255
256 void add_sib_byte(const line& in, line& out) {
257 uint8_t scale=0, index=0, base=0;
258 bool emit = false;
259 for (int i = 0; i < SIZE(in.words); ++i) {
260 const word& curr = in.words.at(i);
261 if (has_argument_metadata(curr, "scale")) {
262 scale = hex_byte(curr.data);
263 emit = true;
264 }
265 else if (has_argument_metadata(curr, "index")) {
266 index = hex_byte(curr.data);
267 emit = true;
268 }
269 else if (has_argument_metadata(curr, "base")) {
270 base = hex_byte(curr.data);
271 emit = true;
272 }
273 }
274 if (emit)
275 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
276 }
277
278 void add_disp_bytes(const line& in, line& out) {
279 for (int i = 0; i < SIZE(in.words); ++i) {
280 const word& curr = in.words.at(i);
281 if (has_argument_metadata(curr, "disp8"))
282 emit_hex_bytes(out, curr, 1);
283 if (has_argument_metadata(curr, "disp16"))
284 emit_hex_bytes(out, curr, 2);
285 else if (has_argument_metadata(curr, "disp32"))
286 emit_hex_bytes(out, curr, 4);
287 }
288 }
289
290 void add_imm_bytes(const line& in, line& out) {
291 for (int i = 0; i < SIZE(in.words); ++i) {
292 const word& curr = in.words.at(i);
293 if (has_argument_metadata(curr, "imm8"))
294 emit_hex_bytes(out, curr, 1);
295 else if (has_argument_metadata(curr, "imm32"))
296 emit_hex_bytes(out, curr, 4);
297 }
298 }
299
300 void emit_hex_bytes(line& out, const word& w, int num) {
301 assert(num <= 4);
302 bool is_number = looks_like_hex_int(w.data);
303 if (num == 1 || !is_number) {
304 out.words.push_back(w);
305 if (is_number)
306 out.words.back().data = hex_byte_to_string(parse_int(w.data));
307 return;
308 }
309 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
310 }
311
312 void emit_hex_bytes(line& out, uint32_t val, int num) {
313 assert(num <= 4);
314 for (int i = 0; i < num; ++i) {
315 out.words.push_back(hex_byte_text(val & 0xff));
316 val = val >> 8;
317 }
318 }
319
320 word hex_byte_text(uint8_t val) {
321 word result;
322 result.data = hex_byte_to_string(val);
323 result.original = result.data+"/auto";
324 return result;
325 }
326
327 string hex_byte_to_string(uint8_t val) {
328 ostringstream out;
329
330 out << HEXBYTE << NUM(val);
331 return out.str();
332 }
333
334 string to_string(const vector<word>& in) {
335 ostringstream out;
336 for (int i = 0; i < SIZE(in); ++i) {
337 if (i > 0) out << ' ';
338 out << in.at(i).data;
339 }
340 return out.str();
341 }
342
343 :(before "End Unit Tests")
344 void test_preserve_metadata_when_emitting_single_byte() {
345 word in;
346 in.data = "f0";
347 in.original = "f0/foo";
348 line out;
349 emit_hex_bytes(out, in, 1);
350 CHECK_EQ(out.words.at(0).data, "f0");
351 CHECK_EQ(out.words.at(0).original, "f0/foo");
352 }
353
354 :(code)
355 void test_pack_disp8() {
356 run(
357 "== code 0x1\n"
358 "74 2/disp8\n"
359 );
360 CHECK_TRACE_CONTENTS(
361 "transform: packing instruction '74 2/disp8'\n"
362 "transform: instruction after packing: '74 02'\n"
363 );
364 }
365
366 void test_pack_disp8_negative() {
367 transform(
368 "== code 0x1\n"
369
370 "74 -1/disp8\n"
371 );
372 CHECK_TRACE_CONTENTS(
373 "transform: packing instruction '74 -1/disp8'\n"
374 "transform: instruction after packing: '74 ff'\n"
375 );
376 }
377
378 void test_pack_rm32_direct() {
379 run(
380 "== code 0x1\n"
381
382
383
384 " 01 3/mod/direct 3/rm32/ebx 0/r32/eax \n"
385 );
386 CHECK_TRACE_CONTENTS(
387 "transform: packing instruction '01 3/mod/direct 3/rm32/ebx 0/r32/eax'\n"
388 "transform: instruction after packing: '01 c3'\n"
389 );
390 }
391
392 void test_pack_rm32_indirect() {
393 transform(
394 "== code 0x1\n"
395
396
397
398 " 01 0/mod/indirect 3/rm32/ebx 0/r32/eax \n"
399 );
400 CHECK_TRACE_CONTENTS(
401 "transform: packing instruction '01 0/mod/indirect 3/rm32/ebx 0/r32/eax'\n"
402 "transform: instruction after packing: '01 03'\n"
403 );
404 }
405
406 void test_pack_x32() {
407 run(
408 "== code 0x1\n"
409
410
411
412 " f3 0f 2a 3/mod/direct 3/rm32/ebx 1/x32 \n"
413 );
414 CHECK_TRACE_CONTENTS(
415 "transform: packing instruction 'f3 0f 2a 3/mod/direct 3/rm32/ebx 1/x32'\n"
416 "transform: instruction after packing: 'f3 0f 2a cb'\n"
417 );
418 }
419
420 void test_pack_xm32_direct() {
421 transform(
422 "== code 0x1\n"
423
424
425
426 " f3 0f 5e 3/mod/direct 3/xm32 1/x32 \n"
427 );
428 CHECK_TRACE_CONTENTS(
429 "transform: packing instruction 'f3 0f 5e 3/mod/direct 3/xm32 1/x32'\n"
430 "transform: instruction after packing: 'f3 0f 5e cb'\n"
431 );
432 }
433
434 void test_pack_xm32_indirect() {
435 transform(
436 "== code 0x1\n"
437
438
439
440 " f3 0f 5e 0/mod/indirect 3/rm32/ebx 1/x32 \n"
441 );
442 CHECK_TRACE_CONTENTS(
443 "transform: packing instruction 'f3 0f 5e 0/mod/indirect 3/rm32/ebx 1/x32'\n"
444 "transform: instruction after packing: 'f3 0f 5e 0b'\n"
445 );
446 }
447
448
449 void transform(const string& text_bytes) {
450 program p;
451 istringstream in(text_bytes);
452 parse(in, p);
453 if (trace_contains_errors()) return;
454 transform(p);
455 }
456
457 void test_pack_modrm_imm32() {
458 run(
459 "== code 0x1\n"
460
461
462
463 " 81 0/add/subop 3/mod/direct 3/rm32/ebx 1/imm32 \n"
464 );
465 CHECK_TRACE_CONTENTS(
466 "transform: packing instruction '81 0/add/subop 3/mod/direct 3/rm32/ebx 1/imm32'\n"
467 "transform: instruction after packing: '81 c3 01 00 00 00'\n"
468 );
469 }
470
471 void test_pack_imm32_large() {
472 run(
473 "== code 0x1\n"
474 "b9 0x080490a7/imm32\n"
475 );
476 CHECK_TRACE_CONTENTS(
477 "transform: packing instruction 'b9 0x080490a7/imm32'\n"
478 "transform: instruction after packing: 'b9 a7 90 04 08'\n"
479 );
480 }
481
482 void test_pack_immediate_constants_hex() {
483 run(
484 "== code 0x1\n"
485 "b9 0x2a/imm32\n"
486 );
487 CHECK_TRACE_CONTENTS(
488 "transform: packing instruction 'b9 0x2a/imm32'\n"
489 "transform: instruction after packing: 'b9 2a 00 00 00'\n"
490 "run: copy imm32 0x0000002a to ECX\n"
491 );
492 }
493
494 void test_pack_silently_ignores_non_hex() {
495 Hide_errors = true;
496 transform(
497 "== code 0x1\n"
498 "b9 foo/imm32\n"
499 );
500 CHECK_TRACE_CONTENTS(
501 "transform: packing instruction 'b9 foo/imm32'\n"
502
503 "transform: instruction after packing: 'b9 foo'\n"
504 );
505 }
506
507 void test_pack_flags_bad_hex() {
508 Hide_errors = true;
509 run(
510 "== code 0x1\n"
511 "b9 0xfoo/imm32\n"
512 );
513 CHECK_TRACE_CONTENTS(
514 "error: not a number: 0xfoo\n"
515 );
516 }
517
518 void test_pack_flags_uppercase_hex() {
519 Hide_errors = true;
520 run(
521 "== code 0x1\n"
522 "b9 0xAb/imm32\n"
523 );
524 CHECK_TRACE_CONTENTS(
525 "error: uppercase hex not allowed: 0xAb\n"
526 );
527 }
528
529
530
531 bool all_hex_bytes(const line& inst) {
532 for (int i = 0; i < SIZE(inst.words); ++i)
533 if (!is_hex_byte(inst.words.at(i)))
534 return false;
535 return true;
536 }
537
538 bool is_hex_byte(const word& curr) {
539 if (contains_any_argument_metadata(curr))
540 return false;
541 if (SIZE(curr.data) != 2)
542 return false;
543 if (curr.data.find_first_not_of("0123456789abcdef") != string::npos)
544 return false;
545 return true;
546 }
547
548 bool contains_any_argument_metadata(const word& word) {
549 for (int i = 0; i < SIZE(word.metadata); ++i)
550 if (Instruction_arguments.find(word.metadata.at(i)) != Instruction_arguments.end())
551 return true;
552 return false;
553 }
554
555 bool has_argument_metadata(const line& inst, const string& m) {
556 bool result = false;
557 for (int i = 0; i < SIZE(inst.words); ++i) {
558 if (!has_argument_metadata(inst.words.at(i), m)) continue;
559 if (result) {
560 raise << "'" << to_string(inst) << "' has conflicting " << m << " arguments\n" << end();
561 return false;
562 }
563 result = true;
564 }
565 return result;
566 }
567
568 bool has_argument_metadata(const word& w, const string& m) {
569 bool result = false;
570 bool metadata_found = false;
571 for (int i = 0; i < SIZE(w.metadata); ++i) {
572 const string& curr = w.metadata.at(i);
573 if (Instruction_arguments.find(curr) == Instruction_arguments.end()) continue;
574 if (metadata_found) {
575 raise << "'" << w.original << "' has conflicting argument types; it should have only one\n" << end();
576 return false;
577 }
578 metadata_found = true;
579 result = (curr == m);
580 }
581 return result;
582 }
583
584 word metadata(const line& inst, const string& m) {
585 for (int i = 0; i < SIZE(inst.words); ++i)
586 if (has_argument_metadata(inst.words.at(i), m))
587 return inst.words.at(i);
588 assert(false);
589 }
590
// Cheap syntactic check for "this word is meant to be a number": true when
// the first character is a sign or a decimal digit (which covers a '0x'
// prefix). Only the first character is examined; full validation happens
// when the word is actually parsed.
bool looks_like_hex_int(const string& s) {
  if (s.empty()) return false;
  const char first = s.at(0);
  if (first == '-' || first == '+') return true;
  // <cctype> functions are undefined for negative values other than EOF, so
  // widen through unsigned char in case 'first' is a non-ASCII byte.
  if (isdigit(static_cast<unsigned char>(first))) return true;

  return false;
}
598
599 string to_string(const line& inst) {
600 ostringstream out;
601 for (int i = 0; i < SIZE(inst.words); ++i) {
602 if (i > 0) out << ' ';
603 out << inst.words.at(i).original;
604 }
605 return out.str();
606 }
607
608 int32_t parse_int(const string& s) {
609 if (s.empty()) return 0;
610 if (contains_uppercase(s)) {
611 raise << "uppercase hex not allowed: " << s << '\n' << end();
612 return 0;
613 }
614 istringstream in(s);
615 in >> std::hex;
616 if (s.at(0) == '-') {
617 int32_t result = 0;
618 in >> result;
619 if (!in || !in.eof()) {
620 raise << "not a number: " << s << '\n' << end();
621 return 0;
622 }
623 return result;
624 }
625 uint32_t uresult = 0;
626 in >> uresult;
627 if (!in || !in.eof()) {
628 raise << "not a number: " << s << '\n' << end();
629 return 0;
630 }
631 return static_cast<int32_t>(uresult);
632 }
633 :(before "End Unit Tests")
634 void test_parse_int() {
635 CHECK_EQ(0, parse_int("0"));
636 CHECK_EQ(0, parse_int("0x0"));
637 CHECK_EQ(0, parse_int("0x0"));
638 CHECK_EQ(16, parse_int("10"));
639 CHECK_EQ(-1, parse_int("-1"));
640 CHECK_EQ(-1, parse_int("0xffffffff"));
641 }