https://github.com/akkartik/mu/blob/main/linux/bootstrap/032operands.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14 :(before "End Help Texts")
15 put_new(Help, "instructions",
16 "Each x86 instruction consists of an instruction or opcode and some number\n"
17 "of arguments.\n"
18 "Each argument has a type. An instruction won't have more than one argument of\n"
19 "any type.\n"
20 "Each instruction has some set of allowed argument types. It'll reject others.\n"
21 "The complete list of argument types: mod, subop, r32 (integer register),\n"
22 "rm32 (integer register or memory), x32 (floating point register),\n"
23 "xm32 (floating point register or memory), scale, index, base, disp8, disp16,\n"
24 "disp32,imm8,imm32.\n"
25 "Each of these has its own help page. Try reading 'bootstrap help mod' next.\n"
26 );
27 :(before "End Help Contents")
28 cerr << " instructions\n";
29
30 :(before "Running Test Program")
31 transform(p);
32 if (trace_contains_errors()) return;
33
34 :(code)
35 void test_pack_immediate_constants() {
36 run(
37 "== code 0x1\n"
38 "bb 0x2a/imm32\n"
39 );
40 CHECK_TRACE_CONTENTS(
41 "transform: packing instruction 'bb 0x2a/imm32'\n"
42 "transform: instruction after packing: 'bb 2a 00 00 00'\n"
43 "run: copy imm32 0x0000002a to EBX\n"
44 );
45 }
46
47
48
49 :(before "End Globals")
// Metadata tags that mark a word as an instruction argument of some type.
set<string> Instruction_arguments;
51 :(before "End One-time Setup")
52 Instruction_arguments.insert("subop");
53 Instruction_arguments.insert("mod");
54 Instruction_arguments.insert("rm32");
55 Instruction_arguments.insert("xm32");
56 Instruction_arguments.insert("base");
57 Instruction_arguments.insert("index");
58 Instruction_arguments.insert("scale");
59 Instruction_arguments.insert("r32");
60 Instruction_arguments.insert("x32");
61 Instruction_arguments.insert("disp8");
62 Instruction_arguments.insert("disp16");
63 Instruction_arguments.insert("disp32");
64 Instruction_arguments.insert("imm8");
65 Instruction_arguments.insert("imm32");
66
67 :(before "End Help Texts")
68 init_argument_type_help();
69 :(code)
70 void init_argument_type_help() {
71 put(Help, "mod",
72 "2-bit argument controlling the _addressing mode_ of many instructions,\n"
73 "to determine how to compute the _effective address_ to look up memory at\n"
74 "based on the 'rm32' argument and potentially others.\n"
75 "\n"
76 "If mod = 3, just operate on the contents of the register specified by rm32\n"
77 " (direct mode)\n"
78 "If mod = 2, effective address is usually* rm32 + disp32\n"
79 " (indirect mode with displacement)\n"
80 "If mod = 1, effective address is usually* rm32 + disp8\n"
81 " (indirect mode with displacement)\n"
82 "If mod = 0, effective address is usually* rm32\n"
83 " (indirect mode)\n"
84 "\n"
85 "* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
86 " Using it as an address gets more involved. For more details,\n"
87 " try reading the help pages for 'base', 'index' and 'scale'.\n"
88 "\n"
89 "For complete details, spend some time with two tables in the IA-32 software\n"
90 "developer's manual that are also included in this repo:\n"
91 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
92 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
93 );
94 put(Help, "subop",
95 "Additional 3-bit argument for determining the instruction when the opcode\n"
96 "is 81, 8f, d3, f7 or ff.\n"
97 "Can't coexist with argument of type 'r32' in a single instruction, because\n"
98 "the two use the same bits.\n"
99 );
100 put(Help, "r32",
101 "3-bit argument specifying an integer register argument used directly,\n"
102 "without any further addressing modes.\n"
103 );
104 put(Help, "x32",
105 "3-bit argument specifying a floating-point register argument used directly,\n"
106 "without any further addressing modes.\n"
107 );
108 put(Help, "rm32",
109 "32-bit value in an integer register or memory. The precise details of its\n"
110 "construction depend on the eponymous 3-bit 'rm32' argument, the 'mod' argument,\n"
111 "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
112 "and a displacement ('disp8' or 'disp32').\n"
113 "\n"
114 "For complete details, spend some time with two tables in the IA-32 software\n"
115 "developer's manual that are also included in this repo:\n"
116 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
117 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
118 );
119 put(Help, "xm32",
120 "32-bit value in a floating-point register or memory. The precise details of its\n"
121 "construction depend on the eponymous 3-bit 'xm32' argument, the 'mod' argument,\n"
122 "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
123 "and a displacement ('disp8' or 'disp32').\n"
124 "\n"
125 "For complete details, spend some time with two tables in the IA-32 software\n"
126 "developer's manual that are also included in this repo:\n"
127 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
128 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
129 "\n"
130 "One subtlety here: while /xm32 refers to floating-point registers in direct mode\n"
131 "(when /mod is 3), other addressing modes to construct memory addresses use integer registers\n"
132 "(just like /rm32). Other than direct mode, its behavior is identical to /rm32.\n"
133 );
134 put(Help, "base",
135 "Additional 3-bit argument (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
136 "register containing an address to look up.\n"
137 "This address may be further modified by 'index' and 'scale' arguments.\n"
138 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
139 "For complete details, spend some time with the IA-32 software developer's manual,\n"
140 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
141 "It is included in this repository as 'sib.pdf'.\n"
142 );
143 put(Help, "index",
144 "Optional 3-bit argument (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
145 "the 'base' argument to compute the 'effective address' at which to look up memory.\n"
146 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
147 "For complete details, spend some time with the IA-32 software developer's manual,\n"
148 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
149 "It is included in this repository as 'sib.pdf'.\n"
150 );
151 put(Help, "scale",
152 "Optional 2-bit argument (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
153 "power of 2 to be multiplied to the 'index' argument before adding the result to\n"
154 "the 'base' argument to compute the _effective address_ to operate on.\n"
155 " effective address = base + index * scale + displacement (disp8 or disp32)\n"
156 "\n"
157 "When scale is 0, use index unmodified.\n"
158 "When scale is 1, multiply index by 2.\n"
159 "When scale is 2, multiply index by 4.\n"
160 "When scale is 3, multiply index by 8.\n"
161 "\n"
162 "For complete details, spend some time with the IA-32 software developer's manual,\n"
163 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
164 "It is included in this repository as 'sib.pdf'.\n"
165 );
166 put(Help, "disp8",
167 "8-bit value to be added in many instructions.\n"
168 );
169 put(Help, "disp16",
170 "16-bit value to be added in many instructions.\n"
171 "Currently not used in any SubX instructions.\n"
172 );
173 put(Help, "disp32",
174 "32-bit value to be added in many instructions.\n"
175 );
176 put(Help, "imm8",
177 "8-bit value for many instructions.\n"
178 );
179 put(Help, "imm32",
180 "32-bit value for many instructions.\n"
181 );
182 }
183
184
185
186 :(after "Begin Transforms")
187 Transform.push_back(pack_arguments);
188
189 :(code)
190 void pack_arguments(program& p) {
191 if (p.segments.empty()) return;
192 segment& code = *find(p, "code");
193
194 trace(3, "transform") << "-- pack arguments" << end();
195 for (int i = 0; i < SIZE(code.lines); ++i) {
196 line& inst = code.lines.at(i);
197 if (all_hex_bytes(inst)) continue;
198 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
199 pack_arguments(inst);
200 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
201 }
202 }
203
204 void pack_arguments(line& inst) {
205 line new_inst;
206 add_opcodes(inst, new_inst);
207 add_modrm_byte(inst, new_inst);
208 add_sib_byte(inst, new_inst);
209 add_disp_bytes(inst, new_inst);
210 add_imm_bytes(inst, new_inst);
211 inst.words.swap(new_inst.words);
212 }
213
214 void add_opcodes(const line& in, line& out) {
215 out.words.push_back(in.words.at(0));
216 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
217 out.words.push_back(in.words.at(1));
218 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
219 out.words.push_back(in.words.at(2));
220 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
221 out.words.push_back(in.words.at(2));
222 }
223
224 void add_modrm_byte(const line& in, line& out) {
225 uint8_t mod=0, reg_subop=0, rm32=0;
226 bool emit = false;
227 for (int i = 0; i < SIZE(in.words); ++i) {
228 const word& curr = in.words.at(i);
229 if (has_argument_metadata(curr, "mod")) {
230 mod = hex_byte(curr.data);
231 emit = true;
232 }
233 else if (has_argument_metadata(curr, "rm32")) {
234 rm32 = hex_byte(curr.data);
235 emit = true;
236 }
237 else if (has_argument_metadata(curr, "r32")) {
238 reg_subop = hex_byte(curr.data);
239 emit = true;
240 }
241 else if (has_argument_metadata(curr, "xm32")) {
242 rm32 = hex_byte(curr.data);
243 emit = true;
244 }
245 else if (has_argument_metadata(curr, "x32")) {
246 reg_subop = hex_byte(curr.data);
247 emit = true;
248 }
249 else if (has_argument_metadata(curr, "subop")) {
250 reg_subop = hex_byte(curr.data);
251 emit = true;
252 }
253 }
254 if (emit)
255 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
256 }
257
258 void add_sib_byte(const line& in, line& out) {
259 uint8_t scale=0, index=0, base=0;
260 bool emit = false;
261 for (int i = 0; i < SIZE(in.words); ++i) {
262 const word& curr = in.words.at(i);
263 if (has_argument_metadata(curr, "scale")) {
264 scale = hex_byte(curr.data);
265 emit = true;
266 }
267 else if (has_argument_metadata(curr, "index")) {
268 index = hex_byte(curr.data);
269 emit = true;
270 }
271 else if (has_argument_metadata(curr, "base")) {
272 base = hex_byte(curr.data);
273 emit = true;
274 }
275 }
276 if (emit)
277 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
278 }
279
280 void add_disp_bytes(const line& in, line& out) {
281 for (int i = 0; i < SIZE(in.words); ++i) {
282 const word& curr = in.words.at(i);
283 if (has_argument_metadata(curr, "disp8"))
284 emit_hex_bytes(out, curr, 1);
285 if (has_argument_metadata(curr, "disp16"))
286 emit_hex_bytes(out, curr, 2);
287 else if (has_argument_metadata(curr, "disp32"))
288 emit_hex_bytes(out, curr, 4);
289 }
290 }
291
292 void add_imm_bytes(const line& in, line& out) {
293 for (int i = 0; i < SIZE(in.words); ++i) {
294 const word& curr = in.words.at(i);
295 if (has_argument_metadata(curr, "imm8"))
296 emit_hex_bytes(out, curr, 1);
297 else if (has_argument_metadata(curr, "imm32"))
298 emit_hex_bytes(out, curr, 4);
299 }
300 }
301
302 void emit_hex_bytes(line& out, const word& w, int num) {
303 assert(num <= 4);
304 bool is_number = looks_like_hex_int(w.data);
305 if (num == 1 || !is_number) {
306 out.words.push_back(w);
307 if (is_number)
308 out.words.back().data = hex_byte_to_string(parse_int(w.data));
309 return;
310 }
311 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
312 }
313
314 void emit_hex_bytes(line& out, uint32_t val, int num) {
315 assert(num <= 4);
316 for (int i = 0; i < num; ++i) {
317 out.words.push_back(hex_byte_text(val & 0xff));
318 val = val >> 8;
319 }
320 }
321
322 word hex_byte_text(uint8_t val) {
323 word result;
324 result.data = hex_byte_to_string(val);
325 result.original = result.data+"/auto";
326 return result;
327 }
328
329 string hex_byte_to_string(uint8_t val) {
330 ostringstream out;
331
332 out << HEXBYTE << NUM(val);
333 return out.str();
334 }
335
336 string to_string(const vector<word>& in) {
337 ostringstream out;
338 for (int i = 0; i < SIZE(in); ++i) {
339 if (i > 0) out << ' ';
340 out << in.at(i).data;
341 }
342 return out.str();
343 }
344
345 :(before "End Unit Tests")
346 void test_preserve_metadata_when_emitting_single_byte() {
347 word in;
348 in.data = "f0";
349 in.original = "f0/foo";
350 line out;
351 emit_hex_bytes(out, in, 1);
352 CHECK_EQ(out.words.at(0).data, "f0");
353 CHECK_EQ(out.words.at(0).original, "f0/foo");
354 }
355
356 :(code)
357 void test_pack_disp8() {
358 run(
359 "== code 0x1\n"
360 "74 2/disp8\n"
361 );
362 CHECK_TRACE_CONTENTS(
363 "transform: packing instruction '74 2/disp8'\n"
364 "transform: instruction after packing: '74 02'\n"
365 );
366 }
367
368 void test_pack_disp8_negative() {
369 transform(
370 "== code 0x1\n"
371
372 "74 -1/disp8\n"
373 );
374 CHECK_TRACE_CONTENTS(
375 "transform: packing instruction '74 -1/disp8'\n"
376 "transform: instruction after packing: '74 ff'\n"
377 );
378 }
379
380 void test_pack_rm32_direct() {
381 run(
382 "== code 0x1\n"
383
384
385
386 " 01 3/mod/direct 3/rm32/ebx 0/r32/eax \n"
387 );
388 CHECK_TRACE_CONTENTS(
389 "transform: packing instruction '01 3/mod/direct 3/rm32/ebx 0/r32/eax'\n"
390 "transform: instruction after packing: '01 c3'\n"
391 );
392 }
393
394 void test_pack_rm32_indirect() {
395 transform(
396 "== code 0x1\n"
397
398
399
400 " 01 0/mod/indirect 3/rm32/ebx 0/r32/eax \n"
401 );
402 CHECK_TRACE_CONTENTS(
403 "transform: packing instruction '01 0/mod/indirect 3/rm32/ebx 0/r32/eax'\n"
404 "transform: instruction after packing: '01 03'\n"
405 );
406 }
407
408 void test_pack_x32() {
409 run(
410 "== code 0x1\n"
411
412
413
414 " f3 0f 2a 3/mod/direct 3/rm32/ebx 1/x32 \n"
415 );
416 CHECK_TRACE_CONTENTS(
417 "transform: packing instruction 'f3 0f 2a 3/mod/direct 3/rm32/ebx 1/x32'\n"
418 "transform: instruction after packing: 'f3 0f 2a cb'\n"
419 );
420 }
421
422 void test_pack_xm32_direct() {
423 transform(
424 "== code 0x1\n"
425
426
427
428 " f3 0f 5e 3/mod/direct 3/xm32 1/x32 \n"
429 );
430 CHECK_TRACE_CONTENTS(
431 "transform: packing instruction 'f3 0f 5e 3/mod/direct 3/xm32 1/x32'\n"
432 "transform: instruction after packing: 'f3 0f 5e cb'\n"
433 );
434 }
435
436 void test_pack_xm32_indirect() {
437 transform(
438 "== code 0x1\n"
439
440
441
442 " f3 0f 5e 0/mod/indirect 3/rm32/ebx 1/x32 \n"
443 );
444 CHECK_TRACE_CONTENTS(
445 "transform: packing instruction 'f3 0f 5e 0/mod/indirect 3/rm32/ebx 1/x32'\n"
446 "transform: instruction after packing: 'f3 0f 5e 0b'\n"
447 );
448 }
449
450
451 void transform(const string& text_bytes) {
452 program p;
453 istringstream in(text_bytes);
454 parse(in, p);
455 if (trace_contains_errors()) return;
456 transform(p);
457 }
458
459 void test_pack_modrm_imm32() {
460 run(
461 "== code 0x1\n"
462
463
464
465 " 81 0/add/subop 3/mod/direct 3/rm32/ebx 1/imm32 \n"
466 );
467 CHECK_TRACE_CONTENTS(
468 "transform: packing instruction '81 0/add/subop 3/mod/direct 3/rm32/ebx 1/imm32'\n"
469 "transform: instruction after packing: '81 c3 01 00 00 00'\n"
470 );
471 }
472
473 void test_pack_imm32_large() {
474 run(
475 "== code 0x1\n"
476 "b9 0x080490a7/imm32\n"
477 );
478 CHECK_TRACE_CONTENTS(
479 "transform: packing instruction 'b9 0x080490a7/imm32'\n"
480 "transform: instruction after packing: 'b9 a7 90 04 08'\n"
481 );
482 }
483
484 void test_pack_immediate_constants_hex() {
485 run(
486 "== code 0x1\n"
487 "b9 0x2a/imm32\n"
488 );
489 CHECK_TRACE_CONTENTS(
490 "transform: packing instruction 'b9 0x2a/imm32'\n"
491 "transform: instruction after packing: 'b9 2a 00 00 00'\n"
492 "run: copy imm32 0x0000002a to ECX\n"
493 );
494 }
495
496 void test_pack_silently_ignores_non_hex() {
497 Hide_errors = true;
498 transform(
499 "== code 0x1\n"
500 "b9 foo/imm32\n"
501 );
502 CHECK_TRACE_CONTENTS(
503 "transform: packing instruction 'b9 foo/imm32'\n"
504
505 "transform: instruction after packing: 'b9 foo'\n"
506 );
507 }
508
509 void test_pack_flags_bad_hex() {
510 Hide_errors = true;
511 run(
512 "== code 0x1\n"
513 "b9 0xfoo/imm32\n"
514 );
515 CHECK_TRACE_CONTENTS(
516 "error: not a number: 0xfoo\n"
517 );
518 }
519
520 void test_pack_flags_uppercase_hex() {
521 Hide_errors = true;
522 run(
523 "== code 0x1\n"
524 "b9 0xAb/imm32\n"
525 );
526 CHECK_TRACE_CONTENTS(
527 "error: uppercase hex not allowed: 0xAb\n"
528 );
529 }
530
531
532
533 bool all_hex_bytes(const line& inst) {
534 for (int i = 0; i < SIZE(inst.words); ++i)
535 if (!is_hex_byte(inst.words.at(i)))
536 return false;
537 return true;
538 }
539
540 bool is_hex_byte(const word& curr) {
541 if (contains_any_argument_metadata(curr))
542 return false;
543 if (SIZE(curr.data) != 2)
544 return false;
545 if (curr.data.find_first_not_of("0123456789abcdef") != string::npos)
546 return false;
547 return true;
548 }
549
550 bool contains_any_argument_metadata(const word& word) {
551 for (int i = 0; i < SIZE(word.metadata); ++i)
552 if (Instruction_arguments.find(word.metadata.at(i)) != Instruction_arguments.end())
553 return true;
554 return false;
555 }
556
557 bool has_argument_metadata(const line& inst, const string& m) {
558 bool result = false;
559 for (int i = 0; i < SIZE(inst.words); ++i) {
560 if (!has_argument_metadata(inst.words.at(i), m)) continue;
561 if (result) {
562 raise << "'" << to_string(inst) << "' has conflicting " << m << " arguments\n" << end();
563 return false;
564 }
565 result = true;
566 }
567 return result;
568 }
569
570 bool has_argument_metadata(const word& w, const string& m) {
571 bool result = false;
572 bool metadata_found = false;
573 for (int i = 0; i < SIZE(w.metadata); ++i) {
574 const string& curr = w.metadata.at(i);
575 if (Instruction_arguments.find(curr) == Instruction_arguments.end()) continue;
576 if (metadata_found) {
577 raise << "'" << w.original << "' has conflicting argument types; it should have only one\n" << end();
578 return false;
579 }
580 metadata_found = true;
581 result = (curr == m);
582 }
583 return result;
584 }
585
586 word metadata(const line& inst, const string& m) {
587 for (int i = 0; i < SIZE(inst.words); ++i)
588 if (has_argument_metadata(inst.words.at(i), m))
589 return inst.words.at(i);
590 assert(false);
591 }
592
// Cheap syntactic check for whether 's' is meant to be a number: true when it
// starts with a sign ('-' or '+') or a decimal digit (which also covers a
// '0x' prefix). The rest of the string is not validated here.
bool looks_like_hex_int(const string& s) {
  if (s.empty()) return false;
  const char first = s.at(0);
  if (first == '-' || first == '+') return true;
  // isdigit() has undefined behavior for negative values other than EOF, so
  // go through unsigned char in case 'char' is signed and the byte is >= 0x80.
  if (isdigit(static_cast<unsigned char>(first))) return true;
  return false;
}
600
601 string to_string(const line& inst) {
602 ostringstream out;
603 for (int i = 0; i < SIZE(inst.words); ++i) {
604 if (i > 0) out << ' ';
605 out << inst.words.at(i).original;
606 }
607 return out.str();
608 }
609
610 int32_t parse_int(const string& s) {
611 if (s.empty()) return 0;
612 if (contains_uppercase(s)) {
613 raise << "uppercase hex not allowed: " << s << '\n' << end();
614 return 0;
615 }
616 istringstream in(s);
617 in >> std::hex;
618 if (s.at(0) == '-') {
619 int32_t result = 0;
620 in >> result;
621 if (!in || !in.eof()) {
622 raise << "not a number: " << s << '\n' << end();
623 return 0;
624 }
625 return result;
626 }
627 uint32_t uresult = 0;
628 in >> uresult;
629 if (!in || !in.eof()) {
630 raise << "not a number: " << s << '\n' << end();
631 return 0;
632 }
633 return static_cast<int32_t>(uresult);
634 }
635 :(before "End Unit Tests")
636 void test_parse_int() {
637 CHECK_EQ(0, parse_int("0"));
638 CHECK_EQ(0, parse_int("0x0"));
639 CHECK_EQ(0, parse_int("0x0"));
640 CHECK_EQ(16, parse_int("10"));
641 CHECK_EQ(-1, parse_int("-1"));
642 CHECK_EQ(-1, parse_int("0xffffffff"));
643 }