https://github.com/akkartik/mu/blob/master/032operands.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14 :(before "End Help Texts")
// Add the "instructions" page to Help. The text explains that an instruction
// is an opcode followed by typed operands, and lists every operand type name.
15 put_new(Help, "instructions",
16 "Each x86 instruction consists of an instruction or opcode and some number\n"
17 "of operands.\n"
18 "Each operand has a type. An instruction won't have more than one operand of\n"
19 "any type.\n"
20 "Each instruction has some set of allowed operand types. It'll reject others.\n"
21 "The complete list of operand types: mod, subop, r32 (register), rm32\n"
22 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
23 "imm32.\n"
24 "Each of these has its own help page. Try reading 'bootstrap help mod' next.\n"
25 );
26 :(before "End Help Contents")
// Write the name of the "instructions" topic to cerr.
27 cerr << " instructions\n";
28
29 :(before "Running Test Program")
// Apply transform() to program p, and stop here if the trace recorded errors.
30 transform(p);
31 if (trace_contains_errors()) return;
32
33 :(code)
34 void test_pack_immediate_constants() {
35 run(
36 "== code 0x1\n"
37 "bb 0x2a/imm32\n"
38 );
39 CHECK_TRACE_CONTENTS(
40 "transform: packing instruction 'bb 0x2a/imm32'\n"
41 "transform: instruction after packing: 'bb 2a 00 00 00'\n"
42 "run: copy imm32 0x0000002a to EBX\n"
43 );
44 }
45
46
47
48 :(before "End Globals")
49 set<string> Instruction_operands;
50 :(before "End One-time Setup")
51 Instruction_operands.insert("subop");
52 Instruction_operands.insert("mod");
53 Instruction_operands.insert("rm32");
54 Instruction_operands.insert("base");
55 Instruction_operands.insert("index");
56 Instruction_operands.insert("scale");
57 Instruction_operands.insert("r32");
58 Instruction_operands.insert("disp8");
59 Instruction_operands.insert("disp16");
60 Instruction_operands.insert("disp32");
61 Instruction_operands.insert("imm8");
62 Instruction_operands.insert("imm32");
63
64 :(before "End Help Texts")
// Install the per-operand-type help pages defined below.
65 init_operand_type_help();
66 :(code)
// Store one help page in Help for each operand type: mod, subop, r32, rm32,
// base, index, scale, disp8, disp16, disp32, imm8 and imm32.
67 void init_operand_type_help() {
// 'mod': how the addressing mode is selected.
68 put(Help, "mod",
69 "2-bit operand controlling the _addressing mode_ of many instructions,\n"
70 "to determine how to compute the _effective address_ to look up memory at\n"
71 "based on the 'rm32' operand and potentially others.\n"
72 "\n"
73 "If mod = 3, just operate on the contents of the register specified by rm32\n"
74 " (direct mode).\n"
75 "If mod = 2, effective address is usually* rm32 + disp32\n"
76 " (indirect mode with displacement).\n"
77 "If mod = 1, effective address is usually* rm32 + disp8\n"
78 " (indirect mode with displacement).\n"
79 "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
80 "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
81 " Using it as an address gets more involved. For more details,\n"
82 " try reading the help pages for 'base', 'index' and 'scale'.)\n"
83 "\n"
84 "For complete details, spend some time with two tables in the IA-32 software\n"
85 "developer's manual that are also included in this repo:\n"
86 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
87 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
88 );
// 'subop': extra opcode bits; shares its bit positions with 'r32'.
89 put(Help, "subop",
90 "Additional 3-bit operand for determining the instruction when the opcode\n"
91 "is 81, 8f, d3, f7 or ff.\n"
92 "Can't coexist with operand of type 'r32' in a single instruction, because\n"
93 "the two use the same bits.\n"
94 );
// 'r32': a register used directly.
95 put(Help, "r32",
96 "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
97 );
// 'rm32': a register-or-memory operand.
98 put(Help, "rm32",
99 "32-bit value in register or memory. The precise details of its construction\n"
100 "depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n"
101 "potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n"
102 "('disp8' or 'disp32').\n"
103 "\n"
104 "For complete details, spend some time with two tables in the IA-32 software\n"
105 "developer's manual that are also included in this repo:\n"
106 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
107 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
108 );
// 'base', 'index', 'scale': the three SIB operands.
109 put(Help, "base",
110 "Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
111 "register containing an address to look up.\n"
112 "This address may be further modified by 'index' and 'scale' operands.\n"
113 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
114 "For complete details, spend some time with the IA-32 software developer's manual,\n"
115 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
116 "It is included in this repository as 'sib.pdf'.\n"
117 );
118 put(Help, "index",
119 "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
120 "the 'base' operand to compute the 'effective address' at which to look up memory.\n"
121 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
122 "For complete details, spend some time with the IA-32 software developer's manual,\n"
123 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
124 "It is included in this repository as 'sib.pdf'.\n"
125 );
126 put(Help, "scale",
127 "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
128 "power of 2 to be multiplied to the 'index' operand before adding the result to\n"
129 "the 'base' operand to compute the _effective address_ to operate on.\n"
130 " effective address = base + index * scale + displacement (disp8 or disp32)\n"
131 "\n"
132 "When scale is 0, use index unmodified.\n"
133 "When scale is 1, multiply index by 2.\n"
134 "When scale is 2, multiply index by 4.\n"
135 "When scale is 3, multiply index by 8.\n"
136 "\n"
137 "For complete details, spend some time with the IA-32 software developer's manual,\n"
138 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
139 "It is included in this repository as 'sib.pdf'.\n"
140 );
// Displacement operands of each width.
141 put(Help, "disp8",
142 "8-bit value to be added in many instructions.\n"
143 );
144 put(Help, "disp16",
145 "16-bit value to be added in many instructions.\n"
146 "Currently not used in any SubX instructions.\n"
147 );
148 put(Help, "disp32",
149 "32-bit value to be added in many instructions.\n"
150 );
// Immediate operands of each width.
151 put(Help, "imm8",
152 "8-bit value for many instructions.\n"
153 );
154 put(Help, "imm32",
155 "32-bit value for many instructions.\n"
156 );
157 }
158
159
160
161 :(after "Begin Transforms")
162 Transform.push_back(pack_operands);
163
164 :(code)
165 void pack_operands(program& p) {
166 if (p.segments.empty()) return;
167 segment& code = *find(p, "code");
168
169 trace(3, "transform") << "-- pack operands" << end();
170 for (int i = 0; i < SIZE(code.lines); ++i) {
171 line& inst = code.lines.at(i);
172 if (all_hex_bytes(inst)) continue;
173 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
174 pack_operands(inst);
175 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
176 }
177 }
178
179 void pack_operands(line& inst) {
180 line new_inst;
181 add_opcodes(inst, new_inst);
182 add_modrm_byte(inst, new_inst);
183 add_sib_byte(inst, new_inst);
184 add_disp_bytes(inst, new_inst);
185 add_imm_bytes(inst, new_inst);
186 inst.words.swap(new_inst.words);
187 }
188
189 void add_opcodes(const line& in, line& out) {
190 out.words.push_back(in.words.at(0));
191 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
192 out.words.push_back(in.words.at(1));
193 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
194 out.words.push_back(in.words.at(2));
195 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
196 out.words.push_back(in.words.at(2));
197 }
198
199 void add_modrm_byte(const line& in, line& out) {
200 uint8_t mod=0, reg_subop=0, rm32=0;
201 bool emit = false;
202 for (int i = 0; i < SIZE(in.words); ++i) {
203 const word& curr = in.words.at(i);
204 if (has_operand_metadata(curr, "mod")) {
205 mod = hex_byte(curr.data);
206 emit = true;
207 }
208 else if (has_operand_metadata(curr, "rm32")) {
209 rm32 = hex_byte(curr.data);
210 emit = true;
211 }
212 else if (has_operand_metadata(curr, "r32")) {
213 reg_subop = hex_byte(curr.data);
214 emit = true;
215 }
216 else if (has_operand_metadata(curr, "subop")) {
217 reg_subop = hex_byte(curr.data);
218 emit = true;
219 }
220 }
221 if (emit)
222 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
223 }
224
225 void add_sib_byte(const line& in, line& out) {
226 uint8_t scale=0, index=0, base=0;
227 bool emit = false;
228 for (int i = 0; i < SIZE(in.words); ++i) {
229 const word& curr = in.words.at(i);
230 if (has_operand_metadata(curr, "scale")) {
231 scale = hex_byte(curr.data);
232 emit = true;
233 }
234 else if (has_operand_metadata(curr, "index")) {
235 index = hex_byte(curr.data);
236 emit = true;
237 }
238 else if (has_operand_metadata(curr, "base")) {
239 base = hex_byte(curr.data);
240 emit = true;
241 }
242 }
243 if (emit)
244 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
245 }
246
247 void add_disp_bytes(const line& in, line& out) {
248 for (int i = 0; i < SIZE(in.words); ++i) {
249 const word& curr = in.words.at(i);
250 if (has_operand_metadata(curr, "disp8"))
251 emit_hex_bytes(out, curr, 1);
252 if (has_operand_metadata(curr, "disp16"))
253 emit_hex_bytes(out, curr, 2);
254 else if (has_operand_metadata(curr, "disp32"))
255 emit_hex_bytes(out, curr, 4);
256 }
257 }
258
259 void add_imm_bytes(const line& in, line& out) {
260 for (int i = 0; i < SIZE(in.words); ++i) {
261 const word& curr = in.words.at(i);
262 if (has_operand_metadata(curr, "imm8"))
263 emit_hex_bytes(out, curr, 1);
264 else if (has_operand_metadata(curr, "imm32"))
265 emit_hex_bytes(out, curr, 4);
266 }
267 }
268
269 void emit_hex_bytes(line& out, const word& w, int num) {
270 assert(num <= 4);
271 bool is_number = looks_like_hex_int(w.data);
272 if (num == 1 || !is_number) {
273 out.words.push_back(w);
274 if (is_number)
275 out.words.back().data = hex_byte_to_string(parse_int(w.data));
276 return;
277 }
278 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
279 }
280
281 void emit_hex_bytes(line& out, uint32_t val, int num) {
282 assert(num <= 4);
283 for (int i = 0; i < num; ++i) {
284 out.words.push_back(hex_byte_text(val & 0xff));
285 val = val >> 8;
286 }
287 }
288
289 word hex_byte_text(uint8_t val) {
290 word result;
291 result.data = hex_byte_to_string(val);
292 result.original = result.data+"/auto";
293 return result;
294 }
295
296 string hex_byte_to_string(uint8_t val) {
297 ostringstream out;
298
299 out << HEXBYTE << NUM(val);
300 return out.str();
301 }
302
303 string to_string(const vector<word>& in) {
304 ostringstream out;
305 for (int i = 0; i < SIZE(in); ++i) {
306 if (i > 0) out << ' ';
307 out << in.at(i).data;
308 }
309 return out.str();
310 }
311
312 :(before "End Unit Tests")
313 void test_preserve_metadata_when_emitting_single_byte() {
314 word in;
315 in.data = "f0";
316 in.original = "f0/foo";
317 line out;
318 emit_hex_bytes(out, in, 1);
319 CHECK_EQ(out.words.at(0).data, "f0");
320 CHECK_EQ(out.words.at(0).original, "f0/foo");
321 }
322
323 :(code)
324 void test_pack_disp8() {
325 run(
326 "== code 0x1\n"
327 "74 2/disp8\n"
328 );
329 CHECK_TRACE_CONTENTS(
330 "transform: packing instruction '74 2/disp8'\n"
331 "transform: instruction after packing: '74 02'\n"
332 );
333 }
334
335 void test_pack_disp8_negative() {
336 transform(
337 "== code 0x1\n"
338
339 "74 -1/disp8\n"
340 );
341 CHECK_TRACE_CONTENTS(
342 "transform: packing instruction '74 -1/disp8'\n"
343 "transform: instruction after packing: '74 ff'\n"
344 );
345 }
346
347
348 void transform(const string& text_bytes) {
349 program p;
350 istringstream in(text_bytes);
351 parse(in, p);
352 if (trace_contains_errors()) return;
353 transform(p);
354 }
355
356 void test_pack_modrm_imm32() {
357 run(
358 "== code 0x1\n"
359
360
361
362 " 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 \n"
363 );
364 CHECK_TRACE_CONTENTS(
365 "transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'\n"
366 "transform: instruction after packing: '81 c3 01 00 00 00'\n"
367 );
368 }
369
370 void test_pack_imm32_large() {
371 run(
372 "== code 0x1\n"
373 "b9 0x080490a7/imm32\n"
374 );
375 CHECK_TRACE_CONTENTS(
376 "transform: packing instruction 'b9 0x080490a7/imm32'\n"
377 "transform: instruction after packing: 'b9 a7 90 04 08'\n"
378 );
379 }
380
381 void test_pack_immediate_constants_hex() {
382 run(
383 "== code 0x1\n"
384 "b9 0x2a/imm32\n"
385 );
386 CHECK_TRACE_CONTENTS(
387 "transform: packing instruction 'b9 0x2a/imm32'\n"
388 "transform: instruction after packing: 'b9 2a 00 00 00'\n"
389 "run: copy imm32 0x0000002a to ECX\n"
390 );
391 }
392
393 void test_pack_silently_ignores_non_hex() {
394 Hide_errors = true;
395 transform(
396 "== code 0x1\n"
397 "b9 foo/imm32\n"
398 );
399 CHECK_TRACE_CONTENTS(
400 "transform: packing instruction 'b9 foo/imm32'\n"
401
402 "transform: instruction after packing: 'b9 foo'\n"
403 );
404 }
405
406 void test_pack_flags_bad_hex() {
407 Hide_errors = true;
408 run(
409 "== code 0x1\n"
410 "b9 0xfoo/imm32\n"
411 );
412 CHECK_TRACE_CONTENTS(
413 "error: not a number: 0xfoo\n"
414 );
415 }
416
417 void test_pack_flags_uppercase_hex() {
418 Hide_errors = true;
419 run(
420 "== code 0x1\n"
421 "b9 0xAb/imm32\n"
422 );
423 CHECK_TRACE_CONTENTS(
424 "error: uppercase hex not allowed: 0xAb\n"
425 );
426 }
427
428
429
430 bool all_hex_bytes(const line& inst) {
431 for (int i = 0; i < SIZE(inst.words); ++i)
432 if (!is_hex_byte(inst.words.at(i)))
433 return false;
434 return true;
435 }
436
437 bool is_hex_byte(const word& curr) {
438 if (contains_any_operand_metadata(curr))
439 return false;
440 if (SIZE(curr.data) != 2)
441 return false;
442 if (curr.data.find_first_not_of("0123456789abcdef") != string::npos)
443 return false;
444 return true;
445 }
446
447 bool contains_any_operand_metadata(const word& word) {
448 for (int i = 0; i < SIZE(word.metadata); ++i)
449 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
450 return true;
451 return false;
452 }
453
454 bool has_operand_metadata(const line& inst, const string& m) {
455 bool result = false;
456 for (int i = 0; i < SIZE(inst.words); ++i) {
457 if (!has_operand_metadata(inst.words.at(i), m)) continue;
458 if (result) {
459 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
460 return false;
461 }
462 result = true;
463 }
464 return result;
465 }
466
467 bool has_operand_metadata(const word& w, const string& m) {
468 bool result = false;
469 bool metadata_found = false;
470 for (int i = 0; i < SIZE(w.metadata); ++i) {
471 const string& curr = w.metadata.at(i);
472 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;
473 if (metadata_found) {
474 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
475 return false;
476 }
477 metadata_found = true;
478 result = (curr == m);
479 }
480 return result;
481 }
482
483 word metadata(const line& inst, const string& m) {
484 for (int i = 0; i < SIZE(inst.words); ++i)
485 if (has_operand_metadata(inst.words.at(i), m))
486 return inst.words.at(i);
487 assert(false);
488 }
489
// Cheap check for whether 's' should be treated as a number: true when it
// starts with a sign ('-' or '+') or a decimal digit (which also covers a
// "0x" prefix). Only the first character is inspected; full validation is
// left to the parser.
bool looks_like_hex_int(const string& s) {
  if (s.empty()) return false;
  const char first = s.at(0);
  if (first == '-' || first == '+') return true;
  // isdigit() is undefined for negative arguments other than EOF, so go
  // through unsigned char in case 'first' is a non-ASCII byte.
  if (isdigit(static_cast<unsigned char>(first))) return true;
  return false;
}
497
498 string to_string(const line& inst) {
499 ostringstream out;
500 for (int i = 0; i < SIZE(inst.words); ++i) {
501 if (i > 0) out << ' ';
502 out << inst.words.at(i).original;
503 }
504 return out.str();
505 }
506
507 int32_t parse_int(const string& s) {
508 if (s.empty()) return 0;
509 if (contains_uppercase(s)) {
510 raise << "uppercase hex not allowed: " << s << '\n' << end();
511 return 0;
512 }
513 istringstream in(s);
514 in >> std::hex;
515 if (s.at(0) == '-') {
516 int32_t result = 0;
517 in >> result;
518 if (!in || !in.eof()) {
519 raise << "not a number: " << s << '\n' << end();
520 return 0;
521 }
522 return result;
523 }
524 uint32_t uresult = 0;
525 in >> uresult;
526 if (!in || !in.eof()) {
527 raise << "not a number: " << s << '\n' << end();
528 return 0;
529 }
530 return static_cast<int32_t>(uresult);
531 }
532 :(before "End Unit Tests")
533 void test_parse_int() {
534 CHECK_EQ(0, parse_int("0"));
535 CHECK_EQ(0, parse_int("0x0"));
536 CHECK_EQ(0, parse_int("0x0"));
537 CHECK_EQ(16, parse_int("10"));
538 CHECK_EQ(-1, parse_int("-1"));
539 CHECK_EQ(-1, parse_int("0xffffffff"));
540 }