https://github.com/akkartik/mu/blob/master/subx/030---operands.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 :(before "End Help Texts")
16 put_new(Help, "instructions",
17 "Each x86 instruction consists of an instruction or opcode and some number\n"
18 "of operands.\n"
19 "Each operand has a type. An instruction won't have more than one operand of\n"
20 "any type.\n"
21 "Each instruction has some set of allowed operand types. It'll reject others.\n"
22 "The complete list of operand types: mod, subop, r32 (register), rm32\n"
23 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
24 "imm32.\n"
25 "Each of these has its own help page. Try reading 'subx help mod' next.\n"
26 );
27 :(before "End Help Contents")
28 cerr << " instructions\n";
29
30 :(code)
31 void test_pack_immediate_constants() {
32 run(
33 "== code 0x1\n"
34 "bb 0x2a/imm32\n"
35 );
36 CHECK_TRACE_CONTENTS(
37 "transform: packing instruction 'bb 0x2a/imm32'\n"
38 "transform: instruction after packing: 'bb 2a 00 00 00'\n"
39 "run: copy imm32 0x0000002a to EBX\n"
40 );
41 }
42
43
44
45 :(before "End Globals")
46 set<string> Instruction_operands;
47 :(before "End One-time Setup")
48 Instruction_operands.insert("subop");
49 Instruction_operands.insert("mod");
50 Instruction_operands.insert("rm32");
51 Instruction_operands.insert("base");
52 Instruction_operands.insert("index");
53 Instruction_operands.insert("scale");
54 Instruction_operands.insert("r32");
55 Instruction_operands.insert("disp8");
56 Instruction_operands.insert("disp16");
57 Instruction_operands.insert("disp32");
58 Instruction_operands.insert("imm8");
59 Instruction_operands.insert("imm32");
60
61 :(before "End Help Texts")
62 init_operand_type_help();
63 :(code)
64 void init_operand_type_help() {
65 put(Help, "mod",
66 "2-bit operand controlling the _addressing mode_ of many instructions,\n"
67 "to determine how to compute the _effective address_ to look up memory at\n"
68 "based on the 'rm32' operand and potentially others.\n"
69 "\n"
70 "If mod = 3, just operate on the contents of the register specified by rm32\n"
71 " (direct mode).\n"
72 "If mod = 2, effective address is usually* rm32 + disp32\n"
73 " (indirect mode with displacement).\n"
74 "If mod = 1, effective address is usually* rm32 + disp8\n"
75 " (indirect mode with displacement).\n"
76 "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
77 "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
78 " Using it as an address gets more involved. For more details,\n"
79 " try reading the help pages for 'base', 'index' and 'scale'.)\n"
80 "\n"
81 "For complete details, spend some time with two tables in the IA-32 software\n"
82 "developer's manual that are also included in this repo:\n"
83 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
84 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
85 );
86 put(Help, "subop",
87 "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
88 "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
89 );
90 put(Help, "r32",
91 "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
92 );
93 put(Help, "rm32",
94 "32-bit value in register or memory. The precise details of its construction\n"
95 "depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n"
96 "potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n"
97 "('disp8' or 'disp32').\n"
98 "\n"
99 "For complete details, spend some time with two tables in the IA-32 software\n"
100 "developer's manual that are also included in this repo:\n"
101 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
102 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
103 );
104 put(Help, "base",
105 "Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
106 "register containing an address to look up.\n"
107 "This address may be further modified by 'index' and 'scale' operands.\n"
108 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
109 "For complete details, spend some time with the IA-32 software developer's manual,\n"
110 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
111 "It is included in this repository as 'sib.pdf'.\n"
112 );
113 put(Help, "index",
114 "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
115 "the 'base' operand to compute the 'effective address' at which to look up memory.\n"
116 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
117 "For complete details, spend some time with the IA-32 software developer's manual,\n"
118 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
119 "It is included in this repository as 'sib.pdf'.\n"
120 );
121 put(Help, "scale",
122 "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
123 "power of 2 to be multiplied to the 'index' operand before adding the result to\n"
124 "the 'base' operand to compute the _effective address_ to operate on.\n"
125 " effective address = base + index * scale + displacement (disp8 or disp32)\n"
126 "\n"
127 "When scale is 0, use index unmodified.\n"
128 "When scale is 1, multiply index by 2.\n"
129 "When scale is 2, multiply index by 4.\n"
130 "When scale is 3, multiply index by 8.\n"
131 "\n"
132 "For complete details, spend some time with the IA-32 software developer's manual,\n"
133 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
134 "It is included in this repository as 'sib.pdf'.\n"
135 );
136 put(Help, "disp8",
137 "8-bit value to be added in many instructions.\n"
138 );
139 put(Help, "disp16",
140 "16-bit value to be added in many instructions.\n"
141 "Currently not used in any SubX instructions.\n"
142 );
143 put(Help, "disp32",
144 "32-bit value to be added in many instructions.\n"
145 );
146 put(Help, "imm8",
147 "8-bit value for many instructions.\n"
148 );
149 put(Help, "imm32",
150 "32-bit value for many instructions.\n"
151 );
152 }
153
154
155
156 :(after "Begin Transforms")
157
158 Transform.push_back(pack_operands);
159
160
161 :(code)
162 void pack_operands(program& p) {
163 if (p.segments.empty()) return;
164 segment& code = *find(p, "code");
165
166 trace(3, "transform") << "-- pack operands" << end();
167 for (int i = 0; i < SIZE(code.lines); ++i) {
168 line& inst = code.lines.at(i);
169 if (all_hex_bytes(inst)) continue;
170 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
171 pack_operands(inst);
172 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
173 }
174 }
175
176 void pack_operands(line& inst) {
177 line new_inst;
178 add_opcodes(inst, new_inst);
179 add_modrm_byte(inst, new_inst);
180 add_sib_byte(inst, new_inst);
181 add_disp_bytes(inst, new_inst);
182 add_imm_bytes(inst, new_inst);
183 inst.words.swap(new_inst.words);
184 }
185
186 void add_opcodes(const line& in, line& out) {
187 out.words.push_back(in.words.at(0));
188 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
189 out.words.push_back(in.words.at(1));
190 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
191 out.words.push_back(in.words.at(2));
192 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
193 out.words.push_back(in.words.at(2));
194 }
195
196 void add_modrm_byte(const line& in, line& out) {
197 uint8_t mod=0, reg_subop=0, rm32=0;
198 bool emit = false;
199 for (int i = 0; i < SIZE(in.words); ++i) {
200 const word& curr = in.words.at(i);
201 if (has_operand_metadata(curr, "mod")) {
202 mod = hex_byte(curr.data);
203 emit = true;
204 }
205 else if (has_operand_metadata(curr, "rm32")) {
206 rm32 = hex_byte(curr.data);
207 emit = true;
208 }
209 else if (has_operand_metadata(curr, "r32")) {
210 reg_subop = hex_byte(curr.data);
211 emit = true;
212 }
213 else if (has_operand_metadata(curr, "subop")) {
214 reg_subop = hex_byte(curr.data);
215 emit = true;
216 }
217 }
218 if (emit)
219 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
220 }
221
222 void add_sib_byte(const line& in, line& out) {
223 uint8_t scale=0, index=0, base=0;
224 bool emit = false;
225 for (int i = 0; i < SIZE(in.words); ++i) {
226 const word& curr = in.words.at(i);
227 if (has_operand_metadata(curr, "scale")) {
228 scale = hex_byte(curr.data);
229 emit = true;
230 }
231 else if (has_operand_metadata(curr, "index")) {
232 index = hex_byte(curr.data);
233 emit = true;
234 }
235 else if (has_operand_metadata(curr, "base")) {
236 base = hex_byte(curr.data);
237 emit = true;
238 }
239 }
240 if (emit)
241 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
242 }
243
244 void add_disp_bytes(const line& in, line& out) {
245 for (int i = 0; i < SIZE(in.words); ++i) {
246 const word& curr = in.words.at(i);
247 if (has_operand_metadata(curr, "disp8"))
248 emit_hex_bytes(out, curr, 1);
249 if (has_operand_metadata(curr, "disp16"))
250 emit_hex_bytes(out, curr, 2);
251 else if (has_operand_metadata(curr, "disp32"))
252 emit_hex_bytes(out, curr, 4);
253 }
254 }
255
256 void add_imm_bytes(const line& in, line& out) {
257 for (int i = 0; i < SIZE(in.words); ++i) {
258 const word& curr = in.words.at(i);
259 if (has_operand_metadata(curr, "imm8"))
260 emit_hex_bytes(out, curr, 1);
261 else if (has_operand_metadata(curr, "imm32"))
262 emit_hex_bytes(out, curr, 4);
263 }
264 }
265
266 void emit_hex_bytes(line& out, const word& w, int num) {
267 assert(num <= 4);
268 bool is_number = looks_like_hex_int(w.data);
269 if (num == 1 || !is_number) {
270 out.words.push_back(w);
271 if (is_number)
272 out.words.back().data = hex_byte_to_string(parse_int(w.data));
273 return;
274 }
275 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
276 }
277
278 void emit_hex_bytes(line& out, uint32_t val, int num) {
279 assert(num <= 4);
280 for (int i = 0; i < num; ++i) {
281 out.words.push_back(hex_byte_text(val & 0xff));
282 val = val >> 8;
283 }
284 }
285
286 word hex_byte_text(uint8_t val) {
287 word result;
288 result.data = hex_byte_to_string(val);
289 result.original = result.data+"/auto";
290 return result;
291 }
292
293 string hex_byte_to_string(uint8_t val) {
294 ostringstream out;
295
296 out << HEXBYTE << NUM(val);
297 return out.str();
298 }
299
300 string to_string(const vector<word>& in) {
301 ostringstream out;
302 for (int i = 0; i < SIZE(in); ++i) {
303 if (i > 0) out << ' ';
304 out << in.at(i).data;
305 }
306 return out.str();
307 }
308
309 :(before "End Unit Tests")
310 void test_preserve_metadata_when_emitting_single_byte() {
311 word in;
312 in.data = "f0";
313 in.original = "f0/foo";
314 line out;
315 emit_hex_bytes(out, in, 1);
316 CHECK_EQ(out.words.at(0).data, "f0");
317 CHECK_EQ(out.words.at(0).original, "f0/foo");
318 }
319
320 :(code)
321 void test_pack_disp8() {
322 run(
323 "== code 0x1\n"
324 "74 2/disp8\n"
325 );
326 CHECK_TRACE_CONTENTS(
327 "transform: packing instruction '74 2/disp8'\n"
328 "transform: instruction after packing: '74 02'\n"
329 );
330 }
331
332 void test_pack_disp8_negative() {
333 transform(
334 "== code 0x1\n"
335
336 "74 -1/disp8\n"
337 );
338 CHECK_TRACE_CONTENTS(
339 "transform: packing instruction '74 -1/disp8'\n"
340 "transform: instruction after packing: '74 ff'\n"
341 );
342 }
343
344
345 void transform(const string& text_bytes) {
346 program p;
347 istringstream in(text_bytes);
348 parse(in, p);
349 if (trace_contains_errors()) return;
350 transform(p);
351 }
352
353 void test_pack_modrm_imm32() {
354 run(
355 "== code 0x1\n"
356
357
358
359 " 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 \n"
360 );
361 CHECK_TRACE_CONTENTS(
362 "transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'\n"
363 "transform: instruction after packing: '81 c3 01 00 00 00'\n"
364 );
365 }
366
367 void test_pack_imm32_large() {
368 run(
369 "== code 0x1\n"
370 "b9 0x080490a7/imm32\n"
371 );
372 CHECK_TRACE_CONTENTS(
373 "transform: packing instruction 'b9 0x080490a7/imm32'\n"
374 "transform: instruction after packing: 'b9 a7 90 04 08'\n"
375 );
376 }
377
378 void test_pack_immediate_constants_hex() {
379 run(
380 "== code 0x1\n"
381 "b9 0x2a/imm32\n"
382 );
383 CHECK_TRACE_CONTENTS(
384 "transform: packing instruction 'b9 0x2a/imm32'\n"
385 "transform: instruction after packing: 'b9 2a 00 00 00'\n"
386 "run: copy imm32 0x0000002a to ECX\n"
387 );
388 }
389
390 void test_pack_silently_ignores_non_hex() {
391 Hide_errors = true;
392 transform(
393 "== code 0x1\n"
394 "b9 foo/imm32\n"
395 );
396 CHECK_TRACE_CONTENTS(
397 "transform: packing instruction 'b9 foo/imm32'\n"
398
399 "transform: instruction after packing: 'b9 foo'\n"
400 );
401 }
402
403 void test_pack_flags_bad_hex() {
404 Hide_errors = true;
405 run(
406 "== code 0x1\n"
407 "b9 0xfoo/imm32\n"
408 );
409 CHECK_TRACE_CONTENTS(
410 "error: not a number: 0xfoo\n"
411 );
412 }
413
414
415
416 bool all_hex_bytes(const line& inst) {
417 for (int i = 0; i < SIZE(inst.words); ++i)
418 if (!is_hex_byte(inst.words.at(i)))
419 return false;
420 return true;
421 }
422
423 bool is_hex_byte(const word& curr) {
424 if (contains_any_operand_metadata(curr))
425 return false;
426 if (SIZE(curr.data) != 2)
427 return false;
428 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
429 return false;
430 return true;
431 }
432
433 bool contains_any_operand_metadata(const word& word) {
434 for (int i = 0; i < SIZE(word.metadata); ++i)
435 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
436 return true;
437 return false;
438 }
439
440 bool has_operand_metadata(const line& inst, const string& m) {
441 bool result = false;
442 for (int i = 0; i < SIZE(inst.words); ++i) {
443 if (!has_operand_metadata(inst.words.at(i), m)) continue;
444 if (result) {
445 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
446 return false;
447 }
448 result = true;
449 }
450 return result;
451 }
452
453 bool has_operand_metadata(const word& w, const string& m) {
454 bool result = false;
455 bool metadata_found = false;
456 for (int i = 0; i < SIZE(w.metadata); ++i) {
457 const string& curr = w.metadata.at(i);
458 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;
459 if (metadata_found) {
460 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
461 return false;
462 }
463 metadata_found = true;
464 result = (curr == m);
465 }
466 return result;
467 }
468
469 word metadata(const line& inst, const string& m) {
470 for (int i = 0; i < SIZE(inst.words); ++i)
471 if (has_operand_metadata(inst.words.at(i), m))
472 return inst.words.at(i);
473 assert(false);
474 }
475
476 bool looks_like_hex_int(const string& s) {
477 if (s.empty()) return false;
478 if (s.at(0) == '-' || s.at(0) == '+') return true;
479 if (isdigit(s.at(0))) return true;
480
481 return false;
482 }
483
484 string to_string(const line& inst) {
485 ostringstream out;
486 for (int i = 0; i < SIZE(inst.words); ++i) {
487 if (i > 0) out << ' ';
488 out << inst.words.at(i).original;
489 }
490 return out.str();
491 }