https://github.com/akkartik/mu/blob/master/subx/031check_operands.cc
1
2
3
4 void test_check_missing_imm8_operand() {
5 Hide_errors = true;
6 run(
7 "== code 0x1\n"
8 "cd\n"
9 );
10 CHECK_TRACE_CONTENTS(
11 "error: 'cd' (software interrupt): missing imm8 operand\n"
12 );
13 }
14
15 :(before "Pack Operands(segment code)")
16 check_operands(code);
17 if (trace_contains_errors()) return;
18
19 :(code)
20 void check_operands(const segment& code) {
21 trace(3, "transform") << "-- check operands" << end();
22 for (int i = 0; i < SIZE(code.lines); ++i) {
23 check_operands(code.lines.at(i));
24 if (trace_contains_errors()) return;
25 }
26 }
27
28 void check_operands(const line& inst) {
29 word op = preprocess_op(inst.words.at(0));
30 if (op.data == "0f") {
31 check_operands_0f(inst);
32 return;
33 }
34 if (op.data == "f3") {
35 check_operands_f3(inst);
36 return;
37 }
38 check_operands(inst, op);
39 }
40
41 word preprocess_op(word op) {
42 op.data = tolower(op.data.c_str());
43
44 if (starts_with(op.data, "0x"))
45 op.data = op.data.substr(2);
46 if (SIZE(op.data) == 1)
47 op.data = string("0")+op.data;
48 return op;
49 }
50
51 void test_preprocess_op() {
52 word w1; w1.data = "0xf";
53 word w2; w2.data = "0f";
54 CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data);
55 }
56
57
58
59
60
61
62
63
64
65
66 :(before "End Types")
67 enum expected_operand_type {
68
69 MODRM,
70 SUBOP,
71 DISP8,
72 DISP16,
73 DISP32,
74 IMM8,
75 IMM32,
76 NUM_OPERAND_TYPES
77 };
78 :(before "End Globals")
79 vector<string> Operand_type_name;
80 map<string, expected_operand_type> Operand_type;
81 :(before "End One-time Setup")
82 init_op_types();
83 :(code)
84 void init_op_types() {
85 assert(NUM_OPERAND_TYPES <= 8);
86 Operand_type_name.resize(NUM_OPERAND_TYPES);
87 #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type);
88 DEF(MODRM);
89 DEF(SUBOP);
90 DEF(DISP8);
91 DEF(DISP16);
92 DEF(DISP32);
93 DEF(IMM8);
94 DEF(IMM32);
95 #undef DEF
96 }
97
98 :(before "End Globals")
99 map<string, uint8_t> Permitted_operands;
100 const uint8_t INVALID_OPERANDS = 0xff;
101 :(before "End One-time Setup")
102 init_permitted_operands();
103 :(code)
104 void init_permitted_operands() {
105
106
107 put(Permitted_operands, "f4", 0x00);
108
109 put(Permitted_operands, "40", 0x00);
110 put(Permitted_operands, "41", 0x00);
111 put(Permitted_operands, "42", 0x00);
112 put(Permitted_operands, "43", 0x00);
113 put(Permitted_operands, "44", 0x00);
114 put(Permitted_operands, "45", 0x00);
115 put(Permitted_operands, "46", 0x00);
116 put(Permitted_operands, "47", 0x00);
117
118 put(Permitted_operands, "48", 0x00);
119 put(Permitted_operands, "49", 0x00);
120 put(Permitted_operands, "4a", 0x00);
121 put(Permitted_operands, "4b", 0x00);
122 put(Permitted_operands, "4c", 0x00);
123 put(Permitted_operands, "4d", 0x00);
124 put(Permitted_operands, "4e", 0x00);
125 put(Permitted_operands, "4f", 0x00);
126
127 put(Permitted_operands, "50", 0x00);
128 put(Permitted_operands, "51", 0x00);
129 put(Permitted_operands, "52", 0x00);
130 put(Permitted_operands, "53", 0x00);
131 put(Permitted_operands, "54", 0x00);
132 put(Permitted_operands, "55", 0x00);
133 put(Permitted_operands, "56", 0x00);
134 put(Permitted_operands, "57", 0x00);
135
136 put(Permitted_operands, "58", 0x00);
137 put(Permitted_operands, "59", 0x00);
138 put(Permitted_operands, "5a", 0x00);
139 put(Permitted_operands, "5b", 0x00);
140 put(Permitted_operands, "5c", 0x00);
141 put(Permitted_operands, "5d", 0x00);
142 put(Permitted_operands, "5e", 0x00);
143 put(Permitted_operands, "5f", 0x00);
144
145 put(Permitted_operands, "99", 0x00);
146
147 put(Permitted_operands, "c3", 0x00);
148
149
150
151
152
153
154 put(Permitted_operands, "eb", 0x04);
155 put(Permitted_operands, "72", 0x04);
156 put(Permitted_operands, "73", 0x04);
157 put(Permitted_operands, "74", 0x04);
158 put(Permitted_operands, "75", 0x04);
159 put(Permitted_operands, "76", 0x04);
160 put(Permitted_operands, "77", 0x04);
161 put(Permitted_operands, "7c", 0x04);
162 put(Permitted_operands, "7d", 0x04);
163 put(Permitted_operands, "7e", 0x04);
164 put(Permitted_operands, "7f", 0x04);
165
166
167
168
169 put(Permitted_operands, "e8", 0x10);
170 put(Permitted_operands, "e9", 0x10);
171
172
173
174
175 put(Permitted_operands, "cd", 0x20);
176
177
178
179
180 put(Permitted_operands, "05", 0x40);
181 put(Permitted_operands, "2d", 0x40);
182 put(Permitted_operands, "25", 0x40);
183 put(Permitted_operands, "0d", 0x40);
184 put(Permitted_operands, "35", 0x40);
185 put(Permitted_operands, "3d", 0x40);
186 put(Permitted_operands, "68", 0x40);
187
188 put(Permitted_operands, "b8", 0x40);
189 put(Permitted_operands, "b9", 0x40);
190 put(Permitted_operands, "ba", 0x40);
191 put(Permitted_operands, "bb", 0x40);
192 put(Permitted_operands, "bc", 0x40);
193 put(Permitted_operands, "bd", 0x40);
194 put(Permitted_operands, "be", 0x40);
195 put(Permitted_operands, "bf", 0x40);
196
197
198
199
200
201
202 put(Permitted_operands, "01", 0x01);
203 put(Permitted_operands, "03", 0x01);
204
205 put(Permitted_operands, "29", 0x01);
206 put(Permitted_operands, "2b", 0x01);
207
208 put(Permitted_operands, "21", 0x01);
209 put(Permitted_operands, "23", 0x01);
210
211 put(Permitted_operands, "09", 0x01);
212 put(Permitted_operands, "0b", 0x01);
213
214 put(Permitted_operands, "31", 0x01);
215 put(Permitted_operands, "33", 0x01);
216
217 put(Permitted_operands, "39", 0x01);
218 put(Permitted_operands, "3b", 0x01);
219
220 put(Permitted_operands, "88", 0x01);
221 put(Permitted_operands, "89", 0x01);
222 put(Permitted_operands, "8a", 0x01);
223 put(Permitted_operands, "8b", 0x01);
224
225 put(Permitted_operands, "87", 0x01);
226
227 put(Permitted_operands, "8d", 0x01);
228
229 put(Permitted_operands, "8f", 0x01);
230
231
232
233
234 put(Permitted_operands, "d3", 0x03);
235 put(Permitted_operands, "f7", 0x03);
236 put(Permitted_operands, "ff", 0x03);
237
238
239
240
241 put(Permitted_operands, "c1", 0x23);
242 put(Permitted_operands, "c6", 0x23);
243
244
245
246
247 put(Permitted_operands, "81", 0x43);
248 put(Permitted_operands, "c7", 0x43);
249
250
251 }
252
// Helpers for operand bitvectors; 'pos' is a bit position (an
// expected_operand_type). HAS yields the masked bit (non-zero when set);
// SET and CLEAR yield a new value and do not modify their argument.
#define HAS(bv, pos)  ((bv) & (1 << (pos)))
#define SET(bv, pos)  ((bv) | (1 << (pos)))
#define CLEAR(bv, pos)  ((bv) & (~(1 << (pos))))
256
257 void check_operands(const line& inst, const word& op) {
258 if (!is_hex_byte(op)) return;
259 uint8_t expected_bitvector = get(Permitted_operands, op.data);
260 if (HAS(expected_bitvector, MODRM)) {
261 check_operands_modrm(inst, op);
262 compare_bitvector_modrm(inst, expected_bitvector, op);
263 }
264 else {
265 compare_bitvector(inst, expected_bitvector, op);
266 }
267 }
268
269
270
271 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {
272 if (all_hex_bytes(inst) && has_operands(inst)) return;
273 uint8_t bitvector = compute_expected_operand_bitvector(inst);
274 if (trace_contains_errors()) return;
275 if (bitvector == expected) return;
276 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
277
278 if ((bitvector & 0x1) == (expected & 0x1)) continue;
279 const string& optype = Operand_type_name.at(i);
280 if ((bitvector & 0x1) > (expected & 0x1))
281 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
282 else
283 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
284
285 }
286
287 }
288
289 string maybe_name(const word& op) {
290 if (!is_hex_byte(op)) return "";
291 if (!contains_key(Name, op.data)) return "";
292
293 const string& s = get(Name, op.data);
294 return " ("+s.substr(0, s.find(" ("))+')';
295 }
296
297 uint32_t compute_expected_operand_bitvector(const line& inst) {
298 set<string> operands_found;
299 uint32_t bitvector = 0;
300 for (int i = 1; i < SIZE(inst.words); ++i) {
301 bitvector = bitvector | expected_bit_for_received_operand(inst.words.at(i), operands_found, inst);
302 if (trace_contains_errors()) return INVALID_OPERANDS;
303 }
304 return bitvector;
305 }
306
307 bool has_operands(const line& inst) {
308 return SIZE(inst.words) > first_operand(inst);
309 }
310
311 int first_operand(const line& inst) {
312 if (inst.words.at(0).data == "0f") return 2;
313 if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") {
314 if (inst.words.at(1).data == "0f")
315 return 3;
316 else
317 return 2;
318 }
319 return 1;
320 }
321
322
323
324 uint32_t expected_bit_for_received_operand(const word& w, set<string>& instruction_operands, const line& inst) {
325 uint32_t bv = 0;
326 bool found = false;
327 for (int i = 0; i < SIZE(w.metadata); ++i) {
328 string curr = w.metadata.at(i);
329 string expected_metadata = curr;
330 if (curr == "mod" || curr == "rm32" || curr == "r32" || curr == "scale" || curr == "index" || curr == "base")
331 expected_metadata = "modrm";
332 else if (!contains_key(Operand_type, curr)) continue;
333 if (found) {
334 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
335 return INVALID_OPERANDS;
336 }
337 if (instruction_operands.find(curr) != instruction_operands.end()) {
338 raise << "'" << to_string(inst) << "': duplicate " << curr << " operand\n" << end();
339 return INVALID_OPERANDS;
340 }
341 instruction_operands.insert(curr);
342 bv = (1 << get(Operand_type, expected_metadata));
343 found = true;
344 }
345 return bv;
346 }
347
348 void test_conflicting_operand_type() {
349 Hide_errors = true;
350 run(
351 "== code 0x1\n"
352 "cd/software-interrupt 80/imm8/imm32\n"
353 );
354 CHECK_TRACE_CONTENTS(
355 "error: '80/imm8/imm32' has conflicting operand types; it should have only one\n"
356 );
357 }
358
359
360
361
362 void test_check_missing_mod_operand() {
363 Hide_errors = true;
364 run(
365 "== code 0x1\n"
366 "81 0/add/subop 3/rm32/ebx 1/imm32\n"
367 );
368 CHECK_TRACE_CONTENTS(
369 "error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand\n"
370 );
371 }
372
373 void check_operands_modrm(const line& inst, const word& op) {
374 if (all_hex_bytes(inst)) return;
375 check_operand_metadata_present(inst, "mod", op);
376 check_operand_metadata_present(inst, "rm32", op);
377
378 if (op.data == "81" || op.data == "8f" || op.data == "ff") {
379 check_operand_metadata_present(inst, "subop", op);
380 check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop");
381 }
382 if (trace_contains_errors()) return;
383 if (metadata(inst, "rm32").data != "4") return;
384
385 uint8_t mod = hex_byte(metadata(inst, "mod").data);
386 if (mod != 3) {
387 check_operand_metadata_present(inst, "base", op);
388 check_operand_metadata_present(inst, "index", op);
389 }
390 else {
391 check_operand_metadata_absent(inst, "base", op, "direct mode");
392 check_operand_metadata_absent(inst, "index", op, "direct mode");
393 }
394
395 }
396
397
398
399 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) {
400 if (all_hex_bytes(inst) && has_operands(inst)) return;
401 uint8_t bitvector = compute_expected_operand_bitvector(inst);
402 if (trace_contains_errors()) return;
403
404 if (has_operand_metadata(inst, "mod")) {
405 int32_t mod = parse_int(metadata(inst, "mod").data);
406 switch (mod) {
407 case 0:
408 if (has_operand_metadata(inst, "rm32") && parse_int(metadata(inst, "rm32").data) == 5)
409 expected |= (1<<DISP32);
410 break;
411 case 1:
412 expected |= (1<<DISP8);
413 break;
414 case 2:
415 expected |= (1<<DISP32);
416 break;
417 }
418 }
419 if (bitvector == expected) return;
420 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
421
422 if ((bitvector & 0x1) == (expected & 0x1)) continue;
423 const string& optype = Operand_type_name.at(i);
424 if ((bitvector & 0x1) > (expected & 0x1))
425 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
426 else
427 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
428
429 }
430
431 }
432
433 void check_operand_metadata_present(const line& inst, const string& type, const word& op) {
434 if (!has_operand_metadata(inst, type))
435 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << type << " operand\n" << end();
436 }
437
438 void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
439 if (has_operand_metadata(inst, type))
440 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << type << " operand (" << msg << ")\n" << end();
441 }
442
443 void test_modrm_with_displacement() {
444 Reg[EAX].u = 0x1;
445 transform(
446 "== code 0x1\n"
447
448 "8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8\n"
449 );
450 CHECK_TRACE_COUNT("error", 0);
451 }
452
453 void test_check_missing_disp8() {
454 Hide_errors = true;
455 transform(
456 "== code 0x1\n"
457 "89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX\n"
458 );
459 CHECK_TRACE_CONTENTS(
460 "error: '89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX' (copy r32 to rm32): missing disp8 operand\n"
461 );
462 }
463
464 void test_check_missing_disp32() {
465 Hide_errors = true;
466 transform(
467 "== code 0x1\n"
468 "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX\n"
469 );
470 CHECK_TRACE_CONTENTS(
471 "error: '8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX' (copy rm32 to r32): missing disp32 operand\n"
472 );
473 }
474
475 void test_conflicting_operands_in_modrm_instruction() {
476 Hide_errors = true;
477 run(
478 "== code 0x1\n"
479 "01/add 0/mod 3/mod\n"
480 );
481 CHECK_TRACE_CONTENTS(
482 "error: '01/add 0/mod 3/mod' has conflicting mod operands\n"
483 );
484 }
485
486 void test_conflicting_operand_type_modrm() {
487 Hide_errors = true;
488 run(
489 "== code 0x1\n"
490 "01/add 0/mod 3/rm32/r32\n"
491 );
492 CHECK_TRACE_CONTENTS(
493 "error: '3/rm32/r32' has conflicting operand types; it should have only one\n"
494 );
495 }
496
497 void test_check_missing_rm32_operand() {
498 Hide_errors = true;
499 run(
500 "== code 0x1\n"
501 "81 0/add/subop 0/mod 1/imm32\n"
502 );
503 CHECK_TRACE_CONTENTS(
504 "error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand\n"
505 );
506 }
507
508 void test_check_missing_subop_operand() {
509 Hide_errors = true;
510 run(
511 "== code 0x1\n"
512 "81 0/mod 3/rm32/ebx 1/imm32\n"
513 );
514 CHECK_TRACE_CONTENTS(
515 "error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand\n"
516 );
517 }
518
519 void test_check_missing_base_operand() {
520 Hide_errors = true;
521 run(
522 "== code 0x1\n"
523 "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32\n"
524 );
525 CHECK_TRACE_CONTENTS(
526 "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand\n"
527 );
528 }
529
530 void test_check_missing_index_operand() {
531 Hide_errors = true;
532 run(
533 "== code 0x1\n"
534 "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32\n"
535 );
536 CHECK_TRACE_CONTENTS(
537 "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand\n"
538 );
539 }
540
541 void test_check_missing_base_operand_2() {
542 Hide_errors = true;
543 run(
544 "== code 0x1\n"
545 "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32\n"
546 );
547 CHECK_TRACE_CONTENTS(
548 "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand\n"
549 );
550 }
551
552 void test_check_extra_displacement() {
553 Hide_errors = true;
554 run(
555 "== code 0x1\n"
556 "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8\n"
557 );
558 CHECK_TRACE_CONTENTS(
559 "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8' (copy r32 to rm32): unexpected disp8 operand\n"
560 );
561 }
562
563 void test_check_duplicate_operand() {
564 Hide_errors = true;
565 run(
566 "== code 0x1\n"
567 "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32\n"
568 );
569 CHECK_TRACE_CONTENTS(
570 "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32': duplicate r32 operand\n"
571 );
572 }
573
574 void test_check_base_operand_not_needed_in_direct_mode() {
575 run(
576 "== code 0x1\n"
577 "81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32\n"
578 );
579 CHECK_TRACE_COUNT("error", 0);
580 }
581
582 void test_extra_modrm() {
583 Hide_errors = true;
584 run(
585 "== code 0x1\n"
586 "59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP\n"
587 );
588 CHECK_TRACE_CONTENTS(
589 "error: '59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP' (pop top of stack to ECX): unexpected modrm operand\n"
590 );
591 }
592
593
594
595 void check_operands_0f(const line& inst) {
596 assert(inst.words.at(0).data == "0f");
597 if (SIZE(inst.words) == 1) {
598 raise << "opcode '0f' requires a second opcode\n" << end();
599 return;
600 }
601 word op = preprocess_op(inst.words.at(1));
602 if (!contains_key(Name_0f, op.data)) {
603 raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
604 return;
605 }
606 check_operands_0f(inst, op);
607 }
608
609 void check_operands_f3(const line& ) {
610 raise << "no supported opcodes starting with f3\n" << end();
611 }
612
613 void test_check_missing_disp32_operand() {
614 Hide_errors = true;
615 run(
616 "== code 0x1\n"
617 " 0f 84 # jmp if ZF to ??\n"
618 );
619 CHECK_TRACE_CONTENTS(
620 "error: '0f 84' (jump disp32 bytes away if equal, if ZF is set): missing disp32 operand\n"
621 );
622 }
623
624 :(before "End Globals")
625 map<string, uint8_t> Permitted_operands_0f;
626 :(before "End Init Permitted Operands")
627
628
629
630 put_new(Permitted_operands_0f, "84", 0x10);
631 put_new(Permitted_operands_0f, "85", 0x10);
632 put_new(Permitted_operands_0f, "8c", 0x10);
633 put_new(Permitted_operands_0f, "8d", 0x10);
634 put_new(Permitted_operands_0f, "8e", 0x10);
635 put_new(Permitted_operands_0f, "8f", 0x10);
636
637
638
639
640 put_new(Permitted_operands_0f, "af", 0x01);
641
642 :(code)
643 void check_operands_0f(const line& inst, const word& op) {
644 uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
645 if (HAS(expected_bitvector, MODRM))
646 check_operands_modrm(inst, op);
647 compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);
648 }
649
650 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
651 if (all_hex_bytes(inst) && has_operands(inst)) return;
652 uint8_t bitvector = compute_expected_operand_bitvector(inst);
653 if (trace_contains_errors()) return;
654 if (bitvector == expected) return;
655 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
656
657 if ((bitvector & 0x1) == (expected & 0x1)) continue;
658 const string& optype = Operand_type_name.at(i);
659 if ((bitvector & 0x1) > (expected & 0x1))
660 raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": unexpected " << optype << " operand\n" << end();
661 else
662 raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": missing " << optype << " operand\n" << end();
663
664 }
665
666 }
667
668 string maybe_name_0f(const word& op) {
669 if (!is_hex_byte(op)) return "";
670 if (!contains_key(Name_0f, op.data)) return "";
671
672 const string& s = get(Name_0f, op.data);
673 return " ("+s.substr(0, s.find(" ("))+')';
674 }
675
// Return a lowercased copy of the NUL-terminated string 's'.
// Each character is lowercased independently using tolower() from <cctype>.
string tolower(const char* s) {
  string result;
  for (; *s; ++s) {
    // <cctype> functions are only defined for EOF and values representable as
    // unsigned char, so a possibly-negative plain char must be converted first.
    result += static_cast<char>(tolower(static_cast<unsigned char>(*s)));
  }
  return result;
}
682
// The bitvector helpers are only meant for the code above.
#undef CLEAR
#undef SET
#undef HAS
686
687 :(before "End Includes")
688 #include<cctype>