https://github.com/akkartik/mu/blob/master/subx/031check_operands.cc
1
2
3
4 :(scenario check_missing_imm8_operand)
5 % Hide_errors = true;
6 == 0x1
7 cd
8 +error: 'cd' (software interrupt): missing imm8 operand
9
10 :(before "Pack Operands(segment code)")
11 check_operands(code);
12 if (trace_contains_errors()) return;
13
14 :(code)
15 void check_operands(const segment& code) {
16 trace(99, "transform") << "-- check operands" << end();
17 for (int i = 0; i < SIZE(code.lines); ++i) {
18 check_operands(code.lines.at(i));
19 if (trace_contains_errors()) return;
20 }
21 }
22
23 void check_operands(const line& inst) {
24 word op = preprocess_op(inst.words.at(0));
25 if (op.data == "0f") {
26 check_operands_0f(inst);
27 return;
28 }
29 if (op.data == "f3") {
30 check_operands_f3(inst);
31 return;
32 }
33 check_operands(inst, op);
34 }
35
36 word preprocess_op(word op) {
37 op.data = tolower(op.data.c_str());
38
39 if (starts_with(op.data, "0x"))
40 op.data = op.data.substr(2);
41 if (SIZE(op.data) == 1)
42 op.data = string("0")+op.data;
43 return op;
44 }
45
46 void test_preprocess_op() {
47 word w1; w1.data = "0xf";
48 word w2; w2.data = "0f";
49 CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data);
50 }
51
52
53
54
55
56
57
58
59
60
61 :(before "End Types")
// Kinds of operand an instruction may be annotated with.
// Each enumerator's value is the position of its bit in the operand
// bitvectors used below (bit 0 is the least significant).
enum expected_operand_type {
  MODRM = 0,
  SUBOP = 1,
  DISP8 = 2,
  DISP16 = 3,
  DISP32 = 4,
  IMM8 = 5,
  IMM32 = 6,
  NUM_OPERAND_TYPES = 7  // count of the kinds above; not itself a kind
};
73 :(before "End Globals")
74 vector<string> Operand_type_name;
75 map<string, expected_operand_type> Operand_type;
76 :(before "End One-time Setup")
77 init_op_types();
78 :(code)
79 void init_op_types() {
80 assert(NUM_OPERAND_TYPES <= 8);
81 Operand_type_name.resize(NUM_OPERAND_TYPES);
82
83 DEF(MODRM);
84 DEF(SUBOP);
85 DEF(DISP8);
86 DEF(DISP16);
87 DEF(DISP32);
88 DEF(IMM8);
89 DEF(IMM32);
90
91 }
92
93 :(before "End Globals")
94 map<string, uint8_t> Permitted_operands;
95 const uint8_t INVALID_OPERANDS = 0xff;
96 :(before "End One-time Setup")
97 init_permitted_operands();
98 :(code)
99 void init_permitted_operands() {
100
101
102 put(Permitted_operands, "f4", 0x00);
103
104 put(Permitted_operands, "40", 0x00);
105 put(Permitted_operands, "41", 0x00);
106 put(Permitted_operands, "42", 0x00);
107 put(Permitted_operands, "43", 0x00);
108 put(Permitted_operands, "44", 0x00);
109 put(Permitted_operands, "45", 0x00);
110 put(Permitted_operands, "46", 0x00);
111 put(Permitted_operands, "47", 0x00);
112
113 put(Permitted_operands, "48", 0x00);
114 put(Permitted_operands, "49", 0x00);
115 put(Permitted_operands, "4a", 0x00);
116 put(Permitted_operands, "4b", 0x00);
117 put(Permitted_operands, "4c", 0x00);
118 put(Permitted_operands, "4d", 0x00);
119 put(Permitted_operands, "4e", 0x00);
120 put(Permitted_operands, "4f", 0x00);
121
122 put(Permitted_operands, "50", 0x00);
123 put(Permitted_operands, "51", 0x00);
124 put(Permitted_operands, "52", 0x00);
125 put(Permitted_operands, "53", 0x00);
126 put(Permitted_operands, "54", 0x00);
127 put(Permitted_operands, "55", 0x00);
128 put(Permitted_operands, "56", 0x00);
129 put(Permitted_operands, "57", 0x00);
130
131 put(Permitted_operands, "58", 0x00);
132 put(Permitted_operands, "59", 0x00);
133 put(Permitted_operands, "5a", 0x00);
134 put(Permitted_operands, "5b", 0x00);
135 put(Permitted_operands, "5c", 0x00);
136 put(Permitted_operands, "5d", 0x00);
137 put(Permitted_operands, "5e", 0x00);
138 put(Permitted_operands, "5f", 0x00);
139
140 put(Permitted_operands, "c3", 0x00);
141
142
143
144
145
146
147 put(Permitted_operands, "eb", 0x04);
148 put(Permitted_operands, "74", 0x04);
149 put(Permitted_operands, "75", 0x04);
150 put(Permitted_operands, "7c", 0x04);
151 put(Permitted_operands, "7d", 0x04);
152 put(Permitted_operands, "7e", 0x04);
153 put(Permitted_operands, "7f", 0x04);
154
155
156
157
158 put(Permitted_operands, "e8", 0x10);
159 put(Permitted_operands, "e9", 0x10);
160
161
162
163
164 put(Permitted_operands, "cd", 0x20);
165
166
167
168
169 put(Permitted_operands, "05", 0x40);
170 put(Permitted_operands, "2d", 0x40);
171 put(Permitted_operands, "25", 0x40);
172 put(Permitted_operands, "0d", 0x40);
173 put(Permitted_operands, "35", 0x40);
174 put(Permitted_operands, "3d", 0x40);
175 put(Permitted_operands, "68", 0x40);
176
177 put(Permitted_operands, "b8", 0x40);
178 put(Permitted_operands, "b9", 0x40);
179 put(Permitted_operands, "ba", 0x40);
180 put(Permitted_operands, "bb", 0x40);
181 put(Permitted_operands, "bc", 0x40);
182 put(Permitted_operands, "bd", 0x40);
183 put(Permitted_operands, "be", 0x40);
184 put(Permitted_operands, "bf", 0x40);
185
186
187
188
189
190
191 put(Permitted_operands, "01", 0x01);
192 put(Permitted_operands, "03", 0x01);
193
194 put(Permitted_operands, "29", 0x01);
195 put(Permitted_operands, "2b", 0x01);
196
197 put(Permitted_operands, "21", 0x01);
198 put(Permitted_operands, "23", 0x01);
199
200 put(Permitted_operands, "09", 0x01);
201 put(Permitted_operands, "0b", 0x01);
202
203 put(Permitted_operands, "31", 0x01);
204 put(Permitted_operands, "33", 0x01);
205
206 put(Permitted_operands, "39", 0x01);
207 put(Permitted_operands, "3b", 0x01);
208
209 put(Permitted_operands, "88", 0x01);
210 put(Permitted_operands, "89", 0x01);
211 put(Permitted_operands, "8a", 0x01);
212 put(Permitted_operands, "8b", 0x01);
213
214 put(Permitted_operands, "87", 0x01);
215
216 put(Permitted_operands, "8d", 0x01);
217
218 put(Permitted_operands, "8f", 0x01);
219
220
221
222
223 put(Permitted_operands, "d3", 0x03);
224 put(Permitted_operands, "f7", 0x03);
225 put(Permitted_operands, "ff", 0x03);
226
227
228
229
230 put(Permitted_operands, "c1", 0x23);
231 put(Permitted_operands, "c6", 0x23);
232
233
234
235
236 put(Permitted_operands, "81", 0x43);
237 put(Permitted_operands, "c7", 0x43);
238
239
240 }
241
242 :(code)
243
244
245
246
247 void check_operands(const line& inst, const word& op) {
248 if (!is_hex_byte(op)) return;
249 uint8_t expected_bitvector = get(Permitted_operands, op.data);
250 if (HAS(expected_bitvector, MODRM)) {
251 check_operands_modrm(inst, op);
252 compare_bitvector_modrm(inst, expected_bitvector, op);
253 }
254 else {
255 compare_bitvector(inst, expected_bitvector, op);
256 }
257 }
258
259
260
261 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {
262 if (all_hex_bytes(inst) && has_operands(inst)) return;
263 uint8_t bitvector = compute_expected_operand_bitvector(inst);
264 if (trace_contains_errors()) return;
265 if (bitvector == expected) return;
266 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
267
268 if ((bitvector & 0x1) == (expected & 0x1)) continue;
269 const string& optype = Operand_type_name.at(i);
270 if ((bitvector & 0x1) > (expected & 0x1))
271 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
272 else
273 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
274
275 }
276
277 }
278
279 string maybe_name(const word& op) {
280 if (!is_hex_byte(op)) return "";
281 if (!contains_key(Name, op.data)) return "";
282
283 const string& s = get(Name, op.data);
284 return " ("+s.substr(0, s.find(" ("))+')';
285 }
286
287 uint32_t compute_expected_operand_bitvector(const line& inst) {
288 set<string> operands_found;
289 uint32_t bitvector = 0;
290 for (int i = 1; i < SIZE(inst.words); ++i) {
291 bitvector = bitvector | expected_bit_for_received_operand(inst.words.at(i), operands_found, inst);
292 if (trace_contains_errors()) return INVALID_OPERANDS;
293 }
294 return bitvector;
295 }
296
297 bool has_operands(const line& inst) {
298 return SIZE(inst.words) > first_operand(inst);
299 }
300
301 int first_operand(const line& inst) {
302 if (inst.words.at(0).data == "0f") return 2;
303 if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") {
304 if (inst.words.at(1).data == "0f")
305 return 3;
306 else
307 return 2;
308 }
309 return 1;
310 }
311
312
313
314 uint32_t expected_bit_for_received_operand(const word& w, set<string>& instruction_operands, const line& inst) {
315 uint32_t bv = 0;
316 bool found = false;
317 for (int i = 0; i < SIZE(w.metadata); ++i) {
318 string curr = w.metadata.at(i);
319 string expected_metadata = curr;
320 if (curr == "mod" || curr == "rm32" || curr == "r32" || curr == "scale" || curr == "index" || curr == "base")
321 expected_metadata = "modrm";
322 else if (!contains_key(Operand_type, curr)) continue;
323 if (found) {
324 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
325 return INVALID_OPERANDS;
326 }
327 if (instruction_operands.find(curr) != instruction_operands.end()) {
328 raise << "'" << to_string(inst) << "': duplicate " << curr << " operand\n" << end();
329 return INVALID_OPERANDS;
330 }
331 instruction_operands.insert(curr);
332 bv = (1 << get(Operand_type, expected_metadata));
333 found = true;
334 }
335 return bv;
336 }
337
338 :(scenario conflicting_operand_type)
339 % Hide_errors = true;
340 == 0x1
341 cd/software-interrupt 80/imm8/imm32
342 +error: '80/imm8/imm32' has conflicting operand types; it should have only one
343
344
345
346
347 :(scenario check_missing_mod_operand)
348 % Hide_errors = true;
349 == 0x1
350 81 0/add/subop 3/rm32/ebx 1/imm32
351 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
352
353 :(code)
354 void check_operands_modrm(const line& inst, const word& op) {
355 if (all_hex_bytes(inst)) return;
356 check_operand_metadata_present(inst, "mod", op);
357 check_operand_metadata_present(inst, "rm32", op);
358
359 if (op.data == "81" || op.data == "8f" || op.data == "ff") {
360 check_operand_metadata_present(inst, "subop", op);
361 check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop");
362 }
363 if (trace_contains_errors()) return;
364 if (metadata(inst, "rm32").data != "4") return;
365
366 uint8_t mod = hex_byte(metadata(inst, "mod").data);
367 if (mod != 3) {
368 check_operand_metadata_present(inst, "base", op);
369 check_operand_metadata_present(inst, "index", op);
370 }
371 else {
372 check_operand_metadata_absent(inst, "base", op, "direct mode");
373 check_operand_metadata_absent(inst, "index", op, "direct mode");
374 }
375
376 }
377
378
379
380 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) {
381 if (all_hex_bytes(inst) && has_operands(inst)) return;
382 uint8_t bitvector = compute_expected_operand_bitvector(inst);
383 if (trace_contains_errors()) return;
384
385 if (has_operand_metadata(inst, "mod")) {
386 int32_t mod = parse_int(metadata(inst, "mod").data);
387 switch (mod) {
388 case 0:
389 if (has_operand_metadata(inst, "rm32") && parse_int(metadata(inst, "rm32").data) == 5)
390 expected |= (1<<DISP32);
391 break;
392 case 1:
393 expected |= (1<<DISP8);
394 break;
395 case 2:
396 expected |= (1<<DISP32);
397 break;
398 }
399 }
400 if (bitvector == expected) return;
401 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
402
403 if ((bitvector & 0x1) == (expected & 0x1)) continue;
404 const string& optype = Operand_type_name.at(i);
405 if ((bitvector & 0x1) > (expected & 0x1))
406 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
407 else
408 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
409
410 }
411
412 }
413
414 void check_operand_metadata_present(const line& inst, const string& type, const word& op) {
415 if (!has_operand_metadata(inst, type))
416 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << type << " operand\n" << end();
417 }
418
419 void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
420 if (has_operand_metadata(inst, type))
421 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << type << " operand (" << msg << ")\n" << end();
422 }
423
424 :(scenarios transform)
425 :(scenario modrm_with_displacement)
426 % Reg[EAX].u = 0x1;
427 == 0x1
428
429 8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8
430 $error: 0
431
432 :(scenario check_missing_disp8)
433 % Hide_errors = true;
434 == 0x1
435 89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX
436 +error: '89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX' (copy r32 to rm32): missing disp8 operand
437
438 :(scenario check_missing_disp32)
439 % Hide_errors = true;
440 == 0x1
441 8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX
442 +error: '8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX' (copy rm32 to r32): missing disp32 operand
443 :(scenarios run)
444
445 :(scenario conflicting_operands_in_modrm_instruction)
446 % Hide_errors = true;
447 == 0x1
448 01/add 0/mod 3/mod
449 +error: '01/add 0/mod 3/mod' has conflicting mod operands
450
451 :(scenario conflicting_operand_type_modrm)
452 % Hide_errors = true;
453 == 0x1
454 01/add 0/mod 3/rm32/r32
455 +error: '3/rm32/r32' has conflicting operand types; it should have only one
456
457 :(scenario check_missing_rm32_operand)
458 % Hide_errors = true;
459 == 0x1
460 81 0/add/subop 0/mod 1/imm32
461 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
462
463 :(scenario check_missing_subop_operand)
464 % Hide_errors = true;
465 == 0x1
466 81 0/mod 3/rm32/ebx 1/imm32
467 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
468
469 :(scenario check_missing_base_operand)
470 % Hide_errors = true;
471 == 0x1
472 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
473 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
474
475 :(scenario check_missing_index_operand)
476 % Hide_errors = true;
477 == 0x1
478 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
479 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
480
481 :(scenario check_missing_base_operand_2)
482 % Hide_errors = true;
483 == 0x1
484 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
485 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
486
487 :(scenario check_extra_displacement)
488 % Hide_errors = true;
489 == 0x1
490 89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8
491 +error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8' (copy r32 to rm32): unexpected disp8 operand
492
493 :(scenario check_duplicate_operand)
494 % Hide_errors = true;
495 == 0x1
496 89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32
497 +error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32': duplicate r32 operand
498
499 :(scenario check_base_operand_not_needed_in_direct_mode)
500 == 0x1
501 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32
502 $error: 0
503
504 :(scenario extra_modrm)
505 % Hide_errors = true;
506 == 0x1
507 59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP
508 +error: '59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP' (pop top of stack to ECX): unexpected modrm operand
509
510
511
512 :(code)
513 void check_operands_0f(const line& inst) {
514 assert(inst.words.at(0).data == "0f");
515 if (SIZE(inst.words) == 1) {
516 raise << "opcode '0f' requires a second opcode\n" << end();
517 return;
518 }
519 word op = preprocess_op(inst.words.at(1));
520 if (!contains_key(Name_0f, op.data)) {
521 raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
522 return;
523 }
524 check_operands_0f(inst, op);
525 }
526
527 void check_operands_f3(const line& ) {
528 raise << "no supported opcodes starting with f3\n" << end();
529 }
530
531 :(scenario check_missing_disp32_operand)
532 % Hide_errors = true;
533 == 0x1
534
535
536
537 0f 84
538 +error: '0f 84' (jump disp32 bytes away if equal, if ZF is set): missing disp32 operand
539
540 :(before "End Globals")
541 map<string, uint8_t> Permitted_operands_0f;
542 :(before "End Init Permitted Operands")
543
544
545
546 put_new(Permitted_operands_0f, "84", 0x10);
547 put_new(Permitted_operands_0f, "85", 0x10);
548 put_new(Permitted_operands_0f, "8c", 0x10);
549 put_new(Permitted_operands_0f, "8d", 0x10);
550 put_new(Permitted_operands_0f, "8e", 0x10);
551 put_new(Permitted_operands_0f, "8f", 0x10);
552
553
554
555
556 put_new(Permitted_operands_0f, "af", 0x01);
557
558 :(code)
559 void check_operands_0f(const line& inst, const word& op) {
560 uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
561 if (HAS(expected_bitvector, MODRM))
562 check_operands_modrm(inst, op);
563 compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);
564 }
565
566 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
567 if (all_hex_bytes(inst) && has_operands(inst)) return;
568 uint8_t bitvector = compute_expected_operand_bitvector(inst);
569 if (trace_contains_errors()) return;
570 if (bitvector == expected) return;
571 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
572
573 if ((bitvector & 0x1) == (expected & 0x1)) continue;
574 const string& optype = Operand_type_name.at(i);
575 if ((bitvector & 0x1) > (expected & 0x1))
576 raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": unexpected " << optype << " operand\n" << end();
577 else
578 raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": missing " << optype << " operand\n" << end();
579
580 }
581
582 }
583
584 string maybe_name_0f(const word& op) {
585 if (!is_hex_byte(op)) return "";
586 if (!contains_key(Name_0f, op.data)) return "";
587
588 const string& s = get(Name_0f, op.data);
589 return " ("+s.substr(0, s.find(" ("))+')';
590 }
591
// Return a lowercased copy of the NUL-terminated string `s`.
// A null `s` yields the empty string.
string tolower(const char* s) {
  string result;
  if (!s) return result;
  for (; *s; ++s) {
    // tolower(int) is only defined for EOF and values representable as
    // unsigned char, so don't hand it a possibly-negative plain char
    result += static_cast<char>(tolower(static_cast<unsigned char>(*s)));
  }
  return result;
}
598
599
600
601
602
603 :(before "End Includes")
604