https://github.com/akkartik/mu/blob/master/subx/031check_operands.cc
1
2
3
4 :(scenario check_missing_imm8_operand)
5 % Hide_errors = true;
6 == 0x1
7 cd
8 +error: 'cd' (software interrupt): missing imm8 operand
9
10 :(before "Pack Operands(segment code)")
11 check_operands(code);
12 if (trace_contains_errors()) return;
13
14 :(code)
15 void check_operands(const segment& code) {
16 trace(99, "transform") << "-- check operands" << end();
17 for (int i = 0; i < SIZE(code.lines); ++i) {
18 check_operands(code.lines.at(i));
19 if (trace_contains_errors()) return;
20 }
21 }
22
23 void check_operands(const line& inst) {
24 word op = preprocess_op(inst.words.at(0));
25 if (op.data == "0f") {
26 check_operands_0f(inst);
27 return;
28 }
29 if (op.data == "f3") {
30 check_operands_f3(inst);
31 return;
32 }
33 check_operands(inst, op);
34 }
35
36 word preprocess_op(word op) {
37 op.data = tolower(op.data.c_str());
38
39 if (starts_with(op.data, "0x"))
40 op.data = op.data.substr(2);
41 if (SIZE(op.data) == 1)
42 op.data = string("0")+op.data;
43 return op;
44 }
45
46 void test_preprocess_op() {
47 word w1; w1.data = "0xf";
48 word w2; w2.data = "0f";
49 CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data);
50 }
51
52
53
54
55
56 :(before "End Types")
// Kinds of operands an instruction can take. Each enumerator doubles as a bit
// position (counting from the least significant bit) in the operand
// bitvectors used by this layer.
enum operand_type {
  MODRM = 0,  // bit 0, mask 0x01
  SUBOP,      // bit 1, mask 0x02
  DISP8,      // bit 2, mask 0x04
  DISP16,     // bit 3, mask 0x08
  DISP32,     // bit 4, mask 0x10
  IMM8,       // bit 5, mask 0x20
  IMM32,      // bit 6, mask 0x40
  NUM_OPERAND_TYPES  // count of the operand types above; not an operand type
};
68 :(before "End Globals")
69 vector<string> Operand_type_name;
70 map<string, operand_type> Operand_type;
71 :(before "End One-time Setup")
72 init_op_types();
73 :(code)
74 void init_op_types() {
75 assert(NUM_OPERAND_TYPES <= 8);
76 Operand_type_name.resize(NUM_OPERAND_TYPES);
77
78 DEF(MODRM);
79 DEF(SUBOP);
80 DEF(DISP8);
81 DEF(DISP16);
82 DEF(DISP32);
83 DEF(IMM8);
84 DEF(IMM32);
85
86 }
87
88 :(before "End Globals")
89 map<string, uint8_t> Permitted_operands;
90 const uint8_t INVALID_OPERANDS = 0xff;
91 :(before "End One-time Setup")
92 init_permitted_operands();
93 :(code)
94 void init_permitted_operands() {
95
96
97 put(Permitted_operands, "f4", 0x00);
98
99 put(Permitted_operands, "40", 0x00);
100 put(Permitted_operands, "41", 0x00);
101 put(Permitted_operands, "42", 0x00);
102 put(Permitted_operands, "43", 0x00);
103 put(Permitted_operands, "44", 0x00);
104 put(Permitted_operands, "45", 0x00);
105 put(Permitted_operands, "46", 0x00);
106 put(Permitted_operands, "47", 0x00);
107
108 put(Permitted_operands, "48", 0x00);
109 put(Permitted_operands, "49", 0x00);
110 put(Permitted_operands, "4a", 0x00);
111 put(Permitted_operands, "4b", 0x00);
112 put(Permitted_operands, "4c", 0x00);
113 put(Permitted_operands, "4d", 0x00);
114 put(Permitted_operands, "4e", 0x00);
115 put(Permitted_operands, "4f", 0x00);
116
117 put(Permitted_operands, "50", 0x00);
118 put(Permitted_operands, "51", 0x00);
119 put(Permitted_operands, "52", 0x00);
120 put(Permitted_operands, "53", 0x00);
121 put(Permitted_operands, "54", 0x00);
122 put(Permitted_operands, "55", 0x00);
123 put(Permitted_operands, "56", 0x00);
124 put(Permitted_operands, "57", 0x00);
125
126 put(Permitted_operands, "58", 0x00);
127 put(Permitted_operands, "59", 0x00);
128 put(Permitted_operands, "5a", 0x00);
129 put(Permitted_operands, "5b", 0x00);
130 put(Permitted_operands, "5c", 0x00);
131 put(Permitted_operands, "5d", 0x00);
132 put(Permitted_operands, "5e", 0x00);
133 put(Permitted_operands, "5f", 0x00);
134
135 put(Permitted_operands, "c3", 0x00);
136
137
138
139
140
141
142 put(Permitted_operands, "eb", 0x04);
143 put(Permitted_operands, "74", 0x04);
144 put(Permitted_operands, "75", 0x04);
145 put(Permitted_operands, "7c", 0x04);
146 put(Permitted_operands, "7d", 0x04);
147 put(Permitted_operands, "7e", 0x04);
148 put(Permitted_operands, "7f", 0x04);
149
150
151
152
153 put(Permitted_operands, "e8", 0x10);
154 put(Permitted_operands, "e9", 0x10);
155
156
157
158
159 put(Permitted_operands, "cd", 0x20);
160
161
162
163
164 put(Permitted_operands, "05", 0x40);
165 put(Permitted_operands, "2d", 0x40);
166 put(Permitted_operands, "25", 0x40);
167 put(Permitted_operands, "0d", 0x40);
168 put(Permitted_operands, "35", 0x40);
169 put(Permitted_operands, "3d", 0x40);
170 put(Permitted_operands, "68", 0x40);
171
172 put(Permitted_operands, "b8", 0x40);
173 put(Permitted_operands, "b9", 0x40);
174 put(Permitted_operands, "ba", 0x40);
175 put(Permitted_operands, "bb", 0x40);
176 put(Permitted_operands, "bc", 0x40);
177 put(Permitted_operands, "bd", 0x40);
178 put(Permitted_operands, "be", 0x40);
179 put(Permitted_operands, "bf", 0x40);
180
181
182
183
184
185
186 put(Permitted_operands, "01", 0x01);
187 put(Permitted_operands, "03", 0x01);
188
189 put(Permitted_operands, "29", 0x01);
190 put(Permitted_operands, "2b", 0x01);
191
192 put(Permitted_operands, "21", 0x01);
193 put(Permitted_operands, "23", 0x01);
194
195 put(Permitted_operands, "09", 0x01);
196 put(Permitted_operands, "0b", 0x01);
197
198 put(Permitted_operands, "31", 0x01);
199 put(Permitted_operands, "33", 0x01);
200
201 put(Permitted_operands, "39", 0x01);
202 put(Permitted_operands, "3b", 0x01);
203
204 put(Permitted_operands, "88", 0x01);
205 put(Permitted_operands, "89", 0x01);
206 put(Permitted_operands, "8a", 0x01);
207 put(Permitted_operands, "8b", 0x01);
208
209 put(Permitted_operands, "87", 0x01);
210
211 put(Permitted_operands, "8d", 0x01);
212
213 put(Permitted_operands, "8f", 0x01);
214
215
216
217
218 put(Permitted_operands, "d3", 0x03);
219 put(Permitted_operands, "f7", 0x03);
220 put(Permitted_operands, "ff", 0x03);
221
222
223
224
225 put(Permitted_operands, "c1", 0x23);
226
227
228
229
230 put(Permitted_operands, "c7", 0x41);
231
232
233
234
235 put(Permitted_operands, "81", 0x43);
236
237
238 }
239
240 :(code)
241
242
243
244
245 void check_operands(const line& inst, const word& op) {
246 if (!is_hex_byte(op)) return;
247 uint8_t expected_bitvector = get(Permitted_operands, op.data);
248 if (HAS(expected_bitvector, MODRM)) {
249 check_operands_modrm(inst, op);
250 compare_bitvector_modrm(inst, expected_bitvector, op);
251 }
252 else {
253 compare_bitvector(inst, expected_bitvector, op);
254 }
255 }
256
257
258
259 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {
260 if (all_hex_bytes(inst) && has_operands(inst)) return;
261 uint8_t bitvector = compute_operand_bitvector(inst);
262 if (trace_contains_errors()) return;
263 if (bitvector == expected) return;
264 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
265
266 if ((bitvector & 0x1) == (expected & 0x1)) continue;
267 const string& optype = Operand_type_name.at(i);
268 if ((bitvector & 0x1) > (expected & 0x1))
269 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
270 else
271 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
272
273 }
274
275 }
276
277 string maybe_name(const word& op) {
278 if (!is_hex_byte(op)) return "";
279 if (!contains_key(Name, op.data)) return "";
280
281 const string& s = get(Name, op.data);
282 return " ("+s.substr(0, s.find(" ("))+')';
283 }
284
285 uint32_t compute_operand_bitvector(const line& inst) {
286 uint32_t bitvector = 0;
287 for (int i = 1; i < SIZE(inst.words); ++i) {
288 bitvector = bitvector | bitvector_for_operand(inst.words.at(i));
289 if (trace_contains_errors()) return INVALID_OPERANDS;
290 }
291 return bitvector;
292 }
293
294 bool has_operands(const line& inst) {
295 return SIZE(inst.words) > first_operand(inst);
296 }
297
298 int first_operand(const line& inst) {
299 if (inst.words.at(0).data == "0f") return 2;
300 if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") {
301 if (inst.words.at(1).data == "0f")
302 return 3;
303 else
304 return 2;
305 }
306 return 1;
307 }
308
309
310
311 uint32_t bitvector_for_operand(const word& w) {
312 uint32_t bv = 0;
313 bool found = false;
314 for (int i = 0; i < SIZE(w.metadata); ++i) {
315 const string& curr = w.metadata.at(i);
316 if (!contains_key(Operand_type, curr)) continue;
317 if (found) {
318 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
319 return INVALID_OPERANDS;
320 }
321 bv = (1 << get(Operand_type, curr));
322 found = true;
323 }
324 return bv;
325 }
326
327 :(scenario conflicting_operand_type)
328 % Hide_errors = true;
329 == 0x1
330 cd/software-interrupt 80/imm8/imm32
331 +error: '80/imm8/imm32' has conflicting operand types; it should have only one
332
333
334
335
336 :(scenario check_missing_mod_operand)
337 % Hide_errors = true;
338 == 0x1
339 81 0/add/subop 3/rm32/ebx 1/imm32
340 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
341
342 :(code)
343 void check_operands_modrm(const line& inst, const word& op) {
344 if (all_hex_bytes(inst)) return;
345 check_operand_metadata_present(inst, "mod", op);
346 check_operand_metadata_present(inst, "rm32", op);
347
348 if (op.data == "81" || op.data == "8f" || op.data == "ff") {
349 check_operand_metadata_present(inst, "subop", op);
350 check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop");
351 }
352 if (trace_contains_errors()) return;
353 if (metadata(inst, "rm32").data != "4") return;
354
355 uint8_t mod = hex_byte(metadata(inst, "mod").data);
356 if (mod != 3) {
357 check_operand_metadata_present(inst, "base", op);
358 check_operand_metadata_present(inst, "index", op);
359 }
360 else {
361 check_operand_metadata_absent(inst, "base", op, "direct mode");
362 check_operand_metadata_absent(inst, "index", op, "direct mode");
363 }
364
365 }
366
367
368
369
370 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) {
371 if (all_hex_bytes(inst) && has_operands(inst)) return;
372 uint8_t bitvector = compute_operand_bitvector(inst);
373 if (trace_contains_errors()) return;
374 expected = CLEAR(expected, MODRM);
375 if (bitvector == expected) return;
376 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
377
378 if ((bitvector & 0x1) == (expected & 0x1)) continue;
379 const string& optype = Operand_type_name.at(i);
380 if (i == DISP8) {
381 int32_t mod = parse_int(metadata(inst, "mod").data);
382 if (mod != 1)
383 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
384 continue;
385 }
386 if (i == DISP32) {
387 int32_t mod = parse_int(metadata(inst, "mod").data);
388 int32_t rm32 = parse_int(metadata(inst, "rm32").data);
389 if (mod == 0 && rm32 == 5)
390 ;
391 else if (mod != 2)
392 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
393 continue;
394 }
395 if ((bitvector & 0x1) > (expected & 0x1))
396 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
397 else
398 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
399
400 }
401
402 }
403
404 void check_operand_metadata_present(const line& inst, const string& type, const word& op) {
405 if (!has_operand_metadata(inst, type))
406 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << type << " operand\n" << end();
407 }
408
409 void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
410 if (has_operand_metadata(inst, type))
411 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << type << " operand (" << msg << ")\n" << end();
412 }
413
414 :(scenarios transform)
415 :(scenario modrm_with_displacement)
416 % Reg[EAX].u = 0x1;
417 == 0x1
418
419 8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8
420 $error: 0
421 :(scenarios run)
422
423 :(scenario conflicting_operands_in_modrm_instruction)
424 % Hide_errors = true;
425 == 0x1
426 01/add 0/mod 3/mod
427 +error: '01/add 0/mod 3/mod' has conflicting mod operands
428
429 :(scenario conflicting_operand_type_modrm)
430 % Hide_errors = true;
431 == 0x1
432 01/add 0/mod 3/rm32/r32
433 +error: '3/rm32/r32' has conflicting operand types; it should have only one
434
435 :(scenario check_missing_rm32_operand)
436 % Hide_errors = true;
437 == 0x1
438 81 0/add/subop 0/mod 1/imm32
439 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
440
441 :(scenario check_missing_subop_operand)
442 % Hide_errors = true;
443 == 0x1
444 81 0/mod 3/rm32/ebx 1/imm32
445 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
446
447 :(scenario check_missing_base_operand)
448 % Hide_errors = true;
449 == 0x1
450 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
451 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
452
453 :(scenario check_missing_index_operand)
454 % Hide_errors = true;
455 == 0x1
456 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
457 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
458
459 :(scenario check_missing_base_operand_2)
460 % Hide_errors = true;
461 == 0x1
462 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
463 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
464
465 :(scenario check_extra_displacement)
466 % Hide_errors = true;
467 == 0x1
468 89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8
469 +error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8' (copy r32 to rm32): unexpected disp8 operand
470
471 :(scenario check_base_operand_not_needed_in_direct_mode)
472 == 0x1
473 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32
474 $error: 0
475
476
477
478 :(code)
479 void check_operands_0f(const line& inst) {
480 assert(inst.words.at(0).data == "0f");
481 if (SIZE(inst.words) == 1) {
482 raise << "opcode '0f' requires a second opcode\n" << end();
483 return;
484 }
485 word op = preprocess_op(inst.words.at(1));
486 if (!contains_key(Name_0f, op.data)) {
487 raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
488 return;
489 }
490 check_operands_0f(inst, op);
491 }
492
493 void check_operands_f3(const line& ) {
494 raise << "no supported opcodes starting with f3\n" << end();
495 }
496
497 :(scenario check_missing_disp32_operand)
498 % Hide_errors = true;
499 == 0x1
500
501
502
503 0f 84
504 +error: '0f 84' (jump disp32 bytes away if equal, if ZF is set): missing disp32 operand
505
506 :(before "End Globals")
507 map<string, uint8_t> Permitted_operands_0f;
508 :(before "End Init Permitted Operands")
509
510
511
512 put_new(Permitted_operands_0f, "84", 0x10);
513 put_new(Permitted_operands_0f, "85", 0x10);
514 put_new(Permitted_operands_0f, "8c", 0x10);
515 put_new(Permitted_operands_0f, "8d", 0x10);
516 put_new(Permitted_operands_0f, "8e", 0x10);
517 put_new(Permitted_operands_0f, "8f", 0x10);
518
519
520
521
522 put_new(Permitted_operands_0f, "af", 0x01);
523
524 :(code)
525 void check_operands_0f(const line& inst, const word& op) {
526 uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
527 if (HAS(expected_bitvector, MODRM))
528 check_operands_modrm(inst, op);
529 compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);
530 }
531
532 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
533 if (all_hex_bytes(inst) && has_operands(inst)) return;
534 uint8_t bitvector = compute_operand_bitvector(inst);
535 if (trace_contains_errors()) return;
536 if (bitvector == expected) return;
537 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
538
539 if ((bitvector & 0x1) == (expected & 0x1)) continue;
540 const string& optype = Operand_type_name.at(i);
541 if ((bitvector & 0x1) > (expected & 0x1))
542 raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": unexpected " << optype << " operand\n" << end();
543 else
544 raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": missing " << optype << " operand\n" << end();
545
546 }
547
548 }
549
550 string maybe_name_0f(const word& op) {
551 if (!is_hex_byte(op)) return "";
552 if (!contains_key(Name_0f, op.data)) return "";
553
554 const string& s = get(Name_0f, op.data);
555 return " ("+s.substr(0, s.find(" ("))+')';
556 }
557
// Return a lowercased copy of the NUL-terminated string 's'.
// Characters are converted one at a time with the C library's tolower(), so
// the result depends on the current C locale.
string tolower(const char* s) {
  string result;
  for (; *s; ++s) {
    // tolower(int) is only defined for EOF and values representable as
    // unsigned char; go through unsigned char so bytes >= 0x80 are safe on
    // platforms where plain char is signed.
    const unsigned char c = static_cast<unsigned char>(*s);
    result += static_cast<char>(tolower(c));
  }
  return result;
}
564
565
566
567
568
569 :(before "End Includes")
570