https://github.com/akkartik/mu/blob/master/subx/031check_operands.cc
  1 //: Since we're tagging operands with their types, let's start checking these
  2 //: operand types for each instruction.
  3 
  4 void test_check_missing_imm8_operand() {
  5   Hide_errors = true;
  6   run(
  7       "== code 0x1\n"
  8       "cd\n"  // interrupt ??
  9   );
 10   CHECK_TRACE_CONTENTS(
 11       "error: 'cd' (software interrupt): missing imm8 operand\n"
 12   );
 13 }
 14 
 15 :(before "Pack Operands(segment code)")
 16 check_operands(code);
 17 if (trace_contains_errors()) return;
 18 
 19 :(code)
 20 void check_operands(const segment& code) {
 21   trace(3, "transform") << "-- check operands" << end();
 22   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 23     check_operands(code.lines.at(i));
 24     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
 25   }
 26 }
 27 
 28 void check_operands(const line& inst) {
 29   word op = preprocess_op(inst.words.at(0));
 30   if (op.data == "0f") {
 31     check_operands_0f(inst);
 32     return;
 33   }
 34   if (op.data == "f3") {
 35     check_operands_f3(inst);
 36     return;
 37   }
 38   check_operands(inst, op);
 39 }
 40 
 41 word preprocess_op(word/*copy*/ op) {
 42   op.data = tolower(op.data.c_str());
 43   // opcodes can't be negative
 44   if (starts_with(op.data, "0x"))
 45     op.data = op.data.substr(2);
 46   if (SIZE(op.data) == 1)
 47     op.data = string("0")+op.data;
 48   return op;
 49 }
 50 
 51 void test_preprocess_op() {
 52   word w1;  w1.data = "0xf";
 53   word w2;  w2.data = "0f";
 54   CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data);
 55 }
 56 
 57 //: To check the operands for an opcode, we'll track the permitted operands
 58 //: for each supported opcode in a bitvector. That way we can often compute the
 59 //: 'received' operand bitvector for each instruction's operands and compare
 60 //: it with the 'expected' bitvector.
 61 //:
 62 //: The 'expected' and 'received' bitvectors can be different; the MODRM bit
 63 //: in the 'expected' bitvector maps to multiple 'received' operand types in
 64 //: an instruction. We deal in expected bitvectors throughout.
 65 
 66 :(before "End Types")
 // Operand categories an opcode may require. Each enumerator is a bit position
 // in an 'expected' operand bitvector, counted from the least significant bit.
 enum expected_operand_type {
   MODRM,   // bit 0 (0x01); more complex, may also involve disp8 or disp32
   SUBOP,   // bit 1 (0x02)
   DISP8,   // bit 2 (0x04)
   DISP16,  // bit 3 (0x08)
   DISP32,  // bit 4 (0x10)
   IMM8,    // bit 5 (0x20)
   IMM32,   // bit 6 (0x40)
   NUM_OPERAND_TYPES  // count of the types above; not an operand type itself
 };
 78 :(before "End Globals")
 79 vector<string> Operand_type_name;
 80 map<string, expected_operand_type> Operand_type;
 81 :(before "End One-time Setup")
 82 init_op_types();
 83 :(code)
 84 void init_op_types() {
 85   assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8);
 86   Operand_type_name.resize(NUM_OPERAND_TYPES);
 87   #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type);
 88   DEF(MODRM);
 89   DEF(SUBOP);
 90   DEF(DISP8);
 91   DEF(DISP16);
 92   DEF(DISP32);
 93   DEF(IMM8);
 94   DEF(IMM32);
 95   #undef DEF
 96 }
 97 
 98 :(before "End Globals")
 99 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands;
100 const uint8_t INVALID_OPERANDS = 0xff;  // no instruction uses all the operand types
101 :(before "End One-time Setup")
102 init_permitted_operands();
103 :(code)
104 void init_permitted_operands() {
105   //// Class A: just op, no operands
106   // halt
107   put(Permitted_operands, "f4", 0x00);
108   // inc
109   put(Permitted_operands, "40", 0x00);
110   put(Permitted_operands, "41", 0x00);
111   put(Permitted_operands, "42", 0x00);
112   put(Permitted_operands, "43", 0x00);
113   put(Permitted_operands, "44", 0x00);
114   put(Permitted_operands, "45", 0x00);
115   put(Permitted_operands, "46", 0x00);
116   put(Permitted_operands, "47", 0x00);
117   // dec
118   put(Permitted_operands, "48", 0x00);
119   put(Permitted_operands, "49", 0x00);
120   put(Permitted_operands, "4a", 0x00);
121   put(Permitted_operands, "4b", 0x00);
122   put(Permitted_operands, "4c", 0x00);
123   put(Permitted_operands, "4d", 0x00);
124   put(Permitted_operands, "4e", 0x00);
125   put(Permitted_operands, "4f", 0x00);
126   // push
127   put(Permitted_operands, "50", 0x00);
128   put(Permitted_operands, "51", 0x00);
129   put(Permitted_operands, "52", 0x00);
130   put(Permitted_operands, "53", 0x00);
131   put(Permitted_operands, "54", 0x00);
132   put(Permitted_operands, "55", 0x00);
133   put(Permitted_operands, "56", 0x00);
134   put(Permitted_operands, "57", 0x00);
135   // pop
136   put(Permitted_operands, "58", 0x00);
137   put(Permitted_operands, "59", 0x00);
138   put(Permitted_operands, "5a", 0x00);
139   put(Permitted_operands, "5b", 0x00);
140   put(Permitted_operands, "5c", 0x00);
141   put(Permitted_operands, "5d", 0x00);
142   put(Permitted_operands, "5e", 0x00);
143   put(Permitted_operands, "5f", 0x00);
144   // sign-extend EAX into EDX
145   put(Permitted_operands, "99", 0x00);
146   // return
147   put(Permitted_operands, "c3", 0x00);
148 
149   //// Class B: just op and disp8
150   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
151   //  0     0     0      |0       1     0     0
152 
153   // jump
154   put(Permitted_operands, "eb", 0x04);
155   put(Permitted_operands, "72", 0x04);
156   put(Permitted_operands, "73", 0x04);
157   put(Permitted_operands, "74", 0x04);
158   put(Permitted_operands, "75", 0x04);
159   put(Permitted_operands, "76", 0x04);
160   put(Permitted_operands, "77", 0x04);
161   put(Permitted_operands, "7c", 0x04);
162   put(Permitted_operands, "7d", 0x04);
163   put(Permitted_operands, "7e", 0x04);
164   put(Permitted_operands, "7f", 0x04);
165 
166   //// Class D: just op and disp32
167   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
168   //  0     0     1      |0       0     0     0
169   put(Permitted_operands, "e8", 0x10);  // call
170   put(Permitted_operands, "e9", 0x10);  // jump
171 
172   //// Class E: just op and imm8
173   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
174   //  0     1     0      |0       0     0     0
175   put(Permitted_operands, "cd", 0x20);  // software interrupt
176 
177   //// Class F: just op and imm32
178   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
179   //  1     0     0      |0       0     0     0
180   put(Permitted_operands, "05", 0x40);  // add
181   put(Permitted_operands, "2d", 0x40);  // subtract
182   put(Permitted_operands, "25", 0x40);  // and
183   put(Permitted_operands, "0d", 0x40);  // or
184   put(Permitted_operands, "35", 0x40);  // xor
185   put(Permitted_operands, "3d", 0x40);  // compare
186   put(Permitted_operands, "68", 0x40);  // push
187   // copy
188   put(Permitted_operands, "b8", 0x40);
189   put(Permitted_operands, "b9", 0x40);
190   put(Permitted_operands, "ba", 0x40);
191   put(Permitted_operands, "bb", 0x40);
192   put(Permitted_operands, "bc", 0x40);
193   put(Permitted_operands, "bd", 0x40);
194   put(Permitted_operands, "be", 0x40);
195   put(Permitted_operands, "bf", 0x40);
196 
197   //// Class M: using ModR/M byte
198   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
199   //  0     0     0      |0       0     0     1
200 
201   // add
202   put(Permitted_operands, "01", 0x01);
203   put(Permitted_operands, "03", 0x01);
204   // subtract
205   put(Permitted_operands, "29", 0x01);
206   put(Permitted_operands, "2b", 0x01);
207   // and
208   put(Permitted_operands, "21", 0x01);
209   put(Permitted_operands, "23", 0x01);
210   // or
211   put(Permitted_operands, "09", 0x01);
212   put(Permitted_operands, "0b", 0x01);
213   // xor
214   put(Permitted_operands, "31", 0x01);
215   put(Permitted_operands, "33", 0x01);
216   // compare
217   put(Permitted_operands, "39", 0x01);
218   put(Permitted_operands, "3b", 0x01);
219   // copy
220   put(Permitted_operands, "88", 0x01);
221   put(Permitted_operands, "89", 0x01);
222   put(Permitted_operands, "8a", 0x01);
223   put(Permitted_operands, "8b", 0x01);
224   // swap
225   put(Permitted_operands, "87", 0x01);
226   // copy address (lea)
227   put(Permitted_operands, "8d", 0x01);
228   // pop
229   put(Permitted_operands, "8f", 0x01);
230 
231   //// Class N: op, ModR/M and subop (not r32)
232   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
233   //  0     0     0      |0       0     1     1
234   put(Permitted_operands, "d3", 0x03);  // shift
235   put(Permitted_operands, "f7", 0x03);  // test/not/mul/div
236   put(Permitted_operands, "ff", 0x03);  // jump/push/call
237 
238   //// Class O: op, ModR/M, subop (not r32) and imm8
239   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
240   //  0     1     0      |0       0     1     1
241   put(Permitted_operands, "c1", 0x23);  // combine
242   put(Permitted_operands, "c6", 0x23);  // copy
243 
244   //// Class P: op, ModR/M, subop (not r32) and imm32
245   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
246   //  1     0     0      |0       0     1     1
247   put(Permitted_operands, "81", 0x43);  // combine
248   put(Permitted_operands, "c7", 0x43);  // copy
249 
250   // End Init Permitted Operands
251 }
252 
// Helpers for operand bitvectors; 'bit' is a bit position (0 = least significant).
// HAS yields a non-zero value iff the bit is set; SET and CLEAR yield a new
// value and leave their argument untouched.
#define HAS(bitvector, bit)  ((bitvector) & (1 << (bit)))
#define SET(bitvector, bit)  ((bitvector) | (1 << (bit)))
#define CLEAR(bitvector, bit)  ((bitvector) & (~(1 << (bit))))
256 
257 void check_operands(const line& inst, const word& op) {
258   if (!is_hex_byte(op)) return;
259   uint8_t expected_bitvector = get(Permitted_operands, op.data);
260   if (HAS(expected_bitvector, MODRM)) {
261     check_operands_modrm(inst, op);
262     compare_bitvector_modrm(inst, expected_bitvector, op);
263   }
264   else {
265     compare_bitvector(inst, expected_bitvector, op);
266   }
267 }
268 
269 //: Many instructions can be checked just by comparing bitvectors.
270 
271 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {
272   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
273   uint8_t bitvector = compute_expected_operand_bitvector(inst);
274   if (trace_contains_errors()) return;  // duplicate operand type
275   if (bitvector == expected) return;  // all good with this instruction
276   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
277 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
278     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
279     const string& optype = Operand_type_name.at(i);
280     if ((bitvector & 0x1) > (expected & 0x1))
281       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
282     else
283       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
284     // continue giving all errors for a single instruction
285   }
286   // ignore settings in any unused bits
287 }
288 
289 string maybe_name(const word& op) {
290   if (!is_hex_byte(op)) return "";
291   if (!contains_key(Name, op.data)) return "";
292   // strip stuff in parens from the name
293   const string& s = get(Name, op.data);
294   return " ("+s.substr(0, s.find(" ("))+')';
295 }
296 
297 uint32_t compute_expected_operand_bitvector(const line& inst) {
298   set<string> operands_found;
299   uint32_t bitvector = 0;
300   for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {
301     bitvector = bitvector | expected_bit_for_received_operand(inst.words.at(i), operands_found, inst);
302     if (trace_contains_errors()) return INVALID_OPERANDS;  // duplicate operand type
303   }
304   return bitvector;
305 }
306 
307 bool has_operands(const line& inst) {
308   return SIZE(inst.words) > first_operand(inst);
309 }
310 
311 int first_operand(const line& inst) {
312   if (inst.words.at(0).data == "0f") return 2;
313   if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") {
314     if (inst.words.at(1).data == "0f")
315       return 3;
316     else
317       return 2;
318   }
319   return 1;
320 }
321 
322 // Scan the metadata of 'w' and return the expected bit corresponding to any operand type.
323 // Also raise an error if metadata contains multiple operand types.
324 uint32_t expected_bit_for_received_operand(const word& w, set<string>& instruction_operands, const line& inst) {
325   uint32_t bv = 0;
326   bool found = false;
327   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
328     string/*copy*/ curr = w.metadata.at(i);
329     string expected_metadata = curr;
330     if (curr == "mod" || curr == "rm32" || curr == "r32" || curr == "scale" || curr == "index" || curr == "base")
331       expected_metadata = "modrm";
332     else if (!contains_key(Operand_type, curr)) continue;  // ignore unrecognized metadata
333     if (found) {
334       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
335       return INVALID_OPERANDS;
336     }
337     if (instruction_operands.find(curr) != instruction_operands.end()) {
338       raise << "'" << to_string(inst) << "': duplicate " << curr << " operand\n" << end();
339       return INVALID_OPERANDS;
340     }
341     instruction_operands.insert(curr);
342     bv = (1 << get(Operand_type, expected_metadata));
343     found = true;
344   }
345   return bv;
346 }
347 
348 void test_conflicting_operand_type() {
349   Hide_errors = true;
350   run(
351       "== code 0x1\n"
352       "cd/software-interrupt 80/imm8/imm32\n"
353   );
354   CHECK_TRACE_CONTENTS(
355       "error: '80/imm8/imm32' has conflicting operand types; it should have only one\n"
356   );
357 }
358 
359 //: Instructions computing effective addresses have more complex rules, so
360 //: we'll hard-code a common set of instruction-decoding rules.
361 
362 void test_check_missing_mod_operand() {
363   Hide_errors = true;
364   run(
365       "== code 0x1\n"
366       "81 0/add/subop       3/rm32/ebx 1/imm32\n"
367   );
368   CHECK_TRACE_CONTENTS(
369       "error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand\n"
370   );
371 }
372 
373 void check_operands_modrm(const line& inst, const word& op) {
374   if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
375   check_operand_metadata_present(inst, "mod", op);
376   check_operand_metadata_present(inst, "rm32", op);
377   // no check for r32; some instructions don't use it; just assume it's 0 if missing
378   if (op.data == "81" || op.data == "8f" || op.data == "ff") {  // keep sync'd with 'help subop'
379     check_operand_metadata_present(inst, "subop", op);
380     check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop");
381   }
382   if (trace_contains_errors()) return;
383   if (metadata(inst, "rm32").data != "4") return;
384   // SIB byte checks
385   uint8_t mod = hex_byte(metadata(inst, "mod").data);
386   if (mod != /*direct*/3) {
387     check_operand_metadata_present(inst, "base", op);
388     check_operand_metadata_present(inst, "index", op);  // otherwise why go to SIB?
389   }
390   else {
391     check_operand_metadata_absent(inst, "base", op, "direct mode");
392     check_operand_metadata_absent(inst, "index", op, "direct mode");
393   }
394   // no check for scale; 0 (2**0 = 1) by default
395 }
396 
397 // same as compare_bitvector, with one additional exception for modrm-based
398 // instructions: they may use an extra displacement on occasion
399 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) {
400   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
401   uint8_t bitvector = compute_expected_operand_bitvector(inst);
402   if (trace_contains_errors()) return;  // duplicate operand type
403   // update 'expected' bitvector for the additional exception
404   if (has_operand_metadata(inst, "mod")) {
405     int32_t mod = parse_int(metadata(inst, "mod").data);
406     switch (mod) {
407     case 0:
408       if (has_operand_metadata(inst, "rm32") && parse_int(metadata(inst, "rm32").data) == 5)
409         expected |= (1<<DISP32);
410       break;
411     case 1:
412       expected |= (1<<DISP8);
413       break;
414     case 2:
415       expected |= (1<<DISP32);
416       break;
417     }
418   }
419   if (bitvector == expected) return;  // all good with this instruction
420   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
421 //?     cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
422     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
423     const string& optype = Operand_type_name.at(i);
424     if ((bitvector & 0x1) > (expected & 0x1))
425       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
426     else
427       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
428     // continue giving all errors for a single instruction
429   }
430   // ignore settings in any unused bits
431 }
432 
433 void check_operand_metadata_present(const line& inst, const string& type, const word& op) {
434   if (!has_operand_metadata(inst, type))
435     raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << type << " operand\n" << end();
436 }
437 
438 void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
439   if (has_operand_metadata(inst, type))
440     raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << type << " operand (" << msg << ")\n" << end();
441 }
442 
443 void test_modrm_with_displacement() {
444   Reg[EAX].u = 0x1;
445   transform(
446       "== code 0x1\n"
447       // just avoid null pointer
448       "8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8\n"  // copy *(EAX+4) to EDX
449   );
450   CHECK_TRACE_COUNT("error", 0);
451 }
452 
453 void test_check_missing_disp8() {
454   Hide_errors = true;
455   transform(
456       "== code 0x1\n"
457       "89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX\n"  // missing disp8
458   );
459   CHECK_TRACE_CONTENTS(
460       "error: '89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX' (copy r32 to rm32): missing disp8 operand\n"
461   );
462 }
463 
464 void test_check_missing_disp32() {
465   Hide_errors = true;
466   transform(
467       "== code 0x1\n"
468       "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX\n"  // missing disp32
469   );
470   CHECK_TRACE_CONTENTS(
471       "error: '8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX' (copy rm32 to r32): missing disp32 operand\n"
472   );
473 }
474 
475 void test_conflicting_operands_in_modrm_instruction() {
476   Hide_errors = true;
477   run(
478       "== code 0x1\n"
479       "01/add 0/mod 3/mod\n"
480   );
481   CHECK_TRACE_CONTENTS(
482       "error: '01/add 0/mod 3/mod' has conflicting mod operands\n"
483   );
484 }
485 
486 void test_conflicting_operand_type_modrm() {
487   Hide_errors = true;
488   run(
489       "== code 0x1\n"
490       "01/add 0/mod 3/rm32/r32\n"
491   );
492   CHECK_TRACE_CONTENTS(
493       "error: '3/rm32/r32' has conflicting operand types; it should have only one\n"
494   );
495 }
496 
497 void test_check_missing_rm32_operand() {
498   Hide_errors = true;
499   run(
500       "== code 0x1\n"
501       "81 0/add/subop 0/mod            1/imm32\n"
502   );
503   CHECK_TRACE_CONTENTS(
504       "error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand\n"
505   );
506 }
507 
508 void test_check_missing_subop_operand() {
509   Hide_errors = true;
510   run(
511       "== code 0x1\n"
512       "81             0/mod 3/rm32/ebx 1/imm32\n"
513   );
514   CHECK_TRACE_CONTENTS(
515       "error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand\n"
516   );
517 }
518 
519 void test_check_missing_base_operand() {
520   Hide_errors = true;
521   run(
522       "== code 0x1\n"
523       "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32\n"
524   );
525   CHECK_TRACE_CONTENTS(
526       "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand\n"
527   );
528 }
529 
530 void test_check_missing_index_operand() {
531   Hide_errors = true;
532   run(
533       "== code 0x1\n"
534       "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32\n"
535   );
536   CHECK_TRACE_CONTENTS(
537       "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand\n"
538   );
539 }
540 
541 void test_check_missing_base_operand_2() {
542   Hide_errors = true;
543   run(
544       "== code 0x1\n"
545       "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32\n"
546   );
547   CHECK_TRACE_CONTENTS(
548       "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand\n"
549   );
550 }
551 
552 void test_check_extra_displacement() {
553   Hide_errors = true;
554   run(
555       "== code 0x1\n"
556       "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8\n"
557   );
558   CHECK_TRACE_CONTENTS(
559       "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8' (copy r32 to rm32): unexpected disp8 operand\n"
560   );
561 }
562 
563 void test_check_duplicate_operand() {
564   Hide_errors = true;
565   run(
566       "== code 0x1\n"
567       "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32\n"
568   );
569   CHECK_TRACE_CONTENTS(
570       "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32': duplicate r32 operand\n"
571   );
572 }
573 
574 void test_check_base_operand_not_needed_in_direct_mode() {
575   run(
576       "== code 0x1\n"
577       "81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32\n"
578   );
579   CHECK_TRACE_COUNT("error", 0);
580 }
581 
582 void test_extra_modrm() {
583   Hide_errors = true;
584   run(
585       "== code 0x1\n"
586       "59/pop-to-ECX  3/mod/direct 1/rm32/ECX 4/r32/ESP\n"
587   );
588   CHECK_TRACE_CONTENTS(
589       "error: '59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP' (pop top of stack to ECX): unexpected modrm operand\n"
590   );
591 }
592 
593 //:: similarly handle multi-byte opcodes
594 
595 void check_operands_0f(const line& inst) {
596   assert(inst.words.at(0).data == "0f");
597   if (SIZE(inst.words) == 1) {
598     raise << "opcode '0f' requires a second opcode\n" << end();
599     return;
600   }
601   word op = preprocess_op(inst.words.at(1));
602   if (!contains_key(Name_0f, op.data)) {
603     raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
604     return;
605   }
606   check_operands_0f(inst, op);
607 }
608 
609 void check_operands_f3(const line& /*unused*/) {
610   raise << "no supported opcodes starting with f3\n" << end();
611 }
612 
613 void test_check_missing_disp32_operand() {
614   Hide_errors = true;
615   run(
616       "== code 0x1\n"
617       "  0f 84  # jmp if ZF to ??\n"
618   );
619   CHECK_TRACE_CONTENTS(
620       "error: '0f 84' (jump disp32 bytes away if equal, if ZF is set): missing disp32 operand\n"
621   );
622 }
623 
624 :(before "End Globals")
625 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f;
626 :(before "End Init Permitted Operands")
627 //// Class D: just op and disp32
628 //  imm32 imm8  disp32 |disp16  disp8 subop modrm
629 //  0     0     1      |0       0     0     0
630 put_new(Permitted_operands_0f, "84", 0x10);
631 put_new(Permitted_operands_0f, "85", 0x10);
632 put_new(Permitted_operands_0f, "8c", 0x10);
633 put_new(Permitted_operands_0f, "8d", 0x10);
634 put_new(Permitted_operands_0f, "8e", 0x10);
635 put_new(Permitted_operands_0f, "8f", 0x10);
636 
637 //// Class M: using ModR/M byte
638 //  imm32 imm8  disp32 |disp16  disp8 subop modrm
639 //  0     0     0      |0       0     0     1
640 put_new(Permitted_operands_0f, "af", 0x01);
641 
642 :(code)
643 void check_operands_0f(const line& inst, const word& op) {
644   uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
645   if (HAS(expected_bitvector, MODRM))
646     check_operands_modrm(inst, op);
647   compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);
648 }
649 
650 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
651   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
652   uint8_t bitvector = compute_expected_operand_bitvector(inst);
653   if (trace_contains_errors()) return;  // duplicate operand type
654   if (bitvector == expected) return;  // all good with this instruction
655   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
656 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
657     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
658     const string& optype = Operand_type_name.at(i);
659     if ((bitvector & 0x1) > (expected & 0x1))
660       raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": unexpected " << optype << " operand\n" << end();
661     else
662       raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": missing " << optype << " operand\n" << end();
663     // continue giving all errors for a single instruction
664   }
665   // ignore settings in any unused bits
666 }
667 
668 string maybe_name_0f(const word& op) {
669   if (!is_hex_byte(op)) return "";
670   if (!contains_key(Name_0f, op.data)) return "";
671   // strip stuff in parens from the name
672   const string& s = get(Name_0f, op.data);
673   return " ("+s.substr(0, s.find(" ("))+')';
674 }
675 
// Return a lower-cased copy of the NUL-terminated string 's'.
// Each character goes through unsigned char before reaching the <cctype>
// tolower: passing a negative char value (any byte >= 0x80 where char is
// signed) to it is undefined behavior.
string tolower(const char* s) {
  string result;
  for (const char* p = s;  *p != '\0';  ++p)
    result += static_cast<char>(tolower(static_cast<unsigned char>(*p)));
  return result;
}
682 
683 #undef HAS
684 #undef SET
685 #undef CLEAR
686 
687 :(before "End Includes")
688 #include<cctype>