https://github.com/akkartik/mu/blob/main/linux/bootstrap/033check_operands.cc
  1 //: Since we're tagging arguments with their types, let's start checking these
  2 //: argument types for each instruction.
  3 
// Opcode cd expects an imm8 argument; giving it none should be reported.
void test_check_missing_imm8_argument() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "cd\n"  // interrupt ??
  );
  CHECK_TRACE_CONTENTS(
      "error: 'cd' (software interrupt): missing imm8 argument\n"
  );
}
 14 
 15 :(before "Pack Operands(segment code)")
// validate argument types first; stop here if any instruction was flagged
check_arguments(code);
if (trace_contains_errors()) return;
 18 
 19 :(code)
 20 void check_arguments(const segment& code) {
 21   trace(3, "transform") << "-- check arguments" << end();
 22   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 23     check_arguments(code.lines.at(i));
 24     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
 25   }
 26 }
 27 
 28 void check_arguments(const line& inst) {
 29   word op = preprocess_op(inst.words.at(0));
 30   if (op.data == "0f") {
 31     check_arguments_0f(inst);
 32     return;
 33   }
 34   if (op.data == "f3") {
 35     check_arguments_f3(inst);
 36     return;
 37   }
 38   check_arguments(inst, op);
 39 }
 40 
 41 word preprocess_op(word/*copy*/ op) {
 42   op.data = tolower(op.data.c_str());
 43   // opcodes can't be negative
 44   if (starts_with(op.data, "0x"))
 45     op.data = op.data.substr(2);
 46   if (SIZE(op.data) == 1)
 47     op.data = string("0")+op.data;
 48   return op;
 49 }
 50 
// "0xf" and "0f" are two spellings of the same opcode; both should normalize
// to the same string.
void test_preprocess_op() {
  word w1;  w1.data = "0xf";
  word w2;  w2.data = "0f";
  CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data);
}
 56 
 57 //: To check the arguments for an opcode, we'll track the permitted arguments
 58 //: for each supported opcode in a bitvector. That way we can often compute the
 59 //: 'received' argument bitvector for each instruction's arguments and compare
 60 //: it with the 'expected' bitvector.
 61 //:
 62 //: The 'expected' and 'received' bitvectors can be different; the MODRM bit
 63 //: in the 'expected' bitvector maps to multiple 'received' argument types in
 64 //: an instruction. We deal in expected bitvectors throughout.
 65 
 66 :(before "End Types")
 67 enum expected_argument_type {
 68   // start from the least significant bit
 69   MODRM,  // more complex, may also involve disp8 or disp32
 70   SUBOP,
 71   DISP8,
 72   DISP16,
 73   DISP32,
 74   IMM8,
 75   IMM32,
 76   NUM_OPERAND_TYPES
 77 };
 78 :(before "End Globals")
// name of each argument type, indexed by its expected_argument_type value
vector<string> Operand_type_name;
// reverse lookup: argument type name -> expected_argument_type
map<string, expected_argument_type> Operand_type;
:(before "End One-time Setup")
init_op_types();
 83 :(code)
 84 void init_op_types() {
 85   assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8);
 86   Operand_type_name.resize(NUM_OPERAND_TYPES);
 87   #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type);
 88   DEF(MODRM);
 89   DEF(SUBOP);
 90   DEF(DISP8);
 91   DEF(DISP16);
 92   DEF(DISP32);
 93   DEF(IMM8);
 94   DEF(IMM32);
 95   #undef DEF
 96 }
 97 
 98 :(before "End Globals")
// for each supported 1-byte opcode, the bitvector of argument types it
// expects; bit positions are expected_argument_type values
map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments;
const uint8_t INVALID_OPERANDS = 0xff;  // no instruction uses all the argument types
:(before "End One-time Setup")
init_permitted_arguments();
103 :(code)
// Populate Permitted_arguments with the expected argument bitvector of each
// supported 1-byte opcode. Opcodes are grouped into classes that share a
// bitvector; the table above each class spells out its bits, with modrm as
// the least significant one.
void init_permitted_arguments() {
  //// Class A: just op, no arguments
  // halt
  put(Permitted_arguments, "f4", 0x00);
  // inc
  put(Permitted_arguments, "40", 0x00);
  put(Permitted_arguments, "41", 0x00);
  put(Permitted_arguments, "42", 0x00);
  put(Permitted_arguments, "43", 0x00);
  put(Permitted_arguments, "44", 0x00);
  put(Permitted_arguments, "45", 0x00);
  put(Permitted_arguments, "46", 0x00);
  put(Permitted_arguments, "47", 0x00);
  // dec
  put(Permitted_arguments, "48", 0x00);
  put(Permitted_arguments, "49", 0x00);
  put(Permitted_arguments, "4a", 0x00);
  put(Permitted_arguments, "4b", 0x00);
  put(Permitted_arguments, "4c", 0x00);
  put(Permitted_arguments, "4d", 0x00);
  put(Permitted_arguments, "4e", 0x00);
  put(Permitted_arguments, "4f", 0x00);
  // push
  put(Permitted_arguments, "50", 0x00);
  put(Permitted_arguments, "51", 0x00);
  put(Permitted_arguments, "52", 0x00);
  put(Permitted_arguments, "53", 0x00);
  put(Permitted_arguments, "54", 0x00);
  put(Permitted_arguments, "55", 0x00);
  put(Permitted_arguments, "56", 0x00);
  put(Permitted_arguments, "57", 0x00);
  // pop
  put(Permitted_arguments, "58", 0x00);
  put(Permitted_arguments, "59", 0x00);
  put(Permitted_arguments, "5a", 0x00);
  put(Permitted_arguments, "5b", 0x00);
  put(Permitted_arguments, "5c", 0x00);
  put(Permitted_arguments, "5d", 0x00);
  put(Permitted_arguments, "5e", 0x00);
  put(Permitted_arguments, "5f", 0x00);
  // sign-extend EAX into EDX
  put(Permitted_arguments, "99", 0x00);
  // return
  put(Permitted_arguments, "c3", 0x00);
  // enable/disable interrupts
  // not really part of SubX; just needed in low-level boot.subx
  put(Permitted_arguments, "fa", 0x00);
  put(Permitted_arguments, "fb", 0x00);

  //// Class B: just op and disp8
  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
  //  0     0     0      |0       1     0     0

  // jump
  put(Permitted_arguments, "eb", 0x04);
  put(Permitted_arguments, "72", 0x04);
  put(Permitted_arguments, "73", 0x04);
  put(Permitted_arguments, "74", 0x04);
  put(Permitted_arguments, "75", 0x04);
  put(Permitted_arguments, "76", 0x04);
  put(Permitted_arguments, "77", 0x04);
  put(Permitted_arguments, "7c", 0x04);
  put(Permitted_arguments, "7d", 0x04);
  put(Permitted_arguments, "7e", 0x04);
  put(Permitted_arguments, "7f", 0x04);

  //// Class D: just op and disp32
  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
  //  0     0     1      |0       0     0     0
  put(Permitted_arguments, "e8", 0x10);  // call
  put(Permitted_arguments, "e9", 0x10);  // jump

  //// Class E: just op and imm8
  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
  //  0     1     0      |0       0     0     0
  put(Permitted_arguments, "cd", 0x20);  // software interrupt

  //// Class F: just op and imm32
  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
  //  1     0     0      |0       0     0     0
  put(Permitted_arguments, "05", 0x40);  // add
  put(Permitted_arguments, "2d", 0x40);  // subtract
  put(Permitted_arguments, "25", 0x40);  // and
  put(Permitted_arguments, "0d", 0x40);  // or
  put(Permitted_arguments, "35", 0x40);  // xor
  put(Permitted_arguments, "3d", 0x40);  // compare
  put(Permitted_arguments, "68", 0x40);  // push
  // copy
  put(Permitted_arguments, "b8", 0x40);
  put(Permitted_arguments, "b9", 0x40);
  put(Permitted_arguments, "ba", 0x40);
  put(Permitted_arguments, "bb", 0x40);
  put(Permitted_arguments, "bc", 0x40);
  put(Permitted_arguments, "bd", 0x40);
  put(Permitted_arguments, "be", 0x40);
  put(Permitted_arguments, "bf", 0x40);

  //// Class M: using ModR/M byte
  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
  //  0     0     0      |0       0     0     1

  // add
  put(Permitted_arguments, "01", 0x01);
  put(Permitted_arguments, "03", 0x01);
  // subtract
  put(Permitted_arguments, "29", 0x01);
  put(Permitted_arguments, "2b", 0x01);
  // and
  put(Permitted_arguments, "21", 0x01);
  put(Permitted_arguments, "23", 0x01);
  // or
  put(Permitted_arguments, "09", 0x01);
  put(Permitted_arguments, "0b", 0x01);
  // xor
  put(Permitted_arguments, "31", 0x01);
  put(Permitted_arguments, "33", 0x01);
  // compare
  put(Permitted_arguments, "39", 0x01);
  put(Permitted_arguments, "3b", 0x01);
  // copy
  put(Permitted_arguments, "88", 0x01);
  put(Permitted_arguments, "89", 0x01);
  put(Permitted_arguments, "8a", 0x01);
  put(Permitted_arguments, "8b", 0x01);
  // swap
  put(Permitted_arguments, "87", 0x01);
  // copy address (lea)
  put(Permitted_arguments, "8d", 0x01);

  //// Class N: op, ModR/M and subop (not r32)
  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
  //  0     0     0      |0       0     1     1
  put(Permitted_arguments, "8f", 0x03);  // pop
  put(Permitted_arguments, "d3", 0x03);  // shift
  put(Permitted_arguments, "f7", 0x03);  // test/not/mul/div
  put(Permitted_arguments, "ff", 0x03);  // jump/push/call

  //// Class O: op, ModR/M, subop (not r32) and imm8
  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
  //  0     1     0      |0       0     1     1
  put(Permitted_arguments, "c1", 0x23);  // combine
  put(Permitted_arguments, "c6", 0x23);  // copy

  //// Class P: op, ModR/M, subop (not r32) and imm32
  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
  //  1     0     0      |0       0     1     1
  put(Permitted_arguments, "81", 0x43);  // combine
  put(Permitted_arguments, "c7", 0x43);  // copy

  //// Class Q: op, ModR/M and imm32
  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
  //  1     0     0      |0       0     0     1
  put(Permitted_arguments, "69", 0x41);  // multiply

  // End Init Permitted Operands
}
260 
// Helpers for argument bitvectors. 'bit' is a bit position (such as an
// expected_argument_type value), not a mask. HAS yields non-zero when the bit
// is set; SET and CLEAR yield a new value and leave 'bitvector' unmodified.
#define HAS(bitvector, bit)  ((bitvector) & (1 << (bit)))
#define SET(bitvector, bit)  ((bitvector) | (1 << (bit)))
#define CLEAR(bitvector, bit)  ((bitvector) & (~(1 << (bit))))
264 
265 void check_arguments(const line& inst, const word& op) {
266   if (!is_hex_byte(op)) return;
267   uint8_t expected_bitvector = get(Permitted_arguments, op.data);
268   if (HAS(expected_bitvector, MODRM)) {
269     check_arguments_modrm(inst, op);
270     compare_bitvector_modrm(inst, expected_bitvector, maybe_name(op));
271   }
272   else {
273     compare_bitvector(inst, expected_bitvector, maybe_name(op));
274   }
275 }
276 
277 //: Many instructions can be checked just by comparing bitvectors.
278 
279 void compare_bitvector(const line& inst, uint8_t expected, const string& maybe_op_name) {
280   if (all_hex_bytes(inst) && has_arguments(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
281   uint8_t bitvector = compute_expected_argument_bitvector(inst);
282   if (trace_contains_errors()) return;  // duplicate argument type
283   if (bitvector == expected) return;  // all good with this instruction
284   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
285 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
286     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this argument
287     const string& optype = Operand_type_name.at(i);
288     if ((bitvector & 0x1) > (expected & 0x1))
289       raise << "'" << to_string(inst) << "'" << maybe_op_name << ": unexpected " << optype << " argument\n" << end();
290     else
291       raise << "'" << to_string(inst) << "'" << maybe_op_name << ": missing " << optype << " argument\n" << end();
292     // continue giving all errors for a single instruction
293   }
294   // ignore settings in any unused bits
295 }
296 
297 string maybe_name(const word& op) {
298   if (!is_hex_byte(op)) return "";
299   if (!contains_key(Name, op.data)) return "";
300   // strip stuff in parens from the name
301   const string& s = get(Name, op.data);
302   return " ("+s.substr(0, s.find(" ("))+')';
303 }
304 
305 uint32_t compute_expected_argument_bitvector(const line& inst) {
306   set<string> arguments_found;
307   uint32_t bitvector = 0;
308   for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {
309     bitvector = bitvector | expected_bit_for_received_argument(inst.words.at(i), arguments_found, inst);
310     if (trace_contains_errors()) return INVALID_OPERANDS;  // duplicate argument type
311   }
312   return bitvector;
313 }
314 
315 bool has_arguments(const line& inst) {
316   return SIZE(inst.words) > first_argument(inst);
317 }
318 
319 int first_argument(const line& inst) {
320   if (inst.words.at(0).data == "0f") return 2;
321   if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") {
322     if (inst.words.at(1).data == "0f")
323       return 3;
324     else
325       return 2;
326   }
327   return 1;
328 }
329 
330 // Scan the metadata of 'w' and return the expected bit corresponding to any argument type.
331 // Also raise an error if metadata contains multiple argument types.
332 uint32_t expected_bit_for_received_argument(const word& w, set<string>& instruction_arguments, const line& inst) {
333   uint32_t bv = 0;
334   bool found = false;
335   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
336     string/*copy*/ curr = w.metadata.at(i);
337     string expected_metadata = curr;
338     if (curr == "mod" || curr == "rm32" || curr == "r32" || curr == "xm32" || curr == "x32" || curr == "scale" || curr == "index" || curr == "base")
339       expected_metadata = "modrm";
340     else if (!contains_key(Operand_type, curr)) continue;  // ignore unrecognized metadata
341     if (found) {
342       raise << "'" << w.original << "' has conflicting argument types; it should have only one\n" << end();
343       return INVALID_OPERANDS;
344     }
345     if (instruction_arguments.find(curr) != instruction_arguments.end()) {
346       raise << "'" << to_string(inst) << "': duplicate " << curr << " argument\n" << end();
347       return INVALID_OPERANDS;
348     }
349     instruction_arguments.insert(curr);
350     bv = (1 << get(Operand_type, expected_metadata));
351     found = true;
352   }
353   return bv;
354 }
355 
// A single word tagged with two argument types (imm8 and imm32) is rejected.
void test_conflicting_argument_type() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "cd/software-interrupt 80/imm8/imm32\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '80/imm8/imm32' has conflicting argument types; it should have only one\n"
  );
}
366 
367 //: Instructions computing effective addresses have more complex rules, so
368 //: we'll hard-code a common set of instruction-decoding rules.
369 
// An instruction using a ModR/M byte must say which addressing mode ('mod')
// it wants.
void test_check_missing_mod_argument() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "81 0/add/subop       3/rm32/ebx 1/imm32\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod argument\n"
  );
}
380 
381 void check_arguments_modrm(const line& inst, const word& op) {
382   if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
383   check_argument_metadata_present(inst, "mod", op);
384   if (!has_argument_metadata(inst, "rm32") && !has_argument_metadata(inst, "xm32"))
385     raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing rm32 (or xm32) argument\n" << end();
386   // no check for r32; some instructions don't use it; just assume it's 0 if missing
387   if (op.data == "81" || op.data == "8f" || op.data == "f7" || op.data == "ff") {  // keep sync'd with 'help subop'
388     check_argument_metadata_present(inst, "subop", op);
389     check_argument_metadata_absent(inst, "r32", op, "should be replaced by subop");
390     check_argument_metadata_absent(inst, "x32", op, "should be replaced by subop");
391   }
392   if (trace_contains_errors()) return;
393   if (metadata_m32(inst).data != "4") return;
394   // SIB byte checks
395   uint8_t mod = hex_byte(metadata(inst, "mod").data);
396   if (mod != /*direct*/3) {
397     check_argument_metadata_present(inst, "base", op);
398     check_argument_metadata_present(inst, "index", op);  // otherwise why go to SIB?
399   }
400   else {
401     check_argument_metadata_absent(inst, "base", op, "direct mode");
402     check_argument_metadata_absent(inst, "index", op, "direct mode");
403   }
404   // no check for scale; 0 (2**0 = 1) by default
405 }
406 
407 word metadata_m32(const line& inst) {
408   for (int i = 0;  i < SIZE(inst.words);  ++i)
409     if (has_argument_metadata(inst.words.at(i), "rm32") || has_argument_metadata(inst.words.at(i), "xm32"))
410       return inst.words.at(i);
411   assert(false);
412 }
413 
414 // same as compare_bitvector, with one additional exception for modrm-based
415 // instructions: they may use an extra displacement on occasion
416 void compare_bitvector_modrm(const line& inst, uint8_t expected, const string& maybe_op_name) {
417   if (all_hex_bytes(inst) && has_arguments(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
418   uint8_t bitvector = compute_expected_argument_bitvector(inst);
419   if (trace_contains_errors()) return;  // duplicate argument type
420   // update 'expected' bitvector for the additional exception
421   if (has_argument_metadata(inst, "mod")) {
422     int32_t mod = parse_int(metadata(inst, "mod").data);
423     switch (mod) {
424     case 0:
425       if (has_argument_metadata(inst, "rm32") && parse_int(metadata(inst, "rm32").data) == 5)
426         expected |= (1<<DISP32);
427       break;
428     case 1:
429       expected |= (1<<DISP8);
430       break;
431     case 2:
432       expected |= (1<<DISP32);
433       break;
434     }
435   }
436   if (bitvector == expected) return;  // all good with this instruction
437   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
438 //?     cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
439     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this argument
440     const string& optype = Operand_type_name.at(i);
441     if ((bitvector & 0x1) > (expected & 0x1))
442       raise << "'" << to_string(inst) << "'" << maybe_op_name << ": unexpected " << optype << " argument\n" << end();
443     else
444       raise << "'" << to_string(inst) << "'" << maybe_op_name << ": missing " << optype << " argument\n" << end();
445     // continue giving all errors for a single instruction
446   }
447   // ignore settings in any unused bits
448 }
449 
450 void check_argument_metadata_present(const line& inst, const string& type, const word& op) {
451   if (!has_argument_metadata(inst, type))
452     raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << type << " argument\n" << end();
453 }
454 
455 void check_argument_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
456   if (has_argument_metadata(inst, type))
457     raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << type << " argument (" << msg << ")\n" << end();
458 }
459 
// With mod 1, a disp8 next to the ModR/M arguments is expected, not an error.
void test_modrm_with_displacement() {
  Reg[EAX].u = 0x1;
  transform(
      "== code 0x1\n"
      // just avoid null pointer
      "8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8\n"  // copy *(EAX+4) to EDX
  );
  CHECK_TRACE_COUNT("error", 0);
}
469 
// With mod 1, leaving out the disp8 is an error.
void test_check_missing_disp8() {
  Hide_errors = true;
  transform(
      "== code 0x1\n"
      "89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX\n"  // missing disp8
  );
  CHECK_TRACE_CONTENTS(
      "error: '89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX' (copy r32 to rm32): missing disp8 argument\n"
  );
}
480 
// With mod 0 and rm32 5, leaving out the disp32 is an error.
void test_check_missing_disp32() {
  Hide_errors = true;
  transform(
      "== code 0x1\n"
      "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX\n"  // missing disp32
  );
  CHECK_TRACE_CONTENTS(
      "error: '8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX' (copy rm32 to r32): missing disp32 argument\n"
  );
}
491 
// Two different words both tagged 'mod' are rejected.
void test_conflicting_arguments_in_modrm_instruction() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "01/add 0/mod 3/mod\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '01/add 0/mod 3/mod' has conflicting mod arguments\n"
  );
}
502 
// A single word tagged with two pieces of the ModR/M byte (rm32 and r32) is
// rejected.
void test_conflicting_argument_type_modrm() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "01/add 0/mod 3/rm32/r32\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '3/rm32/r32' has conflicting argument types; it should have only one\n"
  );
}
513 
// An instruction using a ModR/M byte must provide an rm32 (or xm32) argument.
void test_check_missing_rm32_argument() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "81 0/add/subop 0/mod            1/imm32\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 (or xm32) argument\n"
  );
}
524 
// Opcode 81 requires a subop argument.
void test_check_missing_subop_argument() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "81             0/mod 3/rm32/ebx 1/imm32\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop argument\n"
  );
}
535 
// rm32 4 outside direct mode selects the SIB byte, which requires a base.
void test_check_missing_base_argument() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base argument\n"
  );
}
546 
// rm32 4 outside direct mode selects the SIB byte, which requires an index
// even when a base is given.
void test_check_missing_index_argument() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index argument\n"
  );
}
557 
// Providing index and scale doesn't make up for a missing base.
void test_check_missing_base_argument_2() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base argument\n"
  );
}
568 
// With mod 0 and an rm32 other than 5, no displacement is expected.
void test_check_extra_displacement() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8' (copy r32 to rm32): unexpected disp8 argument\n"
  );
}
579 
// The same argument type (here r32) may appear only once per instruction.
void test_check_duplicate_argument() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32': duplicate r32 argument\n"
  );
}
590 
// With mod 3 (direct mode), rm32 4 doesn't call for base or index arguments.
// NOTE(review): the mod word is labelled 'indirect' even though its value is
// 3; presumably the label is ignored as unrecognized metadata -- confirm.
void test_check_base_argument_not_needed_in_direct_mode() {
  run(
      "== code 0x1\n"
      "81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32\n"
  );
  CHECK_TRACE_COUNT("error", 0);
}
598 
// Opcode 59 takes no arguments, so ModR/M arguments on it are flagged.
void test_extra_modrm() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "59/pop-to-ECX  3/mod/direct 1/rm32/ECX 4/r32/ESP\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP' (pop top of stack to ECX): unexpected modrm argument\n"
  );
}
609 
610 //:: similarly handle multi-byte opcodes
611 
612 void check_arguments_0f(const line& inst) {
613   assert(inst.words.at(0).data == "0f");
614   if (SIZE(inst.words) == 1) {
615     raise << "opcode '0f' requires a second opcode\n" << end();
616     return;
617   }
618   word op = preprocess_op(inst.words.at(1));
619   if (!contains_key(Name_0f, op.data)) {
620     raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
621     return;
622   }
623   check_arguments_0f(inst, op);
624 }
625 
626 void check_arguments_f3(const line& inst) {
627   assert(inst.words.at(0).data == "f3");
628   if (SIZE(inst.words) == 1) {
629     raise << "opcode 'f3' requires a second opcode\n" << end();
630     return;
631   }
632   word op = preprocess_op(inst.words.at(1));
633   if (op.data == "0f") {
634     word op2 = preprocess_op(inst.words.at(2));
635     check_arguments_f3_0f(inst, op2);
636     return;
637   }
638   if (!contains_key(Name_f3, op.data)) {
639     raise << "unknown 2-byte opcode 'f3 " << op.data << "'\n" << end();
640     return;
641   }
642   check_arguments_f3(inst, op);
643 }
644 
// The 2-byte opcode 0f 84 expects a disp32 argument.
void test_check_missing_disp32_argument() {
  Hide_errors = true;
  run(
      "== code 0x1\n"
      "  0f 84  # jmp if ZF to ??\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: '0f 84' (jump disp32 bytes away if equal, if ZF is set): missing disp32 argument\n"
  );
}
655 
// 2-byte opcodes can use a ModR/M byte too; with mod 2 a disp32 is expected.
void test_0f_opcode_with_modrm() {
  transform(
      "== code 0x1\n"
      "0f af/multiply 2/mod/*+disp32 5/rm32/ebp 8/disp32 0/r32\n"
  );
  CHECK_TRACE_DOESNT_CONTAIN_ERRORS();
}
663 
664 :(before "End Globals")
// like Permitted_arguments, but keyed by the second byte of opcodes that
// start with 0f
map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_0f;
:(before "End Init Permitted Operands")
//// Class D: just op and disp32
//  imm32 imm8  disp32 |disp16  disp8 subop modrm
//  0     0     1      |0       0     0     0
put_new(Permitted_arguments_0f, "82", 0x10);
put_new(Permitted_arguments_0f, "83", 0x10);
put_new(Permitted_arguments_0f, "84", 0x10);
put_new(Permitted_arguments_0f, "85", 0x10);
put_new(Permitted_arguments_0f, "86", 0x10);
put_new(Permitted_arguments_0f, "87", 0x10);
put_new(Permitted_arguments_0f, "8c", 0x10);
put_new(Permitted_arguments_0f, "8d", 0x10);
put_new(Permitted_arguments_0f, "8e", 0x10);
put_new(Permitted_arguments_0f, "8f", 0x10);

//// Class M: using ModR/M byte
//  imm32 imm8  disp32 |disp16  disp8 subop modrm
//  0     0     0      |0       0     0     1
put_new(Permitted_arguments_0f, "2f", 0x01);  // compare floats
put_new(Permitted_arguments_0f, "af", 0x01);  // multiply ints
// setcc
put_new(Permitted_arguments_0f, "92", 0x01);
put_new(Permitted_arguments_0f, "93", 0x01);
put_new(Permitted_arguments_0f, "94", 0x01);
put_new(Permitted_arguments_0f, "95", 0x01);
put_new(Permitted_arguments_0f, "96", 0x01);
put_new(Permitted_arguments_0f, "97", 0x01);
put_new(Permitted_arguments_0f, "9c", 0x01);
put_new(Permitted_arguments_0f, "9d", 0x01);
put_new(Permitted_arguments_0f, "9e", 0x01);
put_new(Permitted_arguments_0f, "9f", 0x01);
697 
698 :(before "End Globals")
// like Permitted_arguments, but keyed by the second byte of opcodes that
// start with f3 (no entries are added in this section of the file)
map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_f3;
// like Permitted_arguments, but keyed by the third byte of opcodes that
// start with f3 0f
map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_f3_0f;
:(before "End Init Permitted Operands")
//// Class M: using ModR/M byte
//  imm32 imm8  disp32 |disp16  disp8 subop modrm
//  0     0     0      |0       0     0     1
put_new(Permitted_arguments_f3_0f, "10", 0x01);  // copy xm32 to x32
put_new(Permitted_arguments_f3_0f, "11", 0x01);  // copy x32 to xm32
put_new(Permitted_arguments_f3_0f, "2a", 0x01);  // convert-to-float
put_new(Permitted_arguments_f3_0f, "2c", 0x01);  // truncate-to-int
put_new(Permitted_arguments_f3_0f, "2d", 0x01);  // convert-to-int
put_new(Permitted_arguments_f3_0f, "51", 0x01);  // square root
put_new(Permitted_arguments_f3_0f, "52", 0x01);  // inverse square root
put_new(Permitted_arguments_f3_0f, "53", 0x01);  // reciprocal
put_new(Permitted_arguments_f3_0f, "58", 0x01);  // add floats
put_new(Permitted_arguments_f3_0f, "59", 0x01);  // multiply floats
put_new(Permitted_arguments_f3_0f, "5c", 0x01);  // subtract floats
put_new(Permitted_arguments_f3_0f, "5d", 0x01);  // minimum of floats
put_new(Permitted_arguments_f3_0f, "5e", 0x01);  // divide floats
put_new(Permitted_arguments_f3_0f, "5f", 0x01);  // maximum of floats
719 
720 :(code)
721 void check_arguments_0f(const line& inst, const word& op) {
722   uint8_t expected_bitvector = get(Permitted_arguments_0f, op.data);
723   if (HAS(expected_bitvector, MODRM)) {
724     check_arguments_modrm(inst, op);
725     compare_bitvector_modrm(inst, expected_bitvector, maybe_name_0f(op));
726   }
727   else {
728     compare_bitvector(inst, CLEAR(expected_bitvector, MODRM), maybe_name_0f(op));
729   }
730 }
731 
732 void check_arguments_f3(const line& inst, const word& op) {
733   uint8_t expected_bitvector = get(Permitted_arguments_f3, op.data);
734   if (HAS(expected_bitvector, MODRM)) {
735     check_arguments_modrm(inst, op);
736     compare_bitvector_modrm(inst, expected_bitvector, maybe_name_f3(op));
737   }
738   else {
739     compare_bitvector(inst, CLEAR(expected_bitvector, MODRM), maybe_name_f3(op));
740   }
741 }
742 
743 void check_arguments_f3_0f(const line& inst, const word& op) {
744   uint8_t expected_bitvector = get(Permitted_arguments_f3_0f, op.data);
745   if (HAS(expected_bitvector, MODRM)) {
746     check_arguments_modrm(inst, op);
747     compare_bitvector_modrm(inst, expected_bitvector, maybe_name_f3_0f(op));
748   }
749   else {
750     compare_bitvector(inst, CLEAR(expected_bitvector, MODRM), maybe_name_f3_0f(op));
751   }
752 }
753 
754 string maybe_name_0f(const word& op) {
755   if (!is_hex_byte(op)) return "";
756   if (!contains_key(Name_0f, op.data)) return "";
757   // strip stuff in parens from the name
758   const string& s = get(Name_0f, op.data);
759   return " ("+s.substr(0, s.find(" ("))+')';
760 }
761 
762 string maybe_name_f3(const word& op) {
763   if (!is_hex_byte(op)) return "";
764   if (!contains_key(Name_f3, op.data)) return "";
765   // strip stuff in parens from the name
766   const string& s = get(Name_f3, op.data);
767   return " ("+s.substr(0, s.find(" ("))+')';
768 }
769 
770 string maybe_name_f3_0f(const word& op) {
771   if (!is_hex_byte(op)) return "";
772   if (!contains_key(Name_f3_0f, op.data)) return "";
773   // strip stuff in parens from the name
774   const string& s = get(Name_f3_0f, op.data);
775   return " ("+s.substr(0, s.find(" ("))+')';
776 }
777 
// Return a lowercased copy of the null-terminated string 's'.
string tolower(const char* s) {
  string result;
  for (/*nada*/;  *s;  ++s) {
    // go through unsigned char: passing a negative char to the single-character
    // tolower() is undefined
    result += static_cast<char>(tolower(static_cast<unsigned char>(*s)));
  }
  return result;
}
784 
// the bitvector helper macros aren't needed past this point
#undef HAS
#undef SET
#undef CLEAR
788 
789 :(before "End Includes")
790 #include<cctype>