1 //: Since we're tagging operands with their types, let's start checking these
  2 //: operand types for each instruction.
  3 
  4 :(scenario check_missing_imm8_operand)
  5 % Hide_errors = true;
  6 == 0x1
  7 # instruction                     effective address                                                   operand     displacement    immediate
  8 # op          subop               mod             rm32          base        index         scale       r32
  9 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
 10   cd                                                                                                                                                # int ??
 11 +error: 'cd' (software interrupt): missing imm8 operand
 12 
 13 :(before "Pack Operands(segment code)")
// Validate the operand metadata of every instruction in the code segment
// before operands get packed; abandon the transform if anything was flagged.
check_operands(code);
if (trace_contains_errors()) return;
 16 
 17 :(code)
 18 void check_operands(const segment& code) {
 19   trace(99, "transform") << "-- check operands" << end();
 20   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 21     check_operands(code.lines.at(i));
 22     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
 23   }
 24 }
 25 
 26 void check_operands(const line& inst) {
 27   word op = preprocess_op(inst.words.at(0));
 28   if (op.data == "0f") {
 29     check_operands_0f(inst);
 30     return;
 31   }
 32   if (op.data == "f3") {
 33     check_operands_f3(inst);
 34     return;
 35   }
 36   check_operands(inst, op);
 37 }
 38 
 39 word preprocess_op(word/*copy*/ op) {
 40   op.data = tolower(op.data.c_str());
 41   // opcodes can't be negative
 42   if (starts_with(op.data, "0x"))
 43     op.data = op.data.substr(2);
 44   if (SIZE(op.data) == 1)
 45     op.data = string("0")+op.data;
 46   return op;
 47 }
 48 
 49 void test_preprocess_op() {
 50   word w1;  w1.data = "0xf";
 51   word w2;  w2.data = "0f";
 52   CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data);
 53 }
 54 
 55 //: To check the operands for an opcode, we'll track the permitted operands
 56 //: for each supported opcode in a bitvector. That way we can often compute the
 57 //: bitvector for each instruction's operands and compare it with the expected.
 58 
 59 :(before "End Types")
 60 enum operand_type {
 61   // start from the least significant bit
 62   MODRM,  // more complex, may also involve disp8 or disp32
 63   SUBOP,
 64   DISP8,
 65   DISP16,
 66   DISP32,
 67   IMM8,
 68   IMM32,
 69   NUM_OPERAND_TYPES
 70 };
 71 :(before "End Globals")
// Two-way lookup between operand types and their lowercase metadata names.
// Both are filled in by init_op_types().
vector<string> Operand_type_name;  // indexed by operand_type
map<string, operand_type> Operand_type;  // metadata name -> operand_type
 74 :(before "End One-time Setup")
// populate Operand_type_name and Operand_type
init_op_types();
 76 :(code)
 77 void init_op_types() {
 78   assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8);
 79   Operand_type_name.resize(NUM_OPERAND_TYPES);
 80   #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type);
 81   DEF(MODRM);
 82   DEF(SUBOP);
 83   DEF(DISP8);
 84   DEF(DISP16);
 85   DEF(DISP32);
 86   DEF(IMM8);
 87   DEF(IMM32);
 88   #undef DEF
 89 }
 90 
 91 :(before "End Globals")
// For each supported 1-byte opcode (two lowercase hex digits), the operand
// types it takes, as a bitvector with one bit per operand_type.
map</*op*/string, /*bitvector*/uint8_t> Permitted_operands;
// Returned in place of a bitvector when an instruction's operand metadata is
// self-contradictory.
const uint8_t INVALID_OPERANDS = 0xff;  // no instruction uses all the operand types
 94 :(before "End One-time Setup")
// populate the tables of permitted operands per opcode
init_permitted_operands();
 96 :(code)
 97 void init_permitted_operands() {
 98   //// Class A: just op, no operands
 99   // halt
100   put(Permitted_operands, "f4", 0x00);
101   // inc
102   put(Permitted_operands, "40", 0x00);
103   put(Permitted_operands, "41", 0x00);
104   put(Permitted_operands, "42", 0x00);
105   put(Permitted_operands, "43", 0x00);
106   put(Permitted_operands, "44", 0x00);
107   put(Permitted_operands, "45", 0x00);
108   put(Permitted_operands, "46", 0x00);
109   put(Permitted_operands, "47", 0x00);
110   // dec
111   put(Permitted_operands, "48", 0x00);
112   put(Permitted_operands, "49", 0x00);
113   put(Permitted_operands, "4a", 0x00);
114   put(Permitted_operands, "4b", 0x00);
115   put(Permitted_operands, "4c", 0x00);
116   put(Permitted_operands, "4d", 0x00);
117   put(Permitted_operands, "4e", 0x00);
118   put(Permitted_operands, "4f", 0x00);
119   // push
120   put(Permitted_operands, "50", 0x00);
121   put(Permitted_operands, "51", 0x00);
122   put(Permitted_operands, "52", 0x00);
123   put(Permitted_operands, "53", 0x00);
124   put(Permitted_operands, "54", 0x00);
125   put(Permitted_operands, "55", 0x00);
126   put(Permitted_operands, "56", 0x00);
127   put(Permitted_operands, "57", 0x00);
128   // pop
129   put(Permitted_operands, "58", 0x00);
130   put(Permitted_operands, "59", 0x00);
131   put(Permitted_operands, "5a", 0x00);
132   put(Permitted_operands, "5b", 0x00);
133   put(Permitted_operands, "5c", 0x00);
134   put(Permitted_operands, "5d", 0x00);
135   put(Permitted_operands, "5e", 0x00);
136   put(Permitted_operands, "5f", 0x00);
137   // return
138   put(Permitted_operands, "c3", 0x00);
139 
140   //// Class B: just op and disp8
141   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
142   //  0     0     0      |0       1     0     0
143 
144   // jump
145   put(Permitted_operands, "eb", 0x04);
146   put(Permitted_operands, "74", 0x04);
147   put(Permitted_operands, "75", 0x04);
148   put(Permitted_operands, "7c", 0x04);
149   put(Permitted_operands, "7d", 0x04);
150   put(Permitted_operands, "7e", 0x04);
151   put(Permitted_operands, "7f", 0x04);
152 
153   //// Class C: just op and disp16
154   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
155   //  0     0     0      |1       0     0     0
156   put(Permitted_operands, "e9", 0x08);  // jump
157 
158   //// Class D: just op and disp32
159   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
160   //  0     0     1      |0       0     0     0
161   put(Permitted_operands, "e8", 0x10);  // call
162 
163   //// Class E: just op and imm8
164   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
165   //  0     1     0      |0       0     0     0
166   put(Permitted_operands, "cd", 0x20);  // software interrupt
167 
168   //// Class F: just op and imm32
169   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
170   //  1     0     0      |0       0     0     0
171   put(Permitted_operands, "05", 0x40);  // add
172   put(Permitted_operands, "2d", 0x40);  // subtract
173   put(Permitted_operands, "25", 0x40);  // and
174   put(Permitted_operands, "0d", 0x40);  // or
175   put(Permitted_operands, "35", 0x40);  // xor
176   put(Permitted_operands, "3d", 0x40);  // compare
177   put(Permitted_operands, "68", 0x40);  // push
178   // copy
179   put(Permitted_operands, "b8", 0x40);
180   put(Permitted_operands, "b9", 0x40);
181   put(Permitted_operands, "ba", 0x40);
182   put(Permitted_operands, "bb", 0x40);
183   put(Permitted_operands, "bc", 0x40);
184   put(Permitted_operands, "bd", 0x40);
185   put(Permitted_operands, "be", 0x40);
186   put(Permitted_operands, "bf", 0x40);
187 
188   //// Class M: using ModR/M byte
189   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
190   //  0     0     0      |0       0     0     1
191 
192   // add
193   put(Permitted_operands, "01", 0x01);
194   put(Permitted_operands, "03", 0x01);
195   // subtract
196   put(Permitted_operands, "29", 0x01);
197   put(Permitted_operands, "2b", 0x01);
198   // and
199   put(Permitted_operands, "21", 0x01);
200   put(Permitted_operands, "23", 0x01);
201   // or
202   put(Permitted_operands, "09", 0x01);
203   put(Permitted_operands, "0b", 0x01);
204   // xor
205   put(Permitted_operands, "31", 0x01);
206   put(Permitted_operands, "33", 0x01);
207   // compare
208   put(Permitted_operands, "39", 0x01);
209   put(Permitted_operands, "3b", 0x01);
210   // copy
211   put(Permitted_operands, "88", 0x01);
212   put(Permitted_operands, "89", 0x01);
213   put(Permitted_operands, "8a", 0x01);
214   put(Permitted_operands, "8b", 0x01);
215   // swap
216   put(Permitted_operands, "87", 0x01);
217   // lea
218   put(Permitted_operands, "8d", 0x01);
219   // pop
220   put(Permitted_operands, "8f", 0x01);
221 
222   //// Class O: op, ModR/M and subop (not r32)
223   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
224   //  0     0     0      |0       0     1     1
225   put(Permitted_operands, "f7", 0x03);  // test/not/mul/div
226   put(Permitted_operands, "ff", 0x03);  // jump/push/call
227 
228   //// Class N: op, ModR/M and imm32
229   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
230   //  1     0     0      |0       0     0     1
231   put(Permitted_operands, "c7", 0x41);  // copy
232 
233   //// Class P: op, ModR/M, subop (not r32) and imm32
234   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
235   //  1     0     0      |0       0     1     1
236   put(Permitted_operands, "81", 0x43);  // combine
237 
238   // End Init Permitted Operands
239 }
240 
241 :(code)
// Helpers for operand bitvectors; 'bit' is a bit position (an operand_type).
// None of them modify 'bitvector'; SET and CLEAR yield the updated value.
// They are #undef'd again at the end of this layer.
#define HAS(bitvector, bit)  ((bitvector) & (1 << (bit)))
#define SET(bitvector, bit)  ((bitvector) | (1 << (bit)))
#define CLEAR(bitvector, bit)  ((bitvector) & (~(1 << (bit))))
245 
246 void check_operands(const line& inst, const word& op) {
247   if (!is_hex_byte(op)) return;
248   uint8_t expected_bitvector = get(Permitted_operands, op.data);
249   if (HAS(expected_bitvector, MODRM)) {
250     check_operands_modrm(inst, op);
251     compare_bitvector_modrm(inst, expected_bitvector, op);
252   }
253   else {
254     compare_bitvector(inst, expected_bitvector, op);
255   }
256 }
257 
258 //: Many instructions can be checked just by comparing bitvectors.
259 
260 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {
261   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
262   uint8_t bitvector = compute_operand_bitvector(inst);
263   if (trace_contains_errors()) return;  // duplicate operand type
264   if (bitvector == expected) return;  // all good with this instruction
265   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
266 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
267     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
268     const string& optype = Operand_type_name.at(i);
269     if ((bitvector & 0x1) > (expected & 0x1))
270       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
271     else
272       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
273     // continue giving all errors for a single instruction
274   }
275   // ignore settings in any unused bits
276 }
277 
278 string maybe_name(const word& op) {
279   if (!is_hex_byte(op)) return "";
280   if (!contains_key(name, op.data)) return "";
281   return " ("+get(name, op.data)+')';
282 }
283 
284 uint32_t compute_operand_bitvector(const line& inst) {
285   uint32_t bitvector = 0;
286   for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {
287     bitvector = bitvector | bitvector_for_operand(inst.words.at(i));
288     if (trace_contains_errors()) return INVALID_OPERANDS;  // duplicate operand type
289   }
290   return bitvector;
291 }
292 
293 bool has_operands(const line& inst) {
294   return SIZE(inst.words) > first_operand(inst);
295 }
296 
297 int first_operand(const line& inst) {
298   if (inst.words.at(0).data == "0f") return 2;
299   if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") {
300     if (inst.words.at(1).data == "0f")
301       return 3;
302     else
303       return 2;
304   }
305   return 1;
306 }
307 
308 // Scan the metadata of 'w' and return the bit corresponding to any operand type.
309 // Also raise an error if metadata contains multiple operand types.
310 uint32_t bitvector_for_operand(const word& w) {
311   uint32_t bv = 0;
312   bool found = false;
313   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
314     const string& curr = w.metadata.at(i);
315     if (!contains_key(Operand_type, curr)) continue;  // ignore unrecognized metadata
316     if (found) {
317       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
318       return INVALID_OPERANDS;
319     }
320     bv = (1 << get(Operand_type, curr));
321     found = true;
322   }
323   return bv;
324 }
325 
326 :(scenario conflicting_operand_type)
327 % Hide_errors = true;
328 == 0x1
329 cd/software-interrupt 80/imm8/imm32
330 +error: '80/imm8/imm32' has conflicting operand types; it should have only one
331 
332 //: Instructions computing effective addresses have more complex rules, so
333 //: we'll hard-code a common set of instruction-decoding rules.
334 
335 :(scenario check_missing_mod_operand)
336 % Hide_errors = true;
337 == 0x1
338 81 0/add/subop       3/rm32/ebx 1/imm32
339 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
340 
341 :(code)
342 void check_operands_modrm(const line& inst, const word& op) {
343   if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
344   check_operand_metadata_present(inst, "mod", op);
345   check_operand_metadata_present(inst, "rm32", op);
346   // no check for r32; some instructions don't use it; just assume it's 0 if missing
347   if (op.data == "81" || op.data == "8f" || op.data == "ff") {  // keep sync'd with 'help subop'
348     check_operand_metadata_present(inst, "subop", op);
349     check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop");
350   }
351   if (trace_contains_errors()) return;
352   if (metadata(inst, "rm32").data != "4") return;
353   // SIB byte checks
354   uint8_t mod = hex_byte(metadata(inst, "mod").data);
355   if (mod != /*direct*/3) {
356     check_operand_metadata_present(inst, "base", op);
357     check_operand_metadata_present(inst, "index", op);  // otherwise why go to SIB?
358   }
359   else {
360     check_operand_metadata_absent(inst, "base", op, "direct mode");
361     check_operand_metadata_absent(inst, "index", op, "direct mode");
362   }
363   // no check for scale; 0 (2**0 = 1) by default
364 }
365 
366 // same as compare_bitvector, with a couple of exceptions for modrm-based instructions
367 //   exception 1: ignore modrm bit since we already checked it above
368 //   exception 2: modrm instructions can use a displacement on occasion
369 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) {
370   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
371   uint8_t bitvector = compute_operand_bitvector(inst);
372   if (trace_contains_errors()) return;  // duplicate operand type
373   expected = CLEAR(expected, MODRM);  // exception 1
374   if (bitvector == expected) return;  // all good with this instruction
375   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
376 //?     cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
377     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
378     if (i == DISP8 || i == DISP32) continue;  // exception 2
379     const string& optype = Operand_type_name.at(i);
380     if ((bitvector & 0x1) > (expected & 0x1))
381       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
382     else
383       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
384     // continue giving all errors for a single instruction
385   }
386   // ignore settings in any unused bits
387 }
388 
389 void check_operand_metadata_present(const line& inst, const string& type, const word& op) {
390   if (!has_operand_metadata(inst, type))
391     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end();
392 }
393 
394 void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
395   if (has_operand_metadata(inst, type))
396     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end();
397 }
398 
399 :(scenarios transform)
400 :(scenario modrm_with_displacement)
401 % Reg[EAX].u = 0x1;
402 == 0x1
403 # just avoid null pointer
404 8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8  # copy *(EAX+4) to EDX
405 $error: 0
406 :(scenarios run)
407 
408 :(scenario conflicting_operands_in_modrm_instruction)
409 % Hide_errors = true;
410 == 0x1
411 01/add 0/mod 3/mod
412 +error: '01/add 0/mod 3/mod' has conflicting mod operands
413 
414 :(scenario conflicting_operand_type_modrm)
415 % Hide_errors = true;
416 == 0x1
417 01/add 0/mod 3/rm32/r32
418 +error: '3/rm32/r32' has conflicting operand types; it should have only one
419 
420 :(scenario check_missing_rm32_operand)
421 % Hide_errors = true;
422 == 0x1
423 81 0/add/subop 0/mod            1/imm32
424 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
425 
426 :(scenario check_missing_subop_operand)
427 % Hide_errors = true;
428 == 0x1
429 81             0/mod 3/rm32/ebx 1/imm32
430 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
431 
432 :(scenario check_missing_base_operand)
433 % Hide_errors = true;
434 == 0x1
435 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
436 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
437 
438 :(scenario check_missing_index_operand)
439 % Hide_errors = true;
440 == 0x1
441 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
442 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
443 
444 :(scenario check_missing_base_operand_2)
445 % Hide_errors = true;
446 == 0x1
447 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
448 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
449 
450 :(scenario check_base_operand_not_needed_in_direct_mode)
451 == 0x1
452 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32
453 $error: 0
454 
455 //:: similarly handle multi-byte opcodes
456 
457 :(code)
458 void check_operands_0f(const line& inst) {
459   assert(inst.words.at(0).data == "0f");
460   if (SIZE(inst.words) == 1) {
461     raise << "opcode '0f' requires a second opcode\n" << end();
462     return;
463   }
464   word op = preprocess_op(inst.words.at(1));
465   if (!contains_key(name_0f, op.data)) {
466     raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
467     return;
468   }
469   check_operands_0f(inst, op);
470 }
471 
// Instructions starting with the f3 prefix: none are supported, so this
// always raises an error. The instruction itself is never looked at.
void check_operands_f3(const line& /*unused*/) {
  raise << "no supported opcodes starting with f3\n" << end();
}
475 
476 :(scenario check_missing_disp16_operand)
477 % Hide_errors = true;
478 == 0x1
479 # instruction                     effective address                                                   operand     displacement    immediate
480 # op          subop               mod             rm32          base        index         scale       r32
481 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
482   0f 84                                                                                                                                             # jmp if ZF to ??
483 +error: '0f 84' (jump disp16 bytes away if ZF is set): missing disp16 operand
484 
485 :(before "End Globals")
// Like Permitted_operands, but for 2-byte opcodes; keyed by the opcode byte
// that follows the 0f escape.
map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f;
487 :(before "End Init Permitted Operands")
// Permitted operands for 2-byte opcodes. Keys are the second opcode byte
// (the one after 0f); bitvectors use the same layout as for 1-byte opcodes.

//// Class C: just op and disp16
//  imm32 imm8  disp32 |disp16  disp8 subop modrm
//  0     0     0      |1       0     0     0
put(Permitted_operands_0f, "84", 0x08);
put(Permitted_operands_0f, "85", 0x08);
put(Permitted_operands_0f, "8c", 0x08);
put(Permitted_operands_0f, "8d", 0x08);
put(Permitted_operands_0f, "8e", 0x08);
put(Permitted_operands_0f, "8f", 0x08);

//// Class M: using ModR/M byte
//  imm32 imm8  disp32 |disp16  disp8 subop modrm
//  0     0     0      |0       0     0     1
put(Permitted_operands_0f, "af", 0x01);
502 
503 :(code)
504 void check_operands_0f(const line& inst, const word& op) {
505   uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
506   if (HAS(expected_bitvector, MODRM))
507     check_operands_modrm(inst, op);
508   compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);
509 }
510 
511 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
512   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
513   uint8_t bitvector = compute_operand_bitvector(inst);
514   if (trace_contains_errors()) return;  // duplicate operand type
515   if (bitvector == expected) return;  // all good with this instruction
516   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
517 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
518     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
519     const string& optype = Operand_type_name.at(i);
520     if ((bitvector & 0x1) > (expected & 0x1))
521       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end();
522     else
523       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end();
524     // continue giving all errors for a single instruction
525   }
526   // ignore settings in any unused bits
527 }
528 
// Return a copy of the NUL-terminated string 's' with every character
// lowercased using the character-level tolower() from <cctype>.
// 's' must not be null.
string tolower(const char* s) {
  string result;
  for (/*nada*/;  *s;  ++s) {
    // <cctype> functions need a value representable as unsigned char; a plain
    // (possibly negative) char is undefined behavior for bytes >= 0x80
    result += static_cast<char>(tolower(static_cast<unsigned char>(*s)));
  }
  return result;
}
535 
536 #undef HAS
537 #undef SET
538 #undef CLEAR
539 
540 :(before "End Includes")
541 #include<cctype>