1 //: Beginning of "level 2": tagging bytes with metadata around what field of
  2 //: an x86 instruction they're for.
  3 //:
  4 //: The x86 instruction set is variable-length, and how a byte is interpreted
  5 //: affects later instruction boundaries. A lot of the pain in programming machine code
  6 //: stems from computer and programmer going out of sync on what a byte
  7 //: means. The miscommunication is usually not immediately caught, and
  8 //: metastasizes at runtime into kilobytes of misinterpreted instructions.
  9 //: Tagging bytes with what the programmer expects them to be interpreted as
 10 //: helps the computer catch miscommunication immediately.
 11 //:
 12 //: This is one way SubX is going to be different from a 'language': we
 13 //: typically think of languages as less verbose than machine code. Here we're
 14 //: making machine code *more* verbose.
 15 //:
 16 //: ---
 17 //:
 18 //: While we're here, we'll also improve a couple of other things in level 2:
 19 //:
 20 //: a) Machine code often packs logically separate operands into bitfields of
 21 //: a single byte. In a later layer (pack_operands) we'll start writing out
 22 //: each operand separately, and the translator will construct the right bytes
 23 //: out of operands.
 24 //:
 25 //: SubX now gets still more verbose. What used to be a single byte, say 'c3',
 26 //: can now expand to '3/mod 0/subop 3/rm32'.
 27 //:
 28 //: b) Since each operand is tagged, we can loosen ordering restrictions and
 29 //: allow writing out the operands in any order, like keyword arguments.
 30 //:
 31 //: The actual opcodes (first 1-3 bytes of each instruction) will continue to
 32 //: be at the start of each line. The x86 instruction set is a mess, and
 33 //: opcodes often don't admit good names.
 34 
 35 :(before "End Help Texts")
 36 put(Help, "instructions",
 37   "Each x86 instruction consists of an instruction or opcode and some number\n"
 38   "of operands.\n"
 39   "Each operand has a type. An instruction won't have more than one operand of\n"
 40   "any type.\n"
 41   "Each instruction has some set of allowed operand types. It'll reject others.\n"
 42   "The complete list of operand types: mod, subop, r32 (register), rm32\n"
 43   "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
 44   "imm32.\n"
 45   "Each of these has its own help page. Try reading 'subx help mod' next.\n"
 46 );
 47 :(before "End Help Contents")
 48 cerr << "  instructions\n";
 49 
 50 //:: Check for 'syntax errors'; missing or unexpected operands.
 51 
 52 :(scenario check_missing_imm8_operand)
 53 % Hide_errors = true;
 54 == 0x1
 55 # instruction                     effective address                                                   operand     displacement    immediate
 56 # op          subop               mod             rm32          base        index         scale       r32
 57 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
 58   cd                                                                                                                                                # int ??
 59 +error: 'cd' (software interrupt): missing imm8 operand
 60 
 61 :(before "End Transforms")
 62 Transform.push_back(check_operands);
 63 
 64 :(code)
 65 void check_operands(/*const*/ program& p) {
 66   trace(99, "transform") << "-- check operands" << end();
 67   if (p.segments.empty()) return;
 68   const segment& code = p.segments.at(0);
 69   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 70     check_operands(code.lines.at(i));
 71     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
 72   }
 73 }
 74 
 75 void check_operands(const line& inst) {
 76   word op = preprocess_op(inst.words.at(0));
 77   if (op.data == "0f") {
 78     check_operands_0f(inst);
 79     return;
 80   }
 81   if (op.data == "f3") {
 82     check_operands_f3(inst);
 83     return;
 84   }
 85   check_operands(inst, op);
 86 }
 87 
 88 word preprocess_op(word/*copy*/ op) {
 89   op.data = tolower(op.data.c_str());
 90   if (starts_with(op.data, "0x"))
 91     op.data = op.data.substr(2);
 92   return op;
 93 }
 94 
 95 //: To check the operands for an opcode, we'll track the permitted operands
 96 //: for each supported opcode in a bitvector. That way we can often compute the
 97 //: bitvector for each instruction's operands and compare it with the expected.
 98 
 99 :(before "End Types")
//: Operand types that get a bit in an operand bitvector. Enumerators take
//: consecutive values from 0, and an operand type's value is its bit position
//: (bitvector_for_operand computes 1 << value).
100 enum operand_type {
101   // start from the least significant bit
102   MODRM,  // bit 0 (0x01); more complex, may also involve disp8 or disp32
103   SUBOP,  // bit 1 (0x02)
104   DISP8,  // bit 2 (0x04)
105   DISP16,  // bit 3 (0x08)
106   DISP32,  // bit 4 (0x10)
107   IMM8,  // bit 5 (0x20)
108   IMM32,  // bit 6 (0x40)
109   NUM_OPERAND_TYPES  // count of the operand types above; must stay <= 8 (asserted in init_op_types)
110 };
111 :(before "End Globals")
112 vector<string> Operand_type_name;
113 map<string, operand_type> Operand_type;
114 :(before "End One-time Setup")
115 init_op_types();
116 :(code)
117 void init_op_types() {
118   assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8);
119   Operand_type_name.resize(NUM_OPERAND_TYPES);
120   #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type);
121   DEF(MODRM);
122   DEF(SUBOP);
123   DEF(DISP8);
124   DEF(DISP16);
125   DEF(DISP32);
126   DEF(IMM8);
127   DEF(IMM32);
128   #undef DEF
129 }
130 
131 :(before "End Globals")
132 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands;
133 const uint8_t INVALID_OPERANDS = 0xff;  // no instruction uses all the operand types
134 :(before "End One-time Setup")
135 init_permitted_operands();
136 :(code)
137 void init_permitted_operands() {
138   //// Class A: just op, no operands
139   // halt
140   put(Permitted_operands, "f4", 0x00);
141   // push
142   put(Permitted_operands, "50", 0x00);
143   put(Permitted_operands, "51", 0x00);
144   put(Permitted_operands, "52", 0x00);
145   put(Permitted_operands, "53", 0x00);
146   put(Permitted_operands, "54", 0x00);
147   put(Permitted_operands, "55", 0x00);
148   put(Permitted_operands, "56", 0x00);
149   put(Permitted_operands, "57", 0x00);
150   // pop
151   put(Permitted_operands, "58", 0x00);
152   put(Permitted_operands, "59", 0x00);
153   put(Permitted_operands, "5a", 0x00);
154   put(Permitted_operands, "5b", 0x00);
155   put(Permitted_operands, "5c", 0x00);
156   put(Permitted_operands, "5d", 0x00);
157   put(Permitted_operands, "5e", 0x00);
158   put(Permitted_operands, "5f", 0x00);
159   // return
160   put(Permitted_operands, "c3", 0x00);
161 
162   //// Class B: just op and disp8
163   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
164   //  0     0     0      |0       1     0     0
165 
166   // jump
167   put(Permitted_operands, "eb", 0x04);
168   put(Permitted_operands, "74", 0x04);
169   put(Permitted_operands, "75", 0x04);
170   put(Permitted_operands, "7c", 0x04);
171   put(Permitted_operands, "7d", 0x04);
172   put(Permitted_operands, "7e", 0x04);
173   put(Permitted_operands, "7f", 0x04);
174 
175   //// Class C: just op and disp16
176   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
177   //  0     0     0      |1       0     0     0
178   put(Permitted_operands, "e9", 0x08);  // jump
179 
180   //// Class D: just op and disp32
181   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
182   //  0     0     1      |0       0     0     0
183   put(Permitted_operands, "e8", 0x10);  // call
184 
185   //// Class E: just op and imm8
186   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
187   //  0     1     0      |0       0     0     0
188   put(Permitted_operands, "cd", 0x20);  // software interrupt
189 
190   //// Class F: just op and imm32
191   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
192   //  1     0     0      |0       0     0     0
193   put(Permitted_operands, "05", 0x40);  // add
194   put(Permitted_operands, "2d", 0x40);  // subtract
195   put(Permitted_operands, "25", 0x40);  // and
196   put(Permitted_operands, "0d", 0x40);  // or
197   put(Permitted_operands, "35", 0x40);  // xor
198   put(Permitted_operands, "3d", 0x40);  // compare
199   put(Permitted_operands, "68", 0x40);  // push
200   // copy
201   put(Permitted_operands, "b8", 0x40);
202   put(Permitted_operands, "b9", 0x40);
203   put(Permitted_operands, "ba", 0x40);
204   put(Permitted_operands, "bb", 0x40);
205   put(Permitted_operands, "bc", 0x40);
206   put(Permitted_operands, "bd", 0x40);
207   put(Permitted_operands, "be", 0x40);
208   put(Permitted_operands, "bf", 0x40);
209 
210   //// Class M: using ModR/M byte
211   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
212   //  0     0     0      |0       0     0     1
213 
214   // add
215   put(Permitted_operands, "01", 0x01);
216   put(Permitted_operands, "03", 0x01);
217   // subtract
218   put(Permitted_operands, "29", 0x01);
219   put(Permitted_operands, "2b", 0x01);
220   // and
221   put(Permitted_operands, "21", 0x01);
222   put(Permitted_operands, "23", 0x01);
223   // or
224   put(Permitted_operands, "09", 0x01);
225   put(Permitted_operands, "0b", 0x01);
226   // complement
227   put(Permitted_operands, "f7", 0x01);
228   // xor
229   put(Permitted_operands, "31", 0x01);
230   put(Permitted_operands, "33", 0x01);
231   // compare
232   put(Permitted_operands, "39", 0x01);
233   put(Permitted_operands, "3b", 0x01);
234   // copy
235   put(Permitted_operands, "89", 0x01);
236   put(Permitted_operands, "8b", 0x01);
237   // swap
238   put(Permitted_operands, "87", 0x01);
239   // pop
240   put(Permitted_operands, "8f", 0x01);
241 
242   //// Class O: op, ModR/M and subop (not r32)
243   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
244   //  0     0     0      |0       0     1     1
245   put(Permitted_operands, "ff", 0x03);  // jump/push/call
246 
247   //// Class N: op, ModR/M and imm32
248   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
249   //  1     0     0      |0       0     0     1
250   put(Permitted_operands, "c7", 0x41);  // copy
251 
252   //// Class P: op, ModR/M, subop (not r32) and imm32
253   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
254   //  1     0     0      |0       0     1     1
255   put(Permitted_operands, "81", 0x43);  // combine
256 
257   // End Init Permitted Operands
258 }
259 
260 :(code)
//: Helpers for operand bitvectors; 'bit' is a bit position such as an
//: operand_type enumerator. Each expands to an expression and none modifies
//: 'bitvector' in place: callers assign the result of CLEAR back themselves.
//: All three are #undef'd further down this file. SET has no uses in the code
//: visible in this file.
261 #define HAS(bitvector, bit)  ((bitvector) & (1 << (bit)))
262 #define SET(bitvector, bit)  ((bitvector) | (1 << (bit)))
263 #define CLEAR(bitvector, bit)  ((bitvector) & (~(1 << (bit))))
264 
265 void check_operands(const line& inst, const word& op) {
266   if (!is_hex_byte(op)) return;
267   uint8_t expected_bitvector = get(Permitted_operands, op.data);
268   if (HAS(expected_bitvector, MODRM)) {
269     check_operands_modrm(inst, op);
270     compare_bitvector_modrm(inst, expected_bitvector, op);
271   }
272   else {
273     compare_bitvector(inst, expected_bitvector, op);
274   }
275 }
276 
277 //: Many instructions can be checked just by comparing bitvectors.
278 
279 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {
280   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
281   uint8_t bitvector = compute_operand_bitvector(inst);
282   if (trace_contains_errors()) return;  // duplicate operand type
283   if (bitvector == expected) return;  // all good with this instruction
284   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
285 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
286     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
287     const string& optype = Operand_type_name.at(i);
288     if ((bitvector & 0x1) > (expected & 0x1))
289       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
290     else
291       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
292     // continue giving all errors for a single instruction
293   }
294   // ignore settings in any unused bits
295 }
296 
297 string maybe_name(const word& op) {
298   if (!is_hex_byte(op)) return "";
299   if (!contains_key(name, op.data)) return "";
300   return " ("+get(name, op.data)+')';
301 }
302 
303 bool is_hex_byte(const word& curr) {
304   if (contains_any_operand_metadata(curr))
305     return false;
306   if (SIZE(curr.data) != 2)
307     return false;
308   if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
309     return false;
310   return true;
311 }
312 
313 uint32_t compute_operand_bitvector(const line& inst) {
314   uint32_t bitvector = 0;
315   for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {
316     bitvector = bitvector | bitvector_for_operand(inst.words.at(i));
317     if (trace_contains_errors()) return INVALID_OPERANDS;  // duplicate operand type
318   }
319   return bitvector;
320 }
321 
322 bool has_operands(const line& inst) {
323   return SIZE(inst.words) > first_operand(inst);
324 }
325 
326 int first_operand(const line& inst) {
327   if (inst.words.at(0).data == "0f") return 2;
328   if (inst.words.at(0).data == "f3") {
329     if (inst.words.at(1).data == "0f")
330       return 3;
331     else
332       return 2;
333   }
334   return 1;
335 }
336 
337 bool all_hex_bytes(const line& inst) {
338   for (int i = 0;  i < SIZE(inst.words);  ++i)
339     if (!is_hex_byte(inst.words.at(i)))
340       return false;
341   return true;
342 }
343 
344 bool contains_any_operand_metadata(const word& word) {
345   for (int i = 0;  i < SIZE(word.metadata);  ++i)
346     if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
347       return true;
348   return false;
349 }
350 
351 // Scan the metadata of 'w' and return the bit corresponding to any operand type.
352 // Also raise an error if metadata contains multiple operand types.
353 uint32_t bitvector_for_operand(const word& w) {
354   uint32_t bv = 0;
355   bool found = false;
356   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
357     const string& curr = w.metadata.at(i);
358     if (!contains_key(Operand_type, curr)) continue;  // ignore unrecognized metadata
359     if (found) {
360       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
361       return INVALID_OPERANDS;
362     }
363     bv = (1 << get(Operand_type, curr));
364     found = true;
365   }
366   return bv;
367 }
368 
369 :(scenario conflicting_operand_type)
370 % Hide_errors = true;
371 == 0x1
372 cd/software-interrupt 80/imm8/imm32
373 +error: '80/imm8/imm32' has conflicting operand types; it should have only one
374 
375 //: Instructions computing effective addresses have more complex rules, so
376 //: we'll hard-code a common set of instruction-decoding rules.
377 
378 :(scenario check_missing_mod_operand)
379 % Hide_errors = true;
380 == 0x1
381 81 0/add/subop       3/rm32/ebx 1/imm32
382 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
383 
384 :(before "End Globals")
//: Names of all metadata tags that mark a word as an instruction operand.
//: Consulted by contains_any_operand_metadata and has_metadata. Unlike
//: Operand_type, this also covers the operands packed into the ModR/M and
//: SIB bytes (mod, rm32, r32, base, index, scale), and has no 'modrm' entry.
385 set<string> Instruction_operands;
386 :(before "End One-time Setup")
387 Instruction_operands.insert("subop");
388 Instruction_operands.insert("mod");
389 Instruction_operands.insert("rm32");
390 Instruction_operands.insert("base");
391 Instruction_operands.insert("index");
392 Instruction_operands.insert("scale");
393 Instruction_operands.insert("r32");
394 Instruction_operands.insert("disp8");
395 Instruction_operands.insert("disp16");
396 Instruction_operands.insert("disp32");
397 Instruction_operands.insert("imm8");
398 Instruction_operands.insert("imm32");
399 
400 :(code)
401 void check_operands_modrm(const line& inst, const word& op) {
402   if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
403   check_metadata_present(inst, "mod", op);
404   check_metadata_present(inst, "rm32", op);
405   // no check for r32; some instructions don't use it; just assume it's 0 if missing
406   if (op.data == "81" || op.data == "8f" || op.data == "ff") {  // keep sync'd with 'help subop'
407     check_metadata_present(inst, "subop", op);
408     check_metadata_absent(inst, "r32", op, "should be replaced by subop");
409   }
410   if (trace_contains_errors()) return;
411   if (metadata(inst, "rm32").data != "4") return;
412   // SIB byte checks
413   uint8_t mod = hex_byte(metadata(inst, "mod").data);
414   if (mod != /*direct*/3) {
415     check_metadata_present(inst, "base", op);
416     check_metadata_present(inst, "index", op);  // otherwise why go to SIB?
417   }
418   else {
419     check_metadata_absent(inst, "base", op, "direct mode");
420     check_metadata_absent(inst, "index", op, "direct mode");
421   }
422   // no check for scale; 0 (2**0 = 1) by default
423 }
424 
425 // same as compare_bitvector, with a couple of exceptions for modrm-based instructions
426 //   exception 1: ignore modrm bit since we already checked it above
427 //   exception 2: modrm instructions can use a displacement on occasion
428 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) {
429   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
430   uint8_t bitvector = compute_operand_bitvector(inst);
431   if (trace_contains_errors()) return;  // duplicate operand type
432   expected = CLEAR(expected, MODRM);  // exception 1
433   if (bitvector == expected) return;  // all good with this instruction
434   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
435 //?     cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
436     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
437     if (i == DISP8 || i == DISP32) continue;  // exception 2
438     const string& optype = Operand_type_name.at(i);
439     if ((bitvector & 0x1) > (expected & 0x1))
440       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
441     else
442       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
443     // continue giving all errors for a single instruction
444   }
445   // ignore settings in any unused bits
446 }
447 
448 void check_metadata_present(const line& inst, const string& type, const word& op) {
449   if (!has_metadata(inst, type))
450     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end();
451 }
452 
453 void check_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
454   if (has_metadata(inst, type))
455     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end();
456 }
457 
458 bool has_metadata(const line& inst, const string& m) {
459   bool result = false;
460   for (int i = 0;  i < SIZE(inst.words);  ++i) {
461     if (!has_metadata(inst.words.at(i), m)) continue;
462     if (result) {
463       raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
464       return false;
465     }
466     result = true;
467   }
468   return result;
469 }
470 
471 bool has_metadata(const word& w, const string& m) {
472   bool result = false;
473   bool metadata_found = false;
474   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
475     const string& curr = w.metadata.at(i);
476     if (!contains_key(Instruction_operands, curr)) continue;  // ignore unrecognized metadata
477     if (metadata_found) {
478       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
479       return false;
480     }
481     metadata_found = true;
482     result = (curr == m);
483   }
484   return result;
485 }
486 
487 word metadata(const line& inst, const string& m) {
488   for (int i = 0;  i < SIZE(inst.words);  ++i)
489     if (has_metadata(inst.words.at(i), m))
490       return inst.words.at(i);
491   assert(false);
492 }
493 
494 :(scenarios transform)
495 :(scenario modrm_with_displacement)
496 % Reg[EAX].u = 0x1;
497 == 0x1
498 # just avoid null pointer
499 8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8  # copy *(EAX+4) to EDX
500 $error: 0
501 :(scenarios run)
502 
503 //: helper for scenario
504 :(code)
505 void transform(const string& text_bytes) {
506   program p;
507   istringstream in(text_bytes);
508   parse(in, p);
509   if (trace_contains_errors()) return;
510   transform(p);
511 }
512 
513 :(scenario conflicting_operands_in_modrm_instruction)
514 % Hide_errors = true;
515 == 0x1
516 01/add 0/mod 3/mod
517 +error: '01/add 0/mod 3/mod' has conflicting mod operands
518 
519 :(scenario conflicting_operand_type_modrm)
520 % Hide_errors = true;
521 == 0x1
522 01/add 0/mod 3/rm32/r32
523 +error: '3/rm32/r32' has conflicting operand types; it should have only one
524 
525 :(scenario check_missing_rm32_operand)
526 % Hide_errors = true;
527 == 0x1
528 81 0/add/subop 0/mod            1/imm32
529 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
530 
531 :(scenario check_missing_subop_operand)
532 % Hide_errors = true;
533 == 0x1
534 81             0/mod 3/rm32/ebx 1/imm32
535 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
536 
537 :(scenario check_missing_base_operand)
538 % Hide_errors = true;
539 == 0x1
540 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
541 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
542 
543 :(scenario check_missing_index_operand)
544 % Hide_errors = true;
545 == 0x1
546 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
547 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
548 
549 :(scenario check_missing_base_operand_2)
550 % Hide_errors = true;
551 == 0x1
552 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
553 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
554 
555 :(scenario check_base_operand_not_needed_in_direct_mode)
556 == 0x1
557 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32
558 $error: 0
559 
560 //:: similarly handle multi-byte opcodes
561 
562 :(code)
563 void check_operands_0f(const line& inst) {
564   assert(inst.words.at(0).data == "0f");
565   if (SIZE(inst.words) == 1) {
566     raise << "opcode '0f' requires a second opcode\n" << end();
567     return;
568   }
569   word op = preprocess_op(inst.words.at(1));
570   if (!contains_key(name_0f, op.data)) {
571     raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
572     return;
573   }
574   check_operands_0f(inst, op);
575 }
576 
577 void check_operands_f3(const line& /*unused*/) {
578   raise << "no supported opcodes starting with f3\n" << end();
579 }
580 
581 :(scenario check_missing_disp16_operand)
582 % Hide_errors = true;
583 == 0x1
584 # instruction                     effective address                                                   operand     displacement    immediate
585 # op          subop               mod             rm32          base        index         scale       r32
586 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
587   0f 84                                                                                                                                             # jmp if ZF to ??
588 +error: '0f 84' (jump disp16 bytes away if ZF is set): missing disp16 operand
589 
590 :(before "End Globals")
//: Operand bitvectors for two-byte opcodes, keyed by the (normalized) byte
//: that follows '0f'. Bits mean the same as in Permitted_operands.
//: The entries below are spliced into init_permitted_operands at its
//: 'End Init Permitted Operands' marker.
591 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f;
592 :(before "End Init Permitted Operands")
593 //// Class C: just op and disp16
594 //  imm32 imm8  disp32 |disp16  disp8 subop modrm
595 //  0     0     0      |1       0     0     0
596 put(Permitted_operands_0f, "84", 0x08);
597 put(Permitted_operands_0f, "85", 0x08);
598 put(Permitted_operands_0f, "8c", 0x08);
599 put(Permitted_operands_0f, "8d", 0x08);
600 put(Permitted_operands_0f, "8e", 0x08);
601 put(Permitted_operands_0f, "8f", 0x08);
602
603 //// Class M: using ModR/M byte
604 //  imm32 imm8  disp32 |disp16  disp8 subop modrm
605 //  0     0     0      |0       0     0     1
606 put(Permitted_operands_0f, "af", 0x01);
608 :(code)
609 void check_operands_0f(const line& inst, const word& op) {
610   uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
611   if (HAS(expected_bitvector, MODRM))
612     check_operands_modrm(inst, op);
613   compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);
614 }
615 
616 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
617   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
618   uint8_t bitvector = compute_operand_bitvector(inst);
619   if (trace_contains_errors()) return;  // duplicate operand type
620   if (bitvector == expected) return;  // all good with this instruction
621   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
622 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
623     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
624     const string& optype = Operand_type_name.at(i);
625     if ((bitvector & 0x1) > (expected & 0x1))
626       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end();
627     else
628       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end();
629     // continue giving all errors for a single instruction
630   }
631   // ignore settings in any unused bits
632 }
633 
634 string to_string(const line& inst) {
635   ostringstream out;
636   for (int i = 0;  i < SIZE(inst.words);  ++i) {
637     if (i > 0) out << ' ';
638     out << inst.words.at(i).original;
639   }
640   return out.str();
641 }
642 
// Return a copy of the null-terminated string 's' with every character
// lowercased using the character-level tolower() from <cctype>.
string tolower(const char* s) {
  ostringstream out;
  for (/*nada*/;  *s;  ++s) {
    // tolower(int) is only defined for EOF and values representable as
    // unsigned char; plain char may be negative for non-ASCII bytes
    out << static_cast<char>(tolower(static_cast<unsigned char>(*s)));
  }
  return out.str();
}
649 
650 #undef HAS
651 #undef SET
652 #undef CLEAR
653 
654 //:: docs on each operand type
655 
656 :(before "End Help Texts")
657 init_operand_type_help();
658 :(code)
659 void init_operand_type_help() {
660   put(Help, "mod",
661     "2-bit operand controlling the _addressing mode_ of many instructions,\n"
662     "to determine how to compute the _effective address_ to look up memory at\n"
663     "based on the 'rm32' operand and potentially others.\n"
664     "\n"
665     "If mod = 3, just operate on the contents of the register specified by rm32\n"
666     "            (direct mode).\n"
667     "If mod = 2, effective address is usually* rm32 + disp32\n"
668     "            (indirect mode with displacement).\n"
669     "If mod = 1, effective address is usually* rm32 + disp8\n"
670     "            (indirect mode with displacement).\n"
671     "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
672     "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
673     "     Using it as an address gets more involved. For more details,\n"
674     "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
675     "\n"
676     "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
677     "\"32-bit addressing forms with the ModR/M byte\".\n"
678     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
679   );
680   put(Help, "subop",
681     "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
682     "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
683   );
684   put(Help, "r32",
685     "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
686   );
687   put(Help, "rm32",
688     "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
689     "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
690     "\"32-bit addressing forms with the ModR/M byte\".\n"
691     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
692   );
693   put(Help, "base",
694     "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
695     "This address may be further modified by 'index' and 'scale' operands.\n"
696     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
697     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
698     "\"32-bit addressing forms with the SIB byte\".\n"
699     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
700   );
701   put(Help, "index",
702     "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
703     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
704     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
705     "\"32-bit addressing forms with the SIB byte\".\n"
706     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
707   );
708   put(Help, "scale",
709     "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
710     "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
711     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
712     "\"32-bit addressing forms with the SIB byte\".\n"
713     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
714   );
715   put(Help, "disp8",
716     "8-bit value to be added in many instructions.\n"
717   );
718   put(Help, "disp16",
719     "16-bit value to be added in many instructions.\n"
720   );
721   put(Help, "disp32",
722     "32-bit value to be added in many instructions.\n"
723   );
724   put(Help, "imm8",
725     "8-bit value for many instructions.\n"
726   );
727   put(Help, "imm32",
728     "32-bit value for many instructions.\n"
729   );
730 }
731 
732 :(before "End Includes")
733 #include<cctype>