1 //: Beginning of "level 2": tagging bytes with metadata around what field of
  2 //: an x86 instruction they're for.
  3 //:
  4 //: The x86 instruction set is variable-length, and how a byte is interpreted
  5 //: affects later instruction boundaries. A lot of the pain in programming machine code
  6 //: stems from computer and programmer going out of sync on what a byte
  7 //: means. The miscommunication is usually not immediately caught, and
  8 //: metastasizes at runtime into kilobytes of misinterpreted instructions.
  9 //: Tagging bytes with what the programmer expects them to be interpreted as
 10 //: helps the computer catch miscommunication immediately.
 11 //:
 12 //: This is one way SubX is going to be different from a 'language': we
 13 //: typically think of languages as less verbose than machine code. Here we're
 14 //: making machine code *more* verbose.
 15 //:
 16 //: ---
 17 //:
 18 //: While we're here, we'll also improve a couple of other things:
 19 //:
 20 //: a) Machine code often packs logically separate operands into bitfields of
 21 //: a single byte. We'll start writing out each operand separately, and the
 22 //: translator will construct the right bytes out of operands.
 23 //:
 24 //: SubX now gets still more verbose. What used to be a single byte, say 'c3',
 25 //: can now expand to '3/mod 0/subop 3/rm32'.
 26 //:
 27 //: b) Since each operand is tagged, we can loosen ordering restrictions and
 28 //: allow writing out the operands in any order, like keyword arguments.
 29 //:
//: c) Operand values can be expressed in either decimal or hex (when prefixed
//: with '0x'). Raw 2-character hex bytes without the '0x' are only valid when
//: tagged without any operand metadata. (This may be a bad idea.)
 33 //:
 34 //: Coda: the actual opcodes (1-3 bytes) will continue to be at the start of
 35 //: each line, in hex, and untagged. The x86 instruction set is a mess, and
 36 //: instructions don't admit good names.
 37 
 38 :(before "End Help Texts")
 39 put(Help, "instructions",
 40   "Each x86 instruction consists of an instruction or opcode and some number\n"
 41   "of operands.\n"
 42   "Each operand has a type. An instruction won't have more than one operand of\n"
 43   "any type.\n"
 44   "Each instruction has some set of allowed operand types. It'll reject others.\n"
 45   "The complete list of operand types: mod, subop, r32 (register), rm32\n"
 46   "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
 47   "imm32.\n"
 48   "Each of these has its own help page. Try reading 'subx help mod' next.\n"
 49 );
 50 :(before "End Help Contents")
 51 cerr << "  instructions\n";
 52 
 53 //:: Check for 'syntax errors'; missing or unexpected operands.
 54 
 55 :(scenario check_missing_imm8_operand)
 56 % Hide_errors = true;
 57 == 0x1
 58 # instruction                     effective address                                           operand     displacement    immediate
 59 # op          subop               mod             rm32          base      index     scale     r32
# 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    3 bits      0/1/2/4 bytes   0/1/2/4 bytes
 61   cd                                                                                                                                        # int ??
 62 +error: 'cd' (software interrupt): missing imm8 operand
 63 
 64 :(before "End One-time Setup")
 65 Transform.push_back(check_operands);
 66 
 67 :(code)
 68 void check_operands(/*const*/ program& p) {
 69   trace(99, "transform") << "-- check operands" << end();
 70   if (p.segments.empty()) return;
 71   const segment& code = p.segments.at(0);
 72   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 73     check_operands(code.lines.at(i));
 74     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
 75   }
 76 }
 77 
 78 void check_operands(const line& inst) {
 79   word op = preprocess_op(inst.words.at(0));
 80   if (op.data == "0f") {
 81     check_operands_0f(inst);
 82     return;
 83   }
 84   if (op.data == "f3") {
 85     check_operands_f3(inst);
 86     return;
 87   }
 88   check_operands(inst, op);
 89 }
 90 
 91 word preprocess_op(word/*copy*/ op) {
 92   op.data = tolower(op.data.c_str());
 93   if (starts_with(op.data, "0x"))
 94     op.data = op.data.substr(2);
 95   return op;
 96 }
 97 
 98 //: To check the operands for an opcode, we'll track the permitted operands
 99 //: for each supported opcode in a bitvector. That way we can often compute the
100 //: bitvector for each instruction's operands and compare it with the expected.
101 
102 :(before "End Types")
// Operand categories tracked per opcode. Each enumerator is a bit position in
// an operand bitvector, counting up from the least significant bit.
enum operand_type {
  MODRM = 0,  // more complex, may also involve disp8 or disp32
  SUBOP = 1,
  DISP8 = 2,
  DISP16 = 3,
  DISP32 = 4,
  IMM8 = 5,
  IMM32 = 6,
  NUM_OPERAND_TYPES = 7  // count of the categories above; not itself a category
};
114 :(before "End Globals")
115 vector<string> Operand_type_name;
116 map<string, operand_type> Operand_type;
117 :(before "End One-time Setup")
118 init_op_types();
119 :(code)
120 void init_op_types() {
121   assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8);
122   Operand_type_name.resize(NUM_OPERAND_TYPES);
123   #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type);
124   DEF(MODRM);
125   DEF(SUBOP);
126   DEF(DISP8);
127   DEF(DISP16);
128   DEF(DISP32);
129   DEF(IMM8);
130   DEF(IMM32);
131   #undef DEF
132 }
133 
134 :(before "End Globals")
135 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands;
136 const uint8_t INVALID_OPERANDS = 0xff;  // no instruction uses all the operand types
137 :(before "End One-time Setup")
138 init_permitted_operands();
139 :(code)
140 void init_permitted_operands() {
141   //// Class A: just op, no operands
142   // halt
143   put(Permitted_operands, "f4", 0x00);
144   // push
145   put(Permitted_operands, "50", 0x00);
146   put(Permitted_operands, "51", 0x00);
147   put(Permitted_operands, "52", 0x00);
148   put(Permitted_operands, "53", 0x00);
149   put(Permitted_operands, "54", 0x00);
150   put(Permitted_operands, "55", 0x00);
151   put(Permitted_operands, "56", 0x00);
152   put(Permitted_operands, "57", 0x00);
153   // pop
154   put(Permitted_operands, "58", 0x00);
155   put(Permitted_operands, "59", 0x00);
156   put(Permitted_operands, "5a", 0x00);
157   put(Permitted_operands, "5b", 0x00);
158   put(Permitted_operands, "5c", 0x00);
159   put(Permitted_operands, "5d", 0x00);
160   put(Permitted_operands, "5e", 0x00);
161   put(Permitted_operands, "5f", 0x00);
162   // return
163   put(Permitted_operands, "c3", 0x00);
164 
165   //// Class B: just op and disp8
166   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
167   //  0     0     0      |0       1     0     0
168 
169   // jump
170   put(Permitted_operands, "eb", 0x04);
171   put(Permitted_operands, "74", 0x04);
172   put(Permitted_operands, "75", 0x04);
173   put(Permitted_operands, "7c", 0x04);
174   put(Permitted_operands, "7d", 0x04);
175   put(Permitted_operands, "7e", 0x04);
176   put(Permitted_operands, "7f", 0x04);
177 
178   //// Class C: just op and disp16
179   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
180   //  0     0     0      |1       0     0     0
181   put(Permitted_operands, "e8", 0x08);  // jump
182 
183   //// Class D: just op and disp32
184   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
185   //  0     0     1      |0       0     0     0
186   put(Permitted_operands, "e9", 0x10);  // call
187 
188   //// Class E: just op and imm8
189   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
190   //  0     1     0      |0       0     0     0
191   put(Permitted_operands, "cd", 0x20);  // software interrupt
192 
193   //// Class F: just op and imm32
194   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
195   //  1     0     0      |0       0     0     0
196   put(Permitted_operands, "05", 0x40);  // add
197   put(Permitted_operands, "2d", 0x40);  // subtract
198   put(Permitted_operands, "25", 0x40);  // and
199   put(Permitted_operands, "0d", 0x40);  // or
200   put(Permitted_operands, "35", 0x40);  // xor
201   put(Permitted_operands, "3d", 0x40);  // compare
202   put(Permitted_operands, "68", 0x40);  // push
203   // copy
204   put(Permitted_operands, "b8", 0x40);
205   put(Permitted_operands, "b9", 0x40);
206   put(Permitted_operands, "ba", 0x40);
207   put(Permitted_operands, "bb", 0x40);
208   put(Permitted_operands, "bc", 0x40);
209   put(Permitted_operands, "bd", 0x40);
210   put(Permitted_operands, "be", 0x40);
211   put(Permitted_operands, "bf", 0x40);
212 
213   //// Class M: using ModR/M byte
214   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
215   //  0     0     0      |0       0     0     1
216 
217   // add
218   put(Permitted_operands, "01", 0x01);
219   put(Permitted_operands, "03", 0x01);
220   // subtract
221   put(Permitted_operands, "29", 0x01);
222   put(Permitted_operands, "2b", 0x01);
223   // and
224   put(Permitted_operands, "21", 0x01);
225   put(Permitted_operands, "23", 0x01);
226   // or
227   put(Permitted_operands, "09", 0x01);
228   put(Permitted_operands, "0b", 0x01);
229   // complement
230   put(Permitted_operands, "f7", 0x01);
231   // xor
232   put(Permitted_operands, "31", 0x01);
233   put(Permitted_operands, "33", 0x01);
234   // compare
235   put(Permitted_operands, "39", 0x01);
236   put(Permitted_operands, "3b", 0x01);
237   // copy
238   put(Permitted_operands, "89", 0x01);
239   put(Permitted_operands, "8b", 0x01);
240   // swap
241   put(Permitted_operands, "87", 0x01);
242   // pop
243   put(Permitted_operands, "8f", 0x01);
244 
245   //// Class O: op, ModR/M and subop (not r32)
246   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
247   //  0     0     0      |0       0     1     1
248   put(Permitted_operands, "ff", 0x03);  // jump/push/call
249 
250   //// Class N: op, ModR/M and imm32
251   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
252   //  1     0     0      |0       0     0     1
253   put(Permitted_operands, "c7", 0x41);  // copy
254 
255   //// Class P: op, ModR/M, subop (not r32) and imm32
256   //  imm32 imm8  disp32 |disp16  disp8 subop modrm
257   //  1     0     0      |0       0     1     1
258   put(Permitted_operands, "81", 0x43);  // combine
259 
260   // End Init Permitted Operands
261 }
262 
263 :(before "End Includes")
// Helpers for operand bitvectors; 'pos' is a bit position counted from the
// least significant bit. None of them modifies its argument:
//   HAS   yields non-zero iff bit 'pos' is set
//   SET   yields the bitvector with bit 'pos' turned on
//   CLEAR yields the bitvector with bit 'pos' turned off
#define HAS(bv, pos)  ((bv) & (1 << (pos)))
#define SET(bv, pos)  ((bv) | (1 << (pos)))
#define CLEAR(bv, pos)  ((bv) & (~(1 << (pos))))
267 
268 :(code)
269 void check_operands(const line& inst, const word& op) {
270   if (!is_hex_byte(op)) return;
271   uint8_t expected_bitvector = get(Permitted_operands, op.data);
272   if (HAS(expected_bitvector, MODRM))
273     check_operands_modrm(inst, op);
274   compare_bitvector(inst, CLEAR(expected_bitvector, MODRM), op);
275 }
276 
277 //: Many instructions can be checked just by comparing bitvectors.
278 
279 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {
280   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
281   uint8_t bitvector = compute_operand_bitvector(inst);
282   if (trace_contains_errors()) return;  // duplicate operand type
283   if (bitvector == expected) return;  // all good with this instruction
284   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
285 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
286     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
287     const string& optype = Operand_type_name.at(i);
288     if ((bitvector & 0x1) > (expected & 0x1))
289       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
290     else
291       raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
292     // continue giving all errors for a single instruction
293   }
294   // ignore settings in any unused bits
295 }
296 
297 string maybe_name(const word& op) {
298   if (!is_hex_byte(op)) return "";
299   if (!contains_key(name, op.data)) return "";
300   return " ("+get(name, op.data)+')';
301 }
302 
303 bool is_hex_byte(const word& curr) {
304   if (contains_any_operand_metadata(curr))
305     return false;
306   if (SIZE(curr.data) != 2)
307     return false;
308   if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
309     return false;
310   return true;
311 }
312 
313 uint32_t compute_operand_bitvector(const line& inst) {
314   uint32_t bitvector = 0;
315   for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {
316     bitvector = bitvector | bitvector_for_operand(inst.words.at(i));
317     if (trace_contains_errors()) return INVALID_OPERANDS;  // duplicate operand type
318   }
319   return bitvector;
320 }
321 
322 bool has_operands(const line& inst) {
323   return SIZE(inst.words) > first_operand(inst);
324 }
325 
326 int first_operand(const line& inst) {
327   if (inst.words.at(0).data == "0f") return 2;
328   if (inst.words.at(0).data == "f3") {
329     if (inst.words.at(1).data == "0f")
330       return 3;
331     else
332       return 2;
333   }
334   return 1;
335 }
336 
337 bool all_hex_bytes(const line& inst) {
338   for (int i = 0;  i < SIZE(inst.words);  ++i)
339     if (!is_hex_byte(inst.words.at(i)))
340       return false;
341   return true;
342 }
343 
344 bool contains_any_operand_metadata(const word& word) {
345   for (int i = 0;  i < SIZE(word.metadata);  ++i)
346     if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
347       return true;
348   return false;
349 }
350 
351 // Scan the metadata of 'w' and return the bit corresponding to any operand type.
352 // Also raise an error if metadata contains multiple operand types.
353 uint32_t bitvector_for_operand(const word& w) {
354   uint32_t bv = 0;
355   bool found = false;
356   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
357     const string& curr = w.metadata.at(i);
358     if (!contains_key(Operand_type, curr)) continue;  // ignore unrecognized metadata
359     if (found) {
360       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
361       return INVALID_OPERANDS;
362     }
363     bv = (1 << get(Operand_type, curr));
364     found = true;
365   }
366   return bv;
367 }
368 
369 :(scenario conflicting_operand_type)
370 % Hide_errors = true;
371 == 0x1
372 cd/software-interrupt 80/imm8/imm32
373 +error: '80/imm8/imm32' has conflicting operand types; it should have only one
374 
375 //: Instructions computing effective addresses have more complex rules, so
376 //: we'll hard-code a common set of instruction-decoding rules.
377 
378 :(scenario check_missing_mod_operand)
379 % Hide_errors = true;
380 == 0x1
381 81 0/add/subop       3/rm32/ebx 1/imm32
382 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
383 
384 :(before "End Globals")
385 set<string> Instruction_operands;
386 :(before "End One-time Setup")
387 Instruction_operands.insert("subop");
388 Instruction_operands.insert("mod");
389 Instruction_operands.insert("rm32");
390 Instruction_operands.insert("base");
391 Instruction_operands.insert("index");
392 Instruction_operands.insert("scale");
393 Instruction_operands.insert("r32");
394 Instruction_operands.insert("disp8");
395 Instruction_operands.insert("disp16");
396 Instruction_operands.insert("disp32");
397 Instruction_operands.insert("imm8");
398 Instruction_operands.insert("imm32");
399 
400 :(code)
401 void check_operands_modrm(const line& inst, const word& op) {
402   if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
403   check_metadata_present(inst, "mod", op);
404   check_metadata_present(inst, "rm32", op);
405   // no check for r32; some instructions don't use it; just assume it's 0 if missing
406   if (op.data == "81" || op.data == "8f" || op.data == "ff") {  // keep sync'd with 'help subop'
407     check_metadata_present(inst, "subop", op);
408     check_metadata_absent(inst, "r32", op, "should be replaced by subop");
409   }
410   if (trace_contains_errors()) return;
411   if (metadata(inst, "rm32").data != "4") return;
412   // SIB byte checks
413   uint8_t mod = hex_byte(metadata(inst, "mod").data);
414   if (mod != /*direct*/3) {
415     check_metadata_present(inst, "base", op);
416     check_metadata_present(inst, "index", op);  // otherwise why go to SIB?
417   }
418   else {
419     check_metadata_absent(inst, "base", op, "direct mode");
420     check_metadata_absent(inst, "index", op, "direct mode");
421   }
422   // no check for scale; 0 (2**0 = 1) by default
423 }
424 
425 void check_metadata_present(const line& inst, const string& type, const word& op) {
426   if (!has_metadata(inst, type))
427     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end();
428 }
429 
430 void check_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
431   if (has_metadata(inst, type))
432     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end();
433 }
434 
435 bool has_metadata(const line& inst, const string& m) {
436   bool result = false;
437   for (int i = 0;  i < SIZE(inst.words);  ++i) {
438     if (!has_metadata(inst.words.at(i), m)) continue;
439     if (result) {
440       raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
441       return false;
442     }
443     result = true;
444   }
445   return result;
446 }
447 
448 bool has_metadata(const word& w, const string& m) {
449   bool result = false;
450   bool metadata_found = false;
451   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
452     const string& curr = w.metadata.at(i);
453     if (!contains_key(Instruction_operands, curr)) continue;  // ignore unrecognized metadata
454     if (metadata_found) {
455       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
456       return false;
457     }
458     metadata_found = true;
459     result = (curr == m);
460   }
461   return result;
462 }
463 
464 word metadata(const line& inst, const string& m) {
465   for (int i = 0;  i < SIZE(inst.words);  ++i)
466     if (has_metadata(inst.words.at(i), m))
467       return inst.words.at(i);
468   assert(false);
469 }
470 
471 :(scenario conflicting_operands_in_modrm_instruction)
472 % Hide_errors = true;
473 == 0x1
474 01/add 0/mod 3/mod
475 +error: '01/add 0/mod 3/mod' has conflicting mod operands
476 
477 :(scenario conflicting_operand_type_modrm)
478 % Hide_errors = true;
479 == 0x1
480 01/add 0/mod 3/rm32/r32
481 +error: '3/rm32/r32' has conflicting operand types; it should have only one
482 
483 :(scenario check_missing_rm32_operand)
484 % Hide_errors = true;
485 == 0x1
486 81 0/add/subop 0/mod            1/imm32
487 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
488 
489 :(scenario check_missing_subop_operand)
490 % Hide_errors = true;
491 == 0x1
492 81             0/mod 3/rm32/ebx 1/imm32
493 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
494 
495 :(scenario check_missing_base_operand)
496 % Hide_errors = true;
497 == 0x1
498 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
499 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
500 
501 :(scenario check_missing_index_operand)
502 % Hide_errors = true;
503 == 0x1
504 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
505 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
506 
507 :(scenario check_missing_base_operand_2)
508 % Hide_errors = true;
509 == 0x1
510 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
511 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
512 
513 :(scenario check_base_operand_not_needed_in_direct_mode)
514 == 0x1
515 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32
516 $error: 0
517 
518 //:: similarly handle multi-byte opcodes
519 
520 :(code)
521 void check_operands_0f(const line& inst) {
522   assert(inst.words.at(0).data == "0f");
523   if (SIZE(inst.words) == 1) {
524     raise << "opcode '0f' requires a second opcode\n" << end();
525     return;
526   }
527   word op = preprocess_op(inst.words.at(1));
528   if (!contains_key(name_0f, op.data)) {
529     raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
530     return;
531   }
532   check_operands_0f(inst, op);
533 }
534 
535 void check_operands_f3(const line& /*unused*/) {
536   raise << "no supported opcodes starting with f3\n" << end();
537 }
538 
539 :(scenario check_missing_disp16_operand)
540 % Hide_errors = true;
541 == 0x1
542 # instruction                     effective address                                           operand     displacement    immediate
543 # op          subop               mod             rm32          base      index     scale     r32
# 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    3 bits      0/1/2/4 bytes   0/1/2/4 bytes
545   0f 84                                                                                                                                     # jmp if ZF to ??
546 +error: '0f 84' (jump disp16 bytes away if ZF is set): missing disp16 operand
547 
548 :(before "End Globals")
549 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f;
550 :(before "End Init Permitted Operands")
551 //// Class C: just op and disp16
552 //  imm32 imm8  disp32 |disp16  disp8 subop modrm
553 //  0     0     0      |1       0     0     0
554 put(Permitted_operands_0f, "84", 0x08);
555 put(Permitted_operands_0f, "85", 0x08);
556 put(Permitted_operands_0f, "8c", 0x08);
557 put(Permitted_operands_0f, "8d", 0x08);
558 put(Permitted_operands_0f, "8e", 0x08);
559 put(Permitted_operands_0f, "8f", 0x08);
560 
561 :(code)
562 void check_operands_0f(const line& inst, const word& op) {
563   uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
564   if (HAS(expected_bitvector, MODRM))
565     check_operands_modrm(inst, op);
566   compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);
567 }
568 
569 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
570   if (all_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
571   uint8_t bitvector = compute_operand_bitvector(inst);
572   if (trace_contains_errors()) return;  // duplicate operand type
573   if (bitvector == expected) return;  // all good with this instruction
574   for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
575 //?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
576     if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
577     const string& optype = Operand_type_name.at(i);
578     if ((bitvector & 0x1) > (expected & 0x1))
579       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end();
580     else
581       raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end();
582     // continue giving all errors for a single instruction
583   }
584   // ignore settings in any unused bits
585 }
586 
587 string to_string(const line& inst) {
588   ostringstream out;
589   for (int i = 0;  i < SIZE(inst.words);  ++i) {
590     if (i > 0) out << ' ';
591     out << inst.words.at(i).original;
592   }
593   return out.str();
594 }
595 
// Return a copy of the NUL-terminated string 's' with each character converted
// by the C library's tolower(). A null 's' yields the empty string.
string tolower(const char* s) {
  string result;
  if (!s) return result;
  for (/*nada*/;  *s;  ++s) {
    // The C library's tolower() is only defined for values representable as
    // unsigned char (or EOF), so never hand it a possibly-negative plain char.
    result += static_cast<char>(tolower(static_cast<unsigned char>(*s)));
  }
  return result;
}
602 
603 //:: docs on each operand type
604 
605 :(before "End Help Texts")
606 init_operand_type_help();
607 :(code)
608 void init_operand_type_help() {
609   put(Help, "mod",
610     "2-bit operand controlling the _addressing mode_ of many instructions,\n"
611     "to determine how to compute the _effective address_ to look up memory at\n"
612     "based on the 'rm32' operand and potentially others.\n"
613     "\n"
614     "If mod = 3, just operate on the contents of the register specified by rm32\n"
615     "            (direct mode).\n"
616     "If mod = 2, effective address is usually* rm32 + disp32\n"
617     "            (indirect mode with displacement).\n"
618     "If mod = 1, effective address is usually* rm32 + disp8\n"
619     "            (indirect mode with displacement).\n"
620     "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
621     "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
622     "     Using it as an address gets more involved. For more details,\n"
623     "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
624     "\n"
625     "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
626     "\"32-bit addressing forms with the ModR/M byte\".\n"
627     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
628   );
629   put(Help, "subop",
630     "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
631     "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
632   );
633   put(Help, "r32",
634     "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
635   );
636   put(Help, "rm32",
637     "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
638     "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
639     "\"32-bit addressing forms with the ModR/M byte\".\n"
640     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
641   );
642   put(Help, "base",
643     "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
644     "This address may be further modified by 'index' and 'scale' operands.\n"
645     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
646     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
647     "\"32-bit addressing forms with the SIB byte\".\n"
648     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
649   );
650   put(Help, "index",
651     "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
652     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
653     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
654     "\"32-bit addressing forms with the SIB byte\".\n"
655     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
656   );
657   put(Help, "scale",
658     "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
659     "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
660     "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
661     "\"32-bit addressing forms with the SIB byte\".\n"
662     "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
663   );
664   put(Help, "disp8",
665     "8-bit value to be added in many instructions.\n"
666   );
667   put(Help, "disp16",
668     "16-bit value to be added in many instructions.\n"
669   );
670   put(Help, "disp32",
671     "32-bit value to be added in many instructions.\n"
672   );
673   put(Help, "imm8",
674     "8-bit value for many instructions.\n"
675   );
676   put(Help, "imm32",
677     "32-bit value for many instructions.\n"
678   );
679 }
680 
681 :(before "End Includes")
682 #include<cctype>