1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
:(before "End Help Texts")
// Register the overview help page under the key "instructions". It lists the
// operand types an instruction can carry and points the reader at the
// per-operand pages (starting with 'subx help mod').
put(Help, "instructions",
  "Each x86 instruction consists of an instruction or opcode and some number\n"
  "of operands.\n"
  "Each operand has a type. An instruction won't have more than one operand of\n"
  "any type.\n"
  "Each instruction has some set of allowed operand types. It'll reject others.\n"
  "The complete list of operand types: mod, subop, r32 (register), rm32\n"
  "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
  "imm32.\n"
  "Each of these has its own help page. Try reading 'subx help mod' next.\n"
);
:(before "End Help Contents")
// Advertise the page above in the help table of contents printed to stderr.
cerr << " instructions\n";
52
53
54
55 :(scenario check_missing_imm8_operand)
56 % Hide_errors = true;
57 == 0x1
58
59
60
61 cd
62 +error: 'cd' (software interrupt): missing imm8 operand
63
:(before "End One-time Setup")
// Add the operand checker to the list of transforms.
// NOTE(review): presumably transforms run over each program in the order they
// were registered -- confirm where Transform is consumed.
Transform.push_back(check_operands);
66
67 :(code)
68 void check_operands( program& p) {
69 trace(99, "transform") << "-- check operands" << end();
70 if (p.segments.empty()) return;
71 const segment& code = p.segments.at(0);
72 for (int i = 0; i < SIZE(code.lines); ++i) {
73 check_operands(code.lines.at(i));
74 if (trace_contains_errors()) return;
75 }
76 }
77
78 void check_operands(const line& inst) {
79 word op = preprocess_op(inst.words.at(0));
80 if (op.data == "0f") {
81 check_operands_0f(inst);
82 return;
83 }
84 if (op.data == "f3") {
85 check_operands_f3(inst);
86 return;
87 }
88 check_operands(inst, op);
89 }
90
91 word preprocess_op(word op) {
92 op.data = tolower(op.data.c_str());
93 if (starts_with(op.data, "0x"))
94 op.data = op.data.substr(2);
95 return op;
96 }
97
98
99
100
101
:(before "End Types")
// Operand types tracked in the per-opcode bitvectors.
// An enumerator's value doubles as its bit position in those bitvectors
// (bitvector_for_operand computes `1 << get(Operand_type, ...)`), so the
// order here fixes the meaning of the hex constants in the opcode tables.
enum operand_type {
  MODRM,   // bit 0 (0x01)
  SUBOP,   // bit 1 (0x02)
  DISP8,   // bit 2 (0x04)
  DISP16,  // bit 3 (0x08)
  DISP32,  // bit 4 (0x10)
  IMM8,    // bit 5 (0x20)
  IMM32,   // bit 6 (0x40)
  NUM_OPERAND_TYPES  // count of the above; init_op_types asserts it is <= 8 so bitvectors fit in a uint8_t
};
:(before "End Globals")
// Printable name of each operand_type, indexed by enumerator value.
// Used when reporting missing/unexpected operands.
vector<string> Operand_type_name;
// Lookup from an operand's metadata string to its operand_type.
map<string, operand_type> Operand_type;
:(before "End One-time Setup")
// Fill in the two tables above once at startup.
init_op_types();
119 :(code)
// Initialize Operand_type_name and Operand_type for every operand_type.
void init_op_types() {
  // Bitvectors of operand types are stored in uint8_t, so at most 8 types fit.
  assert(NUM_OPERAND_TYPES <= 8);
  Operand_type_name.resize(NUM_OPERAND_TYPES);

  // NOTE(review): DEF is a macro whose definition is not visible in this
  // excerpt. Presumably it records the lowercase name of each enumerator in
  // Operand_type_name and the reverse mapping in Operand_type -- confirm.
  DEF(MODRM);
  DEF(SUBOP);
  DEF(DISP8);
  DEF(DISP16);
  DEF(DISP32);
  DEF(IMM8);
  DEF(IMM32);

}
133
:(before "End Globals")
// For each supported single-byte opcode (2 lowercase hex digits), a bitvector
// of the operand types its instructions must carry. Bit positions are the
// values of enum operand_type.
map<string, uint8_t> Permitted_operands;
// Sentinel returned by the bitvector computations after an error was raised.
const uint8_t INVALID_OPERANDS = 0xff;
:(before "End One-time Setup")
// Populate Permitted_operands once at startup.
init_permitted_operands();
139 :(code)
140 void init_permitted_operands() {
141
142
143 put(Permitted_operands, "f4", 0x00);
144
145 put(Permitted_operands, "50", 0x00);
146 put(Permitted_operands, "51", 0x00);
147 put(Permitted_operands, "52", 0x00);
148 put(Permitted_operands, "53", 0x00);
149 put(Permitted_operands, "54", 0x00);
150 put(Permitted_operands, "55", 0x00);
151 put(Permitted_operands, "56", 0x00);
152 put(Permitted_operands, "57", 0x00);
153
154 put(Permitted_operands, "58", 0x00);
155 put(Permitted_operands, "59", 0x00);
156 put(Permitted_operands, "5a", 0x00);
157 put(Permitted_operands, "5b", 0x00);
158 put(Permitted_operands, "5c", 0x00);
159 put(Permitted_operands, "5d", 0x00);
160 put(Permitted_operands, "5e", 0x00);
161 put(Permitted_operands, "5f", 0x00);
162
163 put(Permitted_operands, "c3", 0x00);
164
165
166
167
168
169
170 put(Permitted_operands, "eb", 0x04);
171 put(Permitted_operands, "74", 0x04);
172 put(Permitted_operands, "75", 0x04);
173 put(Permitted_operands, "7c", 0x04);
174 put(Permitted_operands, "7d", 0x04);
175 put(Permitted_operands, "7e", 0x04);
176 put(Permitted_operands, "7f", 0x04);
177
178
179
180
181 put(Permitted_operands, "e8", 0x08);
182
183
184
185
186 put(Permitted_operands, "e9", 0x10);
187
188
189
190
191 put(Permitted_operands, "cd", 0x20);
192
193
194
195
196 put(Permitted_operands, "05", 0x40);
197 put(Permitted_operands, "2d", 0x40);
198 put(Permitted_operands, "25", 0x40);
199 put(Permitted_operands, "0d", 0x40);
200 put(Permitted_operands, "35", 0x40);
201 put(Permitted_operands, "3d", 0x40);
202 put(Permitted_operands, "68", 0x40);
203
204 put(Permitted_operands, "b8", 0x40);
205 put(Permitted_operands, "b9", 0x40);
206 put(Permitted_operands, "ba", 0x40);
207 put(Permitted_operands, "bb", 0x40);
208 put(Permitted_operands, "bc", 0x40);
209 put(Permitted_operands, "bd", 0x40);
210 put(Permitted_operands, "be", 0x40);
211 put(Permitted_operands, "bf", 0x40);
212
213
214
215
216
217
218 put(Permitted_operands, "01", 0x01);
219 put(Permitted_operands, "03", 0x01);
220
221 put(Permitted_operands, "29", 0x01);
222 put(Permitted_operands, "2b", 0x01);
223
224 put(Permitted_operands, "21", 0x01);
225 put(Permitted_operands, "23", 0x01);
226
227 put(Permitted_operands, "09", 0x01);
228 put(Permitted_operands, "0b", 0x01);
229
230 put(Permitted_operands, "f7", 0x01);
231
232 put(Permitted_operands, "31", 0x01);
233 put(Permitted_operands, "33", 0x01);
234
235 put(Permitted_operands, "39", 0x01);
236 put(Permitted_operands, "3b", 0x01);
237
238 put(Permitted_operands, "89", 0x01);
239 put(Permitted_operands, "8b", 0x01);
240
241 put(Permitted_operands, "87", 0x01);
242
243 put(Permitted_operands, "8f", 0x01);
244
245
246
247
248 put(Permitted_operands, "ff", 0x03);
249
250
251
252
253 put(Permitted_operands, "c7", 0x41);
254
255
256
257
258 put(Permitted_operands, "81", 0x43);
259
260
261 }
262
263 :(before "End Includes")
264
265
266
267
268 :(code)
269 void check_operands(const line& inst, const word& op) {
270 if (!is_hex_byte(op)) return;
271 uint8_t expected_bitvector = get(Permitted_operands, op.data);
272 if (HAS(expected_bitvector, MODRM))
273 check_operands_modrm(inst, op);
274 compare_bitvector(inst, CLEAR(expected_bitvector, MODRM), op);
275 }
276
277
278
279 void compare_bitvector(const line& inst, uint8_t expected, const word& op) {
280 if (all_hex_bytes(inst) && has_operands(inst)) return;
281 uint8_t bitvector = compute_operand_bitvector(inst);
282 if (trace_contains_errors()) return;
283 if (bitvector == expected) return;
284 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
285
286 if ((bitvector & 0x1) == (expected & 0x1)) continue;
287 const string& optype = Operand_type_name.at(i);
288 if ((bitvector & 0x1) > (expected & 0x1))
289 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end();
290 else
291 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end();
292
293 }
294
295 }
296
297 string maybe_name(const word& op) {
298 if (!is_hex_byte(op)) return "";
299 if (!contains_key(name, op.data)) return "";
300 return " ("+get(name, op.data)+')';
301 }
302
303 bool is_hex_byte(const word& curr) {
304 if (contains_any_operand_metadata(curr))
305 return false;
306 if (SIZE(curr.data) != 2)
307 return false;
308 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
309 return false;
310 return true;
311 }
312
313 uint32_t compute_operand_bitvector(const line& inst) {
314 uint32_t bitvector = 0;
315 for (int i = 1; i < SIZE(inst.words); ++i) {
316 bitvector = bitvector | bitvector_for_operand(inst.words.at(i));
317 if (trace_contains_errors()) return INVALID_OPERANDS;
318 }
319 return bitvector;
320 }
321
322 bool has_operands(const line& inst) {
323 return SIZE(inst.words) > first_operand(inst);
324 }
325
326 int first_operand(const line& inst) {
327 if (inst.words.at(0).data == "0f") return 2;
328 if (inst.words.at(0).data == "f3") {
329 if (inst.words.at(1).data == "0f")
330 return 3;
331 else
332 return 2;
333 }
334 return 1;
335 }
336
337 bool all_hex_bytes(const line& inst) {
338 for (int i = 0; i < SIZE(inst.words); ++i)
339 if (!is_hex_byte(inst.words.at(i)))
340 return false;
341 return true;
342 }
343
344 bool contains_any_operand_metadata(const word& word) {
345 for (int i = 0; i < SIZE(word.metadata); ++i)
346 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
347 return true;
348 return false;
349 }
350
351
352
353 uint32_t bitvector_for_operand(const word& w) {
354 uint32_t bv = 0;
355 bool found = false;
356 for (int i = 0; i < SIZE(w.metadata); ++i) {
357 const string& curr = w.metadata.at(i);
358 if (!contains_key(Operand_type, curr)) continue;
359 if (found) {
360 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
361 return INVALID_OPERANDS;
362 }
363 bv = (1 << get(Operand_type, curr));
364 found = true;
365 }
366 return bv;
367 }
368
369 :(scenario conflicting_operand_type)
370 % Hide_errors = true;
371 == 0x1
372 cd/software-interrupt 80/imm8/imm32
373 +error: '80/imm8/imm32' has conflicting operand types; it should have only one
374
375
376
377
378 :(scenario check_missing_mod_operand)
379 % Hide_errors = true;
380 == 0x1
381 81 0/add/subop 3/rm32/ebx 1/imm32
382 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
383
:(before "End Globals")
// Every metadata string that marks a word as an instruction operand.
// This is a superset of the types tracked in the operand bitvectors: it also
// includes the fields checked individually by check_operands_modrm
// (mod, rm32, r32, base, index, scale).
set<string> Instruction_operands;
:(before "End One-time Setup")
// Keep in sync with the list of operand types in the 'instructions' help page.
Instruction_operands.insert("subop");
Instruction_operands.insert("mod");
Instruction_operands.insert("rm32");
Instruction_operands.insert("base");
Instruction_operands.insert("index");
Instruction_operands.insert("scale");
Instruction_operands.insert("r32");
Instruction_operands.insert("disp8");
Instruction_operands.insert("disp16");
Instruction_operands.insert("disp32");
Instruction_operands.insert("imm8");
Instruction_operands.insert("imm32");
399
400 :(code)
401 void check_operands_modrm(const line& inst, const word& op) {
402 if (all_hex_bytes(inst)) return;
403 check_metadata_present(inst, "mod", op);
404 check_metadata_present(inst, "rm32", op);
405
406 if (op.data == "81" || op.data == "8f" || op.data == "ff") {
407 check_metadata_present(inst, "subop", op);
408 check_metadata_absent(inst, "r32", op, "should be replaced by subop");
409 }
410 if (trace_contains_errors()) return;
411 if (metadata(inst, "rm32").data != "4") return;
412
413 uint8_t mod = hex_byte(metadata(inst, "mod").data);
414 if (mod != 3) {
415 check_metadata_present(inst, "base", op);
416 check_metadata_present(inst, "index", op);
417 }
418 else {
419 check_metadata_absent(inst, "base", op, "direct mode");
420 check_metadata_absent(inst, "index", op, "direct mode");
421 }
422
423 }
424
425 void check_metadata_present(const line& inst, const string& type, const word& op) {
426 if (!has_metadata(inst, type))
427 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end();
428 }
429
430 void check_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
431 if (has_metadata(inst, type))
432 raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end();
433 }
434
435 bool has_metadata(const line& inst, const string& m) {
436 bool result = false;
437 for (int i = 0; i < SIZE(inst.words); ++i) {
438 if (!has_metadata(inst.words.at(i), m)) continue;
439 if (result) {
440 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
441 return false;
442 }
443 result = true;
444 }
445 return result;
446 }
447
448 bool has_metadata(const word& w, const string& m) {
449 bool result = false;
450 bool metadata_found = false;
451 for (int i = 0; i < SIZE(w.metadata); ++i) {
452 const string& curr = w.metadata.at(i);
453 if (!contains_key(Instruction_operands, curr)) continue;
454 if (metadata_found) {
455 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
456 return false;
457 }
458 metadata_found = true;
459 result = (curr == m);
460 }
461 return result;
462 }
463
464 word metadata(const line& inst, const string& m) {
465 for (int i = 0; i < SIZE(inst.words); ++i)
466 if (has_metadata(inst.words.at(i), m))
467 return inst.words.at(i);
468 assert(false);
469 }
470
471 :(scenario conflicting_operands_in_modrm_instruction)
472 % Hide_errors = true;
473 == 0x1
474 01/add 0/mod 3/mod
475 +error: '01/add 0/mod 3/mod' has conflicting mod operands
476
477 :(scenario conflicting_operand_type_modrm)
478 % Hide_errors = true;
479 == 0x1
480 01/add 0/mod 3/rm32/r32
481 +error: '3/rm32/r32' has conflicting operand types; it should have only one
482
483 :(scenario check_missing_rm32_operand)
484 % Hide_errors = true;
485 == 0x1
486 81 0/add/subop 0/mod 1/imm32
487 +error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
488
489 :(scenario check_missing_subop_operand)
490 % Hide_errors = true;
491 == 0x1
492 81 0/mod 3/rm32/ebx 1/imm32
493 +error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
494
495 :(scenario check_missing_base_operand)
496 % Hide_errors = true;
497 == 0x1
498 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
499 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
500
501 :(scenario check_missing_index_operand)
502 % Hide_errors = true;
503 == 0x1
504 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
505 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
506
507 :(scenario check_missing_base_operand_2)
508 % Hide_errors = true;
509 == 0x1
510 81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
511 +error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
512
513 :(scenario check_base_operand_not_needed_in_direct_mode)
514 == 0x1
515 81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32
516 $error: 0
517
518
519
520 :(code)
521 void check_operands_0f(const line& inst) {
522 assert(inst.words.at(0).data == "0f");
523 if (SIZE(inst.words) == 1) {
524 raise << "opcode '0f' requires a second opcode\n" << end();
525 return;
526 }
527 word op = preprocess_op(inst.words.at(1));
528 if (!contains_key(name_0f, op.data)) {
529 raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
530 return;
531 }
532 check_operands_0f(inst, op);
533 }
534
// Reject any instruction whose first opcode byte is f3: no opcodes with that
// prefix are supported. The instruction itself isn't inspected, hence the
// unnamed parameter.
void check_operands_f3(const line& ) {
  raise << "no supported opcodes starting with f3\n" << end();
}
538
539 :(scenario check_missing_disp16_operand)
540 % Hide_errors = true;
541 == 0x1
542
543
544
545 0f 84
546 +error: '0f 84' (jump disp16 bytes away if ZF is set): missing disp16 operand
547
:(before "End Globals")
// Permitted operand types for two-byte opcodes, keyed by the byte that
// follows 0f. Values are bitvectors indexed by enum operand_type, like those
// in Permitted_operands.
map<string, uint8_t> Permitted_operands_0f;
:(before "End Init Permitted Operands")
// 0x08 sets only bit 3 (DISP16): each of these takes just a disp16 operand,
// e.g. '0f 84' is "jump disp16 bytes away if ZF is set".
put(Permitted_operands_0f, "84", 0x08);
put(Permitted_operands_0f, "85", 0x08);
put(Permitted_operands_0f, "8c", 0x08);
put(Permitted_operands_0f, "8d", 0x08);
put(Permitted_operands_0f, "8e", 0x08);
put(Permitted_operands_0f, "8f", 0x08);
560
561 :(code)
562 void check_operands_0f(const line& inst, const word& op) {
563 uint8_t expected_bitvector = get(Permitted_operands_0f, op.data);
564 if (HAS(expected_bitvector, MODRM))
565 check_operands_modrm(inst, op);
566 compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op);
567 }
568
569 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
570 if (all_hex_bytes(inst) && has_operands(inst)) return;
571 uint8_t bitvector = compute_operand_bitvector(inst);
572 if (trace_contains_errors()) return;
573 if (bitvector == expected) return;
574 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) {
575
576 if ((bitvector & 0x1) == (expected & 0x1)) continue;
577 const string& optype = Operand_type_name.at(i);
578 if ((bitvector & 0x1) > (expected & 0x1))
579 raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): unexpected " << optype << " operand\n" << end();
580 else
581 raise << "'" << to_string(inst) << "' (" << get(name_0f, op.data) << "): missing " << optype << " operand\n" << end();
582
583 }
584
585 }
586
587 string to_string(const line& inst) {
588 ostringstream out;
589 for (int i = 0; i < SIZE(inst.words); ++i) {
590 if (i > 0) out << ' ';
591 out << inst.words.at(i).original;
592 }
593 return out.str();
594 }
595
// Return a lowercased copy of the NUL-terminated string 's'.
// Each character goes through unsigned char before reaching the C library's
// tolower(int), whose behavior is undefined for negative values other than
// EOF -- which is what a plain (signed) char holds for any non-ASCII byte.
string tolower(const char* s) {
  string result;
  for (const char* p = s; *p != '\0'; ++p)
    result += static_cast<char>(tolower(static_cast<unsigned char>(*p)));
  return result;
}
602
603
604
:(before "End Help Texts")
// Register the help pages for the individual operand types.
init_operand_type_help();
:(code)
// Add one help page per operand type to the Help table, keyed by the same
// string used to tag operands in instructions (mod, subop, r32, rm32, base,
// index, scale, disp8, disp16, disp32, imm8, imm32).
void init_operand_type_help() {
  put(Help, "mod",
    "2-bit operand controlling the _addressing mode_ of many instructions,\n"
    "to determine how to compute the _effective address_ to look up memory at\n"
    "based on the 'rm32' operand and potentially others.\n"
    "\n"
    "If mod = 3, just operate on the contents of the register specified by rm32\n"
    " (direct mode).\n"
    "If mod = 2, effective address is usually* rm32 + disp32\n"
    " (indirect mode with displacement).\n"
    "If mod = 1, effective address is usually* rm32 + disp8\n"
    " (indirect mode with displacement).\n"
    "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
    "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
    " Using it as an address gets more involved. For more details,\n"
    " try reading the help pages for 'base', 'index' and 'scale'.)\n"
    "\n"
    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
    "\"32-bit addressing forms with the ModR/M byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  // The opcodes named here should match the ones check_operands_modrm
  // requires a subop for.
  put(Help, "subop",
    "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
    "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
  );
  put(Help, "r32",
    "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
  );
  put(Help, "rm32",
    "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
    "\"32-bit addressing forms with the ModR/M byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "base",
    "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
    "This address may be further modified by 'index' and 'scale' operands.\n"
    " effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "index",
    "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
    " effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "scale",
    "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
    " effective address = base + index * scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "disp8",
    "8-bit value to be added in many instructions.\n"
  );
  put(Help, "disp16",
    "16-bit value to be added in many instructions.\n"
  );
  put(Help, "disp32",
    "32-bit value to be added in many instructions.\n"
  );
  put(Help, "imm8",
    "8-bit value for many instructions.\n"
  );
  put(Help, "imm32",
    "32-bit value for many instructions.\n"
  );
}
680
681 :(before "End Includes")
682