https://github.com/akkartik/mu/blob/master/033check_operands.cc
  1 //: Since we're tagging operands with their types, let's start checking these
  2 //: operand types for each instruction.
  3 
  4 void test_check_missing_imm8_operand() {
      // 'cd' is written without any operand here; the expected trace below is the
      // checker reporting the absent imm8. Hide_errors is set because an error is
      // the expected outcome.
  5   Hide_errors = true;
  6   run(
  7       "== code 0x1\n"
  8       "cd\n"  // interrupt ??
  9   );
 10   CHECK_TRACE_CONTENTS(
 11       "error: 'cd' (software interrupt): missing imm8 operand\n"
 12   );
 13 }
 14 
 15 :(before "Pack Operands(segment code)")
//: Run the operand checks on the code segment ahead of the "Pack Operands"
//: waypoint, and return from the enclosing function if the trace already
//: contains errors.
 16 check_operands(code);
 17 if (trace_contains_errors()) return;
 18 
 19 :(code)
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
set tabstop=4
set noexpandtab
class="Delimiter">; 48 return op; 49 } 50 51 void test_preprocess_op() { 52 word w1; w1.data = "0xf"; 53 word w2; w2.data = "0f"; 54 CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data); 55 } 56 57 //: To check the operands for an opcode, we'll track the permitted operands 58 //: for each supported opcode in a bitvector. That way we can often compute the 59 //: 'received' operand bitvector for each instruction's operands and compare 60 //: it with the 'expected' bitvector. 61 //: 62 //: The 'expected' and 'received' bitvectors can be different; the MODRM bit 63 //: in the 'expected' bitvector maps to multiple 'received' operand types in 64 //: an instruction. We deal in expected bitvectors throughout. 65 66 :(before "End Types") 67 enum expected_operand_type { 68 // start from the least significant bit 69 MODRM, // more complex, may also involve disp8 or disp32 70 SUBOP, 71 DISP8, 72 DISP16, 73 DISP32, 74 IMM8, 75 IMM32, 76 NUM_OPERAND_TYPES 77 }; 78 :(before "End Globals") 79 vector<string> Operand_type_name; 80 map<string, expected_operand_type> Operand_type; 81 :(before "End One-time Setup") 82 init_op_types(); 83 :(code) 84 void init_op_types() { 85 assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8); 86 Operand_type_name.resize(NUM_OPERAND_TYPES); 87 #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type); 88 DEF(MODRM); 89 DEF(SUBOP); 90 DEF(DISP8); 91 DEF(DISP16); 92 DEF(DISP32); 93 DEF(IMM8); 94 DEF(IMM32); 95 #undef DEF 96 } 97 98 :(before "End Globals") 99 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands; 100 const uint8_t INVALID_OPERANDS = 0xff; // no instruction uses all the operand types 101 :(before "End One-time Setup") 102 init_permitted_operands(); 103 :(code) 104 void init_permitted_operands() { 105 //// Class A: just op, no operands 106 // halt 107 put(Permitted_operands, "f4", 0x00); 108 // inc 109 put(Permitted_operands, "40", 0x00); 110 put(Permitted_operands, "41", 0x00); 111 
put(Permitted_operands, "42", 0x00); 112 put(Permitted_operands, "43", 0x00); 113 put(Permitted_operands, "44", 0x00); 114 put(Permitted_operands, "45", 0x00); 115 put(Permitted_operands, "46", 0x00); 116 put(Permitted_operands, "47", 0x00); 117 // dec 118 put(Permitted_operands, "48", 0x00); 119 put(Permitted_operands, "49", 0x00); 120 put(Permitted_operands, "4a", 0x00); 121 put(Permitted_operands, "4b", 0x00); 122 put(Permitted_operands, "4c", 0x00); 123 put(Permitted_operands, "4d", 0x00); 124 put(Permitted_operands, "4e", 0x00); 125 put(Permitted_operands, "4f", 0x00); 126 // push 127 put(Permitted_operands, "50", 0x00); 128 put(Permitted_operands, "51", 0x00); 129 put(Permitted_operands, "52", 0x00); 130 put(Permitted_operands, "53", 0x00); 131 put(Permitted_operands, "54", 0x00); 132 put(Permitted_operands, "55", 0x00); 133 put(Permitted_operands, "56", 0x00); 134 put(Permitted_operands, "57", 0x00); 135 // pop 136 put(Permitted_operands, "58", 0x00); 137 put(Permitted_operands, "59", 0x00); 138 put(Permitted_operands, "5a", 0x00); 139 put(Permitted_operands, "5b", 0x00); 140 put(Permitted_operands, "5c", 0x00); 141 put(Permitted_operands, "5d", 0x00); 142 put(Permitted_operands, "5e", 0x00); 143 put(Permitted_operands, "5f", 0x00); 144 // sign-extend EAX into EDX 145 put(Permitted_operands, "99", 0x00); 146 // return 147 put(Permitted_operands, "c3", 0x00); 148 149 //// Class B: just op and disp8 150 // imm32 imm8 disp32 |disp16 disp8 subop modrm 151 // 0 0 0 |0 1 0 0 152 153 // jump 154 put(Permitted_operands, "eb", 0x04); 155 put(Permitted_operands, "72", 0x04); 156 put(Permitted_operands, "73", 0x04); 157 put(Permitted_operands, "74", 0x04); 158 put(Permitted_operands, "75", 0x04); 159 put(Permitted_operands, "76", 0x04); 160 put(Permitted_operands, "77", 0x04); 161 put(Permitted_operands, "7c", 0x04); 162 put(Permitted_operands, "7d", 0x04); 163 put(Permitted_operands, "7e", 0x04); 164 put(Permitted_operands, "7f", 0x04); 165 166 //// Class D: just op 
and disp32 167 // imm32 imm8 disp32 |disp16 disp8 subop modrm 168 // 0 0 1 |0 0 0 0 169 put(Permitted_operands, "e8", 0x10); // call 170 put(Permitted_operands, "e9", 0x10); // jump 171 172 //// Class E: just op and imm8 173 // imm32 imm8 disp32 |disp16 disp8 subop modrm 174 // 0 1 0 |0 0 0 0 175 put(Permitted_operands, "cd", 0x20); // software interrupt 176 177 //// Class F: just op and imm32 178 // imm32 imm8 disp32 |disp16 disp8 subop modrm 179 // 1 0 0 |0 0 0 0 180 put(Permitted_operands, "05", 0x40); // add 181 put(Permitted_operands, "2d", 0x40); // subtract 182 put(Permitted_operands, "25", 0x40); // and 183 put(Permitted_operands, "0d", 0x40); // or 184 put(Permitted_operands, "35", 0x40); // xor 185 put(Permitted_operands, "3d", 0x40); // compare 186 put(Permitted_operands, "68", 0x40); // push 187 // copy 188 put(Permitted_operands, "b8", 0x40); 189 put(Permitted_operands, "b9", 0x40); 190 put(Permitted_operands, "ba", 0x40); 191 put(Permitted_operands, "bb", 0x40); 192 put(Permitted_operands, "bc", 0x40); 193 put(Permitted_operands, "bd", 0x40); 194 put(Permitted_operands, "be", 0x40); 195 put(Permitted_operands, "bf", 0x40); 196 197 //// Class M: using ModR/M byte 198 // imm32 imm8 disp32 |disp16 disp8 subop modrm 199 // 0 0 0 |0 0 0 1 200 201 // add 202 put(Permitted_operands, "01", 0x01); 203 put(Permitted_operands, "03", 0x01); 204 // subtract 205 put(Permitted_operands, "29", 0x01); 206 put(Permitted_operands, "2b", 0x01); 207 // and 208 put(Permitted_operands, "21", 0x01); 209 put(Permitted_operands, "23", 0x01); 210 // or 211 put(Permitted_operands, "09", 0x01); 212 put(Permitted_operands, "0b", 0x01); 213 // xor 214 put(Permitted_operands, "31", 0x01); 215 put(Permitted_operands, "33", 0x01); 216 // compare 217 put(Permitted_operands, "39", 0x01); 218 put(Permitted_operands, "3b", 0x01); 219 // copy 220 put(Permitted_operands, "88", 0x01); 221 put(Permitted_operands, "89", 0x01); 222 put(Permitted_operands, "8a", 0x01); 223 
put(Permitted_operands, "8b", 0x01); 224 // swap 225 put(Permitted_operands, "87", 0x01); 226 // copy address (lea) 227 put(Permitted_operands, "8d", 0x01); 228 229 //// Class N: op, ModR/M and subop (not r32) 230 // imm32 imm8 disp32 |disp16 disp8 subop modrm 231 // 0 0 0 |0 0 1 1 232 put(Permitted_operands, "8f", 0x03); // pop 233 put(Permitted_operands, "d3", 0x03); // shift 234 put(Permitted_operands, "f7", 0x03); // test/not/mul/div 235 put(Permitted_operands, "ff", 0x03); // jump/push/call 236 237 //// Class O: op, ModR/M, subop (not r32) and imm8 238 // imm32 imm8 disp32 |disp16 disp8 subop modrm 239 // 0 1 0 |0 0 1 1 240 put(Permitted_operands, "c1", 0x23); // combine 241 put(Permitted_operands, "c6", 0x23); // copy 242 243 //// Class P: op, ModR/M, subop (not r32) and imm32 244 // imm32 imm8 disp32 |disp16 disp8 subop modrm 245 // 1 0 0 |0 0 1 1 246 put(Permitted_operands, "81", 0x43); // combine 247 put(Permitted_operands, "c7", 0x43); // copy 248 249 // End Init Permitted Operands 250 } 251 252 #define HAS(bitvector, bit) ((bitvector) & (1 << (bit))) 253 #define SET(bitvector, bit) ((bitvector) | (1 << (bit))) 254 #define CLEAR(bitvector, bit) ((bitvector) & (~(1 << (bit)))) 255 256 void check_operands(const line& inst, const word& op) { 257 if (!is_hex_byte(op)) return; 258 uint8_t expected_bitvector = get(Permitted_operands, op.data); 259 if (HAS(expected_bitvector, MODRM)) { 260 check_operands_modrm(inst, op); 261 compare_bitvector_modrm(inst, expected_bitvector, op); 262 } 263 else { 264 compare_bitvector(inst, expected_bitvector, op); 265 } 266 } 267 268 //: Many instructions can be checked just by comparing bitvectors. 
269 270 void compare_bitvector(const line& inst, uint8_t expected, const word& op) { 271 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere 272 uint8_t bitvector = compute_expected_operand_bitvector(inst); 273 if (trace_contains_errors()) return; // duplicate operand type 274 if (bitvector == expected) return; // all good with this instruction 275 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) { 276 //? cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n'; 277 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand 278 const string& optype = Operand_type_name.at(i); 279 if ((bitvector & 0x1) > (expected & 0x1)) 280 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end(); 281 else 282 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); 283 // continue giving all errors for a single instruction 284 } 285 // ignore settings in any unused bits 286 } 287 288 string maybe_name(const word& op) { 289 if (!is_hex_byte(op)) return ""; 290 if (!contains_key(Name, op.data)) return ""; 291 // strip stuff in parens from the name 292 const string& s = get(Name, op.data); 293 return " ("+s.substr(0, s.find(" ("))+')'; 294 } 295 296 uint32_t compute_expected_operand_bitvector(const line& inst) { 297 set<string> operands_found; 298 uint32_t bitvector = 0; 299 for (int i = /*skip op*/1; i < SIZE(inst.words); ++i) { 300 bitvector = bitvector | expected_bit_for_received_operand(inst.words.at(i), operands_found, inst); 301 if (trace_contains_errors()) return INVALID_OPERANDS; // duplicate operand type 302 } 303 return bitvector; 304 } 305 306 bool has_operands(const line& inst) { 307 return SIZE(inst.words) > first_operand(inst); 308 } 309 310 int first_operand(const line& inst) { 311 if (inst.words.at(0).data == 
"0f") return 2; 312 if (inst.words.at(0).data == "f2" || inst.words.at(0).data == "f3") { 313 if (inst.words.at(1).data == "0f") 314 return 3; 315 else 316 return 2; 317 } 318 return 1; 319 } 320 321 // Scan the metadata of 'w' and return the expected bit corresponding to any operand type. 322 // Also raise an error if metadata contains multiple operand types. 323 uint32_t expected_bit_for_received_operand(const word& w, set<string>& instruction_operands, const line& inst) { 324 uint32_t bv = 0; 325 bool found = false; 326 for (int i = 0; i < SIZE(w.metadata); ++i) { 327 string/*copy*/ curr = w.metadata.at(i); 328 string expected_metadata = curr; 329 if (curr == "mod" || curr == "rm32" || curr == "r32" || curr == "scale" || curr == "index" || curr == "base") 330 expected_metadata = "modrm"; 331 else if (!contains_key(Operand_type, curr)) continue; // ignore unrecognized metadata 332 if (found) { 333 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); 334 return INVALID_OPERANDS; 335 } 336 if (instruction_operands.find(curr) != instruction_operands.end()) { 337 raise << "'" << to_string(inst) << "': duplicate " << curr << " operand\n" << end(); 338 return INVALID_OPERANDS; 339 } 340 instruction_operands.insert(curr); 341 bv = (1 << get(Operand_type, expected_metadata)); 342 found = true; 343 } 344 return bv; 345 } 346 347 void test_conflicting_operand_type() { 348 Hide_errors = true; 349 run( 350 "== code 0x1\n" 351 "cd/software-interrupt 80/imm8/imm32\n" 352 ); 353 CHECK_TRACE_CONTENTS( 354 "error: '80/imm8/imm32' has conflicting operand types; it should have only one\n" 355 ); 356 } 357 358 //: Instructions computing effective addresses have more complex rules, so 359 //: we'll hard-code a common set of instruction-decoding rules. 
360 361 void test_check_missing_mod_operand() { 362 Hide_errors = true; 363 run( 364 "== code 0x1\n" 365 "81 0/add/subop 3/rm32/ebx 1/imm32\n" 366 ); 367 CHECK_TRACE_CONTENTS( 368 "error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand\n" 369 ); 370 } 371 372 void check_operands_modrm(const line& inst, const word& op) { 373 if (all_hex_bytes(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere 374 check_operand_metadata_present(inst, "mod", op); 375 check_operand_metadata_present(inst, "rm32", op); 376 // no check for r32; some instructions don't use it; just assume it's 0 if missing 377 if (op.data == "81" || op.data == "8f" || op.data == "ff") { // keep sync'd with 'help subop' 378 check_operand_metadata_present(inst, "subop", op); 379 check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop"); 380 } 381 if (trace_contains_errors()) return; 382 if (metadata(inst, "rm32").data != "4") return; 383 // SIB byte checks 384 uint8_t mod = hex_byte(metadata(inst, "mod").data); 385 if (mod != /*direct*/3) { 386 check_operand_metadata_present(inst, "base", op); 387 check_operand_metadata_present(inst, "index", op); // otherwise why go to SIB? 
388 } 389 else { 390 check_operand_metadata_absent(inst, "base", op, "direct mode"); 391 check_operand_metadata_absent(inst, "index", op, "direct mode"); 392 } 393 // no check for scale; 0 (2**0 = 1) by default 394 } 395 396 // same as compare_bitvector, with one additional exception for modrm-based 397 // instructions: they may use an extra displacement on occasion 398 void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) { 399 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere 400 uint8_t bitvector = compute_expected_operand_bitvector(inst); 401 if (trace_contains_errors()) return; // duplicate operand type 402 // update 'expected' bitvector for the additional exception 403 if (has_operand_metadata(inst, "mod")) { 404 int32_t mod = parse_int(metadata(inst, "mod").data); 405 switch (mod) { 406 case 0: 407 if (has_operand_metadata(inst, "rm32") && parse_int(metadata(inst, "rm32").data) == 5) 408 expected |= (1<<DISP32); 409 break; 410 case 1: 411 expected |= (1<<DISP8); 412 break; 413 case 2: 414 expected |= (1<<DISP32); 415 break; 416 } 417 } 418 if (bitvector == expected) return; // all good with this instruction 419 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, expected >>= 1) { 420 //? 
cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n'; 421 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand 422 const string& optype = Operand_type_name.at(i); 423 if ((bitvector & 0x1) > (expected & 0x1)) 424 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << optype << " operand\n" << end(); 425 else 426 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << optype << " operand\n" << end(); 427 // continue giving all errors for a single instruction 428 } 429 // ignore settings in any unused bits 430 } 431 432 void check_operand_metadata_present(const line& inst, const string& type, const word& op) { 433 if (!has_operand_metadata(inst, type)) 434 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << type << " operand\n" << end(); 435 } 436 437 void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) { 438 if (has_operand_metadata(inst, type)) 439 raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << type << " operand (" << msg << ")\n" << end(); 440 } 441 442 void test_modrm_with_displacement() { 443 Reg[EAX].u = 0x1; 444 transform( 445 "== code 0x1\n" 446 // just avoid null pointer 447 "8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8\n" // copy *(EAX+4) to EDX 448 ); 449 CHECK_TRACE_COUNT("error", 0); 450 } 451 452 void test_check_missing_disp8() { 453 Hide_errors = true; 454 transform( 455 "== code 0x1\n" 456 "89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX\n" // missing disp8 457 ); 458 CHECK_TRACE_CONTENTS( 459 "error: '89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX' (copy r32 to rm32): missing disp8 operand\n" 460 ); 461 } 462 463 void test_check_missing_disp32() { 464 Hide_errors = true; 465 transform( 466 "== code 0x1\n" 467 "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX\n" // missing disp32 468 ); 469 CHECK_TRACE_CONTENTS( 470 
"error: '8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX' (copy rm32 to r32): missing disp32 operand\n" 471 ); 472 } 473 474 void test_conflicting_operands_in_modrm_instruction() { 475 Hide_errors = true; 476 run( 477 "== code 0x1\n" 478 "01/add 0/mod 3/mod\n" 479 ); 480 CHECK_TRACE_CONTENTS( 481 "error: '01/add 0/mod 3/mod' has conflicting mod operands\n" 482 ); 483 } 484 485 void test_conflicting_operand_type_modrm() { 486 Hide_errors = true; 487 run( 488 "== code 0x1\n" 489 "01/add 0/mod 3/rm32/r32\n" 490 ); 491 CHECK_TRACE_CONTENTS( 492 "error: '3/rm32/r32' has conflicting operand types; it should have only one\n" 493 ); 494 } 495 496 void test_check_missing_rm32_operand() { 497 Hide_errors = true; 498 run( 499 "== code 0x1\n" 500 "81 0/add/subop 0/mod 1/imm32\n" 501 ); 502 CHECK_TRACE_CONTENTS( 503 "error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand\n" 504 ); 505 } 506 507 void test_check_missing_subop_operand() { 508 Hide_errors = true; 509 run( 510 "== code 0x1\n" 511 "81 0/mod 3/rm32/ebx 1/imm32\n" 512 ); 513 CHECK_TRACE_CONTENTS( 514 "error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand\n" 515 ); 516 } 517 518 void test_check_missing_base_operand() { 519 Hide_errors = true; 520 run( 521 "== code 0x1\n" 522 "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32\n" 523 ); 524 CHECK_TRACE_CONTENTS( 525 "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand\n" 526 ); 527 } 528 529 void test_check_missing_index_operand() { 530 Hide_errors = true; 531 run( 532 "== code 0x1\n" 533 "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32\n" 534 ); 535 CHECK_TRACE_CONTENTS( 536 "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand\n" 537 ); 538 } 539 540 void test_check_missing_base_operand_2() { 541 Hide_errors = 
true; 542 run( 543 "== code 0x1\n" 544 "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32\n" 545 ); 546 CHECK_TRACE_CONTENTS( 547 "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand\n" 548 ); 549 } 550 551 void test_check_extra_displacement() { 552 Hide_errors = true; 553 run( 554 "== code 0x1\n" 555 "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8\n" 556 ); 557 CHECK_TRACE_CONTENTS( 558 "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8' (copy r32 to rm32): unexpected disp8 operand\n" 559 ); 560 } 561 562 void test_check_duplicate_operand() { 563 Hide_errors = true; 564 run( 565 "== code 0x1\n" 566 "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32\n" 567 ); 568 CHECK_TRACE_CONTENTS( 569 "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32': duplicate r32 operand\n" 570 ); 571 } 572 573 void test_check_base_operand_not_needed_in_direct_mode() { 574 run( 575 "== code 0x1\n" 576 "81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32\n" 577 ); 578 CHECK_TRACE_COUNT("error", 0); 579 } 580 581 void test_extra_modrm() { 582 Hide_errors = true; 583 run( 584 "== code 0x1\n" 585 "59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP\n" 586 ); 587 CHECK_TRACE_CONTENTS( 588 "error: '59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP' (pop top of stack to ECX): unexpected modrm operand\n" 589 ); 590 } 591 592 //:: similarly handle multi-byte opcodes 593 594 void check_operands_0f(const line& inst) { 595 assert(inst.words.at(0).data == "0f"); 596 if (SIZE(inst.words) == 1) { 597 raise << "opcode '0f' requires a second opcode\n" << end(); 598 return; 599 } 600 word op = preprocess_op(inst.words.at(1)); 601 if (!contains_key(Name_0f, op.data)) { 602 raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end(); 603 return; 604 } 605 check_operands_0f(inst, op); 606 } 607 608 void check_operands_f3(const line& /*unused*/) { 609 raise << "no supported opcodes 
starting with f3\n" << end(); 610 } 611 612 void test_check_missing_disp32_operand() { 613 Hide_errors = true; 614 run( 615 "== code 0x1\n" 616 " 0f 84 # jmp if ZF to ??\n" 617 ); 618 CHECK_TRACE_CONTENTS( 619 "error: '0f 84' (jump disp32 bytes away if equal, if ZF is set): missing disp32 operand\n" 620 ); 621 } 622 623 :(before "End Globals") 624 map</*op*/string, /*bitvector*/uint8_t> Permitted_operands_0f; 625 :(before "End Init Permitted Operands") 626 //// Class D: just op and disp32 627 // imm32 imm8 disp32 |disp16 disp8 subop modrm 628 // 0 0 1 |0 0 0 0 629 put_new(Permitted_operands_0f, "82", 0x10); 630 put_new(Permitted_operands_0f, "83", 0x10); 631 put_new(Permitted_operands_0f, "84", 0x10); 632 put_new(Permitted_operands_0f, "85", 0x10); 633 put_new(Permitted_operands_0f, "86", 0x10); 634 put_new(Permitted_operands_0f, "87", 0x10); 635 put_new(Permitted_operands_0f, "8c", 0x10); 636 put_new(Permitted_operands_0f, "8d", 0x10); 637 put_new(Permitted_operands_0f, "8e", 0x10); 638 put_new(Permitted_operands_0f, "8f", 0x10); 639 640 //// Class M: using ModR/M byte 641 // imm32 imm8 disp32 |disp16 disp8 subop modrm 642 // 0 0 0 |0 0 0 1 643 put_new(Permitted_operands_0f, "af", 0x01); 644 645 :(code) 646 void check_operands_0f(const line& inst, const word& op) { 647 uint8_t expected_bitvector = get(Permitted_operands_0f, op.data); 648 if (HAS(expected_bitvector, MODRM)) 649 check_operands_modrm(inst, op); 650 compare_bitvector_0f(inst, CLEAR(expected_bitvector, MODRM), op); 651 } 652 653 void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) { 654 if (all_hex_bytes(inst) && has_operands(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere 655 uint8_t bitvector = compute_expected_operand_bitvector(inst); 656 if (trace_contains_errors()) return; // duplicate operand type 657 if (bitvector == expected) return; // all good with this instruction 658 for (int i = 0; i < NUM_OPERAND_TYPES; ++i, bitvector >>= 1, 
expected >>= 1) { 659 //? cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n'; 660 if ((bitvector & 0x1) == (expected & 0x1)) continue; // all good with this operand 661 const string& optype = Operand_type_name.at(i); 662 if ((bitvector & 0x1) > (expected & 0x1)) 663 raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": unexpected " << optype << " operand\n" << end(); 664 else 665 raise << "'" << to_string(inst) << "'" << maybe_name_0f(op) << ": missing " << optype << " operand\n" << end(); 666 // continue giving all errors for a single instruction 667 } 668 // ignore settings in any unused bits 669 } 670 671 string maybe_name_0f(const word& op) { 672 if (!is_hex_byte(op)) return ""; 673 if (!contains_key(Name_0f, op.data)) return ""; 674 // strip stuff in parens from the name 675 const string& s = get(Name_0f, op.data); 676 return " ("+s.substr(0, s.find(" ("))+')'; 677 } 678 679 string tolower(const char* s) { 680 ostringstream out; 681 for (/*nada*/; *s; ++s) 682 out << static_cast<char>(tolower(*s)); 683 return out.str(); 684 } 685 686 #undef HAS 687 #undef SET 688 #undef CLEAR 689 690 :(before "End Includes") 691 #include<cctype>