https://github.com/akkartik/mu/blob/master/032operands.cc
  1 //: Metadata for fields of an x86 instruction.
  2 //:
  3 //: The x86 instruction set is variable-length, and how a byte is interpreted
  4 //: affects later instruction boundaries. A lot of the pain in programming
  5 //: machine code stems from computer and programmer going out of sync on what
  6 //: a byte means. The miscommunication is usually not immediately caught, and
  7 //: metastasizes at runtime into kilobytes of misinterpreted instructions.
  8 //:
  9 //: To mitigate these issues, we'll start programming in terms of logical
 10 //: operands rather than physical bytes. Some operands are smaller than a
 11 //: byte, and others may consist of multiple bytes. This layer will correctly
 12 //: pack and order the bytes corresponding to the operands in an instruction.
 13 
 14 :(before "End Help Texts")
    //: Help page stored under the key "instructions": a short overview of how
    //: an instruction is made of an opcode plus typed operands, listing every
    //: operand type and pointing the reader at the per-type pages.
 15 put_new(Help, "instructions",
 16   "Each x86 instruction consists of an instruction or opcode and some number\n"
 17   "of operands.\n"
 18   "Each operand has a type. An instruction won't have more than one operand of\n"
 19   "any type.\n"
 20   "Each instruction has some set of allowed operand types. It'll reject others.\n"
 21   "The complete list of operand types: mod, subop, r32 (register), rm32\n"
 22   "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
 23   "imm32.\n"
 24   "Each of these has its own help page. Try reading 'bootstrap help mod' next.\n"
 25 );
 26 :(before "End Help Contents")
    //: Print the page's name at the "End Help Contents" waypoint (presumably the
    //: list of available help topics).
 27 cerr << "  instructions\n";
 28 
 29 :(before "Running Test Program")
    //: Call transform(p) on the program before it is run, and return early if
    //: the trace contains errors afterwards.
 30 transform(p);
 31 if (trace_contains_errors()) return;
 32 
 33 :(code)
 34 void test_pack_immediate_constants() {
      // Opcode bb with one /imm32 operand: packing keeps the opcode byte and
      // expands 0x2a into four bytes, lowest byte first (2a 00 00 00).
      // The last expected trace line checks that the packed instruction then
      // runs and copies the immediate to EBX.
 35   run(
 36       "== code 0x1\n"
 37       "bb  0x2a/imm32\n"
 38   );
 39   CHECK_TRACE_CONTENTS(
 40       "transform: packing instruction 'bb 0x2a/imm32'\n"
 41       "transform: instruction after packing: 'bb 2a 00 00 00'\n"
 42       "run: copy imm32 0x0000002a to EBX\n"
 43   );
 44 }
 45 
 46 //: complete set of valid operand types
 47 
 48 :(before "End Globals")
    //: Names of all operand types a word's metadata may carry. The helpers in
    //: this layer (e.g. contains_any_operand_metadata) look metadata strings up
    //: in this set.
 49 set<string> Instruction_operands;
 50 :(before "End One-time Setup")
    //: Populate the set; the same twelve names are listed in the "instructions"
    //: help page.
 51 Instruction_operands.insert("subop");
 52 Instruction_operands.insert("mod");
 53 Instruction_operands.insert("rm32");
 54 Instruction_operands.insert("base");
 55 Instruction_operands.insert("index");
 56 Instruction_operands.insert("scale");
 57 Instruction_operands.insert("r32");
 58 Instruction_operands.insert("disp8");
 59 Instruction_operands.insert("disp16");
 60 Instruction_operands.insert("disp32");
 61 Instruction_operands.insert("imm8");
 62 Instruction_operands.insert("imm32");
 63 
 64 :(before "End Help Texts")
    //: Register the per-operand-type help pages alongside the other help texts.
 65 init_operand_type_help();
 66 :(code)
 67 void init_operand_type_help() {
      // Stores one help page in Help per operand type: mod, subop, r32, rm32,
      // base, index, scale, disp8, disp16, disp32, imm8 and imm32.
      // Each page is a single string built from adjacent literals, so the line
      // breaks below are only source layout; the "\n" escapes are what the
      // reader sees.
 68   put(Help, "mod",
 69     "2-bit operand controlling the _addressing mode_ of many instructions,\n"
 70     "to determine how to compute the _effective address_ to look up memory at\n"
 71     "based on the 'rm32' operand and potentially others.\n"
 72     "\n"
 73     "If mod = 3, just operate on the contents of the register specified by rm32\n"
 74     "            (direct mode).\n"
 75     "If mod = 2, effective address is usually* rm32 + disp32\n"
 76     "            (indirect mode with displacement).\n"
 77     "If mod = 1, effective address is usually* rm32 + disp8\n"
 78     "            (indirect mode with displacement).\n"
 79     "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
 80     "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
 81     "     Using it as an address gets more involved. For more details,\n"
 82     "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
 83     "\n"
 84     "For complete details, spend some time with two tables in the IA-32 software\n"
 85     "developer's manual that are also included in this repo:\n"
 86     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
 87     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
 88   );
 89   put(Help, "subop",
 90     "Additional 3-bit operand for determining the instruction when the opcode\n"
 91     "is 81, 8f, d3, f7 or ff.\n"
 92     "Can't coexist with operand of type 'r32' in a single instruction, because\n"
 93     "the two use the same bits.\n"
 94   );
 95   put(Help, "r32",
 96     "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
 97   );
 98   put(Help, "rm32",
 99     "32-bit value in register or memory. The precise details of its construction\n"
100     "depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n"
101     "potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n"
102     "('disp8' or 'disp32').\n"
103     "\n"
104     "For complete details, spend some time with two tables in the IA-32 software\n"
105     "developer's manual that are also included in this repo:\n"
106     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
107     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
108   );
      // The next three pages (base, index, scale) describe the parts of the
      // SIB byte and repeat the same effective-address formula.
109   put(Help, "base",
110     "Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
111     "register containing an address to look up.\n"
112     "This address may be further modified by 'index' and 'scale' operands.\n"
113     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
114     "For complete details, spend some time with the IA-32 software developer's manual,\n"
115     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
116     "It is included in this repository as 'sib.pdf'.\n"
117   );
118   put(Help, "index",
119     "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
120     "the 'base' operand to compute the 'effective address' at which to look up memory.\n"
121     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
122     "For complete details, spend some time with the IA-32 software developer's manual,\n"
123     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
124     "It is included in this repository as 'sib.pdf'.\n"
125   );
126   put(Help, "scale",
127     "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
128     "power of 2 to be multiplied to the 'index' operand before adding the result to\n"
129     "the 'base' operand to compute the _effective address_ to operate on.\n"
130     "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
131     "\n"
132     "When scale is 0, use index unmodified.\n"
133     "When scale is 1, multiply index by 2.\n"
134     "When scale is 2, multiply index by 4.\n"
135     "When scale is 3, multiply index by 8.\n"
136     "\n"
137     "For complete details, spend some time with the IA-32 software developer's manual,\n"
138     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
139     "It is included in this repository as 'sib.pdf'.\n"
140   );
141   put(Help, "disp8",
142     "8-bit value to be added in many instructions.\n"
143   );
      // disp16 is a recognized operand type (it is in Instruction_operands),
      // but its own help text says no instruction uses it yet.
144   put(Help, "disp16",
145     "16-bit value to be added in many instructions.\n"
146     "Currently not used in any SubX instructions.\n"
147   );
148   put(Help, "disp32",
149     "32-bit value to be added in many instructions.\n"
150   );
151   put(Help, "imm8",
152     "8-bit value for many instructions.\n"
153   );
154   put(Help, "imm32",
155     "32-bit value for many instructions.\n"
156   );
157 }
158 
159 //:: transform packing operands into bytes in the right order
160 
161 :(after "Begin Transforms")
    //: Append pack_operands(program&) to the Transform list.
162 Transform.push_back(pack_operands);
163 
164 :(code)
165 void pack_operands(program& p) {
166   if (p.segments.empty()) return;
167   segment& code = *find(p, "code");
168   // Pack Operands(segment code)
169   trace(3, "transform") << "-- pack operands" << end();
170   for (int i = 0;  i < SIZE(code.lines);  ++i) {
171     line& inst = code.lines.at(i);
172     if (all_hex_bytes(inst)) continue;
173     trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
174     pack_operands(inst);
175     trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
176   }
177 }
178 
179 void pack_operands(line& inst) {
180   line new_inst;
181   add_opcodes(inst, new_inst);
182   add_modrm_byte(inst, new_inst);
183   add_sib_byte(inst, new_inst);
184   add_disp_bytes(inst, new_inst);
185   add_imm_bytes(inst, new_inst);
186   inst.words.swap(new_inst.words);
187 }
188 
189 void add_opcodes(const line& in, line& out) {
190   out.words.push_back(in.words.at(0));
191   if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
192     out.words.push_back(in.words.at(1));
193   if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
194     out.words.push_back(in.words.at(2));
195   if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
196     out.words.push_back(in.words.at(2));
197 }
198 
199 void add_modrm_byte(const line& in, line& out) {
200   uint8_t mod=0, reg_subop=0, rm32=0;
201   bool emit = false;
202   for (int i = 0;  i < SIZE(in.words);  ++i) {
203     const word& curr = in.words.at(i);
204     if (has_operand_metadata(curr, "mod")) {
205       mod = hex_byte(curr.data);
206       emit = true;
207     }
208     else if (has_operand_metadata(curr, "rm32")) {
209       rm32 = hex_byte(curr.data);
210       emit = true;
211     }
212     else if (has_operand_metadata(curr, "r32")) {
213       reg_subop = hex_byte(curr.data);
214       emit = true;
215     }
216     else if (has_operand_metadata(curr, "subop")) {
217       reg_subop = hex_byte(curr.data);
218       emit = true;
219     }
220   }
221   if (emit)
222     out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
223 }
224 
225 void add_sib_byte(const line& in, line& out) {
226   uint8_t scale=0, index=0, base=0;
227   bool emit = false;
228   for (int i = 0;  i < SIZE(in.words);  ++i) {
229     const word& curr = in.words.at(i);
230     if (has_operand_metadata(curr, "scale"*)privwin, THEME_ONLINE, "-", "-- %s has joined the room.", jidp->resourcepart);
    jid_destroy(jidp);
}

/*
 * Record that the room behind this private chat no longer exists: sets
 * room_left and prints a notice naming the bare JID parsed from the
 * window's full JID.
 */
void
privwin_room_destroyed(ProfPrivateWin *privwin)
{
    assert(privwin != NULL);

    ProfWin *window = (ProfWin*)privwin;
    Jid *room_jid = jid_create(privwin->fulljid);

    privwin->room_left = TRUE;
    win_println(window, THEME_OFFLINE, "!", "-- %s has been destroyed.", room_jid->barejid);

    jid_destroy(room_jid);
}

/*
 * Record that the user is (back) in the room behind this private chat:
 * clears room_left and prints a notice naming the bare JID parsed from the
 * window's full JID.
 */
void
privwin_room_joined(ProfPrivateWin *privwin)
{
    assert(privwin != NULL);

    ProfWin *window = (ProfWin*)privwin;
    Jid *room_jid = jid_create(privwin->fulljid);

    privwin->room_left = FALSE;
    win_println(window, THEME_OFFLINE, "!", "-- You have joined %s.", room_jid->barejid);

    jid_destroy(room_jid);
}

/*
 * Record that the user has left the room behind this private chat: sets
 * room_left and prints a notice naming the bare JID parsed from the
 * window's full JID.
 */
void
privwin_room_left(ProfPrivateWin *privwin)
{
    assert(privwin != NULL);

    ProfWin *window = (ProfWin*)privwin;
    Jid *room_jid = jid_create(privwin->fulljid);

    privwin->room_left = TRUE;
    win_println(window, THEME_OFFLINE, "!", "-- You have left %s.", room_jid->barejid);

    jid_destroy(room_jid);
}

/*
 * Shared body of privwin_room_kicked() and privwin_room_banned().
 *
 * Sets room_left on the window and prints one line of the form
 *   <- <verb><bare JID>[ by <actor>][, reason: <reason>]
 *
 * verb   - leading text, including its trailing space (e.g. "Kicked from ")
 * actor  - who removed the user; the " by ..." part is omitted when NULL
 * reason - why; the ", reason: ..." part is omitted when NULL
 */
static void
_privwin_room_removed(ProfPrivateWin *privwin, const char *const verb,
                      const char *const actor, const char *const reason)
{
    privwin->room_left = TRUE;

    GString *message = g_string_new(verb);
    Jid *jidp = jid_create(privwin->fulljid);
    g_string_append(message, jidp->barejid);
    jid_destroy(jidp);
    if (actor) {
        g_string_append(message, " by ");
        g_string_append(message, actor);
    }
    if (reason) {
        g_string_append(message, ", reason: ");
        g_string_append(message, reason);
    }

    win_println((ProfWin*)privwin, THEME_OFFLINE, "!", "<- %s", message->str);
    g_string_free(message, TRUE);
}

/*
 * The user was kicked from the room behind this private chat.
 * actor and reason are optional (may be NULL).
 */
void
privwin_room_kicked(ProfPrivateWin *privwin, const char *const actor, const char *const reason)
{
    assert(privwin != NULL);

    _privwin_room_removed(privwin, "Kicked from ", actor, reason);
}

/*
 * The user was banned from the room behind this private chat.
 * actor and reason are optional (may be NULL).
 */
void
privwin_room_banned(ProfPrivateWin *privwin, const char *const actor, const char *const reason)
{
    assert(privwin != NULL);

    _privwin_room_removed(privwin, "Banned from ", actor, reason);
}

/*
 * Build a one-line description of the window: "Private <fulljid>", followed
 * by ", <n> unread" when the unread counter is positive.
 *
 * Returns a newly allocated string; the GString wrapper is released here
 * but its character data is handed to the caller, who must free it.
 */
char*
privwin_get_string(ProfPrivateWin *privwin)
{
    assert(privwin != NULL);

    GString *summary = g_string_new("Private ");
    g_string_append(summary, privwin->fulljid);

    const int unread_count = privwin->unread;
    if (unread_count > 0) {
        g_string_append_printf(summary, ", %d unread", unread_count);
    }

    /* FALSE: free only the wrapper and return its character data */
    return g_string_free(summary, FALSE);
}
8" class="LineNr">328 ); 329 CHECK_TRACE_CONTENTS( 330 "transform: packing instruction '74 2/disp8'\n" 331 "transform: instruction after packing: '74 02'\n" 332 ); 333 } 334 335 void test_pack_disp8_negative() { 336 transform( 337 "== code 0x1\n" 338 // running this will cause an infinite loop 339 "74 -1/disp8\n" // jump 1 byte before if ZF is set 340 ); 341 CHECK_TRACE_CONTENTS( 342 "transform: packing instruction '74 -1/disp8'\n" 343 "transform: instruction after packing: '74 ff'\n" 344 ); 345 } 346 347 //: helper for scenario 348 void transform(const string& text_bytes) { 349 program p; 350 istringstream in(text_bytes); 351 parse(in, p); 352 if (trace_contains_errors()) return; 353 transform(p); 354 } 355 356 void test_pack_modrm_imm32() { 357 run( 358 "== code 0x1\n" 359 // instruction effective address operand displacement immediate\n" 360 // op subop mod rm32 base index scale r32\n" 361 // 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes\n" 362 " 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 \n" // add 1 to EBX 363 ); 364 CHECK_TRACE_CONTENTS( 365 "transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'\n" 366 "transform: instruction after packing: '81 c3 01 00 00 00'\n" 367 ); 368 } 369 370 void test_pack_imm32_large() { 371 run( 372 "== code 0x1\n" 373 "b9 0x080490a7/imm32\n" 374 ); 375 CHECK_TRACE_CONTENTS( 376 "transform: packing instruction 'b9 0x080490a7/imm32'\n" 377 "transform: instruction after packing: 'b9 a7 90 04 08'\n" 378 ); 379 } 380 381 void test_pack_immediate_constants_hex() { 382 run( 383 "== code 0x1\n" 384 "b9 0x2a/imm32\n" 385 ); 386 CHECK_TRACE_CONTENTS( 387 "transform: packing instruction 'b9 0x2a/imm32'\n" 388 "transform: instruction after packing: 'b9 2a 00 00 00'\n" 389 "run: copy imm32 0x0000002a to ECX\n" 390 ); 391 } 392 393 void test_pack_silently_ignores_non_hex() { 394 Hide_errors = true; 395 transform( 396 "== code 0x1\n" 397 "b9 foo/imm32\n" 398 ); 399 
CHECK_TRACE_CONTENTS( 400 "transform: packing instruction 'b9 foo/imm32'\n" 401 // no change (we're just not printing metadata to the trace) 402 "transform: instruction after packing: 'b9 foo'\n" 403 ); 404 } 405 406 void test_pack_flags_bad_hex() { 407 Hide_errors = true; 408 run( 409 "== code 0x1\n" 410 "b9 0xfoo/imm32\n" 411 ); 412 CHECK_TRACE_CONTENTS( 413 "error: not a number: 0xfoo\n" 414 ); 415 } 416 417 void test_pack_flags_uppercase_hex() { 418 Hide_errors = true; 419 run( 420 "== code 0x1\n" 421 "b9 0xAb/imm32\n" 422 ); 423 CHECK_TRACE_CONTENTS( 424 "error: uppercase hex not allowed: 0xAb\n" 425 ); 426 } 427 428 //:: helpers 429 430 bool all_hex_bytes(const line& inst) { 431 for (int i = 0; i < SIZE(inst.words); ++i) 432 if (!is_hex_byte(inst.words.at(i))) 433 return false; 434 return true; 435 } 436 437 bool is_hex_byte(const word& curr) { 438 if (contains_any_operand_metadata(curr)) 439 return false; 440 if (SIZE(curr.data) != 2) 441 return false; 442 if (curr.data.find_first_not_of("0123456789abcdef") != string::npos) 443 return false; 444 return true; 445 } 446 447 bool contains_any_operand_metadata(const word& word) { 448 for (int i = 0; i < SIZE(word.metadata); ++i) 449 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end()) 450 return true; 451 return false; 452 } 453 454 bool has_operand_metadata(const line& inst, const string& m) { 455 bool result = false; 456 for (int i = 0; i < SIZE(inst.words); ++i) { 457 if (!has_operand_metadata(inst.words.at(i), m)) continue; 458 if (result) { 459 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end(); 460 return false; 461 } 462 result = true; 463 } 464 return result; 465 } 466 467 bool has_operand_metadata(const word& w, const string& m) { 468 bool result = false; 469 bool metadata_found = false; 470 for (int i = 0; i < SIZE(w.metadata); ++i) { 471 const string& curr = w.metadata.at(i); 472 if (Instruction_operands.find(curr) == 
Instruction_operands.end()) continue; // ignore unrecognized metadata 473 if (metadata_found) { 474 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); 475 return false; 476 } 477 metadata_found = true; 478 result = (curr == m); 479 } 480 return result; 481 } 482 483 word metadata(const line& inst, const string& m) { 484 for (int i = 0; i < SIZE(inst.words); ++i) 485 if (has_operand_metadata(inst.words.at(i), m)) 486 return inst.words.at(i); 487 assert(false); 488 } 489 490 bool looks_like_hex_int(const string& s) { 491 if (s.empty()) return false; 492 if (s.at(0) == '-' || s.at(0) == '+') return true; 493 if (isdigit(s.at(0))) return true; // includes '0x' prefix 494 // End looks_like_hex_int(s) Detectors 495 return false; 496 } 497 498 string to_string(const line& inst) { 499 ostringstream out; 500 for (int i = 0; i < SIZE(inst.words); ++i) { 501 if (i > 0) out << ' '; 502 out << inst.words.at(i).original; 503 } 504 return out.str(); 505 } 506 507 int32_t parse_int(const string& s) { 508 if (s.empty()) return 0; 509 if (contains_uppercase(s)) { 510 raise << "uppercase hex not allowed: " << s << '\n' << end(); 511 return 0; 512 } 513 istringstream in(s); 514 in >> std::hex; 515 if (s.at(0) == '-') { 516 int32_t result = 0; 517 in >> result; 518 if (!in || !in.eof()) { 519 raise << "not a number: " << s << '\n' << end(); 520 return 0; 521 } 522 return result; 523 } 524 uint32_t uresult = 0; 525 in >> uresult; 526 if (!in || !in.eof()) { 527 raise << "not a number: " << s << '\n' << end(); 528 return 0; 529 } 530 return static_cast<int32_t>(uresult); 531 } 532 :(before "End Unit Tests") 533 void test_parse_int() { 534 CHECK_EQ(0, parse_int("0")); 535 CHECK_EQ(0, parse_int("0x0")); 536 CHECK_EQ(0, parse_int("0x0")); 537 CHECK_EQ(16, parse_int("10")); // hex always 538 CHECK_EQ(-1, parse_int("-1")); 539 CHECK_EQ(-1, parse_int("0xffffffff")); 540 }