From 6e1eeeebfb453fa7c871869c19375ce60fbd7413 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sat, 27 Jul 2019 16:01:55 -0700 Subject: 5485 - promote SubX to top-level --- html/subx/030---operands.cc.html | 554 --------------------------------------- 1 file changed, 554 deletions(-) delete mode 100644 html/subx/030---operands.cc.html (limited to 'html/subx/030---operands.cc.html') diff --git a/html/subx/030---operands.cc.html b/html/subx/030---operands.cc.html deleted file mode 100644 index c42a6a3e..00000000 --- a/html/subx/030---operands.cc.html +++ /dev/null @@ -1,554 +0,0 @@ - - - - -Mu - subx/030---operands.cc - - - - - - - - - - -https://github.com/akkartik/mu/blob/master/subx/030---operands.cc -
-  1 //: Beginning of "level 2": tagging bytes with metadata around what field of
-  2 //: an x86 instruction they're for.
-  3 //:
-  4 //: The x86 instruction set is variable-length, and how a byte is interpreted
-  5 //: affects later instruction boundaries. A lot of the pain in programming
-  6 //: machine code stems from computer and programmer going out of sync on what
-  7 //: a byte means. The miscommunication is usually not immediately caught, and
-  8 //: metastasizes at runtime into kilobytes of misinterpreted instructions.
-  9 //:
- 10 //: To mitigate these issues, we'll start programming in terms of logical
- 11 //: operands rather than physical bytes. Some operands are smaller than a
- 12 //: byte, and others may consist of multiple bytes. This layer will correctly
- 13 //: pack and order the bytes corresponding to the operands in an instruction.
- 14 
- 15 :(before "End Help Texts")
- 16 put_new(Help, "instructions",
- 17   "Each x86 instruction consists of an instruction or opcode and some number\n"
- 18   "of operands.\n"
- 19   "Each operand has a type. An instruction won't have more than one operand of\n"
- 20   "any type.\n"
- 21   "Each instruction has some set of allowed operand types. It'll reject others.\n"
- 22   "The complete list of operand types: mod, subop, r32 (register), rm32\n"
- 23   "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
- 24   "imm32.\n"
- 25   "Each of these has its own help page. Try reading 'subx help mod' next.\n"
- 26 );
- 27 :(before "End Help Contents")
- 28 cerr << "  instructions\n";
- 29 
- 30 :(code)
- 31 void test_pack_immediate_constants() {
- 32   run(
- 33       "== code 0x1\n"
- 34       "bb  0x2a/imm32\n"
- 35   );
- 36   CHECK_TRACE_CONTENTS(
- 37       "transform: packing instruction 'bb 0x2a/imm32'\n"
- 38       "transform: instruction after packing: 'bb 2a 00 00 00'\n"
- 39       "run: copy imm32 0x0000002a to EBX\n"
- 40   );
- 41 }
- 42 
- 43 //: complete set of valid operand types
- 44 
- 45 :(before "End Globals")
- 46 set<string> Instruction_operands;
- 47 :(before "End One-time Setup")
- 48 Instruction_operands.insert("subop");
- 49 Instruction_operands.insert("mod");
- 50 Instruction_operands.insert("rm32");
- 51 Instruction_operands.insert("base");
- 52 Instruction_operands.insert("index");
- 53 Instruction_operands.insert("scale");
- 54 Instruction_operands.insert("r32");
- 55 Instruction_operands.insert("disp8");
- 56 Instruction_operands.insert("disp16");
- 57 Instruction_operands.insert("disp32");
- 58 Instruction_operands.insert("imm8");
- 59 Instruction_operands.insert("imm32");
- 60 
- 61 :(before "End Help Texts")
- 62 init_operand_type_help();
- 63 :(code)
- 64 void init_operand_type_help() {
- 65   put(Help, "mod",
- 66     "2-bit operand controlling the _addressing mode_ of many instructions,\n"
- 67     "to determine how to compute the _effective address_ to look up memory at\n"
- 68     "based on the 'rm32' operand and potentially others.\n"
- 69     "\n"
- 70     "If mod = 3, just operate on the contents of the register specified by rm32\n"
- 71     "            (direct mode).\n"
- 72     "If mod = 2, effective address is usually* rm32 + disp32\n"
- 73     "            (indirect mode with displacement).\n"
- 74     "If mod = 1, effective address is usually* rm32 + disp8\n"
- 75     "            (indirect mode with displacement).\n"
- 76     "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
- 77     "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
- 78     "     Using it as an address gets more involved. For more details,\n"
- 79     "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
- 80     "\n"
- 81     "For complete details, spend some time with two tables in the IA-32 software\n"
- 82     "developer's manual that are also included in this repo:\n"
- 83     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
- 84     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
- 85   );
- 86   put(Help, "subop",
- 87     "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
- 88     "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
- 89   );
- 90   put(Help, "r32",
- 91     "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
- 92   );
- 93   put(Help, "rm32",
- 94     "32-bit value in register or memory. The precise details of its construction\n"
- 95     "depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n"
- 96     "potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n"
- 97     "('disp8' or 'disp32').\n"
- 98     "\n"
- 99     "For complete details, spend some time with two tables in the IA-32 software\n"
-100     "developer's manual that are also included in this repo:\n"
-101     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
-102     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
-103   );
-104   put(Help, "base",
-105     "Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
-106     "register containing an address to look up.\n"
-107     "This address may be further modified by 'index' and 'scale' operands.\n"
-108     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
-109     "For complete details, spend some time with the IA-32 software developer's manual,\n"
-110     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
-111     "It is included in this repository as 'sib.pdf'.\n"
-112   );
-113   put(Help, "index",
-114     "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
-115     "the 'base' operand to compute the 'effective address' at which to look up memory.\n"
-116     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
-117     "For complete details, spend some time with the IA-32 software developer's manual,\n"
-118     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
-119     "It is included in this repository as 'sib.pdf'.\n"
-120   );
-121   put(Help, "scale",
-122     "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
-123     "power of 2 to be multiplied to the 'index' operand before adding the result to\n"
-124     "the 'base' operand to compute the _effective address_ to operate on.\n"
-125     "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
-126     "\n"
-127     "When scale is 0, use index unmodified.\n"
-128     "When scale is 1, multiply index by 2.\n"
-129     "When scale is 2, multiply index by 4.\n"
-130     "When scale is 3, multiply index by 8.\n"
-131     "\n"
-132     "For complete details, spend some time with the IA-32 software developer's manual,\n"
-133     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
-134     "It is included in this repository as 'sib.pdf'.\n"
-135   );
-136   put(Help, "disp8",
-137     "8-bit value to be added in many instructions.\n"
-138   );
-139   put(Help, "disp16",
-140     "16-bit value to be added in many instructions.\n"
-141     "Currently not used in any SubX instructions.\n"
-142   );
-143   put(Help, "disp32",
-144     "32-bit value to be added in many instructions.\n"
-145   );
-146   put(Help, "imm8",
-147     "8-bit value for many instructions.\n"
-148   );
-149   put(Help, "imm32",
-150     "32-bit value for many instructions.\n"
-151   );
-152 }
-153 
-154 //:: transform packing operands into bytes in the right order
-155 
-156 :(after "Begin Transforms")
-157 // Begin Level-2 Transforms
-158 Transform.push_back(pack_operands);
-159 // End Level-2 Transforms
-160 
-161 :(code)
-162 void pack_operands(program& p) {
-163   if (p.segments.empty()) return;
-164   segment& code = *find(p, "code");
-165   // Pack Operands(segment code)
-166   trace(3, "transform") << "-- pack operands" << end();
-167   for (int i = 0;  i < SIZE(code.lines);  ++i) {
-168     line& inst = code.lines.at(i);
-169     if (all_hex_bytes(inst)) continue;
-170     trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
-171     pack_operands(inst);
-172     trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
-173   }
-174 }
-175 
-176 void pack_operands(line& inst) {
-177   line new_inst;
-178   add_opcodes(inst, new_inst);
-179   add_modrm_byte(inst, new_inst);
-180   add_sib_byte(inst, new_inst);
-181   add_disp_bytes(inst, new_inst);
-182   add_imm_bytes(inst, new_inst);
-183   inst.words.swap(new_inst.words);
-184 }
-185 
-186 void add_opcodes(const line& in, line& out) {
-187   out.words.push_back(in.words.at(0));
-188   if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
-189     out.words.push_back(in.words.at(1));
-190   if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
-191     out.words.push_back(in.words.at(2));
-192   if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
-193     out.words.push_back(in.words.at(2));
-194 }
-195 
-196 void add_modrm_byte(const line& in, line& out) {
-197   uint8_t mod=0, reg_subop=0, rm32=0;
-198   bool emit = false;
-199   for (int i = 0;  i < SIZE(in.words);  ++i) {
-200     const word& curr = in.words.at(i);
-201     if (has_operand_metadata(curr, "mod")) {
-202       mod = hex_byte(curr.data);
-203       emit = true;
-204     }
-205     else if (has_operand_metadata(curr, "rm32")) {
-206       rm32 = hex_byte(curr.data);
-207       emit = true;
-208     }
-209     else if (has_operand_metadata(curr, "r32")) {
-210       reg_subop = hex_byte(curr.data);
-211       emit = true;
-212     }
-213     else if (has_operand_metadata(curr, "subop")) {
-214       reg_subop = hex_byte(curr.data);
-215       emit = true;
-216     }
-217   }
-218   if (emit)
-219     out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
-220 }
-221 
-222 void add_sib_byte(const line& in, line& out) {
-223   uint8_t scale=0, index=0, base=0;
-224   bool emit = false;
-225   for (int i = 0;  i < SIZE(in.words);  ++i) {
-226     const word& curr = in.words.at(i);
-227     if (has_operand_metadata(curr, "scale")) {
-228       scale = hex_byte(curr.data);
-229       emit = true;
-230     }
-231     else if (has_operand_metadata(curr, "index")) {
-232       index = hex_byte(curr.data);
-233       emit = true;
-234     }
-235     else if (has_operand_metadata(curr, "base")) {
-236       base = hex_byte(curr.data);
-237       emit = true;
-238     }
-239   }
-240   if (emit)
-241     out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
-242 }
-243 
-244 void add_disp_bytes(const line& in, line& out) {
-245   for (int i = 0;  i < SIZE(in.words);  ++i) {
-246     const word& curr = in.words.at(i);
-247     if (has_operand_metadata(curr, "disp8"))
-248       emit_hex_bytes(out, curr, 1);
-249     if (has_operand_metadata(curr, "disp16"))
-250       emit_hex_bytes(out, curr, 2);
-251     else if (has_operand_metadata(curr, "disp32"))
-252       emit_hex_bytes(out, curr, 4);
-253   }
-254 }
-255 
-256 void add_imm_bytes(const line& in, line& out) {
-257   for (int i = 0;  i < SIZE(in.words);  ++i) {
-258     const word& curr = in.words.at(i);
-259     if (has_operand_metadata(curr, "imm8"))
-260       emit_hex_bytes(out, curr, 1);
-261     else if (has_operand_metadata(curr, "imm32"))
-262       emit_hex_bytes(out, curr, 4);
-263   }
-264 }
-265 
-266 void emit_hex_bytes(line& out, const word& w, int num) {
-267   assert(num <= 4);
-268   bool is_number = looks_like_hex_int(w.data);
-269   if (num == 1 || !is_number) {
-270     out.words.push_back(w);  // preserve existing metadata
-271     if (is_number)
-272       out.words.back().data = hex_byte_to_string(parse_int(w.data));
-273     return;
-274   }
-275   emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
-276 }
-277 
-278 void emit_hex_bytes(line& out, uint32_t val, int num) {
-279   assert(num <= 4);
-280   for (int i = 0;  i < num;  ++i) {
-281     out.words.push_back(hex_byte_text(val & 0xff));
-282     val = val >> 8;
-283   }
-284 }
-285 
-286 word hex_byte_text(uint8_t val) {
-287   word result;
-288   result.data = hex_byte_to_string(val);
-289   result.original = result.data+"/auto";
-290   return result;
-291 }
-292 
-293 string hex_byte_to_string(uint8_t val) {
-294   ostringstream out;
-295   // uint8_t prints without padding, but int8_t will expand to 32 bits again
-296   out << HEXBYTE << NUM(val);
-297   return out.str();
-298 }
-299 
-300 string to_string(const vector<word>& in) {
-301   ostringstream out;
-302   for (int i = 0;  i < SIZE(in);  ++i) {
-303     if (i > 0) out << ' ';
-304     out << in.at(i).data;
-305   }
-306   return out.str();
-307 }
-308 
-309 :(before "End Unit Tests")
-310 void test_preserve_metadata_when_emitting_single_byte() {
-311   word in;
-312   in.data = "f0";
-313   in.original = "f0/foo";
-314   line out;
-315   emit_hex_bytes(out, in, 1);
-316   CHECK_EQ(out.words.at(0).data, "f0");
-317   CHECK_EQ(out.words.at(0).original, "f0/foo");
-318 }
-319 
-320 :(code)
-321 void test_pack_disp8() {
-322   run(
-323       "== code 0x1\n"
-324       "74 2/disp8\n"  // jump 2 bytes away if ZF is set
-325   );
-326   CHECK_TRACE_CONTENTS(
-327       "transform: packing instruction '74 2/disp8'\n"
-328       "transform: instruction after packing: '74 02'\n"
-329   );
-330 }
-331 
-332 void test_pack_disp8_negative() {
-333   transform(
-334       "== code 0x1\n"
-335       // running this will cause an infinite loop
-336       "74 -1/disp8\n"  // jump 1 byte before if ZF is set
-337   );
-338   CHECK_TRACE_CONTENTS(
-339       "transform: packing instruction '74 -1/disp8'\n"
-340       "transform: instruction after packing: '74 ff'\n"
-341   );
-342 }
-343 
-344 //: helper for scenario
-345 void transform(const string& text_bytes) {
-346   program p;
-347   istringstream in(text_bytes);
-348   parse(in, p);
-349   if (trace_contains_errors()) return;
-350   transform(p);
-351 }
-352 
-353 void test_pack_modrm_imm32() {
-354   run(
-355       "== code 0x1\n"
-356       // instruction                     effective address                                                   operand     displacement    immediate\n"
-357       // op          subop               mod             rm32          base        index         scale       r32\n"
-358       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
-359       "  81          0/add/subop         3/mod/direct    3/ebx/rm32                                                                      1/imm32      \n"  // add 1 to EBX
-360   );
-361   CHECK_TRACE_CONTENTS(
-362       "transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'\n"
-363       "transform: instruction after packing: '81 c3 01 00 00 00'\n"
-364   );
-365 }
-366 
-367 void test_pack_imm32_large() {
-368   run(
-369       "== code 0x1\n"
-370       "b9  0x080490a7/imm32\n"
-371   );
-372   CHECK_TRACE_CONTENTS(
-373       "transform: packing instruction 'b9 0x080490a7/imm32'\n"
-374       "transform: instruction after packing: 'b9 a7 90 04 08'\n"
-375   );
-376 }
-377 
-378 void test_pack_immediate_constants_hex() {
-379   run(
-380       "== code 0x1\n"
-381       "b9  0x2a/imm32\n"
-382   );
-383   CHECK_TRACE_CONTENTS(
-384       "transform: packing instruction 'b9 0x2a/imm32'\n"
-385       "transform: instruction after packing: 'b9 2a 00 00 00'\n"
-386       "run: copy imm32 0x0000002a to ECX\n"
-387   );
-388 }
-389 
-390 void test_pack_silently_ignores_non_hex() {
-391   Hide_errors = true;
-392   transform(
-393       "== code 0x1\n"
-394       "b9  foo/imm32\n"
-395   );
-396   CHECK_TRACE_CONTENTS(
-397       "transform: packing instruction 'b9 foo/imm32'\n"
-398       // no change (we're just not printing metadata to the trace)
-399       "transform: instruction after packing: 'b9 foo'\n"
-400   );
-401 }
-402 
-403 void test_pack_flags_bad_hex() {
-404   Hide_errors = true;
-405   run(
-406       "== code 0x1\n"
-407       "b9  0xfoo/imm32\n"
-408   );
-409   CHECK_TRACE_CONTENTS(
-410       "error: not a number: 0xfoo\n"
-411   );
-412 }
-413 
-414 //:: helpers
-415 
-416 bool all_hex_bytes(const line& inst) {
-417   for (int i = 0;  i < SIZE(inst.words);  ++i)
-418     if (!is_hex_byte(inst.words.at(i)))
-419       return false;
-420   return true;
-421 }
-422 
-423 bool is_hex_byte(const word& curr) {
-424   if (contains_any_operand_metadata(curr))
-425     return false;
-426   if (SIZE(curr.data) != 2)
-427     return false;
-428   if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
-429     return false;
-430   return true;
-431 }
-432 
-433 bool contains_any_operand_metadata(const word& word) {
-434   for (int i = 0;  i < SIZE(word.metadata);  ++i)
-435     if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
-436       return true;
-437   return false;
-438 }
-439 
-440 bool has_operand_metadata(const line& inst, const string& m) {
-441   bool result = false;
-442   for (int i = 0;  i < SIZE(inst.words);  ++i) {
-443     if (!has_operand_metadata(inst.words.at(i), m)) continue;
-444     if (result) {
-445       raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
-446       return false;
-447     }
-448     result = true;
-449   }
-450   return result;
-451 }
-452 
-453 bool has_operand_metadata(const word& w, const string& m) {
-454   bool result = false;
-455   bool metadata_found = false;
-456   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
-457     const string& curr = w.metadata.at(i);
-458     if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;  // ignore unrecognized metadata
-459     if (metadata_found) {
-460       raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
-461       return false;
-462     }
-463     metadata_found = true;
-464     result = (curr == m);
-465   }
-466   return result;
-467 }
-468 
-469 word metadata(const line& inst, const string& m) {
-470   for (int i = 0;  i < SIZE(inst.words);  ++i)
-471     if (has_operand_metadata(inst.words.at(i), m))
-472       return inst.words.at(i);
-473   assert(false);
-474 }
-475 
-476 bool looks_like_hex_int(const string& s) {
-477   if (s.empty()) return false;
-478   if (s.at(0) == '-' || s.at(0) == '+') return true;
-479   if (isdigit(s.at(0))) return true;  // includes '0x' prefix
-480   // End looks_like_hex_int(s) Detectors
-481   return false;
-482 }
-483 
-484 string to_string(const line& inst) {
-485   ostringstream out;
-486   for (int i = 0;  i < SIZE(inst.words);  ++i) {
-487     if (i > 0) out << ' ';
-488     out << inst.words.at(i).original;
-489   }
-490   return out.str();
-491 }
-
- - - -- cgit 1.4.1-2-gfad0