From 9c1056f531217f3c1c12b3b3a648ca1cffe4beab Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Fri, 29 Mar 2019 00:47:30 -0700 Subject: 5033 --- html/subx/030---operands.cc.html | 871 ++++++++++++++++++++------------------- 1 file changed, 453 insertions(+), 418 deletions(-) (limited to 'html/subx/030---operands.cc.html') diff --git a/html/subx/030---operands.cc.html b/html/subx/030---operands.cc.html index d1699356..3936f5eb 100644 --- a/html/subx/030---operands.cc.html +++ b/html/subx/030---operands.cc.html @@ -14,15 +14,14 @@ pre { white-space: pre-wrap; font-family: monospace; color: #000000; background- body { font-size:12pt; font-family: monospace; color: #000000; background-color: #c6c6c6; } a { color:inherit; } * { font-size:12pt; font-size: 1em; } -.Identifier { color: #af5f00; } .LineNr { } .Constant { color: #008787; } .Delimiter { color: #c000c0; } .Special { color: #d70000; } -.SalientComment { color: #0000af; } +.Identifier { color: #af5f00; } .Normal { color: #000000; background-color: #c6c6c6; padding-bottom: 1px; } .Comment { color: #005faf; } -.traceContains { color: #005f00; } +.SalientComment { color: #0000af; } --> @@ -79,440 +78,476 @@ if ('onhashchange' in window) { 19 "Each operand has a type. An instruction won't have more than one operand of\n" 20 "any type.\n" 21 "Each instruction has some set of allowed operand types. It'll reject others.\n" - 22 "The complete list of operand types: mod, subop, r32 (register), rm32\n" - 23 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n" + 22 "The complete list of operand types: mod, subop, r32 (register), rm32\n" + 23 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n" 24 "imm32.\n" 25 "Each of these has its own help page. Try reading 'subx help mod' next.\n" 26 ); 27 :(before "End Help Contents") 28 cerr << " instructions\n"; 29 - 30 :(scenario pack_immediate_constants) - 31 == 0x1 - 32 bb 0x2a/imm32 - 33 +transform: packing instruction 'bb 0x2a/imm32' - 34 +transform: instruction after packing: 'bb 2a 00 00 00' - 35 +run: copy imm32 0x0000002a to EBX - 36 - 37 //: complete set of valid operand types - 38 - 39 :(before "End Globals") - 40 set<string> Instruction_operands; - 41 :(before "End One-time Setup") - 42 Instruction_operands.insert("subop"); - 43 Instruction_operands.insert("mod"); - 44 Instruction_operands.insert("rm32"); - 45 Instruction_operands.insert("base"); - 46 Instruction_operands.insert("index"); - 47 Instruction_operands.insert("scale"); - 48 Instruction_operands.insert("r32"); - 49 Instruction_operands.insert("disp8"); - 50 Instruction_operands.insert("disp16"); - 51 Instruction_operands.insert("disp32"); - 52 Instruction_operands.insert("imm8"); - 53 Instruction_operands.insert("imm32"); - 54 - 55 :(before "End Help Texts") - 56 init_operand_type_help(); - 57 :(code) - 58 void init_operand_type_help() { - 59 put(Help, "mod", - 60 "2-bit operand controlling the _addressing mode_ of many instructions,\n" - 61 "to determine how to compute the _effective address_ to look up memory at\n" - 62 "based on the 'rm32' operand and potentially others.\n" - 63 "\n" - 64 "If mod = 3, just operate on the contents of the register specified by rm32\n" - 65 " (direct mode).\n" - 66 "If mod = 2, effective address is usually* rm32 + disp32\n" - 67 " (indirect mode with displacement).\n" - 68 "If mod = 1, effective address is usually* rm32 + disp8\n" - 69 " (indirect mode with displacement).\n" - 70 "If mod = 0, effective address is usually* rm32 (indirect mode).\n" - 71 "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n" - 72 " Using it as an address gets more involved. For more details,\n" - 73 " try reading the help pages for 'base', 'index' and 'scale'.)\n" - 74 "\n" - 75 "For complete details, spend some time with two tables in the IA-32 software\n" - 76 "developer's manual that are also included in this repo:\n" - 77 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n" - 78 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n" - 79 ); - 80 put(Help, "subop", - 81 "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n" - 82 "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n" - 83 ); - 84 put(Help, "r32", - 85 "3-bit operand specifying a register operand used directly, without any further addressing modes.\n" - 86 ); - 87 put(Help, "rm32", - 88 "32-bit value in register or memory. The precise details of its construction\n" - 89 "depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n" - 90 "potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n" - 91 "('disp8' or 'disp32').\n" - 92 "\n" - 93 "For complete details, spend some time with two tables in the IA-32 software\n" - 94 "developer's manual that are also included in this repo:\n" - 95 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n" - 96 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n" - 97 ); - 98 put(Help, "base", - 99 "Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n" -100 "register containing an address to look up.\n" -101 "This address may be further modified by 'index' and 'scale' operands.\n" -102 " effective address = base + index*scale + displacement (disp8 or disp32)\n" -103 "For complete details, spend some time with the IA-32 software developer's manual,\n" -104 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n" -105 "It is included in this repository as 'sib.pdf'.\n" -106 ); -107 put(Help, "index", -108 "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n" -109 "the 'base' operand to compute the 'effective address' at which to look up memory.\n" -110 " effective address = base + index*scale + displacement (disp8 or disp32)\n" -111 "For complete details, spend some time with the IA-32 software developer's manual,\n" -112 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n" -113 "It is included in this repository as 'sib.pdf'.\n" -114 ); -115 put(Help, "scale", -116 "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n" -117 "power of 2 to be multiplied to the 'index' operand before adding the result to\n" -118 "the 'base' operand to compute the _effective address_ to operate on.\n" -119 " effective address = base + index * scale + displacement (disp8 or disp32)\n" -120 "\n" -121 "When scale is 0, use index unmodified.\n" -122 "When scale is 1, multiply index by 2.\n" -123 "When scale is 2, multiply index by 4.\n" -124 "When scale is 3, multiply index by 8.\n" -125 "\n" -126 "For complete details, spend some time with the IA-32 software developer's manual,\n" -127 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n" -128 "It is included in this repository as 'sib.pdf'.\n" -129 ); -130 put(Help, "disp8", -131 "8-bit value to be added in many instructions.\n" -132 ); -133 put(Help, "disp16", -134 "16-bit value to be added in many instructions.\n" + 30 :(code) + 31 void test_pack_immediate_constants() { + 32 run( + 33 "== 0x1\n" // code segment + 34 "bb 0x2a/imm32\n" + 35 ); + 36 CHECK_TRACE_CONTENTS( + 37 "transform: packing instruction 'bb 0x2a/imm32'\n" + 38 "transform: instruction after packing: 'bb 2a 00 00 00'\n" + 39 "run: copy imm32 0x0000002a to EBX\n" + 40 ); + 41 } + 42 + 43 //: complete set of valid operand types + 44 + 45 :(before "End Globals") + 46 set<string> Instruction_operands; + 47 :(before "End One-time Setup") + 48 Instruction_operands.insert("subop"); + 49 Instruction_operands.insert("mod"); + 50 Instruction_operands.insert("rm32"); + 51 Instruction_operands.insert("base"); + 52 Instruction_operands.insert("index"); + 53 Instruction_operands.insert("scale"); + 54 Instruction_operands.insert("r32"); + 55 Instruction_operands.insert("disp8"); + 56 Instruction_operands.insert("disp16"); + 57 Instruction_operands.insert("disp32"); + 58 Instruction_operands.insert("imm8"); + 59 Instruction_operands.insert("imm32"); + 60 + 61 :(before "End Help Texts") + 62 init_operand_type_help(); + 63 :(code) + 64 void init_operand_type_help() { + 65 put(Help, "mod", + 66 "2-bit operand controlling the _addressing mode_ of many instructions,\n" + 67 "to determine how to compute the _effective address_ to look up memory at\n" + 68 "based on the 'rm32' operand and potentially others.\n" + 69 "\n" + 70 "If mod = 3, just operate on the contents of the register specified by rm32\n" + 71 " (direct mode).\n" + 72 "If mod = 2, effective address is usually* rm32 + disp32\n" + 73 " (indirect mode with displacement).\n" + 74 "If mod = 1, effective address is usually* rm32 + disp8\n" + 75 " (indirect mode with displacement).\n" + 76 "If mod = 0, effective address is usually* rm32 (indirect mode).\n" + 77 "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n" + 78 " Using it as an address gets more involved. For more details,\n" + 79 " try reading the help pages for 'base', 'index' and 'scale'.)\n" + 80 "\n" + 81 "For complete details, spend some time with two tables in the IA-32 software\n" + 82 "developer's manual that are also included in this repo:\n" + 83 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n" + 84 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n" + 85 ); + 86 put(Help, "subop", + 87 "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n" + 88 "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n" + 89 ); + 90 put(Help, "r32", + 91 "3-bit operand specifying a register operand used directly, without any further addressing modes.\n" + 92 ); + 93 put(Help, "rm32", + 94 "32-bit value in register or memory. The precise details of its construction\n" + 95 "depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n" + 96 "potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n" + 97 "('disp8' or 'disp32').\n" + 98 "\n" + 99 "For complete details, spend some time with two tables in the IA-32 software\n" +100 "developer's manual that are also included in this repo:\n" +101 " - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n" +102 " - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n" +103 ); +104 put(Help, "base", +105 "Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n" +106 "register containing an address to look up.\n" +107 "This address may be further modified by 'index' and 'scale' operands.\n" +108 " effective address = base + index*scale + displacement (disp8 or disp32)\n" +109 "For complete details, spend some time with the IA-32 software developer's manual,\n" +110 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n" +111 "It is included in this repository as 'sib.pdf'.\n" +112 ); +113 put(Help, "index", +114 "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n" +115 "the 'base' operand to compute the 'effective address' at which to look up memory.\n" +116 " effective address = base + index*scale + displacement (disp8 or disp32)\n" +117 "For complete details, spend some time with the IA-32 software developer's manual,\n" +118 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n" +119 "It is included in this repository as 'sib.pdf'.\n" +120 ); +121 put(Help, "scale", +122 "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n" +123 "power of 2 to be multiplied to the 'index' operand before adding the result to\n" +124 "the 'base' operand to compute the _effective address_ to operate on.\n" +125 " effective address = base + index * scale + displacement (disp8 or disp32)\n" +126 "\n" +127 "When scale is 0, use index unmodified.\n" +128 "When scale is 1, multiply index by 2.\n" +129 "When scale is 2, multiply index by 4.\n" +130 "When scale is 3, multiply index by 8.\n" +131 "\n" +132 "For complete details, spend some time with the IA-32 software developer's manual,\n" +133 "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n" +134 "It is included in this repository as 'sib.pdf'.\n" 135 ); -136 put(Help, "disp32", -137 "32-bit value to be added in many instructions.\n" +136 put(Help, "disp8", +137 "8-bit value to be added in many instructions.\n" 138 ); -139 put(Help, "imm8", -140 "8-bit value for many instructions.\n" -141 ); -142 put(Help, "imm32", -143 "32-bit value for many instructions.\n" -144 ); -145 } -146 -147 //:: transform packing operands into bytes in the right order -148 -149 :(after "Begin Transforms") -150 // Begin Level-2 Transforms -151 Transform.push_back(pack_operands); -152 // End Level-2 Transforms +139 put(Help, "disp16", +140 "16-bit value to be added in many instructions.\n" +141 "Currently not used in any SubX instructions.\n" +142 ); +143 put(Help, "disp32", +144 "32-bit value to be added in many instructions.\n" +145 ); +146 put(Help, "imm8", +147 "8-bit value for many instructions.\n" +148 ); +149 put(Help, "imm32", +150 "32-bit value for many instructions.\n" +151 ); +152 } 153 -154 :(code) -155 void pack_operands(program& p) { -156 if (p.segments.empty()) return; -157 segment& code = p.segments.at(0); -158 // Pack Operands(segment code) -159 trace(3, "transform") << "-- pack operands" << end(); -160 for (int i = 0; i < SIZE(code.lines); ++i) { -161 line& inst = code.lines.at(i); -162 if (all_hex_bytes(inst)) continue; -163 trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end(); -164 pack_operands(inst); -165 trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end(); -166 } -167 } -168 -169 void pack_operands(line& inst) { -170 line new_inst; -171 add_opcodes(inst, new_inst); -172 add_modrm_byte(inst, new_inst); -173 add_sib_byte(inst, new_inst); -174 add_disp_bytes(inst, new_inst); -175 add_imm_bytes(inst, new_inst); -176 inst.words.swap(new_inst.words); -177 } -178 -179 void add_opcodes(const line& in, line& out) { -180 out.words.push_back(in.words.at(0)); -181 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3") -182 out.words.push_back(in.words.at(1)); -183 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f") -184 out.words.push_back(in.words.at(2)); -185 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f") -186 out.words.push_back(in.words.at(2)); -187 } -188 -189 void add_modrm_byte(const line& in, line& out) { -190 uint8_t mod=0, reg_subop=0, rm32=0; -191 bool emit = false; -192 for (int i = 0; i < SIZE(in.words); ++i) { -193 const word& curr = in.words.at(i); -194 if (has_operand_metadata(curr, "mod")) { -195 mod = hex_byte(curr.data); -196 emit = true; -197 } -198 else if (has_operand_metadata(curr, "rm32")) { -199 rm32 = hex_byte(curr.data); -200 emit = true; -201 } -202 else if (has_operand_metadata(curr, "r32")) { -203 reg_subop = hex_byte(curr.data); -204 emit = true; -205 } -206 else if (has_operand_metadata(curr, "subop")) { -207 reg_subop = hex_byte(curr.data); -208 emit = true; -209 } -210 } -211 if (emit) -212 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32)); -213 } -214 -215 void add_sib_byte(const line& in, line& out) { -216 uint8_t scale=0, index=0, base=0; -217 bool emit = false; -218 for (int i = 0; i < SIZE(in.words); ++i) { -219 const word& curr = in.words.at(i); -220 if (has_operand_metadata(curr, "scale")) { -221 scale = hex_byte(curr.data); -222 emit = true; -223 } -224 else if (has_operand_metadata(curr, "index")) { -225 index = hex_byte(curr.data); -226 emit = true; -227 } -228 else if (has_operand_metadata(curr, "base")) { -229 base = hex_byte(curr.data); -230 emit = true; -231 } -232 } -233 if (emit) -234 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base)); -235 } -236 -237 void add_disp_bytes(const line& in, line& out) { -238 for (int i = 0; i < SIZE(in.words); ++i) { -239 const word& curr = in.words.at(i); -240 if (has_operand_metadata(curr, "disp8")) -241 emit_hex_bytes(out, curr, 1); -242 if (has_operand_metadata(curr, "disp16")) -243 emit_hex_bytes(out, curr, 2); -244 else if (has_operand_metadata(curr, "disp32")) -245 emit_hex_bytes(out, curr, 4); -246 } -247 } -248 -249 void add_imm_bytes(const line& in, line& out) { -250 for (int i = 0; i < SIZE(in.words); ++i) { -251 const word& curr = in.words.at(i); -252 if (has_operand_metadata(curr, "imm8")) -253 emit_hex_bytes(out, curr, 1); -254 else if (has_operand_metadata(curr, "imm32")) -255 emit_hex_bytes(out, curr, 4); -256 } -257 } -258 -259 void emit_hex_bytes(line& out, const word& w, int num) { -260 assert(num <= 4); -261 bool is_number = looks_like_hex_int(w.data); -262 if (num == 1 || !is_number) { -263 out.words.push_back(w); // preserve existing metadata -264 if (is_number) -265 out.words.back().data = hex_byte_to_string(parse_int(w.data)); -266 return; -267 } -268 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num); -269 } -270 -271 void emit_hex_bytes(line& out, uint32_t val, int num) { -272 assert(num <= 4); -273 for (int i = 0; i < num; ++i) { -274 out.words.push_back(hex_byte_text(val & 0xff)); -275 val = val >> 8; -276 } -277 } -278 -279 word hex_byte_text(uint8_t val) { -280 word result; -281 result.data = hex_byte_to_string(val); -282 result.original = result.data+"/auto"; -283 return result; +154 //:: transform packing operands into bytes in the right order +155 +156 :(after "Begin Transforms") +157 // Begin Level-2 Transforms +158 Transform.push_back(pack_operands); +159 // End Level-2 Transforms +160 +161 :(code) +162 void pack_operands(program& p) { +163 if (p.segments.empty()) return; +164 segment& code = p.segments.at(0); +165 // Pack Operands(segment code) +166 trace(3, "transform") << "-- pack operands" << end(); +167 for (int i = 0; i < SIZE(code.lines); ++i) { +168 line& inst = code.lines.at(i); +169 if (all_hex_bytes(inst)) continue; +170 trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end(); +171 pack_operands(inst); +172 trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end(); +173 } +174 } +175 +176 void pack_operands(line& inst) { +177 line new_inst; +178 add_opcodes(inst, new_inst); +179 add_modrm_byte(inst, new_inst); +180 add_sib_byte(inst, new_inst); +181 add_disp_bytes(inst, new_inst); +182 add_imm_bytes(inst, new_inst); +183 inst.words.swap(new_inst.words); +184 } +185 +186 void add_opcodes(const line& in, line& out) { +187 out.words.push_back(in.words.at(0)); +188 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3") +189 out.words.push_back(in.words.at(1)); +190 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f") +191 out.words.push_back(in.words.at(2)); +192 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f") +193 out.words.push_back(in.words.at(2)); +194 } +195 +196 void add_modrm_byte(const line& in, line& out) { +197 uint8_t mod=0, reg_subop=0, rm32=0; +198 bool emit = false; +199 for (int i = 0; i < SIZE(in.words); ++i) { +200 const word& curr = in.words.at(i); +201 if (has_operand_metadata(curr, "mod")) { +202 mod = hex_byte(curr.data); +203 emit = true; +204 } +205 else if (has_operand_metadata(curr, "rm32")) { +206 rm32 = hex_byte(curr.data); +207 emit = true; +208 } +209 else if (has_operand_metadata(curr, "r32")) { +210 reg_subop = hex_byte(curr.data); +211 emit = true; +212 } +213 else if (has_operand_metadata(curr, "subop")) { +214 reg_subop = hex_byte(curr.data); +215 emit = true; +216 } +217 } +218 if (emit) +219 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32)); +220 } +221 +222 void add_sib_byte(const line& in, line& out) { +223 uint8_t scale=0, index=0, base=0; +224 bool emit = false; +225 for (int i = 0; i < SIZE(in.words); ++i) { +226 const word& curr = in.words.at(i); +227 if (has_operand_metadata(curr, "scale")) { +228 scale = hex_byte(curr.data); +229 emit = true; +230 } +231 else if (has_operand_metadata(curr, "index")) { +232 index = hex_byte(curr.data); +233 emit = true; +234 } +235 else if (has_operand_metadata(curr, "base")) { +236 base = hex_byte(curr.data); +237 emit = true; +238 } +239 } +240 if (emit) +241 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base)); +242 } +243 +244 void add_disp_bytes(const line& in, line& out) { +245 for (int i = 0; i < SIZE(in.words); ++i) { +246 const word& curr = in.words.at(i); +247 if (has_operand_metadata(curr, "disp8")) +248 emit_hex_bytes(out, curr, 1); +249 if (has_operand_metadata(curr, "disp16")) +250 emit_hex_bytes(out, curr, 2); +251 else if (has_operand_metadata(curr, "disp32")) +252 emit_hex_bytes(out, curr, 4); +253 } +254 } +255 +256 void add_imm_bytes(const line& in, line& out) { +257 for (int i = 0; i < SIZE(in.words); ++i) { +258 const word& curr = in.words.at(i); +259 if (has_operand_metadata(curr, "imm8")) +260 emit_hex_bytes(out, curr, 1); +261 else if (has_operand_metadata(curr, "imm32")) +262 emit_hex_bytes(out, curr, 4); +263 } +264 } +265 +266 void emit_hex_bytes(line& out, const word& w, int num) { +267 assert(num <= 4); +268 bool is_number = looks_like_hex_int(w.data); +269 if (num == 1 || !is_number) { +270 out.words.push_back(w); // preserve existing metadata +271 if (is_number) +272 out.words.back().data = hex_byte_to_string(parse_int(w.data)); +273 return; +274 } +275 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num); +276 } +277 +278 void emit_hex_bytes(line& out, uint32_t val, int num) { +279 assert(num <= 4); +280 for (int i = 0; i < num; ++i) { +281 out.words.push_back(hex_byte_text(val & 0xff)); +282 val = val >> 8; +283 } 284 } 285 -286 string hex_byte_to_string(uint8_t val) { -287 ostringstream out; -288 // uint8_t prints without padding, but int8_t will expand to 32 bits again -289 out << HEXBYTE << NUM(val); -290 return out.str(); +286 word hex_byte_text(uint8_t val) { +287 word result; +288 result.data = hex_byte_to_string(val); +289 result.original = result.data+"/auto"; +290 return result; 291 } 292 -293 string to_string(const vector<word>& in) { +293 string hex_byte_to_string(uint8_t val) { 294 ostringstream out; -295 for (int i = 0; i < SIZE(in); ++i) { -296 if (i > 0) out << ' '; -297 out << in.at(i).data; -298 } -299 return out.str(); -300 } -301 -302 :(before "End Unit Tests") -303 void test_preserve_metadata_when_emitting_single_byte() { -304 word in; -305 in.data = "f0"; -306 in.original = "f0/foo"; -307 line out; -308 emit_hex_bytes(out, in, 1); -309 CHECK_EQ(out.words.at(0).data, "f0"); -310 CHECK_EQ(out.words.at(0).original, "f0/foo"); -311 } -312 -313 :(scenario pack_disp8) -314 == 0x1 -315 74 2/disp8 # jump 2 bytes away if ZF is set -316 +transform: packing instruction '74 2/disp8' -317 +transform: instruction after packing: '74 02' -318 -319 :(scenarios transform) -320 :(scenario pack_disp8_negative) -321 == 0x1 -322 # running this will cause an infinite loop -323 74 -1/disp8 # jump 1 byte before if ZF is set -324 +transform: packing instruction '74 -1/disp8' -325 +transform: instruction after packing: '74 ff' -326 :(scenarios run) -327 -328 //: helper for scenario -329 :(code) -330 void transform(const string& text_bytes) { -331 program p; -332 istringstream in(text_bytes); -333 parse(in, p); -334 if (trace_contains_errors()) return; -335 transform(p); -336 } -337 -338 :(scenario pack_modrm_imm32) -339 == 0x1 -340 # instruction effective address operand displacement immediate -341 # op subop mod rm32 base index scale r32 -342 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -343 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 # add 1 to EBX -344 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32' -345 +transform: instruction after packing: '81 c3 01 00 00 00' -346 -347 :(scenario pack_imm32_large) -348 == 0x1 -349 b9 0x080490a7/imm32 -350 +transform: packing instruction 'b9 0x080490a7/imm32' -351 +transform: instruction after packing: 'b9 a7 90 04 08' +295 // uint8_t prints without padding, but int8_t will expand to 32 bits again +296 out << HEXBYTE << NUM(val); +297 return out.str(); +298 } +299 +300 string to_string(const vector<word>& in) { +301 ostringstream out; +302 for (int i = 0; i < SIZE(in); ++i) { +303 if (i > 0) out << ' '; +304 out << in.at(i).data; +305 } +306 return out.str(); +307 } +308 +309 :(before "End Unit Tests") +310 void test_preserve_metadata_when_emitting_single_byte() { +311 word in; +312 in.data = "f0"; +313 in.original = "f0/foo"; +314 line out; +315 emit_hex_bytes(out, in, 1); +316 CHECK_EQ(out.words.at(0).data, "f0"); +317 CHECK_EQ(out.words.at(0).original, "f0/foo"); +318 } +319 +320 :(code) +321 void test_pack_disp8() { +322 run( +323 "== 0x1\n" // code segment +324 "74 2/disp8\n" // jump 2 bytes away if ZF is set +325 ); +326 CHECK_TRACE_CONTENTS( +327 "transform: packing instruction '74 2/disp8'\n" +328 "transform: instruction after packing: '74 02'\n" +329 ); +330 } +331 +332 void test_pack_disp8_negative() { +333 transform( +334 "== 0x1\n" // code segment +335 // running this will cause an infinite loop +336 "74 -1/disp8\n" // jump 1 byte before if ZF is set +337 ); +338 CHECK_TRACE_CONTENTS( +339 "transform: packing instruction '74 -1/disp8'\n" +340 "transform: instruction after packing: '74 ff'\n" +341 ); +342 } +343 +344 //: helper for scenario +345 void transform(const string& text_bytes) { +346 program p; +347 istringstream in(text_bytes); +348 parse(in, p); +349 if (trace_contains_errors()) return; +350 transform(p); +351 } 352 -353 :(scenario pack_immediate_constants_hex) -354 == 0x1 -355 b9 0x2a/imm32 -356 +transform: packing instruction 'b9 0x2a/imm32' -357 +transform: instruction after packing: 'b9 2a 00 00 00' -358 +run: copy imm32 0x0000002a to ECX -359 -360 :(scenarios transform) -361 :(scenario pack_silently_ignores_non_hex) -362 % Hide_errors = true; -363 == 0x1 -364 b9 foo/imm32 -365 +transform: packing instruction 'b9 foo/imm32' -366 # no change (we're just not printing metadata to the trace) -367 +transform: instruction after packing: 'b9 foo' -368 :(scenarios run) -369 -370 :(scenario pack_flags_bad_hex) -371 % Hide_errors = true; -372 == 0x1 -373 b9 0xfoo/imm32 -374 +error: not a number: 0xfoo -375 -376 //:: helpers +353 void test_pack_modrm_imm32() { +354 run( +355 "== 0x1\n" // code segment +356 // instruction effective address operand displacement immediate\n" +357 // op subop mod rm32 base index scale r32\n" +358 // 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes\n" +359 " 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 \n" // add 1 to EBX +360 ); +361 CHECK_TRACE_CONTENTS( +362 "transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'\n" +363 "transform: instruction after packing: '81 c3 01 00 00 00'\n" +364 ); +365 } +366 +367 void test_pack_imm32_large() { +368 run( +369 "== 0x1\n" // code segment +370 "b9 0x080490a7/imm32\n" +371 ); +372 CHECK_TRACE_CONTENTS( +373 "transform: packing instruction 'b9 0x080490a7/imm32'\n" +374 "transform: instruction after packing: 'b9 a7 90 04 08'\n" +375 ); +376 } 377 -378 :(code) -379 bool all_hex_bytes(const line& inst) { -380 for (int i = 0; i < SIZE(inst.words); ++i) -381 if (!is_hex_byte(inst.words.at(i))) -382 return false; -383 return true; -384 } -385 -386 bool is_hex_byte(const word& curr) { -387 if (contains_any_operand_metadata(curr)) -388 return false; -389 if (SIZE(curr.data) != 2) -390 return false; -391 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos) -392 return false; -393 return true; -394 } -395 -396 bool contains_any_operand_metadata(const word& word) { -397 for (int i = 0; i < SIZE(word.metadata); ++i) -398 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end()) -399 return true; -400 return false; +378 void test_pack_immediate_constants_hex() { +379 run( +380 "== 0x1\n" // code segment +381 "b9 0x2a/imm32\n" +382 ); +383 CHECK_TRACE_CONTENTS( +384 "transform: packing instruction 'b9 0x2a/imm32'\n" +385 "transform: instruction after packing: 'b9 2a 00 00 00'\n" +386 "run: copy imm32 0x0000002a to ECX\n" +387 ); +388 } +389 +390 void test_pack_silently_ignores_non_hex() { +391 Hide_errors = true; +392 transform( +393 "== 0x1\n" // code segment +394 "b9 foo/imm32\n" +395 ); +396 CHECK_TRACE_CONTENTS( +397 "transform: packing instruction 'b9 foo/imm32'\n" +398 // no change (we're just not printing metadata to the trace) +399 "transform: instruction after packing: 'b9 foo'\n" +400 ); 401 } 402 -403 bool has_operand_metadata(const line& inst, const string& m) { -404 bool result = false; -405 for (int i = 0; i < SIZE(inst.words); ++i) { -406 if (!has_operand_metadata(inst.words.at(i), m)) continue; -407 if (result) { -408 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end(); -409 return false; -410 } -411 result = true; -412 } -413 return result; -414 } +403 void test_pack_flags_bad_hex() { +404 Hide_errors = true; +405 run( +406 "== 0x1\n" // code segment +407 "b9 0xfoo/imm32\n" +408 ); +409 CHECK_TRACE_CONTENTS( +410 "error: not a number: 0xfoo\n" +411 ); +412 } +413 +414 //:: helpers 415 -416 bool has_operand_metadata(const word& w, const string& m) { -417 bool result = false; -418 bool metadata_found = false; -419 for (int i = 0; i < SIZE(w.metadata); ++i) { -420 const string& curr = w.metadata.at(i); -421 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue; // ignore unrecognized metadata -422 if (metadata_found) { -423 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); -424 return false; -425 } -426 metadata_found = true; -427 result = (curr == m); -428 } -429 return result; -430 } -431 -432 word metadata(const line& inst, const string& m) { -433 for (int i = 0; i < SIZE(inst.words); ++i) -434 if (has_operand_metadata(inst.words.at(i), m)) -435 return inst.words.at(i); -436 assert(false); -437 } -438 -439 bool looks_like_hex_int(const string& s) { -440 if (s.empty()) return false; -441 if (s.at(0) == '-' || s.at(0) == '+') return true; -442 if (isdigit(s.at(0))) return true; // includes '0x' prefix -443 // End looks_like_hex_int(s) Detectors -444 return false; -445 } -446 -447 :(code) -448 string to_string(const line& inst) { -449 ostringstream out; -450 for (int i = 0; i < SIZE(inst.words); ++i) { -451 if (i > 0) out << ' '; -452 out << inst.words.at(i).original; -453 } -454 return out.str(); -455 } +416 bool all_hex_bytes(const line& inst) { +417 for (int i = 0; i < SIZE(inst.words); ++i) +418 if (!is_hex_byte(inst.words.at(i))) +419 return false; +420 return true; +421 } +422 +423 bool is_hex_byte(const word& curr) { +424 if (contains_any_operand_metadata(curr)) +425 return false; +426 if (SIZE(curr.data) != 2) +427 return false; +428 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos) +429 return false; +430 return true; +431 } +432 +433 bool contains_any_operand_metadata(const word& word) { +434 for (int i = 0; i < SIZE(word.metadata); ++i) +435 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end()) +436 return true; +437 return false; +438 } +439 +440 bool has_operand_metadata(const line& inst, const string& m) { +441 bool result = false; +442 for (int i = 0; i < SIZE(inst.words); ++i) { +443 if (!has_operand_metadata(inst.words.at(i), m)) continue; +444 if (result) { +445 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end(); +446 return false; +447 } +448 result = true; +449 } +450 return result; +451 } +452 +453 bool has_operand_metadata(const word& w, const string& m) { +454 bool result = false; +455 bool metadata_found = false; +456 for (int i = 0; i < SIZE(w.metadata); ++i) { +457 const string& curr = w.metadata.at(i); +458 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue; // ignore unrecognized metadata +459 if (metadata_found) { +460 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); +461 return false; +462 } +463 metadata_found = true; +464 result = (curr == m); +465 } +466 return result; +467 } +468 +469 word metadata(const line& inst, const string& m) { +470 for (int i = 0; i < SIZE(inst.words); ++i) +471 if (has_operand_metadata(inst.words.at(i), m)) +472 return inst.words.at(i); +473 assert(false); +474 } +475 +476 bool looks_like_hex_int(const string& s) { +477 if (s.empty()) return false; +478 if (s.at(0) == '-' || s.at(0) == '+') return true; +479 if (isdigit(s.at(0))) return true; // includes '0x' prefix +480 // End looks_like_hex_int(s) Detectors +481 return false; +482 } +483 +484 string to_string(const line& inst) { +485 ostringstream out; +486 for (int i = 0; i < SIZE(inst.words); ++i) { +487 if (i > 0) out << ' '; +488 out << inst.words.at(i).original; +489 } +490 return out.str(); +491 } -- cgit 1.4.1-2-gfad0