From 608a7fa8d0faf9a3e3d182d9eabe969804443aab Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Fri, 7 Sep 2018 15:08:54 -0700 Subject: 4536 --- html/subx/030---operands.cc.html | 462 ++++++++++++++++++--------------------- 1 file changed, 216 insertions(+), 246 deletions(-) (limited to 'html/subx/030---operands.cc.html') diff --git a/html/subx/030---operands.cc.html b/html/subx/030---operands.cc.html index ea38d64b..207b0e05 100644 --- a/html/subx/030---operands.cc.html +++ b/html/subx/030---operands.cc.html @@ -3,7 +3,7 @@ Mu - subx/030---operands.cc - + @@ -15,15 +15,15 @@ body { font-size: 12pt; font-family: monospace; color: #aaaaaa; background-color a { color:#eeeeee; text-decoration: none; } a:hover { text-decoration: underline; } * { font-size: 12pt; font-size: 1em; } +.traceContains { color: #008000; } +.Identifier { color: #c0a020; } +.LineNr { color: #444444; } .Constant { color: #00a0a0; } +.Delimiter { color: #800080; } .SalientComment { color: #00ffff; } +.Normal { color: #aaaaaa; background-color: #080808; padding-bottom: 1px; } .Comment { color: #9090ff; } .Comment a { color:#0000ee; text-decoration:underline; } -.Delimiter { color: #800080; } -.LineNr { color: #444444; } -.Identifier { color: #c0a020; } -.Normal { color: #aaaaaa; background-color: #080808; padding-bottom: 1px; } -.traceContains { color: #008000; } .cSpecial { color: #008000; } --> @@ -80,7 +80,7 @@ if ('onhashchange' in window) { 19 "Each operand has a type. An instruction won't have more than one operand of\n" 20 "any type.\n" 21 "Each instruction has some set of allowed operand types. It'll reject others.\n" - 22 "The complete list of operand types: mod, subop, r32 (register), rm32\n" + 22 "The complete list of operand types: mod, subop, r32 (register), rm32\n" 23 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n" 24 "imm32.\n" 25 "Each of these has its own help page. Try reading 'subx help mod' next.\n" @@ -96,7 +96,7 @@ if ('onhashchange' in window) { 35 bb 0x2a/imm32 # copy 42 to EBX 36 +transform: packing instruction 'bb 0x2a/imm32' 37 +transform: instruction after packing: 'bb 2a 00 00 00' - 38 +run: copy imm32 0x0000002a to EBX + 38 +run: copy imm32 0x0000002a to EBX 39 40 //: complete set of valid operand types 41 @@ -194,19 +194,19 @@ if ('onhashchange' in window) { 133 134 //:: transform packing operands into bytes in the right order 135 -136 :(before "End Transforms") +136 :(after "Begin Transforms") 137 // Begin Level-2 Transforms -138 Transform.push_back(pack_operands); +138 Transform.push_back(pack_operands); 139 // End Level-2 Transforms 140 141 :(code) 142 void pack_operands(program& p) { 143 if (p.segments.empty()) return; -144 segment& code = p.segments.at(0); +144 segment& code = p.segments.at(0); 145 // Pack Operands(segment code) 146 trace(99, "transform") << "-- pack operands" << end(); -147 for (int i = 0; i < SIZE(code.lines); ++i) { -148 line& inst = code.lines.at(i); +147 for (int i = 0; i < SIZE(code.lines); ++i) { +148 line& inst = code.lines.at(i); 149 if (all_hex_bytes(inst)) continue; 150 trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end(); 151 pack_operands(inst); @@ -215,76 +215,76 @@ if ('onhashchange' in window) { 154 } 155 156 void pack_operands(line& inst) { -157 line new_inst; +157 line new_inst; 158 add_opcodes(inst, new_inst); 159 add_modrm_byte(inst, new_inst); 160 add_sib_byte(inst, new_inst); 161 add_disp_bytes(inst, new_inst); 162 add_imm_bytes(inst, new_inst); -163 inst.words.swap(new_inst.words); +163 inst.words.swap(new_inst.words); 164 } 165 166 void add_opcodes(const line& in, line& out) { -167 out.words.push_back(in.words.at(0)); -168 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3") -169 out.words.push_back(in.words.at(1)); -170 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f") -171 out.words.push_back(in.words.at(2)); -172 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f") -173 out.words.push_back(in.words.at(2)); +167 out.words.push_back(in.words.at(0)); +168 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3") +169 out.words.push_back(in.words.at(1)); +170 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f") +171 out.words.push_back(in.words.at(2)); +172 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f") +173 out.words.push_back(in.words.at(2)); 174 } 175 176 void add_modrm_byte(const line& in, line& out) { 177 uint8_t mod=0, reg_subop=0, rm32=0; 178 bool emit = false; -179 for (int i = 0; i < SIZE(in.words); ++i) { -180 const word& curr = in.words.at(i); +179 for (int i = 0; i < SIZE(in.words); ++i) { +180 const word& curr = in.words.at(i); 181 if (has_metadata(curr, "mod")) { -182 mod = hex_byte(curr.data); +182 mod = hex_byte(curr.data); 183 emit = true; 184 } 185 else if (has_metadata(curr, "rm32")) { -186 rm32 = hex_byte(curr.data); +186 rm32 = hex_byte(curr.data); 187 emit = true; 188 } 189 else if (has_metadata(curr, "r32")) { -190 reg_subop = hex_byte(curr.data); +190 reg_subop = hex_byte(curr.data); 191 emit = true; 192 } 193 else if (has_metadata(curr, "subop")) { -194 reg_subop = hex_byte(curr.data); +194 reg_subop = hex_byte(curr.data); 195 emit = true; 196 } 197 } 198 if (emit) -199 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32)); +199 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32)); 200 } 201 202 void add_sib_byte(const line& in, line& out) { 203 uint8_t scale=0, index=0, base=0; 204 bool emit = false; -205 for (int i = 0; i < SIZE(in.words); ++i) { -206 const word& curr = in.words.at(i); +205 for (int i = 0; i < SIZE(in.words); ++i) { +206 const word& curr = in.words.at(i); 207 if (has_metadata(curr, "scale")) { -208 scale = hex_byte(curr.data); +208 scale = hex_byte(curr.data); 209 emit = true; 210 } 211 else if (has_metadata(curr, "index")) { -212 index = hex_byte(curr.data); +212 index = hex_byte(curr.data); 213 emit = true; 214 } 215 else if (has_metadata(curr, "base")) { -216 base = hex_byte(curr.data); +216 base = hex_byte(curr.data); 217 emit = true; 218 } 219 } 220 if (emit) -221 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base)); +221 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base)); 222 } 223 224 void add_disp_bytes(const line& in, line& out) { -225 for (int i = 0; i < SIZE(in.words); ++i) { -226 const word& curr = in.words.at(i); +225 for (int i = 0; i < SIZE(in.words); ++i) { +226 const word& curr = in.words.at(i); 227 if (has_metadata(curr, "disp8")) 228 emit_hex_bytes(out, curr, 1); 229 if (has_metadata(curr, "disp16")) @@ -295,8 +295,8 @@ if ('onhashchange' in window) { 234 } 235 236 void add_imm_bytes(const line& in, line& out) { -237 for (int i = 0; i < SIZE(in.words); ++i) { -238 const word& curr = in.words.at(i); +237 for (int i = 0; i < SIZE(in.words); ++i) { +238 const word& curr = in.words.at(i); 239 if (has_metadata(curr, "imm8")) 240 emit_hex_bytes(out, curr, 1); 241 else if (has_metadata(curr, "imm32")) @@ -306,230 +306,200 @@ if ('onhashchange' in window) { 245 246 void emit_hex_bytes(line& out, const word& w, int num) { 247 assert(num <= 4); -248 if (num == 1 || !is_hex_int(w.data)) { -249 out.words.push_back(w); -250 if (is_hex_int(w.data)) -251 out.words.back().data = hex_byte_to_string(parse_int(w.data)); +248 if (num == 1 || !is_hex_int(w.data)) { +249 out.words.push_back(w); +250 if (is_hex_int(w.data)) +251 out.words.back().data = hex_byte_to_string(parse_int(w.data)); 252 return; 253 } -254 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num); +254 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num); 255 } 256 257 void emit_hex_bytes(line& out, uint32_t val, int num) { 258 assert(num <= 4); 259 for (int i = 0; i < num; ++i) { -260 out.words.push_back(hex_byte_text(val & 0xff)); +260 out.words.push_back(hex_byte_text(val & 0xff)); 261 val = val >> 8; 262 } 263 } 264 -265 word hex_byte_text(uint8_t val) { -266 word result; -267 result.data = hex_byte_to_string(val); +265 word hex_byte_text(uint8_t val) { +266 word result; +267 result.data = hex_byte_to_string(val); 268 result.original = result.data+"/auto"; 269 return result; 270 } 271 272 string hex_byte_to_string(uint8_t val) { 273 ostringstream out; -274 out << HEXBYTE << NUM(val); -275 return out.str(); -276 } -277 -278 string to_string(const vector<word>& in) { -279 ostringstream out; -280 for (int i = 0; i < SIZE(in); ++i) { -281 if (i > 0) out << ' '; -282 out << in.at(i).data; -283 } -284 return out.str(); -285 } -286 -287 :(before "End Unit Tests") -288 void test_preserve_metadata_when_emitting_single_byte() { -289 word in; -290 in.data = "f0"; -291 in.original = "f0/foo"; -292 line out; -293 emit_hex_bytes(out, in, 1); -294 CHECK_EQ(out.words.at(0).data, "f0"); -295 CHECK_EQ(out.words.at(0).original, "f0/foo"); -296 } -297 -298 :(scenario pack_disp8) -299 == 0x1 -300 74 2/disp8 # jump 2 bytes away if ZF is set -301 +transform: packing instruction '74 2/disp8' -302 +transform: instruction after packing: '74 02' -303 -304 :(scenarios transform) -305 :(scenario pack_disp8_negative) -306 == 0x1 -307 # running this will cause an infinite loop -308 74 -1/disp8 # jump 1 byte before if ZF is set -309 +transform: packing instruction '74 -1/disp8' -310 +transform: instruction after packing: '74 ff' -311 :(scenarios run) -312 -313 //: helper for scenario -314 :(code) -315 void transform(const string& text_bytes) { -316 program p; -317 istringstream in(text_bytes); -318 parse(in, p); -319 if (trace_contains_errors()) return; -320 transform(p); -321 } -322 -323 :(scenario pack_modrm_imm32) -324 == 0x1 -325 # instruction effective address operand displacement immediate -326 # op subop mod rm32 base index scale r32 -327 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -328 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 # add 1 to EBX -329 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32' -330 +transform: instruction after packing: '81 c3 01 00 00 00' -331 -332 :(scenario pack_imm32_large) -333 == 0x1 -334 b9 0x080490a7/imm32 # copy to ECX -335 +transform: packing instruction 'b9 0x080490a7/imm32' -336 +transform: instruction after packing: 'b9 a7 90 04 08' -337 -338 :(scenario pack_immediate_constants_hex) -339 == 0x1 -340 # instruction effective address operand displacement immediate -341 # op subop mod rm32 base index scale r32 -342 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -343 bb 0x2a/imm32 # copy 42 to EBX -344 +transform: packing instruction 'bb 0x2a/imm32' -345 +transform: instruction after packing: 'bb 2a 00 00 00' -346 +run: copy imm32 0x0000002a to EBX -347 -348 :(scenarios transform) -349 :(scenario pack_silently_ignores_non_hex) -350 == 0x1 -351 # instruction effective address operand displacement immediate -352 # op subop mod rm32 base index scale r32 -353 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes -354 bb foo/imm32 # copy foo to EBX -355 +transform: packing instruction 'bb foo/imm32' -356 # no change (we're just not printing metadata to the trace) -357 +transform: instruction after packing: 'bb foo' -358 $error: 0 -359 :(scenarios run) -360 -361 //:: helpers -362 -363 :(code) -364 bool all_hex_bytes(const line& inst) { -365 for (int i = 0; i < SIZE(inst.words); ++i) -366 if (!is_hex_byte(inst.words.at(i))) -367 return false; -368 return true; -369 } -370 -371 bool is_hex_byte(const word& curr) { -372 if (contains_any_operand_metadata(curr)) -373 return false; -374 if (SIZE(curr.data) != 2) -375 return false; -376 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos) -377 return false; -378 return true; -379 } -380 -381 bool contains_any_operand_metadata(const word& word) { -382 for (int i = 0; i < SIZE(word.metadata); ++i) -383 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end()) -384 return true; -385 return false; -386 } -387 -388 bool has_metadata(const line& inst, const string& m) { -389 bool result = false; -390 for (int i = 0; i < SIZE(inst.words); ++i) { -391 if (!has_metadata(inst.words.at(i), m)) continue; -392 if (result) { -393 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end(); -394 return false; -395 } -396 result = true; -397 } -398 return result; -399 } -400 -401 bool has_metadata(const word& w, const string& m) { -402 bool result = false; -403 bool metadata_found = false; -404 for (int i = 0; i < SIZE(w.metadata); ++i) { -405 const string& curr = w.metadata.at(i); -406 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue; // ignore unrecognized metadata -407 if (metadata_found) { -408 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); -409 return false; -410 } -411 metadata_found = true; -412 result = (curr == m); -413 } -414 return result; -415 } -416 -417 word metadata(const line& inst, const string& m) { -418 for (int i = 0; i < SIZE(inst.words); ++i) -419 if (has_metadata(inst.words.at(i), m)) -420 return inst.words.at(i); -421 assert(false); -422 } -423 -424 bool is_hex_int(const string& s) { -425 if (s.empty()) return false; -426 size_t pos = 0; -427 if (s.at(0) == '-' || s.at(0) == '+') pos++; -428 if (s.substr(pos, pos+2) == "0x") pos += 2; -429 return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos; -430 } -431 -432 int32_t parse_int(const string& s) { -433 if (s.empty()) return 0; -434 istringstream in(s); -435 in >> std::hex; -436 if (s.at(0) == '-') { -437 int32_t result = 0; -438 in >> result; -439 if (!in || !in.eof()) { -440 raise << "not a number: " << s << '\n' << end(); -441 return 0; -442 } -443 return result; -444 } -445 uint32_t uresult = 0; -446 in >> uresult; -447 if (!in || !in.eof()) { -448 raise << "not a number: " << s << '\n' << end(); -449 return 0; -450 } -451 return static_cast<int32_t>(uresult); -452 } -453 :(before "End Unit Tests") -454 void test_parse_int() { -455 CHECK_EQ(0, parse_int("0")); -456 CHECK_EQ(0, parse_int("0x0")); -457 CHECK_EQ(0, parse_int("0x0")); -458 CHECK_EQ(16, parse_int("10")); // hex always -459 CHECK_EQ(-1, parse_int("-1")); -460 CHECK_EQ(-1, parse_int("0xffffffff")); -461 } -462 -463 :(code) -464 string to_string(const line& inst) { -465 ostringstream out; -466 for (int i = 0; i < SIZE(inst.words); ++i) { -467 if (i > 0) out << ' '; -468 out << inst.words.at(i).original; -469 } -470 return out.str(); -471 } +274 // uint8_t prints without padding, but int8_t will expand to 32 bits again +275 out << HEXBYTE << NUM(val); +276 return out.str(); +277 } +278 +279 string to_string(const vector<word>& in) { +280 ostringstream out; +281 for (int i = 0; i < SIZE(in); ++i) { +282 if (i > 0) out << ' '; +283 out << in.at(i).data; +284 } +285 return out.str(); +286 } +287 +288 :(before "End Unit Tests") +289 void test_preserve_metadata_when_emitting_single_byte() { +290 word in; +291 in.data = "f0"; +292 in.original = "f0/foo"; +293 line out; +294 emit_hex_bytes(out, in, 1); +295 CHECK_EQ(out.words.at(0).data, "f0"); +296 CHECK_EQ(out.words.at(0).original, "f0/foo"); +297 } +298 +299 :(scenario pack_disp8) +300 == 0x1 +301 74 2/disp8 # jump 2 bytes away if ZF is set +302 +transform: packing instruction '74 2/disp8' +303 +transform: instruction after packing: '74 02' +304 +305 :(scenarios transform) +306 :(scenario pack_disp8_negative) +307 == 0x1 +308 # running this will cause an infinite loop +309 74 -1/disp8 # jump 1 byte before if ZF is set +310 +transform: packing instruction '74 -1/disp8' +311 +transform: instruction after packing: '74 ff' +312 :(scenarios run) +313 +314 //: helper for scenario +315 :(code) +316 void transform(const string& text_bytes) { +317 program p; +318 istringstream in(text_bytes); +319 parse(in, p); +320 if (trace_contains_errors()) return; +321 transform(p); +322 } +323 +324 :(scenario pack_modrm_imm32) +325 == 0x1 +326 # instruction effective address operand displacement immediate +327 # op subop mod rm32 base index scale r32 +328 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +329 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32 # add 1 to EBX +330 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32' +331 +transform: instruction after packing: '81 c3 01 00 00 00' +332 +333 :(scenario pack_imm32_large) +334 == 0x1 +335 b9 0x080490a7/imm32 # copy to ECX +336 +transform: packing instruction 'b9 0x080490a7/imm32' +337 +transform: instruction after packing: 'b9 a7 90 04 08' +338 +339 :(scenario pack_immediate_constants_hex) +340 == 0x1 +341 # instruction effective address operand displacement immediate +342 # op subop mod rm32 base index scale r32 +343 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +344 bb 0x2a/imm32 # copy 42 to EBX +345 +transform: packing instruction 'bb 0x2a/imm32' +346 +transform: instruction after packing: 'bb 2a 00 00 00' +347 +run: copy imm32 0x0000002a to EBX +348 +349 :(scenarios transform) +350 :(scenario pack_silently_ignores_non_hex) +351 == 0x1 +352 # instruction effective address operand displacement immediate +353 # op subop mod rm32 base index scale r32 +354 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +355 bb foo/imm32 # copy foo to EBX +356 +transform: packing instruction 'bb foo/imm32' +357 # no change (we're just not printing metadata to the trace) +358 +transform: instruction after packing: 'bb foo' +359 $error: 0 +360 :(scenarios run) +361 +362 //:: helpers +363 +364 :(code) +365 bool all_hex_bytes(const line& inst) { +366 for (int i = 0; i < SIZE(inst.words); ++i) +367 if (!is_hex_byte(inst.words.at(i))) +368 return false; +369 return true; +370 } +371 +372 bool is_hex_byte(const word& curr) { +373 if (contains_any_operand_metadata(curr)) +374 return false; +375 if (SIZE(curr.data) != 2) +376 return false; +377 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos) +378 return false; +379 return true; +380 } +381 +382 bool contains_any_operand_metadata(const word& word) { +383 for (int i = 0; i < SIZE(word.metadata); ++i) +384 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end()) +385 return true; +386 return false; +387 } +388 +389 bool has_metadata(const line& inst, const string& m) { +390 bool result = false; +391 for (int i = 0; i < SIZE(inst.words); ++i) { +392 if (!has_metadata(inst.words.at(i), m)) continue; +393 if (result) { +394 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end(); +395 return false; +396 } +397 result = true; +398 } +399 return result; +400 } +401 +402 bool has_metadata(const word& w, const string& m) { +403 bool result = false; +404 bool metadata_found = false; +405 for (int i = 0; i < SIZE(w.metadata); ++i) { +406 const string& curr = w.metadata.at(i); +407 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue; // ignore unrecognized metadata +408 if (metadata_found) { +409 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end(); +410 return false; +411 } +412 metadata_found = true; +413 result = (curr == m); +414 } +415 return result; +416 } +417 +418 word metadata(const line& inst, const string& m) { +419 for (int i = 0; i < SIZE(inst.words); ++i) +420 if (has_metadata(inst.words.at(i), m)) +421 return inst.words.at(i); +422 assert(false); +423 } +424 +425 bool is_hex_int(const string& s) { +426 if (s.empty()) return false; +427 size_t pos = 0; +428 if (s.at(0) == '-' || s.at(0) == '+') pos++; +429 if (s.substr(pos, pos+2) == "0x") pos += 2; +430 return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos; +431 } +432 +433 :(code) +434 string to_string(const line& inst) { +435 ostringstream out; +436 for (int i = 0; i < SIZE(inst.words); ++i) { +437 if (i > 0) out << ' '; +438 out << inst.words.at(i).original; +439 } +440 return out.str(); +441 } -- cgit 1.4.1-2-gfad0