From 25ad969f7582d9253f5329812aed5ed5784d8ec8 Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Thu, 12 Oct 2017 23:43:09 -0700 Subject: 4052 --- html/subx/010core.cc.html | 319 ++++++++++++++++++++++++++-------------------- 1 file changed, 179 insertions(+), 140 deletions(-) (limited to 'html/subx/010core.cc.html') diff --git a/html/subx/010core.cc.html b/html/subx/010core.cc.html index 606d4dec..8d7cb3a2 100644 --- a/html/subx/010core.cc.html +++ b/html/subx/010core.cc.html @@ -80,10 +80,10 @@ if ('onhashchange' in window) { 17 uint32_t u; 18 }; 19 :(before "End Globals") - 20 reg R[NUM_INT_REGISTERS] = { {0} }; + 20 reg Reg[NUM_INT_REGISTERS] = { {0} }; 21 uint32_t EIP = 0; 22 :(before "End Reset") - 23 bzero(R, sizeof(R)); + 23 bzero(Reg, sizeof(Reg)); 24 EIP = 0; 25 26 //:: simulated flag registers; just a subset that we care about @@ -103,146 +103,185 @@ if ('onhashchange' in window) { 40 /* arg1 and arg2 must be signed */ \ 41 int64_t tmp = arg1 op arg2; \ 42 arg1 = arg1 op arg2; \ - 43 SF = (arg1 < 0); \ - 44 ZF = (arg1 == 0); \ - 45 OF = (arg1 != tmp); \ - 46 } - 47 - 48 #define BINARY_BITWISE_OP(op, arg1, arg2) { \ - 49 /* arg1 and arg2 must be unsigned */ \ - 50 arg1 = arg1 op arg2; \ - 51 SF = (arg1 >> 31); \ - 52 ZF = (arg1 == 0); \ - 53 OF = false; \ - 54 } - 55 - 56 //:: simulated RAM + 43 trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \ + 44 SF = (arg1 < 0); \ + 45 ZF = (arg1 == 0); \ + 46 OF = (arg1 != tmp); \ + 47 } + 48 + 49 #define BINARY_BITWISE_OP(op, arg1, arg2) { \ + 50 /* arg1 and arg2 must be unsigned */ \ + 51 arg1 = arg1 op arg2; \ + 52 trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \ + 53 SF = (arg1 >> 31); \ + 54 ZF = (arg1 == 0); \ + 55 OF = false; \ + 56 } 57 - 58 :(before "End Globals") - 59 map<uint32_t, uint8_t> Memory; - 60 uint32_t End_of_program = 0; - 61 :(before "End Reset") - 62 Memory.clear(); - 63 End_of_program = 0; - 64 - 65 //:: core interpreter loop - 66 - 67 :(scenario add_imm32_to_eax) - 68 # In scenarios, programs are a series of hex bytes, each (variable-length) - 69 # instruction on one line. - 70 # - 71 # x86 instructions consist of the following parts (see cheatsheet.pdf): - 72 # opcode ModRM SIB displacement immediate - 73 # instruction mod, reg, R/M bits scale, index, base - 74 # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes - 75 ¦ 0x05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX - 76 +load: 1 -> 05 - 77 +load: 2 -> 0a - 78 +load: 3 -> 0b - 79 +load: 4 -> 0c - 80 +load: 5 -> 0d - 81 +run: add imm32 0x0d0c0b0a to reg EAX - 82 +reg: storing 0x0d0c0b0a in reg EAX - 83 - 84 :(code) - 85 // helper for tests: load a program into memory from a textual representation - 86 // of its bytes, and run it - 87 void run(const string& text_bytes) { - 88 load_program(text_bytes); - 89 EIP = 1; // preserve null pointer - 90 while (EIP < End_of_program) - 91 ¦ run_one_instruction(); - 92 } - 93 - 94 void load_program(const string& text_bytes) { - 95 uint32_t addr = 1; - 96 // we'll use C's 'strtol` to parse ASCII hex bytes - 97 // strtol needs a char*, so we grab the buffer backing the string object - 98 char* curr = const_cast<char*>(&text_bytes[0]); // non-portable, but blessed by Herb Sutter (http://herbsutter.com/2008/04/07/cringe-not-vectors-are-guaranteed-to-be-contiguous/#comment-483) - 99 char* max = curr + strlen(curr); -100 while (curr < max) { -101 ¦ // skip whitespace -102 ¦ while (*curr == ' ' || *curr == '\n') ++curr; -103 ¦ // skip comments -104 ¦ if (*curr == '#') { -105 ¦ ¦ while (*curr != '\n') { -106 ¦ ¦ ¦ ++curr; -107 ¦ ¦ ¦ if (curr >= max) break; -108 ¦ ¦ } -109 ¦ ¦ ++curr; -110 ¦ ¦ continue; -111 ¦ } -112 ¦ put(Memory, addr, strtol(curr, &curr, /*hex*/16)); -113 ¦ trace(99, "load") << addr << " -> " << HEXBYTE << static_cast<unsigned int>(get_or_insert(Memory, addr)) << end(); // ugly that iostream doesn't print uint8_t as an integer -114 ¦ addr++; -115 } -116 End_of_program = addr; -117 } -118 -119 // skeleton of how x86 instructions are decoded -120 void run_one_instruction() { -121 uint8_t op=0, op2=0, op3=0; -122 switch(op = next()) { -123 // our first opcode -124 case 0xf4: // hlt -125 ¦ EIP = End_of_program; -126 ¦ break; -127 case 0x05: { // add imm32 to EAX -128 ¦ int32_t arg2 = imm32(); -129 ¦ trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); -130 ¦ BINARY_ARITHMETIC_OP(+, R[EAX].i, arg2); -131 ¦ trace(98, "reg") << "storing 0x" << HEXWORD << R[EAX].i << " in reg EAX" << end(); -132 ¦ break; -133 } -134 // End Single-Byte Opcodes -135 case 0x0f: -136 ¦ switch(op2 = next()) { -137 ¦ // End Two-Byte Opcodes Starting With 0f -138 ¦ default: -139 ¦ ¦ cerr << "unrecognized second opcode after 0f: " << std::hex << static_cast<int>(op2) << '\n'; -140 ¦ ¦ exit(1); -141 ¦ } -142 ¦ break; -143 case 0xf3: -144 ¦ switch(op2 = next()) { -145 ¦ // End Two-Byte Opcodes Starting With f3 -146 ¦ case 0x0f: -147 ¦ ¦ switch(op3 = next()) { -148 ¦ ¦ // End Three-Byte Opcodes Starting With f3 0f -149 ¦ ¦ default: -150 ¦ ¦ ¦ cerr << "unrecognized third opcode after f3 0f: " << std::hex << static_cast<int>(op3) << '\n'; -151 ¦ ¦ ¦ exit(1); -152 ¦ ¦ } -153 ¦ ¦ break; -154 ¦ default: -155 ¦ ¦ cerr << "unrecognized second opcode after f3: " << std::hex << static_cast<int>(op2) << '\n'; -156 ¦ ¦ exit(1); -157 ¦ } -158 ¦ break; -159 default: -160 ¦ cerr << "unrecognized opcode: " << std::hex << static_cast<int>(op) << '\n'; -161 ¦ exit(1); + 58 //:: simulated RAM + 59 + 60 :(before "End Globals") + 61 vector<uint8_t> Mem; + 62 uint32_t End_of_program = 0; + 63 :(before "End Reset") + 64 Mem.clear(); + 65 Mem.resize(1024); + 66 End_of_program = 0; + 67 + 68 //:: core interpreter loop + 69 + 70 :(scenario add_imm32_to_eax) + 71 # In scenarios, programs are a series of hex bytes, each (variable-length) + 72 # instruction on one line. + 73 # + 74 # x86 instructions consist of the following parts (see cheatsheet.pdf): + 75 # opcode ModR/M SIB displacement immediate + 76 # instruction mod, reg, Reg/Mem bits scale, index, base + 77 # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes + 78 ¦ 05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX + 79 # All hex bytes must be exactly 2 characters each. No '0x' prefixes. + 80 +load: 1 -> 05 + 81 +load: 2 -> 0a + 82 +load: 3 -> 0b + 83 +load: 4 -> 0c + 84 +load: 5 -> 0d + 85 +run: add imm32 0x0d0c0b0a to reg EAX + 86 +run: storing 0x0d0c0b0a + 87 + 88 :(code) + 89 // helper for tests: load a program into memory from a textual representation + 90 // of its bytes, and run it + 91 void run(const string& text_bytes) { + 92 load_program(text_bytes); + 93 EIP = 1; // preserve null pointer + 94 while (EIP < End_of_program) + 95 ¦ run_one_instruction(); + 96 } + 97 + 98 // skeleton of how x86 instructions are decoded + 99 void run_one_instruction() { +100 uint8_t op=0, op2=0, op3=0; +101 switch (op = next()) { +102 case 0xf4: // hlt +103 ¦ EIP = End_of_program; +104 ¦ break; +105 // our first opcode +106 case 0x05: { // add imm32 to EAX +107 ¦ int32_t arg2 = imm32(); +108 ¦ trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); +109 ¦ BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); +110 ¦ break; +111 } +112 // End Single-Byte Opcodes +113 case 0x0f: +114 ¦ switch(op2 = next()) { +115 ¦ // End Two-Byte Opcodes Starting With 0f +116 ¦ default: +117 ¦ ¦ cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n'; +118 ¦ ¦ exit(1); +119 ¦ } +120 ¦ break; +121 case 0xf3: +122 ¦ switch(op2 = next()) { +123 ¦ // End Two-Byte Opcodes Starting With f3 +124 ¦ case 0x0f: +125 ¦ ¦ switch(op3 = next()) { +126 ¦ ¦ // End Three-Byte Opcodes Starting With f3 0f +127 ¦ ¦ default: +128 ¦ ¦ ¦ cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n'; +129 ¦ ¦ ¦ exit(1); +130 ¦ ¦ } +131 ¦ ¦ break; +132 ¦ default: +133 ¦ ¦ cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n'; +134 ¦ ¦ exit(1); +135 ¦ } +136 ¦ break; +137 default: +138 ¦ cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n'; +139 ¦ exit(1); +140 } +141 } +142 +143 void load_program(const string& text_bytes) { +144 uint32_t addr = 1; +145 istringstream in(text_bytes); +146 in >> std::noskipws; +147 while (has_data(in)) { +148 ¦ char c1 = next_hex_byte(in); +149 ¦ if (c1 == '\0') break; +150 ¦ if (!has_data(in)) { +151 ¦ ¦ raise << "input program truncated mid-byte\n" << end(); +152 ¦ ¦ return; +153 ¦ } +154 ¦ char c2 = next_hex_byte(in); +155 ¦ if (c2 == '\0') { +156 ¦ ¦ raise << "input program truncated mid-byte\n" << end(); +157 ¦ ¦ return; +158 ¦ } +159 ¦ Mem.at(addr) = to_byte(c1, c2); +160 ¦ trace(99, "load") << addr << " -> " << HEXBYTE << NUM(Mem.at(addr)) << end(); +161 ¦ addr++; 162 } -163 } -164 -165 uint8_t next() { -166 return get_or_insert(Memory, EIP++); -167 } -168 -169 // read a 32-bit immediate in little-endian order from the instruction stream -170 int32_t imm32() { -171 int32_t result = next(); -172 result |= (next()<<8); -173 result |= (next()<<16); -174 result |= (next()<<24); -175 return result; -176 } -177 -178 :(before "End Includes") -179 #include <iomanip> -180 #define HEXBYTE std::hex << std::setw(2) << std::setfill('0') -181 #define HEXWORD std::hex << std::setw(8) << std::setfill('0') -182 #include <stdint.h> +163 End_of_program = addr; +164 } +165 +166 char next_hex_byte(istream& in) { +167 while (has_data(in)) { +168 ¦ char c = '\0'; +169 ¦ in >> c; +170 ¦ if (c == ' ' || c == '\n') continue; +171 ¦ while (c == '#') { +172 ¦ ¦ while (has_data(in)) { +173 ¦ ¦ ¦ in >> c; +174 ¦ ¦ ¦ if (c == '\n') { +175 ¦ ¦ ¦ ¦ in >> c; +176 ¦ ¦ ¦ ¦ break; +177 ¦ ¦ ¦ } +178 ¦ ¦ } +179 ¦ } +180 ¦ if (c >= '0' && c <= '9') return c; +181 ¦ else if (c >= 'a' && c <= 'f') return c; +182 ¦ else if (c >= 'A' && c <= 'F') return tolower(c); +183 ¦ // disallow any non-hex characters, including a '0x' prefix +184 ¦ if (!isspace(c)) { +185 ¦ ¦ raise << "invalid non-hex character '" << c << "'\n" << end(); +186 ¦ ¦ break; +187 ¦ } +188 } +189 return '\0'; +190 } +191 +192 uint8_t to_byte(char hex_byte1, char hex_byte2) { +193 return to_hex_num(hex_byte1)*16 + to_hex_num(hex_byte2); +194 } +195 uint8_t to_hex_num(char c) { +196 if (c >= '0' && c <= '9') return c - '0'; +197 if (c >= 'a' && c <= 'f') return c - 'a' + 10; +198 assert(false); +199 return 0; +200 } +201 +202 inline uint8_t next() { +203 return Mem.at(EIP++); +204 } +205 +206 // read a 32-bit immediate in little-endian order from the instruction stream +207 int32_t imm32() { +208 int32_t result = next(); +209 result |= (next()<<8); +210 result |= (next()<<16); +211 result |= (next()<<24); +212 return result; +213 } +214 +215 :(before "End Includes") +216 #include <iomanip> +217 #define HEXBYTE std::hex << std::setw(2) << std::setfill('0') +218 #define HEXWORD std::hex << std::setw(8) << std::setfill('0') +219 // ugly that iostream doesn't print uint8_t as an integer +220 #define NUM(X) static_cast<int>(X) +221 #include <stdint.h> -- cgit 1.4.1-2-gfad0