From 03dcb7c9400cf6dcf04deb8e1bfa804a9621d0b4 Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Fri, 13 Oct 2017 21:53:00 -0700 Subject: 4062 --- html/subx/010core.cc.html | 366 +++++++++++++++++++++++----------------------- 1 file changed, 185 insertions(+), 181 deletions(-) (limited to 'html/subx/010core.cc.html') diff --git a/html/subx/010core.cc.html b/html/subx/010core.cc.html index f6772607..6b0d2643 100644 --- a/html/subx/010core.cc.html +++ b/html/subx/010core.cc.html @@ -98,190 +98,194 @@ if ('onhashchange' in window) { 35 //: how the flag registers are updated after each instruction 36 37 :(before "End Includes") - 38 // beware: no side-effects in args - 39 #define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \ - 40 /* arg1 and arg2 must be signed */ \ - 41 int64_t tmp = arg1 op arg2; \ - 42 arg1 = arg1 op arg2; \ - 43 trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \ - 44 SF = (arg1 < 0); \ - 45 ZF = (arg1 == 0); \ - 46 OF = (arg1 != tmp); \ - 47 } - 48 - 49 #define BINARY_BITWISE_OP(op, arg1, arg2) { \ - 50 /* arg1 and arg2 must be unsigned */ \ - 51 arg1 = arg1 op arg2; \ - 52 trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \ - 53 SF = (arg1 >> 31); \ - 54 ZF = (arg1 == 0); \ - 55 OF = false; \ - 56 } - 57 - 58 //:: simulated RAM - 59 - 60 :(before "End Globals") - 61 vector<uint8_t> Mem; - 62 uint32_t End_of_program = 0; - 63 :(before "End Reset") - 64 Mem.clear(); - 65 Mem.resize(1024); - 66 End_of_program = 0; - 67 - 68 //:: core interpreter loop - 69 - 70 :(scenario add_imm32_to_eax) - 71 # In scenarios, programs are a series of hex bytes, each (variable-length) - 72 # instruction on one line. - 73 # - 74 # x86 instructions consist of the following parts (see cheatsheet.pdf): - 75 # opcode ModR/M SIB displacement immediate - 76 # instruction mod, reg, Reg/Mem bits scale, index, base - 77 # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes - 78 ¦ 05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX - 79 # All hex bytes must be exactly 2 characters each. No '0x' prefixes. - 80 +load: 1 -> 05 - 81 +load: 2 -> 0a - 82 +load: 3 -> 0b - 83 +load: 4 -> 0c - 84 +load: 5 -> 0d - 85 +run: add imm32 0x0d0c0b0a to reg EAX - 86 +run: storing 0x0d0c0b0a - 87 - 88 :(code) - 89 // helper for tests: load a program into memory from a textual representation - 90 // of its bytes, and run it - 91 void run(const string& text_bytes) { - 92 load_program(text_bytes); - 93 EIP = 1; // preserve null pointer - 94 while (EIP < End_of_program) - 95 ¦ run_one_instruction(); - 96 } - 97 - 98 // skeleton of how x86 instructions are decoded - 99 void run_one_instruction() { -100 uint8_t op=0, op2=0, op3=0; -101 switch (op = next()) { -102 case 0xf4: // hlt -103 ¦ EIP = End_of_program; -104 ¦ break; -105 // our first opcode -106 case 0x05: { // add imm32 to EAX -107 ¦ int32_t arg2 = imm32(); -108 ¦ trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); -109 ¦ BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); -110 ¦ break; -111 } -112 // End Single-Byte Opcodes -113 case 0x0f: -114 ¦ switch(op2 = next()) { -115 ¦ // End Two-Byte Opcodes Starting With 0f -116 ¦ default: -117 ¦ ¦ cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n'; -118 ¦ ¦ exit(1); -119 ¦ } -120 ¦ break; -121 case 0xf3: -122 ¦ switch(op2 = next()) { -123 ¦ // End Two-Byte Opcodes Starting With f3 -124 ¦ case 0x0f: -125 ¦ ¦ switch(op3 = next()) { -126 ¦ ¦ // End Three-Byte Opcodes Starting With f3 0f -127 ¦ ¦ default: -128 ¦ ¦ ¦ cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n'; -129 ¦ ¦ ¦ exit(1); -130 ¦ ¦ } -131 ¦ ¦ break; -132 ¦ default: -133 ¦ ¦ cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n'; -134 ¦ ¦ exit(1); -135 ¦ } -136 ¦ break; -137 default: -138 ¦ cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n'; -139 ¦ exit(1); -140 } -141 } -142 -143 void load_program(const string& text_bytes) { -144 uint32_t addr = 1; -145 istringstream in(text_bytes); -146 in >> std::noskipws; -147 while (has_data(in)) { -148 ¦ char c1 = next_hex_byte(in); -149 ¦ if (c1 == '\0') break; -150 ¦ if (!has_data(in)) { -151 ¦ ¦ raise << "input program truncated mid-byte\n" << end(); -152 ¦ ¦ return; -153 ¦ } -154 ¦ char c2 = next_hex_byte(in); -155 ¦ if (c2 == '\0') { -156 ¦ ¦ raise << "input program truncated mid-byte\n" << end(); -157 ¦ ¦ return; -158 ¦ } -159 ¦ Mem.at(addr) = to_byte(c1, c2); -160 ¦ trace(99, "load") << addr << " -> " << HEXBYTE << NUM(Mem.at(addr)) << end(); -161 ¦ addr++; -162 } -163 End_of_program = addr; -164 } -165 -166 char next_hex_byte(istream& in) { -167 while (has_data(in)) { -168 ¦ char c = '\0'; -169 ¦ in >> c; -170 ¦ if (c == ' ' || c == '\n') continue; -171 ¦ while (c == '#') { -172 ¦ ¦ while (has_data(in)) { -173 ¦ ¦ ¦ in >> c; -174 ¦ ¦ ¦ if (c == '\n') { -175 ¦ ¦ ¦ ¦ in >> c; -176 ¦ ¦ ¦ ¦ break; -177 ¦ ¦ ¦ } -178 ¦ ¦ } -179 ¦ } -180 ¦ if (c >= '0' && c <= '9') return c; -181 ¦ else if (c >= 'a' && c <= 'f') return c; -182 ¦ else if (c >= 'A' && c <= 'F') return tolower(c); -183 ¦ // disallow any non-hex characters, including a '0x' prefix -184 ¦ if (!isspace(c)) { -185 ¦ ¦ raise << "invalid non-hex character '" << c << "'\n" << end(); -186 ¦ ¦ break; -187 ¦ } -188 } -189 return '\0'; -190 } -191 -192 uint8_t to_byte(char hex_byte1, char hex_byte2) { -193 return to_hex_num(hex_byte1)*16 + to_hex_num(hex_byte2); + 38 // Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the + 39 // result in 'arg1', then update flags. + 40 // beware: no side-effects in args + 41 #define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \ + 42 /* arg1 and arg2 must be signed */ \ + 43 int64_t tmp = arg1 op arg2; \ + 44 arg1 = arg1 op arg2; \ + 45 trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \ + 46 SF = (arg1 < 0); \ + 47 ZF = (arg1 == 0); \ + 48 OF = (arg1 != tmp); \ + 49 } + 50 + 51 // Combine 'arg1' and 'arg2' with bitwise operation 'op' and store the result + 52 // in 'arg1', then update flags. + 53 #define BINARY_BITWISE_OP(op, arg1, arg2) { \ + 54 /* arg1 and arg2 must be unsigned */ \ + 55 arg1 = arg1 op arg2; \ + 56 trace(2, "run") << "storing 0x" << HEXWORD << arg1 << end(); \ + 57 SF = (arg1 >> 31); \ + 58 ZF = (arg1 == 0); \ + 59 OF = false; \ + 60 } + 61 + 62 //:: simulated RAM + 63 + 64 :(before "End Globals") + 65 vector<uint8_t> Mem; + 66 uint32_t End_of_program = 0; + 67 :(before "End Reset") + 68 Mem.clear(); + 69 Mem.resize(1024); + 70 End_of_program = 0; + 71 + 72 //:: core interpreter loop + 73 + 74 :(scenario add_imm32_to_eax) + 75 # In scenarios, programs are a series of hex bytes, each (variable-length) + 76 # instruction on one line. + 77 # + 78 # x86 instructions consist of the following parts (see cheatsheet.pdf): + 79 # opcode ModR/M SIB displacement immediate + 80 # instruction mod, reg, Reg/Mem bits scale, index, base + 81 # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes + 82 ¦ 05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX + 83 # All hex bytes must be exactly 2 characters each. No '0x' prefixes. + 84 +load: 1 -> 05 + 85 +load: 2 -> 0a + 86 +load: 3 -> 0b + 87 +load: 4 -> 0c + 88 +load: 5 -> 0d + 89 +run: add imm32 0x0d0c0b0a to reg EAX + 90 +run: storing 0x0d0c0b0a + 91 + 92 :(code) + 93 // helper for tests: load a program into memory from a textual representation + 94 // of its bytes, and run it + 95 void run(const string& text_bytes) { + 96 load_program(text_bytes); + 97 EIP = 1; // preserve null pointer + 98 while (EIP < End_of_program) + 99 ¦ run_one_instruction(); +100 } +101 +102 // skeleton of how x86 instructions are decoded +103 void run_one_instruction() { +104 uint8_t op=0, op2=0, op3=0; +105 switch (op = next()) { +106 case 0xf4: // hlt +107 ¦ EIP = End_of_program; +108 ¦ break; +109 // our first opcode +110 case 0x05: { // add imm32 to EAX +111 ¦ int32_t arg2 = imm32(); +112 ¦ trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); +113 ¦ BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); +114 ¦ break; +115 } +116 // End Single-Byte Opcodes +117 case 0x0f: +118 ¦ switch(op2 = next()) { +119 ¦ // End Two-Byte Opcodes Starting With 0f +120 ¦ default: +121 ¦ ¦ cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n'; +122 ¦ ¦ exit(1); +123 ¦ } +124 ¦ break; +125 case 0xf3: +126 ¦ switch(op2 = next()) { +127 ¦ // End Two-Byte Opcodes Starting With f3 +128 ¦ case 0x0f: +129 ¦ ¦ switch(op3 = next()) { +130 ¦ ¦ // End Three-Byte Opcodes Starting With f3 0f +131 ¦ ¦ default: +132 ¦ ¦ ¦ cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n'; +133 ¦ ¦ ¦ exit(1); +134 ¦ ¦ } +135 ¦ ¦ break; +136 ¦ default: +137 ¦ ¦ cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n'; +138 ¦ ¦ exit(1); +139 ¦ } +140 ¦ break; +141 default: +142 ¦ cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n'; +143 ¦ exit(1); +144 } +145 } +146 +147 void load_program(const string& text_bytes) { +148 uint32_t addr = 1; +149 istringstream in(text_bytes); +150 in >> std::noskipws; +151 while (has_data(in)) { +152 ¦ char c1 = next_hex_byte(in); +153 ¦ if (c1 == '\0') break; +154 ¦ if (!has_data(in)) { +155 ¦ ¦ raise << "input program truncated mid-byte\n" << end(); +156 ¦ ¦ return; +157 ¦ } +158 ¦ char c2 = next_hex_byte(in); +159 ¦ if (c2 == '\0') { +160 ¦ ¦ raise << "input program truncated mid-byte\n" << end(); +161 ¦ ¦ return; +162 ¦ } +163 ¦ Mem.at(addr) = to_byte(c1, c2); +164 ¦ trace(99, "load") << addr << " -> " << HEXBYTE << NUM(Mem.at(addr)) << end(); +165 ¦ addr++; +166 } +167 End_of_program = addr; +168 } +169 +170 char next_hex_byte(istream& in) { +171 while (has_data(in)) { +172 ¦ char c = '\0'; +173 ¦ in >> c; +174 ¦ if (c == ' ' || c == '\n') continue; +175 ¦ while (c == '#') { +176 ¦ ¦ while (has_data(in)) { +177 ¦ ¦ ¦ in >> c; +178 ¦ ¦ ¦ if (c == '\n') { +179 ¦ ¦ ¦ ¦ in >> c; +180 ¦ ¦ ¦ ¦ break; +181 ¦ ¦ ¦ } +182 ¦ ¦ } +183 ¦ } +184 ¦ if (c >= '0' && c <= '9') return c; +185 ¦ else if (c >= 'a' && c <= 'f') return c; +186 ¦ else if (c >= 'A' && c <= 'F') return tolower(c); +187 ¦ // disallow any non-hex characters, including a '0x' prefix +188 ¦ if (!isspace(c)) { +189 ¦ ¦ raise << "invalid non-hex character '" << c << "'\n" << end(); +190 ¦ ¦ break; +191 ¦ } +192 } +193 return '\0'; 194 } -195 uint8_t to_hex_num(char c) { -196 if (c >= '0' && c <= '9') return c - '0'; -197 if (c >= 'a' && c <= 'f') return c - 'a' + 10; -198 assert(false); -199 return 0; -200 } -201 -202 inline uint8_t next() { -203 return Mem.at(EIP++); +195 +196 uint8_t to_byte(char hex_byte1, char hex_byte2) { +197 return to_hex_num(hex_byte1)*16 + to_hex_num(hex_byte2); +198 } +199 uint8_t to_hex_num(char c) { +200 if (c >= '0' && c <= '9') return c - '0'; +201 if (c >= 'a' && c <= 'f') return c - 'a' + 10; +202 assert(false); +203 return 0; 204 } 205 -206 // read a 32-bit immediate in little-endian order from the instruction stream -207 int32_t imm32() { -208 int32_t result = next(); -209 result |= (next()<<8); -210 result |= (next()<<16); -211 result |= (next()<<24); -212 return result; -213 } -214 -215 :(before "End Includes") -216 #include <iomanip> -217 #define HEXBYTE std::hex << std::setw(2) << std::setfill('0') -218 #define HEXWORD std::hex << std::setw(8) << std::setfill('0') -219 // ugly that iostream doesn't print uint8_t as an integer -220 #define NUM(X) static_cast<int>(X) -221 #include <stdint.h> +206 inline uint8_t next() { +207 return Mem.at(EIP++); +208 } +209 +210 // read a 32-bit immediate in little-endian order from the instruction stream +211 int32_t imm32() { +212 int32_t result = next(); +213 result |= (next()<<8); +214 result |= (next()<<16); +215 result |= (next()<<24); +216 return result; +217 } +218 +219 :(before "End Includes") +220 #include <iomanip> +221 #define HEXBYTE std::hex << std::setw(2) << std::setfill('0') +222 #define HEXWORD std::hex << std::setw(8) << std::setfill('0') +223 // ugly that iostream doesn't print uint8_t as an integer +224 #define NUM(X) static_cast<int>(X) +225 #include <stdint.h> -- cgit 1.4.1-2-gfad0