diff options
author | Kartik K. Agaram <vc@akkartik.com> | 2017-10-11 01:38:47 -0700 |
---|---|---|
committer | Kartik K. Agaram <vc@akkartik.com> | 2017-10-11 01:38:47 -0700 |
commit | f8831a023d91556eb105edc27e7cbb61bde573ca (patch) | |
tree | e36bf6a03e7aaba119044c2929255e5faa7f6b31 | |
parent | a7c35665a5b092eab1f4be159bd3f1e43f038283 (diff) | |
download | mu-f8831a023d91556eb105edc27e7cbb61bde573ca.tar.gz |
4014 - core skeleton for x86 interpreter
-rw-r--r-- | subx/000organization.cc | 3 | ||||
-rw-r--r-- | subx/001help.cc | 20 | ||||
-rw-r--r-- | subx/010core.cc | 173 |
3 files changed, 180 insertions, 16 deletions
diff --git a/subx/000organization.cc b/subx/000organization.cc index d9f072f9..753ddca1 100644 --- a/subx/000organization.cc +++ b/subx/000organization.cc @@ -112,6 +112,9 @@ int main(int argc, char* argv[]) { atexit(reset); + // run on a 32-bit system + assert(sizeof(int) == 4); + assert(sizeof(float) == 4); // End One-time Setup diff --git a/subx/001help.cc b/subx/001help.cc index ca1cb106..d4c3334f 100644 --- a/subx/001help.cc +++ b/subx/001help.cc @@ -2,24 +2,12 @@ //: This should give you a sense for what to look forward to in later layers. :(before "End Commandline Parsing") -if (argc != 1) { +if (argc <= 1 || is_equal(argv[1], "--help")) { //: this is the functionality later layers will provide // currently no automated tests for commandline arg parsing - return 1; -} - -//: Support for option parsing. -//: Options always begin with '--' and are always the first arguments. An -//: option will never follow a non-option. -:(before "End Commandline Parsing") -char** arg = &argv[1]; -while (argc > 1 && starts_with(*arg, "--")) { - if (false) - ; // no-op branch just so any further additions can consistently always start with 'else' - // End Commandline Options(*arg) - else - cerr << "skipping unknown option " << *arg << '\n'; - --argc; ++argv; ++arg; + cerr << "Usage:\n" + << " subx test\n"; + return 0; } //:: Helper function used by the above fragment of code (and later layers too, diff --git a/subx/010core.cc b/subx/010core.cc new file mode 100644 index 00000000..611b3f33 --- /dev/null +++ b/subx/010core.cc @@ -0,0 +1,173 @@ +//:: simulated x86 registers + +:(before "End Types") +enum { + EAX, + ECX, + EDX, + EBX, + ESP, + EBP, + ESI, + EDI, + NUM_INT_REGISTERS, +}; +union reg { + int32_t i; + uint32_t u; +}; +:(before "End Globals") +reg R[NUM_INT_REGISTERS] = { {0} }; +uint32_t EIP = 0; +:(before "End Reset") +bzero(R, sizeof(R)); +EIP = 0; + +//:: simulated flag registers; just a subset that we care about + +:(before "End Globals") +bool OF=false, ZF=false, SF=false; +:(before "End Reset") +OF = ZF = SF = false; + +//: how the flag registers are updated after each instruction + +:(before "End Includes") +// beware: no side-effects in args +#define PERFORM_ARITHMETIC_BINOP(op, arg1, arg2) { \ + /* arg1 and arg2 must be signed */ \ + int64_t tmp = arg1 op arg2; \ + arg1 = arg1 op arg2; \ + SF = (arg1 < 0); \ + ZF = (arg1 == 0); \ + OF = (arg1 != tmp); \ +} + +#define PERFORM_BITWISE_BINOP(op, arg1, arg2) { \ + /* arg1 and arg2 must be unsigned */ \ + arg1 = arg1 op arg2; \ + SF = (arg1 >> 31); \ + ZF = (arg1 == 0); \ + OF = false; \ +} + +//:: simulated RAM + +:(before "End Globals") +vector<uint8_t> Memory; +:(before "End Reset") +Memory.clear(); + +//:: core interpreter loop + +:(scenario add_imm32_to_eax) +# opcode modrm sib displacement immediate + 05 0a 0b 0c 0d # add EAX, 0x0d0c0b0a ++load: 05 ++load: 0a ++load: 0b ++load: 0c ++load: 0d ++run: add to EAX immediate 0x0d0c0b0a ++reg: storing 0x0d0c0b0a in register EAX + +:(code) +// helper for tests: load a program into memory from a textual representation +// of its bytes, and run it +void run(const string& text_bytes) { + load_program(text_bytes); + EIP = 1; // preserve null pointer + while (EIP < Memory.size()) + run_one_instruction(); +} + +void load_program(const string& text_bytes) { + assert(Memory.empty()); + // initialize address 0 + Memory.push_back(0); + // now, to read the hex bytes in ASCII, we'll use C's strtol + // strtol needs a char*, so we grab the buffer backing the string object + char* curr = const_cast<char*>(&text_bytes[0]); // non-standard approach, but blessed by Herb Sutter (http://herbsutter.com/2008/04/07/cringe-not-vectors-are-guaranteed-to-be-contiguous/#comment-483) + char* max = curr + strlen(curr); + while (true) { + if (curr >= max) return; + // skip whitespace + while (*curr == ' ' || *curr == '\n') ++curr; + // skip comments + if (*curr == '#') { + while (*curr != '\n') { + ++curr; + if (curr >= max) return; + } + ++curr; + if (curr >= max) return; + } + Memory.push_back(strtol(curr, &curr, /*hex*/16)); + trace(99, "load") << HEXBYTE << static_cast<unsigned int>(Memory.back()) << end(); // ugly that iostream doesn't print uint8_t as an integer + } +} + +// skeleton of how x86 instructions are decoded +void run_one_instruction() { + uint8_t op=0, op2=0, op3=0; + switch(op = next()) { + // our first opcode + case 0x05: { // add EAX, imm32 + int32_t arg2 = imm32(); + trace(2, "run") << "add to EAX immediate 0x" << HEXWORD << arg2 << end(); + PERFORM_ARITHMETIC_BINOP(+, R[EAX].i, arg2); + trace(98, "reg") << "storing 0x" << HEXWORD << R[EAX].i << " in register EAX" << end(); + break; + } + // End Single-Byte Opcodes + case 0x0f: + switch(op2 = next()) { + // End Two-Byte Opcodes Starting With 0x0f + default: + cerr << "unrecognized second opcode after 0x0f: " << std::hex << op2 << '\n'; + exit(1); + } + break; + case 0xf3: + switch(op2 = next()) { + // End Two-Byte Opcodes Starting With 0xf3 + case 0x0f: + switch(op3 = next()) { + // End Three-Byte Opcodes Starting With 0xf3 0x0f + default: + cerr << "unrecognized third opcode after 0xf3 0x0f: " << std::hex << op3 << '\n'; + exit(1); + } + break; + default: + cerr << "unrecognized second opcode after 0xf3: " << std::hex << op2 << '\n'; + exit(1); + } + break; + case 0xf4: // hlt + EIP = Memory.size(); + break; + default: + cerr << "unrecognized opcode: " << std::hex << op << '\n'; + exit(1); + } +} + +uint8_t next(void) { + if (EIP >= Memory.size()) return /*hlt*/0xf4; + return Memory.at(EIP++); +} + +// read a 32-bit immediate in little-endian order from the instruction stream +int32_t imm32(void) { + int result = next(); + result |= (next()<<8); + result |= (next()<<16); + result |= (next()<<24); + return result; +} + +:(before "End Includes") +#include <iomanip> +#define HEXBYTE std::hex << std::setw(2) << std::setfill('0') +#define HEXWORD std::hex << std::setw(8) << std::setfill('0') |