//: Core data structures for simulating the SubX VM (subset of an x86 processor) //: //: At the lowest level ("level 1") of abstraction, SubX executes x86 //: instructions provided in the form of an array of bytes, loaded into memory //: starting at a specific address. //:: registers //: assume segment registers are hard-coded to 0 //: no floating-point, MMX, etc. yet :(before "End Types") enum { EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, NUM_INT_REGISTERS, }; union reg { int32_t i; uint32_t u; }; :(before "End Globals") reg Reg[NUM_INT_REGISTERS] = { {0} }; uint32_t EIP = 1; // preserve null pointer :(before "End Reset") bzero(Reg, sizeof(Reg)); EIP = 1; // preserve null pointer :(before "End Help Contents") cerr << " registers\n"; :(before "End Help Texts") put(Help, "registers", "SubX currently supports eight 32-bit integer registers: R0 to R7.\n" "R4 (ESP) contains the top of the stack.\n" "\n" "There's also a register for the address of the currently executing\n" "instruction. It is modified by jumps.\n" "\n" "Various instructions modify one or more of three 1-bit 'flag' registers,\n" "as a side-effect:\n" "- the sign flag (SF): usually set if an arithmetic result is negative, or\n" " reset if not.\n" "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n" "- the overflow flag (OF): usually set if an arithmetic result overflows.\n" "The flag bits are read by conditional jumps.\n" "\n" "We don't support non-integer (floating-point) registers yet.\n" ); :(before "End Globals") // the subset of x86 flag registers we care about bool SF = false; // sign flag bool ZF = false; // zero flag bool OF = false; // overflow flag :(before "End Reset") SF = ZF = OF = false; //: how the flag registers are updated after each instruction :(before "End Includes") // Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the // result in 'arg1', then update flags. // beware: no side-effects in args #define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \ /* arg1 and arg2 must be signed */ \ int64_t tmp = arg1 op arg2; \ arg1 = arg1 op arg2; \ trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \ SF = (arg1 < 0); \ ZF = (arg1 == 0); \ OF = (arg1 != tmp); \ } // Combine 'arg1' and 'arg2' with bitwise operation 'op' and store the result // in 'arg1', then update flags. #define BINARY_BITWISE_OP(op, arg1, arg2) { \ /* arg1 and arg2 must be unsigned */ \ arg1 = arg1 op arg2; \ trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \ SF = (arg1 >> 31); \ ZF = (arg1 == 0); \ OF = false; \ } //:: simulated RAM :(before "End Globals") vector Mem; uint32_t Mem_offset = 0; uint32_t End_of_program = 0; :(before "End Reset") Mem.clear(); Mem.resize(1024); Mem_offset = 0; End_of_program = 0; :(code) // These helpers depend on Mem being laid out contiguously (so you can't use a // map, etc.) and on the host also being little-endian. inline uint8_t read_mem_u8(uint32_t addr) { return Mem.at(addr-Mem_offset); } inline int8_t read_mem_i8(uint32_t addr) { return static_cast(Mem.at(addr-Mem_offset)); } inline uint32_t read_mem_u32(uint32_t addr) { return *reinterpret_cast(&Mem.at(addr-Mem_offset)); } inline int32_t read_mem_i32(uint32_t addr) { return *reinterpret_cast(&Mem.at(addr-Mem_offset)); } inline uint8_t* mem_addr_u8(uint32_t addr) { return &Mem.at(addr-Mem_offset); } inline int8_t* mem_addr_i8(uint32_t addr) { return reinterpret_cast(&Mem.at(addr-Mem_offset)); } inline char* mem_addr_string(uint32_t addr) { return reinterpret_cast(&Mem.at(addr-Mem_offset)); } inline uint32_t* mem_addr_u32(uint32_t addr) { return reinterpret_cast(&Mem.at(addr-Mem_offset)); } inline int32_t* mem_addr_i32(uint32_t addr) { return reinterpret_cast(&Mem.at(addr-Mem_offset)); } inline void write_mem_u8(uint32_t addr, uint8_t val) { Mem.at(addr-Mem_offset) = val; } inline void write_mem_i8(uint32_t addr, int8_t val) { Mem.at(addr-Mem_offset) = static_cast(val); } inline void write_mem_u32(uint32_t addr, uint32_t val) { *reinterpret_cast(&Mem.at(addr-Mem_offset)) = val; } inline void write_mem_i32(uint32_t addr, int32_t val) { *reinterpret_cast(&Mem.at(addr-Mem_offset)) = val; } //:: core interpreter loop :(code) // skeleton of how x86 instructions are decoded void run_one_instruction() { uint8_t op=0, op2=0, op3=0; trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end(); //? dump_registers(); //? cerr << "inst: 0x" << EIP << " => "; op = next(); //? cerr << HEXBYTE << NUM(op) << '\n'; switch (op) { case 0xf4: // hlt EIP = End_of_program; break; // End Single-Byte Opcodes case 0x0f: switch(op2 = next()) { // End Two-Byte Opcodes Starting With 0f default: cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n'; DUMP(""); exit(1); } break; case 0xf2: switch(op2 = next()) { // End Two-Byte Opcodes Starting With f2 case 0x0f: switch(op3 = next()) { // End Three-Byte Opcodes Starting With f2 0f default: cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n'; DUMP(""); exit(1); } break; default: cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n'; DUMP(""); exit(1); } break; case 0xf3: switch(op2 = next()) { // End Two-Byte Opcodes Starting With f3 case 0x0f: switch(op3 = next()) { // End Three-Byte Opcodes Starting With f3 0f default: cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n'; DUMP(""); exit(1); } break; default: cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n'; DUMP(""); exit(1); } break; default: cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n'; DUMP(""); exit(1); } } inline uint8_t next() { return read_mem_u8(EIP++); } void dump_registers() { for (int i = 0; i < NUM_INT_REGISTERS; ++i) { if (i > 0) cerr << "; "; cerr << " " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u; } cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n'; } //: start tracking supported opcodes :(before "End Globals") map name; map name_0f; map name_f3; map name_f3_0f; :(before "End One-time Setup") init_op_names(); :(code) void init_op_names() { put(name, "f4", "halt"); // End Initialize Op Names(name) } :(before "End Help Special-cases(key)") if (key == "opcodes") { cerr << "Opcodes currently supported by SubX:\n"; for (map::iterator p = name.begin(); p != name.end(); ++p) cerr << " " << p->first << ": " << p->second << '\n'; for (map::iterator p = name_0f.begin(); p != name_0f.end(); ++p) cerr << " 0f " << p->first << ": " << p->second << '\n'; for (map::iterator p = name_f3.begin(); p != name_f3.end(); ++p) cerr << " f3 " << p->first << ": " << p->second << '\n'; for (map::iterator p = name_f3_0f.begin(); p != name_f3_0f.end(); ++p) cerr << " f3 0f " << p->first << ": " << p->second << '\n'; cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n"; return 0; } :(before "End Help Contents") cerr << " opcodes\n"; :(before "End Includes") #include #define HEXBYTE std::hex << std::setw(2) << std::setfill('0') #define HEXWORD std::hex << std::setw(8) << std::setfill('0') // ugly that iostream doesn't print uint8_t as an integer #define NUM(X) static_cast(X) #include