1 //: Core data structures for simulating the SubX VM (subset of an x86 processor)
  2 //:
  3 //: At the lowest level ("level 1") of abstraction, SubX executes x86
  4 //: instructions provided in the form of an array of bytes, loaded into memory
  5 //: starting at a specific address.
  6 
  7 //:: registers
  8 //: assume segment registers are hard-coded to 0
  9 //: no floating-point, MMX, etc. yet
 10 
 11 :(before "End Types")
// Names for the integer registers. Each enumerator is that register's index
// into Reg[]; the help text refers to the same registers as R0 to R7.
enum {
  EAX,
  ECX,
  EDX,
  EBX,
  ESP,  // described in the help text as containing the top of the stack
  EBP,
  ESI,
  EDI,
  NUM_INT_REGISTERS,  // not a register: the number of entries above, used to size Reg[]
};
// Contents of a single 32-bit integer register. Both members share the same
// storage, giving a signed and an unsigned view of the register.
union reg {
  int32_t i;  // signed view
  uint32_t u;  // unsigned view
};
 27 :(before "End Globals")
// the integer register file, indexed by the register enum (EAX..EDI); starts out zeroed
reg Reg[NUM_INT_REGISTERS] = { {0} };
// address of the next instruction byte to fetch (advanced by next())
uint32_t EIP = 1;  // preserve null pointer
 30 :(before "End Reset")
 31 bzero(Reg, sizeof(Reg));
 32 EIP = 1;  // preserve null pointer
 33 
 34 :(before "End Help Contents")
// list the 'registers' help topic (its text is registered under the key "registers")
cerr << "  registers\n";
 36 :(before "End Help Texts")
// Help text stored under the key "registers": summarizes the integer
// registers, the instruction-address register and the three flags (SF, ZF, OF).
put(Help, "registers",
  "SubX currently supports eight 32-bit integer registers: R0 to R7.\n"
  "R4 (ESP) contains the top of the stack.\n"
  "\n"
  "There's also a register for the address of the currently executing\n"
  "instruction. It is modified by jumps.\n"
  "\n"
  "Various instructions modify one or more of three 1-bit 'flag' registers,\n"
  "as a side-effect:\n"
  "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
  "  reset if not.\n"
  "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
  "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
  "The flag bits are read by conditional jumps.\n"
  "\n"
  "We don't support non-integer (floating-point) registers yet.\n"
);
 54 
 55 :(before "End Globals")
// the subset of x86 flag registers we care about
// (written by BINARY_ARITHMETIC_OP and BINARY_BITWISE_OP after each operation)
bool SF = false;  // sign flag: result was negative / had its top bit set
bool ZF = false;  // zero flag: result was zero
bool OF = false;  // overflow flag: arithmetic result overflowed; cleared by bitwise ops
 60 :(before "End Reset")
// clear all three flags
SF = ZF = OF = false;
 62 
 63 //: how the flag registers are updated after each instruction
 64 
 65 :(before "End Includes")
 66 // Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
 67 // result in 'arg1', then update flags.
 68 // beware: no side-effects in args
 69 #define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
 70   /* arg1 and arg2 must be signed */ \
 71   int64_t tmp = arg1 op arg2; \
 72   arg1 = arg1 op arg2; \
 73   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
 74   SF = (arg1 < 0); \
 75   ZF = (arg1 == 0); \
 76   OF = (arg1 != tmp); \
 77 }
 78 
 79 // Combine 'arg1' and 'arg2' with bitwise operation 'op' and store the result
 80 // in 'arg1', then update flags.
 81 #define BINARY_BITWISE_OP(op, arg1, arg2) { \
 82   /* arg1 and arg2 must be unsigned */ \
 83   arg1 = arg1 op arg2; \
 84   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
 85   SF = (arg1 >> 31); \
 86   ZF = (arg1 == 0); \
 87   OF = false; \
 88 }
 89 
 90 //:: simulated RAM
 91 
 92 :(before "End Globals")
// simulated RAM, one element per byte
vector<uint8_t> Mem;
// simulated address of Mem[0]; the memory helpers subtract it from every address
uint32_t Mem_offset = 0;
// value the interpreter stores in EIP when it executes 'hlt'
uint32_t End_of_program = 0;
 96 :(before "End Reset")
 97 Mem.clear();
 98 Mem.resize(1024);
 99 Mem_offset = 0;
100 End_of_program = 0;
101 :(code)
102 // These helpers depend on Mem being laid out contiguously (so you can't use a
103 // map, etc.) and on the host also being little-endian.
104 inline uint8_t read_mem_u8(uint32_t addr) {
105   return Mem.at(addr-Mem_offset);
106 }
107 inline int8_t read_mem_i8(uint32_t addr) {
108   return static_cast<int8_t>(Mem.at(addr-Mem_offset));
109 }
110 inline uint32_t read_mem_u32(uint32_t addr) {
111   return *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
112 }
113 inline int32_t read_mem_i32(uint32_t addr) {
114   return *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
115 }
116 
117 inline uint8_t* mem_addr_u8(uint32_t addr) {
118   return &Mem.at(addr-Mem_offset);
119 }
120 inline int8_t* mem_addr_i8(uint32_t addr) {
121   return reinterpret_cast<int8_t*>(&Mem.at(addr-Mem_offset));
122 }
123 inline uint32_t* mem_addr_u32(uint32_t addr) {
124   return reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
125 }
126 inline int32_t* mem_addr_i32(uint32_t addr) {
127   return reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
128 }
129 
130 inline void write_mem_u8(uint32_t addr, uint8_t val) {
131   Mem.at(addr-Mem_offset) = val;
132 }
133 inline void write_mem_i8(uint32_t addr, int8_t val) {
134   Mem.at(addr-Mem_offset) = static_cast<uint8_t>(val);
135 }
136 inline void write_mem_u32(uint32_t addr, uint32_t val) {
137   *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset)) = val;
138 }
139 inline void write_mem_i32(uint32_t addr, int32_t val) {
140   *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset)) = val;
141 }
142 
143 //:: core interpreter loop
144 
145 :(code)
// skeleton of how x86 instructions are decoded
//
// Fetches the opcode byte at EIP (advancing EIP) and dispatches on it.
// Opcodes 0f and f3 act as prefixes: a second byte is fetched and dispatched
// on, and after 'f3 0f' a third one. Any byte sequence without a matching
// case prints a diagnostic to cerr, calls DUMP("") and exits with status 1.
//
// The comment lines starting with 'End' inside the switches must keep their
// exact text and position: they appear to be the anchors that other parts of
// the program name in :(before "...") directives to add their own cases.
void run_one_instruction() {
  uint8_t op=0, op2=0, op3=0;
  trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
//?   dump_registers();
//?   cerr << "inst: 0x" << EIP << " => ";
  op = next();
//?   cerr << HEXBYTE << NUM(op) << '\n';
  switch (op) {
  case 0xf4:  // hlt
    // presumably the caller stops running once EIP reaches End_of_program -- confirm against the run loop
    EIP = End_of_program;
    break;
  // End Single-Byte Opcodes
  case 0x0f:
    // two-byte opcodes with prefix 0f
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With 0f
    default:
      cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  case 0xf3:
    // two-byte opcodes with prefix f3
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With f3
    case 0x0f:
      // three-byte opcodes with prefix f3 0f
      switch(op3 = next()) {
      // End Three-Byte Opcodes Starting With f3 0f
      default:
        cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
        DUMP("");
        exit(1);
      }
      break;
    default:
      cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  default:
    cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
    DUMP("");
    exit(1);
  }
}
192 
193 inline uint8_t next() {
194   return read_mem_u8(EIP++);
195 }
196 
197 void dump_registers() {
198   for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
199     if (i > 0) cerr << "; ";
200     cerr << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
201   }
202   cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
203 }
204 
205 //: start tracking supported opcodes
206 :(before "End Globals")
// Descriptions of the supported opcodes, keyed by the opcode byte written as
// a hex string (e.g. "f4"). There is one table per prefix; the "opcodes"
// help topic prints the last three with "0f ", "f3 " and "f3 0f " prepended.
map</*op*/string, string> name;
map</*op*/string, string> name_0f;
map</*op*/string, string> name_f3;
map</*op*/string, string> name_f3_0f;
211 :(before "End One-time Setup")
// fill in the opcode description tables
init_op_names();
213 :(code)
// Populate the opcode description tables. Only 'hlt' (f4) is registered
// here. The comment line starting with 'End' must keep its exact text: it
// looks like the anchor other parts of the program use to add their entries.
void init_op_names() {
  put(name, "f4", "halt");
  // End Initialize Op Names(name)
}
218 
219 :(before "End Help Special-cases(key)")
220 if (key == "opcodes") {
221   cerr << "Opcodes currently supported by SubX:\n";
222   for (map<string, string>::iterator p = name.begin();  p != name.end();  ++p)
223     cerr << "  " << p->first << ": " << p->second << '\n';
224   for (map<string, string>::iterator p = name_0f.begin();  p != name_0f.end();  ++p)
225     cerr << "  0f " << p->first << ": " << p->second << '\n';
226   for (map<string, string>::iterator p = name_f3.begin();  p != name_f3.end();  ++p)
227     cerr << "  f3 " << p->first << ": " << p->second << '\n';
228   for (map<string, string>::iterator p = name_f3_0f.begin();  p != name_f3_0f.end();  ++p)
229     cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
230   cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
231   return 0;
232 }
233 :(before "End Help Contents")
// list the 'opcodes' help topic (handled by the key == "opcodes" special case)
cerr << "  opcodes\n";
235 
236 :(before "End Includes")
237 #include <iomanip>
// Stream manipulators for printing a value in hex, zero-padded to 2 digits
// (HEXBYTE) or 8 digits (HEXWORD). std::setw only affects the next value, but
// std::hex and std::setfill stay set on the stream afterwards.
#define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
#define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
// ugly that iostream doesn't print uint8_t as an integer
// (it prints it as a character, so convert to int first)
#define NUM(X) static_cast<int>(X)
242 #include <stdint.h>