1 //: Core data structures for simulating the SubX VM (subset of an x86 processor)
  2 //:
  3 //: At the lowest level ("level 1") of abstraction, SubX executes x86
  4 //: instructions provided in the form of an array of bytes, loaded into memory
  5 //: starting at a specific address.
  6 
  7 //:: registers
  8 //: assume segment registers are hard-coded to 0
  9 //: no floating-point, MMX, etc. yet
 10 
 11 :(before "End Types")
 12 enum {
 13   EAX,
 14   ECX,
 15   EDX,
 16   EBX,
 17   ESP,
 18   EBP,
 19   ESI,
 20   EDI,
 21   NUM_INT_REGISTERS,
 22 };
 23 union reg {
 24   int32_t i;
 25   uint32_t u;
 26 };
 27 :(before "End Globals")
 28 reg Reg[NUM_INT_REGISTERS] = { {0} };
 29 uint32_t EIP = 1;  // preserve null pointer
 30 :(before "End Reset")
 31 bzero(Reg, sizeof(Reg));
 32 EIP = 1;  // preserve null pointer
 33 
 34 :(before "End Help Contents")
// Advertise the "registers" topic (presumably in the list of available help
// topics -- confirm against the layer that defines "End Help Contents").
cerr << "  registers\n";
:(before "End Help Texts")
// Text stored under the "registers" key of Help.
put(Help, "registers",
  "SubX currently supports eight 32-bit integer registers: R0 to R7.\n"
  "R4 (ESP) contains the top of the stack.\n"
  "\n"
  "There's also a register for the address of the currently executing\n"
  "instruction. It is modified by jumps.\n"
  "\n"
  "Various instructions modify one or more of three 1-bit 'flag' registers,\n"
  "as a side-effect:\n"
  "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
  "  reset if not.\n"
  "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
  "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
  "The flag bits are read by conditional jumps.\n"
  "\n"
  "We don't support non-integer (floating-point) registers yet.\n"
);
 54 
 55 :(before "End Globals")
 56 // the subset of x86 flag registers we care about
 57 bool SF = false;  // sign flag
 58 bool ZF = false;  // zero flag
 59 bool OF = false;  // overflow flag
 60 :(before "End Reset")
 61 SF = ZF = OF = false;
 62 
 63 //: how the flag registers are updated after each instruction
 64 
 65 :(before "End Includes")
 66 // Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
 67 // result in 'arg1', then update flags.
 68 // beware: no side-effects in args
 69 #define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
 70   /* arg1 and arg2 must be signed */ \
 71   int64_t tmp = arg1 op arg2; \
 72   arg1 = arg1 op arg2; \
 73   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
 74   SF = (arg1 < 0); \
 75   ZF = (arg1 == 0); \
 76   OF = (arg1 != tmp); \
 77 }
 78 
 79 // Combine 'arg1' and 'arg2' with bitwise operation 'op' and store the result
 80 // in 'arg1', then update flags.
 81 #define BINARY_BITWISE_OP(op, arg1, arg2) { \
 82   /* arg1 and arg2 must be unsigned */ \
 83   arg1 = arg1 op arg2; \
 84   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
 85   SF = (arg1 >> 31); \
 86   ZF = (arg1 == 0); \
 87   OF = false; \
 88 }
 89 
 90 //:: simulated RAM
 91 
 92 :(before "End Globals")
 93 vector<uint8_t> Mem;
 94 uint32_t Mem_offset = 0;
 95 uint32_t End_of_program = 0;
 96 :(before "End Reset")
 97 Mem.clear();
 98 Mem.resize(1024);
 99 Mem_offset = 0;
100 End_of_program = 0;
101 :(code)
102 // These helpers depend on Mem being laid out contiguously (so you can't use a
103 // map, etc.) and on the host also being little-endian.
104 inline uint8_t read_mem_u8(uint32_t addr) {
105   return Mem.at(addr-Mem_offset);
106 }
107 inline int8_t read_mem_i8(uint32_t addr) {
108   return static_cast<int8_t>(Mem.at(addr-Mem_offset));
109 }
110 inline uint32_t read_mem_u32(uint32_t addr) {
111   return *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
112 }
113 inline int32_t read_mem_i32(uint32_t addr) {
114   return *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
115 }
116 
117 inline uint8_t* mem_addr_u8(uint32_t addr) {
118   return &Mem.at(addr-Mem_offset);
119 }
120 inline int8_t* mem_addr_i8(uint32_t addr) {
121   return reinterpret_cast<int8_t*>(&Mem.at(addr-Mem_offset));
122 }
123 inline char* mem_addr_string(uint32_t addr) {
124   return reinterpret_cast<char*>(&Mem.at(addr-Mem_offset));
125 }
126 inline uint32_t* mem_addr_u32(uint32_t addr) {
127   return reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
128 }
129 inline int32_t* mem_addr_i32(uint32_t addr) {
130   return reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
131 }
132 
133 inline void write_mem_u8(uint32_t addr, uint8_t val) {
134   Mem.at(addr-Mem_offset) = val;
135 }
136 inline void write_mem_i8(uint32_t addr, int8_t val) {
137   Mem.at(addr-Mem_offset) = static_cast<uint8_t>(val);
138 }
139 inline void write_mem_u32(uint32_t addr, uint32_t val) {
140   *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset)) = val;
141 }
142 inline void write_mem_i32(uint32_t addr, int32_t val) {
143   *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset)) = val;
144 }
145 
146 //:: core interpreter loop
147 
148 :(code)
// skeleton of how x86 instructions are decoded
//
// Fetches the opcode byte at EIP (next() advances EIP past it) and dispatches
// on it. Opcodes 0f, f2 and f3 act as prefixes: a second byte is fetched and
// dispatched on, and after 'f2 0f' or 'f3 0f' a third byte as well.
// The only instruction handled in this skeleton is f4 (hlt), which sets EIP to
// End_of_program.
// Any opcode sequence that isn't recognized prints a message to cerr, calls
// DUMP("") and exits the process with status 1.
//
// NOTE(review): the '// End ...' comments inside the switches look like the
// markers that `:(before "End ...")` directives elsewhere splice new cases in
// front of -- keep their text and position unchanged.
void run_one_instruction() {
  uint8_t op=0, op2=0, op3=0;
  trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
//?   dump_registers();
//?   cerr << "inst: 0x" << EIP << " => ";
  op = next();
//?   cerr << HEXBYTE << NUM(op) << '\n';
  switch (op) {
  case 0xf4:  // hlt
    EIP = End_of_program;
    break;
  // End Single-Byte Opcodes
  case 0x0f:
    // two-byte opcodes: 0f xx
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With 0f
    default:
      cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  case 0xf2:
    // opcodes starting with f2: either f2 xx or f2 0f xx
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With f2
    case 0x0f:
      switch(op3 = next()) {
      // End Three-Byte Opcodes Starting With f2 0f
      default:
        cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
        DUMP("");
        exit(1);
      }
      break;
    default:
      cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  case 0xf3:
    // opcodes starting with f3: either f3 xx or f3 0f xx
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With f3
    case 0x0f:
      switch(op3 = next()) {
      // End Three-Byte Opcodes Starting With f3 0f
      default:
        cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
        DUMP("");
        exit(1);
      }
      break;
    default:
      cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  default:
    cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
    DUMP("");
    exit(1);
  }
}
213 
214 inline uint8_t next() {
215   return read_mem_u8(EIP++);
216 }
217 
218 void dump_registers() {
219   for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
220     if (i > 0) cerr << "; ";
221     cerr << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
222   }
223   cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
224 }
225 
226 //: start tracking supported opcodes
227 :(before "End Globals")
// Human-readable names of the supported opcodes, keyed by the opcode written
// as a string (e.g. "f4"). One map per opcode prefix, matching the way the
// "opcodes" help topic prints them: no prefix, '0f', 'f3', and 'f3 0f'.
map</*op*/string, string> name;
map</*op*/string, string> name_0f;
map</*op*/string, string> name_f3;
map</*op*/string, string> name_f3_0f;
232 :(before "End One-time Setup")
233 init_op_names();
234 :(code)
// Fill in the opcode-name maps that the "opcodes" help topic prints.
// This layer only knows about f4 (halt).
// NOTE(review): the '// End ...' comment looks like a marker that other
// layers splice further put() calls in front of -- keep it as is.
void init_op_names() {
  put(name, "f4", "halt");
  // End Initialize Op Names(name)
}
239 
240 :(before "End Help Special-cases(key)")
241 if (key == "opcodes") {
242   cerr << "Opcodes currently supported by SubX:\n";
243   for (map<string, string>::iterator p = name.begin();  p != name.end();  ++p)
244     cerr << "  " << p->first << ": " << p->second << '\n';
245   for (map<string, string>::iterator p = name_0f.begin();  p != name_0f.end();  ++p)
246     cerr << "  0f " << p->first << ": " << p->second << '\n';
247   for (map<string, string>::iterator p = name_f3.begin();  p != name_f3.end();  ++p)
248     cerr << "  f3 " << p->first << ": " << p->second << '\n';
249   for (map<string, string>::iterator p = name_f3_0f.begin();  p != name_f3_0f.end();  ++p)
250     cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
251   cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
252   return 0;
253 }
254 :(before "End Help Contents")
255 cerr << "  opcodes\n";
256 
257 :(before "End Includes")
258 #include <iomanip>
// Stream manipulator sequences: print the next integer in hex, zero-padded to
// 2 characters (HEXBYTE) or 8 characters (HEXWORD).
// Note: std::setw applies to the next item only, but std::hex and the fill
// character stay in effect on the stream afterwards.
#define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
#define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
// ugly that iostream doesn't print uint8_t as an integer
// (it is streamed as a character, so convert to int first)
#define NUM(X) static_cast<int>(X)
263 #include <stdint.h>