1 //: Core data structures for simulating the SubX VM (subset of an x86 processor)
  2 //:
  3 //: At the lowest level ("level 1") of abstraction, SubX executes x86
  4 //: instructions provided in the form of an array of bytes, loaded into memory
  5 //: starting at a specific address.
  6 
  7 //:: registers
  8 //: assume segment registers are hard-coded to 0
  9 //: no floating-point, MMX, etc. yet
 10 
 11 :(before "End Types")
 12 enum {
 13   EAX,
 14   ECX,
 15   EDX,
 16   EBX,
 17   ESP,
 18   EBP,
 19   ESI,
 20   EDI,
 21   NUM_INT_REGISTERS,
 22 };
 23 union reg {
 24   int32_t i;
 25   uint32_t u;
 26 };
 27 :(before "End Globals")
 28 reg Reg[NUM_INT_REGISTERS] = { {0} };
 29 uint32_t EIP = 1;  // preserve null pointer
 30 :(before "End Reset")
 31 bzero(Reg, sizeof(Reg));
 32 EIP = 1;  // preserve null pointer
 33 
:(before "End Help Contents")
// advertise the 'registers' topic in the list of help contents
cerr << "  registers\n";
:(before "End Help Texts")
// the text filed in Help under the "registers" topic
put(Help, "registers",
  "SubX currently supports eight 32-bit integer registers: R0 to R7.\n"
  "R4 (ESP) contains the top of the stack.\n"
  "\n"
  "There's also a register for the address of the currently executing\n"
  "instruction. It is modified by jumps.\n"
  "\n"
  "Various instructions modify one or more of three 1-bit 'flag' registers,\n"
  "as a side-effect:\n"
  "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
  "  reset if not.\n"
  "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
  "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
  "The flag bits are read by conditional jumps.\n"
  "\n"
  "We don't support non-integer (floating-point) registers yet.\n"
);
 54 
 55 :(before "End Globals")
 56 // the subset of x86 flag registers we care about
 57 bool SF = false;  // sign flag
 58 bool ZF = false;  // zero flag
 59 bool OF = false;  // overflow flag
 60 :(before "End Reset")
 61 SF = ZF = OF = false;
 62 
 63 //: how the flag registers are updated after each instruction
 64 
 65 :(before "End Includes")
 66 // Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
 67 // result in 'arg1', then update flags.
 68 // beware: no side-effects in args
 69 #define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
 70   /* arg1 and arg2 must be signed */ \
 71   int64_t tmp = arg1 op arg2; \
 72   arg1 = arg1 op arg2; \
 73   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
 74   SF = (arg1 < 0); \
 75   ZF = (arg1 == 0); \
 76   OF = (arg1 != tmp); \
 77 }
 78 
 79 // Combine 'arg1' and 'arg2' with bitwise operation 'op' and store the result
 80 // in 'arg1', then update flags.
 81 #define BINARY_BITWISE_OP(op, arg1, arg2) { \
 82   /* arg1 and arg2 must be unsigned */ \
 83   arg1 = arg1 op arg2; \
 84   trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
 85   SF = (arg1 >> 31); \
 86   ZF = (arg1 == 0); \
 87   OF = false; \
 88 }
 89 
 90 //:: simulated RAM
 91 
 92 :(before "End Globals")
 93 vector<uint8_t> Mem;
 94 uint32_t Mem_offset = 0;
 95 uint32_t End_of_program = 0;
 96 :(before "End Reset")
 97 Mem.clear();
 98 Mem.resize(1024);
 99 Mem_offset = 0;
100 End_of_program = 0;
101 :(code)
102 // These helpers depend on Mem being laid out contiguously (so you can't use a
103 // map, etc.) and on the host also being little-endian.
104 inline uint8_t read_mem_u8(uint32_t addr) {
105   return Mem.at(addr-Mem_offset);
106 }
107 inline int8_t read_mem_i8(uint32_t addr) {
108   return static_cast<int8_t>(Mem.at(addr-Mem_offset));
109 }
110 inline uint32_t read_mem_u32(uint32_t addr) {
111   return *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
112 }
113 inline int32_t read_mem_i32(uint32_t addr) {
114   return *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
115 }
116 
117 inline uint8_t* mem_addr_u8(uint32_t addr) {
118   return &Mem.at(addr-Mem_offset);
119 }
120 inline int8_t* mem_addr_i8(uint32_t addr) {
121   return reinterpret_cast<int8_t*>(&Mem.at(addr-Mem_offset));
122 }
123 inline uint32_t* mem_addr_u32(uint32_t addr) {
124   return reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
125 }
126 inline int32_t* mem_addr_i32(uint32_t addr) {
127   return reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
128 }
129 
130 inline void write_mem_u8(uint32_t addr, uint8_t val) {
131   Mem.at(addr-Mem_offset) = val;
132 }
133 inline void write_mem_i8(uint32_t addr, int8_t val) {
134   Mem.at(addr-Mem_offset) = static_cast<uint8_t>(val);
135 }
136 inline void write_mem_u32(uint32_t addr, uint32_t val) {
137   *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset)) = val;
138 }
139 inline void write_mem_i32(uint32_t addr, int32_t val) {
140   *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset)) = val;
141 }
142 
143 //:: core interpreter loop
144 
145 :(code)
// skeleton of how x86 instructions are decoded
//
// Fetch the opcode byte at EIP (advancing EIP) and dispatch on it. Opcodes 0f
// and f3 are followed by further opcode bytes, which are fetched and
// dispatched on in nested switches. The only instruction handled here is f4
// (hlt). Any unrecognized opcode is reported on cerr, followed by a call to
// DUMP and exit with status 1.
//
// The 'End ...' comments look like tangle markers where `:(before "...")`
// directives in other files insert further cases, so they must stay exactly
// as they are. NOTE(review): confirm against the other layers.
void run_one_instruction() {
  uint8_t op=0, op2=0, op3=0;
  trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
//?   cerr << "inst: 0x" << EIP << '\n';
  switch (op = next()) {
  case 0xf4:  // hlt
    // jump to End_of_program
    EIP = End_of_program;
    break;
  // End Single-Byte Opcodes
  case 0x0f:
    // the next byte selects the instruction
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With 0f
    default:
      cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  case 0xf3:
    // the next byte selects the instruction, or is 0f and defers to a third
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With f3
    case 0x0f:
      switch(op3 = next()) {
      // End Three-Byte Opcodes Starting With f3 0f
      default:
        cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
        DUMP("");
        exit(1);
      }
      break;
    default:
      cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  default:
    cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
    DUMP("");
    exit(1);
  }
}
189 
190 inline uint8_t next() {
191   return read_mem_u8(EIP++);
192 }
193 
194 //: start tracking supported opcodes
:(before "End Globals")
// Tables of supported opcodes, each mapping an opcode (as a string) to the
// text listed for it by the 'opcodes' help topic. 'name' is for single-byte
// opcodes; the other tables are for opcodes following the byte(s) in their
// suffix: 0f, f3, and f3 0f.
map</*op*/string, string> name;
map</*op*/string, string> name_0f;
map</*op*/string, string> name_f3;
map</*op*/string, string> name_f3_0f;
:(before "End One-time Setup")
init_op_names();
:(code)
// Populate the opcode tables. Only f4 is entered here.
// NOTE(review): the 'End Initialize Op Names' comment looks like a tangle
// marker other files insert entries before -- keep it exactly as is.
void init_op_names() {
  put(name, "f4", "halt");
  // End Initialize Op Names(name)
}
207 
208 :(before "End Help Special-cases(key)")
209 if (key == "opcodes") {
210   cerr << "Opcodes currently supported by SubX:\n";
211   for (map<string, string>::iterator p = name.begin();  p != name.end();  ++p)
212     cerr << "  " << p->first << ": " << p->second << '\n';
213   for (map<string, string>::iterator p = name_0f.begin();  p != name_0f.end();  ++p)
214     cerr << "  0f " << p->first << ": " << p->second << '\n';
215   for (map<string, string>::iterator p = name_f3.begin();  p != name_f3.end();  ++p)
216     cerr << "  f3 " << p->first << ": " << p->second << '\n';
217   for (map<string, string>::iterator p = name_f3_0f.begin();  p != name_f3_0f.end();  ++p)
218     cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
219   cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
220   return 0;
221 }
222 :(before "End Help Contents")
223 cerr << "  opcodes\n";
224 
225 :(before "End Includes")
#include <stdint.h>
#include <iomanip>
// Stream manipulators that make the next integer print in hex, zero-padded to
// 2 digits (HEXBYTE) or 8 digits (HEXWORD).
#define HEXBYTE  std::setfill('0') << std::setw(2) << std::hex
#define HEXWORD  std::setfill('0') << std::setw(8) << std::hex
// iostream prints uint8_t as a character; convert to int to print it as a
// number
#define NUM(X) static_cast<int>(X)