https://github.com/akkartik/mu/blob/master/subx/010---vm.cc
//: Core data structures for simulating the SubX VM (subset of an x86 processor)
//:
//: At the lowest level ("level 1") of abstraction, SubX executes x86
//: instructions provided in the form of an array of bytes, loaded into memory
//: starting at a specific address.

//:: registers
//: assume segment registers are hard-coded to 0
//: no floating-point, MMX, etc. yet

:(before "End Types")
enum {
  EAX,
  ECX,
  EDX,
  EBX,
  ESP,
  EBP,
  ESI,
  EDI,
  NUM_INT_REGISTERS,
};
union reg {
  int32_t i;
  uint32_t u;
};
:(before "End Globals")
reg Reg[NUM_INT_REGISTERS] = { {0} };
uint32_t EIP = 1;  // preserve null pointer
:(before "End Reset")
bzero(Reg, sizeof(Reg));
EIP = 1;  // preserve null pointer

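//: A hypothetical illustration (not in the real code paths) of how the 'reg'
//: union lets the same 4 bytes be read as signed or unsigned:
//:   Reg[EAX].i = -1;          // write through the signed view
//:   // Reg[EAX].u now reads back as 0xffffffff
//:   Reg[EBX].u = 0x80000000;  // write through the unsigned view
//:   // Reg[EBX].i now reads back as -2147483648 (INT32_MIN)
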
:(before "End Help Contents")
cerr << "  registers\n";
:(before "End Help Texts")
put_new(Help, "registers",
  "SubX currently supports eight 32-bit integer registers. From 0 to 7, they are:\n"
  "  EAX ECX EDX EBX ESP EBP ESI EDI\n"
  "ESP contains the top of the stack.\n"
  "\n"
  "-- 8-bit registers\n"
  "Some instructions operate on eight *overlapping* 8-bit registers.\n"
  "From 0 to 7, they are:\n"
  "  AL CL DL BL AH CH DH BH\n"
  "The 8-bit registers overlap with the 32-bit ones. AL is the least significant byte\n"
  "of EAX, AH is the second least significant byte, and so on.\n"
  "\n"
  "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
  "\n"
  "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
  "\n"
  "For complete details consult the IA-32 software developer's manual, volume 2,\n"
  "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
  "It is included in this repository as 'modrm.pdf'.\n"
  "The register encodings are described in the top row of the table, but you'll need\n"
  "to spend some time with it.\n"
  "\n"
  "-- flag registers\n"
  "Various instructions (particularly 'compare') modify one or more of three 1-bit 'flag'\n"
  "registers, as a side-effect:\n"
  "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
  "  reset if not.\n"
  "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
  "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
  "The flag bits are read by conditional jumps.\n"
  "\n"
  "For complete details on how different instructions update the flags, consult the IA-32\n"
  "manual (volume 2). There are various versions of it online, such as https://c9x.me/x86,\n"
  "though of course you'll need to be careful to ignore instructions and flag registers\n"
  "that SubX doesn't support.\n"
  "\n"
  "It isn't simple, but if this is the processor you have running on your computer,\n"
  "you might as well get good at it.\n"
);

:(before "End Globals")
// the subset of x86 flag registers we care about
bool SF = false;  // sign flag
bool ZF = false;  // zero flag
bool OF = false;  // overflow flag
:(before "End Reset")
SF = ZF = OF = false;

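//: A worked example (illustration only) of how these flags encode a
//: comparison: 'compare' subtracts its operands and keeps only the flags.
//:   comparing 3 with 7 computes 3-7 = -4  =>  SF=1, ZF=0  (less than)
//:   comparing 7 with 7 computes 7-7 =  0  =>  SF=0, ZF=1  (equal)
//:   comparing 9 with 7 computes 9-7 =  2  =>  SF=0, ZF=0  (greater than)
//: (no overflow in any of these, so OF=0 throughout)
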
//: how the flag registers are updated after each instruction

:(before "End Includes")
// Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
// result in 'arg1', then update flags.
// beware: args must be free of side-effects, since they're evaluated more than once
#define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
  /* arg1 and arg2 must be signed */ \
  int64_t tmp = arg1 op arg2; \
  arg1 = arg1 op arg2; \
  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  SF = (arg1 < 0); \
  ZF = (arg1 == 0); \
  OF = (arg1 != tmp); \
}

// Combine 'arg1' and 'arg2' with bitwise operation 'op' and store the result
// in 'arg1', then update flags.
#define BINARY_BITWISE_OP(op, arg1, arg2) { \
  /* arg1 and arg2 must be unsigned */ \
  arg1 = arg1 op arg2; \
  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
  SF = (arg1 >> 31); \
  ZF = (arg1 == 0); \
  OF = false; \
}

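//: A hypothetical trace through BINARY_ARITHMETIC_OP (not part of the build)
//: showing how overflow is detected by comparing the 64-bit and 32-bit results:
//: adding 0x7fffffff and 1 wraps the 32-bit 'arg1' to -2147483648 (0x80000000)
//: while the 64-bit 'tmp' holds +2147483648; they disagree, so OF is set.
//: SF is also set (the wrapped result is negative) and ZF is clear.
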
//:: simulated RAM

:(before "End Types")
const uint32_t SEGMENT_ALIGNMENT = 0x1000000;  // 16MB
inline uint32_t align_upwards(uint32_t x, uint32_t align) {
  return (x+align-1) & -(align);
}

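//: A quick sanity check on align_upwards (illustration only; assumes 'align'
//: is a power of 2, which is how it's always called in this VM):
//:   align_upwards(0x0001, 0x1000) == 0x1000
//:   align_upwards(0x1000, 0x1000) == 0x1000  (already aligned: unchanged)
//:   align_upwards(0x1001, 0x1000) == 0x2000
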
// Like in real-world Linux, we'll allocate RAM for our programs in disjoint
// slabs called VMAs or Virtual Memory Areas.
struct vma {
  uint32_t start;  // inclusive
  uint32_t end;  // exclusive
  vector<uint8_t> _data;
  vma(uint32_t s, uint32_t e) :start(s), end(e) {}
  vma(uint32_t s) :start(s), end(align_upwards(s+1, SEGMENT_ALIGNMENT)) {}
  bool match(uint32_t a) {
    return a >= start && a < end;
  }
  bool match32(uint32_t a) {
    return a >= start && a+4 <= end;
  }
  uint8_t& data(uint32_t a) {
    assert(match(a));
    uint32_t result_index = a-start;
    if (_data.size() <= result_index) {
      const int align = 0x1000;
      uint32_t result_size = result_index + 1;  // size needed for result_index to be valid
      uint32_t new_size = align_upwards(result_size, align);
      // grow at least 2x to maintain some amortized complexity guarantees
      if (new_size < _data.size() * 2)
        new_size = _data.size() * 2;
      // never grow past the stated limit
      if (new_size > end-start)
        new_size = end-start;
      _data.resize(new_size);
    }
    return _data.at(result_index);
  }
  void grow_until(uint32_t new_end_address) {
    if (new_end_address < end) return;
    // Ugly: vma knows about the global Mem list of vmas
    void sanity_check(uint32_t start, uint32_t end);
    sanity_check(start, new_end_address);
    end = new_end_address;
  }
  // End vma Methods
};
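
//: An illustration of vma's lazy allocation (hypothetical addresses, not part
//: of the build):
//:   vma v(0x60000000);   // covers [0x60000000, 0x61000000) but owns no host RAM yet
//:   v.data(0x60000123);  // first access: _data grows to 0x1000 bytes
//:   v.data(0x60003000);  // _data grows to 0x4000 bytes (the page-aligned need
//:                        // exceeds mere doubling, so the aligned size wins)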
:(code)
void sanity_check(uint32_t start, uint32_t end) {
  bool dup_found = false;
  for (int i = 0;  i < SIZE(Mem);  ++i) {
    const vma& curr = Mem.at(i);
    if (curr.start == start) {
      assert(!dup_found);
      dup_found = true;
    }
    else if (curr.start > start) {
      assert(curr.start > end);
    }
    else if (curr.start < start) {
      assert(curr.end < start);
    }
  }
}
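
//: In other words, sanity_check enforces that the (possibly grown) range
//: [start, end) stays disjoint from every other vma. Hypothetical example: with
//: vmas starting at 0x09000000 and 0xbe000000, growing the first to 0x0a000000
//: passes, while growing it to 0xbf000000 would trip the assertion against the
//: second vma.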

:(before "End Globals")
// RAM is made of VMAs.
vector<vma> Mem;
:(code)
// The first 3 VMAs are special. When loading ELF binaries in later layers,
// we'll assume that the first VMA is for code, the second is for data
// (including the heap), and the third for the stack.
void grow_code_segment(uint32_t new_end_address) {
  assert(!Mem.empty());
  Mem.at(0).grow_until(new_end_address);
}
void grow_data_segment(uint32_t new_end_address) {
  assert(SIZE(Mem) > 1);
  Mem.at(1).grow_until(new_end_address);
}
:(before "End Globals")
uint32_t End_of_program = 0;  // in tests, stop the test when the program executes past this address
// The stack grows downward. Can't increase its size for now.
:(before "End Reset")
Mem.clear();
End_of_program = 0;
:(code)
// These helpers depend on the simulated memory within each VMA being laid out
// contiguously (so we can't store it in a map, etc.) and on the host also
// being little-endian.
inline uint8_t read_mem_u8(uint32_t addr) {
  uint8_t* handle = mem_addr_u8(addr);  // error messages get printed here
  return handle ? *handle : 0;
}
inline int8_t read_mem_i8(uint32_t addr) {
  return static_cast<int8_t>(read_mem_u8(addr));
}
inline uint32_t read_mem_u32(uint32_t addr) {
  uint32_t* handle = mem_addr_u32(addr);  // error messages get printed here
  return handle ? *handle : 0;
}
inline int32_t read_mem_i32(uint32_t addr) {
  return static_cast<int32_t>(read_mem_u32(addr));
}

inline uint8_t* mem_addr_u8(uint32_t addr) {
  uint8_t* result = NULL;
  for (int i = 0;  i < SIZE(Mem);  ++i) {
    if (Mem.at(i).match(addr)) {
      if (result)
        raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
      result = &Mem.at(i).data(addr);
    }
  }
  if (result == NULL)
    raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
  return result;
}
inline int8_t* mem_addr_i8(uint32_t addr) {
  return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
}
inline uint32_t* mem_addr_u32(uint32_t addr) {
  uint32_t* result = NULL;
  for (int i = 0;  i < SIZE(Mem);  ++i) {
    if (Mem.at(i).match32(addr)) {
      if (result)
        raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
      result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
    }
  }
  if (result == NULL) {
    raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
    raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
  }
  return result;
}
inline int32_t* mem_addr_i32(uint32_t addr) {
  return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
}
// Helper for some syscalls; the result must be treated as read-only.
inline const char* mem_addr_kernel_string(uint32_t addr) {
  return reinterpret_cast<const char*>(mem_addr_u8(addr));
}
inline string mem_addr_string(uint32_t addr, uint32_t size) {
  ostringstream out;
  for (size_t i = 0;  i < size;  ++i)
    out << read_mem_u8(addr+i);
  return out.str();
}

inline void write_mem_u8(uint32_t addr, uint8_t val) {
  uint8_t* handle = mem_addr_u8(addr);
  if (handle != NULL) *handle = val;
}
inline void write_mem_i8(uint32_t addr, int8_t val) {
  int8_t* handle = mem_addr_i8(addr);
  if (handle != NULL) *handle = val;
}
inline void write_mem_u32(uint32_t addr, uint32_t val) {
  uint32_t* handle = mem_addr_u32(addr);
  if (handle != NULL) *handle = val;
}
inline void write_mem_i32(uint32_t addr, int32_t val) {
  int32_t* handle = mem_addr_i32(addr);
  if (handle != NULL) *handle = val;
}

inline bool already_allocated(uint32_t addr) {
  bool result = false;
  for (int i = 0;  i < SIZE(Mem);  ++i) {
    if (Mem.at(i).match(addr)) {
      if (result)
        raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
      result = true;
    }
  }
  return result;
}
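
//: A hypothetical usage sketch (not part of the build; addresses made up)
//: tying the helpers above together:
//:   Mem.push_back(vma(0x60000000));  // carve out a fresh segment
//:   write_mem_u32(0x60000000, 0x11223344);
//:   read_mem_u8(0x60000000);   // == 0x44 on a little-endian host
//:   read_mem_u8(0x60000003);   // == 0x11
//:   read_mem_u32(0x70000000);  // raises "uninitialized memory" and returns 0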

//:: core interpreter loop

:(code)
// skeleton of how x86 instructions are decoded
void run_one_instruction() {
  uint8_t op=0, op2=0, op3=0;
  // Run One Instruction
  if (Trace_file) {
    dump_registers();
    // End Dump Info for Instruction
  }
  uint32_t inst_start_address = EIP;
  op = next();
  trace(Callstack_depth, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << (op == 0xe8 ? "/call" : "") << end();
  switch (op) {
  case 0xf4:  // hlt
    EIP = End_of_program;
    break;
  // End Single-Byte Opcodes
  case 0x0f:
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With 0f
    default:
      cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  case 0xf2:
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With f2
    case 0x0f:
      switch(op3 = next()) {
      // End Three-Byte Opcodes Starting With f2 0f
      default:
        cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
        DUMP("");
        exit(1);
      }
      break;
    default:
      cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  case 0xf3:
    switch(op2 = next()) {
    // End Two-Byte Opcodes Starting With f3
    case 0x0f:
      switch(op3 = next()) {
      // End Three-Byte Opcodes Starting With f3 0f
      default:
        cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
        DUMP("");
        exit(1);
      }
      break;
    default:
      cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
      DUMP("");
      exit(1);
    }
    break;
  default:
    cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
    DUMP("");
    exit(1);
  }
}

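//: A hypothetical walk-through of the dispatch above (illustration only):
//: if the byte at EIP is 0xf4, next() returns it and advances EIP, the switch
//: hits 'case 0xf4', and EIP jumps to End_of_program, which the outer run loop
//: (defined in a later layer) treats as "stop". Later layers splice additional
//: cases into hooks like '// End Single-Byte Opcodes' above.
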
inline uint8_t next() {
  return read_mem_u8(EIP++);
}

void dump_registers() {
  ostringstream out;
  out << "registers: ";
  for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
    if (i > 0) out << "; ";
    out << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
  }
  out << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF;
  trace(Callstack_depth+1, "run") << out.str() << end();
}

//: start tracking supported opcodes
:(before "End Globals")
map</*op*/string, string> Name;
map</*op*/string, string> Name_0f;
map</*op*/string, string> Name_f3;
map</*op*/string, string> Name_f3_0f;
:(before "End One-time Setup")
init_op_names();
:(code)
void init_op_names() {
  put(Name, "f4", "halt (hlt)");
  // End Initialize Op Names
}

:(before "End Help Special-cases(key)")
if (key == "opcodes") {
  cerr << "Opcodes currently supported by SubX:\n";
  for (map<string, string>::iterator p = Name.begin();  p != Name.end();  ++p)
    cerr << "  " << p->first << ": " << p->second << '\n';
  for (map<string, string>::iterator p = Name_0f.begin();  p != Name_0f.end();  ++p)
    cerr << "  0f " << p->first << ": " << p->second << '\n';
  for (map<string, string>::iterator p = Name_f3.begin();  p != Name_f3.end();  ++p)
    cerr << "  f3 " << p->first << ": " << p->second << '\n';
  for (map<string, string>::iterator p = Name_f3_0f.begin();  p != Name_f3_0f.end();  ++p)
    cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
  cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n"
          "For complete details on these instructions, consult the IA-32 manual (volume 2).\n"
          "There are various versions of it online, such as https://c9x.me/x86.\n"
          "The mnemonics in brackets will help you locate each instruction.\n";
  return 0;
}
:(before "End Help Contents")
cerr << "  opcodes\n";

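//: A sketch of how a later layer might plug in a new single-byte opcode
//: (hypothetical example; the real opcode definitions live in later layers):
//:   :(before "End Initialize Op Names")
//:   put(Name, "90", "no-op (nop)");
//:   :(before "End Single-Byte Opcodes")
//:   case 0x90:  // nop
//:     break;
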
//: Helpers for managing trace depths
//:
//: We're going to use trace depths primarily to segment code running at
//: different frames of the call stack. This will make it easy for the trace
//: browser to collapse over entire calls.
//:
//: Errors will be at depth 0.
//: Warnings will be at depth 1.
//: SubX instructions will occupy depths from 2 up to Max_depth, organized by
//: stack frames. Each instruction's internal details will be one level deeper
//: than its 'main' depth. So 'call' instruction details will be at the same
//: depth as the instructions of the function it calls.
:(before "End Globals")
extern const int Initial_callstack_depth = 2;
int Callstack_depth = Initial_callstack_depth;
:(before "End Reset")
Callstack_depth = Initial_callstack_depth;

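//: For example: instructions at the top of the call stack run at depth 2 and
//: emit their details at depth 3; after a 'call', the callee's instructions
//: also run at depth 3 and their details at depth 4, and so on down the stack.
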
:(before "End Includes")
#include <iomanip>
#define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
#define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
// ugly that iostream doesn't print uint8_t as an integer
#define NUM(X) static_cast<int>(X)
#include <stdint.h>
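
//: A small illustration (not part of the build) of why NUM() exists: iostream
//: prints a uint8_t as a character rather than a number.
//:   uint8_t op = 0x0f;
//:   cerr << HEXBYTE << NUM(op);  // prints "0f"
//:   cerr << HEXBYTE << op;       // prints the raw byte as a character instead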