https://github.com/akkartik/mu/blob/master/010---vm.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14 :(before "End Types")
15 enum {
16 EAX,
17 ECX,
18 EDX,
19 EBX,
20 ESP,
21 EBP,
22 ESI,
23 EDI,
24 NUM_INT_REGISTERS,
25 };
26 union reg {
27 int32_t i;
28 uint32_t u;
29 };
30 :(before "End Globals")
31 reg Reg[NUM_INT_REGISTERS] = { {0} };
32 uint32_t EIP = 1;
33 :(before "End Reset")
34 bzero(Reg, sizeof(Reg));
35 EIP = 1;
36
37 :(before "End Help Contents")
38 cerr << " registers\n";
39 :(before "End Help Texts")
40 put_new(Help, "registers",
41 "SubX currently supports eight 32-bit integer registers. From 0 to 7, they are:\n"
42 " EAX ECX EDX EBX ESP EBP ESI EDI\n"
43 "ESP contains the top of the stack.\n"
44 "\n"
45 "-- 8-bit registers\n"
46 "Some instructions operate on eight *overlapping* 8-bit registers.\n"
47 "From 0 to 7, they are:\n"
48 " AL CL DL BL AH CH DH BH\n"
49 "The 8-bit registers overlap with the 32-bit ones. AL is the lowest signicant byte\n"
50 "of EAX, AH is the second lowest significant byte, and so on.\n"
51 "\n"
52 "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
53 "\n"
54 "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
55 "\n"
56 "For complete details consult the IA-32 software developer's manual, volume 2,\n"
57 "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
58 "It is included in this repository as 'modrm.pdf'.\n"
59 "The register encodings are described in the top row of the table, but you'll need\n"
60 "to spend some time with it.\n"
61 "\n"
62 "-- flag registers\n"
63 "Various instructions (particularly 'compare') modify one or more of four 1-bit\n"
64 "'flag' registers, as a side-effect:\n"
65 "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
66 " reset if not.\n"
67 "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
68 "- the carry flag (CF): usually set if an arithmetic result overflows by just one bit.\n"
69 " Useful for operating on unsigned numbers.\n"
70 "- the overflow flag (OF): usually set if an arithmetic result overflows by more\n"
71 " than one bit. Useful for operating on signed numbers.\n"
72 "The flag bits are read by conditional jumps.\n"
73 "\n"
74 "For complete details on how different instructions update the flags, consult the IA-32\n"
75 "manual (volume 2). There's various versions of it online, such as https://c9x.me/x86,\n"
76 "though of course you'll need to be careful to ignore instructions and flag registers\n"
77 "that SubX doesn't support.\n"
78 "\n"
79 "It isn't simple, but if this is the processor you have running on your computer.\n"
80 "Might as well get good at it.\n"
81 );
82
83 :(before "End Globals")
84
85 bool SF = false;
86 bool ZF = false;
87 bool CF = false;
88 bool OF = false;
89 :(before "End Reset")
90 SF = ZF = CF = OF = false;
91
92
93
94 :(before "End Types")
95 const uint32_t SEGMENT_ALIGNMENT = 0x1000000;
96 inline uint32_t align_upwards(uint32_t x, uint32_t align) {
97 return (x+align-1) & -(align);
98 }
99
100
101
102 struct vma {
103 uint32_t start;
104 uint32_t end;
105 vector<uint8_t> _data;
106 vma(uint32_t s, uint32_t e) :start(s), end(e) {}
107 vma(uint32_t s) :start(s), end(align_upwards(s+1, SEGMENT_ALIGNMENT)) {}
108 bool match(uint32_t a) {
109 return a >= start && a < end;
110 }
111 bool match32(uint32_t a) {
112 return a >= start && a+4 <= end;
113 }
114 uint8_t& data(uint32_t a) {
115 assert(match(a));
116 uint32_t result_index = a-start;
117 if (_data.size() <= result_index) {
118 const int align = 0x1000;
119 uint32_t result_size = result_index + 1;
120 uint32_t new_size = align_upwards(result_size, align);
121
122 if (new_size < _data.size() * 2)
123 new_size = _data.size() * 2;
124
125 if (new_size > end-start)
126 new_size = end-start;
127 _data.resize(new_size);
128 }
129 return _data.at(result_index);
130 }
131 void grow_until(uint32_t new_end_address) {
132 if (new_end_address < end) return;
133
134 void sanity_check(uint32_t start, uint32_t end);
135 sanity_check(start, new_end_address);
136 end = new_end_address;
137 }
138
139 };
140 :(code)
141 void sanity_check(uint32_t start, uint32_t end) {
142 bool dup_found = false;
143 for (int i = 0; i < SIZE(Mem); ++i) {
144 const vma& curr = Mem.at(i);
145 if (curr.start == start) {
146 assert(!dup_found);
147 dup_found = true;
148 }
149 else if (curr.start > start) {
150 assert(curr.start > end);
151 }
152 else if (curr.start < start) {
153 assert(curr.end < start);
154 }
155 }
156 }
157
158 :(before "End Globals")
159
160 vector<vma> Mem;
161 :(code)
162 :(before "End Globals")
163 uint32_t End_of_program = 0;
164
165 :(before "End Reset")
166 Mem.clear();
167 End_of_program = 0;
168 :(code)
169
170
171 inline uint8_t read_mem_u8(uint32_t addr) {
172 uint8_t* handle = mem_addr_u8(addr);
173 return handle ? *handle : 0;
174 }
175 inline int8_t read_mem_i8(uint32_t addr) {
176 return static_cast<int8_t>(read_mem_u8(addr));
177 }
178 inline uint32_t read_mem_u32(uint32_t addr) {
179 uint32_t* handle = mem_addr_u32(addr);
180 return handle ? *handle : 0;
181 }
182 inline int32_t read_mem_i32(uint32_t addr) {
183 return static_cast<int32_t>(read_mem_u32(addr));
184 }
185
186 inline uint8_t* mem_addr_u8(uint32_t addr) {
187 uint8_t* result = NULL;
188 for (int i = 0; i < SIZE(Mem); ++i) {
189 if (Mem.at(i).match(addr)) {
190 if (result)
191 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
192 result = &Mem.at(i).data(addr);
193 }
194 }
195 if (result == NULL) {
196 if (Trace_file) Trace_file.flush();
197 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
198 exit(1);
199 }
200 return result;
201 }
202 inline int8_t* mem_addr_i8(uint32_t addr) {
203 return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
204 }
205 inline uint32_t* mem_addr_u32(uint32_t addr) {
206 uint32_t* result = NULL;
207 for (int i = 0; i < SIZE(Mem); ++i) {
208 if (Mem.at(i).match32(addr)) {
209 if (result)
210 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
211 result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
212 }
213 }
214 if (result == NULL) {
215 if (Trace_file) Trace_file.flush();
216 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
217 raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
218 exit(1);
219 }
220 return result;
221 }
222 inline int32_t* mem_addr_i32(uint32_t addr) {
223 return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
224 }
225
226 inline const char* mem_addr_kernel_string(uint32_t addr) {
227 return reinterpret_cast<const char*>(mem_addr_u8(addr));
228 }
229 inline string mem_addr_string(uint32_t addr, uint32_t size) {
230 ostringstream out;
231 for (size_t i = 0; i < size; ++i)
232 out << read_mem_u8(addr+i);
233 return out.str();
234 }
235
236
237 inline void write_mem_u8(uint32_t addr, uint8_t val) {
238 uint8_t* handle = mem_addr_u8(addr);
239 if (handle != NULL) *handle = val;
240 }
241 inline void write_mem_i8(uint32_t addr, int8_t val) {
242 int8_t* handle = mem_addr_i8(addr);
243 if (handle != NULL) *handle = val;
244 }
245 inline void write_mem_u32(uint32_t addr, uint32_t val) {
246 uint32_t* handle = mem_addr_u32(addr);
247 if (handle != NULL) *handle = val;
248 }
249 inline void write_mem_i32(uint32_t addr, int32_t val) {
250 int32_t* handle = mem_addr_i32(addr);
251 if (handle != NULL) *handle = val;
252 }
253
254 inline bool already_allocated(uint32_t addr) {
255 bool result = false;
256 for (int i = 0; i < SIZE(Mem); ++i) {
257 if (Mem.at(i).match(addr)) {
258 if (result)
259 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
260 result = true;
261 }
262 }
263 return result;
264 }
265
266
267
268 :(code)
269
270 void run_one_instruction() {
271 uint8_t op=0, op2=0, op3=0;
272
273 if (Trace_file) {
274 dump_registers();
275
276 }
277 uint32_t inst_start_address = EIP;
278 op = next();
279 trace(Callstack_depth+1, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << end();
280 switch (op) {
281 case 0xf4:
282 EIP = End_of_program;
283 break;
284
285 case 0x0f:
286 switch(op2 = next()) {
287
288 default:
289 cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
290 exit(1);
291 }
292 break;
293 case 0xf2:
294 switch(op2 = next()) {
295
296 case 0x0f:
297 switch(op3 = next()) {
298
299 default:
300 cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
301 exit(1);
302 }
303 break;
304 default:
305 cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
306 exit(1);
307 }
308 break;
309 case 0xf3:
310 switch(op2 = next()) {
311
312 case 0x0f:
313 switch(op3 = next()) {
314
315 default:
316 cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
317 exit(1);
318 }
319 break;
320 default:
321 cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
322 exit(1);
323 }
324 break;
325 default:
326 cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
327 exit(1);
328 }
329 }
330
331 inline uint8_t next() {
332 return read_mem_u8(EIP++);
333 }
334
335 void dump_registers() {
336 ostringstream out;
337 out << "registers before: ";
338 for (int i = 0; i < NUM_INT_REGISTERS; ++i) {
339 if (i > 0) out << "; ";
340 out << " " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
341 }
342 out << " -- SF: " << SF << "; ZF: " << ZF << "; CF: " << CF << "; OF: " << OF;
343 trace(Callstack_depth+1, "run") << out.str() << end();
344 }
345
346
347 :(before "End Globals")
348 map<string, string> Name;
349 map<string, string> Name_0f;
350 map<string, string> Name_f3;
351 map<string, string> Name_f3_0f;
352 :(before "End One-time Setup")
353 init_op_names();
354 :(code)
355 void init_op_names() {
356 put(Name, "f4", "halt (hlt)");
357
358 }
359
360 :(before "End Help Special-cases(key)")
361 if (key == "opcodes") {
362 cerr << "Opcodes currently supported by SubX:\n";
363 for (map<string, string>::iterator p = Name.begin(); p != Name.end(); ++p)
364 cerr << " " << p->first << ": " << p->second << '\n';
365 for (map<string, string>::iterator p = Name_0f.begin(); p != Name_0f.end(); ++p)
366 cerr << " 0f " << p->first << ": " << p->second << '\n';
367 for (map<string, string>::iterator p = Name_f3.begin(); p != Name_f3.end(); ++p)
368 cerr << " f3 " << p->first << ": " << p->second << '\n';
369 for (map<string, string>::iterator p = Name_f3_0f.begin(); p != Name_f3_0f.end(); ++p)
370 cerr << " f3 0f " << p->first << ": " << p->second << '\n';
371 cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n"
372 "For complete details on these instructions, consult the IA-32 manual (volume 2).\n"
373 "There's various versions of it online, such as https://c9x.me/x86.\n"
374 "The mnemonics in brackets will help you locate each instruction.\n";
375 return 0;
376 }
377 :(before "End Help Contents")
378 cerr << " opcodes\n";
379
380
381
382
383
384
385
386
387
388
389
390
391
392 :(before "End Globals")
393 extern const int Initial_callstack_depth = 2;
394 int Callstack_depth = Initial_callstack_depth;
395 :(before "End Reset")
396 Callstack_depth = Initial_callstack_depth;
397
398 :(before "End Includes")
399 #include <iomanip>
400 #define HEXBYTE std::hex << std::setw(2) << std::setfill('0')
401 #define HEXWORD std::hex << std::setw(8) << std::setfill('0')
402
403 #define NUM(X) static_cast<int>(X)
404 #include <stdint.h>