https://github.com/akkartik/mu/blob/master/subx/010---vm.cc
1
2
3
4
5
6
7
8
9
10
11 :(before "End Types")
12 enum {
13 EAX,
14 ECX,
15 EDX,
16 EBX,
17 ESP,
18 EBP,
19 ESI,
20 EDI,
21 NUM_INT_REGISTERS,
22 };
23 union reg {
24 int32_t i;
25 uint32_t u;
26 };
27 :(before "End Globals")
28 reg Reg[NUM_INT_REGISTERS] = { {0} };
29 uint32_t EIP = 1;
30 :(before "End Reset")
31 bzero(Reg, sizeof(Reg));
32 EIP = 1;
33
34 :(before "End Help Contents")
35 cerr << " registers\n";
36 :(before "End Help Texts")
37 put_new(Help, "registers",
38 "SubX currently supports eight 32-bit integer registers. From 0 to 7, they are:\n"
39 " EAX ECX EDX EBX ESP EBP ESI EDI\n"
40 "ESP contains the top of the stack.\n"
41 "\n"
42 "-- 8-bit registers\n"
43 "Some instructions operate on eight *overlapping* 8-bit registers.\n"
44 "From 0 to 7, they are:\n"
45 " AL CL DL BL AH CH DH BH\n"
46 "The 8-bit registers overlap with the 32-bit ones. AL is the lowest signicant byte\n"
47 "of EAX, AH is the second lowest significant byte, and so on.\n"
48 "\n"
49 "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
50 "\n"
51 "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
52 "\n"
53 "For complete details consult the IA-32 software developer's manual, volume 2,\n"
54 "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
55 "It is included in this repository as 'modrm.pdf'.\n"
56 "The register encodings are described in the top row of the table, but you'll need\n"
57 "to spend some time with it.\n"
58 "\n"
59 "-- flag registers\n"
60 "Various instructions (particularly 'compare') modify one or more of three 1-bit 'flag'\n"
61 "registers, as a side-effect:\n"
62 "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
63 " reset if not.\n"
64 "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
65 "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
66 "The flag bits are read by conditional jumps.\n"
67 "\n"
68 "For complete details on how different instructions update the flags, consult the IA-32\n"
69 "manual (volume 2). There's various versions of it online, such as https://c9x.me/x86,\n"
70 "though of course you'll need to be careful to ignore instructions and flag registers\n"
71 "that SubX doesn't support.\n"
72 "\n"
73 "It isn't simple, but if this is the processor you have running on your computer,\n"
74 "might as well get good at it.\n"
75 );
76
77 :(before "End Globals")
78
79 bool SF = false;
80 bool ZF = false;
81 bool OF = false;
82 :(before "End Reset")
83 SF = ZF = OF = false;
84
85
86
87 :(before "End Includes")
88
89
90
91
92 \
93 int64_t tmp = arg1 op arg2; \
94 arg1 = arg1 op arg2; \
95 trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
96 SF = (arg1 < 0); \
97 ZF = (arg1 == 0); \
98 OF = (arg1 != tmp); \
99 }
100
101
102
103
104 \
105 arg1 = arg1 op arg2; \
106 trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
107 SF = (arg1 >> 31); \
108 ZF = (arg1 == 0); \
109 OF = false; \
110 }
111
112
113
114 :(before "End Types")
115 const uint32_t SEGMENT_ALIGNMENT = 0x1000000;
116 inline uint32_t align_upwards(uint32_t x, uint32_t align) {
117 return (x+align-1) & -(align);
118 }
119
120
121
122 struct vma {
123 uint32_t start;
124 uint32_t end;
125 vector<uint8_t> _data;
126 vma(uint32_t s, uint32_t e) :start(s), end(e) {}
127 vma(uint32_t s) :start(s), end(align_upwards(s+1, SEGMENT_ALIGNMENT)) {}
128 bool match(uint32_t a) {
129 return a >= start && a < end;
130 }
131 bool match32(uint32_t a) {
132 return a >= start && a+4 <= end;
133 }
134 uint8_t& data(uint32_t a) {
135 assert(match(a));
136 uint32_t result_index = a-start;
137 if (_data.size() <= result_index) {
138 const int align = 0x1000;
139 uint32_t result_size = result_index + 1;
140 uint32_t new_size = align_upwards(result_size, align);
141
142 if (new_size < _data.size() * 2)
143 new_size = _data.size() * 2;
144
145 if (new_size > end-start)
146 new_size = end-start;
147 _data.resize(new_size);
148 }
149 return _data.at(result_index);
150 }
151 void grow_until(uint32_t new_end_address) {
152 if (new_end_address < end) return;
153
154 void sanity_check(uint32_t start, uint32_t end);
155 sanity_check(start, new_end_address);
156 end = new_end_address;
157 }
158
159 };
160 :(code)
161 void sanity_check(uint32_t start, uint32_t end) {
162 bool dup_found = false;
163 for (int i = 0; i < SIZE(Mem); ++i) {
164 const vma& curr = Mem.at(i);
165 if (curr.start == start) {
166 assert(!dup_found);
167 dup_found = true;
168 }
169 else if (curr.start > start) {
170 assert(curr.start > end);
171 }
172 else if (curr.start < start) {
173 assert(curr.end < start);
174 }
175 }
176 }
177
178 :(before "End Globals")
179
180 vector<vma> Mem;
181 :(code)
182
183
184
185 void grow_code_segment(uint32_t new_end_address) {
186 assert(!Mem.empty());
187 Mem.at(0).grow_until(new_end_address);
188 }
189 void grow_data_segment(uint32_t new_end_address) {
190 assert(SIZE(Mem) > 1);
191 Mem.at(1).grow_until(new_end_address);
192 }
193 :(before "End Globals")
194 uint32_t End_of_program = 0;
195
196 :(before "End Reset")
197 Mem.clear();
198 End_of_program = 0;
199 :(code)
200
201
202 inline uint8_t read_mem_u8(uint32_t addr) {
203 uint8_t* handle = mem_addr_u8(addr);
204 return handle ? *handle : 0;
205 }
206 inline int8_t read_mem_i8(uint32_t addr) {
207 return static_cast<int8_t>(read_mem_u8(addr));
208 }
209 inline uint32_t read_mem_u32(uint32_t addr) {
210 uint32_t* handle = mem_addr_u32(addr);
211 return handle ? *handle : 0;
212 }
213 inline int32_t read_mem_i32(uint32_t addr) {
214 return static_cast<int32_t>(read_mem_u32(addr));
215 }
216
217 inline uint8_t* mem_addr_u8(uint32_t addr) {
218 uint8_t* result = NULL;
219 for (int i = 0; i < SIZE(Mem); ++i) {
220 if (Mem.at(i).match(addr)) {
221 if (result)
222 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
223 result = &Mem.at(i).data(addr);
224 }
225 }
226 if (result == NULL)
227 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
228 return result;
229 }
230 inline int8_t* mem_addr_i8(uint32_t addr) {
231 return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
232 }
233 inline uint32_t* mem_addr_u32(uint32_t addr) {
234 uint32_t* result = NULL;
235 for (int i = 0; i < SIZE(Mem); ++i) {
236 if (Mem.at(i).match32(addr)) {
237 if (result)
238 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
239 result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
240 }
241 }
242 if (result == NULL) {
243 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
244 raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
245 }
246 return result;
247 }
248 inline int32_t* mem_addr_i32(uint32_t addr) {
249 return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
250 }
251
252 inline const char* mem_addr_kernel_string(uint32_t addr) {
253 return reinterpret_cast<const char*>(mem_addr_u8(addr));
254 }
255 inline string mem_addr_string(uint32_t addr, uint32_t size) {
256 ostringstream out;
257 for (size_t i = 0; i < size; ++i)
258 out << read_mem_u8(addr+i);
259 return out.str();
260 }
261
262
263 inline void write_mem_u8(uint32_t addr, uint8_t val) {
264 uint8_t* handle = mem_addr_u8(addr);
265 if (handle != NULL) *handle = val;
266 }
267 inline void write_mem_i8(uint32_t addr, int8_t val) {
268 int8_t* handle = mem_addr_i8(addr);
269 if (handle != NULL) *handle = val;
270 }
271 inline void write_mem_u32(uint32_t addr, uint32_t val) {
272 uint32_t* handle = mem_addr_u32(addr);
273 if (handle != NULL) *handle = val;
274 }
275 inline void write_mem_i32(uint32_t addr, int32_t val) {
276 int32_t* handle = mem_addr_i32(addr);
277 if (handle != NULL) *handle = val;
278 }
279
280 inline bool already_allocated(uint32_t addr) {
281 bool result = false;
282 for (int i = 0; i < SIZE(Mem); ++i) {
283 if (Mem.at(i).match(addr)) {
284 if (result)
285 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
286 result = true;
287 }
288 }
289 return result;
290 }
291
292
293
294 :(code)
295
296 void run_one_instruction() {
297 uint8_t op=0, op2=0, op3=0;
298
299 if (Trace_file) {
300 dump_registers();
301
302 }
303 uint32_t inst_start_address = EIP;
304 op = next();
305 trace(Callstack_depth, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << (op == 0xe8 ? "/call" : "") << end();
306 switch (op) {
307 case 0xf4:
308 EIP = End_of_program;
309 break;
310
311 case 0x0f:
312 switch(op2 = next()) {
313
314 default:
315 cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
316 DUMP("");
317 exit(1);
318 }
319 break;
320 case 0xf2:
321 switch(op2 = next()) {
322
323 case 0x0f:
324 switch(op3 = next()) {
325
326 default:
327 cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
328 DUMP("");
329 exit(1);
330 }
331 break;
332 default:
333 cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
334 DUMP("");
335 exit(1);
336 }
337 break;
338 case 0xf3:
339 switch(op2 = next()) {
340
341 case 0x0f:
342 switch(op3 = next()) {
343
344 default:
345 cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
346 DUMP("");
347 exit(1);
348 }
349 break;
350 default:
351 cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
352 DUMP("");
353 exit(1);
354 }
355 break;
356 default:
357 cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
358 DUMP("");
359 exit(1);
360 }
361 }
362
363 inline uint8_t next() {
364 return read_mem_u8(EIP++);
365 }
366
367 void dump_registers() {
368 ostringstream out;
369 out << "registers: ";
370 for (int i = 0; i < NUM_INT_REGISTERS; ++i) {
371 if (i > 0) out << "; ";
372 out << " " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
373 }
374 out << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF;
375 trace(Callstack_depth+1, "run") << out.str() << end();
376 }
377
378
379 :(before "End Globals")
380 map<string, string> Name;
381 map<string, string> Name_0f;
382 map<string, string> Name_f3;
383 map<string, string> Name_f3_0f;
384 :(before "End One-time Setup")
385 init_op_names();
386 :(code)
387 void init_op_names() {
388 put(Name, "f4", "halt (hlt)");
389
390 }
391
392 :(before "End Help Special-cases(key)")
393 if (key == "opcodes") {
394 cerr << "Opcodes currently supported by SubX:\n";
395 for (map<string, string>::iterator p = Name.begin(); p != Name.end(); ++p)
396 cerr << " " << p->first << ": " << p->second << '\n';
397 for (map<string, string>::iterator p = Name_0f.begin(); p != Name_0f.end(); ++p)
398 cerr << " 0f " << p->first << ": " << p->second << '\n';
399 for (map<string, string>::iterator p = Name_f3.begin(); p != Name_f3.end(); ++p)
400 cerr << " f3 " << p->first << ": " << p->second << '\n';
401 for (map<string, string>::iterator p = Name_f3_0f.begin(); p != Name_f3_0f.end(); ++p)
402 cerr << " f3 0f " << p->first << ": " << p->second << '\n';
403 cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n"
404 "For complete details on these instructions, consult the IA-32 manual (volume 2).\n"
405 "There's various versions of it online, such as https://c9x.me/x86.\n"
406 "The mnemonics in brackets will help you locate each instruction.\n";
407 return 0;
408 }
409 :(before "End Help Contents")
410 cerr << " opcodes\n";
411
412
413
414
415
416
417
418
419
420
421
422
423
424 :(before "End Globals")
425 extern const int Initial_callstack_depth = 2;
426 int Callstack_depth = Initial_callstack_depth;
427 :(before "End Reset")
428 Callstack_depth = Initial_callstack_depth;
429
430 :(before "End Includes")
431
432
433
434
435
436