https://github.com/akkartik/mu/blob/master/subx/010---vm.cc
1
2
3
4
5
6
7
8
9
10
11 :(before "End Types")
12 enum {
13 EAX,
14 ECX,
15 EDX,
16 EBX,
17 ESP,
18 EBP,
19 ESI,
20 EDI,
21 NUM_INT_REGISTERS,
22 };
23 union reg {
24 int32_t i;
25 uint32_t u;
26 };
27 :(before "End Globals")
28 reg Reg[NUM_INT_REGISTERS] = { {0} };
29 uint32_t EIP = 1;
30 :(before "End Reset")
31 bzero(Reg, sizeof(Reg));
32 EIP = 1;
33
34 :(before "End Help Contents")
35 cerr << " registers\n";
36 :(before "End Help Texts")
37 put_new(Help, "registers",
38 "SubX currently supports eight 32-bit integer registers. From 0 to 7, they are:\n"
39 " EAX ECX EDX EBX ESP EBP ESI EDI\n"
40 "ESP contains the top of the stack.\n"
41 "\n"
42 "-- 8-bit registers\n"
43 "Some instructions operate on eight *overlapping* 8-bit registers.\n"
44 "From 0 to 7, they are:\n"
45 " AL CL DL BL AH CH DH BH\n"
46 "The 8-bit registers overlap with the 32-bit ones. AL is the lowest signicant byte\n"
47 "of EAX, AH is the second lowest significant byte, and so on.\n"
48 "\n"
49 "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
50 "\n"
51 "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
52 "\n"
53 "For complete details consult the IA-32 software developer's manual, volume 2,\n"
54 "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
55 "It is included in this repository as 'modrm.pdf'.\n"
56 "The register encodings are described in the top row of the table, but you'll need\n"
57 "to spend some time with it.\n"
58 "\n"
59 "-- flag registers\n"
60 "Various instructions (particularly 'compare') modify one or more of three 1-bit 'flag'\n"
61 "registers, as a side-effect:\n"
62 "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
63 " reset if not.\n"
64 "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
65 "- the carry flag (CF): usually set if an arithmetic result overflows by just one bit.\n"
66 " Useful for operating on unsigned numbers.\n"
67 "- the overflow flag (OF): usually set if an arithmetic result overflows by more than one bit.\n"
68 " Useful for operating on signed numbers.\n"
69 "The flag bits are read by conditional jumps.\n"
70 "\n"
71 "For complete details on how different instructions update the flags, consult the IA-32\n"
72 "manual (volume 2). There's various versions of it online, such as https://c9x.me/x86,\n"
73 "though of course you'll need to be careful to ignore instructions and flag registers\n"
74 "that SubX doesn't support.\n"
75 "\n"
76 "It isn't simple, but if this is the processor you have running on your computer,\n"
77 "might as well get good at it.\n"
78 );
79
80 :(before "End Globals")
81
82 bool SF = false;
83 bool ZF = false;
84 bool CF = false;
85 bool OF = false;
86 :(before "End Reset")
87 SF = ZF = CF = OF = false;
88
89
90
91 :(before "End Types")
92 const uint32_t SEGMENT_ALIGNMENT = 0x1000000;
93 inline uint32_t align_upwards(uint32_t x, uint32_t align) {
94 return (x+align-1) & -(align);
95 }
96
97
98
99 struct vma {
100 uint32_t start;
101 uint32_t end;
102 vector<uint8_t> _data;
103 vma(uint32_t s, uint32_t e) :start(s), end(e) {}
104 vma(uint32_t s) :start(s), end(align_upwards(s+1, SEGMENT_ALIGNMENT)) {}
105 bool match(uint32_t a) {
106 return a >= start && a < end;
107 }
108 bool match32(uint32_t a) {
109 return a >= start && a+4 <= end;
110 }
111 uint8_t& data(uint32_t a) {
112 assert(match(a));
113 uint32_t result_index = a-start;
114 if (_data.size() <= result_index) {
115 const int align = 0x1000;
116 uint32_t result_size = result_index + 1;
117 uint32_t new_size = align_upwards(result_size, align);
118
119 if (new_size < _data.size() * 2)
120 new_size = _data.size() * 2;
121
122 if (new_size > end-start)
123 new_size = end-start;
124 _data.resize(new_size);
125 }
126 return _data.at(result_index);
127 }
128 void grow_until(uint32_t new_end_address) {
129 if (new_end_address < end) return;
130
131 void sanity_check(uint32_t start, uint32_t end);
132 sanity_check(start, new_end_address);
133 end = new_end_address;
134 }
135
136 };
137 :(code)
138 void sanity_check(uint32_t start, uint32_t end) {
139 bool dup_found = false;
140 for (int i = 0; i < SIZE(Mem); ++i) {
141 const vma& curr = Mem.at(i);
142 if (curr.start == start) {
143 assert(!dup_found);
144 dup_found = true;
145 }
146 else if (curr.start > start) {
147 assert(curr.start > end);
148 }
149 else if (curr.start < start) {
150 assert(curr.end < start);
151 }
152 }
153 }
154
155 :(before "End Globals")
156
157 vector<vma> Mem;
158 :(code)
159
160
161
162 void grow_code_segment(uint32_t new_end_address) {
163 assert(!Mem.empty());
164 Mem.at(0).grow_until(new_end_address);
165 }
166 void grow_data_segment(uint32_t new_end_address) {
167 assert(SIZE(Mem) > 1);
168 Mem.at(1).grow_until(new_end_address);
169 }
170 :(before "End Globals")
171 uint32_t End_of_program = 0;
172
173 :(before "End Reset")
174 Mem.clear();
175 End_of_program = 0;
176 :(code)
177
178
179 inline uint8_t read_mem_u8(uint32_t addr) {
180 uint8_t* handle = mem_addr_u8(addr);
181 return handle ? *handle : 0;
182 }
183 inline int8_t read_mem_i8(uint32_t addr) {
184 return static_cast<int8_t>(read_mem_u8(addr));
185 }
186 inline uint32_t read_mem_u32(uint32_t addr) {
187 uint32_t* handle = mem_addr_u32(addr);
188 return handle ? *handle : 0;
189 }
190 inline int32_t read_mem_i32(uint32_t addr) {
191 return static_cast<int32_t>(read_mem_u32(addr));
192 }
193
194 inline uint8_t* mem_addr_u8(uint32_t addr) {
195 uint8_t* result = NULL;
196 for (int i = 0; i < SIZE(Mem); ++i) {
197 if (Mem.at(i).match(addr)) {
198 if (result)
199 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
200 result = &Mem.at(i).data(addr);
201 }
202 }
203 if (result == NULL) {
204 if (Trace_file) Trace_file.flush();
205 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
206 }
207 return result;
208 }
209 inline int8_t* mem_addr_i8(uint32_t addr) {
210 return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
211 }
212 inline uint32_t* mem_addr_u32(uint32_t addr) {
213 uint32_t* result = NULL;
214 for (int i = 0; i < SIZE(Mem); ++i) {
215 if (Mem.at(i).match32(addr)) {
216 if (result)
217 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
218 result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
219 }
220 }
221 if (result == NULL) {
222 if (Trace_file) Trace_file.flush();
223 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
224 raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
225 }
226 return result;
227 }
228 inline int32_t* mem_addr_i32(uint32_t addr) {
229 return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
230 }
231
232 inline const char* mem_addr_kernel_string(uint32_t addr) {
233 return reinterpret_cast<const char*>(mem_addr_u8(addr));
234 }
235 inline string mem_addr_string(uint32_t addr, uint32_t size) {
236 ostringstream out;
237 for (size_t i = 0; i < size; ++i)
238 out << read_mem_u8(addr+i);
239 return out.str();
240 }
241
242
243 inline void write_mem_u8(uint32_t addr, uint8_t val) {
244 uint8_t* handle = mem_addr_u8(addr);
245 if (handle != NULL) *handle = val;
246 }
247 inline void write_mem_i8(uint32_t addr, int8_t val) {
248 int8_t* handle = mem_addr_i8(addr);
249 if (handle != NULL) *handle = val;
250 }
251 inline void write_mem_u32(uint32_t addr, uint32_t val) {
252 uint32_t* handle = mem_addr_u32(addr);
253 if (handle != NULL) *handle = val;
254 }
255 inline void write_mem_i32(uint32_t addr, int32_t val) {
256 int32_t* handle = mem_addr_i32(addr);
257 if (handle != NULL) *handle = val;
258 }
259
260 inline bool already_allocated(uint32_t addr) {
261 bool result = false;
262 for (int i = 0; i < SIZE(Mem); ++i) {
263 if (Mem.at(i).match(addr)) {
264 if (result)
265 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
266 result = true;
267 }
268 }
269 return result;
270 }
271
272
273
274 :(code)
275
276 void run_one_instruction() {
277 uint8_t op=0, op2=0, op3=0;
278
279 if (Trace_file) {
280 dump_registers();
281
282 }
283 uint32_t inst_start_address = EIP;
284 op = next();
285 trace(Callstack_depth+1, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << end();
286 switch (op) {
287 case 0xf4:
288 EIP = End_of_program;
289 break;
290
291 case 0x0f:
292 switch(op2 = next()) {
293
294 default:
295 cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
296 DUMP("");
297 exit(1);
298 }
299 break;
300 case 0xf2:
301 switch(op2 = next()) {
302
303 case 0x0f:
304 switch(op3 = next()) {
305
306 default:
307 cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
308 DUMP("");
309 exit(1);
310 }
311 break;
312 default:
313 cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
314 DUMP("");
315 exit(1);
316 }
317 break;
318 case 0xf3:
319 switch(op2 = next()) {
320
321 case 0x0f:
322 switch(op3 = next()) {
323
324 default:
325 cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
326 DUMP("");
327 exit(1);
328 }
329 break;
330 default:
331 cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
332 DUMP("");
333 exit(1);
334 }
335 break;
336 default:
337 cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
338 DUMP("");
339 exit(1);
340 }
341 }
342
343 inline uint8_t next() {
344 return read_mem_u8(EIP++);
345 }
346
347 void dump_registers() {
348 ostringstream out;
349 out << "registers before: ";
350 for (int i = 0; i < NUM_INT_REGISTERS; ++i) {
351 if (i > 0) out << "; ";
352 out << " " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
353 }
354 out << " -- SF: " << SF << "; ZF: " << ZF << "; CF: " << CF << "; OF: " << OF;
355 trace(Callstack_depth+1, "run") << out.str() << end();
356 }
357
358
359 :(before "End Globals")
360 map<string, string> Name;
361 map<string, string> Name_0f;
362 map<string, string> Name_f3;
363 map<string, string> Name_f3_0f;
364 :(before "End One-time Setup")
365 init_op_names();
366 :(code)
367 void init_op_names() {
368 put(Name, "f4", "halt (hlt)");
369
370 }
371
372 :(before "End Help Special-cases(key)")
373 if (key == "opcodes") {
374 cerr << "Opcodes currently supported by SubX:\n";
375 for (map<string, string>::iterator p = Name.begin(); p != Name.end(); ++p)
376 cerr << " " << p->first << ": " << p->second << '\n';
377 for (map<string, string>::iterator p = Name_0f.begin(); p != Name_0f.end(); ++p)
378 cerr << " 0f " << p->first << ": " << p->second << '\n';
379 for (map<string, string>::iterator p = Name_f3.begin(); p != Name_f3.end(); ++p)
380 cerr << " f3 " << p->first << ": " << p->second << '\n';
381 for (map<string, string>::iterator p = Name_f3_0f.begin(); p != Name_f3_0f.end(); ++p)
382 cerr << " f3 0f " << p->first << ": " << p->second << '\n';
383 cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n"
384 "For complete details on these instructions, consult the IA-32 manual (volume 2).\n"
385 "There's various versions of it online, such as https://c9x.me/x86.\n"
386 "The mnemonics in brackets will help you locate each instruction.\n";
387 return 0;
388 }
389 :(before "End Help Contents")
390 cerr << " opcodes\n";
391
392
393
394
395
396
397
398
399
400
401
402
403
404 :(before "End Globals")
405 extern const int Initial_callstack_depth = 2;
406 int Callstack_depth = Initial_callstack_depth;
407 :(before "End Reset")
408 Callstack_depth = Initial_callstack_depth;
409
410 :(before "End Includes")
411 #include <iomanip>
412 #define HEXBYTE std::hex << std::setw(2) << std::setfill('0')
413 #define HEXWORD std::hex << std::setw(8) << std::setfill('0')
414
415 #define NUM(X) static_cast<int>(X)
416 #include <stdint.h>