https://github.com/akkartik/mu/blob/master/010---vm.cc
1
2
3
4
5
6
7
8
9
10
11 :(before "End Types")
// Indices of the eight 32-bit integer registers, in the 0-to-7 order listed by
// the 'registers' help topic. They index into Reg[].
enum {
  EAX = 0,
  ECX = 1,
  EDX = 2,
  EBX = 3,
  ESP = 4,
  EBP = 5,
  ESI = 6,
  EDI = 7,
  NUM_INT_REGISTERS = 8,  // how many registers there are; sizes Reg[]
};
// Contents of one 32-bit register, readable either as a signed or as an
// unsigned number.
union reg {
  int32_t i;   // signed view
  uint32_t u;  // unsigned view of the same storage
};
27 :(before "End Globals")
// The integer register file, one slot per register index (EAX..EDI). Starts zeroed.
reg Reg[NUM_INT_REGISTERS] = { {0} };
// Address of the next instruction byte to fetch; next() reads memory at EIP
// and advances it.
uint32_t EIP = 1;
30 :(before "End Reset")
// Put the registers and the instruction pointer back to their initial values.
bzero(Reg, sizeof(Reg));
EIP = 1;
33
34 :(before "End Help Contents")
// Advertise the 'registers' help topic in the list of help contents.
cerr << " registers\n";
36 :(before "End Help Texts")
37 put_new(Help, "registers",
38 "SubX currently supports eight 32-bit integer registers. From 0 to 7, they are:\n"
39 " EAX ECX EDX EBX ESP EBP ESI EDI\n"
40 "ESP contains the top of the stack.\n"
41 "\n"
42 "-- 8-bit registers\n"
43 "Some instructions operate on eight *overlapping* 8-bit registers.\n"
44 "From 0 to 7, they are:\n"
45 " AL CL DL BL AH CH DH BH\n"
46 "The 8-bit registers overlap with the 32-bit ones. AL is the lowest signicant byte\n"
47 "of EAX, AH is the second lowest significant byte, and so on.\n"
48 "\n"
49 "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
50 "\n"
51 "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
52 "\n"
53 "For complete details consult the IA-32 software developer's manual, volume 2,\n"
54 "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
55 "It is included in this repository as 'modrm.pdf'.\n"
56 "The register encodings are described in the top row of the table, but you'll need\n"
57 "to spend some time with it.\n"
58 "\n"
59 "-- flag registers\n"
60 "Various instructions (particularly 'compare') modify one or more of four 1-bit\n"
61 "'flag' registers, as a side-effect:\n"
62 "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
63 " reset if not.\n"
64 "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
65 "- the carry flag (CF): usually set if an arithmetic result overflows by just one bit.\n"
66 " Useful for operating on unsigned numbers.\n"
67 "- the overflow flag (OF): usually set if an arithmetic result overflows by more\n"
68 " than one bit. Useful for operating on signed numbers.\n"
69 "The flag bits are read by conditional jumps.\n"
70 "\n"
71 "For complete details on how different instructions update the flags, consult the IA-32\n"
72 "manual (volume 2). There's various versions of it online, such as https://c9x.me/x86,\n"
73 "though of course you'll need to be careful to ignore instructions and flag registers\n"
74 "that SubX doesn't support.\n"
75 "\n"
76 "It isn't simple, but if this is the processor you have running on your computer.\n"
77 "Might as well get good at it.\n"
78 );
79
80 :(before "End Globals")
81
// The four 1-bit flag registers described in the 'registers' help topic.
bool SF = false;  // sign flag
bool ZF = false;  // zero flag
bool CF = false;  // carry flag
bool OF = false;  // overflow flag
86 :(before "End Reset")
87 SF = ZF = CF = OF = false;
88
89
90
91 :(before "End Types")
// 16MB (0x1000000). The single-argument vma constructor rounds a segment's
// end address up to a multiple of this.
const uint32_t SEGMENT_ALIGNMENT = 1u << 24;
// Round x up to the nearest multiple of 'align'. Values that are already a
// multiple come back unchanged.
// The bit-masking only gives a true multiple when 'align' is a power of two.
// Arithmetic is modulo 2^32, so an x within 'align' of the top of the range
// wraps around.
inline uint32_t align_upwards(uint32_t x, uint32_t align) {
  const uint32_t low_bits = align - 1;
  return (x + low_bits) & ~low_bits;
}
96
97
98
99 struct vma {
100 uint32_t start;
101 uint32_t end;
102 vector<uint8_t> _data;
103 vma(uint32_t s, uint32_t e) :start(s), end(e) {}
104 vma(uint32_t s) :start(s), end(align_upwards(s+1, SEGMENT_ALIGNMENT)) {}
105 bool match(uint32_t a) {
106 return a >= start && a < end;
107 }
108 bool match32(uint32_t a) {
109 return a >= start && a+4 <= end;
110 }
111 uint8_t& data(uint32_t a) {
112 assert(match(a));
113 uint32_t result_index = a-start;
114 if (_data.size() <= result_index) {
115 const int align = 0x1000;
116 uint32_t result_size = result_index + 1;
117 uint32_t new_size = align_upwards(result_size, align);
118
119 if (new_size < _data.size() * 2)
120 new_size = _data.size() * 2;
121
122 if (new_size > end-start)
123 new_size = end-start;
124 _data.resize(new_size);
125 }
126 return _data.at(result_index);
127 }
128 void grow_until(uint32_t new_end_address) {
129 if (new_end_address < end) return;
130
131 void sanity_check(uint32_t start, uint32_t end);
132 sanity_check(start, new_end_address);
133 end = new_end_address;
134 }
135
136 };
137 :(code)
138 void sanity_check(uint32_t start, uint32_t end) {
139 bool dup_found = false;
140 for (int i = 0; i < SIZE(Mem); ++i) {
141 const vma& curr = Mem.at(i);
142 if (curr.start == start) {
143 assert(!dup_found);
144 dup_found = true;
145 }
146 else if (curr.start > start) {
147 assert(curr.start > end);
148 }
149 else if (curr.start < start) {
150 assert(curr.end < start);
151 }
152 }
153 }
154
155 :(before "End Globals")
156
// Every segment of emulated memory. The memory accessors scan all of them
// looking for the one containing an address.
vector<vma> Mem;
158 :(code)
159 :(before "End Globals")
// The address EIP is set to by the halt opcode (0xf4).
uint32_t End_of_program = 0;
161
162 :(before "End Reset")
// Drop all memory segments and forget where the program ended.
Mem.clear();
End_of_program = 0;
165 :(code)
166
167
168 inline uint8_t read_mem_u8(uint32_t addr) {
169 uint8_t* handle = mem_addr_u8(addr);
170 return handle ? *handle : 0;
171 }
172 inline int8_t read_mem_i8(uint32_t addr) {
173 return static_cast<int8_t>(read_mem_u8(addr));
174 }
175 inline uint32_t read_mem_u32(uint32_t addr) {
176 uint32_t* handle = mem_addr_u32(addr);
177 return handle ? *handle : 0;
178 }
179 inline int32_t read_mem_i32(uint32_t addr) {
180 return static_cast<int32_t>(read_mem_u32(addr));
181 }
182
183 inline uint8_t* mem_addr_u8(uint32_t addr) {
184 uint8_t* result = NULL;
185 for (int i = 0; i < SIZE(Mem); ++i) {
186 if (Mem.at(i).match(addr)) {
187 if (result)
188 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
189 result = &Mem.at(i).data(addr);
190 }
191 }
192 if (result == NULL) {
193 if (Trace_file) Trace_file.flush();
194 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
195 exit(1);
196 }
197 return result;
198 }
199 inline int8_t* mem_addr_i8(uint32_t addr) {
200 return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
201 }
202 inline uint32_t* mem_addr_u32(uint32_t addr) {
203 uint32_t* result = NULL;
204 for (int i = 0; i < SIZE(Mem); ++i) {
205 if (Mem.at(i).match32(addr)) {
206 if (result)
207 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
208 result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
209 }
210 }
211 if (result == NULL) {
212 if (Trace_file) Trace_file.flush();
213 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
214 raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
215 exit(1);
216 }
217 return result;
218 }
219 inline int32_t* mem_addr_i32(uint32_t addr) {
220 return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
221 }
222
223 inline const char* mem_addr_kernel_string(uint32_t addr) {
224 return reinterpret_cast<const char*>(mem_addr_u8(addr));
225 }
226 inline string mem_addr_string(uint32_t addr, uint32_t size) {
227 ostringstream out;
228 for (size_t i = 0; i < size; ++i)
229 out << read_mem_u8(addr+i);
230 return out.str();
231 }
232
233
234 inline void write_mem_u8(uint32_t addr, uint8_t val) {
235 uint8_t* handle = mem_addr_u8(addr);
236 if (handle != NULL) *handle = val;
237 }
238 inline void write_mem_i8(uint32_t addr, int8_t val) {
239 int8_t* handle = mem_addr_i8(addr);
240 if (handle != NULL) *handle = val;
241 }
242 inline void write_mem_u32(uint32_t addr, uint32_t val) {
243 uint32_t* handle = mem_addr_u32(addr);
244 if (handle != NULL) *handle = val;
245 }
246 inline void write_mem_i32(uint32_t addr, int32_t val) {
247 int32_t* handle = mem_addr_i32(addr);
248 if (handle != NULL) *handle = val;
249 }
250
251 inline bool already_allocated(uint32_t addr) {
252 bool result = false;
253 for (int i = 0; i < SIZE(Mem); ++i) {
254 if (Mem.at(i).match(addr)) {
255 if (result)
256 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
257 result = true;
258 }
259 }
260 return result;
261 }
262
263
264
265 :(code)
266
// Fetch the opcode at EIP and execute it.
// The only instruction implemented here is 0xf4 (halt). Opcodes 0x0f, 0xf2 and
// 0xf3 read one or two further opcode bytes, but no multi-byte opcode is
// recognized yet. Any unrecognized byte sequence prints an error to cerr and
// exits the process.
void run_one_instruction() {
  uint8_t op=0, op2=0, op3=0;

  // only dump registers when tracing to a file
  if (Trace_file) {
    dump_registers();

  }
  // remember where the instruction began; next() advances EIP
  uint32_t inst_start_address = EIP;
  op = next();
  trace(Callstack_depth+1, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << end();
  switch (op) {
  case 0xf4:  // halt
    // NOTE(review): presumably the caller stops running once EIP reaches End_of_program -- confirm
    EIP = End_of_program;
    break;

  case 0x0f:  // two-byte opcodes starting with 0f
    switch(op2 = next()) {

    default:
      cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
      exit(1);
    }
    break;
  case 0xf2:  // opcodes starting with f2
    switch(op2 = next()) {

    case 0x0f:  // three-byte opcodes starting with f2 0f
      switch(op3 = next()) {

      default:
        cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
        exit(1);
      }
      break;
    default:
      cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
      exit(1);
    }
    break;
  case 0xf3:  // opcodes starting with f3
    switch(op2 = next()) {

    case 0x0f:  // three-byte opcodes starting with f3 0f
      switch(op3 = next()) {

      default:
        cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
        exit(1);
      }
      break;
    default:
      cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
      exit(1);
    }
    break;
  default:
    cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
    exit(1);
  }
}
327
328 inline uint8_t next() {
329 return read_mem_u8(EIP++);
330 }
331
332 void dump_registers() {
333 ostringstream out;
334 out << "registers before: ";
335 for (int i = 0; i < NUM_INT_REGISTERS; ++i) {
336 if (i > 0) out << "; ";
337 out << " " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
338 }
339 out << " -- SF: " << SF << "; ZF: " << ZF << "; CF: " << CF << "; OF: " << OF;
340 trace(Callstack_depth+1, "run") << out.str() << end();
341 }
342
343
344 :(before "End Globals")
// Descriptions of the supported opcodes, keyed by the opcode byte written in
// hex (e.g. "f4"). Printed by the 'opcodes' help topic.
map<string, string> Name;        // single-byte opcodes
map<string, string> Name_0f;     // listed with a "0f" prefix
map<string, string> Name_f3;     // listed with an "f3" prefix
map<string, string> Name_f3_0f;  // listed with an "f3 0f" prefix
349 :(before "End One-time Setup")
// Fill in the opcode description tables.
init_op_names();
351 :(code)
// Record a description for each supported opcode, for the 'opcodes' help topic.
void init_op_names() {
  put(Name, "f4", "halt (hlt)");

}
356
357 :(before "End Help Special-cases(key)")
358 if (key == "opcodes") {
359 cerr << "Opcodes currently supported by SubX:\n";
360 for (map<string, string>::iterator p = Name.begin(); p != Name.end(); ++p)
361 cerr << " " << p->first << ": " << p->second << '\n';
362 for (map<string, string>::iterator p = Name_0f.begin(); p != Name_0f.end(); ++p)
363 cerr << " 0f " << p->first << ": " << p->second << '\n';
364 for (map<string, string>::iterator p = Name_f3.begin(); p != Name_f3.end(); ++p)
365 cerr << " f3 " << p->first << ": " << p->second << '\n';
366 for (map<string, string>::iterator p = Name_f3_0f.begin(); p != Name_f3_0f.end(); ++p)
367 cerr << " f3 0f " << p->first << ": " << p->second << '\n';
368 cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n"
369 "For complete details on these instructions, consult the IA-32 manual (volume 2).\n"
370 "There's various versions of it online, such as https://c9x.me/x86.\n"
371 "The mnemonics in brackets will help you locate each instruction.\n";
372 return 0;
373 }
374 :(before "End Help Contents")
// Advertise the 'opcodes' help topic in the list of help contents.
cerr << " opcodes\n";
376
377
378
379
380
381
382
383
384
385
386
387
388
389 :(before "End Globals")
// Depth that Callstack_depth starts at and is reset to.
// 'extern' gives the constant external linkage; a namespace-scope const would
// otherwise be internal to this translation unit.
extern const int Initial_callstack_depth = 2;
// Current depth; trace lines from run_one_instruction are emitted at
// Callstack_depth+1.
int Callstack_depth = Initial_callstack_depth;
392 :(before "End Reset")
// Start over from the initial depth.
Callstack_depth = Initial_callstack_depth;
394
395 :(before "End Includes")
396 #include <iomanip>
// Stream manipulators for printing the next number as zero-padded hex, 2
// digits wide (HEXBYTE) or 8 digits wide (HEXWORD). Usage: out << HEXBYTE << n.
// Beware: hex mode and the fill character stay set on the stream afterwards;
// only the width resets.
#define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
#define HEXWORD  std::hex << std::setw(8) << std::setfill('0')

// Convert to int so that byte-sized values print as numbers rather than as
// characters.
#define NUM(X) static_cast<int>(X)
401 #include <stdint.h>