https://github.com/akkartik/mu/blob/master/010vm.cc
1
2
3
4
5
6
7
8 :(before "End Types")
9 enum {
10 EAX,
11 ECX,
12 EDX,
13 EBX,
14 ESP,
15 EBP,
16 ESI,
17 EDI,
18 NUM_INT_REGISTERS,
19 };
20 union reg {
21 int32_t i;
22 uint32_t u;
23 };
24 :(before "End Globals")
25 reg Reg[NUM_INT_REGISTERS] = { {0} };
26 uint32_t EIP = 1;
27 :(before "End Reset")
28 bzero(Reg, sizeof(Reg));
29 EIP = 1;
30
31 :(before "End Help Contents")
32 cerr << " registers\n";
33 :(before "End Help Texts")
34 put_new(Help, "registers",
35 "SubX currently supports eight 32-bit integer registers. From 0 to 7, they are:\n"
36 " EAX ECX EDX EBX ESP EBP ESI EDI\n"
37 "ESP contains the top of the stack.\n"
38 "\n"
39 "-- 8-bit registers\n"
40 "Some instructions operate on eight *overlapping* 8-bit registers.\n"
41 "From 0 to 7, they are:\n"
42 " AL CL DL BL AH CH DH BH\n"
43 "The 8-bit registers overlap with the 32-bit ones. AL is the lowest signicant byte\n"
44 "of EAX, AH is the second lowest significant byte, and so on.\n"
45 "\n"
46 "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
47 "\n"
48 "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
49 "\n"
50 "For complete details consult the IA-32 software developer's manual, volume 2,\n"
51 "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
52 "It is included in this repository as 'modrm.pdf'.\n"
53 "The register encodings are described in the top row of the table, but you'll need\n"
54 "to spend some time with it.\n"
55 "\n"
56 "-- flag registers\n"
57 "Various instructions (particularly 'compare') modify one or more of four 1-bit\n"
58 "'flag' registers, as a side-effect:\n"
59 "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
60 " reset if not.\n"
61 "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
62 "- the carry flag (CF): usually set if an arithmetic result overflows by just one bit.\n"
63 " Useful for operating on unsigned numbers.\n"
64 "- the overflow flag (OF): usually set if an arithmetic result overflows by more\n"
65 " than one bit. Useful for operating on signed numbers.\n"
66 "The flag bits are read by conditional jumps.\n"
67 "\n"
68 "For complete details on how different instructions update the flags, consult the IA-32\n"
69 "manual (volume 2). There's various versions of it online, such as https://c9x.me/x86,\n"
70 "though of course you'll need to be careful to ignore instructions and flag registers\n"
71 "that SubX doesn't support.\n"
72 "\n"
73 "It isn't simple, but if this is the processor you have running on your computer.\n"
74 "Might as well get good at it.\n"
75 );
76
77 :(before "End Globals")
78
79 bool SF = false;
80 bool ZF = false;
81 bool CF = false;
82 bool OF = false;
83 :(before "End Reset")
84 SF = ZF = CF = OF = false;
85
86
87
88 :(before "End Types")
89 const uint32_t SEGMENT_ALIGNMENT = 0x1000000;
90 inline uint32_t align_upwards(uint32_t x, uint32_t align) {
91 return (x+align-1) & -(align);
92 }
93
94
95
96 struct vma {
97 uint32_t start;
98 uint32_t end;
99 vector<uint8_t> _data;
100 vma(uint32_t s, uint32_t e) :start(s), end(e) {}
101 vma(uint32_t s) :start(s), end(align_upwards(s+1, SEGMENT_ALIGNMENT)) {}
102 bool match(uint32_t a) {
103 return a >= start && a < end;
104 }
105 bool match32(uint32_t a) {
106 return a >= start && a+4 <= end;
107 }
108 uint8_t& data(uint32_t a) {
109 assert(match(a));
110 uint32_t result_index = a-start;
111 if (_data.size() <= result_index+sizeof(int)) {
112 const int align = 0x1000;
113 uint32_t result_size = result_index + 1;
114 uint32_t new_size = align_upwards(result_size, align);
115
116 if (new_size < _data.size() * 2)
117 new_size = _data.size() * 2;
118
119 if (new_size > end-start)
120 new_size = end-start;
121 _data.resize(new_size);
122 }
123 return _data.at(result_index);
124 }
125 void grow_until(uint32_t new_end_address) {
126 if (new_end_address < end) return;
127
128 void sanity_check(uint32_t start, uint32_t end);
129 sanity_check(start, new_end_address);
130 end = new_end_address;
131 }
132
133 };
134 :(code)
135 void sanity_check(uint32_t start, uint32_t end) {
136 bool dup_found = false;
137 for (int i = 0; i < SIZE(Mem); ++i) {
138 const vma& curr = Mem.at(i);
139 if (curr.start == start) {
140 assert(!dup_found);
141 dup_found = true;
142 }
143 else if (curr.start > start) {
144 assert(curr.start > end);
145 }
146 else if (curr.start < start) {
147 assert(curr.end < start);
148 }
149 }
150 }
151
152 :(before "End Globals")
153
154 vector<vma> Mem;
155 :(code)
156 :(before "End Globals")
157 uint32_t End_of_program = 0;
158
159 :(before "End Reset")
160 Mem.clear();
161 End_of_program = 0;
162 :(code)
163
164
165 inline uint8_t read_mem_u8(uint32_t addr) {
166 uint8_t* handle = mem_addr_u8(addr);
167 return handle ? *handle : 0;
168 }
169 inline int8_t read_mem_i8(uint32_t addr) {
170 return static_cast<int8_t>(read_mem_u8(addr));
171 }
172 inline uint32_t read_mem_u32(uint32_t addr) {
173 uint32_t* handle = mem_addr_u32(addr);
174 return handle ? *handle : 0;
175 }
176 inline int32_t read_mem_i32(uint32_t addr) {
177 return static_cast<int32_t>(read_mem_u32(addr));
178 }
179
180 inline uint8_t* mem_addr_u8(uint32_t addr) {
181 uint8_t* result = NULL;
182 for (int i = 0; i < SIZE(Mem); ++i) {
183 if (Mem.at(i).match(addr)) {
184 if (result)
185 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
186 result = &Mem.at(i).data(addr);
187 }
188 }
189 if (result == NULL) {
190 if (Trace_file) Trace_file.flush();
191 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
192 exit(1);
193 }
194 return result;
195 }
196 inline int8_t* mem_addr_i8(uint32_t addr) {
197 return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
198 }
199 inline uint32_t* mem_addr_u32(uint32_t addr) {
200 uint32_t* result = NULL;
201 for (int i = 0; i < SIZE(Mem); ++i) {
202 if (Mem.at(i).match32(addr)) {
203 if (result)
204 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
205 result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
206 }
207 }
208 if (result == NULL) {
209 if (Trace_file) Trace_file.flush();
210 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
211 raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
212 exit(1);
213 }
214 return result;
215 }
216 inline int32_t* mem_addr_i32(uint32_t addr) {
217 return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
218 }
219
220 inline const char* mem_addr_kernel_string(uint32_t addr) {
221 return reinterpret_cast<const char*>(mem_addr_u8(addr));
222 }
223 inline string mem_addr_string(uint32_t addr, uint32_t size) {
224 ostringstream out;
225 for (size_t i = 0; i < size; ++i)
226 out << read_mem_u8(addr+i);
227 return out.str();
228 }
229
230
231 inline void write_mem_u8(uint32_t addr, uint8_t val) {
232 uint8_t* handle = mem_addr_u8(addr);
233 if (handle != NULL) *handle = val;
234 }
235 inline void write_mem_i8(uint32_t addr, int8_t val) {
236 int8_t* handle = mem_addr_i8(addr);
237 if (handle != NULL) *handle = val;
238 }
239 inline void write_mem_u32(uint32_t addr, uint32_t val) {
240 uint32_t* handle = mem_addr_u32(addr);
241 if (handle != NULL) *handle = val;
242 }
243 inline void write_mem_i32(uint32_t addr, int32_t val) {
244 int32_t* handle = mem_addr_i32(addr);
245 if (handle != NULL) *handle = val;
246 }
247
248 inline bool already_allocated(uint32_t addr) {
249 bool result = false;
250 for (int i = 0; i < SIZE(Mem); ++i) {
251 if (Mem.at(i).match(addr)) {
252 if (result)
253 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
254 result = true;
255 }
256 }
257 return result;
258 }
259
260
261
262 :(code)
263
264 void run_one_instruction() {
265 uint8_t op=0, op2=0, op3=0;
266
267 if (Trace_file) {
268 dump_registers();
269
270 }
271 uint32_t inst_start_address = EIP;
272 op = next();
273 trace(Callstack_depth+1, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << end();
274 switch (op) {
275 case 0xf4:
276 EIP = End_of_program;
277 break;
278
279 case 0x0f:
280 switch(op2 = next()) {
281
282 default:
283 cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
284 exit(1);
285 }
286 break;
287 case 0xf2:
288 switch(op2 = next()) {
289
290 case 0x0f:
291 switch(op3 = next()) {
292
293 default:
294 cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
295 exit(1);
296 }
297 break;
298 default:
299 cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
300 exit(1);
301 }
302 break;
303 case 0xf3:
304 switch(op2 = next()) {
305
306 case 0x0f:
307 switch(op3 = next()) {
308
309 default:
310 cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
311 exit(1);
312 }
313 break;
314 default:
315 cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
316 exit(1);
317 }
318 break;
319 default:
320 cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
321 exit(1);
322 }
323 }
324
325 inline uint8_t next() {
326 return read_mem_u8(EIP++);
327 }
328
329 void dump_registers() {
330 ostringstream out;
331 out << "regs: ";
332 for (int i = 0; i < NUM_INT_REGISTERS; ++i) {
333 if (i > 0) out << " ";
334 out << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
335 }
336 out << " -- SF: " << SF << "; ZF: " << ZF << "; CF: " << CF << "; OF: " << OF;
337 trace(Callstack_depth+1, "run") << out.str() << end();
338 }
339
340
341 :(before "End Globals")
342 map<string, string> Name;
343 map<string, string> Name_0f;
344 map<string, string> Name_f3;
345 map<string, string> Name_f3_0f;
346 :(before "End One-time Setup")
347 init_op_names();
348 :(code)
349 void init_op_names() {
350 put(Name, "f4", "halt (hlt)");
351
352 }
353
354 :(before "End Help Special-cases(key)")
355 if (key == "opcodes") {
356 cerr << "Opcodes currently supported by SubX:\n";
357 for (map<string, string>::iterator p = Name.begin(); p != Name.end(); ++p)
358 cerr << " " << p->first << ": " << p->second << '\n';
359 for (map<string, string>::iterator p = Name_0f.begin(); p != Name_0f.end(); ++p)
360 cerr << " 0f " << p->first << ": " << p->second << '\n';
361 for (map<string, string>::iterator p = Name_f3.begin(); p != Name_f3.end(); ++p)
362 cerr << " f3 " << p->first << ": " << p->second << '\n';
363 for (map<string, string>::iterator p = Name_f3_0f.begin(); p != Name_f3_0f.end(); ++p)
364 cerr << " f3 0f " << p->first << ": " << p->second << '\n';
365 cerr << "Run `bootstrap help instructions` for details on words like 'r32' and 'disp8'.\n"
366 "For complete details on these instructions, consult the IA-32 manual (volume 2).\n"
367 "There's various versions of it online, such as https://c9x.me/x86.\n"
368 "The mnemonics in brackets will help you locate each instruction.\n";
369 return 0;
370 }
371 :(before "End Help Contents")
372 cerr << " opcodes\n";
373
374
375
376
377
378
379
380
381
382
383
384
385
386 :(before "End Globals")
387 extern const int Initial_callstack_depth = 2;
388 int Callstack_depth = Initial_callstack_depth;
389 :(before "End Reset")
390 Callstack_depth = Initial_callstack_depth;
391
392 :(before "End Includes")
393 #include <iomanip>
394 #define HEXBYTE std::hex << std::setw(2) << std::setfill('0')
395 #define HEXWORD std::hex << std::setw(8) << std::setfill('0')
396
397 #define NUM(X) static_cast<int>(X)
398 #include <stdint.h>