https://github.com/akkartik/mu/blob/master/subx/010---vm.cc
1
2
3
4
5
6
7
8
9
10
11 :(before "End Types")
12 enum {
13 EAX,
14 ECX,
15 EDX,
16 EBX,
17 ESP,
18 EBP,
19 ESI,
20 EDI,
21 NUM_INT_REGISTERS,
22 };
23 union reg {
24 int32_t i;
25 uint32_t u;
26 };
27 :(before "End Globals")
28 reg Reg[NUM_INT_REGISTERS] = { {0} };
29 uint32_t EIP = 1;
30 :(before "End Reset")
31 bzero(Reg, sizeof(Reg));
32 EIP = 1;
33
34 :(before "End Help Contents")
35 cerr << " registers\n";
36 :(before "End Help Texts")
37 put_new(Help, "registers",
38 "SubX currently supports eight 32-bit integer registers. From 0 to 7, they are:\n"
39 " EAX ECX EDX EBX ESP EBP ESI EDI\n"
40 "ESP contains the top of the stack.\n"
41 "\n"
42 "-- 8-bit registers\n"
43 "Some instructions operate on eight *overlapping* 8-bit registers.\n"
44 "From 0 to 7, they are:\n"
45 " AL CL DL BL AH CH DH BH\n"
46 "The 8-bit registers overlap with the 32-bit ones. AL is the lowest signicant byte\n"
47 "of EAX, AH is the second lowest significant byte, and so on.\n"
48 "\n"
49 "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
50 "\n"
51 "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
52 "\n"
53 "For complete details consult the IA-32 software developer's manual, volume 2,\n"
54 "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
55 "It is included in this repository as 'modrm.pdf'.\n"
56 "The register encodings are described in the top row of the table, but you'll need\n"
57 "to spend some time with it.\n"
58 "\n"
59 "-- flag registers\n"
60 "Various instructions (particularly 'compare') modify one or more of three 1-bit 'flag'\n"
61 "registers, as a side-effect:\n"
62 "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
63 " reset if not.\n"
64 "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
65 "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
66 "The flag bits are read by conditional jumps.\n"
67 "\n"
68 "For complete details on how different instructions update the flags, consult the IA-32\n"
69 "manual (volume 2). There's various versions of it online, such as https://c9x.me/x86,\n"
70 "though of course you'll need to be careful to ignore instructions and flag registers\n"
71 "that SubX doesn't support.\n"
72 "\n"
73 "It isn't simple, but if this is the processor you have running on your computer,\n"
74 "might as well get good at it.\n"
75 );
76
77 :(before "End Globals")
78
79 bool SF = false;
80 bool ZF = false;
81 bool OF = false;
82 :(before "End Reset")
83 SF = ZF = OF = false;
84
85
86
87 :(before "End Includes")
88
89
90
91
92 \
93 int64_t tmp = arg1 op arg2; \
94 arg1 = arg1 op arg2; \
95 trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
96 SF = (arg1 < 0); \
97 ZF = (arg1 == 0); \
98 OF = (arg1 != tmp); \
99 }
100
101
102
103
104 \
105 arg1 = arg1 op arg2; \
106 trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
107 SF = (arg1 >> 31); \
108 ZF = (arg1 == 0); \
109 OF = false; \
110 }
111
112
113
114 :(before "End Types")
115 const uint32_t SEGMENT_ALIGNMENT = 0x1000000;
116 inline uint32_t align_upwards(uint32_t x, uint32_t align) {
117 return (x+align-1) & -(align);
118 }
119
120
121
122 struct vma {
123 uint32_t start;
124 uint32_t end;
125 vector<uint8_t> _data;
126 vma(uint32_t s, uint32_t e) :start(s), end(e) {}
127 vma(uint32_t s) :start(s), end(align_upwards(s+1, SEGMENT_ALIGNMENT)) {}
128 bool match(uint32_t a) {
129 return a >= start && a < end;
130 }
131 bool match32(uint32_t a) {
132 return a >= start && a+4 <= end;
133 }
134 uint8_t& data(uint32_t a) {
135 assert(match(a));
136 uint32_t result_index = a-start;
137 if (_data.size() <= result_index) {
138 const int align = 0x1000;
139 uint32_t result_size = result_index + 1;
140 uint32_t new_size = align_upwards(result_size, align);
141
142 if (new_size < _data.size() * 2)
143 new_size = _data.size() * 2;
144
145 if (new_size > end-start)
146 new_size = end-start;
147 _data.resize(new_size);
148 }
149 return _data.at(result_index);
150 }
151 void grow_until(uint32_t new_end_address) {
152 if (new_end_address < end) return;
153
154 void sanity_check(uint32_t start, uint32_t end);
155 sanity_check(start, new_end_address);
156 end = new_end_address;
157 }
158
159 };
160 :(code)
161 void sanity_check(uint32_t start, uint32_t end) {
162 bool dup_found = false;
163 for (int i = 0; i < SIZE(Mem); ++i) {
164 const vma& curr = Mem.at(i);
165 if (curr.start == start) {
166 assert(!dup_found);
167 dup_found = true;
168 }
169 else if (curr.start > start) {
170 assert(curr.start > end);
171 }
172 else if (curr.start < start) {
173 assert(curr.end < start);
174 }
175 }
176 }
177
178 :(before "End Globals")
179
180 vector<vma> Mem;
181 :(code)
182
183
184
185 void grow_code_segment(uint32_t new_end_address) {
186 assert(!Mem.empty());
187 Mem.at(0).grow_until(new_end_address);
188 }
189 void grow_data_segment(uint32_t new_end_address) {
190 assert(SIZE(Mem) > 1);
191 Mem.at(1).grow_until(new_end_address);
192 }
193 :(before "End Globals")
194 uint32_t End_of_program = 0;
195
196 :(before "End Reset")
197 Mem.clear();
198 End_of_program = 0;
199 :(code)
200
201
202 inline uint8_t read_mem_u8(uint32_t addr) {
203 uint8_t* handle = mem_addr_u8(addr);
204 return handle ? *handle : 0;
205 }
206 inline int8_t read_mem_i8(uint32_t addr) {
207 return static_cast<int8_t>(read_mem_u8(addr));
208 }
209 inline uint32_t read_mem_u32(uint32_t addr) {
210 uint32_t* handle = mem_addr_u32(addr);
211 return handle ? *handle : 0;
212 }
213 inline int32_t read_mem_i32(uint32_t addr) {
214 return static_cast<int32_t>(read_mem_u32(addr));
215 }
216
217 inline uint8_t* mem_addr_u8(uint32_t addr) {
218 uint8_t* result = NULL;
219 for (int i = 0; i < SIZE(Mem); ++i) {
220 if (Mem.at(i).match(addr)) {
221 if (result)
222 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
223 result = &Mem.at(i).data(addr);
224 }
225 }
226 if (result == NULL)
227 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
228 return result;
229 }
230 inline int8_t* mem_addr_i8(uint32_t addr) {
231 return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
232 }
233 inline uint32_t* mem_addr_u32(uint32_t addr) {
234 uint32_t* result = NULL;
235 for (int i = 0; i < SIZE(Mem); ++i) {
236 if (Mem.at(i).match32(addr)) {
237 if (result)
238 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
239 result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
240 }
241 }
242 if (result == NULL) {
243 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
244 raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
245 }
246 return result;
247 }
248 inline int32_t* mem_addr_i32(uint32_t addr) {
249 return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
250 }
251
252 inline const char* mem_addr_kernel_string(uint32_t addr) {
253 return reinterpret_cast<const char*>(mem_addr_u8(addr));
254 }
255 inline string mem_addr_string(uint32_t addr, uint32_t size) {
256 ostringstream out;
257 for (size_t i = 0; i < size; ++i)
258 out << read_mem_u8(addr+i);
259 return out.str();
260 }
261
262
263 inline void write_mem_u8(uint32_t addr, uint8_t val) {
264 uint8_t* handle = mem_addr_u8(addr);
265 if (handle != NULL) *handle = val;
266 }
267 inline void write_mem_i8(uint32_t addr, int8_t val) {
268 int8_t* handle = mem_addr_i8(addr);
269 if (handle != NULL) *handle = val;
270 }
271 inline void write_mem_u32(uint32_t addr, uint32_t val) {
272 uint32_t* handle = mem_addr_u32(addr);
273 if (handle != NULL) *handle = val;
274 }
275 inline void write_mem_i32(uint32_t addr, int32_t val) {
276 int32_t* handle = mem_addr_i32(addr);
277 if (handle != NULL) *handle = val;
278 }
279
280 inline bool already_allocated(uint32_t addr) {
281 bool result = false;
282 for (int i = 0; i < SIZE(Mem); ++i) {
283 if (Mem.at(i).match(addr)) {
284 if (result)
285 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
286 result = true;
287 }
288 }
289 return result;
290 }
291
292
293
294 :(code)
295
296 void run_one_instruction() {
297 uint8_t op=0, op2=0, op3=0;
298
299 if (Dump_trace) {
300 cerr << "registers: ";
301 dump_registers();
302
303 }
304 trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
305 op = next();
306 if (Dump_trace) cerr << "opcode: " << HEXBYTE << NUM(op) << '\n';
307 switch (op) {
308 case 0xf4:
309 EIP = End_of_program;
310 break;
311
312 case 0x0f:
313 switch(op2 = next()) {
314
315 default:
316 cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
317 DUMP("");
318 exit(1);
319 }
320 break;
321 case 0xf2:
322 switch(op2 = next()) {
323
324 case 0x0f:
325 switch(op3 = next()) {
326
327 default:
328 cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
329 DUMP("");
330 exit(1);
331 }
332 break;
333 default:
334 cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
335 DUMP("");
336 exit(1);
337 }
338 break;
339 case 0xf3:
340 switch(op2 = next()) {
341
342 case 0x0f:
343 switch(op3 = next()) {
344
345 default:
346 cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
347 DUMP("");
348 exit(1);
349 }
350 break;
351 default:
352 cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
353 DUMP("");
354 exit(1);
355 }
356 break;
357 default:
358 cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
359 DUMP("");
360 exit(1);
361 }
362 }
363
364 inline uint8_t next() {
365 return read_mem_u8(EIP++);
366 }
367
368 void dump_registers() {
369 for (int i = 0; i < NUM_INT_REGISTERS; ++i) {
370 if (i > 0) cerr << "; ";
371 cerr << " " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
372 }
373 cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
374 }
375
376
377 :(before "End Globals")
378 map<string, string> Name;
379 map<string, string> Name_0f;
380 map<string, string> Name_f3;
381 map<string, string> Name_f3_0f;
382 :(before "End One-time Setup")
383 init_op_names();
384 :(code)
385 void init_op_names() {
386 put(Name, "f4", "halt (hlt)");
387
388 }
389
390 :(before "End Help Special-cases(key)")
391 if (key == "opcodes") {
392 cerr << "Opcodes currently supported by SubX:\n";
393 for (map<string, string>::iterator p = Name.begin(); p != Name.end(); ++p)
394 cerr << " " << p->first << ": " << p->second << '\n';
395 for (map<string, string>::iterator p = Name_0f.begin(); p != Name_0f.end(); ++p)
396 cerr << " 0f " << p->first << ": " << p->second << '\n';
397 for (map<string, string>::iterator p = Name_f3.begin(); p != Name_f3.end(); ++p)
398 cerr << " f3 " << p->first << ": " << p->second << '\n';
399 for (map<string, string>::iterator p = Name_f3_0f.begin(); p != Name_f3_0f.end(); ++p)
400 cerr << " f3 0f " << p->first << ": " << p->second << '\n';
401 cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n"
402 "For complete details on these instructions, consult the IA-32 manual (volume 2).\n"
403 "There's various versions of it online, such as https://c9x.me/x86.\n"
404 "The mnemonics in brackets will help you locate each instruction.\n";
405 return 0;
406 }
407 :(before "End Help Contents")
408 cerr << " opcodes\n";
409
410 :(before "End Includes")
411
412
413
414
415
416