1
2
3
4
5
6
7
8
9
10
11 :(before "End Types")
12 enum {
13 EAX,
14 ECX,
15 EDX,
16 EBX,
17 ESP,
18 EBP,
19 ESI,
20 EDI,
21 NUM_INT_REGISTERS,
22 };
23 union reg {
24 int32_t i;
25 uint32_t u;
26 };
27 :(before "End Globals")
28 reg Reg[NUM_INT_REGISTERS] = { {0} };
29 uint32_t EIP = 1;
30 :(before "End Reset")
31 bzero(Reg, sizeof(Reg));
32 EIP = 1;
33
34 :(before "End Help Contents")
35 cerr << " registers\n";
36 :(before "End Help Texts")
37 put_new(Help, "registers",
38 "SubX currently supports eight 32-bit integer registers. From 0 to 7, they are:\n"
39 " EAX ECX EDX EBX ESP EBP ESI EDI\n"
40 "ESP contains the top of the stack.\n"
41 "\n"
42 "-- 8-bit registers\n"
43 "Some instructions operate on eight *overlapping* 8-bit registers.\n"
44 "From 0 to 7, they are:\n"
45 " AL CL DL BL AH CH DH BH\n"
46 "The 8-bit registers overlap with the 32-bit ones. AL is the lowest signicant byte\n"
47 "of EAX, AH is the second lowest significant byte, and so on.\n"
48 "\n"
49 "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
50 "\n"
51 "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
52 "\n"
53 "For complete details consult the IA-32 software developer's manual, volume 2,\n"
54 "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
55 "It is included in this repository as 'modrm.pdf'.\n"
56 "The register encodings are described in the top row of the table, but you'll need\n"
57 "to spend some time with it.\n"
58 "\n"
59 "-- flag registers\n"
60 "Various instructions (particularly 'compare') modify one or more of three 1-bit 'flag'\n"
61 "registers, as a side-effect:\n"
62 "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
63 " reset if not.\n"
64 "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
65 "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
66 "The flag bits are read by conditional jumps.\n"
67 "\n"
68 "For complete details on how different instructions update the flags, consult the IA-32\n"
69 "manual (volume 2). There's various versions of it online, such as https://c9x.me/x86,\n"
70 "though of course you'll need to be careful to ignore instructions and flag registers\n"
71 "that SubX doesn't support.\n"
72 "\n"
73 "It isn't simple, but if this is the processor you have running on your computer,\n"
74 "might as well get good at it.\n"
75 );
76
77 :(before "End Globals")
78
79 bool SF = false;
80 bool ZF = false;
81 bool OF = false;
82 :(before "End Reset")
83 SF = ZF = OF = false;
84
85
86
87 :(before "End Includes")
88
89
90
91
92 \
93 int64_t tmp = arg1 op arg2; \
94 arg1 = arg1 op arg2; \
95 trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
96 SF = (arg1 < 0); \
97 ZF = (arg1 == 0); \
98 OF = (arg1 != tmp); \
99 }
100
101
102
103
104 \
105 arg1 = arg1 op arg2; \
106 trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
107 SF = (arg1 >> 31); \
108 ZF = (arg1 == 0); \
109 OF = false; \
110 }
111
112
113
114 :(before "End Types")
115 const uint32_t INITIAL_SEGMENT_SIZE = 0x1000 - 1;
116
117
118
119
120
121 struct vma {
122 uint32_t start;
123 uint32_t end;
124 vector<uint8_t> _data;
125 vma(uint32_t s, uint32_t e) :start(s), end(e) {
126 _data.resize(end-start);
127 }
128 vma(uint32_t s) :start(s), end(s+INITIAL_SEGMENT_SIZE) {
129 _data.resize(end-start);
130 }
131 bool match(uint32_t a) {
132 return a >= start && a < end;
133 }
134 bool match32(uint32_t a) {
135 return a >= start && a+4 <= end;
136 }
137 uint8_t& data(uint32_t a) {
138 assert(match(a));
139 return _data.at(a-start);
140 }
141 void grow_until(uint32_t new_end_address) {
142 if (new_end_address < end) return;
143
144 void sanity_check(uint32_t start, uint32_t end);
145 sanity_check(start, new_end_address);
146 end = new_end_address;
147 _data.resize(new_end_address - start);
148 }
149
150 };
151 :(code)
152 void sanity_check(uint32_t start, uint32_t end) {
153 bool dup_found = false;
154 for (int i = 0; i < SIZE(Mem); ++i) {
155 const vma& curr = Mem.at(i);
156 if (curr.start == start) {
157 assert(!dup_found);
158 dup_found = true;
159 }
160 else if (curr.start > start) {
161 assert(curr.start > end);
162 }
163 else if (curr.start < start) {
164 assert(curr.end < start);
165 }
166 }
167 }
168
169 :(before "End Globals")
170
171 vector<vma> Mem;
172 :(code)
173
174
175
176 void grow_code_segment(uint32_t new_end_address) {
177 assert(!Mem.empty());
178 Mem.at(0).grow_until(new_end_address);
179 }
180 void grow_data_segment(uint32_t new_end_address) {
181 assert(SIZE(Mem) > 1);
182 Mem.at(1).grow_until(new_end_address);
183 }
184 :(before "End Globals")
185 uint32_t End_of_program = 0;
186
187 :(before "End Reset")
188 Mem.clear();
189 End_of_program = 0;
190 :(code)
191
192
193 inline uint8_t read_mem_u8(uint32_t addr) {
194 uint8_t* handle = mem_addr_u8(addr);
195 return handle ? *handle : 0;
196 }
197 inline int8_t read_mem_i8(uint32_t addr) {
198 return static_cast<int8_t>(read_mem_u8(addr));
199 }
200 inline uint32_t read_mem_u32(uint32_t addr) {
201 uint32_t* handle = mem_addr_u32(addr);
202 return handle ? *handle : 0;
203 }
204 inline int32_t read_mem_i32(uint32_t addr) {
205 return static_cast<int32_t>(read_mem_u32(addr));
206 }
207
208 inline uint8_t* mem_addr_u8(uint32_t addr) {
209 uint8_t* result = NULL;
210 for (int i = 0; i < SIZE(Mem); ++i) {
211 if (Mem.at(i).match(addr)) {
212 if (result)
213 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
214 result = &Mem.at(i).data(addr);
215 }
216 }
217 if (result == NULL)
218 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
219 return result;
220 }
221 inline int8_t* mem_addr_i8(uint32_t addr) {
222 return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
223 }
224 inline uint32_t* mem_addr_u32(uint32_t addr) {
225 uint32_t* result = NULL;
226 for (int i = 0; i < SIZE(Mem); ++i) {
227 if (Mem.at(i).match32(addr)) {
228 if (result)
229 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
230 result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
231 }
232 }
233 if (result == NULL) {
234 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
235 raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
236 }
237 return result;
238 }
239 inline int32_t* mem_addr_i32(uint32_t addr) {
240 return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
241 }
242
243 inline const char* mem_addr_kernel_string(uint32_t addr) {
244 return reinterpret_cast<const char*>(mem_addr_u8(addr));
245 }
246 inline string mem_addr_string(uint32_t addr, uint32_t size) {
247 ostringstream out;
248 for (size_t i = 0; i < size; ++i)
249 out << read_mem_u8(addr+i);
250 return out.str();
251 }
252
253
254 inline void write_mem_u8(uint32_t addr, uint8_t val) {
255 uint8_t* handle = mem_addr_u8(addr);
256 if (handle != NULL) *handle = val;
257 }
258 inline void write_mem_i8(uint32_t addr, int8_t val) {
259 int8_t* handle = mem_addr_i8(addr);
260 if (handle != NULL) *handle = val;
261 }
262 inline void write_mem_u32(uint32_t addr, uint32_t val) {
263 uint32_t* handle = mem_addr_u32(addr);
264 if (handle != NULL) *handle = val;
265 }
266 inline void write_mem_i32(uint32_t addr, int32_t val) {
267 int32_t* handle = mem_addr_i32(addr);
268 if (handle != NULL) *handle = val;
269 }
270
271 inline bool already_allocated(uint32_t addr) {
272 bool result = false;
273 for (int i = 0; i < SIZE(Mem); ++i) {
274 if (Mem.at(i).match(addr)) {
275 if (result)
276 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
277 result = true;
278 }
279 }
280 return result;
281 }
282
283
284
285 :(code)
286
287 void run_one_instruction() {
288 uint8_t op=0, op2=0, op3=0;
289
290 trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
291 op = next();
292 if (Dump_trace) {
293 cerr << "opcode: " << HEXBYTE << NUM(op) << '\n';
294 cerr << "registers at start: ";
295 dump_registers();
296
297 }
298 switch (op) {
299 case 0xf4:
300 EIP = End_of_program;
301 break;
302
303 case 0x0f:
304 switch(op2 = next()) {
305
306 default:
307 cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
308 DUMP("");
309 exit(1);
310 }
311 break;
312 case 0xf2:
313 switch(op2 = next()) {
314
315 case 0x0f:
316 switch(op3 = next()) {
317
318 default:
319 cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
320 DUMP("");
321 exit(1);
322 }
323 break;
324 default:
325 cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
326 DUMP("");
327 exit(1);
328 }
329 break;
330 case 0xf3:
331 switch(op2 = next()) {
332
333 case 0x0f:
334 switch(op3 = next()) {
335
336 default:
337 cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
338 DUMP("");
339 exit(1);
340 }
341 break;
342 default:
343 cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
344 DUMP("");
345 exit(1);
346 }
347 break;
348 default:
349 cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
350 DUMP("");
351 exit(1);
352 }
353 }
354
355 inline uint8_t next() {
356 return read_mem_u8(EIP++);
357 }
358
359 void dump_registers() {
360 for (int i = 0; i < NUM_INT_REGISTERS; ++i) {
361 if (i > 0) cerr << "; ";
362 cerr << " " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
363 }
364 cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
365 }
366
367
368 :(before "End Globals")
369 map<string, string> Name;
370 map<string, string> Name_0f;
371 map<string, string> Name_f3;
372 map<string, string> Name_f3_0f;
373 :(before "End One-time Setup")
374 init_op_names();
375 :(code)
376 void init_op_names() {
377 put(Name, "f4", "halt (hlt)");
378
379 }
380
381 :(before "End Help Special-cases(key)")
382 if (key == "opcodes") {
383 cerr << "Opcodes currently supported by SubX:\n";
384 for (map<string, string>::iterator p = Name.begin(); p != Name.end(); ++p)
385 cerr << " " << p->first << ": " << p->second << '\n';
386 for (map<string, string>::iterator p = Name_0f.begin(); p != Name_0f.end(); ++p)
387 cerr << " 0f " << p->first << ": " << p->second << '\n';
388 for (map<string, string>::iterator p = Name_f3.begin(); p != Name_f3.end(); ++p)
389 cerr << " f3 " << p->first << ": " << p->second << '\n';
390 for (map<string, string>::iterator p = Name_f3_0f.begin(); p != Name_f3_0f.end(); ++p)
391 cerr << " f3 0f " << p->first << ": " << p->second << '\n';
392 cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n"
393 "For complete details on these instructions, consult the IA-32 manual (volume 2).\n"
394 "There's various versions of it online, such as https://c9x.me/x86.\n"
395 "The mnemonics in brackets will help you locate each instruction.\n";
396 return 0;
397 }
398 :(before "End Help Contents")
399 cerr << " opcodes\n";
400
401 :(before "End Includes")
402
403
404
405
406
407