1
2
3
4
5
6
7
8
9
10
11 :(before "End Types")
12 enum {
13 EAX,
14 ECX,
15 EDX,
16 EBX,
17 ESP,
18 EBP,
19 ESI,
20 EDI,
21 NUM_INT_REGISTERS,
22 };
23 union reg {
24 int32_t i;
25 uint32_t u;
26 };
27 :(before "End Globals")
28 reg Reg[NUM_INT_REGISTERS] = { {0} };
29 uint32_t EIP = 1;
30 :(before "End Reset")
31 bzero(Reg, sizeof(Reg));
32 EIP = 1;
33
34 :(before "End Help Contents")
35 cerr << " registers\n";
36 :(before "End Help Texts")
37 put_new(Help, "registers",
38 "SubX currently supports eight 32-bit integer registers. From 0 to 7, they are:\n"
39 " EAX ECX EDX EBX ESP EBP ESI EDI\n"
40 "ESP contains the top of the stack.\n"
41 "\n"
42 "-- 8-bit registers\n"
43 "Some instructions operate on eight *overlapping* 8-bit registers.\n"
44 "From 0 to 7, they are:\n"
45 " AL CL DL BL AH CH DH BH\n"
46 "The 8-bit registers overlap with the 32-bit ones. AL is the lowest signicant byte\n"
47 "of EAX, AH is the second lowest significant byte, and so on.\n"
48 "\n"
49 "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
50 "\n"
51 "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
52 "\n"
53 "For complete details consult the IA-32 software developer's manual, volume 2,\n"
54 "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
55 "It is included in this repository as 'modrm.pdf'.\n"
56 "The register encodings are described in the top row of the table, but you'll need\n"
57 "to spend some time with it.\n"
58 "\n"
59 "-- flag registers\n"
60 "Various instructions (particularly 'compare') modify one or more of three 1-bit 'flag'\n"
61 "registers, as a side-effect:\n"
62 "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
63 " reset if not.\n"
64 "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
65 "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
66 "The flag bits are read by conditional jumps.\n"
67 "\n"
68 "For complete details on how different instructions update the flags, consult the IA-32\n"
69 "manual (volume 2). There's various versions of it online, such as https://c9x.me/x86,\n"
70 "though of course you'll need to be careful to ignore instructions and flag registers\n"
71 "that SubX doesn't support.\n"
72 "\n"
73 "It isn't simple, but if this is the processor you have running on your computer,\n"
74 "might as well get good at it.\n"
75 );
76
77 :(before "End Globals")
78
79 bool SF = false;
80 bool ZF = false;
81 bool OF = false;
82 :(before "End Reset")
83 SF = ZF = OF = false;
84
85
86
87 :(before "End Includes")
88
89
90
91
92 \
93 int64_t tmp = arg1 op arg2; \
94 arg1 = arg1 op arg2; \
95 trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
96 SF = (arg1 < 0); \
97 ZF = (arg1 == 0); \
98 OF = (arg1 != tmp); \
99 }
100
101
102
103
104 \
105 arg1 = arg1 op arg2; \
106 trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
107 SF = (arg1 >> 31); \
108 ZF = (arg1 == 0); \
109 OF = false; \
110 }
111
112
113
114 :(before "End Types")
115 const uint32_t INITIAL_SEGMENT_SIZE = 0x1000 - 1;
116
117
118
119
120
121 struct vma {
122 uint32_t start;
123 uint32_t end;
124 vector<uint8_t> _data;
125 vma(uint32_t s, uint32_t e) :start(s), end(e) {
126 _data.resize(end-start);
127 }
128 vma(uint32_t s) :start(s), end(s+INITIAL_SEGMENT_SIZE) {
129 _data.resize(end-start);
130 }
131 bool match(uint32_t a) {
132 return a >= start && a < end;
133 }
134 bool match32(uint32_t a) {
135 return a >= start && a+4 <= end;
136 }
137 uint8_t& data(uint32_t a) {
138 assert(match(a));
139 return _data.at(a-start);
140 }
141 void grow_until(uint32_t new_end_address) {
142 if (new_end_address < end) return;
143
144 void sanity_check(uint32_t start, uint32_t end);
145 sanity_check(start, new_end_address);
146 end = new_end_address;
147 _data.resize(new_end_address - start);
148 }
149
150 };
151 :(code)
152 void sanity_check(uint32_t start, uint32_t end) {
153 bool dup_found = false;
154 for (int i = 0; i < SIZE(Mem); ++i) {
155 const vma& curr = Mem.at(i);
156 if (curr.start == start) {
157 assert(!dup_found);
158 dup_found = true;
159 }
160 else if (curr.start > start) {
161 assert(curr.start > end);
162 }
163 else if (curr.start < start) {
164 assert(curr.end < start);
165 }
166 }
167 }
168
169 :(before "End Globals")
170
171 vector<vma> Mem;
172 :(code)
173
174
175
176 void grow_code_segment(uint32_t new_end_address) {
177 assert(!Mem.empty());
178 Mem.at(0).grow_until(new_end_address);
179 }
180 void grow_data_segment(uint32_t new_end_address) {
181 assert(SIZE(Mem) > 1);
182 Mem.at(1).grow_until(new_end_address);
183 }
184 :(before "End Globals")
185 uint32_t End_of_program = 0;
186
187 :(before "End Reset")
188 Mem.clear();
189 End_of_program = 0;
190 :(code)
191
192
193 inline uint8_t read_mem_u8(uint32_t addr) {
194 uint8_t* handle = mem_addr_u8(addr);
195 return handle ? *handle : 0;
196 }
197 inline int8_t read_mem_i8(uint32_t addr) {
198 return static_cast<int8_t>(read_mem_u8(addr));
199 }
200 inline uint32_t read_mem_u32(uint32_t addr) {
201 uint32_t* handle = mem_addr_u32(addr);
202 return handle ? *handle : 0;
203 }
204 inline int32_t read_mem_i32(uint32_t addr) {
205 return static_cast<int32_t>(read_mem_u32(addr));
206 }
207
208 inline uint8_t* mem_addr_u8(uint32_t addr) {
209 uint8_t* result = NULL;
210 for (int i = 0; i < SIZE(Mem); ++i) {
211 if (Mem.at(i).match(addr)) {
212 if (result)
213 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
214 result = &Mem.at(i).data(addr);
215 }
216 }
217 if (result == NULL)
218 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
219 return result;
220 }
221 inline int8_t* mem_addr_i8(uint32_t addr) {
222 return reinterpret_cast<int8_t*>(mem_addr_u8(addr));
223 }
224 inline uint32_t* mem_addr_u32(uint32_t addr) {
225 uint32_t* result = NULL;
226 for (int i = 0; i < SIZE(Mem); ++i) {
227 if (Mem.at(i).match32(addr)) {
228 if (result)
229 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
230 result = reinterpret_cast<uint32_t*>(&Mem.at(i).data(addr));
231 }
232 }
233 if (result == NULL) {
234 raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
235 raise << "The entire 4-byte word should be initialized and lie in a single segment.\n" << end();
236 }
237 return result;
238 }
239 inline int32_t* mem_addr_i32(uint32_t addr) {
240 return reinterpret_cast<int32_t*>(mem_addr_u32(addr));
241 }
242
243 inline const char* mem_addr_kernel_string(uint32_t addr) {
244 return reinterpret_cast<const char*>(mem_addr_u8(addr));
245 }
246 inline string mem_addr_string(uint32_t addr, uint32_t size) {
247 ostringstream out;
248 for (size_t i = 0; i < size; ++i)
249 out << read_mem_u8(addr+i);
250 return out.str();
251 }
252
253
254 inline void write_mem_u8(uint32_t addr, uint8_t val) {
255 uint8_t* handle = mem_addr_u8(addr);
256 if (handle != NULL) *handle = val;
257 }
258 inline void write_mem_i8(uint32_t addr, int8_t val) {
259 int8_t* handle = mem_addr_i8(addr);
260 if (handle != NULL) *handle = val;
261 }
262 inline void write_mem_u32(uint32_t addr, uint32_t val) {
263 uint32_t* handle = mem_addr_u32(addr);
264 if (handle != NULL) *handle = val;
265 }
266 inline void write_mem_i32(uint32_t addr, int32_t val) {
267 int32_t* handle = mem_addr_i32(addr);
268 if (handle != NULL) *handle = val;
269 }
270
271 inline bool already_allocated(uint32_t addr) {
272 bool result = false;
273 for (int i = 0; i < SIZE(Mem); ++i) {
274 if (Mem.at(i).match(addr)) {
275 if (result)
276 raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
277 result = true;
278 }
279 }
280 return result;
281 }
282
283
284
285 :(code)
286
287 void run_one_instruction() {
288 uint8_t op=0, op2=0, op3=0;
289
290 trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
291 op = next();
292 if (Dump_trace) {
293 cerr << "opcode: " << HEXBYTE << NUM(op) << '\n';
294 cerr << "registers at start: ";
295 dump_registers();
296 }
297 switch (op) {
298 case 0xf4:
299 EIP = End_of_program;
300 break;
301
302 case 0x0f:
303 switch(op2 = next()) {
304
305 default:
306 cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
307 DUMP("");
308 exit(1);
309 }
310 break;
311 case 0xf2:
312 switch(op2 = next()) {
313
314 case 0x0f:
315 switch(op3 = next()) {
316
317 default:
318 cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
319 DUMP("");
320 exit(1);
321 }
322 break;
323 default:
324 cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
325 DUMP("");
326 exit(1);
327 }
328 break;
329 case 0xf3:
330 switch(op2 = next()) {
331
332 case 0x0f:
333 switch(op3 = next()) {
334
335 default:
336 cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
337 DUMP("");
338 exit(1);
339 }
340 break;
341 default:
342 cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
343 DUMP("");
344 exit(1);
345 }
346 break;
347 default:
348 cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
349 DUMP("");
350 exit(1);
351 }
352 }
353
354 inline uint8_t next() {
355 return read_mem_u8(EIP++);
356 }
357
358 void dump_registers() {
359 for (int i = 0; i < NUM_INT_REGISTERS; ++i) {
360 if (i > 0) cerr << "; ";
361 cerr << " " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
362 }
363 cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
364 }
365
366
367 :(before "End Globals")
368 map<string, string> Name;
369 map<string, string> Name_0f;
370 map<string, string> Name_f3;
371 map<string, string> Name_f3_0f;
372 :(before "End One-time Setup")
373 init_op_names();
374 :(code)
375 void init_op_names() {
376 put(Name, "f4", "halt (hlt)");
377
378 }
379
380 :(before "End Help Special-cases(key)")
381 if (key == "opcodes") {
382 cerr << "Opcodes currently supported by SubX:\n";
383 for (map<string, string>::iterator p = Name.begin(); p != Name.end(); ++p)
384 cerr << " " << p->first << ": " << p->second << '\n';
385 for (map<string, string>::iterator p = Name_0f.begin(); p != Name_0f.end(); ++p)
386 cerr << " 0f " << p->first << ": " << p->second << '\n';
387 for (map<string, string>::iterator p = Name_f3.begin(); p != Name_f3.end(); ++p)
388 cerr << " f3 " << p->first << ": " << p->second << '\n';
389 for (map<string, string>::iterator p = Name_f3_0f.begin(); p != Name_f3_0f.end(); ++p)
390 cerr << " f3 0f " << p->first << ": " << p->second << '\n';
391 cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n"
392 "For complete details on these instructions, consult the IA-32 manual (volume 2).\n"
393 "There's various versions of it online, such as https://c9x.me/x86.\n"
394 "The mnemonics in brackets will help you locate each instruction.\n";
395 return 0;
396 }
397 :(before "End Help Contents")
398 cerr << " opcodes\n";
399
400 :(before "End Includes")
401
402
403
404
405
406