https://github.com/akkartik/mu/blob/master/subx/011run.cc
1
2
3
4
5
6 :(before "End Help Texts")
7 put_new(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
16 "the segment and a (sometimes approximate) starting address in memory.\n"
17 "The name 'code' is special; instructions to execute should always go here.\n"
18 "\n"
19 "The resulting binary starts running from the start of the segment by default.\n"
20 "To start elsewhere in the code segment, define a special label called 'Entry'.\n"
21 "\n"
22 "Segments with the same name get merged together. This rule helps keep functions and\n"
23 "their data close together in .subx files.\n"
24 "You don't have to specify the starting address after the first time.\n"
25 "\n"
26 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
27 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
28 "at runtime, but can be handy when rewriting macros.\n"
29 "\n"
30 "Check out the examples in the examples/ directory.\n"
31 "Programming in machine code can be annoying, but let's see if we can make\n"
32 "it nice enough to be able to write a compiler in it.\n"
33 );
34 :(before "End Help Contents")
35 cerr << " syntax\n";
36
37 :(code)
38 void test_copy_imm32_to_EAX() {
39
40
41 run(
42
43 "# comment\n"
44
45
46
47 "== code 0x1\n"
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68 " b8 . . . 0a 0b 0c 0d\n"
69
70
71 );
72
73 CHECK_TRACE_CONTENTS(
74 "load: 0x00000001 -> b8\n"
75 "load: 0x00000002 -> 0a\n"
76 "load: 0x00000003 -> 0b\n"
77 "load: 0x00000004 -> 0c\n"
78 "load: 0x00000005 -> 0d\n"
79 "run: copy imm32 0x0d0c0b0a to EAX\n"
80 );
81 }
82
83
84
85 void run(const string& text_bytes) {
86 program p;
87 istringstream in(text_bytes);
88 parse(in, p);
89 if (trace_contains_errors()) return;
90 transform(p);
91 if (trace_contains_errors()) return;
92 load(p);
93 if (trace_contains_errors()) return;
94 while (EIP < End_of_program)
95 run_one_instruction();
96 }
97
98
99
100 :(before "End Types")
101 struct program {
102 vector<segment> segments;
103
104
105
106
107 };
108 :(before "struct program")
109 struct segment {
110 string name;
111 uint32_t start;
112 vector<line> lines;
113
114 segment() {
115 start = 0;
116
117 }
118 };
119 :(before "struct segment")
120 struct line {
121 vector<word> words;
122 vector<string> metadata;
123 string original;
124 };
125 :(before "struct line")
126 struct word {
127 string original;
128 string data;
129 vector<string> metadata;
130 };
131
132
133
134 :(code)
135 void parse(istream& fin, program& out) {
136 segment* curr_segment = NULL;
137 vector<line> l;
138 while (has_data(fin)) {
139 string line_data;
140 line curr;
141 getline(fin, line_data);
142 curr.original = line_data;
143 trace(99, "parse") << "line: " << line_data << end();
144
145 istringstream lin(line_data);
146 while (has_data(lin)) {
147 string word_data;
148 lin >> word_data;
149 if (word_data.empty()) continue;
150 if (word_data[0] == '#') break;
151 if (word_data == ".") continue;
152 if (word_data == "==") {
153 flush(curr_segment, l);
154 string segment_name;
155 lin >> segment_name;
156 curr_segment = find(out, segment_name);
157 if (curr_segment != NULL) {
158 trace(3, "parse") << "appending to segment '" << segment_name << "'" << end();
159 }
160 else {
161 trace(3, "parse") << "new segment '" << segment_name << "'" << end();
162 uint32_t seg_start = 0;
163 lin >> std::hex >> seg_start;
164 sanity_check_program_segment(out, seg_start);
165 out.segments.push_back(segment());
166 curr_segment = &out.segments.back();
167 curr_segment->name = segment_name;
168 curr_segment->start = seg_start;
169 if (trace_contains_errors()) continue;
170 trace(3, "parse") << "starts at address 0x" << HEXWORD << curr_segment->start << end();
171 }
172 break;
173 }
174 if (word_data[0] == ':') {
175
176 break;
177 }
178 curr.words.push_back(word());
179 parse_word(word_data, curr.words.back());
180 trace(99, "parse") << "word: " << to_string(curr.words.back());
181 }
182 if (!curr.words.empty())
183 l.push_back(curr);
184 }
185 flush(curr_segment, l);
186 trace(99, "parse") << "done" << end();
187 }
188
189 segment* find(program& p, const string& segment_name) {
190 for (int i = 0; i < SIZE(p.segments); ++i) {
191 if (p.segments.at(i).name == segment_name)
192 return &p.segments.at(i);
193 }
194 return NULL;
195 }
196
197 void flush(segment* s, vector<line>& lines) {
198 if (lines.empty()) return;
199 if (s == NULL) {
200 raise << "input does not start with a '==' section header\n" << end();
201 return;
202 }
203 trace(3, "parse") << "flushing segment" << end();
204 s->lines.insert(s->lines.end(), lines.begin(), lines.end());
205 lines.clear();
206 }
207
208 void parse_word(const string& data, word& out) {
209 out.original = data;
210 istringstream win(data);
211 if (getline(win, out.data, '/')) {
212 string m;
213 while (getline(win, m, '/'))
214 out.metadata.push_back(m);
215 }
216 }
217
218 void sanity_check_program_segment(const program& p, uint32_t addr) {
219 for (int i = 0; i < SIZE(p.segments); ++i) {
220 if (p.segments.at(i).start == addr)
221 raise << "can't have multiple segments starting at address 0x" << HEXWORD << addr << '\n' << end();
222 }
223 }
224
225
226 void parse(const string& text_bytes) {
227 program p;
228 istringstream in(text_bytes);
229 parse(in, p);
230 }
231
232 void test_detect_duplicate_segments() {
233 Hide_errors = true;
234 parse(
235 "== segment1 0xee\n"
236 "ab\n"
237 "== segment2 0xee\n"
238 "cd\n"
239 );
240 CHECK_TRACE_CONTENTS(
241 "error: can't have multiple segments starting at address 0x000000ee\n"
242 );
243 }
244
245
246
247 :(before "End Types")
248 typedef void (*transform_fn)(program&);
249 :(before "End Globals")
250 vector<transform_fn> Transform;
251
252 :(code)
253 void transform(program& p) {
254 for (int t = 0; t < SIZE(Transform); ++t)
255 (*Transform.at(t))(p);
256 }
257
258
259
260 void load(const program& p) {
261 if (find(p, "code") == NULL) {
262 raise << "no code to run\n" << end();
263 return;
264 }
265
266 set<uint32_t> overlap;
267 for (int i = 0; i < SIZE(p.segments); ++i) {
268 const segment& seg = p.segments.at(i);
269 uint32_t addr = seg.start;
270 if (!already_allocated(addr))
271 Mem.push_back(vma(seg.start));
272 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
273 for (int j = 0; j < SIZE(seg.lines); ++j) {
274 const line& l = seg.lines.at(j);
275 for (int k = 0; k < SIZE(l.words); ++k) {
276 const word& w = l.words.at(k);
277 uint8_t val = hex_byte(w.data);
278 if (trace_contains_errors()) return;
279 assert(overlap.find(addr) == overlap.end());
280 write_mem_u8(addr, val);
281 overlap.insert(addr);
282 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
283 ++addr;
284 }
285 }
286 if (seg.name == "code") {
287 End_of_program = addr;
288 EIP = seg.start;
289
290 }
291 }
292 }
293
294 const segment* find(const program& p, const string& segment_name) {
295 for (int i = 0; i < SIZE(p.segments); ++i) {
296 if (p.segments.at(i).name == segment_name)
297 return &p.segments.at(i);
298 }
299 return NULL;
300 }
301
302 uint8_t hex_byte(const string& s) {
303 istringstream in(s);
304 int result = 0;
305 in >> std::hex >> result;
306 if (!in || !in.eof()) {
307 raise << "token '" << s << "' is not a hex byte\n" << end();
308 return '\0';
309 }
310 if (result > 0xff || result < -0x8f) {
311 raise << "token '" << s << "' is not a hex byte\n" << end();
312 return '\0';
313 }
314 return static_cast<uint8_t>(result);
315 }
316
317 void test_number_too_large() {
318 Hide_errors = true;
319 parse_and_load(
320 "== code 0x1\n"
321 "01 cab\n"
322 );
323 CHECK_TRACE_CONTENTS(
324 "error: token 'cab' is not a hex byte\n"
325 );
326 }
327
328 void test_invalid_hex() {
329 Hide_errors = true;
330 parse_and_load(
331 "== code 0x1\n"
332 "01 cx\n"
333 );
334 CHECK_TRACE_CONTENTS(
335 "error: token 'cx' is not a hex byte\n"
336 );
337 }
338
339 void test_negative_number() {
340 parse_and_load(
341 "== code 0x1\n"
342 "01 -02\n"
343 );
344 CHECK_TRACE_COUNT("error", 0);
345 }
346
347 void test_negative_number_too_small() {
348 Hide_errors = true;
349 parse_and_load(
350 "== code 0x1\n"
351 "01 -12345\n"
352 );
353 CHECK_TRACE_CONTENTS(
354 "error: token '-12345' is not a hex byte\n"
355 );
356 }
357
358 void test_hex_prefix() {
359 parse_and_load(
360 "== code 0x1\n"
361 "0x01 -0x02\n"
362 );
363 CHECK_TRACE_COUNT("error", 0);
364 }
365
366 void test_repeated_segment_merges_data() {
367 parse_and_load(
368 "== code 0x1\n"
369 "11 22\n"
370 "== code\n"
371 "33 44\n"
372 );
373 CHECK_TRACE_CONTENTS(
374 "parse: new segment 'code'\n"
375 "parse: appending to segment 'code'\n"
376
377 "load: 0x00000001 -> 11\n"
378 "load: 0x00000002 -> 22\n"
379
380 "load: 0x00000003 -> 33\n"
381 "load: 0x00000004 -> 44\n"
382 );
383 }
384
385 void test_error_on_missing_segment_header() {
386 Hide_errors = true;
387 parse_and_load(
388 "01 02\n"
389 );
390 CHECK_TRACE_CONTENTS(
391 "error: input does not start with a '==' section header\n"
392 );
393 }
394
395
396 void parse_and_load(const string& text_bytes) {
397 program p;
398 istringstream in(text_bytes);
399 parse(in, p);
400 if (trace_contains_errors()) return;
401 load(p);
402 }
403
404
405
406 :(before "End Initialize Op Names")
407 put_new(Name, "b8", "copy imm32 to EAX (mov)");
408
409
410
411 :(before "End Single-Byte Opcodes")
412 case 0xb8: {
413 const int32_t src = next32();
414 trace(Callstack_depth+1, "run") << "copy imm32 0x" << HEXWORD << src << " to EAX" << end();
415 Reg[EAX].i = src;
416 break;
417 }
418
419 :(code)
420 void test_copy_imm32_to_EAX_again() {
421 run(
422 "== code 0x1\n"
423
424 " b8 0a 0b 0c 0d \n"
425 );
426 CHECK_TRACE_CONTENTS(
427 "run: copy imm32 0x0d0c0b0a to EAX\n"
428 );
429 }
430
431
432 int32_t next32() {
433 int32_t result = read_mem_i32(EIP);
434 EIP+=4;
435 return result;
436 }
437
438
439
440 string to_string(const word& w) {
441 ostringstream out;
442 out << w.data;
443 for (int i = 0; i < SIZE(w.metadata); ++i)
444 out << " /" << w.metadata.at(i);
445 return out.str();
446 }
447
448 int32_t parse_int(const string& s) {
449 if (s.empty()) return 0;
450 istringstream in(s);
451 in >> std::hex;
452 if (s.at(0) == '-') {
453 int32_t result = 0;
454 in >> result;
455 if (!in || !in.eof()) {
456 raise << "not a number: " << s << '\n' << end();
457 return 0;
458 }
459 return result;
460 }
461 uint32_t uresult = 0;
462 in >> uresult;
463 if (!in || !in.eof()) {
464 raise << "not a number: " << s << '\n' << end();
465 return 0;
466 }
467 return static_cast<int32_t>(uresult);
468 }
469 :(before "End Unit Tests")
470 void test_parse_int() {
471 CHECK_EQ(0, parse_int("0"));
472 CHECK_EQ(0, parse_int("0x0"));
473 CHECK_EQ(0, parse_int("0x0"));
474 CHECK_EQ(16, parse_int("10"));
475 CHECK_EQ(-1, parse_int("-1"));
476 CHECK_EQ(-1, parse_int("0xffffffff"));
477 }