https://github.com/akkartik/mu/blob/master/011run.cc
1
2
3
4
5
6 :(before "End Help Texts")
7 put_new(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
16 "the segment and a (sometimes approximate) starting address in memory.\n"
17 "The name 'code' is special; instructions to execute should always go here.\n"
18 "\n"
19 "The resulting binary starts running code from a label called 'Entry'\n"
20 "in the code segment.\n"
21 "\n"
22 "Segments with the same name get merged together. This rule helps keep functions and\n"
23 "their data close together in .subx files.\n"
24 "You don't have to specify the starting address after the first time.\n"
25 "\n"
26 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
27 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
28 "at runtime, but can be handy when rewriting macros.\n"
29 "\n"
30 "Check out the examples in the examples/ directory.\n"
31 );
32 :(before "End Help Contents")
33 cerr << " syntax\n";
34
35 :(code)
36 void test_copy_imm32_to_EAX() {
37
38
39 run(
40
41 "# comment\n"
42
43
44
45 "== code 0x1\n"
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 " b8 . . . 0a 0b 0c 0d\n"
67
68
69 );
70
71 CHECK_TRACE_CONTENTS(
72 "load: 0x00000001 -> b8\n"
73 "load: 0x00000002 -> 0a\n"
74 "load: 0x00000003 -> 0b\n"
75 "load: 0x00000004 -> 0c\n"
76 "load: 0x00000005 -> 0d\n"
77 "run: copy imm32 0x0d0c0b0a to EAX\n"
78 );
79 }
80
81
82
83 void run(const string& text_bytes) {
84 program p;
85 istringstream in(text_bytes);
86 parse(in, p);
87 if (trace_contains_errors()) return;
88 transform(p);
89 if (trace_contains_errors()) return;
90 load(p);
91 if (trace_contains_errors()) return;
92
93
94 if (p.entry)
95 EIP = p.entry;
96 else
97 EIP = find(p, "code")->start;
98 while (EIP < End_of_program)
99 run_one_instruction();
100 }
101
102
103
104 :(before "End Types")
105 struct program {
106 uint32_t entry;
107 vector<segment> segments;
108 program() { entry = 0; }
109 };
110 :(before "struct program")
111 struct segment {
112 string name;
113 uint32_t start;
114 vector<line> lines;
115
116 segment() {
117 start = 0;
118
119 }
120 };
121 :(before "struct segment")
122 struct line {
123 vector<word> words;
124 vector<string> metadata;
125 string original;
126 };
127 :(before "struct line")
128 struct word {
129 string original;
130 string data;
131 vector<string> metadata;
132 };
133
134
135
136 :(code)
137 void parse(istream& fin, program& out) {
138 segment* curr_segment = NULL;
139 vector<line> l;
140 while (has_data(fin)) {
141 string line_data;
142 line curr;
143 getline(fin, line_data);
144 curr.original = line_data;
145 trace(99, "parse") << "line: " << line_data << end();
146
147 istringstream lin(line_data);
148 while (has_data(lin)) {
149 string word_data;
150 lin >> word_data;
151 if (word_data.empty()) continue;
152 if (word_data[0] == '#') break;
153 if (word_data == ".") continue;
154 if (word_data == "==") {
155 flush(curr_segment, l);
156 string segment_name;
157 lin >> segment_name;
158 curr_segment = find(out, segment_name);
159 if (curr_segment != NULL) {
160 trace(3, "parse") << "appending to segment '" << segment_name << "'" << end();
161 }
162 else {
163 trace(3, "parse") << "new segment '" << segment_name << "'" << end();
164 uint32_t seg_start = 0;
165 lin >> std::hex >> seg_start;
166 sanity_check_program_segment(out, seg_start);
167 out.segments.push_back(segment());
168 curr_segment = &out.segments.back();
169 curr_segment->name = segment_name;
170 curr_segment->start = seg_start;
171 if (trace_contains_errors()) continue;
172 trace(3, "parse") << "starts at address 0x" << HEXWORD << curr_segment->start << end();
173 }
174 break;
175 }
176 if (word_data[0] == ':') {
177
178 break;
179 }
180 curr.words.push_back(word());
181 parse_word(word_data, curr.words.back());
182 trace(99, "parse") << "word: " << to_string(curr.words.back());
183 }
184 if (!curr.words.empty())
185 l.push_back(curr);
186 }
187 flush(curr_segment, l);
188 trace(99, "parse") << "done" << end();
189 }
190
191 segment* find(program& p, const string& segment_name) {
192 for (int i = 0; i < SIZE(p.segments); ++i) {
193 if (p.segments.at(i).name == segment_name)
194 return &p.segments.at(i);
195 }
196 return NULL;
197 }
198
199 void flush(segment* s, vector<line>& lines) {
200 if (lines.empty()) return;
201 if (s == NULL) {
202 raise << "input does not start with a '==' section header\n" << end();
203 return;
204 }
205 trace(3, "parse") << "flushing segment" << end();
206 s->lines.insert(s->lines.end(), lines.begin(), lines.end());
207 lines.clear();
208 }
209
210 void parse_word(const string& data, word& out) {
211 out.original = data;
212 istringstream win(data);
213 if (getline(win, out.data, '/')) {
214 string m;
215 while (getline(win, m, '/'))
216 out.metadata.push_back(m);
217 }
218 }
219
220 void sanity_check_program_segment(const program& p, uint32_t addr) {
221 for (int i = 0; i < SIZE(p.segments); ++i) {
222 if (p.segments.at(i).start == addr)
223 raise << "can't have multiple segments starting at address 0x" << HEXWORD << addr << '\n' << end();
224 }
225 }
226
227
228 void parse(const string& text_bytes) {
229 program p;
230 istringstream in(text_bytes);
231 parse(in, p);
232 }
233
234 void test_detect_duplicate_segments() {
235 Hide_errors = true;
236 parse(
237 "== segment1 0xee\n"
238 "ab\n"
239 "== segment2 0xee\n"
240 "cd\n"
241 );
242 CHECK_TRACE_CONTENTS(
243 "error: can't have multiple segments starting at address 0x000000ee\n"
244 );
245 }
246
247
248
249 :(before "End Types")
250 typedef void (*transform_fn)(program&);
251 :(before "End Globals")
252 vector<transform_fn> Transform;
253
254 :(code)
255 void transform(program& p) {
256 for (int t = 0; t < SIZE(Transform); ++t)
257 (*Transform.at(t))(p);
258 }
259
260
261
262 void load(const program& p) {
263 if (find(p, "code") == NULL) {
264 raise << "no code to run\n" << end();
265 return;
266 }
267
268 set<uint32_t> overlap;
269 for (int i = 0; i < SIZE(p.segments); ++i) {
270 const segment& seg = p.segments.at(i);
271 uint32_t addr = seg.start;
272 if (!already_allocated(addr))
273 Mem.push_back(vma(seg.start));
274 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
275 for (int j = 0; j < SIZE(seg.lines); ++j) {
276 const line& l = seg.lines.at(j);
277 for (int k = 0; k < SIZE(l.words); ++k) {
278 const word& w = l.words.at(k);
279 uint8_t val = hex_byte(w.data);
280 if (trace_contains_errors()) return;
281 assert(overlap.find(addr) == overlap.end());
282 write_mem_u8(addr, val);
283 overlap.insert(addr);
284 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
285 ++addr;
286 }
287 }
288 if (seg.name == "code") {
289 End_of_program = addr;
290 }
291 }
292 }
293
294 const segment* find(const program& p, const string& segment_name) {
295 for (int i = 0; i < SIZE(p.segments); ++i) {
296 if (p.segments.at(i).name == segment_name)
297 return &p.segments.at(i);
298 }
299 return NULL;
300 }
301
302 uint8_t hex_byte(const string& s) {
303 if (contains_uppercase(s)) {
304 raise << "uppercase hex not allowed: " << s << '\n' << end();
305 return 0;
306 }
307 istringstream in(s);
308 int result = 0;
309 in >> std::hex >> result;
310 if (!in || !in.eof()) {
311 raise << "token '" << s << "' is not a hex byte\n" << end();
312 return '\0';
313 }
314 if (result > 0xff || result < -0x8f) {
315 raise << "token '" << s << "' is not a hex byte\n" << end();
316 return '\0';
317 }
318 return static_cast<uint8_t>(result);
319 }
320
321 void test_number_too_large() {
322 Hide_errors = true;
323 parse_and_load(
324 "== code 0x1\n"
325 "01 cab\n"
326 );
327 CHECK_TRACE_CONTENTS(
328 "error: token 'cab' is not a hex byte\n"
329 );
330 }
331
332 void test_invalid_hex() {
333 Hide_errors = true;
334 parse_and_load(
335 "== code 0x1\n"
336 "01 cx\n"
337 );
338 CHECK_TRACE_CONTENTS(
339 "error: token 'cx' is not a hex byte\n"
340 );
341 }
342
343 void test_negative_number() {
344 parse_and_load(
345 "== code 0x1\n"
346 "01 -02\n"
347 );
348 CHECK_TRACE_COUNT("error", 0);
349 }
350
351 void test_negative_number_too_small() {
352 Hide_errors = true;
353 parse_and_load(
354 "== code 0x1\n"
355 "01 -12345\n"
356 );
357 CHECK_TRACE_CONTENTS(
358 "error: token '-12345' is not a hex byte\n"
359 );
360 }
361
362 void test_hex_prefix() {
363 parse_and_load(
364 "== code 0x1\n"
365 "0x01 -0x02\n"
366 );
367 CHECK_TRACE_COUNT("error", 0);
368 }
369
370 void test_repeated_segment_merges_data() {
371 parse_and_load(
372 "== code 0x1\n"
373 "11 22\n"
374 "== code\n"
375 "33 44\n"
376 );
377 CHECK_TRACE_CONTENTS(
378 "parse: new segment 'code'\n"
379 "parse: appending to segment 'code'\n"
380
381 "load: 0x00000001 -> 11\n"
382 "load: 0x00000002 -> 22\n"
383
384 "load: 0x00000003 -> 33\n"
385 "load: 0x00000004 -> 44\n"
386 );
387 }
388
389 void test_error_on_missing_segment_header() {
390 Hide_errors = true;
391 parse_and_load(
392 "01 02\n"
393 );
394 CHECK_TRACE_CONTENTS(
395 "error: input does not start with a '==' section header\n"
396 );
397 }
398
399 void test_error_on_uppercase_hex() {
400 Hide_errors = true;
401 parse_and_load(
402 "== code\n"
403 "01 Ab\n"
404 );
405 CHECK_TRACE_CONTENTS(
406 "error: uppercase hex not allowed: Ab\n"
407 );
408 }
409
410
411 void parse_and_load(const string& text_bytes) {
412 program p;
413 istringstream in(text_bytes);
414 parse(in, p);
415 if (trace_contains_errors()) return;
416 load(p);
417 }
418
419
420
421 :(before "End Initialize Op Names")
422 put_new(Name, "b8", "copy imm32 to EAX (mov)");
423
424
425
426 :(before "End Single-Byte Opcodes")
427 case 0xb8: {
428 const int32_t src = next32();
429 trace(Callstack_depth+1, "run") << "copy imm32 0x" << HEXWORD << src << " to EAX" << end();
430 Reg[EAX].i = src;
431 break;
432 }
433
434 :(code)
435 void test_copy_imm32_to_EAX_again() {
436 run(
437 "== code 0x1\n"
438
439 " b8 0a 0b 0c 0d \n"
440 );
441 CHECK_TRACE_CONTENTS(
442 "run: copy imm32 0x0d0c0b0a to EAX\n"
443 );
444 }
445
446
447 int32_t next32() {
448 int32_t result = read_mem_i32(EIP);
449 EIP+=4;
450 return result;
451 }
452
453
454
455 string to_string(const word& w) {
456 ostringstream out;
457 out << w.data;
458 for (int i = 0; i < SIZE(w.metadata); ++i)
459 out << " /" << w.metadata.at(i);
460 return out.str();
461 }
462
463 bool contains_uppercase(const string& s) {
464 for (int i = 0; i < SIZE(s); ++i)
465 if (isupper(s.at(i))) return true;
466 return false;
467 }