https://github.com/akkartik/mu/blob/main/linux/bootstrap/011run.cc
1
2
3
4
5
6 :(before "End Help Texts")
7 put_new(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
16 "the segment and a (sometimes approximate) starting address in memory.\n"
17 "The name 'code' is special; instructions to execute should always go here.\n"
18 "\n"
19 "The resulting binary starts running code from a label called 'Entry'\n"
20 "in the code segment.\n"
21 "\n"
22 "Segments with the same name get merged together. This rule helps keep functions\n"
23 "and their data close together in .subx files.\n"
24 "You don't have to specify the starting address after the first time.\n"
25 "\n"
26 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
27 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
28 "at runtime, but can be handy when rewriting macros.\n"
29 "\n"
30 "Check out the example programs linux/ex*.\n"
31 );
32 :(before "End Help Contents")
33 cerr << " syntax\n";
34
35 :(code)
36 void test_copy_imm32_to_EAX() {
37
38
39 run(
40
41 "# comment\n"
42
43
44
45 "== code 0x1\n"
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 " b8 . . . 0a 0b 0c 0d\n"
67
68
69 );
70
71 CHECK_TRACE_CONTENTS(
72 "load: 0x00000001 -> b8\n"
73 "load: 0x00000002 -> 0a\n"
74 "load: 0x00000003 -> 0b\n"
75 "load: 0x00000004 -> 0c\n"
76 "load: 0x00000005 -> 0d\n"
77 "run: copy imm32 0x0d0c0b0a to EAX\n"
78 );
79 }
80
81
82 void run(const string& text_bytes) {
83 program p;
84 istringstream in(text_bytes);
85
86 parse(in, p);
87 if (trace_contains_errors()) return;
88
89 load(p);
90 if (trace_contains_errors()) return;
91
92
93 if (p.entry)
94 EIP = p.entry;
95 else
96 EIP = find(p, "code")->start;
97 while (EIP < End_of_program)
98 run_one_instruction();
99 }
100
101
102
103 :(before "End Types")
104 struct program {
105 uint32_t entry;
106 vector<segment> segments;
107 program() { entry = 0; }
108 };
109 :(before "struct program")
110 struct segment {
111 string name;
112 uint32_t start;
113 vector<line> lines;
114
115 segment() {
116 start = 0;
117
118 }
119 };
120 :(before "struct segment")
121 struct line {
122 vector<word> words;
123 vector<string> metadata;
124 string original;
125 };
126 :(before "struct line")
127 struct word {
128 string original;
129 string data;
130 vector<string> metadata;
131 };
132
133
134
135 :(code)
136 void parse(istream& fin, program& out) {
137 segment* curr_segment = NULL;
138 vector<line> l;
139 while (has_data(fin)) {
140 string line_data;
141 line curr;
142 getline(fin, line_data);
143 curr.original = line_data;
144 trace(99, "parse") << "line: " << line_data << end();
145
146 istringstream lin(line_data);
147 while (has_data(lin)) {
148 string word_data;
149 lin >> word_data;
150 if (word_data.empty()) continue;
151 if (word_data[0] == '#') break;
152 if (word_data == ".") continue;
153 if (word_data == "==") {
154 flush(curr_segment, l);
155 string segment_name;
156 lin >> segment_name;
157 curr_segment = find(out, segment_name);
158 if (curr_segment != NULL) {
159 trace(3, "parse") << "appending to segment '" << segment_name << "'" << end();
160 }
161 else {
162 trace(3, "parse") << "new segment '" << segment_name << "'" << end();
163 uint32_t seg_start = 0;
164 lin >> std::hex >> seg_start;
165 sanity_check_program_segment(out, seg_start);
166 out.segments.push_back(segment());
167 curr_segment = &out.segments.back();
168 curr_segment->name = segment_name;
169 curr_segment->start = seg_start;
170 if (trace_contains_errors()) continue;
171 trace(3, "parse") << "starts at address 0x" << HEXWORD << curr_segment->start << end();
172 }
173 break;
174 }
175 if (word_data[0] == ':') {
176
177 break;
178 }
179 curr.words.push_back(word());
180 parse_word(word_data, curr.words.back());
181 trace(99, "parse") << "word: " << to_string(curr.words.back());
182 }
183 if (!curr.words.empty())
184 l.push_back(curr);
185 }
186 flush(curr_segment, l);
187 trace(99, "parse") << "done" << end();
188 }
189
190 segment* find(program& p, const string& segment_name) {
191 for (int i = 0; i < SIZE(p.segments); ++i) {
192 if (p.segments.at(i).name == segment_name)
193 return &p.segments.at(i);
194 }
195 return NULL;
196 }
197
198 void flush(segment* s, vector<line>& lines) {
199 if (lines.empty()) return;
200 if (s == NULL) {
201 raise << "input does not start with a '==' section header\n" << end();
202 return;
203 }
204 trace(3, "parse") << "flushing segment" << end();
205 s->lines.insert(s->lines.end(), lines.begin(), lines.end());
206 lines.clear();
207 }
208
209 void parse_word(const string& data, word& out) {
210 out.original = data;
211 istringstream win(data);
212 if (getline(win, out.data, '/')) {
213 string m;
214 while (getline(win, m, '/'))
215 out.metadata.push_back(m);
216 }
217 }
218
219 void sanity_check_program_segment(const program& p, uint32_t addr) {
220 for (int i = 0; i < SIZE(p.segments); ++i) {
221 if (p.segments.at(i).start == addr)
222 raise << "can't have multiple segments starting at address 0x" << HEXWORD << addr << '\n' << end();
223 }
224 }
225
226
227 void parse(const string& text_bytes) {
228 program p;
229 istringstream in(text_bytes);
230 parse(in, p);
231 }
232
233 void test_detect_duplicate_segments() {
234 Hide_errors = true;
235 parse(
236 "== segment1 0xee\n"
237 "ab\n"
238 "== segment2 0xee\n"
239 "cd\n"
240 );
241 CHECK_TRACE_CONTENTS(
242 "error: can't have multiple segments starting at address 0x000000ee\n"
243 );
244 }
245
246
247
248 void load(const program& p) {
249 if (find(p, "code") == NULL) {
250 raise << "no code to run\n" << end();
251 return;
252 }
253
254 set<uint32_t> overlap;
255 for (int i = 0; i < SIZE(p.segments); ++i) {
256 const segment& seg = p.segments.at(i);
257 uint32_t addr = seg.start;
258 if (!already_allocated(addr))
259 Mem.push_back(vma(seg.start));
260 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
261 for (int j = 0; j < SIZE(seg.lines); ++j) {
262 const line& l = seg.lines.at(j);
263 for (int k = 0; k < SIZE(l.words); ++k) {
264 const word& w = l.words.at(k);
265 uint8_t val = hex_byte(w.data);
266 if (trace_contains_errors()) return;
267 assert(overlap.find(addr) == overlap.end());
268 write_mem_u8(addr, val);
269 overlap.insert(addr);
270 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
271 ++addr;
272 }
273 }
274 if (seg.name == "code") {
275 End_of_program = addr;
276 }
277 }
278 }
279
280 const segment* find(const program& p, const string& segment_name) {
281 for (int i = 0; i < SIZE(p.segments); ++i) {
282 if (p.segments.at(i).name == segment_name)
283 return &p.segments.at(i);
284 }
285 return NULL;
286 }
287
288 uint8_t hex_byte(const string& s) {
289 if (contains_uppercase(s)) {
290 raise << "uppercase hex not allowed: " << s << '\n' << end();
291 return 0;
292 }
293 istringstream in(s);
294 int result = 0;
295 in >> std::hex >> result;
296 if (!in || !in.eof()) {
297 raise << "token '" << s << "' is not a hex byte\n" << end();
298 return '\0';
299 }
300 if (result > 0xff || result < -0x8f) {
301 raise << "token '" << s << "' is not a hex byte\n" << end();
302 return '\0';
303 }
304 return static_cast<uint8_t>(result);
305 }
306
307 void test_number_too_large() {
308 Hide_errors = true;
309 parse_and_load(
310 "== code 0x1\n"
311 "01 cab\n"
312 );
313 CHECK_TRACE_CONTENTS(
314 "error: token 'cab' is not a hex byte\n"
315 );
316 }
317
318 void test_invalid_hex() {
319 Hide_errors = true;
320 parse_and_load(
321 "== code 0x1\n"
322 "01 cx\n"
323 );
324 CHECK_TRACE_CONTENTS(
325 "error: token 'cx' is not a hex byte\n"
326 );
327 }
328
329 void test_negative_number() {
330 parse_and_load(
331 "== code 0x1\n"
332 "01 -02\n"
333 );
334 CHECK_TRACE_COUNT("error", 0);
335 }
336
337 void test_negative_number_too_small() {
338 Hide_errors = true;
339 parse_and_load(
340 "== code 0x1\n"
341 "01 -12345\n"
342 );
343 CHECK_TRACE_CONTENTS(
344 "error: token '-12345' is not a hex byte\n"
345 );
346 }
347
348 void test_hex_prefix() {
349 parse_and_load(
350 "== code 0x1\n"
351 "0x01 -0x02\n"
352 );
353 CHECK_TRACE_COUNT("error", 0);
354 }
355
356 void test_repeated_segment_merges_data() {
357 parse_and_load(
358 "== code 0x1\n"
359 "11 22\n"
360 "== code\n"
361 "33 44\n"
362 );
363 CHECK_TRACE_CONTENTS(
364 "parse: new segment 'code'\n"
365 "parse: appending to segment 'code'\n"
366
367 "load: 0x00000001 -> 11\n"
368 "load: 0x00000002 -> 22\n"
369
370 "load: 0x00000003 -> 33\n"
371 "load: 0x00000004 -> 44\n"
372 );
373 }
374
375 void test_error_on_missing_segment_header() {
376 Hide_errors = true;
377 parse_and_load(
378 "01 02\n"
379 );
380 CHECK_TRACE_CONTENTS(
381 "error: input does not start with a '==' section header\n"
382 );
383 }
384
385 void test_error_on_uppercase_hex() {
386 Hide_errors = true;
387 parse_and_load(
388 "== code\n"
389 "01 Ab\n"
390 );
391 CHECK_TRACE_CONTENTS(
392 "error: uppercase hex not allowed: Ab\n"
393 );
394 }
395
396
397 void parse_and_load(const string& text_bytes) {
398 program p;
399 istringstream in(text_bytes);
400 parse(in, p);
401 if (trace_contains_errors()) return;
402 load(p);
403 }
404
405
406
407 :(before "End Initialize Op Names")
408 put_new(Name, "b8", "copy imm32 to EAX (mov)");
409
410
411
412 :(before "End Single-Byte Opcodes")
413 case 0xb8: {
414 const int32_t src = next32();
415 trace(Callstack_depth+1, "run") << "copy imm32 0x" << HEXWORD << src << " to EAX" << end();
416 Reg[EAX].i = src;
417 break;
418 }
419
420 :(code)
421 void test_copy_imm32_to_EAX_again() {
422 run(
423 "== code 0x1\n"
424
425 " b8 0a 0b 0c 0d \n"
426 );
427 CHECK_TRACE_CONTENTS(
428 "run: copy imm32 0x0d0c0b0a to EAX\n"
429 );
430 }
431
432
433 int32_t next32() {
434 int32_t result = read_mem_i32(EIP);
435 EIP+=4;
436 return result;
437 }
438
439
440
441 string to_string(const word& w) {
442 ostringstream out;
443 out << w.data;
444 for (int i = 0; i < SIZE(w.metadata); ++i)
445 out << " /" << w.metadata.at(i);
446 return out.str();
447 }
448
449 bool contains_uppercase(const string& s) {
450 for (int i = 0; i < SIZE(s); ++i)
451 if (isupper(s.at(i))) return true;
452 return false;
453 }