https://github.com/akkartik/mu/blob/master/subx/011run.cc
1
2
3
4
5
6 :(before "End Help Texts")
7 put_new(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
16 "the segment.\n"
17 "\n"
18 "The first segment contains code and should be called 'code'.\n"
19 "The second segment should be called 'data'.\n"
20 "The resulting binary starts running from the start of the code segment by default.\n"
21 "To start elsewhere in the code segment, define a special label called 'Entry'.\n"
22 "\n"
23 "Segments with the same name get merged together. This rule helps keep functions and\n"
24 "their data close together in .subx files.\n"
25 "\n"
26 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
27 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
28 "at runtime, but can be handy when rewriting macros.\n"
29 "\n"
30 "Check out the examples in the examples/ directory.\n"
31 "Programming in machine code can be annoying, but let's see if we can make\n"
32 "it nice enough to be able to write a compiler in it.\n"
33 );
34 :(before "End Help Contents")
35 cerr << " syntax\n";
36
37 :(code)
38 void test_add_imm32_to_eax() {
39
40
41 run(
42
43 "# comment\n"
44
45
46
47 "== 0x1\n"
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68 " 05 . . . 0a 0b 0c 0d\n"
69
70
71 );
72
73 CHECK_TRACE_CONTENTS(
74 "load: 0x00000001 -> 05\n"
75 "load: 0x00000002 -> 0a\n"
76 "load: 0x00000003 -> 0b\n"
77 "load: 0x00000004 -> 0c\n"
78 "load: 0x00000005 -> 0d\n"
79 "run: add imm32 0x0d0c0b0a to reg EAX\n"
80 "run: storing 0x0d0c0b0a\n"
81 );
82 }
83
84
85
86 void run(const string& text_bytes) {
87 program p;
88 istringstream in(text_bytes);
89 parse(in, p);
90 if (trace_contains_errors()) return;
91 transform(p);
92 if (trace_contains_errors()) return;
93 load(p);
94 if (trace_contains_errors()) return;
95 while (EIP < End_of_program)
96 run_one_instruction();
97 }
98
99
100
101 :(before "End Types")
102 struct program {
103 vector<segment> segments;
104
105
106
107
108 };
109 :(before "struct program")
110 struct segment {
111 uint32_t start;
112 vector<line> lines;
113
114 segment() {
115 start = 0;
116
117 }
118 };
119 :(before "struct segment")
120 struct line {
121 vector<word> words;
122 vector<string> metadata;
123 string original;
124 };
125 :(before "struct line")
126 struct word {
127 string original;
128 string data;
129 vector<string> metadata;
130 };
131
132
133
134 :(code)
135 void parse(istream& fin, program& out) {
136 vector<line> l;
137 while (has_data(fin)) {
138 string line_data;
139 line curr;
140 getline(fin, line_data);
141 curr.original = line_data;
142 trace(99, "parse") << "line: " << line_data << end();
143
144 istringstream lin(line_data);
145 while (has_data(lin)) {
146 string word_data;
147 lin >> word_data;
148 if (word_data.empty()) continue;
149 if (word_data[0] == '#') break;
150 if (word_data == ".") continue;
151 if (word_data == "==") {
152 flush(out, l);
153 string segment_title;
154 lin >> segment_title;
155 if (starts_with(segment_title, "0x")) {
156 segment s;
157 s.start = parse_int(segment_title);
158 sanity_check_program_segment(out, s.start);
159 if (trace_contains_errors()) continue;
160 trace(3, "parse") << "new segment from 0x" << HEXWORD << s.start << end();
161 out.segments.push_back(s);
162 }
163
164
165 break;
166 }
167 if (word_data[0] == ':') {
168
169 break;
170 }
171 curr.words.push_back(word());
172 parse_word(word_data, curr.words.back());
173 trace(99, "parse") << "word: " << to_string(curr.words.back());
174 }
175 if (!curr.words.empty())
176 l.push_back(curr);
177 }
178 flush(out, l);
179 trace(99, "parse") << "done" << end();
180 }
181
182 void flush(program& p, vector<line>& lines) {
183 if (lines.empty()) return;
184 if (p.segments.empty()) {
185 raise << "input does not start with a '==' section header\n" << end();
186 return;
187 }
188
189 trace(99, "parse") << "flushing segment" << end();
190 p.segments.back().lines.swap(lines);
191 }
192
193 void parse_word(const string& data, word& out) {
194 out.original = data;
195 istringstream win(data);
196 if (getline(win, out.data, '/')) {
197 string m;
198 while (getline(win, m, '/'))
199 out.metadata.push_back(m);
200 }
201 }
202
203 void sanity_check_program_segment(const program& p, uint32_t addr) {
204 for (int i = 0; i < SIZE(p.segments); ++i) {
205 if (p.segments.at(i).start == addr)
206 raise << "can't have multiple segments starting at address 0x" << HEXWORD << addr << '\n' << end();
207 }
208 }
209
210
211 void parse(const string& text_bytes) {
212 program p;
213 istringstream in(text_bytes);
214 parse(in, p);
215 }
216
217 void test_detect_duplicate_segments() {
218 Hide_errors = true;
219 parse(
220 "== 0xee\n"
221 "ab\n"
222 "== 0xee\n"
223 "cd\n"
224 );
225 CHECK_TRACE_CONTENTS(
226 "error: can't have multiple segments starting at address 0x000000ee\n"
227 );
228 }
229
230
231
232 :(before "End Types")
233 typedef void (*transform_fn)(program&);
234 :(before "End Globals")
235 vector<transform_fn> Transform;
236
237 :(code)
238 void transform(program& p) {
239 for (int t = 0; t < SIZE(Transform); ++t)
240 (*Transform.at(t))(p);
241 }
242
243
244
245 void load(const program& p) {
246 if (p.segments.empty()) {
247 raise << "no code to run\n" << end();
248 return;
249 }
250
251 set<uint32_t> overlap;
252 for (int i = 0; i < SIZE(p.segments); ++i) {
253 const segment& seg = p.segments.at(i);
254 uint32_t addr = seg.start;
255 if (!already_allocated(addr))
256 Mem.push_back(vma(seg.start));
257 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
258 for (int j = 0; j < SIZE(seg.lines); ++j) {
259 const line& l = seg.lines.at(j);
260 for (int k = 0; k < SIZE(l.words); ++k) {
261 const word& w = l.words.at(k);
262 uint8_t val = hex_byte(w.data);
263 if (trace_contains_errors()) return;
264 assert(overlap.find(addr) == overlap.end());
265 write_mem_u8(addr, val);
266 overlap.insert(addr);
267 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
268 ++addr;
269 }
270 }
271 if (i == 0) End_of_program = addr;
272 }
273 EIP = p.segments.at(0).start;
274
275 }
276
277 uint8_t hex_byte(const string& s) {
278 istringstream in(s);
279 int result = 0;
280 in >> std::hex >> result;
281 if (!in || !in.eof()) {
282 raise << "token '" << s << "' is not a hex byte\n" << end();
283 return '\0';
284 }
285 if (result > 0xff || result < -0x8f) {
286 raise << "token '" << s << "' is not a hex byte\n" << end();
287 return '\0';
288 }
289 return static_cast<uint8_t>(result);
290 }
291
292 void test_number_too_large() {
293 Hide_errors = true;
294 parse_and_load(
295 "== 0x1\n"
296 "05 cab\n"
297 );
298 CHECK_TRACE_CONTENTS(
299 "error: token 'cab' is not a hex byte\n"
300 );
301 }
302
303 void test_invalid_hex() {
304 Hide_errors = true;
305 parse_and_load(
306 "== 0x1\n"
307 "05 cx\n"
308 );
309 CHECK_TRACE_CONTENTS(
310 "error: token 'cx' is not a hex byte\n"
311 );
312 }
313
314 void test_negative_number() {
315 parse_and_load(
316 "== 0x1\n"
317 "05 -12\n"
318 );
319 CHECK_TRACE_COUNT("error", 0);
320 }
321
322 void test_negative_number_too_small() {
323 Hide_errors = true;
324 parse_and_load(
325 "== 0x1\n"
326 "05 -12345\n"
327 );
328 CHECK_TRACE_CONTENTS(
329 "error: token '-12345' is not a hex byte\n"
330 );
331 }
332
333 void test_hex_prefix() {
334 parse_and_load(
335 "== 0x1\n"
336 "0x05 -0x12\n"
337 );
338 CHECK_TRACE_COUNT("error", 0);
339 }
340
341
342 void parse_and_load(const string& text_bytes) {
343 program p;
344 istringstream in(text_bytes);
345 parse(in, p);
346 if (trace_contains_errors()) return;
347 load(p);
348 }
349
350
351
352 :(before "End Initialize Op Names")
353 put_new(Name, "05", "add imm32 to EAX (add)");
354
355
356 :(before "End Single-Byte Opcodes")
357 case 0x05: {
358 int32_t arg2 = next32();
359 trace(Callstack_depth+1, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end();
360 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2);
361 break;
362 }
363
364 :(code)
365
366 int32_t next32() {
367 int32_t result = read_mem_i32(EIP);
368 EIP+=4;
369 return result;
370 }
371
372
373
374 string to_string(const word& w) {
375 ostringstream out;
376 out << w.data;
377 for (int i = 0; i < SIZE(w.metadata); ++i)
378 out << " /" << w.metadata.at(i);
379 return out.str();
380 }
381
382 int32_t parse_int(const string& s) {
383 if (s.empty()) return 0;
384 istringstream in(s);
385 in >> std::hex;
386 if (s.at(0) == '-') {
387 int32_t result = 0;
388 in >> result;
389 if (!in || !in.eof()) {
390 raise << "not a number: " << s << '\n' << end();
391 return 0;
392 }
393 return result;
394 }
395 uint32_t uresult = 0;
396 in >> uresult;
397 if (!in || !in.eof()) {
398 raise << "not a number: " << s << '\n' << end();
399 return 0;
400 }
401 return static_cast<int32_t>(uresult);
402 }
403 :(before "End Unit Tests")
404 void test_parse_int() {
405 CHECK_EQ(0, parse_int("0"));
406 CHECK_EQ(0, parse_int("0x0"));
407 CHECK_EQ(0, parse_int("0x0"));
408 CHECK_EQ(16, parse_int("10"));
409 CHECK_EQ(-1, parse_int("-1"));
410 CHECK_EQ(-1, parse_int("0xffffffff"));
411 }