https://github.com/akkartik/mu/blob/master/subx/011run.cc
1
2
3
4
5
6 :(before "End Help Texts")
// Store the text of the "syntax" help topic in Help. The adjacent string
// literals below are concatenated by the compiler into one string.
// NOTE(review): the text tells users to name segments 'code' and 'data', but
// parse() in this file only creates a segment when the title after '==' starts
// with "0x" -- confirm the named form is handled elsewhere.
7 put_new(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
16 "the segment.\n"
17 "\n"
18 "The first segment contains code and should be called 'code'.\n"
19 "The second segment should be called 'data'.\n"
20 "The resulting binary starts running from the start of the code segment by default.\n"
21 "To start elsewhere in the code segment, define a special label called 'Entry'.\n"
22 "\n"
23 "Segments with the same name get merged together. This rule helps keep functions and\n"
24 "their data close together in .subx files.\n"
25 "\n"
26 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
27 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
28 "at runtime, but can be handy when rewriting macros.\n"
29 "\n"
30 "Check out the examples in the examples/ directory.\n"
31 "Programming in machine code can be annoying, but let's see if we can make\n"
32 "it nice enough to be able to write a compiler in it.\n"
33 );
34 :(before "End Help Contents")
// Print the name of the "syntax" topic to stderr; this statement runs wherever
// the directive on the previous line splices it in.
35 cerr << " syntax\n";
36
37 :(scenario add_imm32_to_eax)
38
39
40
41
42
43
44
45
46
47
48
49
50
51 == 0x1
52
53
54
55
56
57
58
59
60
61
62
63 05 . . . 0a 0b 0c 0d
64
65
66
67
68 +load: 0x00000001 -> 05
69 +load: 0x00000002 -> 0a
70 +load: 0x00000003 -> 0b
71 +load: 0x00000004 -> 0c
72 +load: 0x00000005 -> 0d
73 +run: add imm32 0x0d0c0b0a to reg EAX
74 +run: storing 0x0d0c0b0a
75
76 :(code)
77
78
79 void run(const string& text_bytes) {
80 program p;
81 istringstream in(text_bytes);
82 parse(in, p);
83 if (trace_contains_errors()) return;
84 transform(p);
85 if (trace_contains_errors()) return;
86 load(p);
87 if (trace_contains_errors()) return;
88 while (EIP < End_of_program)
89 run_one_instruction();
90 }
91
92
93
94 :(before "End Types")
95 struct program {
// every segment parsed so far, in the order parse() appended them;
// load() treats the first one as the code to execute
96 vector<segment> segments;
97
98
99
100
101 };
102 :(before "struct program")
103 struct segment {
104 uint32_t start;
105 vector<line> lines;
106
107 segment() {
108 start = 0;
109
110 }
111 };
112 :(before "struct segment")
113 struct line {
// the words parse() kept from this line (comments and bare "." words are dropped)
114 vector<word> words;
// never written by the code visible in this file
115 vector<string> metadata;
// the raw text of the line, exactly as getline() returned it
116 string original;
117 };
118 :(before "struct line")
119 struct word {
// the complete token as it appeared in the source text
120 string original;
// the part of the token before its first '/'
121 string data;
// the remaining '/'-separated pieces of the token, in order
122 vector<string> metadata;
123 };
124
125
126
127 :(code)
128 void parse(istream& fin, program& out) {
129 vector<line> l;
130 while (has_data(fin)) {
131 string line_data;
132 line curr;
133 getline(fin, line_data);
134 curr.original = line_data;
135 trace(99, "parse") << "line: " << line_data << end();
136
137 istringstream lin(line_data);
138 while (has_data(lin)) {
139 string word_data;
140 lin >> word_data;
141 if (word_data.empty()) continue;
142 if (word_data[0] == '#') break;
143 if (word_data == ".") continue;
144 if (word_data == "==") {
145 flush(out, l);
146 string segment_title;
147 lin >> segment_title;
148 if (starts_with(segment_title, "0x")) {
149 segment s;
150 s.start = parse_int(segment_title);
151 sanity_check_program_segment(out, s.start);
152 if (trace_contains_errors()) continue;
153 trace(3, "parse") << "new segment from 0x" << HEXWORD << s.start << end();
154 out.segments.push_back(s);
155 }
156
157
158 break;
159 }
160 if (word_data[0] == ':') {
161
162 break;
163 }
164 curr.words.push_back(word());
165 parse_word(word_data, curr.words.back());
166 trace(99, "parse") << "word: " << to_string(curr.words.back());
167 }
168 if (!curr.words.empty())
169 l.push_back(curr);
170 }
171 flush(out, l);
172 trace(99, "parse") << "done" << end();
173 }
174
175 void flush(program& p, vector<line>& lines) {
176 if (lines.empty()) return;
177 if (p.segments.empty()) {
178 raise << "input does not start with a '==' section header\n" << end();
179 return;
180 }
181
182 trace(99, "parse") << "flushing segment" << end();
183 p.segments.back().lines.swap(lines);
184 }
185
186 void parse_word(const string& data, word& out) {
187 out.original = data;
188 istringstream win(data);
189 if (getline(win, out.data, '/')) {
190 string m;
191 while (getline(win, m, '/'))
192 out.metadata.push_back(m);
193 }
194 }
195
196 void sanity_check_program_segment(const program& p, uint32_t addr) {
197 for (int i = 0; i < SIZE(p.segments); ++i) {
198 if (p.segments.at(i).start == addr)
199 raise << "can't have multiple segments starting at address 0x" << HEXWORD << addr << '\n' << end();
200 }
201 }
202
203
204 void parse(const string& text_bytes) {
205 program p;
206 istringstream in(text_bytes);
207 parse(in, p);
208 }
209
210 :(scenarios parse)
211 :(scenario detect_duplicate_segments)
212 % Hide_errors = true;
213 == 0xee
214 ab
215 == 0xee
216 cd
217 +error: can't have multiple segments starting at address 0x000000ee
218
219
220
221 :(before "End Types")
// Pointer to a transform pass: a function that receives the parsed program by
// non-const reference and returns nothing.
222 typedef void (*transform_fn)(program&);
223 :(before "End Globals")
// The transform passes that transform() calls, in index order.
224 vector<transform_fn> Transform;
225
226 :(code)
227 void transform(program& p) {
228 for (int t = 0; t < SIZE(Transform); ++t)
229 (*Transform.at(t))(p);
230 }
231
232
233
234 void load(const program& p) {
235 if (p.segments.empty()) {
236 raise << "no code to run\n" << end();
237 return;
238 }
239
240 set<uint32_t> overlap;
241 for (int i = 0; i < SIZE(p.segments); ++i) {
242 const segment& seg = p.segments.at(i);
243 uint32_t addr = seg.start;
244 if (!already_allocated(addr))
245 Mem.push_back(vma(seg.start));
246 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
247 for (int j = 0; j < SIZE(seg.lines); ++j) {
248 const line& l = seg.lines.at(j);
249 for (int k = 0; k < SIZE(l.words); ++k) {
250 const word& w = l.words.at(k);
251 uint8_t val = hex_byte(w.data);
252 if (trace_contains_errors()) return;
253 assert(overlap.find(addr) == overlap.end());
254 write_mem_u8(addr, val);
255 overlap.insert(addr);
256 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
257 ++addr;
258 }
259 }
260 if (i == 0) End_of_program = addr;
261 }
262 EIP = p.segments.at(0).start;
263
264 }
265
266 uint8_t hex_byte(const string& s) {
267 istringstream in(s);
268 int result = 0;
269 in >> std::hex >> result;
270 if (!in || !in.eof()) {
271 raise << "token '" << s << "' is not a hex byte\n" << end();
272 return '\0';
273 }
274 if (result > 0xff || result < -0x8f) {
275 raise << "token '" << s << "' is not a hex byte\n" << end();
276 return '\0';
277 }
278 return static_cast<uint8_t>(result);
279 }
280
281 :(scenarios parse_and_load)
282 :(scenario number_too_large)
283 % Hide_errors = true;
284 == 0x1
285 05 cab
286 +error: token 'cab' is not a hex byte
287
288 :(scenario invalid_hex)
289 % Hide_errors = true;
290 == 0x1
291 05 cx
292 +error: token 'cx' is not a hex byte
293
294 :(scenario negative_number)
295 == 0x1
296 05 -12
297 $error: 0
298
299 :(scenario negative_number_too_small)
300 % Hide_errors = true;
301 == 0x1
302 05 -12345
303 +error: token '-12345' is not a hex byte
304
305 :(scenario hex_prefix)
306 == 0x1
307 0x05 -0x12
308 $error: 0
309
310
311 :(code)
312 void parse_and_load(const string& text_bytes) {
313 program p;
314 istringstream in(text_bytes);
315 parse(in, p);
316 if (trace_contains_errors()) return;
317 load(p);
318 }
319
320
321
322 :(before "End Initialize Op Names")
// Human-readable description of opcode 05, stored in Name under the key "05".
323 put_new(Name, "05", "add imm32 to EAX (add)");
324
325
326 :(before "End Single-Byte Opcodes")
// opcode 05: add a 32-bit immediate to EAX
327 case 0x05: {
// the immediate operand is read with next32()
328 int32_t arg2 = next32();
329 trace(Callstack_depth+1, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end();
// NOTE(review): BINARY_ARITHMETIC_OP is defined elsewhere; presumably it stores
// Reg[EAX].i + arg2 back into Reg[EAX].i and updates the flags -- confirm.
330 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2);
331 break;
332 }
333
334 :(code)
335
336 int32_t next32() {
337 int32_t result = next();
338 result |= (next()<<8);
339 result |= (next()<<16);
340 result |= (next()<<24);
341 return result;
342 }
343
344
345
346 :(code)
347 string to_string(const word& w) {
348 ostringstream out;
349 out << w.data;
350 for (int i = 0; i < SIZE(w.metadata); ++i)
351 out << " /" << w.metadata.at(i);
352 return out.str();
353 }
354
355 int32_t parse_int(const string& s) {
356 if (s.empty()) return 0;
357 istringstream in(s);
358 in >> std::hex;
359 if (s.at(0) == '-') {
360 int32_t result = 0;
361 in >> result;
362 if (!in || !in.eof()) {
363 raise << "not a number: " << s << '\n' << end();
364 return 0;
365 }
366 return result;
367 }
368 uint32_t uresult = 0;
369 in >> uresult;
370 if (!in || !in.eof()) {
371 raise << "not a number: " << s << '\n' << end();
372 return 0;
373 }
374 return static_cast<int32_t>(uresult);
375 }
376 :(before "End Unit Tests")
377 void test_parse_int() {
378 CHECK_EQ(0, parse_int("0"));
379 CHECK_EQ(0, parse_int("0x0"));
380 CHECK_EQ(0, parse_int("0x0"));
381 CHECK_EQ(16, parse_int("10"));
382 CHECK_EQ(-1, parse_int("-1"));
383 CHECK_EQ(-1, parse_int("0xffffffff"));
384 }