1
2
3
4
5
6 :(before "End Help Texts")
7 put_new(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
16 "the segment.\n"
17 "\n"
18 "Currently SubX assumes the first segment encountered contains executable code, and\n"
19 "the second contains global variables. By convention we call them 'code' and 'data'\n"
20 "respectively.\n"
21 "The first instruction executed in the resulting binary is always the first\n"
22 "instruction of the first segment.\n"
23 "\n"
24 "Segments with the same name get merged together. This rule helps keep functions and\n"
25 "their data close together in .subx files.\n"
26 "Later segments with the same name get their contents *prepended* to earlier ones.\n"
27 "This rule helps each .subx file to put forth a different entrypoint for the binary,\n"
28 "overriding previously loaded files.\n"
29 "\n"
30 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
31 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
32 "at runtime, but can be handy when rewriting macros.\n"
33 "\n"
34 "Check out the examples in the examples/ directory.\n"
35 "Programming in machine code can be annoying, but let's see if we can make\n"
36 "it nice enough to be able to write a compiler in it.\n"
37 );
38 :(before "End Help Contents")
39 cerr << " syntax\n";
40
41 :(scenario add_imm32_to_eax)
42
43
44
45
46
47
48
49
50
51
52
53
54
55 == 0x1
56
57
58
59
60
61
62
63
64
65
66
67 05 . . . 0a 0b 0c 0d
68
69
70
71
72 +load: 0x00000001 -> 05
73 +load: 0x00000002 -> 0a
74 +load: 0x00000003 -> 0b
75 +load: 0x00000004 -> 0c
76 +load: 0x00000005 -> 0d
77 +run: add imm32 0x0d0c0b0a to reg EAX
78 +run: storing 0x0d0c0b0a
79
80 :(code)
81
82
83 void run(const string& text_bytes) {
84 program p;
85 istringstream in(text_bytes);
86 parse(in, p);
87 if (trace_contains_errors()) return;
88 transform(p);
89 if (trace_contains_errors()) return;
90 load(p);
91 if (trace_contains_errors()) return;
92 while (EIP < End_of_program)
93 run_one_instruction();
94 }
95
96
97
98 :(before "End Types")
99 struct program {
100 vector<segment> segments;
101
102
103
104
105 };
106 :(before "struct program")
107 struct segment {
108 uint32_t start;
109 vector<line> lines;
110
111 segment() {
112 start = 0;
113
114 }
115 };
116 :(before "struct segment")
117 struct line {
118 vector<word> words;
119 vector<string> metadata;
120 string original;
121 };
122 :(before "struct line")
123 struct word {
124 string original;
125 string data;
126 vector<string> metadata;
127 };
128
129
130
131 :(code)
132 void parse(istream& fin, program& out) {
133 vector<line> l;
134 trace(99, "parse") << "begin" << end();
135 while (has_data(fin)) {
136 string line_data;
137 line curr;
138 getline(fin, line_data);
139 curr.original = line_data;
140 trace(99, "parse") << "line: " << line_data << end();
141
142 istringstream lin(line_data);
143 while (has_data(lin)) {
144 string word_data;
145 lin >> word_data;
146 if (word_data.empty()) continue;
147 if (word_data[0] == '#') break;
148 if (word_data == ".") continue;
149 if (word_data == "==") {
150 flush(out, l);
151 string segment_title;
152 lin >> segment_title;
153 if (starts_with(segment_title, "0x")) {
154 segment s;
155 s.start = parse_int(segment_title);
156 sanity_check_program_segment(out, s.start);
157 if (trace_contains_errors()) continue;
158 trace(99, "parse") << "new segment from 0x" << HEXWORD << s.start << end();
159 out.segments.push_back(s);
160 }
161
162
163 break;
164 }
165 if (word_data[0] == ':') {
166
167 break;
168 }
169 curr.words.push_back(word());
170 parse_word(word_data, curr.words.back());
171 trace(99, "parse") << "word: " << to_string(curr.words.back());
172 }
173 if (!curr.words.empty())
174 l.push_back(curr);
175 }
176 flush(out, l);
177 trace(99, "parse") << "done" << end();
178 }
179
180 void flush(program& p, vector<line>& lines) {
181 if (lines.empty()) return;
182 if (p.segments.empty()) {
183 raise << "input does not start with a '==' section header\n" << end();
184 return;
185 }
186
187 trace(99, "parse") << "flushing to segment" << end();
188 p.segments.back().lines.swap(lines);
189 }
190
191 void parse_word(const string& data, word& out) {
192 out.original = data;
193 istringstream win(data);
194 if (getline(win, out.data, '/')) {
195 string m;
196 while (getline(win, m, '/'))
197 out.metadata.push_back(m);
198 }
199 }
200
201 void sanity_check_program_segment(const program& p, uint32_t addr) {
202 for (int i = 0; i < SIZE(p.segments); ++i) {
203 if (p.segments.at(i).start == addr)
204 raise << "can't have multiple segments starting at address 0x" << HEXWORD << addr << '\n' << end();
205 }
206 }
207
208
209 void parse(const string& text_bytes) {
210 program p;
211 istringstream in(text_bytes);
212 parse(in, p);
213 }
214
215 :(scenarios parse)
216 :(scenario detect_duplicate_segments)
217 % Hide_errors = true;
218 == 0xee
219 ab
220 == 0xee
221 cd
222 +error: can't have multiple segments starting at address 0x000000ee
223
224
225
226 :(before "End Types")
227 typedef void (*transform_fn)(program&);
228 :(before "End Globals")
229 vector<transform_fn> Transform;
230
231 void transform(program& p) {
232 trace(99, "transform") << "begin" << end();
233 for (int t = 0; t < SIZE(Transform); ++t)
234 (*Transform.at(t))(p);
235 trace(99, "transform") << "done" << end();
236 }
237
238
239
240 void load(const program& p) {
241 trace(99, "load") << "begin" << end();
242 if (p.segments.empty()) {
243 raise << "no code to run\n" << end();
244 return;
245 }
246
247 set<uint32_t> overlap;
248 for (int i = 0; i < SIZE(p.segments); ++i) {
249 const segment& seg = p.segments.at(i);
250 uint32_t addr = seg.start;
251 if (!already_allocated(addr))
252 Mem.push_back(vma(seg.start));
253 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
254 for (int j = 0; j < SIZE(seg.lines); ++j) {
255 const line& l = seg.lines.at(j);
256 for (int k = 0; k < SIZE(l.words); ++k) {
257 const word& w = l.words.at(k);
258 uint8_t val = hex_byte(w.data);
259 if (trace_contains_errors()) return;
260 assert(overlap.find(addr) == overlap.end());
261 write_mem_u8(addr, val);
262 overlap.insert(addr);
263 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
264 ++addr;
265 }
266 }
267 if (i == 0) End_of_program = addr;
268 }
269 EIP = p.segments.at(0).start;
270 trace(99, "load") << "done" << end();
271 }
272
273 uint8_t hex_byte(const string& s) {
274 istringstream in(s);
275 int result = 0;
276 in >> std::hex >> result;
277 if (!in || !in.eof()) {
278 raise << "token '" << s << "' is not a hex byte\n" << end();
279 return '\0';
280 }
281 if (result > 0xff || result < -0x8f) {
282 raise << "token '" << s << "' is not a hex byte\n" << end();
283 return '\0';
284 }
285 return static_cast<uint8_t>(result);
286 }
287
288 :(scenarios parse_and_load)
289 :(scenario number_too_large)
290 % Hide_errors = true;
291 == 0x1
292 05 cab
293 +error: token 'cab' is not a hex byte
294
295 :(scenario invalid_hex)
296 % Hide_errors = true;
297 == 0x1
298 05 cx
299 +error: token 'cx' is not a hex byte
300
301 :(scenario negative_number)
302 == 0x1
303 05 -12
304 $error: 0
305
306 :(scenario negative_number_too_small)
307 % Hide_errors = true;
308 == 0x1
309 05 -12345
310 +error: token '-12345' is not a hex byte
311
312 :(scenario hex_prefix)
313 == 0x1
314 0x05 -0x12
315 $error: 0
316
317
318 :(code)
319 void parse_and_load(const string& text_bytes) {
320 program p;
321 istringstream in(text_bytes);
322 parse(in, p);
323 if (trace_contains_errors()) return;
324 load(p);
325 }
326
327
328
329 :(before "End Initialize Op Names")
330 put_new(Name, "05", "add imm32 to EAX (add)");
331
332
333 :(before "End Single-Byte Opcodes")
334 case 0x05: {
335 int32_t arg2 = next32();
336 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end();
337 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2);
338 break;
339 }
340
341 :(code)
342
343 int32_t next32() {
344 int32_t result = next();
345 result |= (next()<<8);
346 result |= (next()<<16);
347 result |= (next()<<24);
348 return result;
349 }
350
351
352
353 :(code)
354 string to_string(const word& w) {
355 ostringstream out;
356 out << w.data;
357 for (int i = 0; i < SIZE(w.metadata); ++i)
358 out << " /" << w.metadata.at(i);
359 return out.str();
360 }
361
362 int32_t parse_int(const string& s) {
363 if (s.empty()) return 0;
364 istringstream in(s);
365 in >> std::hex;
366 if (s.at(0) == '-') {
367 int32_t result = 0;
368 in >> result;
369 if (!in || !in.eof()) {
370 raise << "not a number: " << s << '\n' << end();
371 return 0;
372 }
373 return result;
374 }
375 uint32_t uresult = 0;
376 in >> uresult;
377 if (!in || !in.eof()) {
378 raise << "not a number: " << s << '\n' << end();
379 return 0;
380 }
381 return static_cast<int32_t>(uresult);
382 }
383 :(before "End Unit Tests")
384 void test_parse_int() {
385 CHECK_EQ(0, parse_int("0"));
386 CHECK_EQ(0, parse_int("0x0"));
387 CHECK_EQ(0, parse_int("0x0"));
388 CHECK_EQ(16, parse_int("10"));
389 CHECK_EQ(-1, parse_int("-1"));
390 CHECK_EQ(-1, parse_int("0xffffffff"));
391 }