1
2
3
4
5
6 :(before "End Help Texts")
7 put(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the\n"
16 "starting address for the segment.\n"
17 "\n"
18 "The starting address for a segment has some finicky requirements. But just\n"
19 "start with a round number, and `subx` will try to guide you to a valid\n"
20 "configuration.\n"
21 "A good rule of thumb is to try to start the first segment at the default\n"
22 "address of 0x08048000, and to start each subsequent segment at least 0x1000\n"
23 "(most common page size) bytes after the last.\n"
24 "If a segment occupies than 0x1000 bytes you'll need to push subsequent\n"
25 "segments further down.\n"
26 "Currently only the first segment contains executable code (because it gets\n"
27 "annoying to have to change addresses in later segments every time an earlier\n"
28 "one changes length; one of those finicky requirements).\n"
29 "\n"
30 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
31 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
32 "at runtime, but can be handy when rewriting macros.\n"
33 "\n"
34 "Check out some examples in this directory (ex*.subx)\n"
35 "Programming in machine code can be annoying, but let's see if we can make\n"
36 "it nice enough to be able to write a compiler in it.\n"
37 );
38 :(before "End Help Contents")
39 cerr << " syntax\n";
40
41 :(scenario add_imm32_to_eax)
42
43
44
45
46
47
48
49
50
51
52
53
54
55 == 0x1
56
57
58
59
60
61
62
63
64
65
66
67 05 . . . 0a 0b 0c 0d
68
69
70
71
72 +load: 0x00000001 -> 05
73 +load: 0x00000002 -> 0a
74 +load: 0x00000003 -> 0b
75 +load: 0x00000004 -> 0c
76 +load: 0x00000005 -> 0d
77 +run: add imm32 0x0d0c0b0a to reg EAX
78 +run: storing 0x0d0c0b0a
79
80 :(code)
81
82
83 void run(const string& text_bytes) {
84 program p;
85 istringstream in(text_bytes);
86 parse(in, p);
87 if (trace_contains_errors()) return;
88 transform(p);
89 if (trace_contains_errors()) return;
90 load(p);
91 if (trace_contains_errors()) return;
92 while (EIP < End_of_program)
93 run_one_instruction();
94 }
95
96
97
98 :(before "End Types")
99 struct program {
100 vector<segment> segments;
101
102
103
104
105 };
106 :(before "struct program")
107 struct segment {
108 uint32_t start;
109 vector<line> lines;
110
111 segment() {
112 start = 0;
113
114 }
115 };
116 :(before "struct segment")
117 struct line {
118 vector<word> words;
119 vector<string> metadata;
120 string original;
121 };
122 :(before "struct line")
// One whitespace-delimited token of a line, split on '/' by parse_word().
struct word {
  string original;          // the token exactly as written
  string data;              // the part before the first '/'
  vector<string> metadata;  // each '/'-separated piece after `data`
};
128
129
130
131 :(code)
132 void parse(istream& fin, program& out) {
133 vector<line> l;
134 trace(99, "parse") << "begin" << end();
135 while (has_data(fin)) {
136 string line_data;
137 line curr;
138 getline(fin, line_data);
139 curr.original = line_data;
140 trace(99, "parse") << "line: " << line_data << end();
141
142 istringstream lin(line_data);
143 while (has_data(lin)) {
144 string word_data;
145 lin >> word_data;
146 if (word_data.empty()) continue;
147 if (word_data[0] == '#') break;
148 if (word_data == ".") continue;
149 if (word_data == "==") {
150 if (!l.empty()) {
151 assert(!out.segments.empty());
152 trace(99, "parse") << "flushing to segment" << end();
153 out.segments.back().lines.swap(l);
154 }
155 segment s;
156 string segment_title;
157 lin >> segment_title;
158 if (starts_with(segment_title, "0x"))
159 s.start = parse_int(segment_title);
160 trace(99, "parse") << "new segment from " << HEXWORD << s.start << end();
161 out.segments.push_back(s);
162
163 break;
164 }
165 if (word_data[0] == ':') {
166
167 break;
168 }
169 curr.words.push_back(word());
170 parse_word(word_data, curr.words.back());
171 trace(99, "parse") << "word: " << to_string(curr.words.back());
172 }
173 if (!curr.words.empty())
174 l.push_back(curr);
175 }
176 if (!l.empty()) {
177 assert(!out.segments.empty());
178 trace(99, "parse") << "flushing to segment" << end();
179 out.segments.back().lines.swap(l);
180 }
181 trace(99, "parse") << "done" << end();
182 }
183
184 void parse_word(const string& data, word& out) {
185 out.original = data;
186 istringstream win(data);
187 if (getline(win, out.data, '/')) {
188 string m;
189 while (getline(win, m, '/'))
190 out.metadata.push_back(m);
191 }
192 }
193
194 string to_string(const word& w) {
195 ostringstream out;
196 out << w.data;
197 for (int i = 0; i < SIZE(w.metadata); ++i)
198 out << " /" << w.metadata.at(i);
199 return out.str();
200 }
201
202
203
204 :(before "End Types")
205 typedef void (*transform_fn)(program&);
206 :(before "End Globals")
207 vector<transform_fn> Transform;
208
209 void transform(program& p) {
210 trace(99, "transform") << "begin" << end();
211 for (int t = 0; t < SIZE(Transform); ++t)
212 (*Transform.at(t))(p);
213 trace(99, "transform") << "done" << end();
214 }
215
216
217
218 void load(const program& p) {
219 trace(99, "load") << "begin" << end();
220 if (p.segments.empty()) {
221 raise << "no code to run\n" << end();
222 return;
223 }
224 for (int i = 0; i < SIZE(p.segments); ++i) {
225 const segment& seg = p.segments.at(i);
226 uint32_t addr = seg.start;
227 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
228 for (int j = 0; j < SIZE(seg.lines); ++j) {
229 const line& l = seg.lines.at(j);
230 for (int k = 0; k < SIZE(l.words); ++k) {
231 const word& w = l.words.at(k);
232 uint8_t val = hex_byte(w.data);
233 if (trace_contains_errors()) return;
234 write_mem_u8(addr, val);
235 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
236 ++addr;
237 }
238 }
239 if (i == 0) End_of_program = addr;
240 }
241 EIP = p.segments.at(0).start;
242 trace(99, "load") << "done" << end();
243 }
244
245 uint8_t hex_byte(const string& s) {
246 istringstream in(s);
247 int result = 0;
248 in >> std::hex >> result;
249 if (!in || !in.eof()) {
250 raise << "token '" << s << "' is not a hex byte\n" << end();
251 return '\0';
252 }
253 if (result > 0xff || result < -0x8f) {
254 raise << "token '" << s << "' is not a hex byte\n" << end();
255 return '\0';
256 }
257 return static_cast<uint8_t>(result);
258 }
259
260 :(scenarios parse_and_load)
261 :(scenario number_too_large)
262 % Hide_errors = true;
263 == 0x1
264 05 cab
265 +error: token 'cab' is not a hex byte
266
267 :(scenario invalid_hex)
268 % Hide_errors = true;
269 == 0x1
270 05 cx
271 +error: token 'cx' is not a hex byte
272
273 :(scenario negative_number)
274 == 0x1
275 05 -12
276 $error: 0
277
278 :(scenario negative_number_too_small)
279 % Hide_errors = true;
280 == 0x1
281 05 -12345
282 +error: token '-12345' is not a hex byte
283
284 :(scenario hex_prefix)
285 == 0x1
286 0x05 -0x12
287 $error: 0
288
289
290 :(code)
291 void parse_and_load(const string& text_bytes) {
292 program p;
293 istringstream in(text_bytes);
294 parse(in, p);
295 if (trace_contains_errors()) return;
296 load(p);
297 }
298
299
300
301 :(before "End Initialize Op Names(name)")
302 put(name, "05", "add imm32 to R0 (EAX)");
303
304
305 :(before "End Single-Byte Opcodes")
306 case 0x05: {
307 int32_t arg2 = next32();
308 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end();
309 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2);
310 break;
311 }
312
313 :(code)
314
315 int32_t next32() {
316 int32_t result = next();
317 result |= (next()<<8);
318 result |= (next()<<16);
319 result |= (next()<<24);
320 return result;
321 }
322
323 :(code)
324 int32_t parse_int(const string& s) {
325 if (s.empty()) return 0;
326 istringstream in(s);
327 in >> std::hex;
328 if (s.at(0) == '-') {
329 int32_t result = 0;
330 in >> result;
331 if (!in || !in.eof()) {
332 raise << "not a number: " << s << '\n' << end();
333 return 0;
334 }
335 return result;
336 }
337 uint32_t uresult = 0;
338 in >> uresult;
339 if (!in || !in.eof()) {
340 raise << "not a number: " << s << '\n' << end();
341 return 0;
342 }
343 return static_cast<int32_t>(uresult);
344 }
345 :(before "End Unit Tests")
346 void test_parse_int() {
347 CHECK_EQ(0, parse_int("0"));
348 CHECK_EQ(0, parse_int("0x0"));
349 CHECK_EQ(0, parse_int("0x0"));
350 CHECK_EQ(16, parse_int("10"));
351 CHECK_EQ(-1, parse_int("-1"));
352 CHECK_EQ(-1, parse_int("0xffffffff"));
353 }