1
2
3
4
5
6 :(before "End Help Texts")
7 put(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the\n"
16 "starting address for the segment.\n"
17 "\n"
18 "The starting address for a segment has some finicky requirements. But just\n"
19 "start with a round number, and `subx` will try to guide you to a valid\n"
20 "configuration.\n"
21 "A good rule of thumb is to try to start the first segment at the default\n"
22 "address of 0x08048000, and to start each subsequent segment at least 0x1000\n"
23 "(most common page size) bytes after the last.\n"
24 "If a segment occupies than 0x1000 bytes you'll need to push subsequent\n"
25 "segments further down.\n"
26 "Currently only the first segment contains executable code (because it gets\n"
27 "annoying to have to change addresses in later segments every time an earlier\n"
28 "one changes length; one of those finicky requirements).\n"
29 "\n"
30 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
31 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
32 "at runtime, but can be handy when rewriting macros.\n"
33 "\n"
34 "Check out some examples in this directory (ex*.subx)\n"
35 "Programming in machine code can be annoying, but let's see if we can make\n"
36 "it nice enough to be able to write a compiler in it.\n"
37 );
38 :(before "End Help Contents")
39 cerr << " syntax\n";
40
41 :(scenario add_imm32_to_eax)
42
43
44
45
46
47
48
49
50
51
52 == 0x1
53
54
55
56
57
58
59
60
61
62
63
64 05 . . . 0a 0b 0c 0d
65
66
67
68
69 +load: 0x00000001 -> 05
70 +load: 0x00000002 -> 0a
71 +load: 0x00000003 -> 0b
72 +load: 0x00000004 -> 0c
73 +load: 0x00000005 -> 0d
74 +run: add imm32 0x0d0c0b0a to reg EAX
75 +run: storing 0x0d0c0b0a
76
77 :(code)
78
79
80 void run(const string& text_bytes) {
81 program p;
82 istringstream in(text_bytes);
83 parse(in, p);
84 if (trace_contains_errors()) return;
85 transform(p);
86 if (trace_contains_errors()) return;
87 load(p);
88 if (trace_contains_errors()) return;
89 while (EIP < End_of_program)
90 run_one_instruction();
91 }
92
93
94
95 :(before "End Types")
96 struct program {
97 vector<segment> segments;
98
99
100
101
102 };
103 :(before "struct program")
104 struct segment {
105 uint32_t start;
106 vector<line> lines;
107
108 segment() {
109 start = 0;
110
111 }
112 };
113 :(before "struct segment")
114 struct line {
115 vector<word> words;
116 vector<string> metadata;
117 string original;
118 };
119 :(before "struct line")
// One whitespace-delimited word of a line.
struct word {
  string original;          // the word exactly as written, including any metadata
  string data;              // the part of the word before the first '/'
  vector<string> metadata;  // the '/'-separated pieces following 'data'
};
125
126
127
128 :(code)
129 void parse(istream& fin, program& out) {
130 vector<line> l;
131 trace(99, "parse") << "begin" << end();
132 while (has_data(fin)) {
133 string line_data;
134 line curr;
135 getline(fin, line_data);
136 curr.original = line_data;
137 trace(99, "parse") << "line: " << line_data << end();
138 istringstream lin(line_data);
139 while (has_data(lin)) {
140 string word_data;
141 lin >> word_data;
142 if (word_data.empty()) continue;
143 if (word_data[0] == '#') break;
144 if (word_data == ".") continue;
145 if (word_data == "==") {
146 if (!l.empty()) {
147 assert(!out.segments.empty());
148 trace(99, "parse") << "flushing to segment" << end();
149 out.segments.back().lines.swap(l);
150 }
151 segment s;
152 string segment_title;
153 lin >> segment_title;
154 if (starts_with(segment_title, "0x"))
155 s.start = parse_int(segment_title);
156 trace(99, "parse") << "new segment from " << HEXWORD << s.start << end();
157 out.segments.push_back(s);
158
159 break;
160 }
161 if (word_data[0] == ':') {
162
163 break;
164 }
165 curr.words.push_back(word());
166 curr.words.back().original = word_data;
167 istringstream win(word_data);
168 if (getline(win, curr.words.back().data, '/')) {
169 string m;
170 while (getline(win, m, '/'))
171 curr.words.back().metadata.push_back(m);
172 }
173 trace(99, "parse") << "new word: " << curr.words.back().data << end();
174 }
175 if (!curr.words.empty())
176 l.push_back(curr);
177 }
178 if (!l.empty()) {
179 assert(!out.segments.empty());
180 trace(99, "parse") << "flushing to segment" << end();
181 out.segments.back().lines.swap(l);
182 }
183 trace(99, "parse") << "done" << end();
184 }
185
186
187
188 :(before "End Types")
189 typedef void (*transform_fn)(program&);
190 :(before "End Globals")
191 vector<transform_fn> Transform;
192
193 void transform(program& p) {
194 trace(99, "transform") << "begin" << end();
195 for (int t = 0; t < SIZE(Transform); ++t)
196 (*Transform.at(t))(p);
197 trace(99, "transform") << "done" << end();
198 }
199
200
201
202 void load(const program& p) {
203 trace(99, "load") << "begin" << end();
204 if (p.segments.empty()) {
205 raise << "no code to run\n" << end();
206 return;
207 }
208 for (int i = 0; i < SIZE(p.segments); ++i) {
209 const segment& seg = p.segments.at(i);
210 uint32_t addr = seg.start;
211 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
212 for (int j = 0; j < SIZE(seg.lines); ++j) {
213 const line& l = seg.lines.at(j);
214 for (int k = 0; k < SIZE(l.words); ++k) {
215 const word& w = l.words.at(k);
216 uint8_t val = hex_byte(w.data);
217 if (trace_contains_errors()) return;
218 write_mem_u8(addr, val);
219 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
220 ++addr;
221 }
222 }
223 if (i == 0) End_of_program = addr;
224 }
225 EIP = p.segments.at(0).start;
226 trace(99, "load") << "done" << end();
227 }
228
229 uint8_t hex_byte(const string& s) {
230 istringstream in(s);
231 int result = 0;
232 in >> std::hex >> result;
233 if (!in || !in.eof()) {
234 raise << "token '" << s << "' is not a hex byte\n" << end();
235 return '\0';
236 }
237 if (result > 0xff || result < -0x8f) {
238 raise << "token '" << s << "' is not a hex byte\n" << end();
239 return '\0';
240 }
241 return static_cast<uint8_t>(result);
242 }
243
244 :(scenarios parse_and_load)
245 :(scenario number_too_large)
246 % Hide_errors = true;
247 == 0x1
248 05 cab
249 +error: token 'cab' is not a hex byte
250
251 :(scenario invalid_hex)
252 % Hide_errors = true;
253 == 0x1
254 05 cx
255 +error: token 'cx' is not a hex byte
256
257 :(scenario negative_number)
258 == 0x1
259 05 -12
260 $error: 0
261
262 :(scenario negative_number_too_small)
263 % Hide_errors = true;
264 == 0x1
265 05 -12345
266 +error: token '-12345' is not a hex byte
267
268 :(scenario hex_prefix)
269 == 0x1
270 0x05 -0x12
271 $error: 0
272
273
274 :(code)
275 void parse_and_load(const string& text_bytes) {
276 program p;
277 istringstream in(text_bytes);
278 parse(in, p);
279 if (trace_contains_errors()) return;
280 load(p);
281 }
282
283
284
285 :(before "End Initialize Op Names(name)")
286 put(name, "05", "add imm32 to R0 (EAX)");
287
288
289 :(before "End Single-Byte Opcodes")
290 case 0x05: {
291 int32_t arg2 = imm32();
292 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end();
293 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2);
294 break;
295 }
296
297 :(code)
298
299 int32_t imm32() {
300 int32_t result = next();
301 result |= (next()<<8);
302 result |= (next()<<16);
303 result |= (next()<<24);
304 return result;
305 }
306
307 :(code)
308 int32_t parse_int(const string& s) {
309 if (s.empty()) return 0;
310 istringstream in(s);
311 in >> std::hex;
312 if (s.at(0) == '-') {
313 int32_t result = 0;
314 in >> result;
315 if (!in || !in.eof()) {
316 raise << "not a number: " << s << '\n' << end();
317 return 0;
318 }
319 return result;
320 }
321 uint32_t uresult = 0;
322 in >> uresult;
323 if (!in || !in.eof()) {
324 raise << "not a number: " << s << '\n' << end();
325 return 0;
326 }
327 return static_cast<int32_t>(uresult);
328 }
329 :(before "End Unit Tests")
330 void test_parse_int() {
331 CHECK_EQ(0, parse_int("0"));
332 CHECK_EQ(0, parse_int("0x0"));
333 CHECK_EQ(0, parse_int("0x0"));
334 CHECK_EQ(16, parse_int("10"));
335 CHECK_EQ(-1, parse_int("-1"));
336 CHECK_EQ(-1, parse_int("0xffffffff"));
337 }