1
2
3
4
5
6 :(before "End Help Texts")
7 put(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the\n"
16 "starting address for the segment.\n"
17 "\n"
18 "The starting address for a segment has some finicky requirements. But just\n"
19 "start with a round number, and `subx` will try to guide you to a valid\n"
20 "configuration.\n"
21 "A good rule of thumb is to try to start the first segment at the default\n"
22 "address of 0x08048000, and to start each subsequent segment at least 0x1000\n"
23 "(most common page size) bytes after the last.\n"
24 "If a segment occupies than 0x1000 bytes you'll need to push subsequent\n"
25 "segments further down.\n"
26 "Currently only the first segment contains executable code (because it gets\n"
27 "annoying to have to change addresses in later segments every time an earlier\n"
28 "one changes length; one of those finicky requirements).\n"
29 "\n"
30 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
31 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
32 "at runtime, but can be handy when rewriting macros.\n"
33 "\n"
34 "Check out some examples in this directory (ex*.subx)\n"
35 "Programming in machine code can be annoying, but let's see if we can make\n"
36 "it nice enough to be able to write a compiler in it.\n"
37 );
38 :(before "End Help Contents")
// Print the name of the 'syntax' topic as one entry of the help contents
// listing (this line is spliced in before "End Help Contents").
39 cerr << " syntax\n";
40
41 :(scenario add_imm32_to_eax)
42
43
44
45
46
47
48
49
50
51
52 == 0x1
53
54
55
56
57
58
59
60
61
62
63
64 05 . . . 0a 0b 0c 0d
65
66
67
68
69 +load: 0x00000001 -> 05
70 +load: 0x00000002 -> 0a
71 +load: 0x00000003 -> 0b
72 +load: 0x00000004 -> 0c
73 +load: 0x00000005 -> 0d
74 +run: add imm32 0x0d0c0b0a to reg EAX
75 +run: storing 0x0d0c0b0a
76
77 :(code)
78
79
80 void run(const string& text_bytes) {
81 program p;
82 istringstream in(text_bytes);
83 parse(in, p);
84 if (trace_contains_errors()) return;
85 transform(p);
86 if (trace_contains_errors()) return;
87 load(p);
88 if (trace_contains_errors()) return;
89 while (EIP < End_of_program)
90 run_one_instruction();
91 }
92
93
94
95 :(before "End Types")
// A parsed SubX program: its segments, stored in the order parse() appended
// them (one per '==' header encountered).
96 struct program {
97 vector<segment> segments;
98
99
100
101
102 };
103 :(before "struct program")
104 struct segment {
105 uint32_t start;
106 vector<line> lines;
107 segment() :start(0) {}
108 };
109 :(before "struct segment")
// One source line of a segment, as collected by parse().
110 struct line {
// The words found on the line (comments, '.' fillers and directives excluded).
111 vector<word> words;
// Line-level metadata. NOTE(review): nothing in the parse() visible here
// fills this in -- presumably used by later layers; confirm.
112 vector<string> metadata;
// The raw text of the line exactly as read from the input.
113 string original;
114 };
115 :(before "struct line")
// One whitespace-delimited token of a line, split on '/' by parse().
116 struct word {
// The token exactly as it appeared in the source, metadata included.
117 string original;
// The part of the token before the first '/'.
118 string data;
// The remaining '/'-separated pieces of the token, in order.
119 vector<string> metadata;
120 };
121
122
123
124 :(code)
125 void parse(istream& fin, program& out) {
126 vector<line> l;
127 trace(99, "parse") << "begin" << end();
128 while (has_data(fin)) {
129 string line_data;
130 line curr;
131 getline(fin, line_data);
132 curr.original = line_data;
133 trace(99, "parse") << "line: " << line_data << end();
134 istringstream lin(line_data);
135 while (has_data(lin)) {
136 string word_data;
137 lin >> word_data;
138 if (word_data.empty()) continue;
139 if (word_data[0] == '#') break;
140 if (word_data == ".") continue;
141 if (word_data == "==") {
142 if (!l.empty()) {
143 assert(!out.segments.empty());
144 trace(99, "parse") << "flushing to segment" << end();
145 out.segments.back().lines.swap(l);
146 }
147 segment s;
148 lin >> std::hex >> s.start;
149 trace(99, "parse") << "new segment from " << HEXWORD << s.start << end();
150 out.segments.push_back(s);
151
152 break;
153 }
154 if (word_data[0] == ':') {
155
156 break;
157 }
158 curr.words.push_back(word());
159 curr.words.back().original = word_data;
160 istringstream win(word_data);
161 if (getline(win, curr.words.back().data, '/')) {
162 string m;
163 while (getline(win, m, '/'))
164 curr.words.back().metadata.push_back(m);
165 }
166 trace(99, "parse") << "new word: " << curr.words.back().data << end();
167 }
168 if (!curr.words.empty())
169 l.push_back(curr);
170 }
171 if (!l.empty()) {
172 assert(!out.segments.empty());
173 trace(99, "parse") << "flushing to segment" << end();
174 out.segments.back().lines.swap(l);
175 }
176 trace(99, "parse") << "done" << end();
177 }
178
179
180
181 :(before "End Types")
// Signature of a transform pass: a function that takes the whole program by
// reference (so it may modify it in place) and returns nothing.
182 typedef void (*transform_fn)(program&);
183 :(before "End Globals")
// The list of transform passes; transform() calls them in vector order.
184 vector<transform_fn> Transform;
185
186 void transform(program& p) {
187 trace(99, "transform") << "begin" << end();
188 for (int t = 0; t < SIZE(Transform); ++t)
189 (*Transform.at(t))(p);
190 trace(99, "transform") << "done" << end();
191 }
192
193
194
195 void load(const program& p) {
196 trace(99, "load") << "begin" << end();
197 if (p.segments.empty()) {
198 raise << "no code to run\n" << end();
199 return;
200 }
201 for (int i = 0; i < SIZE(p.segments); ++i) {
202 const segment& seg = p.segments.at(i);
203 uint32_t addr = seg.start;
204 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
205 for (int j = 0; j < SIZE(seg.lines); ++j) {
206 const line& l = seg.lines.at(j);
207 for (int k = 0; k < SIZE(l.words); ++k) {
208 const word& w = l.words.at(k);
209 uint8_t val = hex_byte(w.data);
210 if (trace_contains_errors()) return;
211 write_mem_u8(addr, val);
212 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
213 ++addr;
214 }
215 }
216 if (i == 0) End_of_program = addr;
217 }
218 EIP = p.segments.at(0).start;
219 trace(99, "load") << "done" << end();
220 }
221
222 uint8_t hex_byte(const string& s) {
223 istringstream in(s);
224 int result = 0;
225 in >> std::hex >> result;
226 if (!in || !in.eof()) {
227 raise << "token '" << s << "' is not a hex byte\n" << end();
228 return '\0';
229 }
230 if (result > 0xff || result < -0x8f) {
231 raise << "token '" << s << "' is not a hex byte\n" << end();
232 return '\0';
233 }
234 return static_cast<uint8_t>(result);
235 }
236
237 :(scenarios parse_and_load)
238 :(scenario number_too_large)
239 % Hide_errors = true;
240 == 0x1
241 05 cab
242 +error: token 'cab' is not a hex byte
243
244 :(scenario invalid_hex)
245 % Hide_errors = true;
246 == 0x1
247 05 cx
248 +error: token 'cx' is not a hex byte
249
250 :(scenario negative_number)
251 == 0x1
252 05 -12
253 $error: 0
254
255 :(scenario negative_number_too_small)
256 % Hide_errors = true;
257 == 0x1
258 05 -12345
259 +error: token '-12345' is not a hex byte
260
261 :(scenario hex_prefix)
262 == 0x1
263 0x05 -0x12
264 $error: 0
265
266
267 :(code)
268 void parse_and_load(const string& text_bytes) {
269 program p;
270 istringstream in(text_bytes);
271 parse(in, p);
272 if (trace_contains_errors()) return;
273 load(p);
274 }
275
276
277
278 :(before "End Initialize Op Names(name)")
// Human-readable description for opcode 05.
279 put(name, "05", "add imm32 to R0 (EAX)");
280
281
282 :(before "End Single-Byte Opcodes")
// Opcode 05: add a 32-bit immediate to EAX.
283 case 0x05: {
// the immediate operand, assembled by imm32()
284 int32_t arg2 = imm32();
285 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end();
// NOTE(review): BINARY_ARITHMETIC_OP is a macro defined elsewhere; here it is
// asked to apply '+' to Reg[EAX].i and arg2. Any further side effects (flags,
// tracing) are not visible from this block -- confirm against its definition.
286 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2);
287 break;
288 }
289
290 :(code)
291
292 int32_t imm32() {
293 int32_t result = next();
294 result |= (next()<<8);
295 result |= (next()<<16);
296 result |= (next()<<24);
297 return result;
298 }