1
2
3
4
5
6 :(before "End Help Texts")
7 put(Help, "syntax",
8 "SubX programs consist of segments, each segment in turn consisting of lines.\n"
9 "Line-endings are significant; each line should contain a single\n"
10 "instruction, macro or directive.\n"
11 "\n"
12 "Comments start with the '#' character. It should be at the start of a word\n"
13 "(start of line, or following a space).\n"
14 "\n"
15 "Each segment starts with a header line: a '==' delimiter followed by the\n"
16 "starting address for the segment.\n"
17 "\n"
18 "The starting address for a segment has some finicky requirements. But just\n"
19 "start with a round number, and `subx` will try to guide you to a valid\n"
20 "configuration.\n"
21 "A good rule of thumb is to try to start the first segment at the default\n"
22 "address of 0x08048000, and to start each subsequent segment at least 0x1000\n"
23 "(most common page size) bytes after the last.\n"
24 "If a segment occupies than 0x1000 bytes you'll need to push subsequent\n"
25 "segments further down.\n"
26 "Currently only the first segment contains executable code (because it gets\n"
27 "annoying to have to change addresses in later segments every time an earlier\n"
28 "one changes length; one of those finicky requirements).\n"
29 "\n"
30 "Lines consist of a series of words. Words can contain arbitrary metadata\n"
31 "after a '/', but they can never contain whitespace. Metadata has no effect\n"
32 "at runtime, but can be handy when rewriting macros.\n"
33 "\n"
34 "Check out some examples in this directory (ex*.subx)\n"
35 "Programming in machine code can be annoying, but let's see if we can make\n"
36 "it nice enough to be able to write a compiler in it.\n"
37 );
38 :(before "End Help Contents")
39 cerr << " syntax\n";
40
41 :(scenario add_imm32_to_eax)
42
43
44
45
46
47
48
49
50
51
52 == 0x1
53
54
55
56
57
58
59
60
61
62
63
64 05 . . . 0a 0b 0c 0d
65
66
67
68
69 +load: 0x00000001 -> 05
70 +load: 0x00000002 -> 0a
71 +load: 0x00000003 -> 0b
72 +load: 0x00000004 -> 0c
73 +load: 0x00000005 -> 0d
74 +run: add imm32 0x0d0c0b0a to reg EAX
75 +run: storing 0x0d0c0b0a
76
77 :(code)
78
79
80 void run(const string& text_bytes) {
81 program p;
82 istringstream in(text_bytes);
83 parse(in, p);
84 if (trace_contains_errors()) return;
85 transform(p);
86 if (trace_contains_errors()) return;
87 load(p);
88 if (trace_contains_errors()) return;
89 while (EIP < End_of_program)
90 run_one_instruction();
91 }
92
93
94
95 :(before "End Types")
96 struct program {
97 vector<segment> segments;
98
99
100
101
102 };
103 :(before "struct program")
104 struct segment {
105 uint32_t start;
106 vector<line> lines;
107 segment() :start(0) {}
108 };
109 :(before "struct segment")
110 struct line {
111 vector<word> words;
112 vector<string> metadata;
113 };
114 :(before "struct line")
// One whitespace-delimited token of a line.
struct word {
  string original;          // the token exactly as it appeared in the source
  string data;              // the part of the token before the first '/'
  vector<string> metadata;  // the '/'-separated pieces following `data`
};
120
121
122
123 :(code)
124 void parse(istream& fin, program& out) {
125 vector<line> l;
126 trace(99, "parse") << "begin" << end();
127 while (has_data(fin)) {
128 string line_data;
129 getline(fin, line_data);
130 trace(99, "parse") << "line: " << line_data << end();
131 istringstream lin(line_data);
132 vector<word> w;
133 while (has_data(lin)) {
134 string word_data;
135 lin >> word_data;
136 if (word_data.empty()) continue;
137 if (word_data[0] == '#') break;
138 if (word_data == ".") continue;
139 if (word_data == "==") {
140 if (!l.empty()) {
141 assert(!out.segments.empty());
142 trace(99, "parse") << "flushing to segment" << end();
143 out.segments.back().lines.swap(l);
144 }
145 segment s;
146 lin >> std::hex >> s.start;
147 trace(99, "parse") << "new segment from " << HEXWORD << s.start << end();
148 out.segments.push_back(s);
149
150 break;
151 }
152 if (word_data[0] == ':') {
153
154 break;
155 }
156 w.push_back(word());
157 w.back().original = word_data;
158 istringstream win(word_data);
159 if (getline(win, w.back().data, '/')) {
160 string m;
161 while (getline(win, m, '/'))
162 w.back().metadata.push_back(m);
163 }
164 trace(99, "parse") << "new word: " << w.back().data << end();
165 }
166 if (!w.empty()) {
167 l.push_back(line());
168 l.back().words.swap(w);
169 }
170 }
171 if (!l.empty()) {
172 assert(!out.segments.empty());
173 trace(99, "parse") << "flushing to segment" << end();
174 out.segments.back().lines.swap(l);
175 }
176 trace(99, "parse") << "done" << end();
177 }
178
179
180
181 :(before "End Types")
182 typedef void (*transform_fn)(program&);
183 :(before "End Globals")
184 vector<transform_fn> Transform;
185
186 void transform(program& p) {
187 trace(99, "transform") << "begin" << end();
188 for (int t = 0; t < SIZE(Transform); ++t)
189 (*Transform.at(t))(p);
190 trace(99, "transform") << "done" << end();
191 }
192
193
194
195 void load(const program& p) {
196 trace(99, "load") << "begin" << end();
197 if (p.segments.empty()) {
198 raise << "no code to run\n" << end();
199 return;
200 }
201 for (int i = 0; i < SIZE(p.segments); ++i) {
202 const segment& seg = p.segments.at(i);
203 uint32_t addr = seg.start;
204 trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
205 for (int j = 0; j < SIZE(seg.lines); ++j) {
206 const line& l = seg.lines.at(j);
207 for (int k = 0; k < SIZE(l.words); ++k) {
208 const word& w = l.words.at(k);
209 uint8_t val = hex_byte(w.data);
210 if (trace_contains_errors()) return;
211 write_mem_u8(addr, val);
212 trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
213 ++addr;
214 }
215 }
216 if (i == 0) End_of_program = addr;
217 }
218 EIP = p.segments.at(0).start;
219 trace(99, "load") << "done" << end();
220 }
221
222 uint8_t hex_byte(const string& s) {
223 istringstream in(s);
224 int result = 0;
225 in >> std::hex >> result;
226 if (!in) {
227 raise << "invalid hex " << s << '\n' << end();
228 return '\0';
229 }
230 if (result > 0xff) {
231 raise << "invalid hex byte " << std::hex << result << '\n' << end();
232 return '\0';
233 }
234 return static_cast<uint8_t>(result);
235 }
236
237
238
239 :(before "End Initialize Op Names(name)")
240 put(name, "05", "add imm32 to R0 (EAX)");
241
242
243 :(before "End Single-Byte Opcodes")
244 case 0x05: {
245 int32_t arg2 = imm32();
246 trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end();
247 BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2);
248 break;
249 }
250
251 :(code)
252
253 int32_t imm32() {
254 int32_t result = next();
255 result |= (next()<<8);
256 result |= (next()<<16);
257 result |= (next()<<24);
258 return result;
259 }