1 //: Running SubX programs on the VM.
  2 
  3 //: (Not to be confused with the 'run' subcommand for running ELF binaries on
  4 //: the VM. That comes later.)
  5 
  6 :(before "End Help Texts")
  7 put(Help, "syntax",
  8   "SubX programs consist of segments, each segment in turn consisting of lines.\n"
  9   "Line-endings are significant; each line should contain a single\n"
 10   "instruction, macro or directive.\n"
 11   "\n"
 12   "Comments start with the '#' character. It should be at the start of a word\n"
 13   "(start of line, or following a space).\n"
 14   "\n"
 15   "Each segment starts with a header line: a '==' delimiter followed by the\n"
 16   "starting address for the segment.\n"
 17   "\n"
 18   "The starting address for a segment has some finicky requirements. But just\n"
 19   "start with a round number, and `subx` will try to guide you to a valid\n"
 20   "configuration.\n"
 21   "A good rule of thumb is to try to start the first segment at the default\n"
 22   "address of 0x08048000, and to start each subsequent segment at least 0x1000\n"
 23   "(most common page size) bytes after the last.\n"
 24   "If a segment occupies than 0x1000 bytes you'll need to push subsequent\n"
 25   "segments further down.\n"
 26   "Currently only the first segment contains executable code (because it gets\n"
 27   "annoying to have to change addresses in later segments every time an earlier\n"
 28   "one changes length; one of those finicky requirements).\n"
 29   "\n"
 30   "Lines consist of a series of words. Words can contain arbitrary metadata\n"
 31   "after a '/', but they can never contain whitespace. Metadata has no effect\n"
 32   "at runtime, but can be handy when rewriting macros.\n"
 33   "\n"
 34   "Check out some examples in this directory (ex*.subx)\n"
 35   "Programming in machine code can be annoying, but let's see if we can make\n"
 36   "it nice enough to be able to write a compiler in it.\n"
 37 );
 38 :(before "End Help Contents")
// List the "syntax" topic among the help contents printed to stderr.
cerr << "  syntax\n";
 40 
 41 :(scenario add_imm32_to_eax)
 42 # At the lowest level, SubX programs are a series of hex bytes, each
 43 # (variable-length) instruction on one line.
 44 #
 45 # Later we'll make things nicer using macros. But you'll always be able to
 46 # insert hex bytes out of instructions.
 47 #
 48 # As you can see, comments start with '#' and are ignored.
 49 
 50 # Segment headers start with '==', specifying the hex address where they
 51 # begin. The first segment is always assumed to be code.
 52 == 0x1
 53 
 54 # We don't show it here, but all lines can have metadata after a ':'.
 55 # All words can have metadata after a '/'. No spaces allowed in word metadata, of course.
 56 # Metadata doesn't directly form instructions, but some macros may look at it.
 57 # Unrecognized metadata never causes errors, so you can also use it for
 58 # documentation.
 59 
 60 # Within the code segment, x86 instructions consist of the following parts (see cheatsheet.pdf):
 61 #   opcode        ModR/M                    SIB                   displacement    immediate
 62 #   instruction   mod, reg, Reg/Mem bits    scale, index, base
 63 #   1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
 64     05            .                         .                     .               0a 0b 0c 0d  # add 0x0d0c0b0a to EAX
 65 # (The single periods are just to help the eye track long gaps between
 66 # columns, and are otherwise ignored.)
 67 
 68 # This program, when run, causes the following events in the trace:
 69 +load: 0x00000001 -> 05
 70 +load: 0x00000002 -> 0a
 71 +load: 0x00000003 -> 0b
 72 +load: 0x00000004 -> 0c
 73 +load: 0x00000005 -> 0d
 74 +run: add imm32 0x0d0c0b0a to reg EAX
 75 +run: storing 0x0d0c0b0a
 76 
 77 :(code)
 78 // top-level helper for scenarios: parse the input, transform any macros, load
 79 // the final hex bytes into memory, run it
 80 void run(const string& text_bytes) {
 81   program p;
 82   istringstream in(text_bytes);
 83   parse(in, p);
 84   if (trace_contains_errors()) return;  // if any stage raises errors, stop immediately
 85   transform(p);
 86   if (trace_contains_errors()) return;
 87   load(p);
 88   if (trace_contains_errors()) return;
 89   while (EIP < End_of_program)
 90     run_one_instruction();
 91 }
 92 
 93 //:: core data structures
 94 
 95 :(before "End Types")
struct program {
  // the segments of the program, in the order their headers were parsed
  vector<segment> segments;
  // random ideas for other things we may eventually need
  //map<name, address> globals;
  //vector<recipe> recipes;
  //map<string, type_info> types;
};
103 :(before "struct program")
struct segment {
  // address read from the '==' header; loading writes bytes starting here
  uint32_t start;
  // the lines of words that follow the header, in source order
  vector<line> lines;
  // a fresh segment starts at address 0 until a header address is read into it
  segment() :start(0) {}
};
109 :(before "struct segment")
struct line {
  // the words on the line, in order; comments and lone '.' tokens are dropped
  // by the parser before they get here
  vector<word> words;
  // NOTE(review): the parser doesn't populate this yet (it stops at a word
  // starting with ':' with a 'todo: line metadata')
  vector<string> metadata;
};
114 :(before "struct line")
struct word {
  // the word exactly as it appeared in the source, metadata included
  string original;
  // the part of the word before the first '/'
  string data;
  // the '/'-separated pieces following the data, in order
  vector<string> metadata;
};
120 
121 //:: parse
122 
123 :(code)
124 void parse(istream& fin, program& out) {
125   vector<line> l;
126   trace(99, "parse") << "begin" << end();
127   while (has_data(fin)) {
128     string line_data;
129     getline(fin, line_data);
130     trace(99, "parse") << "line: " << line_data << end();
131     istringstream lin(line_data);
132     vector<word> w;
133     while (has_data(lin)) {
134       string word_data;
135       lin >> word_data;
136       if (word_data.empty()) continue;
137       if (word_data[0] == '#') break;  // comment
138       if (word_data == ".") continue;  // comment token
139       if (word_data == "==") {
140         if (!l.empty()) {
141           assert(!out.segments.empty());
142           trace(99, "parse") << "flushing to segment" << end();
143           out.segments.back().lines.swap(l);
144         }
145         segment s;
146         lin >> std::hex >> s.start;
147         trace(99, "parse") << "new segment from " << HEXWORD << s.start << end();
148         out.segments.push_back(s);
149         // todo?
150         break;  // skip rest of line
151       }
152       if (word_data[0] == ':') {
153         // todo: line metadata
154         break;
155       }
156       w.push_back(word());
157       w.back().original = word_data;
158       istringstream win(word_data);
159       if (getline(win, w.back().data, '/')) {
160         string m;
161         while (getline(win, m, '/'))
162           w.back().metadata.push_back(m);
163       }
164       trace(99, "parse") << "new word: " << w.back().data << end();
165     }
166     if (!w.empty()) {
167       l.push_back(line());
168       l.back().words.swap(w);
169     }
170   }
171   if (!l.empty()) {
172     assert(!out.segments.empty());
173     trace(99, "parse") << "flushing to segment" << end();
174     out.segments.back().lines.swap(l);
175   }
176   trace(99, "parse") << "done" << end();
177 }
178 
179 //:: transform
180 
181 :(before "End Types")
// Signature of a transform: it receives the program by non-const reference,
// so it is able to modify it.
typedef void (*transform_fn)(program&);
183 :(before "End Globals")
// The transforms that transform() applies to a program, in application order.
vector<transform_fn> Transform;
185 
186 void transform(program& p) {
187   trace(99, "transform") << "begin" << end();
188   for (int t = 0;  t < SIZE(Transform);  ++t)
189     (*Transform.at(t))(p);
190   trace(99, "transform") << "done" << end();
191 }
192 
193 //:: load
194 
195 void load(const program& p) {
196   trace(99, "load") << "begin" << end();
197   if (p.segments.empty()) {
198     raise << "no code to run\n" << end();
199     return;
200   }
201   for (int i = 0;   i < SIZE(p.segments);  ++i) {
202     const segment& seg = p.segments.at(i);
203     uint32_t addr = seg.start;
204     trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end();
205     for (int j = 0;  j < SIZE(seg.lines);  ++j) {
206       const line& l = seg.lines.at(j);
207       for (int k = 0;  k < SIZE(l.words);  ++k) {
208         const word& w = l.words.at(k);
209         uint8_t val = hex_byte(w.data);
210         if (trace_contains_errors()) return;
211         write_mem_u8(addr, val);
212         trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end();
213         ++addr;
214       }
215     }
216     if (i == 0) End_of_program = addr;
217   }
218   EIP = p.segments.at(0).start;
219   trace(99, "load") << "done" << end();
220 }
221 
222 uint8_t hex_byte(const string& s) {
223   istringstream in(s);
224   int result = 0;
225   in >> std::hex >> result;
226   if (!in) {
227     raise << "invalid hex " << s << '\n' << end();
228     return '\0';
229   }
230   if (result > 0xff) {
231     raise << "invalid hex byte " << std::hex << result << '\n' << end();
232     return '\0';
233   }
234   return static_cast<uint8_t>(result);
235 }
236 
237 //:: run
238 
239 :(before "End Initialize Op Names(name)")
// human-readable description stored for opcode 05
put(name, "05", "add imm32 to R0 (EAX)");
241 
242 //: our first opcode
243 :(before "End Single-Byte Opcodes")
case 0x05: {  // add imm32 to EAX
  // the operand is the 32-bit immediate read from the instruction stream
  int32_t arg2 = imm32();
  trace(90, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end();
  // NOTE(review): BINARY_ARITHMETIC_OP is defined elsewhere; presumably it
  // stores the sum back into Reg[EAX].i and emits the 'storing' trace line --
  // confirm against its definition
  BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2);
  break;
}
250 
251 :(code)
252 // read a 32-bit immediate in little-endian order from the instruction stream
253 int32_t imm32() {
254   int32_t result = next();
255   result |= (next()<<8);
256   result |= (next()<<16);
257   result |= (next()<<24);
258   return result;
259 }