diff options
Diffstat (limited to 'subx/011parse.cc')
-rw-r--r-- | subx/011parse.cc | 250 |
1 files changed, 0 insertions, 250 deletions
diff --git a/subx/011parse.cc b/subx/011parse.cc deleted file mode 100644 index 639299df..00000000 --- a/subx/011parse.cc +++ /dev/null @@ -1,250 +0,0 @@ -//: Loading programs into the VM. - -:(before "End Help Texts") -put(Help, "syntax", - "SubX programs consist of segments, each segment in turn consisting of lines.\n" - "Line-endings are significant; each line should contain a single\n" - "instruction, macro or directive.\n" - "\n" - "Comments start with the '#' character. It should be at the start of a word\n" - "(start of line, or following a space).\n" - "\n" - "Each segment starts with a header line: a '==' delimiter followed by the\n" - "starting address for the segment.\n" - "\n" - "The starting address for a segment has some finicky requirements. But just\n" - "start with a round number, and `subx` will try to guide you to a valid\n" - "configuration.\n" - "A good rule of thumb is to try to start the first segment at the default\n" - "address of 0x08048000, and to start each subsequent segment at least 0x1000\n" - "(most common page size) bytes after the last.\n" - "If a segment occupies than 0x1000 bytes you'll need to push subsequent\n" - "segments further down.\n" - "Currently only the first segment contains executable code (because it gets\n" - "annoying to have to change addresses in later segments every time an earlier\n" - "one changes length; one of those finicky requirements).\n" - "\n" - "Lines consist of a series of words. Words can contain arbitrary metadata\n" - "after a '/', but they can never contain whitespace. Metadata has no effect\n" - "at runtime, but can be handy when rewriting macros.\n" - "\n" - "Check out some examples in this directory (ex*.subx)\n" - "Programming in machine code can be annoying, but let's see if we can make\n" - "it nice enough to be able to write a compiler in it.\n" -); -:(before "End Help Contents") -cerr << " syntax\n"; - -:(scenario add_imm32_to_eax) -# At the lowest level, SubX programs are a series of hex bytes, each -# (variable-length) instruction on one line. -# -# Later we'll make things nicer using macros. But you'll always be able to -# insert hex bytes out of instructions. -# -# As you can see, comments start with '#' and are ignored. - -# Segment headers start with '==', specifying the hex address where they -# begin. The first segment is always assumed to be code. -== 0x1 - -# We don't show it here, but all lines can have metadata after a ':'. -# All words can have metadata after a '/'. No spaces allowed in word metadata, of course. -# Metadata doesn't directly form instructions, but some macros may look at it. -# Unrecognized metadata never causes errors, so you can also use it for -# documentation. - -# Within the code segment, x86 instructions consist of the following parts (see cheatsheet.pdf): -# opcode ModR/M SIB displacement immediate -# instruction mod, reg, Reg/Mem bits scale, index, base -# 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes - 05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX - -# This program, when run, causes the following events in the trace: -+load: 0x00000001 -> 05 -+load: 0x00000002 -> 0a -+load: 0x00000003 -> 0b -+load: 0x00000004 -> 0c -+load: 0x00000005 -> 0d -+run: add imm32 0x0d0c0b0a to reg EAX -+run: storing 0x0d0c0b0a - -:(code) -// top-level helper for scenarios: parse the input, transform any macros, load -// the final hex bytes into memory, run it -void run(const string& text_bytes) { - program p; - istringstream in(text_bytes); - parse(in, p); - if (trace_contains_errors()) return; // if any stage raises errors, stop immediately - transform(p); - if (trace_contains_errors()) return; - load(p); - if (trace_contains_errors()) return; - while (EIP < End_of_program) - run_one_instruction(); -} - -//:: core data structures - -:(before "End Types") -struct program { - vector<segment> segments; - // random ideas for other things we may eventually need - //map<name, address> globals; - //vector<recipe> recipes; - //map<string, type_info> types; -}; -:(before "struct program") -struct segment { - uint32_t start; - vector<line> lines; - segment() :start(0) {} -}; -:(before "struct segment") -struct line { - vector<word> words; - vector<string> metadata; -}; -:(before "struct line") -struct word { - string original; - string data; - vector<string> metadata; -}; - -//:: parse - -:(code) -void parse(istream& fin, program& out) { - vector<line> l; - while (has_data(fin)) { - string line_data; - getline(fin, line_data); - trace(99, "parse") << "line: " << line_data << end(); - istringstream lin(line_data); - vector<word> w; - while (has_data(lin)) { - string word_data; - lin >> word_data; - if (word_data.empty()) continue; - if (word_data == "==") { - if (!l.empty()) { - assert(!out.segments.empty()); - trace(99, "parse") << "flushing to segment" << end(); - out.segments.back().lines.swap(l); - } - segment s; - lin >> std::hex >> s.start; - trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); - out.segments.push_back(s); - // todo? - break; // skip rest of line - } - if (word_data[0] == ':') { - // todo: line metadata - break; - } - if (word_data[0] == '#') { - // comment - break; - } - w.push_back(word()); - w.back().original = word_data; - istringstream win(word_data); - if (getline(win, w.back().data, '/')) { - string m; - while (getline(win, m, '/')) - w.back().metadata.push_back(m); - } - trace(99, "parse") << "new word: " << w.back().data << end(); - } - if (!w.empty()) { - l.push_back(line()); - l.back().words.swap(w); - } - } - if (!l.empty()) { - assert(!out.segments.empty()); - trace(99, "parse") << "flushing to segment" << end(); - out.segments.back().lines.swap(l); - } -} - -//:: transform - -:(before "End Types") -typedef void (*transform_fn)(program&); -:(before "End Globals") -vector<transform_fn> Transform; - -void transform(program& p) { - for (int t = 0; t < SIZE(Transform); ++t) - (*Transform.at(t))(p); -} - -//:: load - -void load(const program& p) { - if (p.segments.empty()) { - raise << "no code to run\n" << end(); - return; - } - for (int i = 0; i < SIZE(p.segments); ++i) { - const segment& seg = p.segments.at(i); - uint32_t addr = seg.start; - trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); - for (int j = 0; j < SIZE(seg.lines); ++j) { - const line& l = seg.lines.at(j); - for (int k = 0; k < SIZE(l.words); ++k) { - const word& w = l.words.at(k); - uint8_t val = hex_byte(w.data); - if (trace_contains_errors()) return; - write_mem_u8(addr, val); - trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); - ++addr; - } - } - if (i == 0) End_of_program = addr; - } - EIP = p.segments.at(0).start; -} - -uint8_t hex_byte(const string& s) { - istringstream in(s); - int result = 0; - in >> std::hex >> result; - if (!in) { - raise << "invalid hex " << s << '\n' << end(); - return '\0'; - } - if (result > 0xff) { - raise << "invalid hex byte " << std::hex << result << '\n' << end(); - return '\0'; - } - return static_cast<uint8_t>(result); -} - -//:: run - -:(before "End Initialize Op Names(name)") -put(name, 0x05, "add imm32 to R0 (EAX)"); - -//: our first opcode -:(before "End Single-Byte Opcodes") -case 0x05: { // add imm32 to EAX - int32_t arg2 = imm32(); - trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); - BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); - break; -} - -:(code) -// read a 32-bit immediate in little-endian order from the instruction stream -int32_t imm32() { - int32_t result = next(); - result |= (next()<<8); - result |= (next()<<16); - result |= (next()<<24); - return result; -} |