From 4db6b370346d1a2fef2d35c1a64580e93e2bdf0d Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Thu, 26 Jul 2018 15:51:29 -0700 Subject: 4425 Better name for a layer. --- subx/011parse.cc | 250 ------------------------------------------------------ subx/011run.cc | 253 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+), 250 deletions(-) delete mode 100644 subx/011parse.cc create mode 100644 subx/011run.cc (limited to 'subx') diff --git a/subx/011parse.cc b/subx/011parse.cc deleted file mode 100644 index 639299df..00000000 --- a/subx/011parse.cc +++ /dev/null @@ -1,250 +0,0 @@ -//: Loading programs into the VM. - -:(before "End Help Texts") -put(Help, "syntax", - "SubX programs consist of segments, each segment in turn consisting of lines.\n" - "Line-endings are significant; each line should contain a single\n" - "instruction, macro or directive.\n" - "\n" - "Comments start with the '#' character. It should be at the start of a word\n" - "(start of line, or following a space).\n" - "\n" - "Each segment starts with a header line: a '==' delimiter followed by the\n" - "starting address for the segment.\n" - "\n" - "The starting address for a segment has some finicky requirements. But just\n" - "start with a round number, and `subx` will try to guide you to a valid\n" - "configuration.\n" - "A good rule of thumb is to try to start the first segment at the default\n" - "address of 0x08048000, and to start each subsequent segment at least 0x1000\n" - "(most common page size) bytes after the last.\n" - "If a segment occupies than 0x1000 bytes you'll need to push subsequent\n" - "segments further down.\n" - "Currently only the first segment contains executable code (because it gets\n" - "annoying to have to change addresses in later segments every time an earlier\n" - "one changes length; one of those finicky requirements).\n" - "\n" - "Lines consist of a series of words. Words can contain arbitrary metadata\n" - "after a '/', but they can never contain whitespace. Metadata has no effect\n" - "at runtime, but can be handy when rewriting macros.\n" - "\n" - "Check out some examples in this directory (ex*.subx)\n" - "Programming in machine code can be annoying, but let's see if we can make\n" - "it nice enough to be able to write a compiler in it.\n" -); -:(before "End Help Contents") -cerr << " syntax\n"; - -:(scenario add_imm32_to_eax) -# At the lowest level, SubX programs are a series of hex bytes, each -# (variable-length) instruction on one line. -# -# Later we'll make things nicer using macros. But you'll always be able to -# insert hex bytes out of instructions. -# -# As you can see, comments start with '#' and are ignored. - -# Segment headers start with '==', specifying the hex address where they -# begin. The first segment is always assumed to be code. -== 0x1 - -# We don't show it here, but all lines can have metadata after a ':'. -# All words can have metadata after a '/'. No spaces allowed in word metadata, of course. -# Metadata doesn't directly form instructions, but some macros may look at it. -# Unrecognized metadata never causes errors, so you can also use it for -# documentation. - -# Within the code segment, x86 instructions consist of the following parts (see cheatsheet.pdf): -# opcode ModR/M SIB displacement immediate -# instruction mod, reg, Reg/Mem bits scale, index, base -# 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes - 05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX - -# This program, when run, causes the following events in the trace: -+load: 0x00000001 -> 05 -+load: 0x00000002 -> 0a -+load: 0x00000003 -> 0b -+load: 0x00000004 -> 0c -+load: 0x00000005 -> 0d -+run: add imm32 0x0d0c0b0a to reg EAX -+run: storing 0x0d0c0b0a - -:(code) -// top-level helper for scenarios: parse the input, transform any macros, load -// the final hex bytes into memory, run it -void run(const string& text_bytes) { - program p; - istringstream in(text_bytes); - parse(in, p); - if (trace_contains_errors()) return; // if any stage raises errors, stop immediately - transform(p); - if (trace_contains_errors()) return; - load(p); - if (trace_contains_errors()) return; - while (EIP < End_of_program) - run_one_instruction(); -} - -//:: core data structures - -:(before "End Types") -struct program { - vector segments; - // random ideas for other things we may eventually need - //map globals; - //vector recipes; - //map types; -}; -:(before "struct program") -struct segment { - uint32_t start; - vector lines; - segment() :start(0) {} -}; -:(before "struct segment") -struct line { - vector words; - vector metadata; -}; -:(before "struct line") -struct word { - string original; - string data; - vector metadata; -}; - -//:: parse - -:(code) -void parse(istream& fin, program& out) { - vector l; - while (has_data(fin)) { - string line_data; - getline(fin, line_data); - trace(99, "parse") << "line: " << line_data << end(); - istringstream lin(line_data); - vector w; - while (has_data(lin)) { - string word_data; - lin >> word_data; - if (word_data.empty()) continue; - if (word_data == "==") { - if (!l.empty()) { - assert(!out.segments.empty()); - trace(99, "parse") << "flushing to segment" << end(); - out.segments.back().lines.swap(l); - } - segment s; - lin >> std::hex >> s.start; - trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); - out.segments.push_back(s); - // todo? - break; // skip rest of line - } - if (word_data[0] == ':') { - // todo: line metadata - break; - } - if (word_data[0] == '#') { - // comment - break; - } - w.push_back(word()); - w.back().original = word_data; - istringstream win(word_data); - if (getline(win, w.back().data, '/')) { - string m; - while (getline(win, m, '/')) - w.back().metadata.push_back(m); - } - trace(99, "parse") << "new word: " << w.back().data << end(); - } - if (!w.empty()) { - l.push_back(line()); - l.back().words.swap(w); - } - } - if (!l.empty()) { - assert(!out.segments.empty()); - trace(99, "parse") << "flushing to segment" << end(); - out.segments.back().lines.swap(l); - } -} - -//:: transform - -:(before "End Types") -typedef void (*transform_fn)(program&); -:(before "End Globals") -vector Transform; - -void transform(program& p) { - for (int t = 0; t < SIZE(Transform); ++t) - (*Transform.at(t))(p); -} - -//:: load - -void load(const program& p) { - if (p.segments.empty()) { - raise << "no code to run\n" << end(); - return; - } - for (int i = 0; i < SIZE(p.segments); ++i) { - const segment& seg = p.segments.at(i); - uint32_t addr = seg.start; - trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); - for (int j = 0; j < SIZE(seg.lines); ++j) { - const line& l = seg.lines.at(j); - for (int k = 0; k < SIZE(l.words); ++k) { - const word& w = l.words.at(k); - uint8_t val = hex_byte(w.data); - if (trace_contains_errors()) return; - write_mem_u8(addr, val); - trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); - ++addr; - } - } - if (i == 0) End_of_program = addr; - } - EIP = p.segments.at(0).start; -} - -uint8_t hex_byte(const string& s) { - istringstream in(s); - int result = 0; - in >> std::hex >> result; - if (!in) { - raise << "invalid hex " << s << '\n' << end(); - return '\0'; - } - if (result > 0xff) { - raise << "invalid hex byte " << std::hex << result << '\n' << end(); - return '\0'; - } - return static_cast(result); -} - -//:: run - -:(before "End Initialize Op Names(name)") -put(name, 0x05, "add imm32 to R0 (EAX)"); - -//: our first opcode -:(before "End Single-Byte Opcodes") -case 0x05: { // add imm32 to EAX - int32_t arg2 = imm32(); - trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); - BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); - break; -} - -:(code) -// read a 32-bit immediate in little-endian order from the instruction stream -int32_t imm32() { - int32_t result = next(); - result |= (next()<<8); - result |= (next()<<16); - result |= (next()<<24); - return result; -} diff --git a/subx/011run.cc b/subx/011run.cc new file mode 100644 index 00000000..906a8305 --- /dev/null +++ b/subx/011run.cc @@ -0,0 +1,253 @@ +//: Running SubX programs on the VM. + +//: (Not to be confused with the 'run' subcommand for running ELF binaries on +//: the VM. That comes later.) + +:(before "End Help Texts") +put(Help, "syntax", + "SubX programs consist of segments, each segment in turn consisting of lines.\n" + "Line-endings are significant; each line should contain a single\n" + "instruction, macro or directive.\n" + "\n" + "Comments start with the '#' character. It should be at the start of a word\n" + "(start of line, or following a space).\n" + "\n" + "Each segment starts with a header line: a '==' delimiter followed by the\n" + "starting address for the segment.\n" + "\n" + "The starting address for a segment has some finicky requirements. But just\n" + "start with a round number, and `subx` will try to guide you to a valid\n" + "configuration.\n" + "A good rule of thumb is to try to start the first segment at the default\n" + "address of 0x08048000, and to start each subsequent segment at least 0x1000\n" + "(most common page size) bytes after the last.\n" + "If a segment occupies than 0x1000 bytes you'll need to push subsequent\n" + "segments further down.\n" + "Currently only the first segment contains executable code (because it gets\n" + "annoying to have to change addresses in later segments every time an earlier\n" + "one changes length; one of those finicky requirements).\n" + "\n" + "Lines consist of a series of words. Words can contain arbitrary metadata\n" + "after a '/', but they can never contain whitespace. Metadata has no effect\n" + "at runtime, but can be handy when rewriting macros.\n" + "\n" + "Check out some examples in this directory (ex*.subx)\n" + "Programming in machine code can be annoying, but let's see if we can make\n" + "it nice enough to be able to write a compiler in it.\n" +); +:(before "End Help Contents") +cerr << " syntax\n"; + +:(scenario add_imm32_to_eax) +# At the lowest level, SubX programs are a series of hex bytes, each +# (variable-length) instruction on one line. +# +# Later we'll make things nicer using macros. But you'll always be able to +# insert hex bytes out of instructions. +# +# As you can see, comments start with '#' and are ignored. + +# Segment headers start with '==', specifying the hex address where they +# begin. The first segment is always assumed to be code. +== 0x1 + +# We don't show it here, but all lines can have metadata after a ':'. +# All words can have metadata after a '/'. No spaces allowed in word metadata, of course. +# Metadata doesn't directly form instructions, but some macros may look at it. +# Unrecognized metadata never causes errors, so you can also use it for +# documentation. + +# Within the code segment, x86 instructions consist of the following parts (see cheatsheet.pdf): +# opcode ModR/M SIB displacement immediate +# instruction mod, reg, Reg/Mem bits scale, index, base +# 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes + 05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX + +# This program, when run, causes the following events in the trace: ++load: 0x00000001 -> 05 ++load: 0x00000002 -> 0a ++load: 0x00000003 -> 0b ++load: 0x00000004 -> 0c ++load: 0x00000005 -> 0d ++run: add imm32 0x0d0c0b0a to reg EAX ++run: storing 0x0d0c0b0a + +:(code) +// top-level helper for scenarios: parse the input, transform any macros, load +// the final hex bytes into memory, run it +void run(const string& text_bytes) { + program p; + istringstream in(text_bytes); + parse(in, p); + if (trace_contains_errors()) return; // if any stage raises errors, stop immediately + transform(p); + if (trace_contains_errors()) return; + load(p); + if (trace_contains_errors()) return; + while (EIP < End_of_program) + run_one_instruction(); +} + +//:: core data structures + +:(before "End Types") +struct program { + vector segments; + // random ideas for other things we may eventually need + //map globals; + //vector recipes; + //map types; +}; +:(before "struct program") +struct segment { + uint32_t start; + vector lines; + segment() :start(0) {} +}; +:(before "struct segment") +struct line { + vector words; + vector metadata; +}; +:(before "struct line") +struct word { + string original; + string data; + vector metadata; +}; + +//:: parse + +:(code) +void parse(istream& fin, program& out) { + vector l; + while (has_data(fin)) { + string line_data; + getline(fin, line_data); + trace(99, "parse") << "line: " << line_data << end(); + istringstream lin(line_data); + vector w; + while (has_data(lin)) { + string word_data; + lin >> word_data; + if (word_data.empty()) continue; + if (word_data == "==") { + if (!l.empty()) { + assert(!out.segments.empty()); + trace(99, "parse") << "flushing to segment" << end(); + out.segments.back().lines.swap(l); + } + segment s; + lin >> std::hex >> s.start; + trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); + out.segments.push_back(s); + // todo? + break; // skip rest of line + } + if (word_data[0] == ':') { + // todo: line metadata + break; + } + if (word_data[0] == '#') { + // comment + break; + } + w.push_back(word()); + w.back().original = word_data; + istringstream win(word_data); + if (getline(win, w.back().data, '/')) { + string m; + while (getline(win, m, '/')) + w.back().metadata.push_back(m); + } + trace(99, "parse") << "new word: " << w.back().data << end(); + } + if (!w.empty()) { + l.push_back(line()); + l.back().words.swap(w); + } + } + if (!l.empty()) { + assert(!out.segments.empty()); + trace(99, "parse") << "flushing to segment" << end(); + out.segments.back().lines.swap(l); + } +} + +//:: transform + +:(before "End Types") +typedef void (*transform_fn)(program&); +:(before "End Globals") +vector Transform; + +void transform(program& p) { + for (int t = 0; t < SIZE(Transform); ++t) + (*Transform.at(t))(p); +} + +//:: load + +void load(const program& p) { + if (p.segments.empty()) { + raise << "no code to run\n" << end(); + return; + } + for (int i = 0; i < SIZE(p.segments); ++i) { + const segment& seg = p.segments.at(i); + uint32_t addr = seg.start; + trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); + for (int j = 0; j < SIZE(seg.lines); ++j) { + const line& l = seg.lines.at(j); + for (int k = 0; k < SIZE(l.words); ++k) { + const word& w = l.words.at(k); + uint8_t val = hex_byte(w.data); + if (trace_contains_errors()) return; + write_mem_u8(addr, val); + trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); + ++addr; + } + } + if (i == 0) End_of_program = addr; + } + EIP = p.segments.at(0).start; +} + +uint8_t hex_byte(const string& s) { + istringstream in(s); + int result = 0; + in >> std::hex >> result; + if (!in) { + raise << "invalid hex " << s << '\n' << end(); + return '\0'; + } + if (result > 0xff) { + raise << "invalid hex byte " << std::hex << result << '\n' << end(); + return '\0'; + } + return static_cast(result); +} + +//:: run + +:(before "End Initialize Op Names(name)") +put(name, 0x05, "add imm32 to R0 (EAX)"); + +//: our first opcode +:(before "End Single-Byte Opcodes") +case 0x05: { // add imm32 to EAX + int32_t arg2 = imm32(); + trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); + BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); + break; +} + +:(code) +// read a 32-bit immediate in little-endian order from the instruction stream +int32_t imm32() { + int32_t result = next(); + result |= (next()<<8); + result |= (next()<<16); + result |= (next()<<24); + return result; +} -- cgit 1.4.1-2-gfad0