diff options
Diffstat (limited to 'subx')
-rw-r--r-- | subx/010vm.cc (renamed from subx/010core.cc) | 122 | ||||
-rw-r--r-- | subx/011parse.cc | 209 | ||||
-rw-r--r-- | subx/012direct_addressing.cc | 26 | ||||
-rw-r--r-- | subx/013indirect_addressing.cc | 53 | ||||
-rw-r--r-- | subx/014immediate_addressing.cc | 18 | ||||
-rw-r--r-- | subx/016jump_relative.cc | 14 | ||||
-rw-r--r-- | subx/017jump_relative.cc | 14 | ||||
-rw-r--r-- | subx/018functions.cc | 2 | ||||
-rw-r--r-- | subx/020elf.cc | 3 | ||||
-rw-r--r-- | subx/021translate.cc | 127 | ||||
-rw-r--r-- | subx/022transform_immediate.cc | 186 | ||||
-rw-r--r-- | subx/ex1.1.subx | 1 | ||||
-rw-r--r-- | subx/ex1.2.subx | 1 | ||||
-rw-r--r-- | subx/ex2.subx | 1 | ||||
-rw-r--r-- | subx/ex3.subx | 1 | ||||
-rw-r--r-- | subx/ex4.subx | 1 |
16 files changed, 489 insertions, 290 deletions
diff --git a/subx/010core.cc b/subx/010vm.cc index 67fff68f..17c4e064 100644 --- a/subx/010core.cc +++ b/subx/010vm.cc @@ -1,6 +1,8 @@ -//:: simulated x86 registers; just a subset -//: assume segment registers are hard-coded to 0 -//: no floating-point, MMX, etc. yet +//: Core data structures for simulating the SubX VM (subset of an x86 processor) + +//:: registers +//: assume segment registers are hard-coded to 0 +//: no floating-point, MMX, etc. yet :(before "End Types") enum { @@ -25,9 +27,8 @@ uint32_t EIP = 1; // preserve null pointer bzero(Reg, sizeof(Reg)); EIP = 1; // preserve null pointer -//:: simulated flag registers; just a subset that we care about - :(before "End Globals") +// the subset of x86 flag registers we care about bool SF = false; // sign flag bool ZF = false; // zero flag bool OF = false; // overflow flag @@ -116,36 +117,7 @@ inline void write_mem_i32(uint32_t addr, int32_t val) { //:: core interpreter loop -:(scenario add_imm32_to_eax) -# In scenarios, programs are a series of hex bytes, each (variable-length) -# instruction on one line. -# -# x86 instructions consist of the following parts (see cheatsheet.pdf): -# opcode ModR/M SIB displacement immediate -# instruction mod, reg, Reg/Mem bits scale, index, base -# 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes - 05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX -# All hex bytes must be exactly 2 characters each. No '0x' prefixes. -+load: 1 -> 05 -+load: 2 -> 0a -+load: 3 -> 0b -+load: 4 -> 0c -+load: 5 -> 0d -+run: add imm32 0x0d0c0b0a to reg EAX -+run: storing 0x0d0c0b0a - :(code) -// helper for tests: load a program into memory from a textual representation -// of its bytes, and run it -void run(string text_bytes) { - // Begin run() For Scenarios -//? cerr << text_bytes << '\n'; - load_program(text_bytes); - EIP = 1; // preserve null pointer - while (EIP < End_of_program) - run_one_instruction(); -} - // skeleton of how x86 instructions are decoded void run_one_instruction() { uint8_t op=0, op2=0, op3=0; @@ -155,13 +127,6 @@ void run_one_instruction() { case 0xf4: // hlt EIP = End_of_program; break; - // our first opcode - case 0x05: { // add imm32 to EAX - int32_t arg2 = imm32(); - trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); - BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); - break; - } // End Single-Byte Opcodes case 0x0f: switch(op2 = next()) { @@ -193,85 +158,10 @@ void run_one_instruction() { } } -// Load regions of memory (called 'segments') with given hex values based on -// '-- ' section headers. -void load_program(const string& text_bytes) { - istringstream in(text_bytes); - load_program(in); -} -void load_program(istream& in) { - uint32_t addr = 1; // preserve null pointer - int segment_index = 0; - while (has_data(in)) { - string line_data; - getline(in, line_data); -//? cerr << "line: " << SIZE(line_data) << ": " << line_data << '\n'; - istringstream line(line_data); - while (has_data(line)) { - string word; - line >> word; - if (word.empty()) continue; - if (word == "==") { - // assume the first segment contains code - if (segment_index == 1) End_of_program = addr; - ++segment_index; - // new segment - line >> std::hex >> addr; - break; // skip rest of line - } - if (word[0] == ':') { - // metadata - break; - } - if (word[0] == '#') { - // comment - break; - } - // otherwise it's a hex byte - uint32_t next_byte = 0; - istringstream ss(word); - ss >> std::hex >> next_byte; - if (next_byte > 0xff) { - raise << "invalid hex byte " << word << '\n' << end(); - return; - } - write_mem_u8(addr, static_cast<uint8_t>(next_byte)); - trace(99, "load") << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); -//? cerr << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << '\n'; - addr++; - } - } - // convenience: allow zero segment headers; code then starts at address 1 - if (segment_index == 0) End_of_program = addr; -} - inline uint8_t next() { return read_mem_u8(EIP++); } -// read a 32-bit immediate in little-endian order from the instruction stream -int32_t imm32() { - int32_t result = next(); - result |= (next()<<8); - result |= (next()<<16); - result |= (next()<<24); - return result; -} - -string rname(uint8_t r) { - switch (r) { - case 0: return "EAX"; - case 1: return "ECX"; - case 2: return "EDX"; - case 3: return "EBX"; - case 4: return "ESP"; - case 5: return "EBP"; - case 6: return "ESI"; - case 7: return "EDI"; - default: raise << "invalid register " << r << '\n' << end(); return ""; - } -} - :(before "End Includes") #include <iomanip> #define HEXBYTE std::hex << std::setw(2) << std::setfill('0') diff --git a/subx/011parse.cc b/subx/011parse.cc new file mode 100644 index 00000000..4735dfd2 --- /dev/null +++ b/subx/011parse.cc @@ -0,0 +1,209 @@ +//: Loading programs into the VM. + +:(scenario add_imm32_to_eax) +# At the lowest level, SubX programs are a series of hex bytes, each +# (variable-length) instruction on one line. +# +# Later we'll make things nicer using macros. But you'll always be able to +# insert hex bytes out of instructions. +# +# As you can see, comments start with '#' and are ignored. + +# Segment headers start with '==', specifying the hex address where they +# begin. The first segment is always assumed to be code. +== 0x1 + +# We don't show it here, but all lines can have metadata after a ':'. +# All words can have metadata after a '/'. No spaces allowed in word metadata, of course. +# Metadata doesn't directly form instructions, but some macros may look at it. +# Unrecognized metadata never causes errors, so you can also use it for +# documentation. + +# Within the code segment, x86 instructions consist of the following parts (see cheatsheet.pdf): +# opcode ModR/M SIB displacement immediate +# instruction mod, reg, Reg/Mem bits scale, index, base +# 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes + 05 0a 0b 0c 0d # add 0x0d0c0b0a to EAX + +# This program, when run, causes the following events in the trace: ++load: 0x00000001 -> 05 ++load: 0x00000002 -> 0a ++load: 0x00000003 -> 0b ++load: 0x00000004 -> 0c ++load: 0x00000005 -> 0d ++run: add imm32 0x0d0c0b0a to reg EAX ++run: storing 0x0d0c0b0a + +:(code) +// top-level helper for scenarios: parse the input, transform any macros, load +// the final hex bytes into memory, run it +void run(const string& text_bytes) { + program p; + istringstream in(text_bytes); + parse(in, p); + if (trace_contains_errors()) return; // if any stage raises errors, stop immediately + transform(p); + if (trace_contains_errors()) return; + load(p); + if (trace_contains_errors()) return; + if (p.segments.empty()) return; + EIP = p.segments.at(0).start; + while (EIP < End_of_program) + run_one_instruction(); +} + +//:: core data structures + +:(before "End Types") +struct program { + vector<segment> segments; + // random ideas for other things we may eventually need + //map<name, address> globals; + //vector<recipe> recipes; + //map<string, type_info> types; +}; +:(before "struct program") +struct segment { + uint32_t start; + vector<line> lines; + segment() :start(0) {} +}; +:(before "struct segment") +struct line { + vector<word> words; + vector<string> metadata; +}; +:(before "struct line") +struct word { + string original; + string data; + vector<string> metadata; +}; + +//:: parse + +:(code) +void parse(istream& fin, program& out) { + vector<line> l; + while (has_data(fin)) { + string line_data; + getline(fin, line_data); + trace(99, "parse") << "line: " << line_data << end(); + istringstream lin(line_data); + vector<word> w; + while (has_data(lin)) { + string word_data; + lin >> word_data; + if (word_data.empty()) continue; + if (word_data == "==") { + if (!l.empty()) { + assert(!out.segments.empty()); + trace(99, "parse") << "flushing to segment" << end(); + out.segments.back().lines.swap(l); + } + segment s; + lin >> std::hex >> s.start; + trace(99, "parse") << "new segment from " << HEXWORD << s.start << end(); + out.segments.push_back(s); + // todo? + break; // skip rest of line + } + if (word_data[0] == ':') { + // todo: line metadata + break; + } + if (word_data[0] == '#') { + // comment + break; + } + w.push_back(word()); + w.back().original = word_data; + istringstream win(word_data); + if (getline(win, w.back().data, '/')) { + string m; + while (getline(win, m, '/')) + w.back().metadata.push_back(m); + } + trace(99, "parse") << "new word: " << w.back().data << end(); + } + if (!w.empty()) { + l.push_back(line()); + l.back().words.swap(w); + } + } + if (!l.empty()) { + assert(!out.segments.empty()); + trace(99, "parse") << "flushing to segment" << end(); + out.segments.back().lines.swap(l); + } +} + +//:: transform + +:(before "End Types") +typedef void (*transform_fn)(program&); +:(before "End Globals") +vector<transform_fn> Transform; + +void transform(program& p) { + for (int t = 0; t < SIZE(Transform); ++t) + (*Transform.at(t))(p); +} + +//:: load + +void load(const program& p) { + for (int i = 0; i < SIZE(p.segments); ++i) { + const segment& seg = p.segments.at(i); + uint32_t addr = seg.start; + trace(99, "load") << "loading segment " << i << " from " << HEXWORD << addr << end(); + for (int j = 0; j < SIZE(seg.lines); ++j) { + const line& l = seg.lines.at(j); + for (int k = 0; k < SIZE(l.words); ++k) { + const word& w = l.words.at(k); + uint8_t val = hex_byte(w.data); + if (trace_contains_errors()) return; + write_mem_u8(addr, val); + trace(99, "load") << "0x" << HEXWORD << addr << " -> " << HEXBYTE << NUM(read_mem_u8(addr)) << end(); + ++addr; + } + } + if (i == 0) End_of_program = addr; + } +} + +uint8_t hex_byte(const string& s) { + istringstream in(s); + int result = 0; + in >> std::hex >> result; + if (!in) { + raise << "invalid hex " << s << '\n' << end(); + return '\0'; + } + if (result > 0xff) { + raise << "invalid hex byte " << std::hex << result << '\n' << end(); + return '\0'; + } + return static_cast<uint8_t>(result); +} + +//:: run + +//: our first opcode +:(before "End Single-Byte Opcodes") +case 0x05: { // add imm32 to EAX + int32_t arg2 = imm32(); + trace(2, "run") << "add imm32 0x" << HEXWORD << arg2 << " to reg EAX" << end(); + BINARY_ARITHMETIC_OP(+, Reg[EAX].i, arg2); + break; +} + +:(code) +// read a 32-bit immediate in little-endian order from the instruction stream +int32_t imm32() { + int32_t result = next(); + result |= (next()<<8); + result |= (next()<<16); + result |= (next()<<24); + return result; +} diff --git a/subx/012direct_addressing.cc b/subx/012direct_addressing.cc index fffcbd6a..bdad57ab 100644 --- a/subx/012direct_addressing.cc +++ b/subx/012direct_addressing.cc @@ -3,6 +3,7 @@ :(scenario add_r32_to_r32) % Reg[EAX].i = 0x10; % Reg[EBX].i = 1; +== 0x1 # op ModR/M SIB displacement immediate 01 d8 # add EBX to EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -43,11 +44,26 @@ int32_t* effective_address(uint8_t modrm) { return mem_addr_i32(addr); } +string rname(uint8_t r) { + switch (r) { + case 0: return "EAX"; + case 1: return "ECX"; + case 2: return "EDX"; + case 3: return "EBX"; + case 4: return "ESP"; + case 5: return "EBP"; + case 6: return "ESI"; + case 7: return "EDI"; + default: raise << "invalid register " << r << '\n' << end(); return ""; + } +} + //:: subtract :(scenario subtract_r32_from_r32) % Reg[EAX].i = 10; % Reg[EBX].i = 1; +== 0x1 # op ModR/M SIB displacement immediate 29 d8 # subtract EBX from EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -70,6 +86,7 @@ case 0x29: { // subtract r32 from r/m32 :(scenario and_r32_with_r32) % Reg[EAX].i = 0x0a0b0c0d; % Reg[EBX].i = 0x000000ff; +== 0x1 # op ModR/M SIB displacement immediate 21 d8 # and EBX with destination EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -92,6 +109,7 @@ case 0x21: { // and r32 with r/m32 :(scenario or_r32_with_r32) % Reg[EAX].i = 0x0a0b0c0d; % Reg[EBX].i = 0xa0b0c0d0; +== 0x1 # op ModR/M SIB displacement immediate 09 d8 # or EBX with destination EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -114,6 +132,7 @@ case 0x09: { // or r32 with r/m32 :(scenario xor_r32_with_r32) % Reg[EAX].i = 0x0a0b0c0d; % Reg[EBX].i = 0xaabbc0d0; +== 0x1 # op ModR/M SIB displacement immediate 31 d8 # xor EBX with destination EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -135,6 +154,7 @@ case 0x31: { // xor r32 with r/m32 :(scenario not_r32) % Reg[EBX].i = 0x0f0f00ff; +== 0x1 # op ModR/M SIB displacement immediate f7 c3 # not EBX # ModR/M in binary: 11 (direct mode) 000 (unused) 011 (dest EBX) @@ -160,6 +180,7 @@ case 0xf7: { // xor r32 with r/m32 :(scenario compare_r32_with_r32_greater) % Reg[EAX].i = 0x0a0b0c0d; % Reg[EBX].i = 0x0a0b0c07; +== 0x1 # op ModR/M SIB displacement immediate 39 d8 # compare EBX with EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -186,6 +207,7 @@ case 0x39: { // set SF if r/m32 < r32 :(scenario compare_r32_with_r32_lesser) % Reg[EAX].i = 0x0a0b0c07; % Reg[EBX].i = 0x0a0b0c0d; +== 0x1 # op ModR/M SIB displacement immediate 39 d8 # compare EBX with EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -196,6 +218,7 @@ case 0x39: { // set SF if r/m32 < r32 :(scenario compare_r32_with_r32_equal) % Reg[EAX].i = 0x0a0b0c0d; % Reg[EBX].i = 0x0a0b0c0d; +== 0x1 # op ModR/M SIB displacement immediate 39 d8 # compare EBX with EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -207,6 +230,7 @@ case 0x39: { // set SF if r/m32 < r32 :(scenario copy_r32_to_r32) % Reg[EBX].i = 0xaf; +== 0x1 # op ModR/M SIB displacement immediate 89 d8 # copy EBX to EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -230,6 +254,7 @@ case 0x89: { // copy r32 to r/m32 :(scenario xchg_r32_with_r32) % Reg[EBX].i = 0xaf; % Reg[EAX].i = 0x2e; +== 0x1 # op ModR/M SIB displacement immediate 87 d8 # exchange EBX with EAX # ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) @@ -257,6 +282,7 @@ case 0x87: { // exchange r32 with r/m32 :(scenario push_r32) % Reg[ESP].u = 0x64; % Reg[EBX].i = 0x0000000a; +== 0x1 # op ModR/M SIB displacement immediate 53 # push EBX to stack +run: push EBX diff --git a/subx/013indirect_addressing.cc b/subx/013indirect_addressing.cc index df56a248..232b67de 100644 --- a/subx/013indirect_addressing.cc +++ b/subx/013indirect_addressing.cc @@ -4,7 +4,7 @@ :(scenario add_r32_to_mem_at_r32) % Reg[EBX].i = 0x10; % Reg[EAX].i = 0x60; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 01 18 # add EBX to *EAX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -30,7 +30,7 @@ case 0: // indirect addressing :(scenario add_mem_at_r32_to_r32) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0x10; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 03 18 # add *EAX to EBX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -55,7 +55,7 @@ case 0x03: { // add r/m32 to r32 :(scenario subtract_r32_from_mem_at_r32) % Reg[EAX].i = 0x60; % Reg[EBX].i = 1; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 29 18 # subtract EBX from *EAX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -70,7 +70,7 @@ case 0x03: { // add r/m32 to r32 :(scenario subtract_mem_at_r32_from_r32) % Reg[EAX].i = 0x60; % Reg[EBX].i = 10; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 2b 18 # subtract *EAX from EBX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -95,7 +95,7 @@ case 0x2b: { // subtract r/m32 from r32 :(scenario and_r32_with_mem_at_r32) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0xff; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 21 18 # and EBX with *EAX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -110,7 +110,7 @@ case 0x2b: { // subtract r/m32 from r32 :(scenario and_mem_at_r32_with_r32) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0x0a0b0c0d; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 23 18 # and *EAX with EBX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -135,7 +135,7 @@ case 0x23: { // and r/m32 with r32 :(scenario or_r32_with_mem_at_r32) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0xa0b0c0d0; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 09 18 # or EBX with *EAX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -150,7 +150,7 @@ case 0x23: { // and r/m32 with r32 :(scenario or_mem_at_r32_with_r32) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0xa0b0c0d0; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 0b 18 # or *EAX with EBX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -175,7 +175,7 @@ case 0x0b: { // or r/m32 with r32 :(scenario xor_r32_with_mem_at_r32) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0xa0b0c0d0; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 31 18 # xor EBX with *EAX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -190,7 +190,7 @@ case 0x0b: { // or r/m32 with r32 :(scenario xor_mem_at_r32_with_r32) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0xa0b0c0d0; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 33 18 # xor *EAX with EBX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -214,7 +214,7 @@ case 0x33: { // xor r/m32 with r32 :(scenario not_r32_with_mem_at_r32) % Reg[EBX].i = 0x60; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate f7 03 # negate *EBX # ModR/M in binary: 00 (indirect mode) 000 (unused) 011 (dest EBX) @@ -229,7 +229,7 @@ ff 00 0f 0f # 0x0f0f00ff :(scenario compare_mem_at_r32_with_r32_greater) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0x0a0b0c07; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 39 18 # compare EBX with *EAX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -242,7 +242,7 @@ ff 00 0f 0f # 0x0f0f00ff :(scenario compare_mem_at_r32_with_r32_lesser) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0x0a0b0c0d; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 39 18 # compare EBX with *EAX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -255,7 +255,7 @@ ff 00 0f 0f # 0x0f0f00ff :(scenario compare_mem_at_r32_with_r32_equal) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0x0a0b0c0d; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 39 18 # compare EBX with *EAX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -270,7 +270,7 @@ ff 00 0f 0f # 0x0f0f00ff :(scenario compare_r32_with_mem_at_r32_greater) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0x0a0b0c0d; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 3b 18 # compare *EAX with EBX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -299,7 +299,7 @@ case 0x3b: { // set SF if r32 < r/m32 :(scenario compare_r32_with_mem_at_r32_lesser) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0x0a0b0c07; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 3b 18 # compare *EAX with EBX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -312,7 +312,7 @@ case 0x3b: { // set SF if r32 < r/m32 :(scenario compare_r32_with_mem_at_r32_equal) % Reg[EAX].i = 0x60; % Reg[EBX].i = 0x0a0b0c0d; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 3b 18 # compare *EAX with EBX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -327,6 +327,7 @@ case 0x3b: { // set SF if r32 < r/m32 :(scenario copy_r32_to_mem_at_r32) % Reg[EBX].i = 0xaf; % Reg[EAX].i = 0x60; +== 0x1 # op ModR/M SIB displacement immediate 89 18 # copy EBX to *EAX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -338,7 +339,7 @@ case 0x3b: { // set SF if r32 < r/m32 :(scenario copy_mem_at_r32_to_r32) % Reg[EAX].i = 0x60; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 8b 18 # copy *EAX to EBX # ModR/M in binary: 00 (indirect mode) 011 (src EAX) 000 (dest EAX) @@ -363,7 +364,7 @@ case 0x8b: { // copy r32 to r/m32 :(scenario jump_mem_at_r32) % Reg[EAX].i = 0x60; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate ff 20 # jump to *EAX # ModR/M in binary: 00 (indirect mode) 100 (jump to r/m32) 000 (src EAX) @@ -400,7 +401,7 @@ case 0xff: { :(scenario push_mem_at_r32) % Reg[EAX].i = 0x60; % Reg[ESP].u = 0x14; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate ff 30 # push *EAX to stack # ModR/M in binary: 00 (indirect mode) 110 (push r/m32) 000 (src EAX) @@ -424,7 +425,7 @@ case 6: { // push r/m32 to stack :(scenario pop_mem_at_r32) % Reg[EAX].i = 0x60; % Reg[ESP].u = 0x10; -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 8f 00 # pop stack into *EAX # ModR/M in binary: 00 (indirect mode) 000 (pop r/m32) 000 (dest EAX) @@ -454,7 +455,7 @@ case 0x8f: { // pop stack into r/m32 :(scenario add_r32_to_mem_at_displacement) % Reg[EBX].i = 0x10; // source -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 01 1d 60 00 00 00 # add EBX to *0x60 # ModR/M in binary: 00 (indirect mode) 011 (src EBX) 101 (dest in disp32) @@ -475,7 +476,7 @@ case 5: // exception: mod 0b00 rm 0b101 => incoming disp32 :(scenario add_r32_to_mem_at_r32_plus_disp8) % Reg[EBX].i = 0x10; // source % Reg[EAX].i = 0x5e; // dest -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 01 58 02 # add EBX to *(EAX+2) # ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 000 (dest EAX) @@ -504,7 +505,7 @@ case 1: // indirect + disp8 addressing :(scenario add_r32_to_mem_at_r32_plus_negative_disp8) % Reg[EBX].i = 0x10; // source % Reg[EAX].i = 0x61; // dest -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 01 58 ff # add EBX to *(EAX-1) # ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 000 (dest EAX) @@ -520,7 +521,7 @@ case 1: // indirect + disp8 addressing :(scenario add_r32_to_mem_at_r32_plus_disp32) % Reg[EBX].i = 0x10; // source % Reg[EAX].i = 0x5e; // dest -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 01 98 02 00 00 00 # add EBX to *(EAX+2) # ModR/M in binary: 10 (indirect+disp32 mode) 011 (src EBX) 000 (dest EAX) @@ -549,7 +550,7 @@ case 2: // indirect + disp32 addressing :(scenario add_r32_to_mem_at_r32_plus_negative_disp32) % Reg[EBX].i = 0x10; // source % Reg[EAX].i = 0x61; // dest -== 0x01 # code segment +== 0x1 # code segment # op ModR/M SIB displacement immediate 01 98 ff ff ff ff # add EBX to *(EAX-1) # ModR/M in binary: 10 (indirect+disp32 mode) 011 (src EBX) 000 (dest EAX) diff --git a/subx/014immediate_addressing.cc b/subx/014immediate_addressing.cc index c1e88f21..11e825b8 100644 --- a/subx/014immediate_addressing.cc +++ b/subx/014immediate_addressing.cc @@ -2,6 +2,7 @@ :(scenario add_imm32_to_r32) % Reg[EBX].i = 1; +== 0x1 # op ModR/M SIB displacement immediate 81 c3 0a 0b 0c 0d # add 0x0d0c0b0a to EBX # ModR/M in binary: 11 (direct mode) 000 (add imm32) 011 (dest EBX) @@ -49,6 +50,7 @@ case 0x81: { // combine imm32 with r/m32 :(scenario subtract_imm32_from_eax) % Reg[EAX].i = 0x0d0c0baa; +== 0x1 # op ModR/M SIB displacement immediate 2d 0a 0b 0c 0d # subtract 0x0d0c0b0a from EAX +run: subtract imm32 0x0d0c0b0a from EAX @@ -88,6 +90,7 @@ case 5: { :(scenario subtract_imm32_from_r32) % Reg[EBX].i = 10; +== 0x1 # op ModR/M SIB displacement immediate 81 eb 01 00 00 00 # subtract 1 from EBX # ModR/M in binary: 11 (direct mode) 101 (subtract imm32) 011 (dest EBX) @@ -100,6 +103,7 @@ case 5: { :(scenario and_imm32_with_eax) % Reg[EAX].i = 0xff; +== 0x1 # op ModR/M SIB displacement immediate 25 0a 0b 0c 0d # and 0x0d0c0b0a with EAX +run: and imm32 0x0d0c0b0a with EAX @@ -139,6 +143,7 @@ case 4: { :(scenario and_imm32_with_r32) % Reg[EBX].i = 0xff; +== 0x1 # op ModR/M SIB displacement immediate 81 e3 0a 0b 0c 0d # and 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 100 (and imm32) 011 (dest EBX) @@ -151,6 +156,7 @@ case 4: { :(scenario or_imm32_with_eax) % Reg[EAX].i = 0xd0c0b0a0; +== 0x1 # op ModR/M SIB displacement immediate 0d 0a 0b 0c 0d # or 0x0d0c0b0a with EAX +run: or imm32 0x0d0c0b0a with EAX @@ -188,6 +194,7 @@ case 1: { :(scenario or_imm32_with_r32) % Reg[EBX].i = 0xd0c0b0a0; +== 0x1 # op ModR/M SIB displacement immediate 81 cb 0a 0b 0c 0d # or 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 001 (or imm32) 011 (dest EBX) @@ -200,6 +207,7 @@ case 1: { :(scenario xor_imm32_with_eax) % Reg[EAX].i = 0xddccb0a0; +== 0x1 # op ModR/M SIB displacement immediate 35 0a 0b 0c 0d # xor 0x0d0c0b0a with EAX +run: xor imm32 0x0d0c0b0a with EAX @@ -237,6 +245,7 @@ case 6: { :(scenario xor_imm32_with_r32) % Reg[EBX].i = 0xd0c0b0a0; +== 0x1 # op ModR/M SIB displacement immediate 81 f3 0a 0b 0c 0d # xor 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 110 (xor imm32) 011 (dest EBX) @@ -249,6 +258,7 @@ case 6: { :(scenario compare_imm32_with_eax_greater) % Reg[EAX].i = 0x0d0c0b0a; +== 0x1 # op ModR/M SIB displacement immediate 3d 07 0b 0c 0d # compare 0x0d0c0b07 with EAX +run: compare EAX and imm32 0x0d0c0b07 @@ -270,6 +280,7 @@ case 0x3d: { // subtract imm32 from EAX :(scenario compare_imm32_with_eax_lesser) % Reg[EAX].i = 0x0d0c0b07; +== 0x1 # op ModR/M SIB displacement immediate 3d 0a 0b 0c 0d # compare 0x0d0c0b0a with EAX +run: compare EAX and imm32 0x0d0c0b0a @@ -277,6 +288,7 @@ case 0x3d: { // subtract imm32 from EAX :(scenario compare_imm32_with_eax_equal) % Reg[EAX].i = 0x0d0c0b0a; +== 0x1 # op ModR/M SIB displacement immediate 3d 0a 0b 0c 0d # compare 0x0d0c0b0a with EAX +run: compare EAX and imm32 0x0d0c0b0a @@ -286,6 +298,7 @@ case 0x3d: { // subtract imm32 from EAX :(scenario compare_imm32_with_r32_greater) % Reg[EBX].i = 0x0d0c0b0a; +== 0x1 # op ModR/M SIB displacement immediate 81 fb 07 0b 0c 0d # compare 0x0d0c0b07 with EBX # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX) @@ -307,6 +320,7 @@ case 7: { :(scenario compare_imm32_with_r32_lesser) % Reg[EBX].i = 0x0d0c0b07; +== 0x1 # op ModR/M SIB displacement immediate 81 fb 0a 0b 0c 0d # compare 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX) @@ -316,6 +330,7 @@ case 7: { :(scenario compare_imm32_with_r32_equal) % Reg[EBX].i = 0x0d0c0b0a; +== 0x1 # op ModR/M SIB displacement immediate 81 fb 0a 0b 0c 0d # compare 0x0d0c0b0a with EBX # ModR/M in binary: 11 (direct mode) 111 (compare imm32) 011 (dest EBX) @@ -363,6 +378,7 @@ case 7: { //:: copy (mov) :(scenario copy_imm32_to_r32) +== 0x1 # op ModR/M SIB displacement immediate bb 0a 0b 0c 0d # copy 0x0d0c0b0a to EBX +run: copy imm32 0x0d0c0b0a to EBX @@ -387,6 +403,7 @@ case 0xbf: { // copy imm32 to r32 :(scenario copy_imm32_to_mem_at_r32) % Reg[EBX].i = 0x60; +== 0x1 # op ModR/M SIB displacement immediate c7 03 0a 0b 0c 0d # copy 0x0d0c0b0a to *EBX # ModR/M in binary: 00 (indirect mode) 000 (unused) 011 (dest EBX) @@ -407,6 +424,7 @@ case 0xc7: { // copy imm32 to r32 :(scenario push_imm32) % Reg[ESP].u = 0x14; +== 0x1 # op ModR/M SIB displacement immediate 68 af 00 00 00 # push *EAX to stack +run: push imm32 0x000000af diff --git a/subx/016jump_relative.cc b/subx/016jump_relative.cc index 2d3bbb2b..061a947f 100644 --- a/subx/016jump_relative.cc +++ b/subx/016jump_relative.cc @@ -3,6 +3,7 @@ //:: jump :(scenario jump_rel8) +== 0x1 # op ModR/M SIB displacement immediate eb 05 # skip 1 instruction 05 00 00 00 01 @@ -24,6 +25,7 @@ case 0xeb: { // jump rel8 :(scenario je_rel8_success) % ZF = true; +== 0x1 # op ModR/M SIB displacement immediate 74 05 # skip 1 instruction 05 00 00 00 01 @@ -45,6 +47,7 @@ case 0x74: { // jump rel8 if ZF :(scenario je_rel8_fail) % ZF = false; +== 0x1 # op ModR/M SIB displacement immediate 74 05 # skip 1 instruction 05 00 00 00 01 @@ -58,6 +61,7 @@ case 0x74: { // jump rel8 if ZF :(scenario jne_rel8_success) % ZF = false; +== 0x1 # op ModR/M SIB displacement immediate 75 05 # skip 1 instruction 05 00 00 00 01 @@ -79,6 +83,7 @@ case 0x75: { // jump rel8 unless ZF :(scenario jne_rel8_fail) % ZF = true; +== 0x1 # op ModR/M SIB displacement immediate 75 05 # skip 1 instruction 05 00 00 00 01 @@ -94,6 +99,7 @@ case 0x75: { // jump rel8 unless ZF % ZF = false; % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 7f 05 # skip 1 instruction 05 00 00 00 01 @@ -117,6 +123,7 @@ case 0x7f: { // jump rel8 if !SF and !ZF % ZF = false; % SF = true; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 7f 05 # skip 1 instruction 05 00 00 00 01 @@ -131,6 +138,7 @@ case 0x7f: { // jump rel8 if !SF and !ZF :(scenario jge_rel8_success) % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 7d 05 # skip 1 instruction 05 00 00 00 01 @@ -153,6 +161,7 @@ case 0x7d: { // jump rel8 if !SF :(scenario jge_rel8_fail) % SF = true; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 7d 05 # skip 1 instruction 05 00 00 00 01 @@ -168,6 +177,7 @@ case 0x7d: { // jump rel8 if !SF % ZF = false; % SF = true; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 7c 05 # skip 1 instruction 05 00 00 00 01 @@ -191,6 +201,7 @@ case 0x7c: { // jump rel8 if SF and !ZF % ZF = false; % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 7c 05 # skip 1 instruction 05 00 00 00 01 @@ -206,6 +217,7 @@ case 0x7c: { // jump rel8 if SF and !ZF % ZF = true; % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 7e 05 # skip 1 instruction 05 00 00 00 01 @@ -219,6 +231,7 @@ case 0x7c: { // jump rel8 if SF and !ZF % ZF = false; % SF = true; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 7e 05 # skip 1 instruction 05 00 00 00 01 @@ -242,6 +255,7 @@ case 0x7e: { // jump rel8 if SF or ZF % ZF = false; % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 7e 05 # skip 1 instruction 05 00 00 00 01 diff --git a/subx/017jump_relative.cc b/subx/017jump_relative.cc index 8b8452bb..711a5b6a 100644 --- a/subx/017jump_relative.cc +++ b/subx/017jump_relative.cc @@ -3,6 +3,7 @@ //:: jump :(scenario jump_rel16) +== 0x1 # op ModR/M SIB displacement immediate e9 05 00 # skip 1 instruction 05 00 00 00 01 @@ -30,6 +31,7 @@ int16_t imm16() { :(scenario je_rel16_success) % ZF = true; +== 0x1 # op ModR/M SIB displacement immediate 0f 84 05 00 # skip 1 instruction 05 00 00 00 01 @@ -51,6 +53,7 @@ case 0x84: { // jump rel16 if ZF :(scenario je_rel16_fail) % ZF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 84 05 00 # skip 1 instruction 05 00 00 00 01 @@ -64,6 +67,7 @@ case 0x84: { // jump rel16 if ZF :(scenario jne_rel16_success) % ZF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 85 05 00 # skip 1 instruction 05 00 00 00 01 @@ -85,6 +89,7 @@ case 0x85: { // jump rel16 unless ZF :(scenario jne_rel16_fail) % ZF = true; +== 0x1 # op ModR/M SIB displacement immediate 0f 85 05 00 # skip 1 instruction 05 00 00 00 01 @@ -100,6 +105,7 @@ case 0x85: { // jump rel16 unless ZF % ZF = false; % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 8f 05 00 # skip 1 instruction 05 00 00 00 01 @@ -123,6 +129,7 @@ case 0x8f: { // jump rel16 if !SF and !ZF % ZF = false; % SF = true; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 8f 05 00 # skip 1 instruction 05 00 00 00 01 @@ -137,6 +144,7 @@ case 0x8f: { // jump rel16 if !SF and !ZF :(scenario jge_rel16_success) % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 8d 05 00 # skip 1 instruction 05 00 00 00 01 @@ -159,6 +167,7 @@ case 0x8d: { // jump rel16 if !SF :(scenario jge_rel16_fail) % SF = true; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 8d 05 00 # skip 1 instruction 05 00 00 00 01 @@ -174,6 +183,7 @@ case 0x8d: { // jump rel16 if !SF % ZF = false; % SF = true; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 8c 05 00 # skip 1 instruction 05 00 00 00 01 @@ -197,6 +207,7 @@ case 0x8c: { // jump rel16 if SF and !ZF % ZF = false; % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 8c 05 00 # skip 1 instruction 05 00 00 00 01 @@ -212,6 +223,7 @@ case 0x8c: { // jump rel16 if SF and !ZF % ZF = true; % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 8e 05 00 # skip 1 instruction 05 00 00 00 01 @@ -225,6 +237,7 @@ case 0x8c: { // jump rel16 if SF and !ZF % ZF = false; % SF = true; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 8e 05 00 # skip 1 instruction 05 00 00 00 01 @@ -248,6 +261,7 @@ case 0x8e: { // jump rel16 if SF or ZF % ZF = false; % SF = false; % OF = false; +== 0x1 # op ModR/M SIB displacement immediate 0f 8e 05 00 # skip 1 instruction 05 00 00 00 01 diff --git a/subx/018functions.cc b/subx/018functions.cc index 163894f2..c4fb5d23 100644 --- a/subx/018functions.cc +++ b/subx/018functions.cc @@ -2,6 +2,7 @@ :(scenario call_imm32) % Reg[ESP].u = 0x64; +== 0x1 # op ModR/M SIB displacement immediate e8 a0 00 00 00 # call function offset at 0x000000a0 # next EIP is 6 @@ -25,6 +26,7 @@ case 0xe8: { // call imm32 relative to next EIP :(scenario call_r32) % Reg[ESP].u = 0x64; % Reg[EBX].u = 0x000000a0; +== 0x1 # op ModR/M SIB displacement immediate ff d3 # call function offset at EBX # next EIP is 3 diff --git a/subx/020elf.cc b/subx/020elf.cc index 21d0d6bf..e61212b1 100644 --- a/subx/020elf.cc +++ b/subx/020elf.cc @@ -1,4 +1,5 @@ -// https://github.com/kragen/stoneknifeforth/blob/702d2ebe1b/386.c +// Helper for debugging and testing. +// Based on https://github.com/kragen/stoneknifeforth/blob/702d2ebe1b/386.c :(before "End Main") assert(argc > 1); diff --git a/subx/021translate.cc b/subx/021translate.cc index d33755ab..f9da5f19 100644 --- a/subx/021translate.cc +++ b/subx/021translate.cc @@ -7,47 +7,29 @@ //: passing through what it doesn't understand. The final program should be //: just machine code, suitable to write to an ELF binary. -:(before "End Types") -typedef void (*transform_fn)(const string& input, string& output); -:(before "End Globals") -vector<transform_fn> Transform; - :(before "End Main") if (is_equal(argv[1], "translate")) { assert(argc > 3); - string program; - slurp(argv[2], program); - perform_all_transforms(program); - dump_elf(program, argv[3]); + program p; + ifstream fin(argv[2]); + parse(fin, p); + if (trace_contains_errors()) return 1; + transform(p); + if (trace_contains_errors()) return 1; + dump_elf(p, argv[3]); } :(code) -void perform_all_transforms(string& program) { - string& in = program; - string out; - for (int t = 0; t < SIZE(Transform); ++t, in.swap(out), out.clear()) - (*Transform.at(t))(in, out); -} - -// write out the current Memory contents from address 1 to End_of_program to a -// bare-bones ELF file with a single section/segment and a hard-coded origin address. -void dump_elf(const string& program, const char* filename) { - initialize_mem(); - // load program into memory, filtering out comments - load_program(program); // Not where 'program' should be loaded for running. - // But we're not going to run it right now, so we - // can load it anywhere. - // dump contents of memory into ELF binary +// write out a program to a bare-bones ELF file +void dump_elf(const program& p, const char* filename) { ofstream out(filename, ios::binary); - dump_elf_header(out); - for (size_t i = 1; i < End_of_program; ++i) { - char c = read_mem_u8(i); - out.write(&c, sizeof(c)); - } + dump_elf_header(out, p); + for (size_t i = 0; i < p.segments.size(); ++i) + dump_segment(p.segments.at(i), out); out.close(); } -void dump_elf_header(ostream& out) { +void dump_elf_header(ostream& out, const program& p) { char c = '\0'; #define O(X) c = (X); out.write(&c, sizeof(c)) // host is required to be little-endian @@ -66,10 +48,10 @@ void dump_elf_header(ostream& out) { // e_version O(0x01); O(0x00); O(0x00); O(0x00); // e_entry - int e_entry = CODE_START + /*size of ehdr*/52 + /*size of phdr*/32; + int e_entry = p.segments.at(0).start; // convention emit(e_entry); // e_phoff -- immediately after ELF header - int e_phoff = 52; + int e_phoff = 0x34; emit(e_phoff); // e_shoff; unused int dummy32 = 0; @@ -77,13 +59,13 @@ void dump_elf_header(ostream& out) { // e_flags; unused emit(dummy32); // e_ehsize - uint16_t e_ehsize = 52; + uint16_t e_ehsize = 0x34; emit(e_ehsize); // e_phentsize uint16_t e_phentsize = 0x20; emit(e_phentsize); // e_phnum - uint16_t e_phnum = 0x1; + uint16_t e_phnum = SIZE(p.segments); emit(e_phnum); // e_shentsize uint16_t dummy16 = 0x0; @@ -93,47 +75,54 @@ void dump_elf_header(ostream& out) { // e_shstrndx emit(dummy16); - //// phdr - // p_type - uint32_t p_type = 0x1; - emit(p_type); - // p_offset - uint32_t p_offset = /*size of ehdr*/52 + /*size of phdr*/32; - emit(p_offset); - // p_vaddr - emit(e_entry); - // p_paddr - emit(e_entry); - // p_filesz - uint32_t size = End_of_program - /*we're not using location 0*/1; - assert(size < SEGMENT_SIZE); - emit(size); - // p_memsz - emit(size); - // p_flags - uint32_t p_flags = 0x5; // r-x - emit(p_flags); - // p_align - uint32_t p_align = 0x4; // p_offset must be congruent to p_paddr/p_vaddr modulo p_align - emit(p_align); + uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/; + for (int i = 0; i < SIZE(p.segments); ++i) { + //// phdr + // p_type + uint32_t p_type = 0x1; + emit(p_type); + // p_offset + emit(p_offset); + // p_vaddr + emit(e_entry); + // p_paddr + emit(e_entry); + // p_filesz + uint32_t size = size_of(p.segments.at(i)); + assert(size < SEGMENT_SIZE); + emit(size); + // p_memsz + emit(size); + // p_flags + uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6; // convention: only first segment is code + emit(p_flags); + // p_align + uint32_t p_align = 0x4; + emit(p_align); + + // prepare for next segment + p_offset += size; + } #undef O #undef emit } -void slurp(const char* filename, string& out) { - ifstream fin(filename); - fin >> std::noskipws; - ostringstream fout; - char c = '\0'; - while(has_data(fin)) { - fin >> c; - fout << c; +void dump_segment(const segment& s, ostream& out) { + for (int i = 0; i < SIZE(s.lines); ++i) { + const vector<word>& w = s.lines.at(i).words; + for (int j = 0; j < SIZE(w); ++j) { + uint8_t x = hex_byte(w.at(j).data); // we're done with metadata by this point + out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1); + } } - fout.str().swap(out); } -:(after "Begin run() For Scenarios") -perform_all_transforms(text_bytes); +uint32_t size_of(const segment& s) { + uint32_t sum = 0; + for (int i = 0; i < SIZE(s.lines); ++i) + sum += SIZE(s.lines.at(i).words); + return sum; +} :(before "End Includes") using std::ios; diff --git a/subx/022transform_immediate.cc b/subx/022transform_immediate.cc index 24539f10..b1825fda 100644 --- a/subx/022transform_immediate.cc +++ b/subx/022transform_immediate.cc @@ -1,8 +1,13 @@ //: Having to manually translate numbers into hex and enter them in //: little-endian order is tedious and error-prone. Let's automate the //: translation. +//: +//: We'll convert any immediate operands from decimal to hex and emit the +//: appropriate number of bytes. If they occur in a non-code segment we'll +//: raise an error. :(scenario translate_immediate_constants) +== 0x1 # opcode ModR/M SIB displacement immediate # instruction mod, reg, Reg/Mem bits scale, index, base # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes @@ -13,6 +18,7 @@ #: we don't have a testable instruction using 8-bit immediates yet, so can't run this instruction :(scenarios transform) :(scenario translate_imm8) +== 0x1 cd 128/imm8 +translate: converting '128/imm8' to '80' :(scenarios run) @@ -21,106 +27,130 @@ Transform.push_back(transform_immediate); :(code) -void transform_immediate(const string& input, string& output) { - istringstream in(input); - ostringstream out; - while (has_data(in)) { - string line_data; - getline(in, line_data); - istringstream line(line_data); - while (has_data(line)) { - string word; - line >> word; - if (word.empty()) continue; - if (word == "==") { - // new segment - uint32_t addr = 0; - line >> std::hex >> addr; - out << "== " << HEXWORD << addr; - break; // skip rest of line - } - if (word[0] == ':') { - // skip line metadata - break; - } - if (word[0] == '#') { - // skip comment - break; - } - if (word.find("/imm") == string::npos) { - out << word << ' '; - } - else { - string output = transform_immediate(word); - trace("translate") << "converting '" << word << "' to '" << output << "'" << end(); - out << output << ' '; - } +void transform_immediate(program& p) { + if (p.segments.empty()) return; + transform_immediate(p.segments.at(0)); + for (int i = 1; i < SIZE(p.segments); ++i) + flag_immediate(p.segments.at(i)); +} + +void transform_immediate(segment& seg) { + for (int i = 0; i < SIZE(seg.lines); ++i) { + for (int j = 0; j < SIZE(seg.lines.at(i).words); ++j) { + if (contains_immediate_metadata(seg.lines.at(i).words.at(j))) + transform_immediate(seg.lines.at(i).words, j); } - out << '\n'; } - out.str().swap(output); } -string transform_immediate(const string& word) { - istringstream in(word); // 'word' is guaranteed to have no whitespace - string data = slurp_until(in, '/'); - istringstream in2(data); - int value = 0; - in2 >> value; - ostringstream out; - string type = next_word(in); - if (type == "imm32") emit_octets(value, 4, out); - else if (type == "imm8") emit_octets(value, 1, out); - else raise << "unknown immediate tag /" << type << '\n' << end(); - return out.str(); +void transform_immediate(vector<word>& line, int index) { + assert(index < SIZE(line)); + if (contains_imm8_metadata(line.at(index))) + transform_imm8(line.at(index)); + else + transform_imm32(line, index); } -void emit_octets(int value, int num_octets, ostream& out) { - for (int i = 0; i < num_octets; ++i) { - if (i > 0) out << ' '; - out << HEXBYTE << (value & 0xff); - value = value >> 8; +void transform_imm8(word& w) { + // convert decimal to hex + uint32_t val = parse_decimal(w.data); + if (trace_contains_errors()) return; + if (val > 0xff) { + raise << "invalid /imm8 word " << w.data << '\n' << end(); + return; } + w.data = serialize_hex(val); + trace("translate") << "converting '" << w.original << "' to '" << w.data << "'" << end(); } -string slurp_until(istream& in, char delim) { +void transform_imm32(vector<word>& line, int index) { + vector<word>::iterator find(vector<word>&, int); + vector<word>::iterator x = find(line, index); + uint32_t val = parse_decimal(x->data); + if (trace_contains_errors()) return; + string orig = x->original; + x = line.erase(x); + emit_octets(line, x, val, orig); +} + +vector<word>::iterator find(vector<word>& l, int index) { + if (index >= SIZE(l)) { + raise << "find: index too large: " << index << " vs " << SIZE(l) << '\n' << end(); + return l.end(); + } + vector<word>::iterator result = l.begin(); + for (int i = 0; i < index; ++i) + ++result; + return result; +} + +void emit_octets(vector<word>& line, vector<word>::iterator pos, uint32_t val, const string& orig) { + vector<word> new_data; + for (int i = 0; i < /*num bytes*/4; ++i) { + word tmp; + tmp.data = serialize_hex(val & 0xff); // little-endian + new_data.push_back(tmp); + val = val >> 8; + } + trace("translate") << "converting '" << orig << "' to '" << to_string(new_data) << "'" << end(); + line.insert(pos, new_data.begin(), new_data.end()); +} + +string to_string(const vector<word>& in) { ostringstream out; - char c; - while (in >> c) { - if (c == delim) { - // drop the delim - break; - } - out << c; + for (int i = 0; i < SIZE(in); ++i) { + if (i > 0) out << ' '; + out << HEXBYTE << in.at(i).data; } return out.str(); } -string next_word(istream& in) { - skip_whitespace_and_comments(in); - string result; +uint32_t parse_decimal(const string& s) { + istringstream in(s); + uint32_t result = 0; in >> result; + if (!in) { + raise << "not a number: " << s << '\n' << end(); + return 0; + } return result; } -void skip_whitespace_and_comments(istream& in) { - while (true) { - char c = in.peek(); - if (isspace(c)) { in.get(); continue; } - else if (c == '#') skip_comment(in); - else return; +string serialize_hex(const int val) { + ostringstream out; + out << std::hex << val; + return out.str(); +} + +void flag_immediate(const segment& s) { + for (int i = 0; i < SIZE(s.lines); ++i) + for (int j = 0; j < SIZE(s.lines.at(i).words); ++j) + if (contains_immediate_metadata(s.lines.at(i).words.at(j))) + raise << "/imm8 and /imm32 only permitted in code segments, and we currently only allow the very first segment to be code.\n" << end(); +} + +bool contains_immediate_metadata(const word& curr) { + for (int k = 0; k < SIZE(curr.metadata); ++k) { + if (curr.metadata.at(k) == "imm8" + || curr.metadata.at(k) == "imm32") + return true; } + return false; } -void skip_comment(istream& in) { - assert(in.peek() == '#'); - char c = '\0'; - do { - in >> c; - } while (c != '\n'); +bool contains_imm8_metadata(const word& curr) { + for (int k = 0; k < SIZE(curr.metadata); ++k) { + if (curr.metadata.at(k) == "imm8") + return true; + } + return false; } // helper -void transform(string/*copy*/ in) { - perform_all_transforms(in); +void transform(const string& text_bytes) { + program p; + istringstream in(text_bytes); + parse(in, p); + if (trace_contains_errors()) return; + transform(p); } diff --git a/subx/ex1.1.subx b/subx/ex1.1.subx index 112fb86f..6b601926 100644 --- a/subx/ex1.1.subx +++ b/subx/ex1.1.subx @@ -4,6 +4,7 @@ # $ subx translate ex1.1.subx ex1 # $ subx run ex1 +== 0x08048054 # code segment, after leaving room for ELF header # opcode ModR/M SIB displacement immediate # instruction mod, reg, Reg/Mem bits scale, index, base # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/ex1.2.subx b/subx/ex1.2.subx index cc8b55da..03b7ea9c 100644 --- a/subx/ex1.2.subx +++ b/subx/ex1.2.subx @@ -4,6 +4,7 @@ # $ subx translate ex1.2.subx ex1 # $ subx run ex1 +== 0x08048054 # code segment, after leaving room for ELF header # opcode ModR/M SIB displacement immediate # instruction mod, reg, Reg/Mem bits scale, index, base # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/ex2.subx b/subx/ex2.subx index 39829cef..aa1bcbf4 100644 --- a/subx/ex2.subx +++ b/subx/ex2.subx @@ -1,5 +1,6 @@ ## add 1 and 1 +== 0x08048054 # code segment, after leaving room for ELF header # opcode ModR/M SIB displacement immediate # instruction mod, reg, Reg/Mem bits scale, index, base # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/ex3.subx b/subx/ex3.subx index 06249ebf..39bcf3e2 100644 --- a/subx/ex3.subx +++ b/subx/ex3.subx @@ -1,5 +1,6 @@ ## add the first 10 numbers +== 0x08048054 # code segment, after leaving room for ELF header # opcode ModR/M SIB displacement immediate # instruction mod, reg, Reg/Mem bits scale, index, base # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes diff --git a/subx/ex4.subx b/subx/ex4.subx index 303cba1e..378b1a35 100644 --- a/subx/ex4.subx +++ b/subx/ex4.subx @@ -1,5 +1,6 @@ ## read a character from stdin +== 0x08048054 # code segment, after leaving room for ELF header # opcode ModR/M SIB displacement immediate # instruction mod, reg, Reg/Mem bits scale, index, base # 1-3 bytes 0/1 byte 0/1 byte 0/1/2/4 bytes 0/1/2/4 bytes |