diff options
author | Kartik Agaram <vc@akkartik.com> | 2018-09-01 23:03:50 -0700 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2018-09-01 23:03:50 -0700 |
commit | a6517ed821ff39fa56eb3b975799d22497d78a48 (patch) | |
tree | b934f09748be6c9dce80d4caf926d0f8dd2c9916 /subx | |
parent | f39c01128dfc6e46828cbab992d4b9e0ba1cb1e5 (diff) | |
download | mu-a6517ed821ff39fa56eb3b975799d22497d78a48.tar.gz |
4535 - support for global variable names
Diffstat (limited to 'subx')
-rw-r--r-- | subx/034compute_segment_address.cc | 28 | ||||
-rw-r--r-- | subx/035labels.cc | 50 | ||||
-rw-r--r-- | subx/036global_variables.cc | 111 | ||||
-rw-r--r-- | subx/examples/ex4.subx | 7 | ||||
-rw-r--r-- | subx/examples/ex6.subx | 12 | ||||
-rwxr-xr-x | subx/examples/ex7 | bin | 313 -> 313 bytes | |||
-rw-r--r-- | subx/examples/ex7.subx | 72 |
7 files changed, 203 insertions, 77 deletions
diff --git a/subx/034compute_segment_address.cc b/subx/034compute_segment_address.cc index ee2b1bb4..4a661742 100644 --- a/subx/034compute_segment_address.cc +++ b/subx/034compute_segment_address.cc @@ -2,6 +2,7 @@ //: segment. //: This gives up a measure of control in placing code and data. +//: segment address computation requires setting Mem_offset in test mode to what it'll be in run mode :(scenario segment_name) % Mem_offset = CODE_START; == code @@ -20,12 +21,35 @@ Transform.push_back(compute_segment_starts); :(code) void compute_segment_starts(program& p) { + trace(99, "transform") << "-- compute segment addresses" << end(); uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/; for (size_t i = 0; i < p.segments.size(); ++i) { segment& curr = p.segments.at(i); - if (curr.start == 0) + if (curr.start == 0) { curr.start = CODE_START + i*SEGMENT_SIZE + p_offset; - p_offset += num_words(curr); + trace(99, "transform") << "segment " << i << " begins at address " << curr.start << end(); + } + p_offset += size_of(curr); assert(p_offset < SEGMENT_SIZE); // for now we get less and less available space in each successive segment } } + +uint32_t size_of(const segment& s) { + uint32_t sum = 0; + for (int i = 0; i < SIZE(s.lines); ++i) + sum += num_bytes(s.lines.at(i)); + return sum; +} + +// Assumes all bitfields are packed. +uint32_t num_bytes(const line& inst) { + uint32_t sum = 0; + for (int i = 0; i < SIZE(inst.words); ++i) { + const word& curr = inst.words.at(i); + if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) // only multi-byte operands + sum += 4; + else + sum++; + } + return sum; +} diff --git a/subx/035labels.cc b/subx/035labels.cc index 659e4391..8c3ce1dd 100644 --- a/subx/035labels.cc +++ b/subx/035labels.cc @@ -1,5 +1,5 @@ //: Labels are defined by ending names with a ':'. This layer will compute -//: addresses for labels, and compute the offset for instructions using them. 
+//: displacements for labels, and compute the offset for instructions using them. //: //: We won't check this, but our convention will be that jump targets will //: start with a '$', while functions will not. Function names will never be @@ -52,16 +52,15 @@ void rewrite_labels(program& p) { trace(99, "transform") << "-- rewrite labels" << end(); if (p.segments.empty()) return; segment& code = p.segments.at(0); - // Rewrite Labels(segment code) - map<string, int32_t> address; // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits - compute_addresses_for_labels(code, address); + map<string, int32_t> byte_index; // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits + compute_byte_indices_for_labels(code, byte_index); if (trace_contains_errors()) return; drop_labels(code); if (trace_contains_errors()) return; - replace_labels_with_addresses(code, address); + replace_labels_with_displacements(code, byte_index); } -void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) { +void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) { int current_byte = 0; for (int i = 0; i < SIZE(code.lines); ++i) { const line& inst = code.lines.at(i); @@ -90,7 +89,7 @@ void compute_addresses_for_labels(const segment& code, map<string, int32_t>& add raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end(); if (j > 0) raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end(); - put(address, label, current_byte); + put(byte_index, label, current_byte); trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end(); // no modifying current_byte; label definitions won't be in the final binary } @@ -110,30 +109,30 @@ bool is_label(const word& w) { return *w.data.rbegin() == ':'; } -void replace_labels_with_addresses(segment& 
code, const map<string, int32_t>& address) { - int32_t byte_next_instruction_starts_at = 0; +void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) { + int32_t byte_index_next_instruction_starts_at = 0; for (int i = 0; i < SIZE(code.lines); ++i) { line& inst = code.lines.at(i); - byte_next_instruction_starts_at += num_bytes(inst); + byte_index_next_instruction_starts_at += num_bytes(inst); line new_inst; for (int j = 0; j < SIZE(inst.words); ++j) { const word& curr = inst.words.at(j); - if (contains_key(address, curr.data)) { - int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at; + if (contains_key(byte_index, curr.data)) { + int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at; if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) { - if (offset > 0xff || offset < -0x7f) - raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end(); + if (displacement > 0xff || displacement < -0x7f) + raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end(); else - emit_hex_bytes(new_inst, offset, 1); + emit_hex_bytes(new_inst, displacement, 1); } else if (has_metadata(curr, "disp16")) { - if (offset > 0xffff || offset < -0x7fff) - raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end(); + if (displacement > 0xffff || displacement < -0x7fff) + raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end(); else - emit_hex_bytes(new_inst, offset, 2); + emit_hex_bytes(new_inst, displacement, 2); } else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) { - emit_hex_bytes(new_inst, offset, 4); + emit_hex_bytes(new_inst, 
displacement, 4); } } else { @@ -145,19 +144,6 @@ void replace_labels_with_addresses(segment& code, const map<string, int32_t>& ad } } -// Assumes all bitfields are packed. -uint32_t num_bytes(const line& inst) { - uint32_t sum = 0; - for (int i = 0; i < SIZE(inst.words); ++i) { - const word& curr = inst.words.at(i); - if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) // only multi-byte operands - sum += 4; - else - sum++; - } - return sum; -} - string data_to_string(const line& inst) { ostringstream out; for (int i = 0; i < SIZE(inst.words); ++i) { diff --git a/subx/036global_variables.cc b/subx/036global_variables.cc new file mode 100644 index 00000000..7a6463e0 --- /dev/null +++ b/subx/036global_variables.cc @@ -0,0 +1,111 @@ +//: Global variables. +//: +//: Global variables are just labels in the data segment. +//: However, they can only be used in imm32 and not disp32 operands. And they +//: can't be used with jump and call instructions. +//: +//: This layer has much the same structure as rewriting labels. 
+ +:(scenario global_variable) +% Mem_offset = CODE_START; +% Mem.resize(0x2000); +== code +b9/copy x/imm32 # copy to ECX +== data +x: +00 00 00 00 ++transform: global variable 'x' is at address 0x08049079 + +:(before "End Level-2 Transforms") +Transform.push_back(rewrite_global_variables); +:(code) +void rewrite_global_variables(program& p) { + trace(99, "transform") << "-- rewrite global variables" << end(); + map<string, uint32_t> address; + compute_addresses_for_global_variables(p, address); + if (trace_contains_errors()) return; + drop_global_variables(p); + replace_global_variables_with_addresses(p, address); +} + +void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) { + for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) + compute_addresses_for_global_variables(p.segments.at(i), address); +} + +void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) { + int current_address = s.start; + for (int i = 0; i < SIZE(s.lines); ++i) { + const line& inst = s.lines.at(i); + for (int j = 0; j < SIZE(inst.words); ++j) { + const word& curr = inst.words.at(j); + if (*curr.data.rbegin() != ':') { + ++current_address; + } + else { + string variable = drop_last(curr.data); + // ensure variables look sufficiently different from raw hex + check_valid_name(variable); + if (trace_contains_errors()) return; + if (j > 0) + raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end(); + put(address, variable, current_address); + trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end(); + // no modifying current_address; global variable definitions won't be in the final binary + } + } + } +} + +void drop_global_variables(program& p) { + for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) + drop_labels(p.segments.at(i)); +} + +void 
replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) { + if (p.segments.empty()) return; + segment& code = p.segments.at(0); + for (int i = 0; i < SIZE(code.lines); ++i) { + line& inst = code.lines.at(i); + line new_inst; + for (int j = 0; j < SIZE(inst.words); ++j) { + const word& curr = inst.words.at(j); + if (contains_key(address, curr.data)) { + uint32_t value = get(address, curr.data); + if (!has_metadata(curr, "imm32")) + raise << "'" << to_string(inst) << "': data variables should always be in '/imm32' operands\n" << end(); + emit_hex_bytes(new_inst, value, 4); + } + else { + new_inst.words.push_back(curr); + } + } + inst.words.swap(new_inst.words); + trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); + } +} + +:(scenario global_variable_disallowed_in_jump) +% Mem_offset = CODE_START; +% Hide_errors = true; +== code +eb/jump x/disp8 +== data +x: +00 00 00 00 ++error: 'eb/jump x/disp8': data variables should always be in '/imm32' operands +# sub-optimal error message; should be +#? +error: can't jump to data (variable 'x') + +:(scenario global_variable_disallowed_in_call) +% Mem_offset = CODE_START; +% Hide_errors = true; +== code +e8/call x/disp32 +== data +x: +00 00 00 00 ++error: 'e8/call x/disp32': data variables should always be in '/imm32' operands +# sub-optimal error message; should be +#? +error: can't call a data variable ('x') +# also, what about function pointers? 
diff --git a/subx/examples/ex4.subx b/subx/examples/ex4.subx index 2f5b0e73..89b3bdc9 100644 --- a/subx/examples/ex4.subx +++ b/subx/examples/ex4.subx @@ -13,7 +13,7 @@ # fd = 0 (stdin) bb/copy 0/imm32 # copy 0 to EBX # initialize x (location to write result to) - b9/copy 0x080490a7/imm32 # copy to ECX + b9/copy x/imm32 # copy to ECX # size = 1 character ba/copy 1/imm32 # copy 1 to EDX # read(fd, x, size) @@ -24,7 +24,7 @@ # fd = 1 (stdout) bb/copy 1/imm32 # copy 1 to EBX # initialize x (location to read from) - b9/copy 0x080490a7/imm32 # copy to ECX + b9/copy x/imm32 # copy to ECX # size = 1 character ba/copy 1/imm32 # copy 1 to EDX # write(fd, x, size) @@ -35,7 +35,8 @@ b8/copy 1/imm32 # copy 1 to EAX cd/syscall 0x80/imm8 # int 80h -== 0x080490a7 # data segment +== data +x: 00 00 00 00 # space for read() to write to # vim:ft=subx:nowrap diff --git a/subx/examples/ex6.subx b/subx/examples/ex6.subx index 3d05f00a..48dbb7a3 100644 --- a/subx/examples/ex6.subx +++ b/subx/examples/ex6.subx @@ -14,9 +14,9 @@ # fd = 1 (stdout) bb/copy 1/imm32 # copy 1 to EBX # initialize x (location to write result to) - b9/copy 0x08049097/imm32 # copy to ECX + b9/copy x/imm32 # copy to ECX # initialize size - ba/copy 0x08049093/imm32 # copy to EDX + ba/copy size/imm32 # copy to EDX 8b/copy 0/mod/indirect 2/rm32/EDX 2/r32/EDX # copy *EDX to EDX # write(fd, x, size) b8/copy 4/imm32 # copy 4 to EAX @@ -26,10 +26,10 @@ b8/copy 1/imm32 # copy 1 to EAX cd/syscall 0x80/imm8 # int 80h -== 0x08049093 # data segment -# size of string -0e 00 00 00 -# string +== data +size: # size of string + 0e 00 00 00 # 14 +x: # string to print 48 65 6c 6c 6f 2c 20 77 6f 72 6c 64 21 0a 00 # h e l l o , ␣ w o r l d ! 
newline null diff --git a/subx/examples/ex7 b/subx/examples/ex7 index d756271e..39f6f98d 100755 --- a/subx/examples/ex7 +++ b/subx/examples/ex7 Binary files differ diff --git a/subx/examples/ex7.subx b/subx/examples/ex7.subx index 17a38e29..6e0294d9 100644 --- a/subx/examples/ex7.subx +++ b/subx/examples/ex7.subx @@ -16,84 +16,88 @@ # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes ## creat(filename) - bb/copy . . . . . . . 0x08049131/imm32/fname # copy to EBX - b9/copy . . . . . . . 0x180/imm32/fixed-perms # copy 0 to ECX + bb/copy . . . . . . . filename/imm32 # copy to EBX + b9/copy . . . . . . . 0x180/imm32/fixed-perms # copy to ECX b8/copy . . . . . . . 8/imm32/creat # copy 8 to EAX cd/syscall . . . . . . . 0x80/imm8 # int 80h - ## fd = open(filename, O_WRONLY, 0) - bb/copy . . . . . . . 0x08049131/imm32/fname # copy to EBX + ## stream = open(filename, O_WRONLY, 0) # we can't use 'fd' because it looks like a hex byte + bb/copy . . . . . . . filename/imm32 # copy to EBX b9/copy . . . . . . . 1/imm32/wronly # copy 1 to ECX ba/copy . . . . . . . 0x180/imm32/fixed-perms # copy 0 to EDX b8/copy . . . . . . . 5/imm32/open # copy 5 to EAX cd/syscall . . . . . . . 0x80/imm8 # int 80h - # save fd - bb/copy . . . . . . . 0x08049125/imm32/fd # copy to EBX + # save stream + bb/copy . . . . . . . stream/imm32 # copy to EBX 89/copy 0/mod/indirect 3/rm32/EBX 0/r32/EAX # copy EAX to *EBX - ## write(fd, "a", 1) - # load fd - bb/copy . . . . . . . 0x08049125/imm32/fd # copy to EBX + ## write(stream, "a", 1) + # load stream + bb/copy . . . . . . . stream/imm32 # copy to EBX 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX # - b9/copy . . . . . . . 0x08049129/imm32/a # copy to ECX + b9/copy . . . . . . . a/imm32 # copy to ECX ba/copy . . . . . . . 1/imm32/size # copy 1 to EDX b8/copy . . . . . . . 4/imm32/write # copy 4 to EAX cd/syscall . . . . . . . 0x80/imm8 # int 80h - ## close(fd) - # load fd - bb/copy . . . . . . . 
0x08049125/imm32/fd # copy to EBX + ## close(stream) + # load stream + bb/copy . . . . . . . stream/imm32 # copy to EBX 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX # b8/copy . . . . . . . 6/imm32/close # copy 6 to EAX cd/syscall . . . . . . . 0x80/imm8 # int 80h - ## fd = open(filename, O_RDONLY, 0) - bb/copy . . . . . . . 0x08049131/imm32/fname # copy to EBX + ## stream = open(filename, O_RDONLY, 0) + bb/copy . . . . . . . filename/imm32 # copy to EBX b9/copy . . . . . . . 0/imm32/rdonly # copy 0 to ECX - ba/copy . . . . . . . 0x180/imm32/fixed-perms # copy 0 to EDX + ba/copy . . . . . . . 0x180/imm32/fixed-perms # copy to EDX b8/copy . . . . . . . 5/imm32/open # copy 5 to EAX cd/syscall . . . . . . . 0x80/imm8 # int 80h - # save fd - bb/copy . . . . . . . 0x08049125/imm32/fd # copy to EBX + # save stream + bb/copy . . . . . . . stream/imm32 # copy to EBX 89/copy 0/mod/indirect 3/rm32/EBX 0/r32/EAX # copy EAX to *EBX - ## read(fd, b, 1) - # load fd - bb/copy . . . . . . . 0x08049125/imm32/fd # copy to EBX + ## read(stream, b, 1) + # load stream + bb/copy . . . . . . . stream/imm32 # copy to EBX 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX # - b9/copy . . . . . . . 0x0804912d/imm32/b # copy to ECX + b9/copy . . . . . . . b/imm32 # copy to ECX ba/copy . . . . . . . 1/imm32/size # copy 1 to EDX b8/copy . . . . . . . 3/imm32/read # copy 3 to EAX cd/syscall . . . . . . . 0x80/imm8 # int 80h - ## close(fd) - # load fd - bb/copy . . . . . . . 0x08049125/imm32/fd # copy to EBX + ## close(stream) + # load stream + bb/copy . . . . . . . stream/imm32 # copy to EBX 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX # - b8/copy . . . . . . . 6/imm32/close # copy 8 to EAX + b8/copy . . . . . . . 6/imm32/close # copy 6 to EAX cd/syscall . . . . . . . 0x80/imm8 # int 80h ## unlink(filename) - bb/copy . . . . . . . 0x08049131/imm32/fname # copy to EBX - b8/copy . . . . . . . 0xa/imm32/unlink # copy 8 to EAX + bb/copy . . . . . . . 
filename/imm32 # copy to EBX + b8/copy . . . . . . . 0xa/imm32/unlink # copy 10 to EAX cd/syscall . . . . . . . 0x80/imm8 # int 80h ## exit(b) # load b - bb/copy . . . . . . . 0x0804912d/imm32/b # copy to EBX + bb/copy . . . . . . . b/imm32 # copy to EBX 8b/copy 0/mod/indirect 3/rm32/EBX 3/r32/EBX # copy *EBX to EBX # b8/copy . . . . . . . 1/imm32/exit # copy 1 to EAX cd/syscall . . . . . . . 0x80/imm8 # int 80h -== 0x08049125 # data segment -00 00 00 00 # fd -61 00 00 00 # a: string to write to file: 'a' -00 00 00 00 # b: space for string read from file -2e 66 6f 6f 00 00 00 00 # filename: '.foo' +== data +stream: +00 00 00 00 +a: +61 00 00 00 +b: +00 00 00 00 +filename: +2e 66 6f 6f 00 00 00 00 # vim:ft=subx:nowrap:tw& |