diff options
Diffstat (limited to 'subx')
-rw-r--r-- | subx/023check_operand_bounds.cc | 30 | ||||
-rw-r--r-- | subx/024pack_operands.cc | 17 | ||||
-rw-r--r-- | subx/026labels.cc | 77 | ||||
-rw-r--r-- | subx/ex3.subx | 9 |
4 files changed, 102 insertions, 31 deletions
diff --git a/subx/023check_operand_bounds.cc b/subx/023check_operand_bounds.cc index c868603a..cca4ab24 100644 --- a/subx/023check_operand_bounds.cc +++ b/subx/023check_operand_bounds.cc @@ -39,21 +39,29 @@ void check_operand_bounds(/*const*/ program& p) { void check_operand_bounds(const word& w) { for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) { - if (has_metadata(w, p->first)) { - int32_t x = parse_int(w.data); - if (x >= 0) { - if (static_cast<uint32_t>(x) >= p->second) - raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end(); - } - else { - // hacky? assuming bound is a power of 2 - if (x < -1*static_cast<int32_t>(p->second/2)) - raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end(); - } + if (!has_metadata(w, p->first)) continue; + if (!is_hex_int(w.data)) continue; // later transforms are on their own to do their own bounds checking + int32_t x = parse_int(w.data); + if (x >= 0) { + if (static_cast<uint32_t>(x) >= p->second) + raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end(); + } + else { + // hacky? assuming bound is a power of 2 + if (x < -1*static_cast<int32_t>(p->second/2)) + raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end(); } } } +bool is_hex_int(const string& s) { + if (s.empty()) return false; + size_t pos = 0; + if (s.at(0) == '-' || s.at(0) == '+') pos++; + if (s.substr(pos, pos+2) == "0x") pos += 2; + return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos; +} + int32_t parse_int(const string& s) { istringstream in(s); int32_t result = 0; diff --git a/subx/024pack_operands.cc b/subx/024pack_operands.cc index aa4fec7d..23e91c05 100644 --- a/subx/024pack_operands.cc +++ b/subx/024pack_operands.cc @@ -129,6 +129,8 @@ void add_disp_bytes(const line& in, line& out) { const word& curr = in.words.at(i); if (has_metadata(curr, "disp8")) emit_hex_bytes(out, curr, 1); + if (has_metadata(curr, "disp16")) + emit_hex_bytes(out, curr, 2); else if (has_metadata(curr, "disp32")) emit_hex_bytes(out, curr, 4); } @@ -150,26 +152,23 @@ void emit_hex_bytes(line& out, const word& w, int num) { out.words.push_back(w); return; } - uint32_t val = static_cast<uint32_t>(parse_int(w.data)); + emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num); +} + +void emit_hex_bytes(line& out, uint32_t val, int num) { + assert(num <= 4); for (int i = 0; i < num; ++i) { out.words.push_back(hex_byte_text(val & 0xff)); val = val >> 8; } } -bool is_hex_int(const string& s) { - if (s.empty()) return false; - size_t pos = 0; - if (s.at(0) == '-' || s.at(0) == '+') pos++; - if (s.substr(pos, pos+2) == "0x") pos += 2; - return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos; -} - word hex_byte_text(uint8_t val) { ostringstream out; out << HEXBYTE << NUM(val); word result; result.data = out.str(); + result.original = out.str()+"/auto"; return result; } diff --git a/subx/026labels.cc b/subx/026labels.cc index cc11eb0b..db6091d6 100644 --- a/subx/026labels.cc +++ b/subx/026labels.cc @@ -19,7 +19,7 @@ void rewrite_labels(program& p) { trace(99, "transform") << "-- rewrite labels" << end(); if (p.segments.empty()) return; segment& code = p.segments.at(0); - map<string, uint32_t> address; + map<string, int32_t> address; // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits compute_addresses_for_labels(code, address); if (trace_contains_errors()) return; drop_labels(code); @@ -27,7 +27,7 @@ void rewrite_labels(program& p) { replace_labels_with_addresses(code, address); } -void compute_addresses_for_labels(const segment& code, map<string, uint32_t> address) { +void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) { int current_byte = 0; for (int i = 0; i < SIZE(code.lines); ++i) { const line& inst = code.lines.at(i); @@ -64,7 +64,8 @@ void compute_addresses_for_labels(const segment& code, map<string, uint32_t> add void drop_labels(segment& code) { for (int i = 0; i < SIZE(code.lines); ++i) { line& inst = code.lines.at(i); - remove_if(inst.words.begin(), inst.words.end(), is_label); + vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label); + inst.words.erase(new_end, inst.words.end()); } } @@ -72,7 +73,61 @@ bool is_label(const word& w) { return *w.data.rbegin() == ':'; } -void replace_labels_with_addresses(const segment& code, map<string, uint32_t> address) { +void replace_labels_with_addresses(segment& code, const map<string, int32_t>& address) { + int32_t byte_next_instruction_starts_at = 0; + for (int i = 0; i < SIZE(code.lines); ++i) { + line& inst = code.lines.at(i); + byte_next_instruction_starts_at += num_bytes(inst); + line new_inst; + for (int j = 0; j < SIZE(inst.words); ++j) { + const word& curr = inst.words.at(j); + if (contains_key(address, curr.data)) { + int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at; + if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) { + if (offset > 0xff || offset < -0x7f) + raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end(); + else + emit_hex_bytes(new_inst, offset, 1); + } + else if (has_metadata(curr, "disp16")) { + if (offset > 0xffff || offset < -0x7fff) + raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end(); + else + emit_hex_bytes(new_inst, offset, 2); + } + else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) { + emit_hex_bytes(new_inst, offset, 4); + } + } + else { + new_inst.words.push_back(curr); + } + } + inst.words.swap(new_inst.words); + trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); + } +} + +// Assumes all bitfields are packed. +uint32_t num_bytes(const line& inst) { + uint32_t sum = 0; + for (int i = 0; i < SIZE(inst.words); ++i) { + const word& curr = inst.words.at(i); + if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) // only multi-byte operands + sum += 4; + else + sum++; + } + return sum; +} + +string data_to_string(const line& inst) { + ostringstream out; + for (int i = 0; i < SIZE(inst.words); ++i) { + if (i > 0) out << ' '; + out << inst.words.at(i).data; + } + return out.str(); } //: Label definitions must be the first word on a line. No jumping inside @@ -86,11 +141,21 @@ void replace_labels_with_addresses(const segment& code, map<string, uint32_t> ad # instruction effective address operand displacement immediate # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes +# address 1 loop: loop2: +# address 1 (labels take up no space) 05 0x0d0c0b0a/imm32 # add to EAX +# address 6 + eb loop2/disp8 +# address 8 + eb loop3/disp8 +# address 10 loop3: - f +transform: label 'loop' is at address 1 +transform: label 'loop2' is at address 1 -+transform: label 'loop3' is at address 6 ++transform: label 'loop3' is at address 10 +# first jump is to -7 ++transform: instruction after transform: 'eb f9' +# second jump is to 0 (fall through) ++transform: instruction after transform: 'eb 00' diff --git a/subx/ex3.subx b/subx/ex3.subx index 4dcb10e9..5b3fdb16 100644 --- a/subx/ex3.subx +++ b/subx/ex3.subx @@ -12,23 +12,22 @@ # op subop mod rm32 base index scale r32 # 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes # result: EBX = 0 -# 0: e_entry = 0x08048054 bb 0/imm32 # copy 0 to EBX # counter: ECX = 1 b9 1/imm32 # copy 1 to ECX -# 10: loop: 0x0804805e +loop: # while (ECX <= 10) 81 7/subop/compare 3/mod/direct 1/rm32/ecx 0xa/imm32 # compare ECX, 10/imm - 7f 0xa/disp8 # jump-if-greater exit (+10 bytes) + 7f exit/disp8 # jump-if-greater # EBX += ECX 01 3/mod/direct 3/rm32/ebx 1/r32/ecx # add ECX to EBX # ECX++ 81 0/subop/add 3/mod/direct 1/rm32/ecx 1/imm32 # add 1 to ECX # loop - eb -0x12/disp8 # jump loop (-18 bytes) + eb loop/disp8 # jump -# 28: exit: 0x08048070 +exit: # exit(EBX) b8 1/imm32 # copy 1 to EAX cd 0x80/imm8 # int 80h |