diff options
author | Kartik Agaram <vc@akkartik.com> | 2018-09-12 21:21:31 -0700 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2018-09-12 21:27:04 -0700 |
commit | f75c7021d5d406470f04b1ef96e1ec910ee736b8 (patch) | |
tree | 1e3bc48c2585c7992e0122f43fec7ea92bdffa88 | |
parent | cac416f42312001fa46b6ebebb94010f57793b9c (diff) | |
download | mu-f75c7021d5d406470f04b1ef96e1ec910ee736b8.tar.gz |
4544
Attempt #3 at fixing CI. In the process the feature gets a lot less half-baked. Ridiculously misleading that we had `has_metadata()` was special-cased to one specific transform. I suck.
-rw-r--r-- | subx/030---operands.cc | 32 | ||||
-rw-r--r-- | subx/031check_operands.cc | 24 | ||||
-rw-r--r-- | subx/032check_operand_bounds.cc | 2 | ||||
-rw-r--r-- | subx/034compute_segment_address.cc | 2 | ||||
-rw-r--r-- | subx/035labels.cc | 8 | ||||
-rw-r--r-- | subx/036global_variables.cc | 83 |
6 files changed, 96 insertions, 55 deletions
diff --git a/subx/030---operands.cc b/subx/030---operands.cc index 6fa2354d..f6af2fa3 100644 --- a/subx/030---operands.cc +++ b/subx/030---operands.cc @@ -178,19 +178,19 @@ void add_modrm_byte(const line& in, line& out) { bool emit = false; for (int i = 0; i < SIZE(in.words); ++i) { const word& curr = in.words.at(i); - if (has_metadata(curr, "mod")) { + if (has_operand_metadata(curr, "mod")) { mod = hex_byte(curr.data); emit = true; } - else if (has_metadata(curr, "rm32")) { + else if (has_operand_metadata(curr, "rm32")) { rm32 = hex_byte(curr.data); emit = true; } - else if (has_metadata(curr, "r32")) { + else if (has_operand_metadata(curr, "r32")) { reg_subop = hex_byte(curr.data); emit = true; } - else if (has_metadata(curr, "subop")) { + else if (has_operand_metadata(curr, "subop")) { reg_subop = hex_byte(curr.data); emit = true; } @@ -204,15 +204,15 @@ void add_sib_byte(const line& in, line& out) { bool emit = false; for (int i = 0; i < SIZE(in.words); ++i) { const word& curr = in.words.at(i); - if (has_metadata(curr, "scale")) { + if (has_operand_metadata(curr, "scale")) { scale = hex_byte(curr.data); emit = true; } - else if (has_metadata(curr, "index")) { + else if (has_operand_metadata(curr, "index")) { index = hex_byte(curr.data); emit = true; } - else if (has_metadata(curr, "base")) { + else if (has_operand_metadata(curr, "base")) { base = hex_byte(curr.data); emit = true; } @@ -224,11 +224,11 @@ void add_sib_byte(const line& in, line& out) { void add_disp_bytes(const line& in, line& out) { for (int i = 0; i < SIZE(in.words); ++i) { const word& curr = in.words.at(i); - if (has_metadata(curr, "disp8")) + if (has_operand_metadata(curr, "disp8")) emit_hex_bytes(out, curr, 1); - if (has_metadata(curr, "disp16")) + if (has_operand_metadata(curr, "disp16")) emit_hex_bytes(out, curr, 2); - else if (has_metadata(curr, "disp32")) + else if (has_operand_metadata(curr, "disp32")) emit_hex_bytes(out, curr, 4); } } @@ -236,9 +236,9 @@ void add_disp_bytes(const line& in, line& out) { void add_imm_bytes(const line& in, line& out) { for (int i = 0; i < SIZE(in.words); ++i) { const word& curr = in.words.at(i); - if (has_metadata(curr, "imm8")) + if (has_operand_metadata(curr, "imm8")) emit_hex_bytes(out, curr, 1); - else if (has_metadata(curr, "imm32")) + else if (has_operand_metadata(curr, "imm32")) emit_hex_bytes(out, curr, 4); } } @@ -386,10 +386,10 @@ bool contains_any_operand_metadata(const word& word) { return false; } -bool has_metadata(const line& inst, const string& m) { +bool has_operand_metadata(const line& inst, const string& m) { bool result = false; for (int i = 0; i < SIZE(inst.words); ++i) { - if (!has_metadata(inst.words.at(i), m)) continue; + if (!has_operand_metadata(inst.words.at(i), m)) continue; if (result) { raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end(); return false; @@ -399,7 +399,7 @@ bool has_metadata(const line& inst, const string& m) { return result; } -bool has_metadata(const word& w, const string& m) { +bool has_operand_metadata(const word& w, const string& m) { bool result = false; bool metadata_found = false; for (int i = 0; i < SIZE(w.metadata); ++i) { @@ -417,7 +417,7 @@ bool has_metadata(const word& w, const string& m) { word metadata(const line& inst, const string& m) { for (int i = 0; i < SIZE(inst.words); ++i) - if (has_metadata(inst.words.at(i), m)) + if (has_operand_metadata(inst.words.at(i), m)) return inst.words.at(i); assert(false); } diff --git a/subx/031check_operands.cc b/subx/031check_operands.cc index 934b1022..c56761ed 100644 --- a/subx/031check_operands.cc +++ b/subx/031check_operands.cc @@ -321,24 +321,24 @@ cd/software-interrupt 80/imm8/imm32 :(code) void check_operands_modrm(const line& inst, const word& op) { if (all_hex_bytes(inst)) return; // deliberately programming in raw hex; we'll raise a warning elsewhere - check_metadata_present(inst, "mod", op); - check_metadata_present(inst, "rm32", op); + check_operand_metadata_present(inst, "mod", op); + check_operand_metadata_present(inst, "rm32", op); // no check for r32; some instructions don't use it; just assume it's 0 if missing if (op.data == "81" || op.data == "8f" || op.data == "ff") { // keep sync'd with 'help subop' - check_metadata_present(inst, "subop", op); - check_metadata_absent(inst, "r32", op, "should be replaced by subop"); + check_operand_metadata_present(inst, "subop", op); + check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop"); } if (trace_contains_errors()) return; if (metadata(inst, "rm32").data != "4") return; // SIB byte checks uint8_t mod = hex_byte(metadata(inst, "mod").data); if (mod != /*direct*/3) { - check_metadata_present(inst, "base", op); - check_metadata_present(inst, "index", op); // otherwise why go to SIB? + check_operand_metadata_present(inst, "base", op); + check_operand_metadata_present(inst, "index", op); // otherwise why go to SIB? } else { - check_metadata_absent(inst, "base", op, "direct mode"); - check_metadata_absent(inst, "index", op, "direct mode"); + check_operand_metadata_absent(inst, "base", op, "direct mode"); + check_operand_metadata_absent(inst, "index", op, "direct mode"); } // no check for scale; 0 (2**0 = 1) by default } @@ -366,13 +366,13 @@ void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op) // ignore settings in any unused bits } -void check_metadata_present(const line& inst, const string& type, const word& op) { - if (!has_metadata(inst, type)) +void check_operand_metadata_present(const line& inst, const string& type, const word& op) { + if (!has_operand_metadata(inst, type)) raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end(); } -void check_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) { - if (has_metadata(inst, type)) +void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) { + if (has_operand_metadata(inst, type)) raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end(); } diff --git a/subx/032check_operand_bounds.cc b/subx/032check_operand_bounds.cc index 9a810453..69daeb46 100644 --- a/subx/032check_operand_bounds.cc +++ b/subx/032check_operand_bounds.cc @@ -38,7 +38,7 @@ void check_operand_bounds(const segment& code) { void check_operand_bounds(const word& w) { for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) { - if (!has_metadata(w, p->first)) continue; + if (!has_operand_metadata(w, p->first)) continue; if (!is_hex_int(w.data)) continue; // later transforms are on their own to do their own bounds checking int32_t x = parse_int(w.data); if (x >= 0) { diff --git a/subx/034compute_segment_address.cc b/subx/034compute_segment_address.cc index 4a661742..c4645fac 100644 --- a/subx/034compute_segment_address.cc +++ b/subx/034compute_segment_address.cc @@ -46,7 +46,7 @@ uint32_t num_bytes(const line& inst) { uint32_t sum = 0; for (int i = 0; i < SIZE(inst.words); ++i) { const word& curr = inst.words.at(i); - if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) // only multi-byte operands + if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) // only multi-byte operands sum += 4; else sum++; diff --git a/subx/035labels.cc b/subx/035labels.cc index 8c3ce1dd..df54bf2f 100644 --- a/subx/035labels.cc +++ b/subx/035labels.cc @@ -71,7 +71,7 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& // Maybe we should just move this transform to before instruction // packing, and deduce the size of *all* operands. But then we'll also // have to deal with bitfields. - if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) { + if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) { if (*curr.data.rbegin() == ':') raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end(); current_byte += 4; @@ -119,19 +119,19 @@ void replace_labels_with_displacements(segment& code, const map<string, int32_t> const word& curr = inst.words.at(j); if (contains_key(byte_index, curr.data)) { int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at; - if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) { + if (has_operand_metadata(curr, "disp8") || has_operand_metadata(curr, "imm8")) { if (displacement > 0xff || displacement < -0x7f) raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end(); else emit_hex_bytes(new_inst, displacement, 1); } - else if (has_metadata(curr, "disp16")) { + else if (has_operand_metadata(curr, "disp16")) { if (displacement > 0xffff || displacement < -0x7fff) raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end(); else emit_hex_bytes(new_inst, displacement, 2); } - else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) { + else if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) { emit_hex_bytes(new_inst, displacement, 4); } } diff --git a/subx/036global_variables.cc b/subx/036global_variables.cc index 2bdbf1f2..42790c0c 100644 --- a/subx/036global_variables.cc +++ b/subx/036global_variables.cc @@ -70,39 +70,60 @@ void replace_global_variables_with_addresses(program& p, const map<string, uint3 line new_inst; for (int j = 0; j < SIZE(inst.words); ++j) { const word& curr = inst.words.at(j); - if (contains_key(address, curr.data)) { - uint32_t value = get(address, curr.data); - if (!has_metadata(curr, "imm32") && !has_metadata(curr, "disp32")) - raise << "'" << to_string(inst) << "': data variables should be '/imm32' or '/disp32' operands\n" << end(); - emit_hex_bytes(new_inst, value, 4); - } - else { + if (!contains_key(address, curr.data)) { new_inst.words.push_back(curr); + continue; } + if (!valid_use_of_global_variable(curr)) { + raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end(); + return; + } + emit_hex_bytes(new_inst, get(address, curr.data), 4); } inst.words.swap(new_inst.words); trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); } } -:(before "Pack Operands(segment code)") -check_disp32_data_variables(code); -if (trace_contains_errors()) return; +bool valid_use_of_global_variable(const word& curr) { + if (has_operand_metadata(curr, "imm32")) return true; + // End Valid Uses Of Global Variable(curr) + return false; +} + +//:: a more complex sanity check for how we use global variables +//: requires first saving some data early before we pack operands + +:(after "Begin Level-2 Transforms") +Transform.push_back(correlate_disp32_with_mod); :(code) -void check_disp32_data_variables(const segment& code) { +void correlate_disp32_with_mod(program& p) { + if (p.segments.empty()) return; + segment& code = p.segments.at(0); for (int i = 0; i < SIZE(code.lines); ++i) { - const line& inst = code.lines.at(i); + line& inst = code.lines.at(i); for (int j = 0; j < SIZE(inst.words); ++j) { - const word& curr = inst.words.at(j); - if (!has_metadata(curr, "disp32")) continue; - if (has_metadata(inst, "mod") && metadata(inst, "mod").data == "0" && metadata(inst, "rm32").data == "5") - continue; - else - raise << "'" << to_string(inst) << "': data variables can only be in '/disp32' operands if mod == 0 and rm32 == 5\n" << end(); + word& curr = inst.words.at(j); + if (has_operand_metadata(curr, "disp32") + && has_operand_metadata(inst, "mod")) + curr.metadata.push_back("has_mod"); } } } +:(before "End Valid Uses Of Global Variable(curr)") +if (has_operand_metadata(curr, "disp32")) + return has_metadata(curr, "has_mod"); +// todo: more sophisticated check, to ensure we don't use global variable +// addresses as a real displacement added to other operands. + +:(code) +bool has_metadata(const word& w, const string& m) { + for (int i = 0; i < SIZE(w.metadata); ++i) + if (w.metadata.at(i) == m) return true; + return false; +} + :(scenario global_variable_disallowed_in_jump) % Hide_errors = true; == code @@ -110,7 +131,7 @@ eb/jump x/disp8 == data x: 00 00 00 00 -+error: 'eb/jump x/disp8': data variables should be '/imm32' or '/disp32' operands ++error: 'eb/jump x/disp8': can't refer to global variable 'x' # sub-optimal error message; should be #? +error: can't jump to data (variable 'x') @@ -121,9 +142,9 @@ e8/call x/disp32 == data x: 00 00 00 00 -+error: 'e8/call x/disp32': data variables can only be in '/disp32' operands if mod == 0 and rm32 == 5 ++error: 'e8/call x/disp32': can't refer to global variable 'x' # sub-optimal error message; should be -#? +error: can't call a data variable ('x') +#? +error: can't call to the data segment ('x') :(scenario disp32_data_with_modrm) % Mem_offset = CODE_START; @@ -134,3 +155,23 @@ x: x: 00 00 00 00 $error: 0 + +:(scenarios transform) +:(scenario disp32_data_with_call) +== code +foo: +e8/call bar/disp32 +bar: +$error: 0 + +:(code) +string to_full_string(const line& in) { + ostringstream out; + for (int i = 0; i < SIZE(in.words); ++i) { + if (i > 0) out << ' '; + out << in.words.at(i).data; + for (int j = 0; j < SIZE(in.words.at(i).metadata); ++j) + out << '/' << in.words.at(i).metadata.at(j); + } + return out.str(); +} |