about summary refs log tree commit diff stats
path: root/subx
diff options
context:
space:
mode:
authorKartik Agaram <vc@akkartik.com>2018-09-12 21:21:31 -0700
committerKartik Agaram <vc@akkartik.com>2018-09-12 21:27:04 -0700
commitf75c7021d5d406470f04b1ef96e1ec910ee736b8 (patch)
tree1e3bc48c2585c7992e0122f43fec7ea92bdffa88 /subx
parentcac416f42312001fa46b6ebebb94010f57793b9c (diff)
downloadmu-f75c7021d5d406470f04b1ef96e1ec910ee736b8.tar.gz
4544
Attempt #3 at fixing CI.
In the process the feature gets a lot less half-baked.

Ridiculously misleading that we had `has_metadata()` was special-cased
to one specific transform. I suck.
Diffstat (limited to 'subx')
-rw-r--r--subx/030---operands.cc32
-rw-r--r--subx/031check_operands.cc24
-rw-r--r--subx/032check_operand_bounds.cc2
-rw-r--r--subx/034compute_segment_address.cc2
-rw-r--r--subx/035labels.cc8
-rw-r--r--subx/036global_variables.cc83
6 files changed, 96 insertions, 55 deletions
diff --git a/subx/030---operands.cc b/subx/030---operands.cc
index 6fa2354d..f6af2fa3 100644
--- a/subx/030---operands.cc
+++ b/subx/030---operands.cc
@@ -178,19 +178,19 @@ void add_modrm_byte(const line& in, line& out) {
   bool emit = false;
   for (int i = 0;  i < SIZE(in.words);  ++i) {
     const word& curr = in.words.at(i);
-    if (has_metadata(curr, "mod")) {
+    if (has_operand_metadata(curr, "mod")) {
       mod = hex_byte(curr.data);
       emit = true;
     }
-    else if (has_metadata(curr, "rm32")) {
+    else if (has_operand_metadata(curr, "rm32")) {
       rm32 = hex_byte(curr.data);
       emit = true;
     }
-    else if (has_metadata(curr, "r32")) {
+    else if (has_operand_metadata(curr, "r32")) {
       reg_subop = hex_byte(curr.data);
       emit = true;
     }
-    else if (has_metadata(curr, "subop")) {
+    else if (has_operand_metadata(curr, "subop")) {
       reg_subop = hex_byte(curr.data);
       emit = true;
     }
@@ -204,15 +204,15 @@ void add_sib_byte(const line& in, line& out) {
   bool emit = false;
   for (int i = 0;  i < SIZE(in.words);  ++i) {
     const word& curr = in.words.at(i);
-    if (has_metadata(curr, "scale")) {
+    if (has_operand_metadata(curr, "scale")) {
       scale = hex_byte(curr.data);
       emit = true;
     }
-    else if (has_metadata(curr, "index")) {
+    else if (has_operand_metadata(curr, "index")) {
       index = hex_byte(curr.data);
       emit = true;
     }
-    else if (has_metadata(curr, "base")) {
+    else if (has_operand_metadata(curr, "base")) {
       base = hex_byte(curr.data);
       emit = true;
     }
@@ -224,11 +224,11 @@ void add_sib_byte(const line& in, line& out) {
 void add_disp_bytes(const line& in, line& out) {
   for (int i = 0;  i < SIZE(in.words);  ++i) {
     const word& curr = in.words.at(i);
-    if (has_metadata(curr, "disp8"))
+    if (has_operand_metadata(curr, "disp8"))
       emit_hex_bytes(out, curr, 1);
-    if (has_metadata(curr, "disp16"))
+    if (has_operand_metadata(curr, "disp16"))
       emit_hex_bytes(out, curr, 2);
-    else if (has_metadata(curr, "disp32"))
+    else if (has_operand_metadata(curr, "disp32"))
       emit_hex_bytes(out, curr, 4);
   }
 }
@@ -236,9 +236,9 @@ void add_disp_bytes(const line& in, line& out) {
 void add_imm_bytes(const line& in, line& out) {
   for (int i = 0;  i < SIZE(in.words);  ++i) {
     const word& curr = in.words.at(i);
-    if (has_metadata(curr, "imm8"))
+    if (has_operand_metadata(curr, "imm8"))
       emit_hex_bytes(out, curr, 1);
-    else if (has_metadata(curr, "imm32"))
+    else if (has_operand_metadata(curr, "imm32"))
       emit_hex_bytes(out, curr, 4);
   }
 }
@@ -386,10 +386,10 @@ bool contains_any_operand_metadata(const word& word) {
   return false;
 }
 
-bool has_metadata(const line& inst, const string& m) {
+bool has_operand_metadata(const line& inst, const string& m) {
   bool result = false;
   for (int i = 0;  i < SIZE(inst.words);  ++i) {
-    if (!has_metadata(inst.words.at(i), m)) continue;
+    if (!has_operand_metadata(inst.words.at(i), m)) continue;
     if (result) {
       raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
       return false;
@@ -399,7 +399,7 @@ bool has_metadata(const line& inst, const string& m) {
   return result;
 }
 
-bool has_metadata(const word& w, const string& m) {
+bool has_operand_metadata(const word& w, const string& m) {
   bool result = false;
   bool metadata_found = false;
   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
@@ -417,7 +417,7 @@ bool has_metadata(const word& w, const string& m) {
 
 word metadata(const line& inst, const string& m) {
   for (int i = 0;  i < SIZE(inst.words);  ++i)
-    if (has_metadata(inst.words.at(i), m))
+    if (has_operand_metadata(inst.words.at(i), m))
       return inst.words.at(i);
   assert(false);
 }
diff --git a/subx/031check_operands.cc b/subx/031check_operands.cc
index 934b1022..c56761ed 100644
--- a/subx/031check_operands.cc
+++ b/subx/031check_operands.cc
@@ -321,24 +321,24 @@ cd/software-interrupt 80/imm8/imm32
 :(code)
 void check_operands_modrm(const line& inst, const word& op) {
   if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
-  check_metadata_present(inst, "mod", op);
-  check_metadata_present(inst, "rm32", op);
+  check_operand_metadata_present(inst, "mod", op);
+  check_operand_metadata_present(inst, "rm32", op);
   // no check for r32; some instructions don't use it; just assume it's 0 if missing
   if (op.data == "81" || op.data == "8f" || op.data == "ff") {  // keep sync'd with 'help subop'
-    check_metadata_present(inst, "subop", op);
-    check_metadata_absent(inst, "r32", op, "should be replaced by subop");
+    check_operand_metadata_present(inst, "subop", op);
+    check_operand_metadata_absent(inst, "r32", op, "should be replaced by subop");
   }
   if (trace_contains_errors()) return;
   if (metadata(inst, "rm32").data != "4") return;
   // SIB byte checks
   uint8_t mod = hex_byte(metadata(inst, "mod").data);
   if (mod != /*direct*/3) {
-    check_metadata_present(inst, "base", op);
-    check_metadata_present(inst, "index", op);  // otherwise why go to SIB?
+    check_operand_metadata_present(inst, "base", op);
+    check_operand_metadata_present(inst, "index", op);  // otherwise why go to SIB?
   }
   else {
-    check_metadata_absent(inst, "base", op, "direct mode");
-    check_metadata_absent(inst, "index", op, "direct mode");
+    check_operand_metadata_absent(inst, "base", op, "direct mode");
+    check_operand_metadata_absent(inst, "index", op, "direct mode");
   }
   // no check for scale; 0 (2**0 = 1) by default
 }
@@ -366,13 +366,13 @@ void compare_bitvector_modrm(const line& inst, uint8_t expected, const word& op)
   // ignore settings in any unused bits
 }
 
-void check_metadata_present(const line& inst, const string& type, const word& op) {
-  if (!has_metadata(inst, type))
+void check_operand_metadata_present(const line& inst, const string& type, const word& op) {
+  if (!has_operand_metadata(inst, type))
     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): missing " << type << " operand\n" << end();
 }
 
-void check_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
-  if (has_metadata(inst, type))
+void check_operand_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
+  if (has_operand_metadata(inst, type))
     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end();
 }
 
diff --git a/subx/032check_operand_bounds.cc b/subx/032check_operand_bounds.cc
index 9a810453..69daeb46 100644
--- a/subx/032check_operand_bounds.cc
+++ b/subx/032check_operand_bounds.cc
@@ -38,7 +38,7 @@ void check_operand_bounds(const segment& code) {
 
 void check_operand_bounds(const word& w) {
   for (map<string, uint32_t>::iterator p = Operand_bound.begin();  p != Operand_bound.end();  ++p) {
-    if (!has_metadata(w, p->first)) continue;
+    if (!has_operand_metadata(w, p->first)) continue;
     if (!is_hex_int(w.data)) continue;  // later transforms are on their own to do their own bounds checking
     int32_t x = parse_int(w.data);
     if (x >= 0) {
diff --git a/subx/034compute_segment_address.cc b/subx/034compute_segment_address.cc
index 4a661742..c4645fac 100644
--- a/subx/034compute_segment_address.cc
+++ b/subx/034compute_segment_address.cc
@@ -46,7 +46,7 @@ uint32_t num_bytes(const line& inst) {
   uint32_t sum = 0;
   for (int i = 0;  i < SIZE(inst.words);  ++i) {
     const word& curr = inst.words.at(i);
-    if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32"))  // only multi-byte operands
+    if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32"))  // only multi-byte operands
       sum += 4;
     else
       sum++;
diff --git a/subx/035labels.cc b/subx/035labels.cc
index 8c3ce1dd..df54bf2f 100644
--- a/subx/035labels.cc
+++ b/subx/035labels.cc
@@ -71,7 +71,7 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>&
       // Maybe we should just move this transform to before instruction
       // packing, and deduce the size of *all* operands. But then we'll also
       // have to deal with bitfields.
-      if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
+      if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
         if (*curr.data.rbegin() == ':')
           raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
         current_byte += 4;
@@ -119,19 +119,19 @@ void replace_labels_with_displacements(segment& code, const map<string, int32_t>
       const word& curr = inst.words.at(j);
       if (contains_key(byte_index, curr.data)) {
         int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
-        if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
+        if (has_operand_metadata(curr, "disp8") || has_operand_metadata(curr, "imm8")) {
           if (displacement > 0xff || displacement < -0x7f)
             raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end();
           else
             emit_hex_bytes(new_inst, displacement, 1);
         }
-        else if (has_metadata(curr, "disp16")) {
+        else if (has_operand_metadata(curr, "disp16")) {
           if (displacement > 0xffff || displacement < -0x7fff)
             raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end();
           else
             emit_hex_bytes(new_inst, displacement, 2);
         }
-        else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
+        else if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
           emit_hex_bytes(new_inst, displacement, 4);
         }
       }
diff --git a/subx/036global_variables.cc b/subx/036global_variables.cc
index 2bdbf1f2..42790c0c 100644
--- a/subx/036global_variables.cc
+++ b/subx/036global_variables.cc
@@ -70,39 +70,60 @@ void replace_global_variables_with_addresses(program& p, const map<string, uint3
     line new_inst;
     for (int j = 0;  j < SIZE(inst.words);  ++j) {
       const word& curr = inst.words.at(j);
-      if (contains_key(address, curr.data)) {
-        uint32_t value = get(address, curr.data);
-        if (!has_metadata(curr, "imm32") && !has_metadata(curr, "disp32"))
-          raise << "'" << to_string(inst) << "': data variables should be '/imm32' or '/disp32' operands\n" << end();
-        emit_hex_bytes(new_inst, value, 4);
-      }
-      else {
+      if (!contains_key(address, curr.data)) {
         new_inst.words.push_back(curr);
+        continue;
       }
+      if (!valid_use_of_global_variable(curr)) {
+        raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
+        return;
+      }
+      emit_hex_bytes(new_inst, get(address, curr.data), 4);
     }
     inst.words.swap(new_inst.words);
     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
   }
 }
 
-:(before "Pack Operands(segment code)")
-check_disp32_data_variables(code);
-if (trace_contains_errors()) return;
+bool valid_use_of_global_variable(const word& curr) {
+  if (has_operand_metadata(curr, "imm32")) return true;
+  // End Valid Uses Of Global Variable(curr)
+  return false;
+}
+
+//:: a more complex sanity check for how we use global variables
+//: requires first saving some data early before we pack operands
+
+:(after "Begin Level-2 Transforms")
+Transform.push_back(correlate_disp32_with_mod);
 :(code)
-void check_disp32_data_variables(const segment& code) {
+void correlate_disp32_with_mod(program& p) {
+  if (p.segments.empty()) return;
+  segment& code = p.segments.at(0);
   for (int i = 0;  i < SIZE(code.lines);  ++i) {
-    const line& inst = code.lines.at(i);
+    line& inst = code.lines.at(i);
     for (int j = 0;  j < SIZE(inst.words);  ++j) {
-      const word& curr = inst.words.at(j);
-      if (!has_metadata(curr, "disp32")) continue;
-      if (has_metadata(inst, "mod") && metadata(inst, "mod").data == "0" && metadata(inst, "rm32").data == "5")
-        continue;
-      else
-        raise << "'" << to_string(inst) << "': data variables can only be in '/disp32' operands if mod == 0 and rm32 == 5\n" << end();
+      word& curr = inst.words.at(j);
+      if (has_operand_metadata(curr, "disp32")
+          && has_operand_metadata(inst, "mod"))
+        curr.metadata.push_back("has_mod");
     }
   }
 }
 
+:(before "End Valid Uses Of Global Variable(curr)")
+if (has_operand_metadata(curr, "disp32"))
+  return has_metadata(curr, "has_mod");
+// todo: more sophisticated check, to ensure we don't use global variable
+// addresses as a real displacement added to other operands.
+
+:(code)
+bool has_metadata(const word& w, const string& m) {
+  for (int i = 0;  i < SIZE(w.metadata);  ++i)
+    if (w.metadata.at(i) == m) return true;
+  return false;
+}
+
 :(scenario global_variable_disallowed_in_jump)
 % Hide_errors = true;
 == code
@@ -110,7 +131,7 @@ eb/jump x/disp8
 == data
 x:
 00 00 00 00
-+error: 'eb/jump x/disp8': data variables should be '/imm32' or '/disp32' operands
++error: 'eb/jump x/disp8': can't refer to global variable 'x'
 # sub-optimal error message; should be
 #? +error: can't jump to data (variable 'x')
 
@@ -121,9 +142,9 @@ e8/call x/disp32
 == data
 x:
 00 00 00 00
-+error: 'e8/call x/disp32': data variables can only be in '/disp32' operands if mod == 0 and rm32 == 5
++error: 'e8/call x/disp32': can't refer to global variable 'x'
 # sub-optimal error message; should be
-#? +error: can't call a data variable ('x')
+#? +error: can't call to the data segment ('x')
 
 :(scenario disp32_data_with_modrm)
 % Mem_offset = CODE_START;
@@ -134,3 +155,23 @@ x:
 x:
 00 00 00 00
 $error: 0
+
+:(scenarios transform)
+:(scenario disp32_data_with_call)
+== code
+foo:
+e8/call bar/disp32
+bar:
+$error: 0
+
+:(code)
+string to_full_string(const line& in) {
+  ostringstream out;
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    if (i > 0) out << ' ';
+    out << in.words.at(i).data;
+    for (int j = 0;  j < SIZE(in.words.at(i).metadata);  ++j)
+      out << '/' << in.words.at(i).metadata.at(j);
+  }
+  return out.str();
+}