about summary refs log tree commit diff stats
path: root/subx
diff options
context:
space:
mode:
authorKartik Agaram <vc@akkartik.com>2018-07-23 09:32:26 -0700
committerKartik Agaram <vc@akkartik.com>2018-07-23 09:32:26 -0700
commit1d35e3370647a65937c4d6b5a8bb1bc5e8aac70d (patch)
treec5c04361a27e435cd34980a07057482bbe478479 /subx
parentfeac887ceb08f29fe4030c06d6ea1787033a19d2 (diff)
downloadmu-1d35e3370647a65937c4d6b5a8bb1bc5e8aac70d.tar.gz
4391 - checks for modrm-based instructions
Diffstat (limited to 'subx')
-rw-r--r--subx/022check_instruction.cc156
-rw-r--r--subx/023check_operand_sizes.cc52
2 files changed, 146 insertions, 62 deletions
diff --git a/subx/022check_instruction.cc b/subx/022check_instruction.cc
index 44b34a60..a8c044c3 100644
--- a/subx/022check_instruction.cc
+++ b/subx/022check_instruction.cc
@@ -225,12 +225,11 @@ void init_permitted_operands() {
 void check_operands(uint8_t op, const line& inst) {
   uint8_t expected_bitvector = get(Permitted_operands, op);
   if (HAS(expected_bitvector, MODRM))
-    check_operands_modrm(inst);
+    check_operands_modrm(inst, op);
   compare_bitvector(op, inst, CLEAR(expected_bitvector, MODRM));
 }
 
-void check_operands_modrm(const line& inst) {
-}
+//: Many instructions can be checked just by comparing bitvectors.
 
 void compare_bitvector(uint8_t op, const line& inst, uint8_t expected) {
   uint8_t bitvector = compute_operand_bitvector(inst);
@@ -251,17 +250,27 @@ void compare_bitvector(uint8_t op, const line& inst, uint8_t expected) {
 }
 
 bool has_operands(const line& inst) {
-  if (SIZE(inst.words) == 1) return false;
-  if (inst.words.at(0).data == "0f" && SIZE(inst.words) == 2) return false;
-  if (inst.words.at(0).data == "f3" && SIZE(inst.words) == 2) return false;
-  if (inst.words.at(0).data == "f3" && inst.words.at(1).data == "0f" && SIZE(inst.words) == 3) return false;
-  return true;
+  return SIZE(inst.words) > first_operand(inst);
+}
+
+int first_operand(const line& inst) {
+  if (inst.words.at(0).data == "0f") return 2;
+  if (inst.words.at(0).data == "f3") {
+    if (inst.words.at(1).data == "0f")
+      return 3;
+    else
+      return 2;
+  }
+  return 1;
 }
 
 bool all_hex_bytes(const line& inst) {
-  for (int i = 0;  i < SIZE(inst.words);  ++i)
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    if (SIZE(inst.words.at(i).data) != 2)
+      return false;
     if (inst.words.at(i).data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
       return false;
+  }
   return true;
 }
 
@@ -295,9 +304,136 @@ uint32_t bitvector_for_operand(const word& w) {
 :(scenario conflicting_operand_type)
 % Hide_errors = true;
 == 0x1
-cd 80/imm8/imm32
+cd/software-interrupt 80/imm8/imm32
 +error: '80/imm8/imm32' has conflicting operand types; it should have only one
 
+//: Instructions computing effective addresses have more complex rules, so
+//: we'll hard-code a common set of instruction-decoding rules.
+
+:(scenario check_missing_mod_operand)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop       3/rm32/ebx 1/imm32
++error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
+
+:(before "End Globals")
+set<string> Instruction_operands;
+:(before "End One-time Setup")
+Instruction_operands.insert("subop");
+Instruction_operands.insert("mod");
+Instruction_operands.insert("rm32");
+Instruction_operands.insert("base");
+Instruction_operands.insert("index");
+Instruction_operands.insert("scale");
+Instruction_operands.insert("r32");
+Instruction_operands.insert("disp8");
+Instruction_operands.insert("disp16");
+Instruction_operands.insert("disp32");
+Instruction_operands.insert("imm8");
+Instruction_operands.insert("imm32");
+
+:(code)
+void check_operands_modrm(const line& inst, uint8_t op) {
+  if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
+  check_metadata_present(inst, "mod", op);
+  check_metadata_present(inst, "rm32", op);
+  // no check for r32; some instructions don't use it; just assume it's 0 if missing
+  if (op == 0x81 || op == 0x8f || op == 0xff) {  // keep sync'd with 'help subop'
+    check_metadata_present(inst, "subop", op);
+    if (has_metadata(inst, "r32", op))
+      raise << "'" << to_string(inst) << "' (" << get(name, op) << "): unexpected r32 operand (should be replaced by subop)\n" << end();
+  }
+  if (trace_contains_errors()) return;
+  if (metadata(inst, "rm32").data != "4") return;
+  // SIB byte checks
+  check_metadata_present(inst, "base", op);
+  check_metadata_present(inst, "index", op);  // otherwise why go to SIB?
+  // no check for scale; 0 (2**0 = 1) by default
+}
+
+void check_metadata_present(const line& inst, const string& type, uint8_t op) {
+  if (!has_metadata(inst, type, op))
+    raise << "'" << to_string(inst) << "' (" << get(name, op) << "): missing " << type << " operand\n" << end();
+}
+
+bool has_metadata(const line& inst, const string& m, uint8_t op) {
+  bool result = false;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    if (!has_metadata(inst.words.at(i), m)) continue;
+    if (result) {
+      raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
+      return false;
+    }
+    result = true;
+  }
+  return result;
+}
+
+bool has_metadata(const word& w, const string& m) {
+  bool result = false;
+  bool metadata_found = false;
+  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
+    const string& curr = w.metadata.at(i);
+    if (!contains_key(Instruction_operands, curr)) continue;  // ignore unrecognized metadata
+    if (metadata_found) {
+      raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
+      return false;
+    }
+    metadata_found = true;
+    result = (curr == m);
+  }
+  return result;
+}
+
+word metadata(const line& inst, const string& m) {
+  for (int i = 0;  i < SIZE(inst.words);  ++i)
+    if (has_metadata(inst.words.at(i), m))
+      return inst.words.at(i);
+  assert(false);
+}
+
+:(scenario conflicting_operands_in_modrm_instruction)
+% Hide_errors = true;
+== 0x1
+01/add 0/mod 3/mod
++error: '01/add 0/mod 3/mod' has conflicting mod operands
+
+:(scenario conflicting_operand_type_modrm)
+% Hide_errors = true;
+== 0x1
+01/add 0/mod 3/rm32/r32
++error: '3/rm32/r32' has conflicting operand types; it should have only one
+
+:(scenario check_missing_rm32_operand)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop 0/mod            1/imm32
++error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
+
+:(scenario check_missing_subop_operand)
+% Hide_errors = true;
+== 0x1
+81             0/mod 3/rm32/ebx 1/imm32
++error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
+
+:(scenario check_missing_base_operand)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
++error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
+
+:(scenario check_missing_index_operand)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
++error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
+
+:(scenario check_missing_base_operand_2)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
++error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
+
 //:: similarly handle multi-byte opcodes
 
 :(code)
diff --git a/subx/023check_operand_sizes.cc b/subx/023check_operand_sizes.cc
index 3a492b0c..687ee41d 100644
--- a/subx/023check_operand_sizes.cc
+++ b/subx/023check_operand_sizes.cc
@@ -43,17 +43,6 @@ void check_operand_bounds(const word& w) {
         raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
 }
 
-int first_operand(const line& inst) {
-  if (inst.words.at(0).data == "0f") return 2;
-  if (inst.words.at(0).data == "f3") {
-    if (inst.words.at(1).data == "0f")
-      return 3;
-    else
-      return 2;
-  }
-  return 1;
-}
-
 uint32_t parse_int(const string& s) {
   istringstream in(s);
   uint32_t result = 0;
@@ -66,44 +55,3 @@ uint32_t parse_int(const string& s) {
   }
   return result;
 }
-
-void check_metadata_present(const line& inst, const string& type, uint8_t op) {
-  if (!has_metadata(inst, type, op))
-    raise << "'" << to_string(inst) << "' (" << get(name, op) << "): missing " << type << " operand\n" << end();
-}
-
-bool has_metadata(const line& inst, const string& m, uint8_t op) {
-  bool result = false;
-  for (int i = 0;  i < SIZE(inst.words);  ++i) {
-    if (!has_metadata(inst.words.at(i), m)) continue;
-    if (result) {
-      raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
-      return false;
-    }
-    result = true;
-  }
-  return result;
-}
-
-bool has_metadata(const word& w, const string& m) {
-  bool result = false;
-  bool metadata_found = false;
-  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
-    const string& curr = w.metadata.at(i);
-    if (!contains_key(Operand_bound, curr)) continue;  // ignore unrecognized metadata
-    if (metadata_found) {
-      raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
-      return false;
-    }
-    metadata_found = true;
-    result = (curr == m);
-  }
-  return result;
-}
-
-word metadata(const line& inst, const string& m) {
-  for (int i = 0;  i < SIZE(inst.words);  ++i)
-    if (has_metadata(inst.words.at(i), m))
-      return inst.words.at(i);
-  assert(false);
-}