about summary refs log tree commit diff stats
path: root/subx
diff options
context:
space:
mode:
Diffstat (limited to 'subx')
-rw-r--r--subx/023check_operand_bounds.cc30
-rw-r--r--subx/024pack_operands.cc17
-rw-r--r--subx/026labels.cc77
-rw-r--r--subx/ex3.subx9
4 files changed, 102 insertions, 31 deletions
diff --git a/subx/023check_operand_bounds.cc b/subx/023check_operand_bounds.cc
index c868603a..cca4ab24 100644
--- a/subx/023check_operand_bounds.cc
+++ b/subx/023check_operand_bounds.cc
@@ -39,21 +39,29 @@ void check_operand_bounds(/*const*/ program& p) {
 
 void check_operand_bounds(const word& w) {
   for (map<string, uint32_t>::iterator p = Operand_bound.begin();  p != Operand_bound.end();  ++p) {
-    if (has_metadata(w, p->first)) {
-      int32_t x = parse_int(w.data);
-      if (x >= 0) {
-        if (static_cast<uint32_t>(x) >= p->second)
-          raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
-      }
-      else {
-        // hacky? assuming bound is a power of 2
-        if (x < -1*static_cast<int32_t>(p->second/2))
-          raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
-      }
+    if (!has_metadata(w, p->first)) continue;  // this bound only applies to words tagged with p->first
+    if (!is_hex_int(w.data)) continue;  // later transforms are on their own to do their own bounds checking
+    int32_t x = parse_int(w.data);
+    if (x >= 0) {  // non-negative values must be strictly below the bound
+      if (static_cast<uint32_t>(x) >= p->second)
+        raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
+    }
+    else {
+      // negative values may go down to -bound/2; hacky? assuming bound is a power of 2
+      if (x < -1*static_cast<int32_t>(p->second/2))
+        raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
     }
   }
 }
 
+bool is_hex_int(const string& s) {  // true iff s is non-empty and reads as [+-]?(0x)?<hex digits>; the digit run itself may be empty
+  if (s.empty()) return false;
+  size_t pos = 0;
+  if (s.at(0) == '-' || s.at(0) == '+') pos++;  // skip an optional sign
+  if (s.substr(pos, 2) == "0x") pos += 2;  // substr() takes (start, length), not (start, end), so ask for exactly 2 chars
+  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;  // everything after the prefix must be a hex digit
+}
+
 int32_t parse_int(const string& s) {
   istringstream in(s);
   int32_t result = 0;
diff --git a/subx/024pack_operands.cc b/subx/024pack_operands.cc
index aa4fec7d..23e91c05 100644
--- a/subx/024pack_operands.cc
+++ b/subx/024pack_operands.cc
@@ -129,6 +129,8 @@ void add_disp_bytes(const line& in, line& out) {
     const word& curr = in.words.at(i);
     if (has_metadata(curr, "disp8"))
       emit_hex_bytes(out, curr, 1);
+    if (has_metadata(curr, "disp16"))
+      emit_hex_bytes(out, curr, 2);
     else if (has_metadata(curr, "disp32"))
       emit_hex_bytes(out, curr, 4);
   }
@@ -150,26 +152,23 @@ void emit_hex_bytes(line& out, const word& w, int num) {
     out.words.push_back(w);
     return;
   }
-  uint32_t val = static_cast<uint32_t>(parse_int(w.data));
+  emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
+}
+
+void emit_hex_bytes(line& out, uint32_t val, int num) {  // appends the low 'num' bytes of 'val' to 'out' as hex-byte words, least significant byte first
+  assert(num <= 4);  // 'val' is 32 bits wide, so there are at most 4 bytes to emit
   for (int i = 0;  i < num;  ++i) {
     out.words.push_back(hex_byte_text(val & 0xff));
     val = val >> 8;
   }
 }
 
-bool is_hex_int(const string& s) {
-  if (s.empty()) return false;
-  size_t pos = 0;
-  if (s.at(0) == '-' || s.at(0) == '+') pos++;
-  if (s.substr(pos, pos+2) == "0x") pos += 2;
-  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
-}
-
 word hex_byte_text(uint8_t val) {
   ostringstream out;
   out << HEXBYTE << NUM(val);
   word result;
   result.data = out.str();
+  result.original = out.str()+"/auto";  // 'original' is the same text plus a "/auto" suffix (presumably flags the byte as generated -- confirm)
   return result;
 }
 
diff --git a/subx/026labels.cc b/subx/026labels.cc
index cc11eb0b..db6091d6 100644
--- a/subx/026labels.cc
+++ b/subx/026labels.cc
@@ -19,7 +19,7 @@ void rewrite_labels(program& p) {
   trace(99, "transform") << "-- rewrite labels" << end();
   if (p.segments.empty()) return;
   segment& code = p.segments.at(0);
-  map<string, uint32_t> address;
+  map<string, int32_t> address;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
   compute_addresses_for_labels(code, address);
   if (trace_contains_errors()) return;
   drop_labels(code);
@@ -27,7 +27,7 @@ void rewrite_labels(program& p) {
   replace_labels_with_addresses(code, address);
 }
 
-void compute_addresses_for_labels(const segment& code, map<string, uint32_t> address) {
+void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) {
   int current_byte = 0;
   for (int i = 0;  i < SIZE(code.lines);  ++i) {
     const line& inst = code.lines.at(i);
@@ -64,7 +64,8 @@ void compute_addresses_for_labels(const segment& code, map<string, uint32_t> add
 void drop_labels(segment& code) {
   for (int i = 0;  i < SIZE(code.lines);  ++i) {
     line& inst = code.lines.at(i);
-    remove_if(inst.words.begin(), inst.words.end(), is_label);
+    vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);  // only compacts the kept words to the front; the container size is unchanged
+    inst.words.erase(new_end, inst.words.end());  // drop the leftover tail so the label words are actually gone
   }
 }
 
@@ -72,7 +73,61 @@ bool is_label(const word& w) {
   return *w.data.rbegin() == ':';
 }
 
-void replace_labels_with_addresses(const segment& code, map<string, uint32_t> address) {
+void replace_labels_with_addresses(segment& code, const map<string, int32_t>& address) {  // rewrites each word naming a label in 'address' as hex bytes of a relative offset
+  int32_t byte_next_instruction_starts_at = 0;  // running sum of num_bytes() over the lines visited so far
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    line& inst = code.lines.at(i);
+    byte_next_instruction_starts_at += num_bytes(inst);  // add the current line first: offsets are measured from its end
+    line new_inst;
+    for (int j = 0;  j < SIZE(inst.words);  ++j) {
+      const word& curr = inst.words.at(j);
+      if (contains_key(address, curr.data)) {
+        int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at;
+        if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
+          if (offset > 0x7f || offset < -0x80)  // signed 8-bit range: an 8-bit displacement is sign-extended, so 0x80..0xff would read back as negative
+            raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end();
+          else
+            emit_hex_bytes(new_inst, offset, 1);
+        }
+        else if (has_metadata(curr, "disp16")) {
+          if (offset > 0x7fff || offset < -0x8000)  // signed 16-bit range, same reasoning as the 8-bit case
+            raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end();
+          else
+            emit_hex_bytes(new_inst, offset, 2);
+        }
+        else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
+          emit_hex_bytes(new_inst, offset, 4);  // any int32_t offset fits in 4 bytes, so no range check
+        }  // NOTE(review): a label word carrying none of these tags is dropped here without a diagnostic -- confirm that is intended
+      }
+      else {
+        new_inst.words.push_back(curr);  // not a known label: keep the word unchanged
+      }
+    }
+    inst.words.swap(new_inst.words);
+    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
+  }
+}
+
+// Sums 4 for each disp32/imm32 word of 'inst' and 1 for every other word. Assumes all bitfields are packed.
+uint32_t num_bytes(const line& inst) {
+  uint32_t sum = 0;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    const word& curr = inst.words.at(i);
+    if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32"))  // only multi-byte operands
+      sum += 4;
+    else
+      sum++;  // NOTE(review): disp16 words also land here and count as 1 byte, though 2 bytes are emitted for them elsewhere -- confirm
+  }
+  return sum;
+}
+
+string data_to_string(const line& inst) {  // returns the 'data' field of each word in 'inst', joined by single spaces
+  ostringstream out;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    if (i > 0) out << ' ';  // separator goes between words only, never before the first
+    out << inst.words.at(i).data;
+  }
+  return out.str();
 }
 
 //: Label definitions must be the first word on a line. No jumping inside
@@ -86,11 +141,21 @@ void replace_labels_with_addresses(const segment& code, map<string, uint32_t> ad
           # instruction                     effective address                                           operand     displacement    immediate
           # op          subop               mod             rm32          base      index     scale     r32
           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+# address 1
 loop:
 loop2:
+# address 1 (labels take up no space)
             05                                                                                                                      0x0d0c0b0a/imm32  # add to EAX
+# address 6
+            eb                                                                                                      loop2/disp8
+# address 8
+            eb                                                                                                      loop3/disp8
+# address 10
 loop3:
-            f
 +transform: label 'loop' is at address 1
 +transform: label 'loop2' is at address 1
-+transform: label 'loop3' is at address 6
++transform: label 'loop3' is at address 10
+# first jump is to -7
++transform: instruction after transform: 'eb f9'
+# second jump is to 0 (fall through)
++transform: instruction after transform: 'eb 00'
diff --git a/subx/ex3.subx b/subx/ex3.subx
index 4dcb10e9..5b3fdb16 100644
--- a/subx/ex3.subx
+++ b/subx/ex3.subx
@@ -12,23 +12,22 @@
 # op          subop               mod             rm32          base      index     scale     r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    2 bits      0/1/2/4 bytes   0/1/2/4 bytes
   # result: EBX = 0
-# 0: e_entry = 0x08048054
   bb                                                                                                                      0/imm32           # copy 0 to EBX
   # counter: ECX = 1
   b9                                                                                                                      1/imm32           # copy 1 to ECX
 
-# 10: loop: 0x0804805e
+loop:
   # while (ECX <= 10)
   81            7/subop/compare   3/mod/direct    1/rm32/ecx                                                              0xa/imm32         # compare ECX, 10/imm
-  7f                                                                                                      0xa/disp8                         # jump-if-greater exit (+10 bytes)
+  7f                                                                                                      exit/disp8                        # jump-if-greater
   # EBX += ECX
   01                              3/mod/direct    3/rm32/ebx                                  1/r32/ecx                                     # add ECX to EBX
   # ECX++
   81            0/subop/add       3/mod/direct    1/rm32/ecx                                                              1/imm32           # add 1 to ECX
   # loop
-  eb                                                                                                      -0x12/disp8                       # jump loop (-18 bytes)
+  eb                                                                                                      loop/disp8                        # jump
 
-# 28: exit: 0x08048070
+exit:
   # exit(EBX)
   b8                                                                                                                      1/imm32           # copy 1 to EAX
   cd                                                                                                                      0x80/imm8         # int 80h