about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kartik Agaram <vc@akkartik.com> 2018-07-27 13:26:12 -0700
committer Kartik Agaram <vc@akkartik.com> 2018-07-27 13:30:19 -0700
commit 071afeff5d99732b2f50f2a5009dc6f2e8303909 (patch)
tree c64ab64375bc1d7d79bc6d4022198c9b392bbf87
parent 4718a77ce26c02bac7cfe28637c2892091ac0075 (diff)
download mu-071afeff5d99732b2f50f2a5009dc6f2e8303909.tar.gz
4445 - support labels
-rw-r--r-- subx/023check_operand_bounds.cc 30
-rw-r--r-- subx/024pack_operands.cc 17
-rw-r--r-- subx/026labels.cc 77
-rw-r--r-- subx/ex3.subx 9
4 files changed, 102 insertions, 31 deletions
diff --git a/subx/023check_operand_bounds.cc b/subx/023check_operand_bounds.cc
index c868603a..cca4ab24 100644
--- a/subx/023check_operand_bounds.cc
+++ b/subx/023check_operand_bounds.cc
@@ -39,21 +39,29 @@ void check_operand_bounds(/*const*/ program& p) {
 
 void check_operand_bounds(const word& w) {
   for (map<string, uint32_t>::iterator p = Operand_bound.begin();  p != Operand_bound.end();  ++p) {
-    if (has_metadata(w, p->first)) {
-      int32_t x = parse_int(w.data);
-      if (x >= 0) {
-        if (static_cast<uint32_t>(x) >= p->second)
-          raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
-      }
-      else {
-        // hacky? assuming bound is a power of 2
-        if (x < -1*static_cast<int32_t>(p->second/2))
-          raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
-      }
+    if (!has_metadata(w, p->first)) continue;
+    if (!is_hex_int(w.data)) continue;  // later transforms are on their own to do their own bounds checking
+    int32_t x = parse_int(w.data);
+    if (x >= 0) {
+      if (static_cast<uint32_t>(x) >= p->second)
+        raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
+    }
+    else {
+      // hacky? assuming bound is a power of 2
+      if (x < -1*static_cast<int32_t>(p->second/2))
+        raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
     }
   }
 }
 
+bool is_hex_int(const string& s) {
+  if (s.empty()) return false;
+  size_t pos = 0;
+  if (s.at(0) == '-' || s.at(0) == '+') pos++;
+  if (s.substr(pos, pos+2) == "0x") pos += 2;
+  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
+}
+
 int32_t parse_int(const string& s) {
   istringstream in(s);
   int32_t result = 0;
diff --git a/subx/024pack_operands.cc b/subx/024pack_operands.cc
index aa4fec7d..23e91c05 100644
--- a/subx/024pack_operands.cc
+++ b/subx/024pack_operands.cc
@@ -129,6 +129,8 @@ void add_disp_bytes(const line& in, line& out) {
     const word& curr = in.words.at(i);
     if (has_metadata(curr, "disp8"))
       emit_hex_bytes(out, curr, 1);
+    if (has_metadata(curr, "disp16"))
+      emit_hex_bytes(out, curr, 2);
     else if (has_metadata(curr, "disp32"))
       emit_hex_bytes(out, curr, 4);
   }
@@ -150,26 +152,23 @@ void emit_hex_bytes(line& out, const word& w, int num) {
     out.words.push_back(w);
     return;
   }
-  uint32_t val = static_cast<uint32_t>(parse_int(w.data));
+  emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
+}
+
+void emit_hex_bytes(line& out, uint32_t val, int num) {
+  assert(num <= 4);
   for (int i = 0;  i < num;  ++i) {
     out.words.push_back(hex_byte_text(val & 0xff));
     val = val >> 8;
   }
 }
 
-bool is_hex_int(const string& s) {
-  if (s.empty()) return false;
-  size_t pos = 0;
-  if (s.at(0) == '-' || s.at(0) == '+') pos++;
-  if (s.substr(pos, pos+2) == "0x") pos += 2;
-  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
-}
-
 word hex_byte_text(uint8_t val) {
   ostringstream out;
   out << HEXBYTE << NUM(val);
   word result;
   result.data = out.str();
+  result.original = out.str()+"/auto";
   return result;
 }
 
diff --git a/subx/026labels.cc b/subx/026labels.cc
index cc11eb0b..db6091d6 100644
--- a/subx/026labels.cc
+++ b/subx/026labels.cc
@@ -19,7 +19,7 @@ void rewrite_labels(program& p) {
   trace(99, "transform") << "-- rewrite labels" << end();
   if (p.segments.empty()) return;
   segment& code = p.segments.at(0);
-  map<string, uint32_t> address;
+  map<string, int32_t> address;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
   compute_addresses_for_labels(code, address);
   if (trace_contains_errors()) return;
   drop_labels(code);
@@ -27,7 +27,7 @@ void rewrite_labels(program& p) {
   replace_labels_with_addresses(code, address);
 }
 
-void compute_addresses_for_labels(const segment& code, map<string, uint32_t> address) {
+void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) {
   int current_byte = 0;
   for (int i = 0;  i < SIZE(code.lines);  ++i) {
     const line& inst = code.lines.at(i);
@@ -64,7 +64,8 @@ void compute_addresses_for_labels(const segment& code, map<string, uint32_t> add
 void drop_labels(segment& code) {
   for (int i = 0;  i < SIZE(code.lines);  ++i) {
     line& inst = code.lines.at(i);
-    remove_if(inst.words.begin(), inst.words.end(), is_label);
+    vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
+    inst.words.erase(new_end, inst.words.end());
   }
 }
 
@@ -72,7 +73,61 @@ bool is_label(const word& w) {
   return *w.data.rbegin() == ':';
 }
 
-void replace_labels_with_addresses(const segment& code, map<string, uint32_t> address) {
+void replace_labels_with_addresses(segment& code, const map<string, int32_t>& address) {
+  int32_t byte_next_instruction_starts_at = 0;
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    line& inst = code.lines.at(i);
+    byte_next_instruction_starts_at += num_bytes(inst);
+    line new_inst;
+    for (int j = 0;  j < SIZE(inst.words);  ++j) {
+      const word& curr = inst.words.at(j);
+      if (contains_key(address, curr.data)) {
+        int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at;
+        if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
+          if (offset > 0xff || offset < -0x7f)
+            raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end();
+          else
+            emit_hex_bytes(new_inst, offset, 1);
+        }
+        else if (has_metadata(curr, "disp16")) {
+          if (offset > 0xffff || offset < -0x7fff)
+            raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end();
+          else
+            emit_hex_bytes(new_inst, offset, 2);
+        }
+        else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
+          emit_hex_bytes(new_inst, offset, 4);
+        }
+      }
+      else {
+        new_inst.words.push_back(curr);
+      }
+    }
+    inst.words.swap(new_inst.words);
+    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
+  }
+}
+
+// Assumes all bitfields are packed.
+uint32_t num_bytes(const line& inst) {
+  uint32_t sum = 0;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    const word& curr = inst.words.at(i);
+    if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32"))  // only multi-byte operands
+      sum += 4;
+    else
+      sum++;
+  }
+  return sum;
+}
+
+string data_to_string(const line& inst) {
+  ostringstream out;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    if (i > 0) out << ' ';
+    out << inst.words.at(i).data;
+  }
+  return out.str();
 }
 
 //: Label definitions must be the first word on a line. No jumping inside
@@ -86,11 +141,21 @@ void replace_labels_with_addresses(const segment& code, map<string, uint32_t> ad
           # instruction                     effective address                                           operand     displacement    immediate
           # op          subop               mod             rm32          base      index     scale     r32
           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+# address 1
 loop:
 loop2:
+# address 1 (labels take up no space)
             05                                                                                                                      0x0d0c0b0a/imm32  # add to EAX
+# address 6
+            eb                                                                                                      loop2/disp8
+# address 8
+            eb                                                                                                      loop3/disp8
+# address 10
 loop3:
-            f
 +transform: label 'loop' is at address 1
 +transform: label 'loop2' is at address 1
-+transform: label 'loop3' is at address 6
++transform: label 'loop3' is at address 10
+# first jump is to -7
++transform: instruction after transform: 'eb f9'
+# second jump is to 0 (fall through)
++transform: instruction after transform: 'eb 00'
diff --git a/subx/ex3.subx b/subx/ex3.subx
index 4dcb10e9..5b3fdb16 100644
--- a/subx/ex3.subx
+++ b/subx/ex3.subx
@@ -12,23 +12,22 @@
 # op          subop               mod             rm32          base      index     scale     r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    2 bits      0/1/2/4 bytes   0/1/2/4 bytes
   # result: EBX = 0
-# 0: e_entry = 0x08048054
   bb                                                                                                                      0/imm32           # copy 0 to EBX
   # counter: ECX = 1
   b9                                                                                                                      1/imm32           # copy 1 to ECX
 
-# 10: loop: 0x0804805e
+loop:
   # while (ECX <= 10)
   81            7/subop/compare   3/mod/direct    1/rm32/ecx                                                              0xa/imm32         # compare ECX, 10/imm
-  7f                                                                                                      0xa/disp8                         # jump-if-greater exit (+10 bytes)
+  7f                                                                                                      exit/disp8                        # jump-if-greater
   # EBX += ECX
   01                              3/mod/direct    3/rm32/ebx                                  1/r32/ecx                                     # add ECX to EBX
   # ECX++
   81            0/subop/add       3/mod/direct    1/rm32/ecx                                                              1/imm32           # add 1 to ECX
   # loop
-  eb                                                                                                      -0x12/disp8                       # jump loop (-18 bytes)
+  eb                                                                                                      loop/disp8                        # jump
 
-# 28: exit: 0x08048070
+exit:
   # exit(EBX)
   b8                                                                                                                      1/imm32           # copy 1 to EAX
   cd                                                                                                                      0x80/imm8         # int 80h
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140