about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--subx/022check_instruction.cc1
-rw-r--r--subx/026labels.cc96
2 files changed, 97 insertions, 0 deletions
diff --git a/subx/022check_instruction.cc b/subx/022check_instruction.cc
index c4b169ff..860921ae 100644
--- a/subx/022check_instruction.cc
+++ b/subx/022check_instruction.cc
@@ -266,6 +266,7 @@ void init_permitted_operands() {
 
 :(code)
 void check_operands(const line& inst, const word& op) {
+  if (!is_hex_byte(op)) return;
   uint8_t expected_bitvector = get(Permitted_operands, op.data);
   if (HAS(expected_bitvector, MODRM))
     check_operands_modrm(inst, op);
diff --git a/subx/026labels.cc b/subx/026labels.cc
new file mode 100644
index 00000000..ece513aa
--- /dev/null
+++ b/subx/026labels.cc
@@ -0,0 +1,96 @@
+//: Labels are defined by ending names with a ':'. This layer will compute
+//: addresses for labels, and compute the offsets to use in jump instructions
+//: that refer to them.
+
+:(scenarios transform)
+:(scenario map_label)
+== 0x1
+          # instruction                     effective address                                           operand     displacement    immediate
+          # op          subop               mod             rm32          base      index     scale     r32
+          # 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+loop:
+            05                                                                                                                      0x0d0c0b0a/imm32  # add to EAX
++label: label 'loop' is at address 1
+
+:(before "End One-time Setup")
+// Append the label pass to Transform.
+// NOTE(review): presumably Transform holds functions taking program&, which
+// would select the replace_labels_with_addresses(program&) overload -- confirm.
+Transform.push_back(replace_labels_with_addresses);
+
+:(code)
+// Entry point of the label pass. Operates on the first segment of 'p' only:
+//   1. compute an address for every label definition,
+//   2. drop the label definitions from the segment,
+//   3. hand the segment and the address table to the per-segment overload.
+// Does nothing when 'p' has no segments, and stops after any step that
+// leaves errors in the trace.
+void replace_labels_with_addresses(program& p) {
+  if (!p.segments.empty()) {
+    segment& first_segment = p.segments.at(0);
+    map<string, uint32_t> label_addresses;
+    compute_addresses_for_labels(first_segment, label_addresses);
+    if (!trace_contains_errors()) {
+      drop_labels(first_segment);
+      if (!trace_contains_errors())
+        replace_labels_with_addresses(first_segment, label_addresses);
+    }
+  }
+}
+
+// Scan 'code' once, counting the bytes that precede each word, and record the
+// byte offset of every label definition (a first word ending in ':').
+//   code:    segment to scan; not modified.
+//   address: out-parameter mapping each label name (without its trailing ':')
+//            to its offset from the start of the segment. It is taken by
+//            reference so the caller actually receives the computed table.
+// Raises an error when a /disp32 or /imm32 operand ends in ':', when a label
+// definition carries operand metadata, or when a label is not the first word
+// of its line.
+// Note: the map stores the offset within the segment, while the trace line
+// reports that offset plus code.start.
+void compute_addresses_for_labels(const segment& code, map<string, uint32_t>& address) {
+  int current_byte = 0;
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    const line& inst = code.lines.at(i);
+    for (int j = 0;  j < SIZE(inst.words);  ++j) {
+      const word& curr = inst.words.at(j);
+      // Dereferencing rbegin() of an empty string is undefined, so check for
+      // emptiness first. An empty word is then sized like any other
+      // non-label word below.
+      const bool ends_with_colon = !curr.data.empty() && *curr.data.rbegin() == ':';
+      // hack: if we have any operand metadata left after previous transforms,
+      // deduce its size
+      // Maybe we should just move this transform to before instruction
+      // packing, and deduce the size of *all* operands. But then we'll also
+      // have to deal with bitfields.
+      if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
+        if (ends_with_colon)
+          raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
+        current_byte += 4;
+      }
+      // automatically handle /disp8 and /imm8 here
+      else if (!ends_with_colon) {
+        ++current_byte;
+      }
+      else {
+        if (contains_any_operand_metadata(curr))
+          raise << "'" << to_string(inst) << "': mixing label definition with ':' in operand\n" << end();
+        if (j == 0) {
+          string label = curr.data.substr(0, SIZE(curr.data)-1);
+          put(address, label, current_byte);
+          trace(99, "label") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
+          // no modifying current_byte; label definitions won't be in the final binary
+        }
+        else {
+          raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
+        }
+      }
+    }
+  }
+}
+
+// Remove every label definition (any word for which is_label() holds) from
+// each line of 'code'.
+void drop_labels(segment& code) {
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    line& inst = code.lines.at(i);
+    // remove_if only moves the surviving words to the front and returns the
+    // new logical end; erase() is what actually shrinks the container.
+    inst.words.erase(remove_if(inst.words.begin(), inst.words.end(), is_label), inst.words.end());
+  }
+}
+
+// True if 'w' is a label definition, i.e. its text ends with ':'.
+// An empty word is never a label; checking for emptiness first avoids
+// dereferencing rbegin() of an empty string, which is undefined.
+bool is_label(const word& w) {
+  return !w.data.empty() && *w.data.rbegin() == ':';
+}
+
+// Per-segment step of the label pass. The body is still empty, so nothing in
+// 'code' is rewritten yet.
+// NOTE(review): 'code' is const here; presumably it will need to become
+// mutable once label operands are actually replaced -- confirm when
+// implementing.
+void replace_labels_with_addresses(const segment& code, map<string, uint32_t> address) {
+}
+
+//: Label definitions must be the first word on a line. No jumping inside
+//: instructions.
+//: They should also be the only word on a line.
+//: However, you can absolutely have multiple labels map to the same address,
+//: as long as they're on separate lines.
+
+:(scenario multiple_labels_at)
+== 0x1
+          # instruction                     effective address                                           operand     displacement    immediate
+          # op          subop               mod             rm32          base      index     scale     r32
+          # 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    3 bits      0/1/2/4 bytes   0/1/2/4 bytes
+loop:
+loop2:
+            05                                                                                                                      0x0d0c0b0a/imm32  # add to EAX
++label: label 'loop' is at address 1
++label: label 'loop2' is at address 1