4505 - start warning on jumps without labels

As we climb the ladder of abstraction we'll gradually pull the ladder up behind ourselves.
author: Kartik Agaram <vc@akkartik.com> 2018-08-11 11:09:22 -0700
committer: Kartik Agaram <vc@akkartik.com> 2018-08-11 11:09:22 -0700
commit: 76aace4625d4a2e0e1614acd0bea646f6db0f606 (patch)
tree: e93716ccfb23a1a6ea6275af31068c010c32deed
parent: f39429b60dbceaa5ef4c2cf8a30d20fb58e3bb2e (diff)
download: mu-76aace4625d4a2e0e1614acd0bea646f6db0f606.tar.gz
4 files changed, 111 insertions, 10 deletions
diff --git a/subx/011run.cc b/subx/011run.cc
index 8c13b6ba..9c024e27 100644
--- a/subx/011run.cc
+++ b/subx/011run.cc
@@ -110,6 +110,7 @@ struct segment {
 struct line {
   vector<word> words;
   vector<string> metadata;
+  string original;  // the unparsed text of the line exactly as read by parse(); quoted in warnings
 };
 :(before "struct line")
 struct word {
@@ -126,10 +127,11 @@ void parse(istream& fin, program& out) {
   trace(99, "parse") << "begin" << end();
   while (has_data(fin)) {
     string line_data;
+    line curr;
     getline(fin, line_data);
+    curr.original = line_data;
     trace(99, "parse") << "line: " << line_data << end();
     istringstream lin(line_data);
-    vector<word> w;
     while (has_data(lin)) {
       string word_data;
       lin >> word_data;
@@ -153,20 +155,18 @@ void parse(istream& fin, program& out) {
         // todo: line metadata
         break;
       }
-      w.push_back(word());
-      w.back().original = word_data;
+      curr.words.push_back(word());
+      curr.words.back().original = word_data;
       istringstream win(word_data);
-      if (getline(win, w.back().data, '/')) {
+      if (getline(win, curr.words.back().data, '/')) {
         string m;
         while (getline(win, m, '/'))
-          w.back().metadata.push_back(m);
+          curr.words.back().metadata.push_back(m);
       }
-      trace(99, "parse") << "new word: " << w.back().data << end();
-    }
-    if (!w.empty()) {
-      l.push_back(line());
-      l.back().words.swap(w);
+      trace(99, "parse") << "new word: " << curr.words.back().data << end();
     }
+    if (!curr.words.empty())
+      l.push_back(curr);
   }
   if (!l.empty()) {
     assert(!out.segments.empty());
diff --git a/subx/035labels.cc b/subx/035labels.cc
index 957fa5a0..ca5851dc 100644
--- a/subx/035labels.cc
+++ b/subx/035labels.cc
@@ -14,6 +14,15 @@
 //: be a single character long. 'a' is not a hex number, it's a variable.
 //: Later layers may add more conventions partitioning the space of names. But
 //: the above rules will remain inviolate.
+:(code)
+bool is_number(const string& s) {  // true if s starts with '-' or a digit, or is exactly 2 characters long (presumably a hex byte)
+  if (!s.empty() && s.at(0) == '-') return true;  // guard: an empty word is never a number, and at(0) would throw on it
+  if (!s.empty() && isdigit(static_cast<unsigned char>(s.at(0)))) return true;  // cast: isdigit() is undefined for negative char values
+  return SIZE(s) == 2;  // anything else counts as a number only if it is two characters long
+}
+void test_is_number() {  // a single-letter name such as "a" must be treated as a variable, not a number
+  CHECK(!is_number("a"));
+}
 
 :(scenarios transform)
 :(scenario map_label)
@@ -33,6 +42,7 @@ void rewrite_labels(program& p) {
   trace(99, "transform") << "-- rewrite labels" << end();
   if (p.segments.empty()) return;
   segment& code = p.segments.at(0);
+  // Rewrite Labels(segment code)
   map<string, int32_t> address;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
   compute_addresses_for_labels(code, address);
   if (trace_contains_errors()) return;
diff --git a/subx/036recommend_labels.cc b/subx/036recommend_labels.cc
new file mode 100644
index 00000000..b40e2a66
--- /dev/null
+++ b/subx/036recommend_labels.cc
@@ -0,0 +1,81 @@
+//: Now that we have labels, using non-label offsets should be unnecessary.
+//: While SubX will allow programmers to write raw machine code, that isn't
+//: *recommended* once we have more ergonomic alternatives.
+
+:(scenario warn_on_jump_offset)
+== 0x1
+7e 1/disp8
++warn: '7e 1/disp8': using raw offsets for jumps is not recommended; use labels instead
+
+:(scenarios transform)
+:(scenario warn_on_call_offset)
+== 0x1
+e8 1/disp32
++warn: 'e8 1/disp32': using raw offsets for calls is not recommended; use labels instead
+:(scenarios run)
+
+:(before "Rewrite Labels(segment code)")
+recommend_labels(code);
+
+:(code)
+void recommend_labels(const segment& code) {  // run the per-line check on every line of the segment
+  trace(99, "transform") << "-- check for numeric labels" << end();
+  for (int i = 0;  i < SIZE(code.lines);  ++i)
+    recommend_labels(code.lines.at(i));  // dispatches to the 'const line&' overload
+}
+
+void recommend_labels(const line& inst) {  // warn if a jump or call instruction has a numeric first operand
+  int idx = first_operand(inst);
+  if (idx >= SIZE(inst.words)) return;  // no operand word on this line; nothing to check
+  if (!is_number(inst.words.at(idx).data)) return;  // operand is not numeric (e.g. a label name); fine
+  if (is_jump(inst))
+    warn << "'" << inst.original << "': using raw offsets for jumps is not recommended; use labels instead\n" << end();
+  else if (is_call(inst))
+    warn << "'" << inst.original << "': using raw offsets for calls is not recommended; use labels instead\n" << end();
+}
+
+bool is_jump(const line& inst) {  // does inst begin with one of the known jump opcodes?
+  string op1 = preprocess_op(inst.words.at(0)).data;
+  if (op1 == "0f") {  // two-byte opcode: the second word decides
+    string op2 = preprocess_op(inst.words.at(1)).data;
+    return Jump_opcodes_0f.find(op2) != Jump_opcodes_0f.end();  // look up the second byte; op1 is always "0f" here and is never in the set
+  }
+  if (op1 == "ff") return subop(inst) == /*subop for opcode ff*/4;  // opcode ff is shared; only subop 4 is a jump
+  return Jump_opcodes.find(op1) != Jump_opcodes.end();
+}
+
+bool is_call(const line& inst) {  // does inst begin with a call opcode?
+  string op1 = preprocess_op(inst.words.at(0)).data;
+  if (op1 == "e8") return true;  // e8 is always a call
+  if (op1 == "ff") return subop(inst) == /*subop for opcode ff*/2;  // opcode ff is shared; only subop 2 is a call
+  return false;  // no multi-byte call opcodes
+}
+
+int subop(const line& inst) {  // extract the 3-bit sub-opcode from the first operand word of inst
+  int idx = first_operand(inst);
+  assert(idx < SIZE(inst.words));  // callers must only pass instructions that have an operand word
+  return (parse_int(inst.words.at(idx).data)>>3) & 0x7;  // bits 3..5 of the operand (presumably the ModR/M byte -- confirm)
+}
+
+:(before "End Globals")
+set<string> Jump_opcodes;  // single-byte jump opcodes, stored as lowercase hex strings
+set<string> Jump_opcodes_0f;  // second byte of the two-byte jump opcodes that start with 0f
+:(before "End One-time Setup")
+init_jump_opcodes();
+:(code)
+void init_jump_opcodes() {  // fill the opcode sets consulted by is_jump(); called once during setup
+  Jump_opcodes.insert("74");  // 74..7f: single-byte conditional jumps
+  Jump_opcodes.insert("75");
+  Jump_opcodes.insert("7c");
+  Jump_opcodes.insert("7d");
+  Jump_opcodes.insert("7e");
+  Jump_opcodes.insert("7f");
+  Jump_opcodes_0f.insert("84");  // 0f 84..8f: two-byte conditional jumps (only the second byte is stored)
+  Jump_opcodes_0f.insert("85");
+  Jump_opcodes_0f.insert("8c");
+  Jump_opcodes_0f.insert("8d");
+  Jump_opcodes_0f.insert("8e");
+  Jump_opcodes_0f.insert("8f");
+  Jump_opcodes.insert("e9");  // e9, eb: unconditional jumps
+  Jump_opcodes.insert("eb");
+}
diff --git a/subx/subx.vim b/subx/subx.vim
index 438ad732..309ebfec 100644
--- a/subx/subx.vim
+++ b/subx/subx.vim
@@ -24,3 +24,13 @@ let b:cmt_head = "#? "
 
 " comment token
 syntax match subxDelimiter / \. /  | highlight link subxDelimiter Delimiter
+
+"" highlight low-level idioms in red as I provide more high-level replacements
+
+" Once we have labels, highlight raw displacement
+highlight Warn ctermbg=brown ctermfg=black
+call matchadd("Warn", '\c^\s*e8.*\<\(0x\)\?[0-9a-f]\+/disp32')  " call
+call matchadd("Warn", '\c^\s*eb.*\<\(0x\)\?[0-9a-f]\+/disp8')  " unconditional jump disp8 (eb is the short-jump opcode)
+call matchadd("Warn", '\c^\s*7[45cdef].*\<\(0x\)\?[0-9a-f]\+/disp8')  " conditional jump disp8
+call matchadd("Warn", '\c^\s*e9.*\<\(0x\)\?[0-9a-f]\+/disp16')  " unconditional jump disp16 (e9 is the near-jump opcode)
+call matchadd("Warn", '\c^\s*0f\S*\s*8[45cdef].*\<\(0x\)\?[0-9a-f]\+/disp16')  " conditional jump disp16 (\S: \s is not a class inside [])
author	Kartik Agaram <vc@akkartik.com>	2018-08-11 11:09:22 -0700
committer	Kartik Agaram <vc@akkartik.com>	2018-08-11 11:09:22 -0700
commit	76aace4625d4a2e0e1614acd0bea646f6db0f606 (patch)
tree	e93716ccfb23a1a6ea6275af31068c010c32deed
parent	f39429b60dbceaa5ef4c2cf8a30d20fb58e3bb2e (diff)
download	mu-76aace4625d4a2e0e1614acd0bea646f6db0f606.tar.gz