From 76aace4625d4a2e0e1614acd0bea646f6db0f606 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sat, 11 Aug 2018 11:09:22 -0700 Subject: 4505 - start warning on jumps without labels As we climb the ladder of abstraction we'll gradually pull the ladder up behind ourselves. --- subx/011run.cc | 20 +++++------ subx/035labels.cc | 10 ++++++ subx/036recommend_labels.cc | 81 +++++++++++++++++++++++++++++++++++++++++++++ subx/subx.vim | 10 ++++++ 4 files changed, 111 insertions(+), 10 deletions(-) create mode 100644 subx/036recommend_labels.cc diff --git a/subx/011run.cc b/subx/011run.cc index 8c13b6ba..9c024e27 100644 --- a/subx/011run.cc +++ b/subx/011run.cc @@ -110,6 +110,7 @@ struct segment { struct line { vector words; vector metadata; + string original; }; :(before "struct line") struct word { @@ -126,10 +127,11 @@ void parse(istream& fin, program& out) { trace(99, "parse") << "begin" << end(); while (has_data(fin)) { string line_data; + line curr; getline(fin, line_data); + curr.original = line_data; trace(99, "parse") << "line: " << line_data << end(); istringstream lin(line_data); - vector w; while (has_data(lin)) { string word_data; lin >> word_data; @@ -153,20 +155,18 @@ void parse(istream& fin, program& out) { // todo: line metadata break; } - w.push_back(word()); - w.back().original = word_data; + curr.words.push_back(word()); + curr.words.back().original = word_data; istringstream win(word_data); - if (getline(win, w.back().data, '/')) { + if (getline(win, curr.words.back().data, '/')) { string m; while (getline(win, m, '/')) - w.back().metadata.push_back(m); + curr.words.back().metadata.push_back(m); } - trace(99, "parse") << "new word: " << w.back().data << end(); - } - if (!w.empty()) { - l.push_back(line()); - l.back().words.swap(w); + trace(99, "parse") << "new word: " << curr.words.back().data << end(); } + if (!curr.words.empty()) + l.push_back(curr); } if (!l.empty()) { assert(!out.segments.empty()); diff --git a/subx/035labels.cc 
b/subx/035labels.cc
index 957fa5a0..ca5851dc 100644
--- a/subx/035labels.cc
+++ b/subx/035labels.cc
@@ -14,6 +14,15 @@
 //: be a single character long. 'a' is not a hex number, it's a variable.
 //: Later layers may add more conventions partitioning the space of names. But
 //: the above rules will remain inviolate.
+:(code)
+bool is_number(const string& s) {  // does word data 's' count as a number rather than a name?
+  if (s.empty()) return false;  // empty data is not a number; also keeps s.at(0) below from throwing
+  if (s.at(0) == '-' || isdigit(static_cast<unsigned char>(s.at(0)))) return true;  // leading sign or digit; cast because isdigit() is undefined for negative char values
+  return SIZE(s) == 2;  // any other two-character word is treated as a number (presumably a hex byte like 'ab')
+}
+void test_is_number() {
+  CHECK(!is_number("a"));  // a single letter is a name, not a number
+}
 
 :(scenarios transform)
 :(scenario map_label)
@@ -33,6 +42,7 @@ void rewrite_labels(program& p) {
   trace(99, "transform") << "-- rewrite labels" << end();
   if (p.segments.empty()) return;
   segment& code = p.segments.at(0);
+  // Rewrite Labels(segment code)
   map address;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
   compute_addresses_for_labels(code, address);
   if (trace_contains_errors()) return;
diff --git a/subx/036recommend_labels.cc b/subx/036recommend_labels.cc
new file mode 100644
index 00000000..b40e2a66
--- /dev/null
+++ b/subx/036recommend_labels.cc
@@ -0,0 +1,81 @@
+//: Now that we have labels, using non-label offsets should be unnecessary.
+//: While SubX will allow programmers to write raw machine code, that isn't
+//: *recommended* once we have more ergonomic alternatives.
+
+:(scenario warn_on_jump_offset)
+== 0x1
+7e 1/disp8
++warn: '7e 1/disp8': using raw offsets for jumps is not recommended; use labels instead
+
+:(scenarios transform)
+:(scenario warn_on_call_offset)
+== 0x1
+e8 1/disp32
++warn: 'e8 1/disp32': using raw offsets for calls is not recommended; use labels instead
+:(scenarios run)
+
+:(before "Rewrite Labels(segment code)")
+recommend_labels(code);
+
+:(code)
+void recommend_labels(const segment& code) {  // check every line of 'code' for jumps/calls that use raw numeric offsets
+  trace(99, "transform") << "-- check for numeric labels" << end();
+  for (int i = 0; i < SIZE(code.lines); ++i)
+    recommend_labels(code.lines.at(i));
+}
+
+void recommend_labels(const line& inst) {  // warn if 'inst' is a jump or call whose first operand is a number
+  int idx = first_operand(inst);
+  if (idx >= SIZE(inst.words)) return;  // no operand to inspect
+  if (!is_number(inst.words.at(idx).data)) return;  // operand is not numeric, so there is nothing to warn about
+  if (is_jump(inst))
+    warn << "'" << inst.original << "': using raw offsets for jumps is not recommended; use labels instead\n" << end();
+  else if (is_call(inst))
+    warn << "'" << inst.original << "': using raw offsets for calls is not recommended; use labels instead\n" << end();
+}
+
+bool is_jump(const line& inst) {  // does 'inst' start with one of the opcodes registered by init_jump_opcodes(), or with ff and subop 4?
+  string op1 = preprocess_op(inst.words.at(0)).data;
+  if (op1 == "0f") {
+    string op2 = preprocess_op(inst.words.at(1)).data;  // two-byte opcode: the second word selects the instruction
+    return Jump_opcodes_0f.find(op2) != Jump_opcodes_0f.end();  // must look up op2; op1 is always "0f" here and is never in this set
+  }
+  if (op1 == "ff") return subop(inst) == /*subop for opcode ff*/4;
+  return Jump_opcodes.find(op1) != Jump_opcodes.end();
+}
+
+bool is_call(const line& inst) {  // is 'inst' a call: opcode e8, or opcode ff with subop 2?
+  string op1 = preprocess_op(inst.words.at(0)).data;
+  if (op1 == "e8") return true;
+  if (op1 == "ff") return subop(inst) == /*subop for opcode ff*/2;
+  return false;  // no multi-byte call opcodes
+}
+
+int subop(const line& inst) {  // bits 3-5 of the first operand of 'inst'
+  int idx = first_operand(inst);
+  assert(idx < SIZE(inst.words));  // callers must only pass instructions that have an operand
+  return (parse_int(inst.words.at(idx).data)>>3) & 0x7;
+}
+
+:(before "End Globals")
+set<string> Jump_opcodes;
+set<string> Jump_opcodes_0f;
+:(before "End One-time Setup")
+init_jump_opcodes();
+:(code)
+void init_jump_opcodes() {  // fill the opcode tables consulted by is_jump()
+  Jump_opcodes.insert("74");
+  Jump_opcodes.insert("75");
+  Jump_opcodes.insert("7c");
+  Jump_opcodes.insert("7d");
+  Jump_opcodes.insert("7e");
+  Jump_opcodes.insert("7f");
+  Jump_opcodes_0f.insert("84");  // entries in this set are matched against the word following "0f"
+  Jump_opcodes_0f.insert("85");
+  Jump_opcodes_0f.insert("8c");
+  Jump_opcodes_0f.insert("8d");
+  Jump_opcodes_0f.insert("8e");
+  Jump_opcodes_0f.insert("8f");
+  Jump_opcodes.insert("e9");
+  Jump_opcodes.insert("eb");
+}
diff --git a/subx/subx.vim b/subx/subx.vim
index 438ad732..309ebfec 100644
--- a/subx/subx.vim
+++ b/subx/subx.vim
@@ -24,3 +24,13 @@ let b:cmt_head = "#? "
 
 " comment token
 syntax match subxDelimiter / \. / | highlight link subxDelimiter Delimiter
+
+"" highlight low-level idioms in red as I provide more high-level replacements
+
+" Once we have labels, highlight raw displacement
+highlight Warn ctermbg=brown ctermfg=black
+call matchadd("Warn", '\c^\s*e8.*\<\(0x\)\?[0-9a-f]\+/disp32') " call
+call matchadd("Warn", '\c^\s*eb.*\<\(0x\)\?[0-9a-f]\+/disp8') " unconditional jump disp8
+call matchadd("Warn", '\c^\s*7[45cdef].*\<\(0x\)\?[0-9a-f]\+/disp8') " conditional jump disp8
+call matchadd("Warn", '\c^\s*e9.*\<\(0x\)\?[0-9a-f]\+/disp16') " unconditional jump disp16
+call matchadd("Warn", '\c^\s*0f[^\s]*\s*8[45cdef].*\<\(0x\)\?[0-9a-f]\+/disp16') " conditional jump disp16
-- 
cgit 1.4.1-2-gfad0