diff options
author | Kartik Agaram <vc@akkartik.com> | 2018-08-20 22:13:45 -0700 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2018-08-20 22:19:41 -0700 |
commit | 10ad628f73a02697763ad82a69a91ffacf5edb47 (patch) | |
tree | 09058cc51441fa7af8b2a9a8dccb643982298ba6 /subx | |
parent | a754efc54223240c351d14869f32cd90ada958c1 (diff) | |
download | mu-10ad628f73a02697763ad82a69a91ffacf5edb47.tar.gz |
4523 - Give up on pass-through phases
I'm going to continue using them for now, but I'm fairly certain now that they're just a temporary device to help rapidly prototype ideas. The reason: there are just too many ways to abuse low-level features, and it ends up taking too much code to disallow things soon after you allow them. New plan: stop trying to write checks, just treat them as temporary conventions for now. The goal is now just to get the core sequence of passes nailed down. Then we'll start reimplementing them from the ground up. First implication of this new plan: ripping out most existing checks. I'm still going to eventually build type checks. But no degenerate checks for code just being too low-level. (This decision is the outcome of a few days of noodling over Forth and https://mastodon.social/@akkartik/100549913519614800.)
Diffstat (limited to 'subx')
-rw-r--r-- | subx/033non_code_segment.cc | 30 | ||||
-rw-r--r-- | subx/034discourage_raw_hex.cc | 36 | ||||
-rw-r--r-- | subx/035labels.cc | 4 | ||||
-rw-r--r-- | subx/036recommend_labels.cc | 81 | ||||
-rw-r--r-- | subx/037label_types.cc | 45 | ||||
-rw-r--r-- | subx/038check_local_jumps.cc | 60 | ||||
-rw-r--r-- | subx/subx.vim | 17 |
7 files changed, 4 insertions, 269 deletions
diff --git a/subx/033non_code_segment.cc b/subx/033non_code_segment.cc deleted file mode 100644 index c0549023..00000000 --- a/subx/033non_code_segment.cc +++ /dev/null @@ -1,30 +0,0 @@ -//: Raise an error when operand metadata is used in non-code segments. - -:(scenario operand_metadata_outside_code_segment) -% Hide_errors = true; -== 0x1 # code segment -cd 0x80/imm8 -== 0x1000 # data segment -cd 12/imm8 -+error: 12/imm8: metadata imm8 is only allowed in the (first) code segment - -:(before "Pack Operands(segment code)") -ensure_operands_only_in_code_segments(p); -if (trace_contains_errors()) return; -:(code) -void ensure_operands_only_in_code_segments(const program& p) { - trace(99, "transform") << "-- ensure operands only in code segments" << end(); - if (p.segments.empty()) return; - for (int i = /*skip code segment*/1; i < SIZE(p.segments); ++i) { - const segment& seg = p.segments.at(i); - for (int j = 0; j < SIZE(seg.lines); ++j) { - const line& l = seg.lines.at(j); - for (int k = 0; k < SIZE(l.words); ++k) { - const word& w = l.words.at(k); - for (map<string, uint32_t>::iterator p = Operand_bound.begin(); p != Operand_bound.end(); ++p) - if (has_metadata(w, p->first)) - raise << w.original << ": metadata " << p->first << " is only allowed in the (first) code segment\n" << end(); - } - } - } -} diff --git a/subx/034discourage_raw_hex.cc b/subx/034discourage_raw_hex.cc deleted file mode 100644 index 2b2e2675..00000000 --- a/subx/034discourage_raw_hex.cc +++ /dev/null @@ -1,36 +0,0 @@ -//: Now that we have operand metadata, start warning on instructions that -//: don't use it. -//: -//: While SubX will let you write raw machine code, don't do that unless you -//: have a very good reason. 
- -:(before "Pack Operands(segment code)") -warn_on_raw_hex(code); -if (trace_contains_errors()) return; -:(code) -void warn_on_raw_hex(const segment& code) { - trace(99, "transform") << "-- warn on raw hex instructions" << end(); - for (int i = 0; i < SIZE(code.lines); ++i) { - const line& inst = code.lines.at(i); - if (all_hex_bytes(inst) && has_operands(inst)) { - warn << "'" << to_string(inst) << "': using raw hex is not recommended\n" << end(); - break; - } - } -} - -:(scenarios transform) -:(scenario warn_on_hex_bytes_without_operands) -== 0x1 -bb 2a 00 00 00 # copy 0x2a (42) to EBX -+warn: 'bb 2a 00 00 00': using raw hex is not recommended - -:(scenario warn_on_non_operand_metadata) -== 0x1 -bb 2a 00/foo 00/bar 00 # copy 0x2a (42) to EBX -+warn: 'bb 2a 00/foo 00/bar 00': using raw hex is not recommended - -:(scenario no_warn_on_instructions_without_operands) -== 0x1 -55 # push EBP --warn: '55': using raw hex is not recommended diff --git a/subx/035labels.cc b/subx/035labels.cc index 236c8fa2..0d366d2e 100644 --- a/subx/035labels.cc +++ b/subx/035labels.cc @@ -1,5 +1,9 @@ //: Labels are defined by ending names with a ':'. This layer will compute //: addresses for labels, and compute the offset for instructions using them. +//: +//: We won't check this, but our convention will be that jump targets will +//: start with a '$', while functions will not. Function names will never be +//: jumped to, and jump targets will never be called. //: We're introducing non-number names for the first time, so it's worth //: laying down some ground rules all transforms will follow, so things don't diff --git a/subx/036recommend_labels.cc b/subx/036recommend_labels.cc deleted file mode 100644 index 3d603842..00000000 --- a/subx/036recommend_labels.cc +++ /dev/null @@ -1,81 +0,0 @@ -//: Now that we have labels, using non-label offsets should be unnecessary. 
-//: While SubX will allow programmers to write raw machine code, that isn't -//: *recommended* once we have more ergonomic alternatives. - -:(scenario warn_on_jump_offset) -== 0x1 -7e/jump-if 1/disp8 -+warn: '7e/jump-if 1/disp8': using raw offsets for jumps is not recommended; use labels instead - -:(scenarios transform) -:(scenario warn_on_call_offset) -== 0x1 -e8/call 1/disp32 -+warn: 'e8/call 1/disp32': using raw offsets for calls is not recommended; use labels instead -:(scenarios run) - -:(before "Rewrite Labels(segment code)") -recommend_labels(code); -if (trace_contains_errors()) return; -:(code) -void recommend_labels(const segment& code) { - trace(99, "transform") << "-- check for numeric labels" << end(); - for (int i = 0; i < SIZE(code.lines); ++i) - recommend_labels(code.lines.at(i)); -} - -void recommend_labels(const line& inst) { - int idx = first_operand(inst); - if (idx >= SIZE(inst.words)) return; - if (!is_number(inst.words.at(idx).data)) return; - if (is_jump(inst)) - warn << "'" << inst.original << "': using raw offsets for jumps is not recommended; use labels instead\n" << end(); - else if (is_call(inst)) - warn << "'" << inst.original << "': using raw offsets for calls is not recommended; use labels instead\n" << end(); -} - -bool is_jump(const line& inst) { - string op1 = preprocess_op(inst.words.at(0)).data; - if (op1 == "0f") { - string op2 = preprocess_op(inst.words.at(1)).data; - return Jump_opcodes_0f.find(op1) != Jump_opcodes_0f.end(); - } - if (op1 == "ff") return subop(inst) == /*subop for opcode ff*/4; - return Jump_opcodes.find(op1) != Jump_opcodes.end(); -} - -bool is_call(const line& inst) { - string op1 = preprocess_op(inst.words.at(0)).data; - if (op1 == "e8") return true; - if (op1 == "ff") return subop(inst) == /*subop for opcode ff*/2; - return false; // no multi-byte call opcodes -} - -int subop(const line& inst) { - int idx = first_operand(inst); - assert(idx < SIZE(inst.words)); - return 
(parse_int(inst.words.at(idx).data)>>3) & 0x7; -} - -:(before "End Globals") -set<string> Jump_opcodes; -set<string> Jump_opcodes_0f; -:(before "End One-time Setup") -init_jump_opcodes(); -:(code) -void init_jump_opcodes() { - Jump_opcodes.insert("74"); - Jump_opcodes.insert("75"); - Jump_opcodes.insert("7c"); - Jump_opcodes.insert("7d"); - Jump_opcodes.insert("7e"); - Jump_opcodes.insert("7f"); - Jump_opcodes_0f.insert("84"); - Jump_opcodes_0f.insert("85"); - Jump_opcodes_0f.insert("8c"); - Jump_opcodes_0f.insert("8d"); - Jump_opcodes_0f.insert("8e"); - Jump_opcodes_0f.insert("8f"); - Jump_opcodes.insert("e9"); - Jump_opcodes.insert("eb"); -} diff --git a/subx/037label_types.cc b/subx/037label_types.cc deleted file mode 100644 index b80db732..00000000 --- a/subx/037label_types.cc +++ /dev/null @@ -1,45 +0,0 @@ -//: Distinguish between labels marking the start of a function, and labels -//: inside functions. -//: -//: - Labels within functions start with a '$', and are only permitted in -//: 'jump' instructions. -//: -//: - Labels marking the start of functions lack the '$' sigil, and are only -//: permitted in 'call' instructions. 
- -:(before "Rewrite Labels(segment code)") -check_label_types(code); -if (trace_contains_errors()) return; -:(code) -void check_label_types(const segment& code) { - trace(99, "transform") << "-- check label types" << end(); - for (int i = 0; i < SIZE(code.lines); ++i) - check_label_types(code.lines.at(i)); -} - -void check_label_types(const line& inst) { - int idx = first_operand(inst); - if (idx >= SIZE(inst.words)) return; - const word& target = inst.words.at(idx); - if (is_number(target.data)) return; // handled elsewhere - if (is_jump(inst) && target.data.at(0) != '$') - raise << "'" << inst.original << "': jumps should always be to internal labels starting with '$'\n" << end(); - if (is_call(inst) && target.data.at(0) == '$') - raise << "'" << inst.original << "': calls should always be to function labels (not starting with '$')\n" << end(); -} - -:(scenario catch_jump_to_function) -% Hide_errors = true; -== 0x1 -main: -7e/jump-if foo/disp8 -foo: -+error: '7e/jump-if foo/disp8': jumps should always be to internal labels starting with '$' - -:(scenario catch_call_to_internal_label) -% Hide_errors = true; -== 0x1 -main: -e8/call $foo/disp32 - $foo: # indent to avoid looking like a trace_count command for this scenario -+error: 'e8/call $foo/disp32': calls should always be to function labels (not starting with '$') diff --git a/subx/038check_local_jumps.cc b/subx/038check_local_jumps.cc deleted file mode 100644 index 41f8e471..00000000 --- a/subx/038check_local_jumps.cc +++ /dev/null @@ -1,60 +0,0 @@ -//: Make sure that we never jump from one function to within another. -//: -//: (The check for label types already ensures we can't jump to the start of -//: another function.) 
- -:(scenario jump_to_different_function) -% Hide_errors = true; -== 0x1 -fn1: - 7e/jump-if $target/disp8 -fn2: - $target: -+error: '7e/jump-if $target/disp8' in function 'fn1': jump to within another function 'fn2' is a *really* bad idea - -:(before "Rewrite Labels(segment code)") -check_local_jumps(code); -if (trace_contains_errors()) return; -:(code) -void check_local_jumps(const segment& code) { - map</*jump target*/string, /*containing call target*/string> function; - compute_function_target(code, function); - if (trace_contains_errors()) return; - string current_function; - for (int i = 0; i < SIZE(code.lines); ++i) { - const line& inst = code.lines.at(i); - if (SIZE(inst.words) == 1 && is_label(inst.words.at(0))) { - // label definition - if (inst.words.at(0).data.at(0) != '$') - current_function = drop_last(inst.words.at(0).data); - } - else if (is_jump(inst)) { - const word& target = inst.words.at(first_operand(inst)); - if (!contains_key(function, target.data)) continue; // error/warning handled elsewhere - if (get(function, target.data) == current_function) continue; - raise << "'" << to_string(inst) << "' in function '" << current_function << "': jump to within another function '" << get(function, target.data) << "' is a *really* bad idea\n" << end(); - return; - } - } -} - -void compute_function_target(const segment& code, map<string, string>& out) { - string current_function; - for (int i = 0; i < SIZE(code.lines); ++i) { - const line& inst = code.lines.at(i); - if (SIZE(inst.words) != 1) continue; - const word& curr = inst.words.at(0); - if (!is_label(curr)) continue; - const string& label = drop_last(curr.data); - if (label.at(0) != '$') { - current_function = label; - continue; - } - if (contains_key(out, label)) { - raise << "duplicate label '" << label << "'\n" << end(); - return; - } - // current_function can be empty! if so that would be 'main'. 
- put(out, label, current_function); - } -} diff --git a/subx/subx.vim b/subx/subx.vim index e6825c72..454c9454 100644 --- a/subx/subx.vim +++ b/subx/subx.vim @@ -25,21 +25,4 @@ let b:cmt_head = "#? " " comment token syntax match subxDelimiter / \. / | highlight link subxDelimiter Delimiter -"" highlight low-level idioms in red as I provide more high-level replacements - -" Once we have labels, highlight raw displacement -highlight Warn ctermbg=brown ctermfg=black -call matchadd("Warn", '\c^\s*e8.*\<\(0x\)\?[0-9a-f]\+/disp32') " call -call matchadd("Warn", '\c^\s*e9.*\<\(0x\)\?[0-9a-f]\+/disp8') " unconditional jump disp8 -call matchadd("Warn", '\c^\s*7[45cdef].*\<\(0x\)\?[0-9a-f]\+/disp8') " conditional jump disp8 -call matchadd("Warn", '\c^\s*eb.*\<\(0x\)\?[0-9a-f]\+/disp16') " unconditional jump disp16 -call matchadd("Warn", '\c^\s*0f[^\s]*\s*8[45cdef].*\<\(0x\)\?[0-9a-f]\+/disp16') " conditional jump disp16 - -" Mismatch in label type -call matchadd("Error", '\c^\s*e8.*\$') " call -call matchadd("Error", '\c^\s*e9\(/[^ ]*\)\?\s*\(\.\s*\)\+[^\$\. ]\([ \$0-9a-fA-F-]\+\>\)\@!') " unconditional jump disp8 -call matchadd("Error", '\c^\s*7[45cdef]\(/[^ ]*\)\?\s*\(\.\s*\)\+[^\$\. ]\([ \$0-9a-fA-F-]\+\>\)\@!') " conditional jump disp8 -call matchadd("Error", '\c^\s*eb\(/[^ ]*\)\?\s*\(\.\s*\)\+[^\$\. ]\([ \$0-9a-fA-F-]\+\>\)\@!') " unconditional jump disp16 -call matchadd("Error", '\c^\s*0f[^\s]*\s*8[45cdef]\(/[^ ]*\)\?\s*\(\.\s*\)\+[^\$\. ]\([ \$0-9a-fA-F-]\+\>\)\@!') " conditional jump disp16 - let &cpo = s:save_cpo |