diff options
Diffstat (limited to 'subx')
-rw-r--r-- | subx/001help.cc | 7 | ||||
-rw-r--r-- | subx/010---vm.cc | 11 | ||||
-rw-r--r-- | subx/035labels.cc | 33 | ||||
-rw-r--r-- | subx/036global_variables.cc | 4 | ||||
-rw-r--r-- | subx/038---literal_strings.cc | 1 | ||||
-rw-r--r-- | subx/039debug.cc | 94 | ||||
-rw-r--r-- | subx/Readme.md | 66 | ||||
-rw-r--r-- | subx/apps/dquotes.subx | 2 | ||||
-rwxr-xr-x | subx/dgen | 4 | ||||
-rwxr-xr-x | subx/drun | 4 |
10 files changed, 171 insertions, 55 deletions
diff --git a/subx/001help.cc b/subx/001help.cc index 8d815be5..79589cf8 100644 --- a/subx/001help.cc +++ b/subx/001help.cc @@ -80,10 +80,9 @@ void init_help() { "== Debugging aids\n" "- Add '--trace' to any of these commands to print a trace to stderr\n" " for debugging purposes.\n" - "- Add '--map' to add information to traces. 'subx --map translate' will save\n" - " (to a file called 'map') the mapping from labels to addresses that it computes\n" - " during translation. This file is then available to 'subx --map --trace run'\n" - " which prints out label names in the trace as it encounters them.\n" + "- Add '--debug' to add information to traces. 'subx --debug translate' will\n" + " save various mappings to files that 'subx --debug --trace run'\n" + " can use to make traces more informative.\n" "\n" "Options starting with '--' must always come before any other arguments.\n" "\n" diff --git a/subx/010---vm.cc b/subx/010---vm.cc index 6fac9cf7..31e5608f 100644 --- a/subx/010---vm.cc +++ b/subx/010---vm.cc @@ -305,7 +305,7 @@ void run_one_instruction() { } uint32_t inst_start_address = EIP; op = next(); - trace(Callstack_depth, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << call_label(op) << end(); + trace(Callstack_depth+1, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << end(); switch (op) { case 0xf4: // hlt EIP = End_of_program; @@ -369,7 +369,7 @@ inline uint8_t next() { void dump_registers() { ostringstream out; - out << "registers: "; + out << "registers before: "; for (int i = 0; i < NUM_INT_REGISTERS; ++i) { if (i > 0) out << "; "; out << " " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u; @@ -378,13 +378,6 @@ void dump_registers() { trace(Callstack_depth+1, "run") << out.str() << end(); } -// debugging info from a later layer -string call_label(uint8_t op) { - if (op != 0xe8) return ""; - // End Trace Call Instruction - return "/call"; -} - //: start 
tracking supported opcodes :(before "End Globals") map</*op*/string, string> Name; diff --git a/subx/035labels.cc b/subx/035labels.cc index 5596f6cc..f3131168 100644 --- a/subx/035labels.cc +++ b/subx/035labels.cc @@ -138,6 +138,8 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& int current_byte = 0; for (int i = 0; i < SIZE(code.lines); ++i) { const line& inst = code.lines.at(i); + if (Source_lines_file.is_open() && !inst.original.empty() && /*not a label*/ *inst.words.at(0).data.rbegin() != ':') + Source_lines_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << inst.original << '\n'; for (int j = 0; j < SIZE(inst.words); ++j) { const word& curr = inst.words.at(j); // hack: if we have any operand metadata left after previous transforms, @@ -168,8 +170,8 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end(); if (j > 0) raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end(); - if (Map_file.is_open()) - Map_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n'; + if (Labels_file.is_open()) + Labels_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n'; if (contains_key(byte_index, label) && label != "Entry") { raise << "duplicate label '" << label << "'\n" << end(); return; @@ -183,20 +185,27 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& } :(before "End Globals") -bool Dump_map = false; // currently used only by 'subx translate' -ofstream Map_file; +bool Dump_debug_info = false; // currently used only by 'subx translate' +ofstream Labels_file; +ofstream Source_lines_file; :(before "End Commandline Options") -else if (is_equal(*arg, "--map")) { - Dump_map = true; - // End --map Settings +else if (is_equal(*arg, "--debug")) { + Dump_debug_info = true; + // End --debug 
Settings } -//: wait to open "map" for writing until we're sure we aren't trying to read it +//: wait to open "labels" for writing until we're sure we aren't trying to read it :(after "Begin subx translate") -if (Dump_map) - Map_file.open("map"); +if (Dump_debug_info) { + cerr << "saving address->label information to 'labels'\n"; + Labels_file.open("labels"); + cerr << "saving address->source information to 'source_lines'\n"; + Source_lines_file.open("source_lines"); +} :(before "End subx translate") -if (Dump_map) - Map_file.close(); +if (Dump_debug_info) { + Labels_file.close(); + Source_lines_file.close(); +} :(code) void drop_labels(segment& code) { diff --git a/subx/036global_variables.cc b/subx/036global_variables.cc index 846cd291..fffabf72 100644 --- a/subx/036global_variables.cc +++ b/subx/036global_variables.cc @@ -54,8 +54,8 @@ void compute_addresses_for_global_variables(const segment& s, map<string, uint32 if (trace_contains_errors()) return; if (j > 0) raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end(); - if (Map_file.is_open()) - Map_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n'; + if (Labels_file.is_open()) + Labels_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n'; if (contains_key(address, variable)) { raise << "duplicate global '" << variable << "'\n" << end(); return; diff --git a/subx/038---literal_strings.cc b/subx/038---literal_strings.cc index a795ce23..65a7740b 100644 --- a/subx/038---literal_strings.cc +++ b/subx/038---literal_strings.cc @@ -104,6 +104,7 @@ void parse_instruction_character_by_character(const string& line_data, vector<li istringstream in(line_data); in >> std::noskipws; line result; + result.original = line_data; // add tokens (words or strings) one by one while (has_data(in)) { skip_whitespace(in); diff --git a/subx/039debug.cc b/subx/039debug.cc index fc0b622b..26bb5f7a 100644 --- a/subx/039debug.cc +++ 
b/subx/039debug.cc @@ -1,15 +1,18 @@ //:: Some helpers for debugging. -//: Load the 'map' file generated during 'subx --map translate' when running 'subx --map --trace run'. +//: Load the debug info (the 'labels' and 'source_lines' files) generated during 'subx --debug translate' when running +//: 'subx --debug --trace run'. //: (It'll only affect the trace.) :(before "End Globals") map</*address*/uint32_t, string> Symbol_name; // used only by 'subx run' -:(before "End --map Settings") -load_map("map"); +map</*address*/uint32_t, string> Source_line; // used only by 'subx run' +:(before "End --debug Settings") +load_labels(); +load_source_lines(); :(code) -void load_map(const string& map_filename) { - ifstream fin(map_filename.c_str()); +void load_labels() { + ifstream fin("labels"); fin >> std::hex; while (has_data(fin)) { uint32_t addr = 0; @@ -20,17 +23,43 @@ void load_map(const string& map_filename) { } } +void load_source_lines() { + ifstream fin("source_lines"); + fin >> std::hex; + while (has_data(fin)) { + uint32_t addr = 0; + fin >> addr; + string line; + getline(fin, line); + put(Source_line, addr, hacky_squeeze_out_whitespace(line)); + } +} + :(after "Run One Instruction") if (contains_key(Symbol_name, EIP)) trace(Callstack_depth, "run") << "== label " << get(Symbol_name, EIP) << end(); +if (contains_key(Source_line, EIP)) + trace(Callstack_depth, "run") << "0x" << HEXWORD << EIP << ": " << get(Source_line, EIP) << end(); +else + // no source line info; do what you can + trace(Callstack_depth, "run") << "0x" << HEXWORD << EIP << ": " << debug_info(EIP) << end(); -//: make calls in particular more salient -:(before "End Trace Call Instruction") -// at this point we've skipped past the e8 opcode, but not the offset operand -int32_t offset = read_mem_i32(EIP); -uint32_t next_eip = EIP+offset+4; -if (contains_key(Symbol_name, next_eip)) - return "/call "+get(Symbol_name, next_eip); +:(code) +string debug_info(uint32_t inst_address) { + uint8_t op = read_mem_u8(EIP); + if (op != 0xe8) { + 
ostringstream out; + out << HEXBYTE << NUM(op); + return out.str(); + } + int32_t offset = read_mem_i32(EIP+/*skip op*/1); + uint32_t next_eip = EIP+/*inst length*/5+offset; + if (contains_key(Symbol_name, next_eip)) + return "e8/call "+get(Symbol_name, next_eip); + ostringstream out; + out << "e8/call 0x" << HEXWORD << next_eip; + return out.str(); +} //: If a label starts with '$watch-', make a note of the effective address //: computed by the next instruction. Start dumping out its contents to the @@ -61,3 +90,44 @@ if (!Watch_this_effective_address.empty()) { dbg << "now watching " << HEXWORD << addr << " for " << Watch_this_effective_address << end(); put(Watch_points, Watch_this_effective_address, addr); } + +//: helpers + +:(code) +string hacky_squeeze_out_whitespace(const string& s) { + // strip whitespace at start + string::const_iterator first = s.begin(); + while (first != s.end() && isspace(*first)) + ++first; + if (first == s.end()) return ""; + + // strip whitespace at end + string::const_iterator last = --s.end(); + while (last != s.begin() && isspace(*last)) + --last; + ++last; + + // replace runs of spaces/dots with single space until comment or string + // TODO: + // leave alone dots not surrounded by whitespace + // leave alone '#' within word + // leave alone '"' within word + // squeeze spaces after end of string + ostringstream out; + bool previous_was_space = false; + bool in_comment_or_string = false; + for (string::const_iterator curr = first; curr != last; ++curr) { + if (in_comment_or_string) + out << *curr; + else if (isspace(*curr) || *curr == '.') + previous_was_space = true; + else { + if (previous_was_space) + out << ' '; + out << *curr; + previous_was_space = false; + if (*curr == '#' || *curr == '"') in_comment_or_string = true; + } + } + return out.str(); +} diff --git a/subx/Readme.md b/subx/Readme.md index b3b58c86..e8e4ad6d 100644 --- a/subx/Readme.md +++ b/subx/Readme.md @@ -2,16 +2,22 @@ SubX is a simple, minimalist stack for 
programming your computer. - ``` + ```sh $ git clone https://github.com/akkartik/mu $ cd mu/subx $ ./subx # print out a help message ``` +SubX is designed: + +* to explore ways to turn arbitrary manual tests into reproducible automated tests, +* to be easy to implement in itself, and +* to help learn and teach the x86 instruction set. + It requires a Unix-like environment with a C++ compiler (Linux or BSD or Mac OS). Running `subx` will transparently compile it as necessary. -[![Build Status](https://api.travis-ci.org/akkartik/mu.svg)](https://travis-ci.org/akkartik/mu) +[![Build Status](https://api.travis-ci.org/akkartik/mu.svg?branch=master)](https://travis-ci.org/akkartik/mu) You can generate native ELF binaries with it that run on a bare Linux kernel. No other dependencies needed. @@ -35,8 +41,10 @@ messages. Emulated runs generate a trace that permits [time-travel debugging](https://github.com/akkartik/mu/blob/master/browse_trace/Readme.md). ```sh - $ ./subx --map translate examples/factorial.subx -o examples/factorial - $ ./subx --map --trace run examples/factorial + $ ./subx --debug translate examples/factorial.subx -o examples/factorial + saving address->label information to 'labels' + saving address->source information to 'source_lines' + $ ./subx --debug --trace run examples/factorial saving trace to 'last_run' $ ../browse_trace/browse_trace last_run # text-mode debugger UI ``` @@ -402,12 +410,12 @@ rudimentary but hopefully still workable toolkit: * As a further refinement, it is possible to render label names in the trace by adding a second flag to both the `translate` and `run` commands: ``` - $ ./subx --map translate input.subx -o binary - $ ./subx --map --trace run binary arg1 arg2 2>trace + $ ./subx --debug translate input.subx -o binary + $ ./subx --debug --trace run binary arg1 arg2 2>trace ``` - `subx --map translate` emits a mapping from label to address in a file - called `map`. 
`subx --map --trace run` reads in the `map` file at the start - and prints out any matching label name as it traces each instruction + `subx --debug translate` emits a mapping from label to address in a file + called `labels`. `subx --debug --trace run` reads in the `labels` file at + the start and prints out any matching label name as it traces each instruction executed. Here's a sample of what a trace looks like, with a few boxes highlighted: @@ -513,8 +521,14 @@ trace, or if you have questions or complaints. ### 'system calls' -A major goal of SubX is testable wrappers for operating system syscalls. -Here's what I've built so far: +As I said at the top, a primary design goal of SubX (and Mu more broadly) is +to explore ways to turn arbitrary manual tests into reproducible automated +tests. SubX aims for this goal by baking testable interfaces deep into the +stack, at the OS syscall level. The idea is that every syscall that interacts +with hardware (and so the environment) should be *dependency injected* so that +it's possible to insert fake hardware in tests. + +But those are big goals. Here are the syscalls I have so far: * `write`: takes two arguments, a file `f` and an address to array `s`. @@ -568,6 +582,10 @@ Here's what I've built so far: * ... _(to be continued)_ +I will continue to import syscalls over time from [the old Mu VM in the parent +directory](https://github.com/akkartik/mu), which has experimented with +interfaces for the screen, keyboard, mouse, disk and network. + ### primitives built atop system calls _(Compound arguments are usually passed in by reference. 
Where the results are @@ -654,6 +672,32 @@ from a slice: * `skip-chars-matching-in-slice`: curr, end, delimiter byte -> new-curr (in `EAX`) * `skip-chars-not-matching-in-slice`: curr, end, delimiter byte -> new-curr (in `EAX`) +## Conclusion + +The hypothesis of Mu and SubX is that designing the entire system to be +testable from day 1 and from the ground up would radically impact the culture +of the eco-system in a way that no bolted-on tool or service at higher levels +can replicate: + +* Tests would make it easier to write programs that can be easily understood + by newcomers. + +* More broad-based understanding would lead to more forks. + +* Tests would make it easy to share code across forks. Copy the tests over, + and then copy code over and polish it until the tests pass. Manual work, but + tractable and without major risks. + +* The community would gain a diversified portfolio of forks for each program, + a “wavefront” of possible combinations of features and alternative + implementations of features. Application writers who wrote thorough tests + for their apps (something they just can’t do today) would be able to bounce + around between forks more easily without getting locked in to a single one + as currently happens. + +* There would be a stronger culture of reviewing the code for programs you use + or libraries you depend on. [More eyeballs would make more bugs shallow.](https://en.wikipedia.org/wiki/Linus%27s_Law) + ## Resources * [Single-page cheatsheet for the x86 ISA](https://net.cs.uni-bonn.de/fileadmin/user_upload/plohmann/x86_opcode_structure_and_instruction_overview.pdf) diff --git a/subx/apps/dquotes.subx b/subx/apps/dquotes.subx index 02e99e33..b236ac2f 100644 --- a/subx/apps/dquotes.subx +++ b/subx/apps/dquotes.subx @@ -1051,7 +1051,7 @@ Segment-size: 0x100/imm32 #? 0x1000/imm32/4KB -Next-string-literal: +Next-string-literal: # tracks the next auto-generated variable name 1/imm32 # . . 
vim:nowrap:textwidth=0 diff --git a/subx/dgen b/subx/dgen index 4aeec375..1a5a8366 100755 --- a/subx/dgen +++ b/subx/dgen @@ -18,11 +18,11 @@ export CFLAGS=-g case $1 in ex*) - ./subx --map translate examples/$1.subx -o examples/`echo $1 |sed 's/\..*//'` + ./subx --debug translate examples/$1.subx -o examples/`echo $1 |sed 's/\..*//'` exit $? ;; *) - ./subx --map translate *.subx apps/$1.subx -o apps/`echo $1 |sed 's/\..*//'` + ./subx --debug translate *.subx apps/$1.subx -o apps/`echo $1 |sed 's/\..*//'` exit $? ;; esac diff --git a/subx/drun b/subx/drun index b094f995..71b2f0e0 100755 --- a/subx/drun +++ b/subx/drun @@ -12,11 +12,11 @@ fi case $1 in ex*) - ./subx --map --trace run examples/$* + ./subx --debug --trace run examples/$* exit $? ;; *) - ./subx --map --trace run apps/$* + ./subx --debug --trace run apps/$* exit $? ;; esac |