about summary refs log tree commit diff stats
path: root/subx
diff options
context:
space:
mode:
Diffstat (limited to 'subx')
-rw-r--r-- subx/001help.cc 7
-rw-r--r-- subx/010---vm.cc 11
-rw-r--r-- subx/035labels.cc 33
-rw-r--r-- subx/036global_variables.cc 4
-rw-r--r-- subx/038---literal_strings.cc 1
-rw-r--r-- subx/039debug.cc 94
-rw-r--r-- subx/Readme.md 66
-rw-r--r-- subx/apps/dquotes.subx 2
-rwxr-xr-x subx/dgen 4
-rwxr-xr-x subx/drun 4
10 files changed, 171 insertions, 55 deletions
diff --git a/subx/001help.cc b/subx/001help.cc
index 8d815be5..79589cf8 100644
--- a/subx/001help.cc
+++ b/subx/001help.cc
@@ -80,10 +80,9 @@ void init_help() {
     "== Debugging aids\n"
     "- Add '--trace' to any of these commands to print a trace to stderr\n"
     "  for debugging purposes.\n"
-    "- Add '--map' to add information to traces. 'subx --map translate' will save\n"
-    "  (to a file called 'map') the mapping from labels to addresses that it computes\n"
-    "  during translation. This file is then available to 'subx --map --trace run'\n"
-    "  which prints out label names in the trace as it encounters them.\n"
+    "- Add '--debug' to add information to traces. 'subx --debug translate' will\n"
+    "  save various mappings to files that 'subx --debug --trace run'\n"
+    "  can use to make traces more informative.\n"
     "\n"
     "Options starting with '--' must always come before any other arguments.\n"
     "\n"
diff --git a/subx/010---vm.cc b/subx/010---vm.cc
index 6fac9cf7..31e5608f 100644
--- a/subx/010---vm.cc
+++ b/subx/010---vm.cc
@@ -305,7 +305,7 @@ void run_one_instruction() {
   }
   uint32_t inst_start_address = EIP;
   op = next();
-  trace(Callstack_depth, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << call_label(op) << end();
+  trace(Callstack_depth+1, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << end();
   switch (op) {
   case 0xf4:  // hlt
     EIP = End_of_program;
@@ -369,7 +369,7 @@ inline uint8_t next() {
 
 void dump_registers() {
   ostringstream out;
-  out << "registers: ";
+  out << "registers before: ";
   for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
     if (i > 0) out << "; ";
     out << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
@@ -378,13 +378,6 @@ void dump_registers() {
   trace(Callstack_depth+1, "run") << out.str() << end();
 }
 
-// debugging info from a later layer
-string call_label(uint8_t op) {
-  if (op != 0xe8) return "";
-  // End Trace Call Instruction
-  return "/call";
-}
-
 //: start tracking supported opcodes
 :(before "End Globals")
 map</*op*/string, string> Name;
diff --git a/subx/035labels.cc b/subx/035labels.cc
index 5596f6cc..f3131168 100644
--- a/subx/035labels.cc
+++ b/subx/035labels.cc
@@ -138,6 +138,8 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>&
   int current_byte = 0;
   for (int i = 0;  i < SIZE(code.lines);  ++i) {
     const line& inst = code.lines.at(i);
+    if (Source_lines_file.is_open() && !inst.original.empty() && /*not a label*/ *inst.words.at(0).data.rbegin() != ':')
+      Source_lines_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << inst.original << '\n';
     for (int j = 0;  j < SIZE(inst.words);  ++j) {
       const word& curr = inst.words.at(j);
       // hack: if we have any operand metadata left after previous transforms,
@@ -168,8 +170,8 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>&
           raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
         if (j > 0)
           raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
-        if (Map_file.is_open())
-          Map_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
+        if (Labels_file.is_open())
+          Labels_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
         if (contains_key(byte_index, label) && label != "Entry") {
           raise << "duplicate label '" << label << "'\n" << end();
           return;
@@ -183,20 +185,27 @@ void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>&
 }
 
 :(before "End Globals")
-bool Dump_map = false;  // currently used only by 'subx translate'
-ofstream Map_file;
+bool Dump_debug_info = false;  // currently used only by 'subx translate'
+ofstream Labels_file;
+ofstream Source_lines_file;
 :(before "End Commandline Options")
-else if (is_equal(*arg, "--map")) {
-  Dump_map = true;
-  // End --map Settings
+else if (is_equal(*arg, "--debug")) {
+  Dump_debug_info = true;
+  // End --debug Settings
 }
-//: wait to open "map" for writing until we're sure we aren't trying to read it
+//: wait to open "labels" and "source_lines" for writing until we're sure we aren't trying to read them
 :(after "Begin subx translate")
-if (Dump_map)
-  Map_file.open("map");
+if (Dump_debug_info) {
+  cerr << "saving address->label information to 'labels'\n";
+  Labels_file.open("labels");
+  cerr << "saving address->source information to 'source_lines'\n";
+  Source_lines_file.open("source_lines");
+}
 :(before "End subx translate")
-if (Dump_map)
-  Map_file.close();
+if (Dump_debug_info) {
+  Labels_file.close();
+  Source_lines_file.close();
+}
 
 :(code)
 void drop_labels(segment& code) {
diff --git a/subx/036global_variables.cc b/subx/036global_variables.cc
index 846cd291..fffabf72 100644
--- a/subx/036global_variables.cc
+++ b/subx/036global_variables.cc
@@ -54,8 +54,8 @@ void compute_addresses_for_global_variables(const segment& s, map<string, uint32
         if (trace_contains_errors()) return;
         if (j > 0)
           raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
-        if (Map_file.is_open())
-          Map_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
+        if (Labels_file.is_open())
+          Labels_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
         if (contains_key(address, variable)) {
           raise << "duplicate global '" << variable << "'\n" << end();
           return;
diff --git a/subx/038---literal_strings.cc b/subx/038---literal_strings.cc
index a795ce23..65a7740b 100644
--- a/subx/038---literal_strings.cc
+++ b/subx/038---literal_strings.cc
@@ -104,6 +104,7 @@ void parse_instruction_character_by_character(const string& line_data, vector<li
   istringstream in(line_data);
   in >> std::noskipws;
   line result;
+  result.original = line_data;
   // add tokens (words or strings) one by one
   while (has_data(in)) {
     skip_whitespace(in);
diff --git a/subx/039debug.cc b/subx/039debug.cc
index fc0b622b..26bb5f7a 100644
--- a/subx/039debug.cc
+++ b/subx/039debug.cc
@@ -1,15 +1,18 @@
 //:: Some helpers for debugging.
 
-//: Load the 'map' file generated during 'subx --map translate' when running 'subx --map --trace run'.
+//: Load the 'labels' and 'source_lines' files generated during
+//: 'subx --debug translate' when running 'subx --debug --trace run'.
 //: (It'll only affect the trace.)
 
 :(before "End Globals")
 map</*address*/uint32_t, string> Symbol_name;  // used only by 'subx run'
-:(before "End --map Settings")
-load_map("map");
+map</*address*/uint32_t, string> Source_line;  // used only by 'subx run'
+:(before "End --debug Settings")
+load_labels();
+load_source_lines();
 :(code)
-void load_map(const string& map_filename) {
-  ifstream fin(map_filename.c_str());
+void load_labels() {
+  ifstream fin("labels");
   fin >> std::hex;
   while (has_data(fin)) {
     uint32_t addr = 0;
@@ -20,17 +23,43 @@ void load_map(const string& map_filename) {
   }
 }
 
+void load_source_lines() {  // fill Source_line from 'source_lines': one "<hex address> <source text>" per line
+  ifstream fin("source_lines");
+  fin >> std::hex;  // addresses are written in hex with a "0x" prefix
+  while (has_data(fin)) {
+    uint32_t addr = 0;
+    if (!(fin >> addr)) break;  // no address left to parse (e.g. only the final newline): don't record a bogus entry for address 0
+    string line;
+    getline(fin, line);  // rest of the line is the original source text
+    put(Source_line, addr, hacky_squeeze_out_whitespace(line));
+  }
+}
+
 :(after "Run One Instruction")
 if (contains_key(Symbol_name, EIP))
   trace(Callstack_depth, "run") << "== label " << get(Symbol_name, EIP) << end();
+if (contains_key(Source_line, EIP))
+  trace(Callstack_depth, "run") << "0x" << HEXWORD << EIP << ": " << get(Source_line, EIP) << end();
+else
+  // no source line info; do what you can
+  trace(Callstack_depth, "run") << "0x" << HEXWORD << EIP << ": " << debug_info(EIP) << end();
 
-//: make calls in particular more salient
-:(before "End Trace Call Instruction")
-// at this point we've skipped past the e8 opcode, but not the offset operand
-int32_t offset = read_mem_i32(EIP);
-uint32_t next_eip = EIP+offset+4;
-if (contains_key(Symbol_name, next_eip))
-  return "/call "+get(Symbol_name, next_eip);
+:(code)
+string debug_info(uint32_t inst_address) {  // fallback description of the instruction at 'inst_address' when no source line is known
+  uint8_t op = read_mem_u8(inst_address);  // read via the parameter rather than the global EIP
+  if (op != 0xe8) {  // not a call: just show the opcode byte
+    ostringstream out;
+    out << HEXBYTE << NUM(op);
+    return out.str();
+  }
+  int32_t offset = read_mem_i32(inst_address+/*skip op*/1);
+  uint32_t next_eip = inst_address+/*inst length*/5+offset;  // call target: address after the call plus the signed offset
+  if (contains_key(Symbol_name, next_eip))
+    return "e8/call "+get(Symbol_name, next_eip);  // prefer the label name when one is known
+  ostringstream out;
+  out << "e8/call 0x" << HEXWORD << next_eip;
+  return out.str();
+}
 
 //: If a label starts with '$watch-', make a note of the effective address
 //: computed by the next instruction. Start dumping out its contents to the
@@ -61,3 +90,44 @@ if (!Watch_this_effective_address.empty()) {
   dbg << "now watching " << HEXWORD << addr << " for " << Watch_this_effective_address << end();
   put(Watch_points, Watch_this_effective_address, addr);
 }
+
+//: helpers
+
+:(code)
+string hacky_squeeze_out_whitespace(const string& s) {
+  // Returns 's' with whitespace trimmed at both ends and each run of
+  // whitespace/dots that precedes a word replaced by a single space; text from
+  // the first '#' or '"' on is copied verbatim. All-whitespace input gives "".
+  string::const_iterator first = s.begin();
+  while (first != s.end() && isspace(static_cast<unsigned char>(*first)))  // cast: isspace() on a negative char is undefined
+    ++first;
+  if (first == s.end()) return "";
+
+  // strip whitespace at end; *first is non-space, so 'last' always stays past it
+  string::const_iterator last = s.end();
+  while (isspace(static_cast<unsigned char>(*(last-1))))
+    --last;
+
+  // replace runs of spaces/dots with single space until comment or string
+  // TODO:
+  //   leave alone dots not surrounded by whitespace
+  //   leave alone '#' within word
+  //   leave alone '"' within word
+  //   squeeze spaces after end of string
+  ostringstream out;
+  bool previous_was_space = false;
+  bool in_comment_or_string = false;
+  for (string::const_iterator curr = first;  curr != last;  ++curr) {
+    if (in_comment_or_string)
+      out << *curr;
+    else if (isspace(static_cast<unsigned char>(*curr)) || *curr == '.')
+      previous_was_space = true;
+    else {
+      if (previous_was_space) out << ' ';
+      out << *curr;
+      previous_was_space = false;
+      if (*curr == '#' || *curr == '"') in_comment_or_string = true;
+    }
+  }
+  return out.str();
+}
diff --git a/subx/Readme.md b/subx/Readme.md
index b3b58c86..e8e4ad6d 100644
--- a/subx/Readme.md
+++ b/subx/Readme.md
@@ -2,16 +2,22 @@
 
 SubX is a simple, minimalist stack for programming your computer.
 
-  ```
+  ```sh
   $ git clone https://github.com/akkartik/mu
   $ cd mu/subx
   $ ./subx  # print out a help message
   ```
 
+SubX is designed:
+
+* to explore ways to turn arbitrary manual tests into reproducible automated tests,
+* to be easy to implement in itself, and
+* to help learn and teach the x86 instruction set.
+
 It requires a Unix-like environment with a C++ compiler (Linux or BSD or Mac
 OS). Running `subx` will transparently compile it as necessary.
 
-[![Build Status](https://api.travis-ci.org/akkartik/mu.svg)](https://travis-ci.org/akkartik/mu)
+[![Build Status](https://api.travis-ci.org/akkartik/mu.svg?branch=master)](https://travis-ci.org/akkartik/mu)
 
 You can generate native ELF binaries with it that run on a bare Linux
 kernel. No other dependencies needed.
@@ -35,8 +41,10 @@ messages.
 Emulated runs generate a trace that permits [time-travel debugging](https://github.com/akkartik/mu/blob/master/browse_trace/Readme.md).
 
   ```sh
-  $ ./subx --map translate examples/factorial.subx -o examples/factorial
-  $ ./subx --map --trace run examples/factorial
+  $ ./subx --debug translate examples/factorial.subx -o examples/factorial
+  saving address->label information to 'labels'
+  saving address->source information to 'source_lines'
+  $ ./subx --debug --trace run examples/factorial
   saving trace to 'last_run'
   $ ../browse_trace/browse_trace last_run  # text-mode debugger UI
   ```
@@ -402,12 +410,12 @@ rudimentary but hopefully still workable toolkit:
 * As a further refinement, it is possible to render label names in the trace
   by adding a second flag to both the `translate` and `run` commands:
   ```
-  $ ./subx --map translate input.subx -o binary
-  $ ./subx --map --trace run binary arg1 arg2  2>trace
+  $ ./subx --debug translate input.subx -o binary
+  $ ./subx --debug --trace run binary arg1 arg2  2>trace
   ```
-  `subx --map translate` emits a mapping from label to address in a file
-  called `map`. `subx --map --trace run` reads in the `map` file at the start
-  and prints out any matching label name as it traces each instruction
+  `subx --debug translate` emits a mapping from label to address in a file
+  called `labels`. `subx --debug --trace run` reads in the `labels` file at
+  the start and prints out any matching label name as it traces each instruction
   executed.
 
   Here's a sample of what a trace looks like, with a few boxes highlighted:
@@ -513,8 +521,14 @@ trace, or if you have questions or complaints.
 
 ### 'system calls'
 
-A major goal of SubX is testable wrappers for operating system syscalls.
-Here's what I've built so far:
+As I said at the top, a primary design goal of SubX (and Mu more broadly) is
+to explore ways to turn arbitrary manual tests into reproducible automated
+tests. SubX aims for this goal by baking testable interfaces deep into the
+stack, at the OS syscall level. The idea is that every syscall that interacts
+with hardware (and so the environment) should be *dependency injected* so that
+it's possible to insert fake hardware in tests.
+
+But those are big goals. Here are the syscalls I have so far:
 
 * `write`: takes two arguments, a file `f` and an address to array `s`.
 
@@ -568,6 +582,10 @@ Here's what I've built so far:
 
 * ... _(to be continued)_
 
+I will continue to import syscalls over time from [the old Mu VM in the parent
+directory](https://github.com/akkartik/mu), which has experimented with
+interfaces for the screen, keyboard, mouse, disk and network.
+
 ### primitives built atop system calls
 
 _(Compound arguments are usually passed in by reference. Where the results are
@@ -654,6 +672,32 @@ from a slice:
 * `skip-chars-matching-in-slice`: curr, end, delimiter byte -> new-curr (in `EAX`)
 * `skip-chars-not-matching-in-slice`:  curr, end, delimiter byte -> new-curr (in `EAX`)
 
+## Conclusion
+
+The hypothesis of Mu and SubX is that designing the entire system to be
+testable from day 1 and from the ground up would radically impact the culture
+of the eco-system in a way that no bolted-on tool or service at higher levels
+can replicate:
+
+* Tests would make it easier to write programs that can be easily understood
+  by newcomers.
+
+* More broad-based understanding would lead to more forks.
+
+* Tests would make it easy to share code across forks. Copy the tests over,
+  and then copy code over and polish it until the tests pass. Manual work, but
+  tractable and without major risks.
+
+* The community would gain a diversified portfolio of forks for each program,
+  a “wavefront” of possible combinations of features and alternative
+  implementations of features. Application writers who wrote thorough tests
+  for their apps (something they just can’t do today) would be able to bounce
+  around between forks more easily without getting locked in to a single one
+  as currently happens.
+
+* There would be a stronger culture of reviewing the code for programs you use
+  or libraries you depend on. [More eyeballs would make more bugs shallow.](https://en.wikipedia.org/wiki/Linus%27s_Law)
+
 ## Resources
 
 * [Single-page cheatsheet for the x86 ISA](https://net.cs.uni-bonn.de/fileadmin/user_upload/plohmann/x86_opcode_structure_and_instruction_overview.pdf)
diff --git a/subx/apps/dquotes.subx b/subx/apps/dquotes.subx
index 02e99e33..b236ac2f 100644
--- a/subx/apps/dquotes.subx
+++ b/subx/apps/dquotes.subx
@@ -1051,7 +1051,7 @@ Segment-size:
   0x100/imm32
 #?   0x1000/imm32/4KB
 
-Next-string-literal:
+Next-string-literal:  # tracks the next auto-generated variable name
   1/imm32
 
 # . . vim:nowrap:textwidth=0
diff --git a/subx/dgen b/subx/dgen
index 4aeec375..1a5a8366 100755
--- a/subx/dgen
+++ b/subx/dgen
@@ -18,11 +18,11 @@ export CFLAGS=-g
 
 case $1 in
   ex*)
-    ./subx --map translate examples/$1.subx -o examples/`echo $1 |sed 's/\..*//'`
+    ./subx --debug translate examples/$1.subx -o examples/`echo $1 |sed 's/\..*//'`
     exit $?
     ;;
   *)
-    ./subx --map translate *.subx apps/$1.subx  -o apps/`echo $1 |sed 's/\..*//'`
+    ./subx --debug translate *.subx apps/$1.subx  -o apps/`echo $1 |sed 's/\..*//'`
     exit $?
     ;;
 esac
diff --git a/subx/drun b/subx/drun
index b094f995..71b2f0e0 100755
--- a/subx/drun
+++ b/subx/drun
@@ -12,11 +12,11 @@ fi
 
 case $1 in
   ex*)
-    ./subx --map --trace run examples/$*
+    ./subx --debug --trace run examples/$*
     exit $?
     ;;
   *)
-    ./subx --map --trace run apps/$*
+    ./subx --debug --trace run apps/$*
     exit $?
     ;;
 esac