From 248e789e7cedf9dfab25657d3dfa195d7ad0127b Mon Sep 17 00:00:00 2001
From: "Kartik K. Agaram" <vc@akkartik.com>
Date: Mon, 19 Jun 2017 21:47:07 -0700
Subject: 3930 - experimental bytecode interpreter

---
 subx/000organization.cc | 140 +++++++++++++++++
 subx/001help.cc         | 220 ++++++++++++++++++++++++++
 subx/002test.cc         | 112 ++++++++++++++
 subx/003trace.cc        | 399 ++++++++++++++++++++++++++++++++++++++++++++++++
 subx/003trace.test.cc   | 124 +++++++++++++++
 subx/Readme.md          |   1 +
 subx/build              | 106 +++++++++++++
 subx/clean              |   6 +
 8 files changed, 1108 insertions(+)
 create mode 100644 subx/000organization.cc
 create mode 100644 subx/001help.cc
 create mode 100644 subx/002test.cc
 create mode 100644 subx/003trace.cc
 create mode 100644 subx/003trace.test.cc
 create mode 100644 subx/Readme.md
 create mode 100755 subx/build
 create mode 100755 subx/clean

(limited to 'subx')

diff --git a/subx/000organization.cc b/subx/000organization.cc
new file mode 100644
index 00000000..a29a8813
--- /dev/null
+++ b/subx/000organization.cc
@@ -0,0 +1,140 @@
+//: You guessed right: the '000' prefix means you should start reading here.
+//:
+//: This project is set up to load all files with a numeric prefix. Just
+//: create a new file and start hacking.
+//:
+//: The first few files (00*) are independent of what this program does, an
+//: experimental skeleton that will hopefully make it both easier for others to
+//: understand and more malleable, easier to rewrite and remould into radically
+//: different shapes without breaking in subtle corner cases. The premise is
+//: that understandability and rewrite-friendliness are related in a virtuous
+//: cycle. Doing one well makes it easier to do the other.
+//:
+//: Lower down, this file contains a legal, bare-bones C++ program. It doesn't
+//: do anything yet; subsequent files will contain :(...) directives to insert
+//: lines into it. For example:
+//:   :(after "more events")
+//: This directive means: insert the following lines after a line in the
+//: program containing the words "more events".
+//:
+//: A simple tool is included to 'tangle' all the files together in sequence
+//: according to their directives into a single source file containing all the
+//: code for the project, and then feed the source file to the compiler.
+//: (It'll drop these comments starting with a '//:' prefix that only make
+//: sense before tangling.)
+//:
+//: Directives free up the programmer to order code for others to read rather
+//: than as forced by the computer or compiler. Each individual feature can be
+//: organized in a self-contained 'layer' that adds code to many different data
+//: structures and functions all over the program. The right decomposition into
+//: layers will let each layer make sense in isolation.
+//:
+//:   "If I look at any small part of it, I can see what is going on -- I don't
+//:   need to refer to other parts to understand what something is doing.
+//:
+//:   If I look at any large part in overview, I can see what is going on -- I
+//:   don't need to know all the details to get it.
+//:
+//:   Every level of detail is as locally coherent and as well thought-out as
+//:   any other level."
+//:
+//:       -- Richard Gabriel, "The Quality Without A Name"
+//:          (http://dreamsongs.com/Files/PatternsOfSoftware.pdf, page 42)
+//:
+//: Directives are powerful; they permit inserting or modifying any point in
+//: the program. Using them tastefully requires mapping out specific lines as
+//: waypoints for future layers to hook into. Often such waypoints will be in
+//: comments, capitalized to hint that other layers rely on their presence.
+//:
+//: A single waypoint might have many different code fragments hooking into
+//: it from all over the codebase. Use 'before' directives to insert
+//: code at a location in order, top to bottom, and 'after' directives to
+//: insert code in reverse order. By convention waypoints intended for insertion
+//: before begin with 'End'. Notice below how the layers line up above the "End
+//: Foo" waypoint.
+//:
+//:   File 001          File 002                File 003
+//:   ============      ===================     ===================
+//:   // Foo
+//:   ------------
+//:              <----  :(before "End Foo")
+//:                     ....
+//:                     ...
+//:   ------------
+//:              <----------------------------  :(before "End Foo")
+//:                                             ....
+//:                                             ...
+//:   // End Foo
+//:   ============
+//:
+//: Here's part of a layer in color: http://i.imgur.com/0eONnyX.png. Directives
+//: are shaded dark.
+//:
+//: Layers do more than just shuffle code around. In a well-organized codebase
+//: it should be possible to stop loading after any file/layer, build and run
+//: the program, and pass all tests for loaded features. (Relevant is
+//: http://youtube.com/watch?v=c8N72t7aScY, a scene from "2001: A Space
+//: Odyssey".) Get into the habit of running the included script called
+//: 'test_layers' before you commit any changes.
+//:
+//: This 'subsetting guarantee' ensures that this directory contains a
+//: cleaned-up narrative of the evolution of this codebase. Organizing
+//: autobiographically allows a newcomer to rapidly orient himself, reading the
+//: first few files to understand a simple gestalt of a program's core purpose
+//: and features, and later gradually working his way through other features as
+//: the need arises.
+//:
+//: Programmers shouldn't need to understand everything about a program to hack
+//: on it. But they shouldn't be prevented from a thorough understanding of
+//: each aspect either. The goal of layers is to reward curiosity.
+
+// Includes
+// End Includes
+
+// Types
+// End Types
+
+// Prototypes are auto-generated in the 'build' script; define your functions
+// in any order. Just be sure to declare each function header all on one line.
+// Our auto-generation scripts are too minimal and simple-minded to handle
+// anything else.
+#include "function_list"  // by convention, files ending with '_list' are auto-generated
+
+// Globals
+//
+// All statements in this section should always define a single variable on a
+// single line. The 'build' script will simple-mindedly auto-generate extern
+// declarations for them. Don't forget to define (not just declare) constants
+// with extern linkage in this section, since C++ global constants have
+// internal linkage by default.
+//
+// End Globals
+
+int main(int argc, char* argv[]) {
+  atexit(teardown);
+
+  // End One-time Setup
+
+  // Commandline Parsing
+  // End Commandline Parsing
+
+  return 0;  // End Main
+}
+
+// Unit Tests
+// End Unit Tests
+
+//: our first directive; will move the include above the program
+:(before "End Includes")
+#include <stdlib.h>
+
+//: Without directives or with the :(code) directive, lines get added at the
+//: end.
+:(code)
+void setup() {
+  // End Setup
+}
+
+void teardown() {
+  // End Teardown
+}
diff --git a/subx/001help.cc b/subx/001help.cc
new file mode 100644
index 00000000..d20d8144
--- /dev/null
+++ b/subx/001help.cc
@@ -0,0 +1,220 @@
+//: Everything this project/binary supports.
+//: This should give you a sense for what to look forward to in later layers.
+
+:(before "End Commandline Parsing")
+if (argc != 1) {
+  //: this is the functionality later layers will provide
+  // currently no automated tests for commandline arg parsing
+  return 1;
+}
+
+//: Support for option parsing.
+//: Options always begin with '--' and are always the first arguments. An
+//: option will never follow a non-option.
+:(before "End Commandline Parsing")
+char** arg = &argv[1];
+while (argc > 1 && starts_with(*arg, "--")) {
+  if (false)
+    ;  // no-op branch just so any further additions can consistently always start with 'else'
+  // End Commandline Options(*arg)
+  else
+    cerr << "skipping unknown option " << *arg << '\n';
+  --argc;  ++argv;  ++arg;
+}
+
+//:: Helper function used by the above fragment of code (and later layers too,
+//:: who knows?).
+//: The :(code) directive appends function definitions to the end of the
+//: project. Regardless of where functions are defined, we can call them
+//: anywhere we like as long as we format the function header in a specific
+//: way: put it all on a single line without indent, end the line with ') {'
+//: and no trailing whitespace. As long as functions uniformly start this
+//: way, our 'build' script contains a little command to automatically
+//: generate declarations for them.
+:(code)
+bool is_equal(char* s, const char* lit) {
+  return strncmp(s, lit, strlen(lit)) == 0;
+}
+
+bool starts_with(const string& s, const string& pat) {
+  string::const_iterator a=s.begin(), b=pat.begin();
+  for (/*nada*/;  a!=s.end() && b!=pat.end();  ++a, ++b)
+    if (*a != *b) return false;
+  return b == pat.end();
+}
+
+//: I'll throw some style conventions here for want of a better place for them.
+//: As a rule I hate style guides. Do what you want, that's my motto. But since
+//: we're dealing with C/C++, the one big thing we want to avoid is undefined
+//: behavior. If a compiler ever encounters undefined behavior it can make
+//: your program do anything it wants.
+//:
+//: For reference, my checklist of undefined behaviors to watch out for:
+//:   out-of-bounds access
+//:   uninitialized variables
+//:   use after free
+//:   dereferencing invalid pointers: null, a new of size 0, others
+//:
+//:   casting a large number to a type too small to hold it
+//:
+//:   integer overflow
+//:   division by zero and other undefined expressions
+//:   left-shift by negative count
+//:   shifting values by more than or equal to the number of bits they contain
+//:   bitwise operations on signed numbers
+//:
+//:   Converting pointers to types of different alignment requirements
+//:     T* -> void* -> T*: defined
+//:     T* -> U* -> T*: defined if non-function pointers and alignment requirements are same
+//:     function pointers may be cast to other function pointers
+//:
+//:       Casting a numeric value into a value that can't be represented by the target type (either directly or via static_cast)
+//:
+//: To guard against these, some conventions:
+//:
+//: 0. Initialize all primitive variables in functions and constructors.
+//:
+//: 1. Minimize use of pointers and pointer arithmetic. Avoid 'new' and
+//: 'delete' as far as possible. Rely on STL to perform memory management to
+//: avoid use-after-free issues (and memory leaks).
+//:
+//: 2. Avoid naked arrays to avoid out-of-bounds access. Never use operator[]
+//: except with map. Use at() with STL vectors and so on.
+//:
+//: 3. Valgrind all the things.
+//:
+//: 4. Avoid unsigned numbers. Not strictly an undefined-behavior issue, but
+//: the extra range doesn't matter, and it's one less confusing category of
+//: interaction gotchas to worry about.
+//:
+//: Corollary: don't use the size() method on containers, since it returns an
+//: unsigned and that'll cause warnings about mixing signed and unsigned,
+//: yadda-yadda. Instead use this macro below to perform an unsafe cast to
+//: signed. We'll just give up immediately if a container's ever too large.
+//: Basically, Mu is not concerned about this being a little slower than it
+//: could be. (https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7)
+//:
+//: Addendum to corollary: We're going to uniformly use int everywhere, to
+//: indicate that we're oblivious to number size, and since Clang on 32-bit
+//: platforms doesn't yet support multiplication over 64-bit integers, and
+//: since multiplying two integers seems like a more common situation to end
+//: up in than integer overflow.
+:(before "End Includes")
+#define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size()))
+
+//: 5. Integer overflow is guarded against at runtime using the -ftrapv flag
+//: to the compiler, supported by Clang (GCC version only works sometimes:
+//: http://stackoverflow.com/questions/20851061/how-to-make-gcc-ftrapv-work).
+:(before "atexit(teardown)")
+initialize_signal_handlers();  // not always necessary, but doesn't hurt
+//? cerr << INT_MAX+1 << '\n';  // test overflow
+//? assert(false);  // test SIGABRT
+:(code)
+// based on https://spin.atomicobject.com/2013/01/13/exceptions-stack-traces-c
+void initialize_signal_handlers() {
+  struct sigaction action;
+  bzero(&action, sizeof(action));
+  action.sa_sigaction = dump_and_exit;
+  sigemptyset(&action.sa_mask);
+  sigaction(SIGABRT, &action, NULL);  // assert() failure or integer overflow on linux (with -ftrapv)
+  sigaction(SIGILL,  &action, NULL);  // integer overflow on OS X (with -ftrapv)
+}
+void dump_and_exit(int sig, unused siginfo_t* dummy1, unused void* dummy2) {
+  switch (sig) {
+    case SIGABRT:
+      #ifndef __APPLE__
+        cerr << "SIGABRT: might be an integer overflow if it wasn't an assert() failure\n";
+        _Exit(1);
+      #endif
+      break;
+    case SIGILL:
+      #ifdef __APPLE__
+        cerr << "SIGILL: most likely caused by integer overflow\n";
+        _Exit(1);
+      #endif
+      break;
+    default:
+      break;
+  }
+}
+:(before "End Includes")
+#include <signal.h>
+
+//: For good measure we'll also enable SIGFPE.
+:(before "atexit(teardown)")
+feenableexcept(FE_OVERFLOW | FE_UNDERFLOW);
+//? assert(sizeof(int) == 4 && sizeof(float) == 4);
+//? //                          | exp   |  mantissa
+//? int smallest_subnormal = 0b00000000000000000000000000000001;
+//? float smallest_subnormal_f = *reinterpret_cast<float*>(&smallest_subnormal);
+//? cerr << "ε: " << smallest_subnormal_f << '\n';
+//? cerr << "ε/2: " << smallest_subnormal_f/2 << " (underflow)\n";  // test SIGFPE
+:(before "End Includes")
+#include <fenv.h>
+:(code)
+#ifdef __APPLE__
+// Public domain polyfill for feenableexcept on OS X
+// http://www-personal.umich.edu/~williams/archive/computation/fe-handling-example.c
+int feenableexcept (unsigned int excepts) {
+  static fenv_t fenv;
+  unsigned int new_excepts = excepts & FE_ALL_EXCEPT;
+  unsigned int old_excepts;
+  if (fegetenv(&fenv)) return -1;
+  old_excepts = fenv.__control & FE_ALL_EXCEPT;
+  fenv.__control &= ~new_excepts;
+  fenv.__mxcsr   &= ~(new_excepts << 7);
+  return fesetenv(&fenv) ? -1 : old_excepts;
+}
+#endif
+
+//: 6. Map's operator[] being non-const is fucking evil.
+:(before "Globals")  // can't generate prototypes for these
+// from http://stackoverflow.com/questions/152643/idiomatic-c-for-reading-from-a-const-map
+template<typename T> typename T::mapped_type& get(T& map, typename T::key_type const& key) {
+  typename T::iterator iter(map.find(key));
+  assert(iter != map.end());
+  return iter->second;
+}
+template<typename T> typename T::mapped_type const& get(const T& map, typename T::key_type const& key) {
+  typename T::const_iterator iter(map.find(key));
+  assert(iter != map.end());
+  return iter->second;
+}
+template<typename T> typename T::mapped_type const& put(T& map, typename T::key_type const& key, typename T::mapped_type const& value) {
+  map[key] = value;
+  return map[key];
+}
+template<typename T> bool contains_key(T& map, typename T::key_type const& key) {
+  return map.find(key) != map.end();
+}
+template<typename T> typename T::mapped_type& get_or_insert(T& map, typename T::key_type const& key) {
+  return map[key];
+}
+//: The contract: any container that relies on get_or_insert should never call
+//: contains_key.
+
+//: 7. istreams are a royal pain in the arse. You have to be careful about
+//: what subclass you try to putback into. You have to watch out for the pesky
+//: failbit and badbit. Just avoid eof() and use this helper instead.
+:(code)
+bool has_data(istream& in) {
+  return in && !in.eof();
+}
+
+:(before "End Includes")
+#include <assert.h>
+
+#include <iostream>
+using std::istream;
+using std::ostream;
+using std::iostream;
+using std::cin;
+using std::cout;
+using std::cerr;
+#include <iomanip>
+
+#include <string.h>
+#include <string>
+using std::string;
+
+#define unused  __attribute__((unused))
diff --git a/subx/002test.cc b/subx/002test.cc
new file mode 100644
index 00000000..1d8efb08
--- /dev/null
+++ b/subx/002test.cc
@@ -0,0 +1,112 @@
+//: A simple test harness. To create new tests, define functions starting with
+//: 'test_'. To run all tests so defined, run:
+//:   $ ./mu test
+//:
+//: Every layer should include tests, and can reach into previous layers.
+//: However, it seems like a good idea never to reach into tests from previous
+//: layers. Every test should be a contract that always passes as originally
+//: written, regardless of any later layers. Avoid writing 'temporary' tests
+//: that are only meant to work until some layer.
+
+:(before "End Types")
+typedef void (*test_fn)(void);
+:(before "Globals")
+// move a global ahead into types that we can't generate an extern declaration for
+const test_fn Tests[] = {
+  #include "test_list"  // auto-generated; see 'build' script
+};
+
+:(before "End Globals")
+bool Run_tests = false;
+bool Passed = true;  // set this to false inside any test to indicate failure
+long Num_failures = 0;
+
+:(before "End Includes")
+#define CHECK(X) \
+  if (Passed && !(X)) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << '\n'; \
+    Passed = false; \
+    return;  /* Currently we stop at the very first failure. */ \
+  }
+
+#define CHECK_EQ(X, Y) \
+  if (Passed && (X) != (Y)) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << " == " << #Y << '\n'; \
+    cerr << "  got " << (X) << '\n';  /* BEWARE: multiple eval */ \
+    Passed = false; \
+    return;  /* Currently we stop at the very first failure. */ \
+  }
+
+:(before "End Setup")
+Passed = true;
+
+:(before "End Commandline Parsing")
+if (argc > 1 && is_equal(argv[1], "test")) {
+  Run_tests = true;  --argc;  ++argv;  // shift 'test' out of commandline args
+}
+
+:(before "End Main")
+if (Run_tests) {
+  // Test Runs
+  // we run some tests and then exit; assume no state need be maintained afterward
+
+  // End Test Run Initialization
+  time_t t;  time(&t);
+  cerr << "C tests: " << ctime(&t);
+  for (size_t i=0;  i < sizeof(Tests)/sizeof(Tests[0]);  ++i) {
+//?     cerr << "running .build/test_list line " << (i+1) << '\n';
+    run_test(i);
+  }
+  cerr << '\n';
+  // End Tests
+  if (Num_failures > 0) {
+    cerr << Num_failures << " failure"
+         << (Num_failures > 1 ? "s" : "")
+         << '\n';
+    return 1;
+  }
+  return 0;
+}
+
+:(code)
+void run_test(size_t i) {
+  if (i >= sizeof(Tests)/sizeof(Tests[0])) {
+    cerr << "no test " << i << '\n';
+    return;
+  }
+  setup();
+  // End Test Setup
+  (*Tests[i])();
+  // End Test Teardown
+  teardown();
+  if (Passed) cerr << '.';
+  else ++Num_failures;
+}
+
+bool is_integer(const string& s) {
+  return s.find_first_not_of("0123456789-") == string::npos  // no other characters
+      && s.find_first_of("0123456789") != string::npos  // at least one digit
+      && s.find('-', 1) == string::npos;  // '-' only at first position
+}
+
+int to_integer(string n) {
+  char* end = NULL;
+  // safe because string.c_str() is guaranteed to be null-terminated
+  int result = strtoll(n.c_str(), &end, /*any base*/0);
+  if (*end != '\0') cerr << "tried to convert " << n << " to number\n";
+  assert(*end == '\0');
+  return result;
+}
+
+void test_is_integer() {
+  CHECK(is_integer("1234"));
+  CHECK(is_integer("-1"));
+  CHECK(!is_integer("234.0"));
+  CHECK(is_integer("-567"));
+  CHECK(!is_integer("89-0"));
+  CHECK(!is_integer("-"));
+  CHECK(!is_integer("1e3"));  // not supported
+}
+
+:(before "End Includes")
+#include <stdlib.h>
diff --git a/subx/003trace.cc b/subx/003trace.cc
new file mode 100644
index 00000000..2810b420
--- /dev/null
+++ b/subx/003trace.cc
@@ -0,0 +1,399 @@
+//: The goal of layers is to make programs more easy to understand and more
+//: malleable, easy to rewrite in radical ways without accidentally breaking
+//: some corner case. Tests further both goals. They help understandability by
+//: letting one make small changes and get feedback. What if I wrote this line
+//: like so? What if I removed this function call, is it really necessary?
+//: Just try it, see if the tests pass. Want to explore rewriting this bit in
+//: this way? Tests put many refactorings on a firmer footing.
+//:
+//: But the usual way we write tests seems incomplete. Refactorings tend to
+//: work in the small, but don't help with changes to function boundaries. If
+//: you want to extract a new function you have to manually test-drive it to
+//: create tests for it. If you want to inline a function its tests are no
+//: longer valid. In both cases you end up having to reorganize code as well as
+//: tests, an error-prone activity.
+//:
+//: In response, this layer introduces the notion of *domain-driven* testing.
+//: We focus on the domain of inputs the whole program needs to handle rather
+//: than the correctness of individual functions. All tests invoke the program
+//: in a single way: by calling run() with some input. As the program operates
+//: on the input, it traces out a list of _facts_ deduced about the domain:
+//:   trace("label") << "fact 1: " << val;
+//:
+//: Tests can now check these facts:
+//:   :(scenario foo)
+//:   34  # call run() with this input
+//:   +label: fact 1: 34  # 'run' should have deduced this fact
+//:   -label: fact 1: 35  # the trace should not contain such a fact
+//:
+//: Since we never call anything but the run() function directly, we never have
+//: to rewrite the tests when we reorganize the internals of the program. We
+//: just have to make sure our rewrite deduces the same facts about the domain,
+//: and that's something we're going to have to do anyway.
+//:
+//: To avoid the combinatorial explosion of integration tests, each layer
+//: mainly logs facts to the trace with a common *label*. All tests in a layer
+//: tend to check facts with this label. Validating the facts logged with a
+//: specific label is like calling functions of that layer directly.
+//:
+//: To build robust tests, trace facts about your domain rather than details of
+//: how you computed them.
+//:
+//: More details: http://akkartik.name/blog/tracing-tests
+//:
+//: ---
+//:
+//: Between layers and domain-driven testing, programming starts to look like a
+//: fundamentally different activity. Instead of a) superficial, b) local rules
+//: on c) code [like say http://blog.bbv.ch/2013/06/05/clean-code-cheat-sheet],
+//: we allow programmers to engage with the a) deep, b) global structure of the
+//: c) domain. If you can systematically track discontinuities in the domain,
+//: you don't care if the code used gotos as long as it passed the tests. If
+//: tests become more robust to run it becomes easier to try out radically
+//: different implementations for the same program. If code is super-easy to
+//: rewrite, it becomes less important what indentation style it uses, or that
+//: the objects are appropriately encapsulated, or that the functions are
+//: referentially transparent.
+//:
+//: Instead of plumbing, programming becomes building and gradually refining a
+//: map of the environment the program must operate under. Whether a program is
+//: 'correct' at a given point in time is a red herring; what matters is
+//: avoiding regression by monotonically nailing down the more 'eventful' parts
+//: of the terrain. It helps readers new and old, and rewards curiosity, to
+//: organize large programs in self-similar hierarchies of example scenarios
+//: colocated with the code that makes them work.
+//:
+//:   "Programming properly should be regarded as an activity by which
+//:   programmers form a mental model, rather than as production of a program."
+//:   -- Peter Naur (http://alistair.cockburn.us/ASD+book+extract%3A+%22Naur,+Ehn,+Musashi%22)
+
+:(before "End Types")
+struct trace_line {
+  int depth;  // optional field just to help browse traces later
+  string label;
+  string contents;
+  trace_line(string l, string c) :depth(0), label(l), contents(c) {}
+  trace_line(int d, string l, string c) :depth(d), label(l), contents(c) {}
+};
+
+:(before "End Globals")
+bool Hide_errors = false;
+bool Dump_trace = false;
+string Dump_label = "";
+:(before "End Setup")
+Hide_errors = false;
+Dump_trace = false;
+Dump_label = "";
+
+:(before "End Types")
+// Pre-define some global constants that trace_stream needs to know about.
+// Since they're in the Types section, they'll be included in any cleaved
+// compilation units. So no extern linkage.
+const int Max_depth = 9999;
+const int Error_depth = 0;  // definitely always print errors
+const int App_depth = 2;  // temporarily where all Mu code will trace to
+
+struct trace_stream {
+  vector<trace_line> past_lines;
+  // accumulator for current line
+  ostringstream* curr_stream;
+  string curr_label;
+  int curr_depth;
+  int callstack_depth;
+  int collect_depth;
+  ofstream null_stream;  // never opens a file, so writes silently fail
+  trace_stream() :curr_stream(NULL), curr_depth(Max_depth), callstack_depth(0), collect_depth(Max_depth) {}
+  ~trace_stream() { if (curr_stream) delete curr_stream; }
+
+  ostream& stream(string label) {
+    return stream(Max_depth, label);
+  }
+
+  ostream& stream(int depth, string label) {
+    if (depth > collect_depth) return null_stream;
+    curr_stream = new ostringstream;
+    curr_label = label;
+    curr_depth = depth;
+    return *curr_stream;
+  }
+
+  // be sure to call this before messing with curr_stream or curr_label
+  void newline();
+  // useful for debugging
+  string readable_contents(string label);  // empty label = show everything
+};
+
+:(code)
+void trace_stream::newline() {
+  if (!curr_stream) return;
+  string curr_contents = curr_stream->str();
+  if (!curr_contents.empty()) {
+    past_lines.push_back(trace_line(curr_depth, trim(curr_label), curr_contents));  // preserve indent in contents
+    if ((!Hide_errors && curr_label == "error")
+        || Dump_trace
+        || (!Dump_label.empty() && curr_label == Dump_label))
+      cerr << curr_label << ": " << curr_contents << '\n';
+  }
+  delete curr_stream;
+  curr_stream = NULL;
+  curr_label.clear();
+  curr_depth = Max_depth;
+}
+
+string trace_stream::readable_contents(string label) {
+  ostringstream output;
+  label = trim(label);
+  for (vector<trace_line>::iterator p = past_lines.begin();  p != past_lines.end();  ++p)
+    if (label.empty() || label == p->label) {
+      output << std::setw(4) << p->depth << ' ' << p->label << ": " << p->contents << '\n';
+    }
+  return output.str();
+}
+
+:(before "End Globals")
+trace_stream* Trace_stream = NULL;
+int Trace_errors = 0;  // used only when Trace_stream is NULL
+
+:(before "End Includes")
+#define CLEAR_TRACE  delete Trace_stream, Trace_stream = new trace_stream;
+
+// Top-level helper. IMPORTANT: can't nest
+#define trace(...)  !Trace_stream ? cerr /*print nothing*/ : Trace_stream->stream(__VA_ARGS__)
+
+// Just for debugging; 'git log' should never show any calls to 'dbg'.
+#define dbg trace(0, "a")
+#define DUMP(label)  if (Trace_stream) cerr << Trace_stream->readable_contents(label);
+
+// Errors are a special layer.
+#define raise  (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Error_depth, "error"))
+// If we aren't yet sure how to deal with some corner case, use assert_for_now
+// to indicate that it isn't an inviolable invariant.
+#define assert_for_now assert
+
+// Inside tests, fail any tests that displayed (unexpected) errors.
+// Expected errors in tests should always be hidden and silently checked for.
+:(before "End Test Teardown")
+if (Passed && !Hide_errors && trace_contains_errors()) {
+  Passed = false;
+}
+:(code)
+bool trace_contains_errors() {
+  return Trace_errors > 0 || trace_count("error") > 0;
+}
+
+:(before "End Types")
+struct end {};
+:(code)
+ostream& operator<<(ostream& os, unused end) {
+  if (Trace_stream) Trace_stream->newline();
+  return os;
+}
+
+:(before "End Globals")
+bool Save_trace = false;
+
+// Trace_stream is a resource, lease_tracer uses RAII to manage it.
+:(before "End Types")
+struct lease_tracer {
+  lease_tracer();
+  ~lease_tracer();
+};
+:(code)
+lease_tracer::lease_tracer() { Trace_stream = new trace_stream; }
+lease_tracer::~lease_tracer() {
+  if (!Trace_stream) return;  // in case tests close Trace_stream
+  if (Save_trace) {
+    ofstream fout("last_trace");
+    fout << Trace_stream->readable_contents("");
+    fout.close();
+  }
+  delete Trace_stream, Trace_stream = NULL;
+}
+:(before "End Includes")
+#define START_TRACING_UNTIL_END_OF_SCOPE  lease_tracer leased_tracer;
+:(before "End Test Setup")
+START_TRACING_UNTIL_END_OF_SCOPE
+
+:(before "End Includes")
+#define CHECK_TRACE_CONTENTS(...)  check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__)
+
+#define CHECK_TRACE_CONTAINS_ERRORS()  CHECK(trace_contains_errors())
+#define CHECK_TRACE_DOESNT_CONTAIN_ERRORS() \
+  if (Passed && trace_contains_errors()) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected errors\n"; \
+    DUMP("error"); \
+    Passed = false; \
+    return; \
+  }
+
+#define CHECK_TRACE_COUNT(label, count) \
+  if (Passed && trace_count(label) != (count)) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): trace_count of " << label << " should be " << count << '\n'; \
+    cerr << "  got " << trace_count(label) << '\n';  /* multiple eval */ \
+    DUMP(label); \
+    Passed = false; \
+    return;  /* Currently we stop at the very first failure. */ \
+  }
+
+#define CHECK_TRACE_DOESNT_CONTAIN(...)  CHECK(trace_doesnt_contain(__VA_ARGS__))
+
+:(code)
+bool check_trace_contents(string FUNCTION, string FILE, int LINE, string expected) {
+  if (!Passed) return false;
+  if (!Trace_stream) return false;
+  vector<string> expected_lines = split(expected, "");
+  int curr_expected_line = 0;
+  while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty())
+    ++curr_expected_line;
+  if (curr_expected_line == SIZE(expected_lines)) return true;
+  string label, contents;
+  split_label_contents(expected_lines.at(curr_expected_line), &label, &contents);
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    if (label != p->label) continue;
+    if (contents != trim(p->contents)) continue;
+    ++curr_expected_line;
+    while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty())
+      ++curr_expected_line;
+    if (curr_expected_line == SIZE(expected_lines)) return true;
+    split_label_contents(expected_lines.at(curr_expected_line), &label, &contents);
+  }
+
+  if (line_exists_anywhere(label, contents)) {
+    cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): line [" << label << ": " << contents << "] out of order in trace:\n";
+    DUMP("");
+  }
+  else {
+    cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << contents << "] in trace:\n";
+    DUMP(label);
+  }
+  Passed = false;
+  return false;
+}
+
+void split_label_contents(const string& s, string* label, string* contents) {
+  static const string delim(": ");
+  size_t pos = s.find(delim);
+  if (pos == string::npos) {
+    *label = "";
+    *contents = trim(s);
+  }
+  else {
+    *label = trim(s.substr(0, pos));
+    *contents = trim(s.substr(pos+SIZE(delim)));
+  }
+}
+
+bool line_exists_anywhere(const string& label, const string& contents) {
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    if (label != p->label) continue;
+    if (contents == trim(p->contents)) return true;
+  }
+  return false;
+}
+
+int trace_count(string label) {
+  return trace_count(label, "");
+}
+
+int trace_count(string label, string line) {
+  if (!Trace_stream) return 0;
+  long result = 0;
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    if (label == p->label) {
+      if (line == "" || trim(line) == trim(p->contents))
+        ++result;
+    }
+  }
+  return result;
+}
+
+int trace_count_prefix(string label, string prefix) {
+  if (!Trace_stream) return 0;
+  long result = 0;
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    if (label == p->label) {
+      if (starts_with(trim(p->contents), trim(prefix)))
+        ++result;
+    }
+  }
+  return result;
+}
+
+bool trace_doesnt_contain(string label, string line) {
+  return trace_count(label, line) == 0;
+}
+
+bool trace_doesnt_contain(string expected) {
+  vector<string> tmp = split_first(expected, ": ");
+  return trace_doesnt_contain(tmp.at(0), tmp.at(1));
+}
+
+vector<string> split(string s, string delim) {
+  vector<string> result;
+  size_t begin=0, end=s.find(delim);
+  while (true) {
+    if (end == string::npos) {
+      result.push_back(string(s, begin, string::npos));
+      break;
+    }
+    result.push_back(string(s, begin, end-begin));
+    begin = end+SIZE(delim);
+    end = s.find(delim, begin);
+  }
+  return result;
+}
+
+vector<string> split_first(string s, string delim) {
+  vector<string> result;
+  size_t end=s.find(delim);
+  result.push_back(string(s, 0, end));
+  if (end != string::npos)
+    result.push_back(string(s, end+SIZE(delim), string::npos));
+  return result;
+}
+
+string trim(const string& s) {
+  string::const_iterator first = s.begin();
+  while (first != s.end() && isspace(*first))
+    ++first;
+  if (first == s.end()) return "";
+
+  string::const_iterator last = --s.end();
+  while (last != s.begin() && isspace(*last))
+    --last;
+  ++last;
+  return string(first, last);
+}
+
+:(before "End Includes")
+#include <vector>
+using std::vector;
+#include <list>
+using std::list;
+#include <map>
+using std::map;
+#include <set>
+using std::set;
+#include <algorithm>
+
+#include <sstream>
+using std::istringstream;
+using std::ostringstream;
+
+#include <fstream>
+using std::ifstream;
+using std::ofstream;
+
+:(before "End Globals")
+//: In future layers we'll use the depth field as follows:
+//:
+//: Errors will be depth 0.
+//: Mu 'applications' will be able to use depths 1-100 as they like.
+//: Primitive statements will occupy 101-9989
+extern const int Initial_callstack_depth = 101;
+extern const int Max_callstack_depth = 9989;
+//: Finally, details of primitive Mu statements will occupy depth 9990-9999
+//: (more on that later as well)
+//:
+//: This framework should help us hide some details at each level, mixing
+//: static ideas like layers with the dynamic notion of call-stack depth.
diff --git a/subx/003trace.test.cc b/subx/003trace.test.cc
new file mode 100644
index 00000000..67b4c345
--- /dev/null
+++ b/subx/003trace.test.cc
@@ -0,0 +1,124 @@
+void test_trace_check_compares() {
+  trace("test layer") << "foo" << end();
+  CHECK_TRACE_CONTENTS("test layer: foo");
+}
+
+void test_trace_check_ignores_other_layers() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 2") << "bar" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: foo");
+  CHECK_TRACE_DOESNT_CONTAIN("test layer 2: foo");
+}
+
+void test_trace_check_ignores_leading_whitespace() {
+  trace("test layer 1") << " foo" << end();
+  CHECK_EQ(trace_count("test layer 1", /*too little whitespace*/"foo"), 1);
+  CHECK_EQ(trace_count("test layer 1", /*too much whitespace*/"  foo"), 1);
+}
+
+void test_trace_check_ignores_other_lines() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 1") << "bar" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: foo");
+}
+
+void test_trace_check_ignores_other_lines2() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 1") << "bar" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: bar");
+}
+
+void test_trace_ignores_trailing_whitespace() {
+  trace("test layer 1") << "foo\n" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: foo");
+}
+
+void test_trace_ignores_trailing_whitespace2() {
+  trace("test layer 1") << "foo " << end();
+  CHECK_TRACE_CONTENTS("test layer 1: foo");
+}
+
+void test_trace_orders_across_layers() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 2") << "bar" << end();
+  trace("test layer 1") << "qux" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: footest layer 2: bartest layer 1: qux");
+}
+
+void test_trace_supports_count() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 1") << "foo" << end();
+  CHECK_EQ(trace_count("test layer 1", "foo"), 2);
+}
+
+void test_trace_supports_count2() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 1") << "bar" << end();
+  CHECK_EQ(trace_count("test layer 1"), 2);
+}
+
+void test_trace_count_ignores_trailing_whitespace() {
+  trace("test layer 1") << "foo\n" << end();
+  CHECK_EQ(trace_count("test layer 1", "foo"), 1);
+}
+
+// pending: DUMP tests
+// pending: readable_contents() adds newline if necessary.
+// pending: raise also prints to stderr.
+// pending: raise doesn't print to stderr if Hide_errors is set.
+// pending: raise doesn't have to be saved if Hide_errors is set, just printed.
+// pending: raise prints to stderr if Trace_stream is NULL.
+// pending: raise prints to stderr if Trace_stream is NULL even if Hide_errors is set.
+
+// can't check trace because trace methods call 'split'
+
+void test_split_returns_at_least_one_elem() {
+  vector<string> result = split("", ",");
+  CHECK_EQ(result.size(), 1);
+  CHECK_EQ(result.at(0), "");
+}
+
+void test_split_returns_entire_input_when_no_delim() {
+  vector<string> result = split("abc", ",");
+  CHECK_EQ(result.size(), 1);
+  CHECK_EQ(result.at(0), "abc");
+}
+
+void test_split_works() {
+  vector<string> result = split("abc,def", ",");
+  CHECK_EQ(result.size(), 2);
+  CHECK_EQ(result.at(0), "abc");
+  CHECK_EQ(result.at(1), "def");
+}
+
+void test_split_works2() {
+  vector<string> result = split("abc,def,ghi", ",");
+  CHECK_EQ(result.size(), 3);
+  CHECK_EQ(result.at(0), "abc");
+  CHECK_EQ(result.at(1), "def");
+  CHECK_EQ(result.at(2), "ghi");
+}
+
+void test_split_handles_multichar_delim() {
+  vector<string> result = split("abc,,def,,ghi", ",,");
+  CHECK_EQ(result.size(), 3);
+  CHECK_EQ(result.at(0), "abc");
+  CHECK_EQ(result.at(1), "def");
+  CHECK_EQ(result.at(2), "ghi");
+}
+
+void test_trim() {
+  CHECK_EQ(trim(""), "");
+  CHECK_EQ(trim(" "), "");
+  CHECK_EQ(trim("  "), "");
+  CHECK_EQ(trim("a"), "a");
+  CHECK_EQ(trim(" a"), "a");
+  CHECK_EQ(trim("  a"), "a");
+  CHECK_EQ(trim("  ab"), "ab");
+  CHECK_EQ(trim("a "), "a");
+  CHECK_EQ(trim("a  "), "a");
+  CHECK_EQ(trim("ab  "), "ab");
+  CHECK_EQ(trim(" a "), "a");
+  CHECK_EQ(trim("  a  "), "a");
+  CHECK_EQ(trim("  ab  "), "ab");
+}
diff --git a/subx/Readme.md b/subx/Readme.md
new file mode 100644
index 00000000..d68cc331
--- /dev/null
+++ b/subx/Readme.md
@@ -0,0 +1 @@
+Bytecode interpreter for a subset of the 32-bit x86 ISA.
diff --git a/subx/build b/subx/build
new file mode 100755
index 00000000..f5ea62c3
--- /dev/null
+++ b/subx/build
@@ -0,0 +1,106 @@
+#!/bin/sh
+# returns 0 on successful build or nothing to build
+# non-zero exit status only on error during building
+set -e  # stop immediately on error
+
+# [0-9]*.cc -> subx.cc -> subx
+# (layers)   |          |
+#          tangle      $CXX
+
+# can also be called with a layer to only build until
+#   $ ./build --until 050
+UNTIL_LAYER=${2:-zzz}
+
+# there's two mechanisms for fast builds here:
+# - if a command is quick to run, always run it but update the result only on any change
+# - otherwise run it only if the output is 'older_than' the inputs
+#
+# avoid combining both mechanisms for a single file
+# otherwise you'll see spurious messages about files being updated
+# risk: a file may unnecessarily update without changes, causing unnecessary work downstream
+
+test "$CXX" || export CXX=c++
+test "$CC" || export CC=cc
+test "$CFLAGS" || export CFLAGS="-g -O3"
+export CFLAGS="$CFLAGS -Wall -Wextra -ftrapv -fno-strict-aliasing"
+
+# return 1 if $1 is older than _any_ of the remaining args
+older_than() {
+  local target=$1
+  shift
+  if [ ! -e $target ]
+  then
+#?     echo "$target doesn't exist"
+    echo "updating $target" >&2
+    return 0  # success
+  fi
+  local f
+  for f in $*
+  do
+    if [ $f -nt $target ]
+    then
+      echo "updating $target" >&2
+      return 0  # success
+    fi
+  done
+  return 1  # failure
+}
+
+# redirect to $1, unless it's already identical
+update() {
+  if [ ! -e $1 ]
+  then
+    cat > $1
+  else
+    cat > $1.tmp
+    diff -q $1 $1.tmp >/dev/null  &&  rm $1.tmp  ||  mv $1.tmp $1
+  fi
+}
+
+update_cp() {
+  if [ ! -e $2/$1 ]
+  then
+    cp $1 $2
+  elif [ $1 -nt $2/$1 ]
+  then
+    cp $1 $2
+  fi
+}
+
+noisy_cd() {
+  cd $1
+  echo "-- `pwd`" >&2
+}
+
+older_than ../enumerate/enumerate ../enumerate/enumerate.cc && {
+  $CXX $CFLAGS ../enumerate/enumerate.cc -o ../enumerate/enumerate
+}
+
+older_than ../tangle/tangle ../tangle/*.cc && {
+  noisy_cd ../tangle
+    {
+      grep -h "^struct .* {" [0-9]*.cc  |sed 's/\(struct *[^ ]*\).*/\1;/'
+      grep -h "^typedef " [0-9]*.cc
+    }  |update type_list
+    grep -h "^[^ #].*) {" [0-9]*.cc  |sed 's/ {.*/;/'  |update function_list
+    ls [0-9]*.cc  |grep -v "\.test\.cc$"  |sed 's/.*/#include "&"/'  |update file_list
+    ls [0-9]*.test.cc  |sed 's/.*/#include "&"/'  |update test_file_list
+    grep -h "^[[:space:]]*void test_" [0-9]*.cc  |sed 's/^\s*void \(.*\)() {$/\1,/'  |update test_list
+    $CXX $CFLAGS boot.cc -o tangle
+  noisy_cd ../subx  # no effect; just to show us returning to the parent directory
+}
+
+LAYERS=$(../enumerate/enumerate --until $UNTIL_LAYER  |grep '.cc$')
+older_than subx.cc $LAYERS ../enumerate/enumerate ../tangle/tangle && {
+  # no update here; rely on 'update' calls downstream
+  ../tangle/tangle $LAYERS  > subx.cc
+}
+
+grep -h "^[^[:space:]#].*) {$" subx.cc  |grep -v ":.*("  |sed 's/ {.*/;/'  |update function_list
+grep -h "^\s*void test_" subx.cc  |sed 's/^\s*void \(.*\)() {.*/\1,/'  |update test_list
+
+older_than subx subx.cc *_list && {
+  $CXX $CFLAGS subx.cc -o subx
+}
+
+exit 0
diff --git a/subx/clean b/subx/clean
new file mode 100755
index 00000000..6294eb56
--- /dev/null
+++ b/subx/clean
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+set -v
+rm -rf subx.cc subx *_list *.dSYM
+test $# -gt 0 && exit 0  # convenience: 'clean top-level' to leave subsidiary tools alone
+rm -rf ../enumerate/enumerate ../tangle/tangle ../tangle/*_list ../*/*.dSYM
-- 
cgit 1.4.1-2-gfad0