41 files changed, 13590 insertions, 0 deletions
diff --git a/linux/bootstrap/000organization.cc b/linux/bootstrap/000organization.cc
new file mode 100644
index 00000000..1bbaa057
--- /dev/null
+++ b/linux/bootstrap/000organization.cc
@@ -0,0 +1,164 @@
+//: You guessed right: the '000' prefix means you should start reading here.
+//:
+//: This project is set up to load all files with a numeric prefix. Just
+//: create a new file and start hacking.
+//:
+//: The first few files (00*) are independent of what this program does, an
+//: experimental skeleton that will hopefully make it both easier for others to
+//: understand and more malleable, easier to rewrite and remould into radically
+//: different shapes without breaking in subtle corner cases. The premise is
+//: that understandability and rewrite-friendliness are related in a virtuous
+//: cycle. Doing one well makes it easier to do the other.
+//:
+//: Lower down, this file contains a legal, bare-bones C++ program. It doesn't
+//: do anything yet; subsequent files will contain :(...) directives to insert
+//: lines into it. For example:
+//:   :(after "more events")
+//: This directive means: insert the following lines after a line in the
+//: program containing the words "more events".
+//:
+//: A simple tool is included to 'tangle' all the files together in sequence
+//: according to their directives into a single source file containing all the
+//: code for the project, and then feed the source file to the compiler.
+//: (It'll drop these comments starting with a '//:' prefix that only make
+//: sense before tangling.)
+//:
+//: Directives free up the programmer to order code for others to read rather
+//: than as forced by the computer or compiler. Each individual feature can be
+//: organized in a self-contained 'layer' that adds code to many different data
+//: structures and functions all over the program. The right decomposition into
+//: layers will let each layer make sense in isolation.
+//:
+//:   "If I look at any small part of it, I can see what is going on -- I don't
+//:   need to refer to other parts to understand what something is doing.
+//:
+//:   If I look at any large part in overview, I can see what is going on -- I
+//:   don't need to know all the details to get it.
+//:
+//:   Every level of detail is as locally coherent and as well thought-out as
+//:   any other level."
+//:
+//:       -- Richard Gabriel, "The Quality Without A Name"
+//:          (http://dreamsongs.com/Files/PatternsOfSoftware.pdf, page 42)
+//:
+//: Directives are powerful; they permit inserting or modifying any point in
+//: the program. Using them tastefully requires mapping out specific lines as
+//: waypoints for future layers to hook into. Often such waypoints will be in
+//: comments, capitalized to hint that other layers rely on their presence.
+//:
+//: A single waypoint might have many different code fragments hooking into
+//: it from all over the codebase. Use 'before' directives to insert
+//: code at a location in order, top to bottom, and 'after' directives to
+//: insert code in reverse order. By convention waypoints intended for insertion
+//: before begin with 'End'. Notice below how the layers line up above the "End
+//: Foo" waypoint.
+//:
+//:   File 001          File 002                File 003
+//:   ============      ===================     ===================
+//:   // Foo
+//:   ------------
+//:              <----  :(before "End Foo")
+//:                     ....
+//:                     ...
+//:   ------------
+//:              <----------------------------  :(before "End Foo")
+//:                                             ....
+//:                                             ...
+//:   // End Foo
+//:   ============
+//:
+//: Here's part of a layer in color: http://i.imgur.com/0eONnyX.png. Directives
+//: are shaded dark.
+//:
+//: Layers do more than just shuffle code around. In a well-organized codebase
+//: it should be possible to stop loading after any file/layer, build and run
+//: the program, and pass all tests for loaded features. (Relevant is
+//: http://youtube.com/watch?v=c8N72t7aScY, a scene from "2001: A Space
+//: Odyssey".) Get into the habit of running the included script called
+//: 'test_layers' before you commit any changes.
+//:
+//: This 'subsetting guarantee' ensures that this directory contains a
+//: cleaned-up narrative of the evolution of this codebase. Organizing
+//: autobiographically allows newcomers to rapidly orient themselves, reading
+//: the first few files to understand a simple gestalt of a program's core
+//: purpose and features, and later gradually working their way through other
+//: features as the need arises.
+//:
+//: Programmers shouldn't need to understand everything about a program to
+//: hack on it. But they shouldn't be prevented from a thorough understanding
+//: of each aspect either. The goal of layers is to reward curiosity.
+//:
+//: More information: http://akkartik.name/post/wart-layers
+
+// Includes
+// End Includes
+
+// Types
+// End Types
+
+// Function prototypes are auto-generated in the 'build' script; define your
+// functions in any order. Just be sure to declare each function header all on
+// one line, ending with the '{'. Our auto-generation scripts are too minimal
+// and simple-minded to handle anything else.
+#include "function_list"  // by convention, files ending with '_list' are auto-generated
+
+// Globals
+//
+// All statements in this section should always define a single variable on a
+// single line. The 'build' script will simple-mindedly auto-generate extern
+// declarations for them. Remember to define (not just declare) constants with
+// extern linkage in this section, since C++ global constants have internal
+// linkage by default.
+//
+// End Globals
+
+int main(int argc, char* argv[]) {
+  atexit(reset);  // have the C runtime call reset() during normal program termination
+  // we require a 32-bit little-endian system
+  assert(sizeof(int) == 4);
+  assert(sizeof(float) == 4);
+  assert_little_endian();  // prints a message and exits with status 1 unless the host is little-endian
+
+  // End One-time Setup
+
+  // Commandline Parsing
+  // End Commandline Parsing
+
+  // End Main
+
+  return 0;
+}
+
+// Unit Tests
+// End Unit Tests
+
+//: our first directive; insert the following headers at the start of the program
+:(before "End Includes")
+#include <assert.h>
+#include <stdlib.h>
+
+//: Without directives or with the :(code) directive, lines get added at the
+//: end.
+//:
+//: Regardless of where functions are defined, we can call them anywhere we
+//: like as long as we format the function header in a specific way: put it
+//: all on a single line without indent, end the line with ') {' and no
+//: trailing whitespace. As long as functions uniformly start this way, our
+//: 'build' script contains a little command to automatically generate
+//: declarations for them.
+:(code)
+void reset() {
+  // End Reset
+}  // empty in this layer; other layers add their per-run resets at the waypoint above
+
+void assert_little_endian() {
+  // On a little-endian host the lowest-addressed byte of the int 1 holds 1.
+  const int probe = 1;
+  const bool little_endian = (*reinterpret_cast<const char*>(&probe) == 1);
+  if (little_endian) return;
+  cerr << "SubX requires a little-endian processor. Do you have Intel (or AMD or Atom) inside?\n";
+  exit(1);
+}
+:(before "End Includes")
+#include<iostream>
+using std::cerr;
diff --git a/linux/bootstrap/001help.cc b/linux/bootstrap/001help.cc
new file mode 100644
index 00000000..87466acd
--- /dev/null
+++ b/linux/bootstrap/001help.cc
@@ -0,0 +1,271 @@
+//: Everything this project/binary supports.
+//: This should give you a sense for what to look forward to in later layers.
+
+:(before "End Commandline Parsing")
+if (argc <= 1 || is_equal(argv[1], "--help")) {
+  //: this is the functionality later layers will provide
+  // currently no automated tests for commandline arg parsing
+  cerr << get(Help, "usage");
+  return 0;
+}
+
+//: Support for option parsing.
+//: Options always begin with '--' and are always the first arguments. An
+//: option will never follow a non-option.
+char** arg = &argv[1];
+while (argc > 1 && starts_with(*arg, "--")) {
+  if (false)
+    ;  // no-op branch just so any further additions can consistently always start with 'else'
+  // End Commandline Options(*arg)
+  else
+    cerr << "skipping unknown option " << *arg << '\n';
+  --argc;  ++argv;  ++arg;  // shift the option out; 'arg' keeps pointing at the new argv[1]
+}
+// If only options were given, argv[1] is now the null terminator: test argc before reading it.
+if (argc > 1 && is_equal(argv[1], "help")) {
+  if (argc == 2) {  // bare 'help' with no topic: list what is available
+    cerr << "help on what?\n";
+    help_contents();
+    return 0;
+  }
+  string key(argv[2]);  // safe: argc > 2 once the branch above has returned
+  // End Help Special-cases(key)
+  if (contains_key(Help, key)) {
+    cerr << get(Help, key);
+    return 0;
+  }
+  else {
+    cerr << "No help found for '" << key << "'\n";
+    help_contents();
+    cerr << "Please check your command for typos.\n";
+    return 1;  // unknown topic is reported as a failure
+  }
+}
+
+:(code)
+void help_contents() {
+  // Print the list of help topics; lines inserted at the waypoint below can extend it.
+  cerr << "Available top-level topics:\n" << "  usage\n";
+  // End Help Contents
+}
+
+:(before "End Globals")
+map<string, string> Help;
+:(before "End Includes")
+#include <map>
+using std::map;
+:(before "End One-time Setup")
+init_help();
+:(code)
+void init_help() {
+  put(Help, "usage",  // the text printed for '--help' and when no arguments are given
+    "bootstrap: the bootstrap translator for SubX.\n"
+    "This program also wraps some miscellaneous useful functionality:\n"
+    "  - an x86 emulator: `bootstrap run`\n"
+    "  - online help: `bootstrap help`\n"
+    "\n"
+    "== Ways to invoke bootstrap\n"
+    "- See this message:\n"
+    "    bootstrap --help\n"
+    "- Convert a textual SubX program into a standard ELF binary that you can\n"
+    "  run on your computer:\n"
+    "    bootstrap translate input1.subx input2.subx ... -o <output ELF binary>\n"
+    "- Run a SubX binary using SubX itself (for better error messages):\n"
+    "    bootstrap run <ELF binary>\n"
+    "- Run all bootstrap's unit tests:\n"
+    "    bootstrap test\n"
+    "- Run a single unit test:\n"
+    "    bootstrap test <test name>\n"
+    "     e.g. bootstrap test test_copy_imm32_to_EAX\n"
+    "\n"
+    "== Debugging aids\n"
+    "- Add '--trace' to any of these commands to save a trace to disk at the end.\n"
+    "  This can run out of memory for long-running commands.\n"
+    "- Add '--debug' to emit additional debug information during translation.\n"
+    "  'bootstrap --debug translate' will save metadata to disk that\n"
+    "  'bootstrap --trace run' uses to make traces more informative.\n"
+    "\n"
+    "Options starting with '--' must always come before any other arguments.\n"
+    "\n"
+    "To start learning how to write SubX programs, see Readme.md (particularly\n"
+    "the section on the x86 instruction set) and then run:\n"
+    "  bootstrap help\n"
+  );  // adjacent string literals above are concatenated into one help text
+  // End Help Texts
+}
+
+:(code)
+bool is_equal(const char* s, const char* lit) {
+  // True iff the NUL-terminated strings 's' and 'lit' are identical; neither may be null.
+  // strcmp() yields 0 only when both the bytes and the lengths agree.
+  return strcmp(s, lit) == 0;
+}
+
+bool starts_with(const string& s, const string& pat) {
+  // True iff 'pat' is a prefix of 's'. An empty 'pat' always matches.
+  // compare() clamps the requested span to the end of 's', so a 'pat' longer
+  // than 's' compares unequal without an explicit length check.
+  return s.compare(0, pat.length(), pat) == 0;
+}
+
+//: I'll throw some style conventions here for want of a better place for them.
+//: As a rule I hate style guides. Do what you want, that's my motto. But since
+//: we're dealing with C/C++, the one big thing we want to avoid is undefined
+//: behavior. If a compiler ever encounters undefined behavior it can make
+//: your program do anything it wants.
+//:
+//: For reference, my checklist of undefined behaviors to watch out for:
+//:   out-of-bounds access
+//:   uninitialized variables
+//:   use after free
+//:   dereferencing invalid pointers: null, a new of size 0, others
+//:
+//:   casting a large number to a type too small to hold it
+//:
+//:   integer overflow
+//:   division by zero and other undefined expressions
+//:   left-shift by negative count
+//:   shifting values by more than or equal to the number of bits they contain
+//:   bitwise operations on signed numbers
+//:
+//:   Converting pointers to types of different alignment requirements
+//:     T* -> void* -> T*: defined
+//:     T* -> U* -> T*: defined if non-function pointers and alignment requirements are same
+//:     function pointers may be cast to other function pointers
+//:
+//:   casting a numeric value into a value that can't be represented by the target type (either directly or via static_cast)
+//:
+//: To guard against these, some conventions:
+//:
+//: 0. Initialize all primitive variables in functions and constructors.
+//:
+//: 1. Minimize use of pointers and pointer arithmetic. Avoid 'new' and
+//: 'delete' as far as possible. Rely on STL to perform memory management to
+//: avoid use-after-free issues (and memory leaks).
+//:
+//: 2. Avoid naked arrays to avoid out-of-bounds access. Never use operator[]
+//: except with map. Use at() with STL vectors and so on.
+//:
+//: 3. Valgrind all the things.
+//:
+//: 4. Avoid unsigned numbers. Not strictly an undefined-behavior issue, but
+//: the extra range doesn't matter, and it's one less confusing category of
+//: interaction gotchas to worry about.
+//:
+//: Corollary: don't use the size() method on containers, since it returns an
+//: unsigned and that'll cause warnings about mixing signed and unsigned,
+//: yadda-yadda. Instead use this macro below to perform an unsafe cast to
+//: signed. We'll just give up immediately if a container's ever too large.
+//: Basically, Mu is not concerned about this being a little slower than it
+//: could be. (https://gist.github.com/rygorous/e0f055bfb74e3d5f0af20690759de5a7)
+//:
+//: Addendum to corollary: We're going to uniformly use int everywhere, to
+//: indicate that we're oblivious to number size, and since Clang on 32-bit
+//: platforms doesn't yet support multiplication over 64-bit integers, and
+//: since multiplying two integers seems like a more common situation to end
+//: up in than integer overflow.
+:(before "End Includes")
+#define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size()))  // size() as an int, after asserting it is below 2^(bits in an int, minus 2); evaluates X twice
+
+//: 5. Integer overflow is guarded against at runtime using the -ftrapv flag
+//: to the compiler, supported by Clang (GCC version only works sometimes:
+//: http://stackoverflow.com/questions/20851061/how-to-make-gcc-ftrapv-work).
+:(before "atexit(reset)")
+initialize_signal_handlers();  // not always necessary, but doesn't hurt
+//? cerr << INT_MAX+1 << '\n';  // test overflow
+//? assert(false);  // test SIGABRT
+:(code)
+// Route SIGABRT and SIGILL to dump_and_exit. Based on https://spin.atomicobject.com/2013/01/13/exceptions-stack-traces-c
+void initialize_signal_handlers() {
+  struct sigaction action;  bzero(&action, sizeof(action));  // start from an all-zero request
+  action.sa_sigaction = dump_and_exit;
+  action.sa_flags = SA_SIGINFO;  // required: without it sigaction() consults sa_handler, not sa_sigaction
+  sigemptyset(&action.sa_mask);  // block no extra signals while the handler runs
+  sigaction(SIGABRT, &action, NULL);  // assert() failure or integer overflow on linux (with -ftrapv)
+  sigaction(SIGILL,  &action, NULL);  // integer overflow on OS X (with -ftrapv)
+}
+void dump_and_exit(int sig, siginfo_t* /*unused*/, void* /*unused*/) {
+  switch (sig) {  // explain the likely cause of the signal, then terminate at once
+    case SIGABRT:
+      #ifndef __APPLE__  // compiled in only when __APPLE__ is not defined
+        cerr << "SIGABRT: might be an integer overflow if it wasn't an assert() failure\n";
+        _Exit(1);  // immediate exit with status 1; atexit() handlers are not run
+      #endif
+      break;  // when __APPLE__ is defined: just return from the handler
+    case SIGILL:
+      #ifdef __APPLE__  // compiled in only when __APPLE__ is defined
+        cerr << "SIGILL: most likely caused by integer overflow\n";
+        _Exit(1);
+      #endif
+      break;
+    default:
+      break;  // any other signal: return without doing anything
+  }
+}
+:(before "End Includes")
+#include <signal.h>
+
+//: 6. Map's operator[] being non-const is fucking evil.
+:(before "Globals")  // can't generate prototypes for these
+// Lookup that never inserts, unlike operator[]. From http://stackoverflow.com/questions/152643/idiomatic-c-for-reading-from-a-const-map
+template<typename T> typename T::mapped_type& get(T& map, typename T::key_type const& key) {
+  typename T::iterator iter(map.find(key));
+  if (iter == map.end()) {  // missing key: name it on cerr, then trip the assert
+    cerr << "get couldn't find key '" << key << "'\n";
+    assert(iter != map.end());
+  }
+  return iter->second;  // reference to the value stored in the map
+}
+template<typename T> typename T::mapped_type const& get(const T& map, typename T::key_type const& key) {
+  typename T::const_iterator iter(map.find(key));
+  if (iter == map.end()) {  // same missing-key handling as the non-const overload
+    cerr << "get couldn't find key '" << key << "'\n";
+    assert(iter != map.end());
+  }
+  return iter->second;  // read-only reference to the stored value
+}
+template<typename T> typename T::mapped_type const& put(T& map, typename T::key_type const& key, typename T::mapped_type const& value) {
+  typename T::mapped_type& slot = map[key];  // finds the entry, creating it if 'key' is new
+  slot = value;  return slot;  // overwrite, then hand back the stored copy
+}
+template<typename T> bool contains_key(T& map, typename T::key_type const& key) {
+  return map.find(key) != map.end();  // pure lookup: true iff 'key' is present; never inserts
+}
+template<typename T> typename T::mapped_type& get_or_insert(T& map, typename T::key_type const& key) {
+  return map[key];  // plain operator[]: the one helper here that may add 'key' as a side effect
+}
+template<typename T> typename T::mapped_type const& put_new(T& map, typename T::key_type const& key, typename T::mapped_type const& value) {
+  assert(map.find(key) == map.end());  // precondition: 'key' must not be present yet
+  typename T::mapped_type& slot = map[key];  slot = value;  // create the entry and fill it
+  return slot;
+}
+//: The contract: any container that relies on get_or_insert should never call
+//: contains_key.
+
+//: 7. istreams are a royal pain in the arse. You have to be careful about
+//: what subclass you try to putback into. You have to watch out for the pesky
+//: failbit and badbit. Just avoid eof() and use this helper instead.
+:(code)
+bool has_data(istream& in) {
+  return !(in.fail() || in.eof());  // usable only while none of failbit, badbit and eofbit is set
+}
+
+:(before "End Includes")
+#include <assert.h>
+
+#include <iostream>
+using std::istream;
+using std::ostream;
+using std::iostream;
+using std::cin;
+using std::cout;
+using std::cerr;
+#include <iomanip>
+
+#include <string.h>
+#include <string>
+using std::string;
+
+#include <algorithm>
+using std::min;
+using std::max;
diff --git a/linux/bootstrap/002test.cc b/linux/bootstrap/002test.cc
new file mode 100644
index 00000000..7865c3b5
--- /dev/null
+++ b/linux/bootstrap/002test.cc
@@ -0,0 +1,123 @@
+//: A simple test harness. To create new tests, define functions starting with
+//: 'test_'. To run all tests so defined, run:
+//:   $ ./bootstrap test
+//:
+//: Every layer should include tests, and can reach into previous layers.
+//: However, it seems like a good idea never to reach into tests from previous
+//: layers. Every test should be a contract that always passes as originally
+//: written, regardless of any later layers. Avoid writing 'temporary' tests
+//: that are only meant to work until some layer.
+
+:(before "End Types")
+typedef void (*test_fn)(void);
+:(before "Globals")
+// move a global ahead into types that we can't generate an extern declaration for
+const test_fn Tests[] = {
+  #include "test_list"  // auto-generated; see 'build*' scripts
+};
+
+:(before "End Globals")
+bool Run_tests = false;
+bool Passed = true;  // set this to false inside any test to indicate failure
+
+:(before "End Includes")
+#define CHECK(X) \
+  if (Passed && !(X)) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << '\n'; \
+    Passed = false; \
+    return;  /* Currently we stop at the very first failure. */ \
+  }
+
+#define CHECK_EQ(X, Y) \
+  if (Passed && (X) != (Y)) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << " == " << #Y << '\n'; \
+    cerr << "  got " << (X) << '\n';  /* BEWARE: multiple eval */ \
+    Passed = false; \
+    return;  /* Currently we stop at the very first failure. */ \
+  }
+
+:(before "End Reset")
+Passed = true;
+
+:(before "End Commandline Parsing")
+if (argc > 1 && is_equal(argv[1], "test")) {
+  Run_tests = true;  --argc;  ++argv;  // shift 'test' out of commandline args
+}
+
+:(before "End Main")
+if (Run_tests) {
+  // Test Runs
+  // we run some tests and then exit; assume no state need be maintained afterward
+
+  long num_failures = 0;
+  // End Test Run Initialization
+  time_t t;  time(&t);
+  cerr << "C tests: " << ctime(&t);
+  for (size_t i=0;  i < sizeof(Tests)/sizeof(Tests[0]);  ++i) {
+//?     cerr << "running " << Test_names[i] << '\n';
+    run_test(i);
+    if (Passed) cerr << '.';
+    else ++num_failures;
+  }
+  cerr << '\n';
+  // End Tests
+  if (num_failures > 0) {
+    cerr << num_failures << " failure"
+         << (num_failures > 1 ? "s" : "")
+         << '\n';
+    return 1;
+  }
+  return 0;
+}
+
+:(after "End Main")
+//: Raise other unrecognized sub-commands as errors.
+//: We couldn't do this until now because we want `./bootstrap test` to always
+//: succeed, no matter how many layers are included in the build.
+cerr << "nothing to do\n";
+return 1;
+
+:(code)
+void run_test(size_t i) {
+  // Run the i-th entry of Tests from a freshly reset state; an out-of-range index is reported and ignored.
+  const size_t test_count = sizeof(Tests)/sizeof(Tests[0]);
+  if (i >= test_count) { cerr << "no test " << i << '\n';  return; }
+  reset();
+  // End Test Setup
+  const test_fn fn = Tests[i];
+  fn();
+  // End Test Teardown
+}
+
+//: Convenience: run a single test
+:(before "Globals")
+// Names for each element of the 'Tests' global, respectively.
+const string Test_names[] = {
+  #include "test_name_list"  // auto-generated; see 'build*' scripts
+};
+:(after "Test Runs")
+string maybe_single_test_to_run = argv[argc-1];  // last commandline word; it may name one test
+for (size_t i=0;  i < sizeof(Tests)/sizeof(Tests[0]);  ++i) {
+  if (Test_names[i] == maybe_single_test_to_run) {
+    run_test(i);
+    if (Passed) cerr << ".\n";
+    return Passed ? 0 : 1;  // a failing test must fail the process, as the run of all tests already does
+  }
+}
+
+//: A pending test that also serves to put our test harness through its paces.
+
+:(code)
+void test_is_equal() {
+  CHECK(is_equal("", ""));  // two empty strings match
+  CHECK(!is_equal("", "foo"));  // empty vs. non-empty, in both argument orders
+  CHECK(!is_equal("foo", ""));
+  CHECK(!is_equal("f", "bar"));  // different lengths and different first byte
+  CHECK(!is_equal("bar", "f"));
+  CHECK(!is_equal("bar", "ba"));  // one string is a strict prefix of the other
+  CHECK(!is_equal("ba", "bar"));
+  CHECK(is_equal("bar", "bar"));  // identical non-empty strings
+}
+
+:(before "End Includes")
+#include <stdlib.h>
diff --git a/linux/bootstrap/003trace.cc b/linux/bootstrap/003trace.cc
new file mode 100644
index 00000000..75dc0253
--- /dev/null
+++ b/linux/bootstrap/003trace.cc
@@ -0,0 +1,523 @@
+//: The goal of layers is to make programs easier to understand and more
+//: malleable, easier to rewrite in radical ways without accidentally breaking
+//: some corner case. Tests further both goals. They help understandability by
+//: letting one make small changes and get feedback. What if I wrote this line
+//: like so? What if I removed this function call, is it really necessary?
+//: Just try it, see if the tests pass. Want to explore rewriting this bit in
+//: this way? Tests put many refactorings on a firmer footing.
+//:
+//: But the usual way we write tests seems incomplete. Refactorings tend to
+//: work in the small, but don't help with changes to function boundaries. If
+//: you want to extract a new function you have to manually test-drive it to
+//: create tests for it. If you want to inline a function its tests are no
+//: longer valid. In both cases you end up having to reorganize code as well as
+//: tests, an error-prone activity.
+//:
+//: In response, this layer introduces the notion of domain-driven *white-box*
+//: testing. We focus on the domain of inputs the whole program needs to
+//: handle rather than the correctness of individual functions. All white-box
+//: tests invoke the program in a single way: by calling run() with some
+//: input. As the program operates on the input, it traces out a list of
+//: _facts_ deduced about the domain:
+//:   trace("label") << "fact 1: " << val;
+//:
+//: Tests can now check for these facts in the trace:
+//:   CHECK_TRACE_CONTENTS("label", "fact 1: 34\n"
+//:                                 "fact 2: 35\n");
+//:
+//: Since we never call anything but the run() function directly, we never have
+//: to rewrite the tests when we reorganize the internals of the program. We
+//: just have to make sure our rewrite deduces the same facts about the domain,
+//: and that's something we're going to have to do anyway.
+//:
+//: To avoid the combinatorial explosion of integration tests, each layer
+//: mainly logs facts to the trace with a common *label*. All tests in a layer
+//: tend to check facts with this label. Validating the facts logged with a
+//: specific label is like calling functions of that layer directly.
+//:
+//: To build robust tests, trace facts about your domain rather than details of
+//: how you computed them.
+//:
+//: More details: http://akkartik.name/blog/tracing-tests
+//:
+//: ---
+//:
+//: Between layers and domain-driven testing, programming starts to look like a
+//: fundamentally different activity. Instead of focusing on a) superficial,
+//: b) local rules on c) code [like say http://blog.bbv.ch/2013/06/05/clean-code-cheat-sheet],
+//: we allow programmers to engage with the a) deep, b) global structure of
+//: the c) domain. If you can systematically track discontinuities in the
+//: domain, you don't care if the code used gotos as long as it passed all
+//: tests. If tests become more robust to run, it becomes easier to try out
+//: radically different implementations for the same program. If code is
+//: super-easy to rewrite, it becomes less important what indentation style it
+//: uses, or that the objects are appropriately encapsulated, or that the
+//: functions are referentially transparent.
+//:
+//: Instead of plumbing, programming becomes building and gradually refining a
+//: map of the environment the program must operate under. Whether a program
+//: is 'correct' at a given point in time is a red herring; what matters is
+//: avoiding regression by monotonically nailing down the more 'eventful'
+//: parts of the terrain. It helps readers new and old, and rewards curiosity,
+//: to organize large programs in self-similar hierarchies of example tests
+//: colocated with the code that makes them work.
+//:
+//:   "Programming properly should be regarded as an activity by which
+//:   programmers form a mental model, rather than as production of a program."
+//:   -- Peter Naur (http://akkartik.name/naur.pdf)
+
+//:: == Core data structures
+
+:(before "End Globals")
+trace_stream* Trace_stream = NULL;
+
+:(before "End Types")
+struct trace_stream {
+  vector<trace_line> past_lines;  // lines committed so far, in the order they were appended
+  // End trace_stream Fields
+
+  trace_stream() {
+    // End trace_stream Constructor
+  }
+  ~trace_stream() {
+    // End trace_stream Destructor
+  }
+  // End trace_stream Methods
+};  // further members and the constructor/destructor bodies are hooked in at the waypoints above
+
+//:: == Adding to the trace
+
+//: Top-level method is trace() which can be used like an ostream. Usage:
+//:   trace(depth, label) << ... << end();
+//: Don't forget the 'end()' to actually append to the trace.
+:(before "End Includes")
+// No brackets around the expansion so that it prints nothing if Trace_stream
+// isn't initialized.
+#define trace(...)  !Trace_stream ? cerr : Trace_stream->stream(__VA_ARGS__)
+
+:(before "End trace_stream Fields")
+// accumulator for current trace_line
+ostringstream* curr_stream;
+string curr_label;
+int curr_depth;
+// other stuff
+int collect_depth;  // avoid tracing lower levels for speed
+ofstream null_stream;  // never opened, so writes to it silently fail
+
+//: Some constants.
+:(before "struct trace_stream")  // include constants in all cleaved compilation units
+const int Max_depth = 9999;
+:(before "End trace_stream Constructor")
+curr_stream = NULL;
+curr_depth = Max_depth;
+collect_depth = Max_depth;
+
+:(before "struct trace_stream")
+struct trace_line {
+  string contents;  // the traced text itself
+  string label;  // the category the text was traced under
+  int depth;  // 0 is 'sea level'; positive integers are progressively 'deeper' and lower level
+
+  // Build a line at sea level (depth 0).
+  trace_line(string c, string l)
+    : contents(c), label(l), depth(0) {
+  }
+
+  // Build a line at an explicit depth.
+  trace_line(string c, string l, int d)
+    : contents(c), label(l), depth(d) {
+  }
+};
+
+string unescape_newline(string& s) {
+  // Despite the name, this *escapes*: every newline in 's' comes back as the
+  // two characters '\' and 'n'. All other characters are copied unchanged.
+  string result;
+  for (string::const_iterator p = s.begin();  p != s.end();  ++p) {
+    if (*p == '\n') result += "\\n";
+    else result += *p;
+  }
+  return result;
+}
+
+void dump_trace_line(ostream& s, trace_line& t) {
+  s << std::setw(2) << t.depth << ' ' << t.label << ": " << unescape_newline(t.contents) << '\n';  // one row per line: depth in a 2-wide field, label, ": ", then contents with newlines shown as \n
+}
+
+//: Starting a new trace line.
+:(before "End trace_stream Methods")
+ostream& stream(string label) {
+  return stream(Max_depth, label);  // no depth given: file the line at the deepest level
+}
+
+ostream& stream(int depth, string label) {
+  if (depth > collect_depth) return null_stream;  // deeper than we collect: hand back the never-opened stream so writes are dropped
+  curr_stream = new ostringstream;  // NOTE(review): a previous stream that never reached newline() is not freed here -- confirm this is acceptable
+  curr_label = label;
+  curr_depth = depth;
+  (*curr_stream) << std::hex;  // printing addresses is the common case
+  return *curr_stream;
+}
+
+//: End of a trace line; append it to the trace.
+:(before "End Types")
+struct end {};  // empty tag type; streaming one finishes the trace line in progress
+:(code)
+ostream& operator<<(ostream& os, end /*unused*/) {
+  if (Trace_stream) Trace_stream->newline();  // commit what has been accumulated, when a trace is active
+  return os;
+}
+
+//: Fatal error.
+:(before "End Types")
+struct die {};  // empty tag type; streaming one commits the line in progress and ends the program
+:(code)
+ostream& operator<<(ostream& /*unused*/, die /*unused*/) {
+  if (Trace_stream) Trace_stream->newline();  // make sure the final message lands in the trace first
+  exit(1);  // does not return, hence no return statement
+}
+
+:(before "End trace_stream Methods")
+void newline();
+:(code)
+void trace_stream::newline() {
+  if (!curr_stream) return;  // no line in progress, nothing to commit
+  string curr_contents = curr_stream->str();
+  if (!curr_contents.empty()) {  // lines with no text are dropped rather than recorded
+    past_lines.push_back(trace_line(curr_contents, trim(curr_label), curr_depth));  // preserve indent in contents
+    // maybe print this line to stderr
+    trace_line& t = past_lines.back();
+    if (should_incrementally_print_trace()) {
+      dump_trace_line(cerr, t);
+    }
+    // End trace Commit
+  }
+
+  // clean up
+  delete curr_stream;
+  curr_stream = NULL;
+  curr_label.clear();
+  curr_depth = Max_depth;  // same resting value the constructor assigns
+}
+
+//:: == Initializing the trace in tests
+
+:(before "End Includes")
+#define START_TRACING_UNTIL_END_OF_SCOPE  lease_tracer leased_tracer;
+:(before "End Test Setup")
+START_TRACING_UNTIL_END_OF_SCOPE
+
+//: Trace_stream is a resource, lease_tracer uses RAII to manage it.
+:(before "End Types")
+struct lease_tracer {
+  lease_tracer();  // points Trace_stream at a newly allocated stream
+  ~lease_tracer();  // deletes that stream and sets Trace_stream back to NULL
+};
+:(code)
+lease_tracer::lease_tracer() { Trace_stream = new trace_stream; }  // NOTE(review): overwrites any existing Trace_stream without freeing it -- confirm none can be live here
+lease_tracer::~lease_tracer() {
+  delete Trace_stream;
+  Trace_stream = NULL;  // later trace()/raise/warn calls fall back to cerr
+}
+
+//:: == Errors and warnings using traces
+
+:(before "End Includes")
+#define raise  (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Error_depth, "error"))  // no trace active: count the error and write to cerr; otherwise start an "error" line
+#define warn (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Warn_depth, "warn"))  // NOTE(review): with no trace active this also increments Trace_errors -- confirm warnings are meant to count as errors
+
+//: Print errors and warnings to the screen by default.
+:(before "struct trace_stream")  // include constants in all cleaved compilation units
+const int Error_depth = 0;
+const int Warn_depth = 1;
+:(before "End Globals")
+int Hide_errors = false;  // if set, don't print errors or warnings to screen
+int Hide_warnings = false;  // if set, don't print warnings to screen
+:(before "End Reset")
+Hide_errors = false;
+Hide_warnings = false;
+//: Never dump warnings in tests
+:(before "End Test Setup")
+Hide_warnings = true;
+:(code)
+bool trace_stream::should_incrementally_print_trace() {
+  if (!Hide_errors && curr_depth == Error_depth) return true;  // errors are echoed unless errors are hidden
+  if (!Hide_warnings && !Hide_errors && curr_depth == Warn_depth) return true;  // warnings are echoed only when neither flag is set
+  // End Incremental Trace Print Conditions
+  return false;  // any other line is not echoed by this function's caller
+}
+:(before "End trace_stream Methods")
+bool should_incrementally_print_trace();
+
+:(before "End Globals")
+int Trace_errors = 0;  // used only when Trace_stream is NULL
+
+// Fail tests that displayed (unexpected) errors.
+// Expected errors should always be hidden and silently checked for.
+:(before "End Test Teardown")
+if (Passed && !Hide_errors && trace_contains_errors()) {
+  Passed = false;
+}
+:(code)
+bool trace_contains_errors() {
+  return Trace_errors > 0 || trace_count("error") > 0;  // Trace_errors is bumped by raise/warn when no trace is active; trace_count is defined outside this view (presumably counts lines labelled "error" -- verify)
+}
+
+:(before "End Includes")
+// If we aren't yet sure how to deal with some corner case, use assert_for_now
+// to indicate that it isn't an inviolable invariant.
+#define assert_for_now assert
+#define raise_for_now raise
+
+//:: == Other assertions on traces
+//: Primitives:
+//:   - CHECK_TRACE_CONTENTS(lines)
+//:     Assert that the trace contains the given lines (separated by newlines)
+//:     in order. There can be other intervening lines between them.
+//:   - CHECK_TRACE_DOESNT_CONTAIN(line)
+//:   - CHECK_TRACE_DOESNT_CONTAIN(label, contents)
+//:     Assert that the trace doesn't contain the given (single) line.
+//:   - CHECK_TRACE_COUNT(label, count)
+//:     Assert that the trace contains exactly 'count' lines with the given
+//:     'label'.
+//:   - CHECK_TRACE_CONTAINS_ERRORS()
+//:   - CHECK_TRACE_DOESNT_CONTAIN_ERRORS()
+//:   - trace_count_prefix(label, prefix)
+//:     Count the number of trace lines with the given 'label' that start with
+//:     the given 'prefix'.
+
+:(before "End Includes")
+#define CHECK_TRACE_CONTENTS(...)  check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__)
+
+#define CHECK_TRACE_DOESNT_CONTAIN(...)  CHECK(trace_doesnt_contain(__VA_ARGS__))
+
+#define CHECK_TRACE_COUNT(label, count) \
+  if (Passed && trace_count(label) != (count)) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): trace_count of " << label << " should be " << count << '\n'; \
+    cerr << "  got " << trace_count(label) << '\n';  /* multiple eval */ \
+    DUMP(label); \
+    Passed = false; \
+    return;  /* Currently we stop at the very first failure. */ \
+  }
+
+#define CHECK_TRACE_CONTAINS_ERRORS()  CHECK(trace_contains_errors())
+#define CHECK_TRACE_DOESNT_CONTAIN_ERRORS() \
+  if (Passed && trace_contains_errors()) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected errors\n"; \
+    DUMP("error"); \
+    Passed = false; \
+    return; \
+  }
+
+// Allow tests to ignore trace lines generated during setup.
+#define CLEAR_TRACE  delete Trace_stream, Trace_stream = new trace_stream
+
+:(code)
+bool check_trace_contents(string FUNCTION, string FILE, int LINE, string expected) {  // do the lines of 'expected' occur in the trace, in order?
+  if (!Passed) return false;  // some earlier check already failed; don't pile on
+  if (!Trace_stream) return false;
+  vector<string> expected_lines = split(expected, "\n");
+  int curr_expected_line = 0;
+  while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty())  // skip blank expected lines
+    ++curr_expected_line;
+  if (curr_expected_line == SIZE(expected_lines)) return true;  // nothing to look for
+  string label, contents;
+  split_label_contents(expected_lines.at(curr_expected_line), &label, &contents);
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {  // single forward pass; non-matching trace lines are skipped
+    if (label != p->label) continue;
+    string t = trim(p->contents);
+    if (contents != unescape_newline(t)) continue;
+    ++curr_expected_line;  // matched; move on to the next non-blank expected line
+    while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty())
+      ++curr_expected_line;
+    if (curr_expected_line == SIZE(expected_lines)) return true;  // every expected line was found
+    split_label_contents(expected_lines.at(curr_expected_line), &label, &contents);
+  }
+  // Ran out of trace: 'label'/'contents' now hold the first expected line that couldn't be matched.
+  if (line_exists_anywhere(label, contents)) {
+    cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): line [" << label << ": " << contents << "] out of order in trace:\n";
+    DUMP("");
+  }
+  else {
+    cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << contents << "] in trace:\n";
+    DUMP(label);
+  }
+  Passed = false;  // record the failure for the test harness
+  return false;
+}
+
+bool trace_doesnt_contain(string expected) {  // 'expected' must look like "label: contents"
+  const vector<string> parts = split_first(expected, ": ");
+  if (SIZE(parts) < 2) {  // no ": " separator found
+    raise << expected << ": missing label or contents in trace line\n" << end();
+    assert(false);
+  }
+  return trace_count(parts.at(0), parts.at(1)) == 0;
+}
+
+// Count trace lines carrying 'label', whatever their contents.
+int trace_count(string label) {
+  return trace_count(label, "");
+}
+
+// Count lines with 'label' whose trimmed contents equal trimmed 'line' (empty 'line' matches anything); 0 when not tracing.
+int trace_count(string label, string line) {
+  if (!Trace_stream) return 0;
+  const string wanted = trim(line);  // loop-invariant: trim once, not per trace line
+  int result = 0;  // same type as the return value; no silent long->int narrowing
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    if (label == p->label && (line.empty() || wanted == trim(p->contents))) ++result;
+  }
+  return result;
+}
+
+// Count lines with 'label' whose trimmed contents begin with trimmed 'prefix'; 0 when not tracing.
+int trace_count_prefix(string label, string prefix) {
+  if (!Trace_stream) return 0;
+  const string wanted = trim(prefix);  // loop-invariant: trim once, not per trace line
+  int result = 0;  // same type as the return value; no silent long->int narrowing
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    if (label != p->label) continue;
+    if (starts_with(trim(p->contents), wanted)) ++result;
+  }
+  return result;
+}
+
+// Break "label: contents" at its first ": ", trimming both halves; without a ": " the label comes back empty.
+void split_label_contents(const string& s, string* label, string* contents) {
+  static const string delim(": ");
+  const size_t pos = s.find(delim);
+  if (pos != string::npos) {
+    *label = trim(s.substr(0, pos));
+    *contents = trim(s.substr(pos+SIZE(delim)));
+    return;
+  }
+  *label = "";  // no delimiter: the whole line is contents
+  *contents = trim(s);
+}
+
+bool line_exists_anywhere(const string& label, const string& contents) {  // is the line present at any position? (caller guarantees Trace_stream)
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    string t = trim(p->contents);  // normalize exactly like the matching loop in check_trace_contents,
+    if (label == p->label && contents == unescape_newline(t)) return true;  // so embedded newlines don't turn 'out of order' into 'missing'
+  }
+  return false;
+}
+
+// Split 's' at every occurrence of 'delim'; always returns at least one (possibly empty) piece.
+vector<string> split(string s, string delim) {
+  vector<string> result;
+  // An empty delimiter matches at every position without advancing, so the
+  // scan below would never terminate; treat it as "nothing to split on".
+  if (delim.empty()) { result.push_back(s);  return result; }
+  size_t begin = 0;
+  for (size_t end = s.find(delim);  end != string::npos;  end = s.find(delim, begin)) {
+    result.push_back(string(s, begin, end-begin));
+    begin = end+SIZE(delim);
+  }
+  result.push_back(string(s, begin, string::npos));  // piece after the last delimiter
+  return result;
+}
+
+// Split 's' at the first occurrence of 'delim': one piece if 'delim' is absent, otherwise two.
+vector<string> split_first(string s, string delim) {
+  const size_t pos = s.find(delim);
+  vector<string> pieces(1, s.substr(0, pos));  // pos == npos copies all of 's'
+  if (pos == string::npos) return pieces;
+  pieces.push_back(s.substr(pos+SIZE(delim)));
+  return pieces;
+}
+
+//:: == Helpers for debugging using traces
+
+:(before "End Includes")
+// To debug why a test is failing, dump its trace using DUMP(label); an empty label dumps every line.
+#define DUMP(label)  if (Trace_stream) cerr << Trace_stream->readable_contents(label);
+
+// To add temporary prints to the trace, use 'dbg'.
+// `git log` should never show any calls to 'dbg'.
+#define dbg trace(0, "a")
+
+//: Dump the entire trace to file where it can be browsed offline.
+//: Dump the trace as it happens; that way you get something even if the
+//: program crashes.
+
+:(before "End Globals")
+ofstream Trace_file;
+:(before "End Commandline Options(*arg)")
+else if (is_equal(*arg, "--trace")) {
+  cerr << "saving trace to 'last_run'\n";
+  Trace_file.open("last_run");
+  // Add a dummy line up top; otherwise the `browse_trace` tool currently has
+  // no way to expand any lines above an error.
+  Trace_file << "   0 dummy: start\n";
+  // End --trace Settings
+}
+:(before "End trace Commit")
+if (Trace_file.is_open()) {  // only once --trace has opened it
+  dump_trace_line(Trace_file, t);
+  Trace_file.flush();  // write immediately so the file has the line even if the program crashes
+  past_lines.pop_back();  // economize on memory; NOTE(review): presumably 't' refers to the element just removed, so don't use it past here
+}
+:(before "End One-time Setup")
+atexit(cleanup_main);
+:(code)
+void cleanup_main() {  // exit hook (registered via atexit): close the trace file if one was opened
+  if (Trace_file.is_open()) Trace_file.close();
+  // End cleanup_main
+}
+
+:(before "End trace_stream Methods")
+// Render the trace as text: every line if 'label' is blank, otherwise only lines carrying 'label'.
+string readable_contents(string label) {
+  string trim(const string& s);  // prototype
+  const string wanted = trim(label);
+  ostringstream output;
+  for (vector<trace_line>::iterator p = past_lines.begin();  p != past_lines.end();  ++p)
+    if (wanted.empty() || wanted == p->label) dump_trace_line(output, *p);
+  return output.str();
+}
+
+//: Print traces to the screen as they happen.
+//: Particularly useful when juggling multiple trace streams, like when
+//: debugging sandboxes.
+:(before "End Globals")
+bool Dump_trace = false;
+:(before "End Commandline Options(*arg)")
+else if (is_equal(*arg, "--dump")) {
+  Dump_trace = true;
+}
+:(before "End Incremental Trace Print Conditions")
+if (Dump_trace) return true;
+
+//: Miscellaneous helpers.
+
+:(code)
+string trim(const string& s) {  // returns 's' without leading/trailing whitespace
+  string::const_iterator first = s.begin();
+  while (first != s.end() && isspace(static_cast<unsigned char>(*first)))  // cast: isspace() is undefined for negative char values
+    ++first;
+  if (first == s.end()) return "";  // empty or all whitespace
+  // From here on 's' holds at least one non-space character, so 'last' can't back up past 'first'.
+  string::const_iterator last = s.end()-1;  // avoids modifying a temporary, unlike --s.end()
+  while (last != s.begin() && isspace(static_cast<unsigned char>(*last)))
+    --last;
+  ++last;  // one past the final non-space character
+  return string(first, last);
+}
+
+:(before "End Includes")
+#include <vector>
+using std::vector;
+#include <list>
+using std::list;
+#include <set>
+using std::set;
+
+#include <sstream>
+using std::istringstream;
+using std::ostringstream;
+
+#include <fstream>
+using std::ifstream;
+using std::ofstream;
diff --git a/linux/bootstrap/003trace.cc.filter b/linux/bootstrap/003trace.cc.filter
new file mode 100644
index 00000000..6d938c61
--- /dev/null
+++ b/linux/bootstrap/003trace.cc.filter
@@ -0,0 +1,525 @@
+//: The goal of layers is to make programs easier to understand and more
+//: malleable, easy to rewrite in radical ways without accidentally breaking
+//: some corner case. Tests further both goals. They help understandability by
+//: letting one make small changes and get feedback. What if I wrote this line
+//: like so? What if I removed this function call, is it really necessary?
+//: Just try it, see if the tests pass. Want to explore rewriting this bit in
+//: this way? Tests put many refactorings on a firmer footing.
+//:
+//: But the usual way we write tests seems incomplete. Refactorings tend to
+//: work in the small, but don't help with changes to function boundaries. If
+//: you want to extract a new function you have to manually test-drive it to
+//: create tests for it. If you want to inline a function its tests are no
+//: longer valid. In both cases you end up having to reorganize code as well as
+//: tests, an error-prone activity.
+//:
+//: In response, this layer introduces the notion of domain-driven *white-box*
+//: testing. We focus on the domain of inputs the whole program needs to
+//: handle rather than the correctness of individual functions. All white-box
+//: tests invoke the program in a single way: by calling run() with some
+//: input. As the program operates on the input, it traces out a list of
+//: _facts_ deduced about the domain:
+//:   trace("label") << "fact 1: " << val << end();
+//:
+//: Tests can now check for these facts in the trace:
+//:   CHECK_TRACE_CONTENTS("label: fact 1: 34\n"
+//:                        "label: fact 2: 35\n");
+//:
+//: Since we never call anything but the run() function directly, we never have
+//: to rewrite the tests when we reorganize the internals of the program. We
+//: just have to make sure our rewrite deduces the same facts about the domain,
+//: and that's something we're going to have to do anyway.
+//:
+//: To avoid the combinatorial explosion of integration tests, each layer
+//: mainly logs facts to the trace with a common *label*. All tests in a layer
+//: tend to check facts with this label. Validating the facts logged with a
+//: specific label is like calling functions of that layer directly.
+//:
+//: To build robust tests, trace facts about your domain rather than details of
+//: how you computed them.
+//:
+//: More details: http://akkartik.name/blog/tracing-tests
+//:
+//: ---
+//:
+//: Between layers and domain-driven testing, programming starts to look like a
+//: fundamentally different activity. Instead of focusing on a) superficial,
+//: b) local rules on c) code [like say http://blog.bbv.ch/2013/06/05/clean-code-cheat-sheet],
+//: we allow programmers to engage with the a) deep, b) global structure of
+//: the c) domain. If you can systematically track discontinuities in the
+//: domain, you don't care if the code used gotos as long as it passed all
+//: tests. If tests become more robust to run, it becomes easier to try out
+//: radically different implementations for the same program. If code is
+//: super-easy to rewrite, it becomes less important what indentation style it
+//: uses, or that the objects are appropriately encapsulated, or that the
+//: functions are referentially transparent.
+//:
+//: Instead of plumbing, programming becomes building and gradually refining a
+//: map of the environment the program must operate under. Whether a program
+//: is 'correct' at a given point in time is a red herring; what matters is
+//: avoiding regression by monotonically nailing down the more 'eventful'
+//: parts of the terrain. It helps readers new and old, and rewards curiosity,
+//: to organize large programs in self-similar hierarchies of example tests
+//: colocated with the code that makes them work.
+//:
+//:   "Programming properly should be regarded as an activity by which
+//:   programmers form a mental model, rather than as production of a program."
+//:   -- Peter Naur (http://akkartik.name/naur.pdf)
+
+//:: == Core data structures
+
+:(before "End Globals")
+trace_stream* Trace_stream = NULL;
+
+:(before "End Types")
+struct trace_stream {  // skeleton only: the '// End ...' waypoints below are where later directives splice in fields and methods
+  vector<trace_line> past_lines;  // committed lines, oldest first (appended by newline())
+  // End trace_stream Fields
+
+  trace_stream() {
+    // End trace_stream Constructor
+  }
+  ~trace_stream() {
+    // End trace_stream Destructor
+  }
+  // End trace_stream Methods
+};
+
+//:: == Adding to the trace
+
+//: Top-level method is trace() which can be used like an ostream. Usage:
+//:   trace(depth, label) << ... << end();
+//: Don't forget the 'end()' to actually append to the trace.
+:(before "End Includes")
+// No brackets around the expansion so that it prints nothing if Trace_stream
+// isn't initialized.
+#define trace(...)  !Trace_stream ? cerr : Trace_stream->stream(__VA_ARGS__)
+
+:(before "End trace_stream Fields")
+// accumulator for current trace_line
+ostringstream* curr_stream;
+string curr_label;
+int curr_depth;
+// other stuff
+int collect_depth;  // avoid tracing lower levels for speed
+ofstream null_stream;  // never opened, so writes to it silently fail
+
+//: Some constants.
+:(before "struct trace_stream")  // include constants in all cleaved compilation units
+const int Max_depth = 9999;
+:(before "End trace_stream Constructor")
+curr_stream = NULL;
+curr_depth = Max_depth;
+collect_depth = Max_depth;
+
+:(before "struct trace_stream")
+// One committed line of the trace: what was said, under which label, how deep.
+struct trace_line {
+  string contents;
+  string label;
+  int depth;  // 0 is 'sea level'; positive integers are progressively 'deeper' and lower level
+  // Lines created without an explicit depth sit at sea level.
+  trace_line(string c, string l)
+    : contents(c), label(l), depth(0)
+  {
+  }
+  trace_line(string c, string l, int d)
+    : contents(c), label(l), depth(d)
+  {
+  }
+};
+
+// Despite the name, this *escapes*: every newline in 's' comes back as the
+// two characters '\' and 'n', so a multi-line fact still occupies one row.
+string unescape_newline(const string& s) {  // const: 's' is only read, so temporaries are welcome too
+  string result;
+  for (string::const_iterator p = s.begin();  p != s.end();  ++p) {
+    if (*p == '\n') result += "\\n";
+    else result += *p;
+  }
+  return result;
+}
+
+void dump_trace_line(ostream& s, trace_line& t) {  // one row: depth, label, then contents with newlines escaped
+  s << std::setw(2) << t.depth << ' ' << t.label << ": " << unescape_newline(t.contents) << '\n';  // NOTE(review): width 2 here vs. the 4-wide "   0 dummy: start" row written for --trace -- confirm which browse_trace expects
+}
+
+//: Starting a new trace line.
+:(before "End trace_stream Methods")
+ostream& stream(string label) {  // no depth given: trace at the deepest level
+  return stream(Max_depth, label);
+}
+
+ostream& stream(int depth, string label) {  // begin accumulating a new line; newline() commits it
+  if (depth > collect_depth) return null_stream;  // too deep to collect: writes are swallowed
+  curr_stream = new ostringstream;  // freed in newline(); NOTE(review): a previous line never committed with end() appears to be leaked here -- confirm
+  curr_label = label;
+  curr_depth = depth;
+  (*curr_stream) << std::hex;  // printing addresses is the common case
+  return *curr_stream;
+}
+
+//: End of a trace line; append it to the trace.
+:(before "End Types")
+struct end {};
+:(code)
+ostream& operator<<(ostream& os, end /*unused*/) {  // streaming end() commits the line accumulated so far
+  if (Trace_stream) Trace_stream->newline();
+  return os;
+}
+
+//: Fatal error.
+:(before "End Types")
+struct die {};
+:(code)
+ostream& operator<<(ostream& /*unused*/, die /*unused*/) {  // streaming die() commits the pending line, then terminates the program
+  if (Trace_stream) Trace_stream->newline();
+  exit(1);  // does not return, hence no return statement
+}
+
+:(before "End trace_stream Methods")
+void newline();
+:(code)
+void trace_stream::newline() {  // commit the line being accumulated (if any) to past_lines
+  if (!curr_stream) return;  // nothing is being accumulated
+  string curr_contents = curr_stream->str();
+  if (!curr_contents.empty()) {  // lines with no contents are dropped
+    past_lines.push_back(trace_line(curr_contents, trim(curr_label), curr_depth));  // preserve indent in contents
+    // maybe print this line to stderr
+    trace_line& t = past_lines.back();  // also available to code spliced in at the waypoint below
+    if (should_incrementally_print_trace()) {
+      dump_trace_line(cerr, t);
+    }
+    // End trace Commit
+  }
+
+  // clean up
+  delete curr_stream;
+  curr_stream = NULL;
+  curr_label.clear();
+  curr_depth = Max_depth;
+}
+
+//:: == Initializing the trace in tests
+
+:(before "End Includes")
+#define START_TRACING_UNTIL_END_OF_SCOPE  lease_tracer leased_tracer;
+:(before "End Test Setup")
+START_TRACING_UNTIL_END_OF_SCOPE
+
+//: Trace_stream is a resource, lease_tracer uses RAII to manage it.
+:(before "End Types")
+struct lease_tracer {
+  lease_tracer();
+  ~lease_tracer();
+};
+:(code)
+lease_tracer::lease_tracer() { Trace_stream = new trace_stream; }  // start tracing; NOTE(review): a tracer already installed would be overwritten without being freed -- confirm none can exist here
+lease_tracer::~lease_tracer() {  // stop tracing when the lease goes out of scope
+  delete Trace_stream;
+  Trace_stream = NULL;
+}
+
+//:: == Errors and warnings using traces
+
+:(before "End Includes")
+#define raise  (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Error_depth, "error"))  // no tracer: count the error and print it directly
+#define warn (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Warn_depth, "warn"))  // NOTE(review): also bumps Trace_errors, so an untraced warning makes trace_contains_errors() true -- confirm this is intended
+
+//: Print errors and warnings to the screen by default.
+:(before "struct trace_stream")  // include constants in all cleaved compilation units
+const int Error_depth = 0;
+const int Warn_depth = 1;
+:(before "End Globals")
+int Hide_errors = false;  // if set, don't print errors or warnings to screen
+int Hide_warnings = false;  // if set, don't print warnings to screen
+:(before "End Reset")
+Hide_errors = false;
+Hide_warnings = false;
+//: Never dump warnings in tests
+:(before "End Test Setup")
+Hide_warnings = true;
+:(code)
+bool trace_stream::should_incrementally_print_trace() {  // errors show unless Hide_errors; warnings show unless either Hide_* flag is set
+  const bool show_warnings = !Hide_errors && !Hide_warnings;
+  if ((!Hide_errors && curr_depth == Error_depth) || (show_warnings && curr_depth == Warn_depth)) return true;
+  // End Incremental Trace Print Conditions
+  return false;
+}
+:(before "End trace_stream Methods")
+bool should_incrementally_print_trace();
+
+:(before "End Globals")
+int Trace_errors = 0;  // used only when Trace_stream is NULL
+
+// Fail tests that displayed (unexpected) errors.
+// Expected errors should always be hidden and silently checked for.
+:(before "End Test Teardown")
+if (Passed && !Hide_errors && trace_contains_errors()) {
+  Passed = false;
+}
+:(code)
+bool trace_contains_errors() {
+  return Trace_errors > 0 || trace_count("error") > 0;
+}
+
+:(before "End Includes")
+// If we aren't yet sure how to deal with some corner case, use assert_for_now
+// to indicate that it isn't an inviolable invariant.
+#define assert_for_now assert
+#define raise_for_now raise
+
+//:: == Other assertions on traces
+//: Primitives:
+//:   - CHECK_TRACE_CONTENTS(lines)
+//:     Assert that the trace contains the given lines (separated by newlines)
+//:     in order. There can be other intervening lines between them.
+//:   - CHECK_TRACE_DOESNT_CONTAIN(line)
+//:   - CHECK_TRACE_DOESNT_CONTAIN(label, contents)
+//:     Assert that the trace doesn't contain the given (single) line.
+//:   - CHECK_TRACE_COUNT(label, count)
+//:     Assert that the trace contains exactly 'count' lines with the given
+//:     'label'.
+//:   - CHECK_TRACE_CONTAINS_ERRORS()
+//:   - CHECK_TRACE_DOESNT_CONTAIN_ERRORS()
+//:   - trace_count_prefix(label, prefix)
+//:     Count the number of trace lines with the given 'label' that start with
+//:     the given 'prefix'.
+
+:(before "End Includes")
+#define CHECK_TRACE_CONTENTS(...)  check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__)
+
+#define CHECK_TRACE_DOESNT_CONTAIN(...)  CHECK(trace_doesnt_contain(__VA_ARGS__))
+
+#define CHECK_TRACE_COUNT(label, count) \
+  if (Passed && trace_count(label) != (count)) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): trace_count of " << label << " should be " << count << '\n'; \
+    cerr << "  got " << trace_count(label) << '\n';  /* multiple eval */ \
+    DUMP(label); \
+    Passed = false; \
+    return;  /* Currently we stop at the very first failure. */ \
+  }
+
+#define CHECK_TRACE_CONTAINS_ERRORS()  CHECK(trace_contains_errors())
+#define CHECK_TRACE_DOESNT_CONTAIN_ERRORS() \
+  if (Passed && trace_contains_errors()) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected errors\n"; \
+    DUMP("error"); \
+    Passed = false; \
+    return; \
+  }
+
+// Allow tests to ignore trace lines generated during setup.
+#define CLEAR_TRACE  delete Trace_stream, Trace_stream = new trace_stream
+
+:(code)
+bool check_trace_contents(string FUNCTION, string FILE, int LINE, string expected) {  // do the lines of 'expected' occur in the trace, in order?
+  if (!Passed) return false;  // some earlier check already failed; don't pile on
+  if (!Trace_stream) return false;
+  vector<string> expected_lines = split(expected, "\n");
+  int curr_expected_line = 0;
+  while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty())  // skip blank expected lines
+    ++curr_expected_line;
+  if (curr_expected_line == SIZE(expected_lines)) return true;  // nothing to look for
+  string label, contents;
+  split_label_contents(expected_lines.at(curr_expected_line), &label, &contents);
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {  // single forward pass; non-matching trace lines are skipped
+    if (label != p->label) continue;
+    string t = trim(p->contents);
+    if (contents != unescape_newline(t)) continue;
+    ++curr_expected_line;  // matched; move on to the next non-blank expected line
+    while (curr_expected_line < SIZE(expected_lines) && expected_lines.at(curr_expected_line).empty())
+      ++curr_expected_line;
+    if (curr_expected_line == SIZE(expected_lines)) return true;  // every expected line was found
+    split_label_contents(expected_lines.at(curr_expected_line), &label, &contents);
+  }
+  // Ran out of trace: 'label'/'contents' now hold the first expected line that couldn't be matched.
+  if (line_exists_anywhere(label, contents)) {
+    cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): line [" << label << ": " << contents << "] out of order in trace:\n";
+    DUMP("");
+  }
+  else {
+    cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << contents << "] in trace:\n";
+    DUMP(label);
+  }
+  Passed = false;  // record the failure for the test harness
+  return false;
+}
+
+bool trace_doesnt_contain(string expected) {  // 'expected' must look like "label: contents"
+  const vector<string> parts = split_first(expected, ": ");
+  if (SIZE(parts) < 2) {  // no ": " separator found
+    raise << expected << ": missing label or contents in trace line\n" << end();
+    assert(false);
+  }
+  return trace_count(parts.at(0), parts.at(1)) == 0;
+}
+
+// Count trace lines carrying 'label', whatever their contents.
+int trace_count(string label) {
+  return trace_count(label, "");
+}
+
+// Count lines with 'label' whose trimmed contents equal trimmed 'line' (empty 'line' matches anything); 0 when not tracing.
+int trace_count(string label, string line) {
+  if (!Trace_stream) return 0;
+  const string wanted = trim(line);  // loop-invariant: trim once, not per trace line
+  int result = 0;  // same type as the return value; no silent long->int narrowing
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    if (label == p->label && (line.empty() || wanted == trim(p->contents))) ++result;
+  }
+  return result;
+}
+
+// Count lines with 'label' whose trimmed contents begin with trimmed 'prefix'; 0 when not tracing.
+int trace_count_prefix(string label, string prefix) {
+  if (!Trace_stream) return 0;
+  const string wanted = trim(prefix);  // loop-invariant: trim once, not per trace line
+  int result = 0;  // same type as the return value; no silent long->int narrowing
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    if (label != p->label) continue;
+    if (starts_with(trim(p->contents), wanted)) ++result;
+  }
+  return result;
+}
+
+// Break "label: contents" at its first ": ", trimming both halves; without a ": " the label comes back empty.
+void split_label_contents(const string& s, string* label, string* contents) {
+  static const string delim(": ");
+  const size_t pos = s.find(delim);
+  if (pos != string::npos) {
+    *label = trim(s.substr(0, pos));
+    *contents = trim(s.substr(pos+SIZE(delim)));
+    return;
+  }
+  *label = "";  // no delimiter: the whole line is contents
+  *contents = trim(s);
+}
+
+bool line_exists_anywhere(const string& label, const string& contents) {  // is the line present at any position? (caller guarantees Trace_stream)
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    string t = trim(p->contents);  // normalize exactly like the matching loop in check_trace_contents,
+    if (label == p->label && contents == unescape_newline(t)) return true;  // so embedded newlines don't turn 'out of order' into 'missing'
+  }
+  return false;
+}
+
+// Split 's' at every occurrence of 'delim'; always returns at least one (possibly empty) piece.
+vector<string> split(string s, string delim) {
+  vector<string> result;
+  // An empty delimiter matches at every position without advancing, so the
+  // scan below would never terminate; treat it as "nothing to split on".
+  if (delim.empty()) { result.push_back(s);  return result; }
+  size_t begin = 0;
+  for (size_t end = s.find(delim);  end != string::npos;  end = s.find(delim, begin)) {
+    result.push_back(string(s, begin, end-begin));
+    begin = end+SIZE(delim);
+  }
+  result.push_back(string(s, begin, string::npos));  // piece after the last delimiter
+  return result;
+}
+
+// Split 's' at the first occurrence of 'delim': one piece if 'delim' is absent, otherwise two.
+vector<string> split_first(string s, string delim) {
+  const size_t pos = s.find(delim);
+  vector<string> pieces(1, s.substr(0, pos));  // pos == npos copies all of 's'
+  if (pos == string::npos) return pieces;
+  pieces.push_back(s.substr(pos+SIZE(delim)));
+  return pieces;
+}
+
+//:: == Helpers for debugging using traces
+
+:(before "End Includes")
+// To debug why a test is failing, dump its trace using DUMP(label); an empty label dumps every line.
+#define DUMP(label)  if (Trace_stream) cerr << Trace_stream->readable_contents(label);
+
+// To add temporary prints to the trace, use 'dbg'.
+// `git log` should never show any calls to 'dbg'.
+#define dbg trace(0, "a")
+
+//: Dump the entire trace to file where it can be browsed offline.
+//: Dump the trace as it happens; that way you get something even if the
+//: program crashes.
+
+:(before "End Globals")
+ofstream Trace_file;
+:(before "End Commandline Options(*arg)")
+else if (is_equal(*arg, "--trace")) {
+  cerr << "saving trace to 'last_run'\n";
+  Trace_file.open("last_run");
+  // Add a dummy line up top; otherwise the `browse_trace` tool currently has
+  // no way to expand any lines above an error.
+  Trace_file << "   0 dummy: start\n";
+  // End --trace Settings
+}
+:(before "End trace Commit")
+if (Trace_file.is_open()) {  // only once --trace has opened it
+  if (t.depth <= 4) {  // NOTE(review): 4 looks like this variant's depth cutoff for what reaches the file -- confirm
+    dump_trace_line(Trace_file, t);
+    Trace_file.flush();  // write immediately so the file has the line even if the program crashes
+  }
+  past_lines.pop_back();  // economize on memory; 't' refers to the element just removed, so don't use it past here
+}
+:(before "End One-time Setup")
+atexit(cleanup_main);
+:(code)
+void cleanup_main() {  // exit hook (registered via atexit): close the trace file if one was opened
+  if (Trace_file.is_open()) Trace_file.close();
+  // End cleanup_main
+}
+
+:(before "End trace_stream Methods")
+// Render the trace as text: every line if 'label' is blank, otherwise only lines carrying 'label'.
+string readable_contents(string label) {
+  string trim(const string& s);  // prototype
+  const string wanted = trim(label);
+  ostringstream output;
+  for (vector<trace_line>::iterator p = past_lines.begin();  p != past_lines.end();  ++p)
+    if (wanted.empty() || wanted == p->label) dump_trace_line(output, *p);
+  return output.str();
+}
+
+//: Print traces to the screen as they happen.
+//: Particularly useful when juggling multiple trace streams, like when
+//: debugging sandboxes.
+:(before "End Globals")
+bool Dump_trace = false;
+:(before "End Commandline Options(*arg)")
+else if (is_equal(*arg, "--dump")) {
+  Dump_trace = true;
+}
+:(before "End Incremental Trace Print Conditions")
+if (Dump_trace) return true;
+
+//: Miscellaneous helpers.
+
+:(code)
+string trim(const string& s) {  // returns 's' without leading/trailing whitespace
+  string::const_iterator first = s.begin();
+  while (first != s.end() && isspace(static_cast<unsigned char>(*first)))  // cast: isspace() is undefined for negative char values
+    ++first;
+  if (first == s.end()) return "";  // empty or all whitespace
+  // From here on 's' holds at least one non-space character, so 'last' can't back up past 'first'.
+  string::const_iterator last = s.end()-1;  // avoids modifying a temporary, unlike --s.end()
+  while (last != s.begin() && isspace(static_cast<unsigned char>(*last)))
+    --last;
+  ++last;  // one past the final non-space character
+  return string(first, last);
+}
+
+:(before "End Includes")
+#include <vector>
+using std::vector;
+#include <list>
+using std::list;
+#include <set>
+using std::set;
+
+#include <sstream>
+using std::istringstream;
+using std::ostringstream;
+
+#include <fstream>
+using std::ifstream;
+using std::ofstream;
diff --git a/linux/bootstrap/003trace.cc.rotate b/linux/bootstrap/003trace.cc.rotate
new file mode 100644
index 00000000..f68f65bb
--- /dev/null
+++ b/linux/bootstrap/003trace.cc.rotate
@@ -0,0 +1,537 @@
+//: The goal of layers is to make programs easier to understand and more
+//: malleable, easier to rewrite in radical ways without accidentally breaking
+//: some corner case. Tests further both goals. They help understandability by
+//: letting one make small changes and get feedback. What if I wrote this line
+//: like so? What if I removed this function call, is it really necessary?
+//: Just try it, see if the tests pass. Want to explore rewriting this bit in
+//: this way? Tests put many refactorings on a firmer footing.
+//:
+//: But the usual way we write tests seems incomplete. Refactorings tend to
+//: work in the small, but don't help with changes to function boundaries. If
+//: you want to extract a new function you have to manually test-drive it to
+//: create tests for it. If you want to inline a function its tests are no
+//: longer valid. In both cases you end up having to reorganize code as well as
+//: tests, an error-prone activity.
+//:
+//: In response, this layer introduces the notion of domain-driven *white-box*
+//: testing. We focus on the domain of inputs the whole program needs to
+//: handle rather than the correctness of individual functions. All white-box
+//: tests invoke the program in a single way: by calling run() with some
+//: input. As the program operates on the input, it traces out a list of
+//: _facts_ deduced about the domain:
+//:   trace("label") << "fact 1: " << val;
+//:
+//: Tests can now check for these facts in the trace:
+//:   CHECK_TRACE_CONTENTS("label", "fact 1: 34\n"
+//:                                 "fact 2: 35\n");
+//:
+//: Since we never call anything but the run() function directly, we never have
+//: to rewrite the tests when we reorganize the internals of the program. We
+//: just have to make sure our rewrite deduces the same facts about the domain,
+//: and that's something we're going to have to do anyway.
+//:
+//: To avoid the combinatorial explosion of integration tests, each layer
+//: mainly logs facts to the trace with a common *label*. All tests in a layer
+//: tend to check facts with this label. Validating the facts logged with a
+//: specific label is like calling functions of that layer directly.
+//:
+//: To build robust tests, trace facts about your domain rather than details of
+//: how you computed them.
+//:
+//: More details: http://akkartik.name/blog/tracing-tests
+//:
+//: ---
+//:
+//: Between layers and domain-driven testing, programming starts to look like a
+//: fundamentally different activity. Instead of focusing on a) superficial,
+//: b) local rules on c) code [like say http://blog.bbv.ch/2013/06/05/clean-code-cheat-sheet],
+//: we allow programmers to engage with the a) deep, b) global structure of
+//: the c) domain. If you can systematically track discontinuities in the
+//: domain, you don't care if the code used gotos as long as it passed all
+//: tests. If tests become more robust to run, it becomes easier to try out
+//: radically different implementations for the same program. If code is
+//: super-easy to rewrite, it becomes less important what indentation style it
+//: uses, or that the objects are appropriately encapsulated, or that the
+//: functions are referentially transparent.
+//:
+//: Instead of plumbing, programming becomes building and gradually refining a
+//: map of the environment the program must operate under. Whether a program
+//: is 'correct' at a given point in time is a red herring; what matters is
+//: avoiding regression by monotonically nailing down the more 'eventful'
+//: parts of the terrain. It helps readers new and old, and rewards curiosity,
+//: to organize large programs in self-similar hierarchies of example tests
+//: colocated with the code that makes them work.
+//:
+//:   "Programming properly should be regarded as an activity by which
+//:   programmers form a mental model, rather than as production of a program."
+//:   -- Peter Naur (http://akkartik.name/naur.pdf)
+
+//:: == Core data structures
+
+:(before "End Globals")
+// the trace being recorded, or NULL if nothing has installed one
+trace_stream* Trace_stream = NULL;
+
+:(before "End Types")
+struct trace_stream {
+  // The comments starting with 'End' in here are anchors that directives
+  // elsewhere splice code in front of; don't reword them.
+
+  // lines committed to the trace so far, in the order they were added
+  vector<trace_line> past_lines;
+  // End trace_stream Fields
+
+  trace_stream() {
+    // End trace_stream Constructor
+  }
+  ~trace_stream() {
+    // End trace_stream Destructor
+  }
+  // End trace_stream Methods
+};
+
+//:: == Adding to the trace
+
+//: Top-level method is trace() which can be used like an ostream. Usage:
+//:   trace(depth, label) << ... << end();
+//: Don't forget the 'end()' to actually append to the trace.
+:(before "End Includes")
+// No brackets around the expansion so that it prints nothing if Trace_stream
+// isn't initialized.
+// (Everything chained on with << after trace(...) becomes part of the
+// conditional's last operand, so with a NULL Trace_stream the whole
+// statement evaluates to just 'cerr'.)
+#define trace(...)  !Trace_stream ? cerr : Trace_stream->stream(__VA_ARGS__)
+
+:(before "End trace_stream Fields")
+// accumulator for current trace_line
+ostringstream* curr_stream;
+string curr_label;
+int curr_depth;
+// other stuff
+int collect_depth;  // avoid tracing lower levels for speed
+ofstream null_stream;  // never opened, so writes to it silently fail
+
+//: Some constants.
+:(before "struct trace_stream")  // include constants in all cleaved compilation units
+const int Max_depth = 9999;
+:(before "End trace_stream Constructor")
+// start out with no line in progress, and collecting lines at every depth up to Max_depth
+curr_stream = NULL;
+curr_depth = Max_depth;
+collect_depth = Max_depth;
+
+:(before "struct trace_stream")
+// One committed line of the trace: its text, the label it was traced under,
+// and its depth.
+struct trace_line {
+  string contents;
+  string label;
+  int depth;  // 0 is 'sea level'; positive integers are progressively 'deeper' and lower level
+  // without an explicit depth, a line sits at depth 0
+  trace_line(string c, string l) :contents(c), label(l), depth(0) {}
+  trace_line(string c, string l, int d) :contents(c), label(l), depth(d) {}
+};
+
+// Despite the name, this makes newlines visible: every newline character in
+// 's' becomes the two characters backslash and 'n'. All other characters are
+// copied unchanged. 's' itself is not modified.
+string unescape_newline(string& s) {
+  const string& in = s;
+  string result;
+  for (string::const_iterator p = in.begin();  p != in.end();  ++p) {
+    if (*p == '\n')
+      result += "\\n";
+    else
+      result += *p;
+  }
+  return result;
+}
+
+// Write 't' to 's' as a single line of text: the depth (padded to width 2),
+// a space, the label, ": ", and the contents with embedded newlines shown
+// as \n.
+void dump_trace_line(ostream& s, trace_line& t) {
+  s << std::setw(2) << t.depth;
+  s << ' ' << t.label << ": ";
+  s << unescape_newline(t.contents);
+  s << '\n';
+}
+
+//: Starting a new trace line.
+:(before "End trace_stream Methods")
+// Start a new trace line at the default depth (Max_depth).
+ostream& stream(string label) {
+  return stream(Max_depth, label);
+}
+
+// Start a new trace line and return the stream that accumulates its text.
+// Lines deeper than collect_depth are sent to null_stream instead, leaving
+// the accumulator untouched.
+ostream& stream(int depth, string label) {
+  const bool too_deep = (depth > collect_depth);
+  if (too_deep) return null_stream;
+  ostringstream* fresh = new ostringstream;
+  curr_stream = fresh;
+  curr_label = label;
+  curr_depth = depth;
+  (*fresh) << std::hex;  // printing addresses is the common case
+  return *fresh;
+}
+
+//: End of a trace line; append it to the trace.
+:(before "End Types")
+// Tag type: streaming an end() commits the trace line in progress.
+struct end {};
+:(code)
+ostream& operator<<(ostream& os, end /*unused*/) {
+  if (Trace_stream != NULL) {
+    Trace_stream->newline();
+  }
+  return os;
+}
+
+//: Fatal error.
+:(before "End Types")
+// Tag type: streaming a die() commits the trace line in progress (if a
+// trace is installed) and then terminates the program with exit status 1.
+struct die {};
+:(code)
+ostream& operator<<(ostream& /*unused*/, die /*unused*/) {
+  if (Trace_stream != NULL) {
+    Trace_stream->newline();
+  }
+  exit(1);
+}
+
+:(before "End trace_stream Methods")
+void newline();
+:(code)
+// Commit the line being accumulated (if any) to past_lines, then reset the
+// accumulator. A line whose contents are empty is discarded rather than
+// committed.
+void trace_stream::newline() {
+  if (!curr_stream) return;  // no line in progress
+  string curr_contents = curr_stream->str();
+  if (!curr_contents.empty()) {
+    past_lines.push_back(trace_line(curr_contents, trim(curr_label), curr_depth));  // preserve indent in contents
+    // maybe print this line to stderr
+    trace_line& t = past_lines.back();
+    if (should_incrementally_print_trace()) {
+      dump_trace_line(cerr, t);
+    }
+    // code spliced in at the anchor below refers to 't' and past_lines by name
+    // End trace Commit
+  }
+
+  // clean up
+  delete curr_stream;
+  curr_stream = NULL;
+  curr_label.clear();
+  curr_depth = Max_depth;
+}
+
+//:: == Initializing the trace in tests
+
+:(before "End Includes")
+#define START_TRACING_UNTIL_END_OF_SCOPE  lease_tracer leased_tracer;
+:(before "End Test Setup")
+START_TRACING_UNTIL_END_OF_SCOPE
+
+//: Trace_stream is a resource, lease_tracer uses RAII to manage it.
+:(before "End Types")
+// Scoped owner of the global trace: constructing one installs a fresh trace
+// in Trace_stream; destroying it frees that trace and resets the pointer to
+// NULL.
+struct lease_tracer {
+  lease_tracer();
+  ~lease_tracer();
+};
+:(code)
+lease_tracer::lease_tracer() {
+  Trace_stream = new trace_stream;
+}
+
+lease_tracer::~lease_tracer() {
+  delete Trace_stream;
+  Trace_stream = NULL;
+}
+
+//:: == Errors and warnings using traces
+
+:(before "End Includes")
+// With no trace installed, both macros evaluate to cerr and increment
+// Trace_errors; otherwise they start a trace line labeled "error"/"warn" at
+// the corresponding depth.
+// NOTE(review): 'warn' increments Trace_errors too when there's no trace --
+// confirm that counting such warnings as errors is intended.
+#define raise  (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Error_depth, "error"))
+#define warn (!Trace_stream ? (++Trace_errors,cerr) /*do print*/ : Trace_stream->stream(Warn_depth, "warn"))
+
+//: Print errors and warnings to the screen by default.
+:(before "struct trace_stream")  // include constants in all cleaved compilation units
+const int Error_depth = 0;
+const int Warn_depth = 1;
+:(before "End Globals")
+int Hide_errors = false;  // if set, don't print errors or warnings to screen
+int Hide_warnings = false;  // if set, don't print warnings to screen
+:(before "End Reset")
+Hide_errors = false;
+Hide_warnings = false;
+//: Never dump warnings in tests
+:(before "End Test Setup")
+Hide_warnings = true;
+:(code)
+// Should the line currently being committed also be printed right away?
+// Errors print unless Hide_errors is set; warnings print unless either
+// Hide_errors or Hide_warnings is set. Other layers add conditions at the
+// anchor comment.
+bool trace_stream::should_incrementally_print_trace() {
+  if (!Hide_errors) {
+    if (curr_depth == Error_depth) return true;
+    if (!Hide_warnings && curr_depth == Warn_depth) return true;
+  }
+  // End Incremental Trace Print Conditions
+  return false;
+}
+:(before "End trace_stream Methods")
+bool should_incrementally_print_trace();
+
+:(before "End Globals")
+int Trace_errors = 0;  // used only when Trace_stream is NULL
+
+// Fail tests that displayed (unexpected) errors.
+// Expected errors should always be hidden and silently checked for.
+:(before "End Test Teardown")
+// tests that set Hide_errors are left to check for errors themselves
+if (Passed && !Hide_errors && trace_contains_errors()) {
+  Passed = false;
+}
+:(code)
+// Were any errors recorded? Trace_errors counts those raised while no trace
+// was installed; otherwise they show up as trace lines labeled "error".
+bool trace_contains_errors() {
+  if (Trace_errors > 0) return true;
+  return trace_count("error") > 0;
+}
+
+:(before "End Includes")
+// If we aren't yet sure how to deal with some corner case, use assert_for_now
+// to indicate that it isn't an inviolable invariant.
+#define assert_for_now assert
+#define raise_for_now raise
+
+//:: == Other assertions on traces
+//: Primitives:
+//:   - CHECK_TRACE_CONTENTS(lines)
+//:     Assert that the trace contains the given lines (separated by newlines)
+//:     in order. There can be other intervening lines between them.
+//:   - CHECK_TRACE_DOESNT_CONTAIN(line)
+//:   - CHECK_TRACE_DOESNT_CONTAIN(label, contents)
+//:     Assert that the trace doesn't contain the given (single) line.
+//:   - CHECK_TRACE_COUNT(label, count)
+//:     Assert that the trace contains exactly 'count' lines with the given
+//:     'label'.
+//:   - CHECK_TRACE_CONTAINS_ERRORS()
+//:   - CHECK_TRACE_DOESNT_CONTAIN_ERRORS()
+//:   - trace_count_prefix(label, prefix)
+//:     Count the number of trace lines with the given 'label' that start with
+//:     the given 'prefix'.
+
+:(before "End Includes")
+// Forwards to check_trace_contents(), adding the caller's function, file and
+// line for use in failure messages.
+#define CHECK_TRACE_CONTENTS(...)  check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__)
+
+#define CHECK_TRACE_DOESNT_CONTAIN(...)  CHECK(trace_doesnt_contain(__VA_ARGS__))
+
+// On a mismatch: print a failure message and the trace lines with that
+// label, clear Passed, and return from the enclosing function. Note that
+// this expands to a bare 'if' statement and evaluates 'label' and 'count'
+// more than once.
+#define CHECK_TRACE_COUNT(label, count) \
+  if (Passed && trace_count(label) != (count)) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): trace_count of " << label << " should be " << count << '\n'; \
+    cerr << "  got " << trace_count(label) << '\n';  /* multiple eval */ \
+    DUMP(label); \
+    Passed = false; \
+    return;  /* Currently we stop at the very first failure. */ \
+  }
+
+#define CHECK_TRACE_CONTAINS_ERRORS()  CHECK(trace_contains_errors())
+// If errors were recorded: print a failure message and the "error" lines,
+// clear Passed, and return from the enclosing function. Also a bare 'if'.
+#define CHECK_TRACE_DOESNT_CONTAIN_ERRORS() \
+  if (Passed && trace_contains_errors()) { \
+    cerr << "\nF - " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected errors\n"; \
+    DUMP("error"); \
+    Passed = false; \
+    return; \
+  }
+
+// Allow tests to ignore trace lines generated during setup.
+#define CLEAR_TRACE  delete Trace_stream, Trace_stream = new trace_stream
+
+:(code)
+// Check that the lines in 'expected' (newline-separated, each of the form
+// "label: contents") all occur in the trace in the given order, possibly
+// with other lines in between. Blank expected lines are skipped.
+// On failure: prints a message to cerr, dumps the relevant part of the trace,
+// clears Passed and returns false. Also returns false, silently, if the test
+// has already failed or no trace is installed.
+bool check_trace_contents(string FUNCTION, string FILE, int LINE, string expected) {
+  if (!Passed || !Trace_stream) return false;
+  vector<string> expected_lines = split(expected, "\n");
+  const int num_expected = SIZE(expected_lines);
+  // index of the expected line we're currently looking for
+  int next = 0;
+  while (next < num_expected && expected_lines.at(next).empty())
+    ++next;
+  if (next == num_expected) return true;
+  string label, contents;
+  split_label_contents(expected_lines.at(next), &label, &contents);
+  vector<trace_line>& lines = Trace_stream->past_lines;
+  for (vector<trace_line>::iterator p = lines.begin();  p != lines.end();  ++p) {
+    if (label != p->label) continue;
+    string trimmed = trim(p->contents);
+    if (contents != unescape_newline(trimmed)) continue;
+    // found it; advance to the next non-blank expected line
+    do {
+      ++next;
+    } while (next < num_expected && expected_lines.at(next).empty());
+    if (next == num_expected) return true;
+    split_label_contents(expected_lines.at(next), &label, &contents);
+  }
+
+  // ran out of trace while still looking for 'label: contents'
+  const bool out_of_order = line_exists_anywhere(label, contents);
+  if (out_of_order) {
+    cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): line [" << label << ": " << contents << "] out of order in trace:\n";
+    DUMP("");
+  }
+  else {
+    cerr << "\nF - " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << contents << "] in trace:\n";
+    DUMP(label);
+  }
+  Passed = false;
+  return false;
+}
+
+// 'expected' has the form "label: contents". Returns true if no trace line
+// with that label has those contents. A string without ": " is reported via
+// raise and trips an assertion.
+bool trace_doesnt_contain(string expected) {
+  const vector<string> parts = split_first(expected, ": ");
+  const bool separator_missing = (SIZE(parts) == 1);
+  if (separator_missing) {
+    raise << expected << ": missing label or contents in trace line\n" << end();
+    assert(false);
+  }
+  const int num_matches = trace_count(parts.at(0), parts.at(1));
+  return num_matches == 0;
+}
+
+// Number of trace lines with the given label, whatever their contents.
+int trace_count(string label) {
+  return trace_count(label, "");
+}
+
+// Number of trace lines with the given label whose contents equal 'line',
+// ignoring leading and trailing whitespace on both sides. An empty 'line'
+// matches any contents. Returns 0 if no trace is installed.
+int trace_count(string label, string line) {
+  if (!Trace_stream) return 0;
+  const bool any_contents = (line == "");
+  const string wanted = trim(line);
+  long result = 0;
+  vector<trace_line>& lines = Trace_stream->past_lines;
+  for (vector<trace_line>::iterator p = lines.begin();  p != lines.end();  ++p) {
+    if (label != p->label) continue;
+    if (any_contents || wanted == trim(p->contents))
+      ++result;
+  }
+  return result;
+}
+
+// Number of trace lines with the given label whose trimmed contents start
+// with the trimmed 'prefix'. Returns 0 if no trace is installed.
+int trace_count_prefix(string label, string prefix) {
+  if (!Trace_stream) return 0;
+  long result = 0;
+  vector<trace_line>& lines = Trace_stream->past_lines;
+  vector<trace_line>::iterator p = lines.begin();
+  while (p != lines.end()) {
+    if (label == p->label && starts_with(trim(p->contents), trim(prefix)))
+      ++result;
+    ++p;
+  }
+  return result;
+}
+
+// Split 's' at its first ": " into a label (before) and contents (after),
+// both trimmed. Without a ": ", the label is empty and the contents are all
+// of 's', trimmed.
+void split_label_contents(const string& s, string* label, string* contents) {
+  static const string delim(": ");
+  const size_t pos = s.find(delim);
+  if (pos != string::npos) {
+    *label = trim(s.substr(0, pos));
+    *contents = trim(s.substr(pos+SIZE(delim)));
+    return;
+  }
+  *label = "";
+  *contents = trim(s);
+}
+
+// Does the trace contain a line with this label and contents anywhere,
+// regardless of order? check_trace_contents() uses this to tell an
+// out-of-order line from a missing one.
+// Contents are compared the way check_trace_contents() compares them:
+// trimmed, and with embedded newlines rendered as \n. Comparing the raw
+// trimmed contents can never match an expected line that contains \n, so
+// such a line would be misreported as missing.
+// Returns false if no trace is installed.
+bool line_exists_anywhere(const string& label, const string& contents) {
+  if (!Trace_stream) return false;
+  for (vector<trace_line>::iterator p = Trace_stream->past_lines.begin();  p != Trace_stream->past_lines.end();  ++p) {
+    if (label != p->label) continue;
+    string t = trim(p->contents);
+    if (contents == unescape_newline(t)) return true;
+  }
+  return false;
+}
+
+// Split 's' at every occurrence of 'delim'. Always returns at least one
+// element; adjacent delimiters yield empty strings.
+// An empty 'delim' can't separate anything, so 's' comes back whole.
+// (find() matches an empty needle without ever advancing, so the loop below
+// would otherwise keep appending empty strings forever.)
+vector<string> split(string s, string delim) {
+  vector<string> result;
+  if (delim.empty()) {
+    result.push_back(s);
+    return result;
+  }
+  size_t begin=0, end=s.find(delim);
+  while (true) {
+    if (end == string::npos) {
+      // no more delimiters: the rest of the string is the final element
+      result.push_back(string(s, begin, string::npos));
+      break;
+    }
+    result.push_back(string(s, begin, end-begin));
+    begin = end+delim.size();
+    end = s.find(delim, begin);
+  }
+  return result;
+}
+
+// Split 's' at the first occurrence of 'delim' only. Returns one element
+// (all of 's') if 'delim' doesn't occur, otherwise two: the text before the
+// delimiter and the text after it.
+vector<string> split_first(string s, string delim) {
+  vector<string> result;
+  const size_t pos = s.find(delim);
+  if (pos == string::npos) {
+    result.push_back(s);
+    return result;
+  }
+  result.push_back(s.substr(0, pos));
+  result.push_back(s.substr(pos+SIZE(delim)));
+  return result;
+}
+
+//:: == Helpers for debugging using traces
+
+:(before "End Includes")
+// To debug why a test is failing, dump its trace using '?'.
+#define DUMP(label)  if (Trace_stream) cerr << Trace_stream->readable_contents(label);
+
+// To add temporary prints to the trace, use 'dbg'.
+// `git log` should never show any calls to 'dbg'.
+#define dbg trace(0, "a")
+
+//: Dump the entire trace to file where it can be browsed offline.
+//: Dump the trace as it happens; that way you get something even if the
+//: program crashes.
+
+:(before "End Globals")
+ofstream Trace_file;  // open only after --trace is passed
+unsigned long long int Count = 0;  // lines written to the current trace file
+int File_count = 0;  // numeric suffix of the current trace file's name
+:(before "End Commandline Options(*arg)")
+// --trace: start saving the trace to the file last_run.<File_count>
+else if (is_equal(*arg, "--trace")) {
+  ostringstream filename;
+  filename << "last_run." << File_count;
+  cerr << "saving trace to " << filename.str() << '\n';
+  Trace_file.open(filename.str().c_str());
+  // Add a dummy line up top; otherwise the `browse_trace` tool currently has
+  // no way to expand any lines above an error.
+  Trace_file << "   0 dummy: start\n";
+  // End --trace Settings
+}
+:(before "End trace Commit")
+// Once a trace file is open, every committed line is written to it
+// immediately and then dropped from memory again. After 10 million lines the
+// current file is closed and writing continues in a new file with the next
+// numeric suffix.
+// 't' is the host function's reference to the last element of past_lines, so
+// it has to be dumped before the pop_back().
+if (Trace_file.is_open()) {
+  dump_trace_line(Trace_file, t);
+  Trace_file.flush();
+  past_lines.pop_back();  // economize on memory
+  ++Count;
+  if (Count >= 10000000) {
+    // rotate to the next file
+    Trace_file.close();
+    ++File_count;
+    ostringstream filename;
+    filename << "last_run." << File_count;
+    Trace_file.open(filename.str().c_str());
+    cerr << "switching to " << filename.str() << '\n';
+    Count = 0;
+  }
+}
+:(before "End One-time Setup")
+atexit(cleanup_main);
+:(code)
+//: Exit handler (registered with atexit() above): close the trace file if
+//: one is open.
+void cleanup_main() {
+  const bool trace_file_in_use = Trace_file.is_open();
+  if (trace_file_in_use) {
+    Trace_file.close();
+  }
+  // End cleanup_main
+}
+
+:(before "End trace_stream Methods")
+// Render the lines collected so far as text, one dumped line per trace line.
+// An empty (or all-whitespace) label selects every line; otherwise only
+// lines whose label equals the trimmed argument are included.
+string readable_contents(string label) {
+  string trim(const string& s);  // prototype
+  const string wanted = trim(label);
+  const bool want_all = wanted.empty();
+  ostringstream rendered;
+  vector<trace_line>::iterator curr = past_lines.begin();
+  while (curr != past_lines.end()) {
+    if (want_all || wanted == curr->label)
+      dump_trace_line(rendered, *curr);
+    ++curr;
+  }
+  return rendered.str();
+}
+
+//: Print traces to the screen as they happen.
+//: Particularly useful when juggling multiple trace streams, like when
+//: debugging sandboxes.
+:(before "End Globals")
+bool Dump_trace = false;  // off unless --dump is passed on the commandline
+:(before "End Commandline Options(*arg)")
+// --dump: turn on Dump_trace
+else if (is_equal(*arg, "--dump")) {
+  Dump_trace = true;
+}
+:(before "End Incremental Trace Print Conditions")
+// once Dump_trace is set, every committed line is reported as printable
+if (Dump_trace) return true;
+
+//: Miscellaneous helpers.
+
+:(code)
+//: Return a copy of 's' without leading or trailing whitespace (as classified
+//: by isspace()). Returns "" if 's' is empty or consists only of whitespace.
+string trim(const string& s) {
+  // isspace() is undefined for negative arguments other than EOF, and plain
+  // char may be signed, so always go through unsigned char.
+  string::const_iterator first = s.begin();
+  while (first != s.end() && isspace(static_cast<unsigned char>(*first)))
+    ++first;
+  if (first == s.end()) return "";
+
+  // At least one non-whitespace character exists at or after 'first', so the
+  // backward scan below stops on it at the latest.
+  string::const_iterator last = s.end();
+  --last;
+  while (last != s.begin() && isspace(static_cast<unsigned char>(*last)))
+    --last;
+  ++last;  // one past the final character to keep
+  return string(first, last);
+}
+
+:(before "End Includes")
+#include <vector>
+using std::vector;
+#include <list>
+using std::list;
+#include <set>
+using std::set;
+
+#include <sstream>
+using std::istringstream;
+using std::ostringstream;
+
+#include <fstream>
+using std::ifstream;
+using std::ofstream;
diff --git a/linux/bootstrap/003trace.test.cc b/linux/bootstrap/003trace.test.cc
new file mode 100644
index 00000000..bec1b789
--- /dev/null
+++ b/linux/bootstrap/003trace.test.cc
@@ -0,0 +1,133 @@
+// a traced line can be found again by its label and contents
+void test_trace_check_compares() {
+  trace("test layer") << "foo" << end();
+  CHECK_TRACE_CONTENTS("test layer: foo");
+}
+
+// contents traced under one label don't count for another label
+void test_trace_check_ignores_other_layers() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 2") << "bar" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: foo");
+  CHECK_TRACE_DOESNT_CONTAIN("test layer 2: foo");
+}
+
+// leading whitespace is ignored on both the traced and the expected side
+void test_trace_check_ignores_leading_whitespace() {
+  trace("test layer 1") << " foo" << end();
+  CHECK_EQ(trace_count("test layer 1", /*too little whitespace*/"foo"), 1);
+  CHECK_EQ(trace_count("test layer 1", /*too much whitespace*/"  foo"), 1);
+}
+
+// extra lines after the expected one don't matter
+void test_trace_check_ignores_other_lines() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 1") << "bar" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: foo");
+}
+
+// extra lines before the expected one don't matter either
+void test_trace_check_ignores_other_lines2() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 1") << "bar" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: bar");
+}
+
+// a trailing newline in the traced contents is ignored
+void test_trace_ignores_trailing_whitespace() {
+  trace("test layer 1") << "foo\n" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: foo");
+}
+
+// a trailing space in the traced contents is ignored
+void test_trace_ignores_trailing_whitespace2() {
+  trace("test layer 1") << "foo " << end();
+  CHECK_TRACE_CONTENTS("test layer 1: foo");
+}
+
+// expected lines with different labels are matched in trace order
+void test_trace_orders_across_layers() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 2") << "bar" << end();
+  trace("test layer 1") << "qux" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: foo\n"
+                       "test layer 2: bar\n"
+                       "test layer 1: qux\n");
+}
+
+// counting by label and contents
+void test_trace_supports_count() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 1") << "foo" << end();
+  CHECK_EQ(trace_count("test layer 1", "foo"), 2);
+}
+
+// counting by label alone
+void test_trace_supports_count2() {
+  trace("test layer 1") << "foo" << end();
+  trace("test layer 1") << "bar" << end();
+  CHECK_EQ(trace_count("test layer 1"), 2);
+}
+
+// counting ignores a trailing newline in the traced contents
+void test_trace_count_ignores_trailing_whitespace() {
+  trace("test layer 1") << "foo\n" << end();
+  CHECK_EQ(trace_count("test layer 1", "foo"), 1);
+}
+
+// newlines inside traced contents are matched by a literal backslash-n in
+// the expected line
+void test_trace_unescapes_newlines() {
+  trace("test layer 1") << "f\no\no\n" << end();
+  CHECK_TRACE_CONTENTS("test layer 1: f\\no\\no");
+}
+
+// pending: DUMP tests
+// pending: readable_contents() adds newline if necessary.
+// pending: raise also prints to stderr.
+// pending: raise doesn't print to stderr if Hide_errors is set.
+// pending: warn doesn't print to stderr if Hide_errors is set.
+// pending: warn doesn't print to stderr if Hide_warnings is set.
+// pending: raise doesn't have to be saved if Hide_errors is set, just printed.
+// pending: raise prints to stderr if Trace_stream is NULL.
+// pending: raise prints to stderr if Trace_stream is NULL even if Hide_errors is set.
+
+// can't check trace because trace methods call 'split'
+
+// splitting an empty string yields a single empty element
+void test_split_returns_at_least_one_elem() {
+  vector<string> result = split("", ",");
+  CHECK_EQ(result.size(), 1);
+  CHECK_EQ(result.at(0), "");
+}
+
+// without any delimiter in the input, the input comes back as the only element
+void test_split_returns_entire_input_when_no_delim() {
+  vector<string> result = split("abc", ",");
+  CHECK_EQ(result.size(), 1);
+  CHECK_EQ(result.at(0), "abc");
+}
+
+// one delimiter, two elements
+void test_split_works() {
+  vector<string> result = split("abc,def", ",");
+  CHECK_EQ(result.size(), 2);
+  CHECK_EQ(result.at(0), "abc");
+  CHECK_EQ(result.at(1), "def");
+}
+
+// two delimiters, three elements
+void test_split_works2() {
+  vector<string> result = split("abc,def,ghi", ",");
+  CHECK_EQ(result.size(), 3);
+  CHECK_EQ(result.at(0), "abc");
+  CHECK_EQ(result.at(1), "def");
+  CHECK_EQ(result.at(2), "ghi");
+}
+
+// a delimiter longer than one character is consumed whole
+void test_split_handles_multichar_delim() {
+  vector<string> result = split("abc,,def,,ghi", ",,");
+  CHECK_EQ(result.size(), 3);
+  CHECK_EQ(result.at(0), "abc");
+  CHECK_EQ(result.at(1), "def");
+  CHECK_EQ(result.at(2), "ghi");
+}
+
+// trim() on empty and all-space inputs, then on one- and two-character
+// words with spaces on the left, on the right, and on both sides
+void test_trim() {
+  CHECK_EQ(trim(""), "");
+  CHECK_EQ(trim(" "), "");
+  CHECK_EQ(trim("  "), "");
+  CHECK_EQ(trim("a"), "a");
+  CHECK_EQ(trim(" a"), "a");
+  CHECK_EQ(trim("  a"), "a");
+  CHECK_EQ(trim("  ab"), "ab");
+  CHECK_EQ(trim("a "), "a");
+  CHECK_EQ(trim("a  "), "a");
+  CHECK_EQ(trim("ab  "), "ab");
+  CHECK_EQ(trim(" a "), "a");
+  CHECK_EQ(trim("  a  "), "a");
+  CHECK_EQ(trim("  ab  "), "ab");
+}
diff --git a/linux/bootstrap/010vm.cc b/linux/bootstrap/010vm.cc
new file mode 100644
index 00000000..bce4467c
--- /dev/null
+++ b/linux/bootstrap/010vm.cc
@@ -0,0 +1,411 @@
+//: Core data structures for simulating the SubX VM (subset of an x86 processor),
+//: either in tests or debug aids.
+
+//:: registers
+//: assume segment registers are hard-coded to 0
+//: no MMX, etc.
+
+:(before "End Types")
+// Indices into Reg, numbered from 0 in declaration order.
+enum {
+  EAX,
+  ECX,
+  EDX,
+  EBX,
+  ESP,
+  EBP,
+  ESI,
+  EDI,
+  NUM_INT_REGISTERS,  // count of the registers above, not a register itself
+};
+// A 32-bit integer register, readable as either signed (i) or unsigned (u).
+union reg {
+  int32_t i;
+  uint32_t u;
+};
+:(before "End Globals")
+reg Reg[NUM_INT_REGISTERS] = { {0} };
+uint32_t EIP = 1;  // preserve null pointer
+:(before "End Reset")
+bzero(Reg, sizeof(Reg));
+EIP = 1;  // preserve null pointer
+
+:(before "End Types")
+const int NUM_XMM_REGISTERS = 8;
+float Xmm[NUM_XMM_REGISTERS] = { 0.0 };
+// printable names for the floating-point registers, indexed like Xmm
+const string Xname[NUM_XMM_REGISTERS] = { "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7" };
+:(before "End Reset")
+bzero(Xmm, sizeof(Xmm));
+
+:(before "End Help Contents")
+cerr << "  registers\n";
+:(before "End Help Texts")
+// Help topic "registers": integer, floating-point, 8-bit and flag registers.
+// (Fixes the misspelling 'signicant' in the 8-bit registers paragraph.)
+put_new(Help, "registers",
+  "SubX supports 16 registers: eight 32-bit integer registers and eight single-precision\n"
+  "floating-point registers. From 0 to 7, they are:\n"
+  "  integer: EAX ECX EDX EBX ESP EBP ESI EDI\n"
+  "  floating point: XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7\n"
+  "ESP contains the top of the stack.\n"
+  "\n"
+  "-- 8-bit registers\n"
+  "Some instructions operate on eight *overlapping* 8-bit registers.\n"
+  "From 0 to 7, they are:\n"
+  "  AL CL DL BL AH CH DH BH\n"
+  "The 8-bit registers overlap with the 32-bit ones. AL is the lowest significant byte\n"
+  "of EAX, AH is the second lowest significant byte, and so on.\n"
+  "\n"
+  "For example, if EBX contains 0x11223344, then BL contains 0x44, and BH contains 0x33.\n"
+  "\n"
+  "There is no way to access bytes within ESP, EBP, ESI or EDI.\n"
+  "\n"
+  "For complete details consult the IA-32 software developer's manual, volume 2,\n"
+  "table 2-2, \"32-bit addressing forms with the ModR/M byte\".\n"
+  "It is included in this repository as 'modrm.pdf'.\n"
+  "The register encodings are described in the top row of the table, but you'll need\n"
+  "to spend some time with it.\n"
+  "\n"
+  "-- flag registers\n"
+  "Various instructions (particularly 'compare') modify one or more of four 1-bit\n"
+  "'flag' registers, as a side-effect:\n"
+  "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
+  "  reset if not.\n"
+  "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
+  "- the carry flag (CF): usually set if an arithmetic result overflows by just one bit.\n"
+  "  Useful for operating on unsigned numbers.\n"
+  "- the overflow flag (OF): usually set if an arithmetic result overflows by more\n"
+  "  than one bit. Useful for operating on signed numbers.\n"
+  "The flag bits are read by conditional jumps.\n"
+  "\n"
+  "For complete details on how different instructions update the flags, consult the IA-32\n"
+  "manual (volume 2). There's various versions of it online, such as https://c9x.me/x86,\n"
+  "though of course you'll need to be careful to ignore instructions and flag registers\n"
+  "that SubX doesn't support.\n"
+  "\n"
+  "It isn't simple, but if this is the processor you have running on your computer,\n"
+  "might as well get good at it.\n"
+);
+
+:(before "End Globals")
+// the subset of x86 flag registers we care about
+bool SF = false;  // sign flag
+bool ZF = false;  // zero flag
+bool CF = false;  // carry flag
+bool OF = false;  // overflow flag
+:(before "End Reset")
+SF = ZF = CF = OF = false;
+
+//:: simulated RAM
+
+:(before "End Types")
+const uint32_t SEGMENT_ALIGNMENT = 0x1000000;  // 16MB
+// Round 'x' up to the nearest multiple of 'align'. Relies on bit masking,
+// so 'align' has to be a power of 2. Arithmetic wraps around modulo 2^32.
+inline uint32_t align_upwards(uint32_t x, uint32_t align) {
+  const uint32_t low_bits = align-1;
+  return (x+low_bits) & ~low_bits;
+}
+
+// Like in real-world Linux, we'll allocate RAM for our programs in disjoint
+// slabs called VMAs or Virtual Memory Areas.
+struct vma {
+  // A range of simulated addresses together with the bytes backing it.
+  // Backing bytes are allocated lazily, as addresses get accessed.
+  uint32_t start;  // inclusive
+  uint32_t end;  // exclusive
+  vector<uint8_t> _data;  // _data[i] backs address start+i; grown on demand by data()
+  vma(uint32_t s, uint32_t e) :start(s), end(e) {}
+  // given just a start address, extend to the next SEGMENT_ALIGNMENT boundary above it
+  vma(uint32_t s) :start(s), end(align_upwards(s+1, SEGMENT_ALIGNMENT)) {}
+  // does address 'a' lie inside this segment?
+  bool match(uint32_t a) {
+    return a >= start && a < end;
+  }
+  // do all 4 bytes starting at address 'a' lie inside this segment?
+  bool match32(uint32_t a) {
+    // Compare against the room left in the segment rather than computing
+    // a+4, which wraps around to a tiny value for addresses within the last
+    // 4 bytes of the address space and would make them spuriously match.
+    return a >= start && a < end && end-a >= 4;
+  }
+  // Reference to the byte backing address 'a', growing the backing store if
+  // needed. Callers may reinterpret the result as the start of a 32-bit
+  // word, so the store has to extend a whole word past 'a' (as far as the
+  // segment's limit permits), not just one byte.
+  uint8_t& data(uint32_t a) {
+    assert(match(a));
+    uint32_t result_index = a-start;
+    if (_data.size() <= result_index+/*largest word size that can be accessed in one instruction*/sizeof(int)) {
+      const int align = 0x1000;
+      // size needed for a whole word starting at result_index to be valid
+      uint32_t result_size = result_index + static_cast<uint32_t>(sizeof(int));
+      uint32_t new_size = align_upwards(result_size, align);
+      // grow at least 2x to maintain some amortized complexity guarantees
+      if (new_size < _data.size() * 2)
+        new_size = _data.size() * 2;
+      // never grow past the stated limit
+      if (new_size > end-start)
+        new_size = end-start;
+      _data.resize(new_size);
+    }
+    return _data.at(result_index);
+  }
+  // Move the end of the segment up to 'new_end_address'. Requests below the
+  // current end are ignored; a segment never shrinks.
+  void grow_until(uint32_t new_end_address) {
+    if (new_end_address < end) return;
+    // Ugly: vma knows about the global Memory list of vmas
+    void sanity_check(uint32_t start, uint32_t end);
+    sanity_check(start, new_end_address);
+    end = new_end_address;
+  }
+  // End vma Methods
+};
+:(code)
+// Assert that a segment spanning 'start' to 'end' doesn't collide with the
+// segments in Mem:
+//   - at most one existing segment may begin exactly at 'start'
+//   - segments beginning above 'start' must begin strictly above 'end'
+//   - segments beginning below 'start' must end strictly below 'start'
+void sanity_check(uint32_t start, uint32_t end) {
+  bool dup_found = false;
+  for (int i = 0;  i < SIZE(Mem);  ++i) {
+    const vma& curr = Mem.at(i);
+    if (curr.start > start) {
+      assert(curr.start > end);
+      continue;
+    }
+    if (curr.start < start) {
+      assert(curr.end < start);
+      continue;
+    }
+    // curr begins exactly at 'start'
+    assert(!dup_found);
+    dup_found = true;
+  }
+}
+
+:(before "End Globals")
+// RAM is made of VMAs.
+vector<vma> Mem;
+:(code)
+:(before "End Globals")
+uint32_t End_of_program = 0;  // when the program executes past this address in tests we'll stop the test
+// The stack grows downward. Can't increase its size for now.
+:(before "End Reset")
+Mem.clear();
+End_of_program = 0;
+:(code)
+// These helpers depend on Mem being laid out contiguously (so you can't use a
+// map, etc.) and on the host also being little-endian.
+// Read one byte of simulated memory. Yields 0 if the address lookup returns
+// NULL.
+inline uint8_t read_mem_u8(uint32_t addr) {
+  uint8_t* handle = mem_addr_u8(addr);  // error messages get printed here
+  if (handle == NULL) return 0;
+  return *handle;
+}
+// Same byte, viewed as signed.
+inline int8_t read_mem_i8(uint32_t addr) {
+  const uint8_t raw = read_mem_u8(addr);
+  return static_cast<int8_t>(raw);
+}
+// Read a 32-bit word of simulated memory. Yields 0 if the address lookup
+// returns NULL.
+inline uint32_t read_mem_u32(uint32_t addr) {
+  uint32_t* handle = mem_addr_u32(addr);  // error messages get printed here
+  if (handle == NULL) return 0;
+  return *handle;
+}
+// Same word, viewed as signed.
+inline int32_t read_mem_i32(uint32_t addr) {
+  const uint32_t raw = read_mem_u32(addr);
+  return static_cast<int32_t>(raw);
+}
+// Read the 32 bits at 'addr' as a single-precision float. Yields 0 if the
+// address lookup returns NULL.
+// The bits have to be reinterpreted, the way mem_addr_f32() does it.
+// Converting the word's integer value instead would turn, say, the bit
+// pattern of 1.0f (0x3f800000) into 1065353216.0f.
+inline float read_mem_f32(uint32_t addr) {
+  float* handle = mem_addr_f32(addr);  // error messages get printed here
+  return handle ? *handle : 0.0f;
+}
+
+// Translate a simulated address into a host pointer to its backing byte.
+// Raises an error if more than one segment contains the address (the last
+// matching segment wins), and exits the program if none does.
+inline uint8_t* mem_addr_u8(uint32_t addr) {
+  uint8_t* result = NULL;
+  for (int i = 0;  i < SIZE(Mem);  ++i) {
+    vma& segment = Mem.at(i);
+    if (!segment.match(addr)) continue;
+    if (result != NULL)
+      raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
+    result = &segment.data(addr);
+  }
+  if (result == NULL) {
+    // make sure the saved trace is complete before bailing out
+    if (Trace_file.is_open()) Trace_file.flush();
+    raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
+    exit(1);
+  }
+  return result;
+}
+// Same lookup, viewing the byte as signed.
+inline int8_t* mem_addr_i8(uint32_t addr) {
+  uint8_t* raw = mem_addr_u8(addr);
+  return reinterpret_cast<int8_t*>(raw);
+}
+// Translate a simulated address into a host pointer to the 32-bit word
+// starting there. All 4 bytes have to lie within a single segment. Raises an
+// error if more than one segment qualifies (the last one wins), and exits
+// the program if none does.
+inline uint32_t* mem_addr_u32(uint32_t addr) {
+  uint32_t* result = NULL;
+  for (int i = 0;  i < SIZE(Mem);  ++i) {
+    vma& segment = Mem.at(i);
+    if (!segment.match32(addr)) continue;
+    if (result != NULL)
+      raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
+    result = reinterpret_cast<uint32_t*>(&segment.data(addr));
+  }
+  if (result == NULL) {
+    // make sure the saved trace is complete before bailing out
+    if (Trace_file.is_open()) Trace_file.flush();
+    raise << "Tried to access uninitialized memory at address 0x" << HEXWORD << addr << '\n' << end();
+    exit(1);
+  }
+  return result;
+}
+// Same lookup, viewing the word as a signed integer.
+inline int32_t* mem_addr_i32(uint32_t addr) {
+  uint32_t* raw = mem_addr_u32(addr);
+  return reinterpret_cast<int32_t*>(raw);
+}
+// Same lookup, viewing the word's bits as a float.
+inline float* mem_addr_f32(uint32_t addr) {
+  uint32_t* raw = mem_addr_u32(addr);
+  return reinterpret_cast<float*>(raw);
+}
+// helper for some syscalls. But read-only.
+inline const char* mem_addr_kernel_string(uint32_t addr) {
+  uint8_t* raw = mem_addr_u8(addr);
+  return reinterpret_cast<const char*>(raw);
+}
+// Copy 'size' bytes of simulated memory starting at 'addr' into a string,
+// reading one byte at a time.
+inline string mem_addr_string(uint32_t addr, uint32_t size) {
+  string result;
+  for (size_t i = 0;  i < size;  ++i) {
+    const uint8_t curr = read_mem_u8(addr+i);
+    result += static_cast<char>(curr);
+  }
+  return result;
+}
+
+// Store one unsigned byte at VM address 'addr'.
+inline void write_mem_u8(uint32_t addr, uint8_t val) {
+  uint8_t* const slot = mem_addr_u8(addr);
+  if (slot == NULL) return;
+  *slot = val;
+}
+// Store one signed byte at VM address 'addr'.
+inline void write_mem_i8(uint32_t addr, int8_t val) {
+  int8_t* const slot = mem_addr_i8(addr);
+  if (slot == NULL) return;
+  *slot = val;
+}
+// Store an unsigned 32-bit word at VM address 'addr'.
+inline void write_mem_u32(uint32_t addr, uint32_t val) {
+  uint32_t* const slot = mem_addr_u32(addr);
+  if (slot == NULL) return;
+  *slot = val;
+}
+// Store a signed 32-bit word at VM address 'addr'.
+inline void write_mem_i32(uint32_t addr, int32_t val) {
+  int32_t* const slot = mem_addr_i32(addr);
+  if (slot == NULL) return;
+  *slot = val;
+}
+
+// Does any segment in Mem claim 'addr'? Unlike the mem_addr_* helpers, an
+// unclaimed address is not an error here. An address claimed by more than
+// one segment is still reported.
+inline bool already_allocated(uint32_t addr) {
+  int num_claims = 0;
+  for (int seg = 0;  seg < SIZE(Mem);  ++seg) {
+    if (!Mem.at(seg).match(addr)) continue;
+    if (num_claims > 0)
+      raise << "address 0x" << HEXWORD << addr << " is in two segments\n" << end();
+    ++num_claims;
+  }
+  return num_claims > 0;
+}
+
+//:: core interpreter loop
+
+:(code)
+// skeleton of how x86 instructions are decoded
+//
+// Fetches the opcode byte at EIP (advancing EIP past it), traces it, and
+// dispatches on it. Opcodes 0f, f2 and f3 are prefixes: a second byte is
+// fetched, and after 'f2 0f' or 'f3 0f' a third. Any opcode without a case
+// prints a message to cerr and exits with status 1.
+//
+// The comments beginning with 'End' inside the switches are insertion points
+// that other layers target with 'before' directives, so their wording has to
+// stay exactly as is.
+void run_one_instruction() {
+  uint8_t op=0, op2=0, op3=0;
+  // Run One Instruction
+  if (Trace_file.is_open()) {
+    dump_registers();
+    // End Dump Info for Instruction
+  }
+  // remember where the instruction began; EIP moves as bytes are fetched
+  uint32_t inst_start_address = EIP;
+  op = next();
+  trace(Callstack_depth+1, "run") << "0x" << HEXWORD << inst_start_address << " opcode: " << HEXBYTE << NUM(op) << end();
+  switch (op) {
+  case 0xf4:  // hlt
+    // moving EIP to End_of_program stops callers that loop while EIP is below it
+    EIP = End_of_program;
+    break;
+  // End Single-Byte Opcodes
+  case 0x0f:
+    switch(op2 = next()) {
+    // End Two-Byte Opcodes Starting With 0f
+    default:
+      cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
+      exit(1);
+    }
+    break;
+  case 0xf2:
+    switch(op2 = next()) {
+    // End Two-Byte Opcodes Starting With f2
+    case 0x0f:
+      switch(op3 = next()) {
+      // End Three-Byte Opcodes Starting With f2 0f
+      default:
+        cerr << "unrecognized third opcode after f2 0f: " << HEXBYTE << NUM(op3) << '\n';
+        exit(1);
+      }
+      break;
+    default:
+      cerr << "unrecognized second opcode after f2: " << HEXBYTE << NUM(op2) << '\n';
+      exit(1);
+    }
+    break;
+  case 0xf3:
+    switch(op2 = next()) {
+    // End Two-Byte Opcodes Starting With f3
+    case 0x0f:
+      switch(op3 = next()) {
+      // End Three-Byte Opcodes Starting With f3 0f
+      default:
+        cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
+        exit(1);
+      }
+      break;
+    default:
+      cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
+      exit(1);
+    }
+    break;
+  default:
+    cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
+    exit(1);
+  }
+}
+
+// Fetch the byte EIP points at from the instruction stream, leaving EIP one
+// past it.
+inline uint8_t next() {
+  const uint32_t fetch_addr = EIP++;
+  return read_mem_u8(fetch_addr);
+}
+
+// Emit one trace line holding every integer register (hex, padded to 8
+// columns with '_') followed by the four status flags.
+void dump_registers() {
+  ostringstream summary;
+  summary << "regs: ";
+  for (int r = 0;  r < NUM_INT_REGISTERS;  ++r) {
+    const char* separator = (r == 0) ? "" : "  ";
+    summary << separator << r << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[r].u;
+  }
+  summary << " -- SF: " << SF << "; ZF: " << ZF << "; CF: " << CF << "; OF: " << OF;
+  trace(Callstack_depth+1, "run") << summary.str() << end();
+}
+
+//: start tracking supported opcodes
+:(before "End Globals")
+map</*op*/string, string> Name;
+map</*op*/string, string> Name_0f;
+map</*op*/string, string> Name_f3;
+map</*op*/string, string> Name_f3_0f;
+:(before "End One-time Setup")
+init_op_names();
+:(code)
+// Fill the Name table with a description for each supported opcode.
+// Only 'f4' is registered here; layers that add opcodes splice their own
+// registrations in ahead of the marker comment at the bottom.
+void init_op_names() {
+  put(Name, "f4", "halt (hlt)");
+  // End Initialize Op Names
+}
+
+:(before "End Help Special-cases(key)")
+// 'help opcodes': list every registered opcode, grouped by prefix bytes.
+if (key == "opcodes") {
+  cerr << "Opcodes currently supported by SubX:\n";
+  map<string, string>::iterator entry;
+  for (entry = Name.begin();  entry != Name.end();  ++entry)
+    cerr << "  " << entry->first << ": " << entry->second << '\n';
+  for (entry = Name_0f.begin();  entry != Name_0f.end();  ++entry)
+    cerr << "  0f " << entry->first << ": " << entry->second << '\n';
+  for (entry = Name_f3.begin();  entry != Name_f3.end();  ++entry)
+    cerr << "  f3 " << entry->first << ": " << entry->second << '\n';
+  for (entry = Name_f3_0f.begin();  entry != Name_f3_0f.end();  ++entry)
+    cerr << "  f3 0f " << entry->first << ": " << entry->second << '\n';
+  cerr << "Run `bootstrap help instructions` for details on words like 'r32' and 'disp8'.\n";
+  cerr << "For complete details on these instructions, consult the IA-32 manual (volume 2).\n";
+  cerr << "There's various versions of it online, such as https://c9x.me/x86.\n";
+  cerr << "The mnemonics in brackets will help you locate each instruction.\n";
+  return 0;
+}
+:(before "End Help Contents")
+cerr << "  opcodes\n";
+
+//: Helpers for managing trace depths
+//:
+//: We're going to use trace depths primarily to segment code running at
+//: different frames of the call stack. This will make it easy for the trace
+//: browser to collapse over entire calls.
+//:
+//: Errors will be at depth 0.
+//: Warnings will be at depth 1.
+//: SubX instructions will occupy depth 2 and up to Max_depth, organized by
+//: stack frames. Each instruction's internal details will be one level deeper
+//: than its 'main' depth. So 'call' instruction details will be at the same
+//: depth as the instructions of the function it calls.
+:(before "End Globals")
+extern const int Initial_callstack_depth = 2;
+int Callstack_depth = Initial_callstack_depth;
+:(before "End Reset")
+Callstack_depth = Initial_callstack_depth;
+
+:(before "End Includes")
+#include <iomanip>
+#define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
+#define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
+// ugly that iostream doesn't print uint8_t as an integer
+#define NUM(X) static_cast<int>(X)
+#include <stdint.h>
diff --git a/linux/bootstrap/011run.cc b/linux/bootstrap/011run.cc
new file mode 100644
index 00000000..424e40e9
--- /dev/null
+++ b/linux/bootstrap/011run.cc
@@ -0,0 +1,453 @@
+//: Running SubX programs on the VM.
+
+//: (Not to be confused with the 'run' subcommand for running ELF binaries on
+//: the VM. That comes later.)
+
+:(before "End Help Texts")
+put_new(Help, "syntax",
+  "SubX programs consist of segments, each segment in turn consisting of lines.\n"
+  "Line-endings are significant; each line should contain a single\n"
+  "instruction, macro or directive.\n"
+  "\n"
+  "Comments start with the '#' character. It should be at the start of a word\n"
+  "(start of line, or following a space).\n"
+  "\n"
+  "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
+  "the segment and a (sometimes approximate) starting address in memory.\n"
+  "The name 'code' is special; instructions to execute should always go here.\n"
+  "\n"
+  "The resulting binary starts running code from a label called 'Entry'\n"
+  "in the code segment.\n"
+  "\n"
+  "Segments with the same name get merged together. This rule helps keep functions\n"
+  "and their data close together in .subx files.\n"
+  "You don't have to specify the starting address after the first time.\n"
+  "\n"
+  "Lines consist of a series of words. Words can contain arbitrary metadata\n"
+  "after a '/', but they can never contain whitespace. Metadata has no effect\n"
+  "at runtime, but can be handy when rewriting macros.\n"
+  "\n"
+  "Check out the example programs in the apps/ directory, particularly apps/ex*.\n"
+);
+:(before "End Help Contents")
+cerr << "  syntax\n";
+
+:(code)
+void test_copy_imm32_to_EAX() {
+  // At the lowest level, SubX programs are a series of hex bytes, each
+  // (variable-length) instruction on one line.
+  run(
+      // Comments start with '#' and are ignored.
+      "# comment\n"
+      // Segment headers start with '==', a name and a starting hex address.
+      // There's usually one code and one data segment. The code segment
+      // always comes first.
+      "== code 0x1\n"  // code segment
+
+      // After the header, each segment consists of lines, and each line
+      // consists of words separated by whitespace.
+      //
+      // All words can have metadata after a '/'. No spaces allowed in
+      // metadata, of course.
+      // Unrecognized metadata never causes errors, so you can use it for
+      // documentation.
+      //
+      // Within the code segment in particular, x86 instructions consist of
+      // some number of the following parts and sub-parts (see the Readme and
+      // cheatsheet.pdf for details):
+      //   opcodes: 1-3 bytes
+      //   ModR/M byte
+      //   SIB byte
+      //   displacement: 0/1/2/4 bytes
+      //   immediate: 0/1/2/4 bytes
+      // opcode        ModR/M                    SIB                   displacement    immediate
+      // instruction   mod, reg, Reg/Mem bits    scale, index, base
+      // 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
+      "  b8            .                         .                     .               0a 0b 0c 0d\n"  // copy 0x0d0c0b0a to EAX
+      // The periods are just to help the eye track long gaps between columns,
+      // and are otherwise ignored.
+  );
+  // This program, when run, causes the following events in the trace:
+  CHECK_TRACE_CONTENTS(
+      "load: 0x00000001 -> b8\n"
+      "load: 0x00000002 -> 0a\n"
+      "load: 0x00000003 -> 0b\n"
+      "load: 0x00000004 -> 0c\n"
+      "load: 0x00000005 -> 0d\n"
+      "run: copy imm32 0x0d0c0b0a to EAX\n"
+  );
+}
+
+// Top-level helper for tests: parse 'text_bytes', load the resulting bytes
+// into memory, then execute until EIP reaches End_of_program. Stops quietly
+// as soon as parsing or loading has put an error in the trace.
+void run(const string& text_bytes) {
+  program prog;
+  istringstream source(text_bytes);
+  // Loading Test Program
+  parse(source, prog);
+  if (trace_contains_errors()) return;  // if any stage raises errors, stop immediately
+  // Running Test Program
+  load(prog);
+  if (trace_contains_errors()) return;
+  // convenience to keep tests concise: 'Entry' label need not be provided
+  // not allowed in real programs
+  EIP = prog.entry ? prog.entry : find(prog, "code")->start;
+  while (EIP < End_of_program)
+    run_one_instruction();
+}
+
+//:: core data structures
+
+:(before "End Types")
+struct program {
+  // address to start executing at; stays 0 when no entry point was recorded
+  uint32_t entry;
+  // segments in the order their names first appeared in the source
+  vector<segment> segments;
+  program() : entry(0) {}
+};
+:(before "struct program")
+struct segment {
+  // name given on the '==' header; parse() merges headers that repeat a name
+  string name;
+  // address of the first byte, read from the header that introduced the name
+  uint32_t start;
+  // source lines in order, accumulated across every header with this name
+  vector<line> lines;
+  // End segment Fields
+  segment() {
+    start = 0;
+    // End segment Constructor
+  }
+};
+:(before "struct segment")
+struct line {
+  // the words parse() kept from this line, in order
+  vector<word> words;
+  // NOTE(review): nothing in parse() fills this in yet (it has a todo for
+  // line-level metadata) -- confirm whether a later layer does
+  vector<string> metadata;
+  // the line exactly as read from the input
+  string original;
+};
+:(before "struct line")
+struct word {
+  // the whole token as it appeared in the source
+  string original;
+  // the part of the token before its first '/'
+  string data;
+  // the '/'-separated pieces that follow 'data'
+  vector<string> metadata;
+};
+
+//:: parse
+
+:(code)
+// Parse SubX source text from 'fin' into 'out'.
+//
+// A header of the form '== <name> [<hex address>]' either starts a new
+// segment or switches back to an existing one with the same name. The lines
+// that follow are buffered in 'l' and appended to that segment at the next
+// header or at end of input.
+//
+// Within a line: a word starting with '#' ends the line (comment), a lone '.'
+// is skipped, and a word starting with ':' ends the line (reserved for
+// line-level metadata). Lines left with no words are dropped.
+void parse(istream& fin, program& out) {
+  segment* curr_segment = NULL;
+  vector<line> l;
+  while (has_data(fin)) {
+    string line_data;
+    line curr;
+    getline(fin, line_data);
+    curr.original = line_data;
+    trace(99, "parse") << "line: " << line_data << end();
+    // End Line Parsing Special-cases(line_data -> l)
+    istringstream lin(line_data);
+    while (has_data(lin)) {
+      string word_data;
+      lin >> word_data;
+      if (word_data.empty()) continue;
+      if (word_data[0] == '#') break;  // comment
+      if (word_data == ".") continue;  // comment token
+      if (word_data == "==") {
+        // hand buffered lines to the previous segment before switching
+        flush(curr_segment, l);
+        string segment_name;
+        lin >> segment_name;
+        curr_segment = find(out, segment_name);
+        if (curr_segment != NULL) {
+          trace(3, "parse") << "appending to segment '" << segment_name << "'" << end();
+        }
+        else {
+          trace(3, "parse") << "new segment '" << segment_name << "'" << end();
+          // stays 0 when the header carries no address
+          uint32_t seg_start = 0;
+          lin >> std::hex >> seg_start;
+          sanity_check_program_segment(out, seg_start);
+          out.segments.push_back(segment());
+          curr_segment = &out.segments.back();
+          curr_segment->name = segment_name;
+          curr_segment->start = seg_start;
+          if (trace_contains_errors()) continue;
+          trace(3, "parse") << "starts at address 0x" << HEXWORD << curr_segment->start << end();
+        }
+        break;  // skip rest of line
+      }
+      if (word_data[0] == ':') {
+        // todo: line metadata
+        break;
+      }
+      curr.words.push_back(word());
+      parse_word(word_data, curr.words.back());
+      // terminate the trace line like every other trace statement does;
+      // without end() the "word:" line is never completed
+      trace(99, "parse") << "word: " << to_string(curr.words.back()) << end();
+    }
+    if (!curr.words.empty())
+      l.push_back(curr);
+  }
+  flush(curr_segment, l);
+  trace(99, "parse") << "done" << end();
+}
+
+// Look up the segment called 'segment_name' in 'p'; NULL when there is none.
+segment* find(program& p, const string& segment_name) {
+  for (vector<segment>::iterator s = p.segments.begin();  s != p.segments.end();  ++s)
+    if (s->name == segment_name) return &*s;
+  return NULL;
+}
+
+// Move the buffered 'lines' onto the end of segment 's' and empty the buffer.
+// A non-empty buffer with no segment to receive it means the input had
+// content ahead of its first '==' header; that is raised as an error and the
+// buffer is left untouched.
+void flush(segment* s, vector<line>& lines) {
+  if (lines.empty()) return;
+  if (s != NULL) {
+    trace(3, "parse") << "flushing segment" << end();
+    s->lines.insert(s->lines.end(), lines.begin(), lines.end());
+    lines.clear();
+    return;
+  }
+  raise << "input does not start with a '==' section header\n" << end();
+}
+
+// Split a token on '/' into 'out': the first piece becomes out.data, every
+// later piece is appended to out.metadata. The unsplit token is kept in
+// out.original.
+void parse_word(const string& data, word& out) {
+  out.original = data;
+  istringstream pieces(data);
+  if (!getline(pieces, out.data, '/')) return;
+  for (string piece;  getline(pieces, piece, '/');  )
+    out.metadata.push_back(piece);
+}
+
+void sanity_check_program_segment(const program& p, uint32_t addr) {
+  for (int i = 0;  i < SIZE(p.segments);  ++i) {
+    if (p.segments.at(i).start == addr)
+      raise << "can't have multiple segments starting at address 0x" << HEXWORD << addr << '\n' << end();
+  }
+}
+
+// Helper for tests: parse a string and throw the result away; only the trace
+// it leaves behind is of interest.
+void parse(const string& text_bytes) {
+  istringstream source(text_bytes);
+  program scratch;
+  parse(source, scratch);
+}
+
+// Two differently-named segments given the same start address are an error.
+void test_detect_duplicate_segments() {
+  Hide_errors = true;
+  parse(
+      "== segment1 0xee\n"
+      "ab\n"
+      "== segment2 0xee\n"
+      "cd\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: can't have multiple segments starting at address 0x000000ee\n"
+  );
+}
+
+//:: load
+
+// Copy a parsed program into VM memory.
+// Each segment is written byte by byte starting at its start address; every
+// word has to be a hex byte. End_of_program ends up just past the last byte
+// written for the 'code' segment. Loading stops at the first error.
+void load(const program& p) {
+  if (find(p, "code") == NULL) {
+    raise << "no code to run\n" << end();
+    return;
+  }
+  // addresses written so far; guards against two segments sharing a byte
+  set<uint32_t> written;
+  for (int seg_index = 0;  seg_index < SIZE(p.segments);  ++seg_index) {
+    const segment& seg = p.segments.at(seg_index);
+    uint32_t cursor = seg.start;
+    if (!already_allocated(cursor))
+      Mem.push_back(vma(seg.start));
+    trace(99, "load") << "loading segment " << seg_index << " from " << HEXWORD << cursor << end();
+    for (int line_index = 0;  line_index < SIZE(seg.lines);  ++line_index) {
+      const vector<word>& words = seg.lines.at(line_index).words;
+      for (int word_index = 0;  word_index < SIZE(words);  ++word_index) {
+        const uint8_t value = hex_byte(words.at(word_index).data);
+        if (trace_contains_errors()) return;
+        assert(written.count(cursor) == 0);
+        write_mem_u8(cursor, value);
+        written.insert(cursor);
+        trace(99, "load") << "0x" << HEXWORD << cursor << " -> " << HEXBYTE << NUM(read_mem_u8(cursor)) << end();
+        ++cursor;
+      }
+    }
+    if (seg.name == "code")
+      End_of_program = cursor;
+  }
+}
+
+// Read-only lookup of the segment called 'segment_name'; NULL when absent.
+const segment* find(const program& p, const string& segment_name) {
+  for (vector<segment>::const_iterator s = p.segments.begin();  s != p.segments.end();  ++s)
+    if (s->name == segment_name) return &*s;
+  return NULL;
+}
+
+// Convert a token to the byte it denotes.
+// Accepts lowercase hex, optionally negative and optionally '0x'-prefixed,
+// in the range -0x80..0xff: everything that fits in one byte whether read as
+// signed or unsigned. Anything else raises an error and yields 0.
+uint8_t hex_byte(const string& s) {
+  if (contains_uppercase(s)) {
+    raise << "uppercase hex not allowed: " << s << '\n' << end();
+    return 0;
+  }
+  istringstream in(s);
+  int result = 0;
+  in >> std::hex >> result;
+  // the whole token has to be consumed, not just a numeric prefix
+  if (!in || !in.eof()) {
+    raise << "token '" << s << "' is not a hex byte\n" << end();
+    return '\0';
+  }
+  // -0x80 is the smallest value a signed byte can hold; anything below it
+  // would silently wrap around to an unrelated positive byte
+  if (result > 0xff || result < -0x80) {
+    raise << "token '" << s << "' is not a hex byte\n" << end();
+    return '\0';
+  }
+  return static_cast<uint8_t>(result);
+}
+
+// A hex token too large for one byte ('cab') is rejected when loading.
+void test_number_too_large() {
+  Hide_errors = true;
+  parse_and_load(
+      "== code 0x1\n"
+      "01 cab\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: token 'cab' is not a hex byte\n"
+  );
+}
+
+// A token with a non-hex character after a valid hex prefix ('cx') is rejected.
+void test_invalid_hex() {
+  Hide_errors = true;
+  parse_and_load(
+      "== code 0x1\n"
+      "01 cx\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: token 'cx' is not a hex byte\n"
+  );
+}
+
+// A small negative hex byte loads without any error.
+void test_negative_number() {
+  parse_and_load(
+      "== code 0x1\n"
+      "01 -02\n"
+  );
+  CHECK_TRACE_COUNT("error", 0);
+}
+
+// A negative number far below the range of a byte is rejected.
+void test_negative_number_too_small() {
+  Hide_errors = true;
+  parse_and_load(
+      "== code 0x1\n"
+      "01 -12345\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: token '-12345' is not a hex byte\n"
+  );
+}
+
+// An explicit '0x' prefix is accepted, on negative bytes too.
+void test_hex_prefix() {
+  parse_and_load(
+      "== code 0x1\n"
+      "0x01 -0x02\n"
+  );
+  CHECK_TRACE_COUNT("error", 0);
+}
+
+// A second header with an already-seen name appends to that segment: its
+// bytes are loaded directly after the bytes from the first occurrence.
+void test_repeated_segment_merges_data() {
+  parse_and_load(
+      "== code 0x1\n"
+      "11 22\n"
+      "== code\n"  // again
+      "33 44\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse: new segment 'code'\n"
+      "parse: appending to segment 'code'\n"
+      // first segment
+      "load: 0x00000001 -> 11\n"
+      "load: 0x00000002 -> 22\n"
+      // second segment
+      "load: 0x00000003 -> 33\n"
+      "load: 0x00000004 -> 44\n"
+  );
+}
+
+// Bytes that appear before any '==' header are an error.
+void test_error_on_missing_segment_header() {
+  Hide_errors = true;
+  parse_and_load(
+      "01 02\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: input does not start with a '==' section header\n"
+  );
+}
+
+// Hex digits have to be lowercase; 'Ab' is rejected when loading.
+void test_error_on_uppercase_hex() {
+  Hide_errors = true;
+  parse_and_load(
+      "== code\n"
+      "01 Ab\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: uppercase hex not allowed: Ab\n"
+  );
+}
+
+//: helper for tests
+void parse_and_load(const string& text_bytes) {
+  program p;
+  istringstream in(text_bytes);
+  parse(in, p);
+  if (trace_contains_errors()) return;  // if any stage raises errors, stop immediately
+  load(p);
+}
+
+//:: run
+
+:(before "End Initialize Op Names")
+put_new(Name, "b8", "copy imm32 to EAX (mov)");
+
+//: our first opcode
+
+:(before "End Single-Byte Opcodes")
+case 0xb8: {  // copy imm32 to EAX
+  // the 4 bytes after the opcode are the immediate operand
+  const int32_t imm32 = next32();
+  Reg[EAX].i = imm32;
+  trace(Callstack_depth+1, "run") << "copy imm32 0x" << HEXWORD << imm32 << " to EAX" << end();
+  break;
+}
+
+:(code)
+// Opcode b8 followed by 4 little-endian bytes copies that immediate to EAX.
+void test_copy_imm32_to_EAX_again() {
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  b8                                 0a 0b 0c 0d \n"  // copy 0x0d0c0b0a to EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy imm32 0x0d0c0b0a to EAX\n"
+  );
+}
+
+// Fetch a 32-bit int in little-endian order from the instruction stream and
+// move EIP past its 4 bytes.
+int32_t next32() {
+  const int32_t operand = read_mem_i32(EIP);
+  EIP = EIP + 4;
+  return operand;
+}
+
+//:: helpers
+
+// Render a word for traces: its data, then each metadata piece set off by " /".
+string to_string(const word& w) {
+  string rendered = w.data;
+  for (vector<string>::const_iterator m = w.metadata.begin();  m != w.metadata.end();  ++m) {
+    rendered += " /";
+    rendered += *m;
+  }
+  return rendered;
+}
+
+// Does 's' contain any uppercase letter?
+bool contains_uppercase(const string& s) {
+  for (int i = 0;  i < SIZE(s);  ++i) {
+    // isupper() is only defined for values representable as unsigned char
+    // (or EOF); a byte >= 0x80 held in a signed char would be negative
+    if (isupper(static_cast<unsigned char>(s.at(i)))) return true;
+  }
+  return false;
+}
diff --git a/linux/bootstrap/012elf.cc b/linux/bootstrap/012elf.cc
new file mode 100644
index 00000000..c852e372
--- /dev/null
+++ b/linux/bootstrap/012elf.cc
@@ -0,0 +1,193 @@
+//: Loading SubX programs from ELF binaries.
+//: This will allow us to run them natively on a Linux kernel.
+//: Based on https://github.com/kragen/stoneknifeforth/blob/702d2ebe1b/386.c
+
+:(before "End Main")
+// 'run' subcommand: load the ELF binary named by argv[2] into the VM and
+// execute it.
+assert(argc > 1);
+if (is_equal(argv[1], "run")) {
+  // Outside of tests, traces must be explicitly requested.
+  if (Trace_file.is_open()) Trace_stream = new trace_stream;
+  trace(2, "run") << "=== Starting to run" << end();
+  if (argc <= 2) {
+    raise << "Not enough arguments provided.\n" << die();
+  }
+  reset();
+  // numbers written to cerr from here on are printed in hex
+  cerr << std::hex;
+  load_elf(argv[2], argc, argv);
+  while (EIP < End_of_program)  // weak final-gasp termination check
+    run_one_instruction();
+  // Leaving the loop is treated as a failure.
+  // NOTE(review): presumably a well-behaved program ends through an exit
+  // syscall and never gets here -- confirm against the syscalls layer.
+  raise << "executed past end of the world: " << EIP << " vs " << End_of_program << '\n' << end();
+  return 1;
+}
+
+:(code)
+// Read the ELF binary at 'filename' into a temporary buffer and load it into
+// the VM. 'argc'/'argv' are the emulator's own command line; they are passed
+// through so the program's arguments can be set up on its stack.
+// Any I/O failure is fatal.
+void load_elf(const string& filename, int argc, char* argv[]) {
+  int fd = open(filename.c_str(), O_RDONLY);
+  if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die();
+  // seek to the end to learn the file size, then rewind for the read
+  off_t size = lseek(fd, 0, SEEK_END);
+  if (size < 0) raise << filename.c_str() << ": lseek" << perr() << '\n' << die();
+  lseek(fd, 0, SEEK_SET);
+  uint8_t* elf_contents = static_cast<uint8_t*>(malloc(size));
+  if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die();
+  ssize_t read_size = read(fd, elf_contents, size);
+  if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die();
+  // The whole file is in memory now; release the descriptor. This comes
+  // after the check above so close() can't disturb the errno that perr()
+  // reports for a failed read.
+  close(fd);
+  load_elf_contents(elf_contents, size, argc, argv);
+  free(elf_contents);
+}
+
+// Validate the ELF header held in 'elf_contents' ('size' bytes), load every
+// program header entry into VM memory, then set up the stack, the initial
+// registers and the program's command-line arguments.
+// Only 32-bit little-endian i386 executables are accepted; anything else is
+// fatal.
+void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[]) {
+  // every header field below lies within the first 52 bytes (the size of a
+  // 32-bit ELF header), so make sure the buffer really holds that much
+  if (size < 52)
+    raise << "Invalid binary; file too small to contain an ELF header\n" << die();
+  uint8_t magic[5] = {0};
+  memcpy(magic, elf_contents, 4);
+  if (memcmp(magic, "\177ELF", 4) != 0)
+    raise << "Invalid ELF file; starts with \"" << magic << '"' << die();
+  if (elf_contents[4] != 1)
+    raise << "Only 32-bit ELF files (4-byte words; virtual addresses up to 4GB) supported.\n" << die();
+  if (elf_contents[5] != 1)
+    raise << "Only little-endian ELF files supported.\n" << die();
+  // unused: remaining 10 bytes of e_ident
+  // e_type (low 16 bits) and e_machine (high 16 bits) checked in one go
+  uint32_t e_machine_type = u32_in(&elf_contents[16]);
+  if (e_machine_type != 0x00030002)
+    raise << "ELF type/machine 0x" << HEXWORD << e_machine_type << " isn't i386 executable\n" << die();
+  // unused: e_version. We only support version 1, and later versions will be backwards compatible.
+  uint32_t e_entry = u32_in(&elf_contents[24]);
+  uint32_t e_phoff = u32_in(&elf_contents[28]);
+  // unused: e_shoff
+  // unused: e_flags
+  uint32_t e_ehsize = u16_in(&elf_contents[40]);
+  if (e_ehsize < 52) raise << "Invalid binary; ELF header too small\n" << die();
+  uint32_t e_phentsize = u16_in(&elf_contents[42]);
+  uint32_t e_phnum = u16_in(&elf_contents[44]);
+  trace(90, "load") << e_phnum << " entries in the program header, each " << e_phentsize << " bytes long" << end();
+  // unused: e_shentsize
+  // unused: e_shnum
+  // unused: e_shstrndx
+
+  set<uint32_t> overlap;  // to detect overlapping segments
+  for (size_t i = 0;  i < e_phnum;  ++i)
+    load_segment_from_program_header(elf_contents, i, size, e_phoff + i*e_phentsize, e_ehsize, overlap);
+
+  // initialize code and stack
+  assert(overlap.find(STACK_SEGMENT) == overlap.end());
+  Mem.push_back(vma(STACK_SEGMENT));
+  assert(overlap.find(AFTER_STACK) == overlap.end());
+  // The stack grows downward.
+  Reg[ESP].u = AFTER_STACK;
+  Reg[EBP].u = 0;
+  EIP = e_entry;
+
+  // initialize args on stack
+  // no envp for now
+  // we wastefully use a separate page of memory for argv
+  Mem.push_back(vma(ARGV_DATA_SEGMENT));
+  uint32_t argv_data = ARGV_DATA_SEGMENT;
+  // walk the arguments last to first, so the first one ends up on top
+  for (int i = argc-1;  i >= /*skip 'subx_bin' and 'run'*/2;  --i) {
+    push(argv_data);
+    // '<=' so that the terminating null byte is copied too
+    for (size_t j = 0;  j <= strlen(argv[i]);  ++j) {
+      assert(overlap.find(argv_data) == overlap.end());  // don't bother comparing ARGV and STACK
+      write_mem_u8(argv_data, argv[i][j]);
+      argv_data += sizeof(char);
+      assert(argv_data < ARGV_DATA_SEGMENT + SEGMENT_ALIGNMENT);
+    }
+  }
+  push(argc-/*skip 'subx_bin' and 'run'*/2);
+}
+
+// Push a 32-bit value on the VM stack: move ESP down by 4 and store 'val' at
+// the new top. Running below STACK_SEGMENT is fatal.
+void push(uint32_t val) {
+  const uint32_t new_top = Reg[ESP].u - 4;
+  Reg[ESP].u = new_top;
+  if (new_top < STACK_SEGMENT) {
+    raise << "The stack overflowed its segment. "
+          << "Maybe SPACE_FOR_SEGMENT should be larger? "
+          << "Or you need to carve out an exception for the stack segment "
+          << "to be larger.\n" << die();
+  }
+  trace(Callstack_depth+1, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end();
+  trace(Callstack_depth+1, "run") << "pushing value 0x" << HEXWORD << val << end();
+  write_mem_u32(Reg[ESP].u, val);
+}
+
+// Load the segment described by the program header entry at byte 'offset' of
+// 'elf_contents' ('size' bytes long) into VM memory.
+// Entries that aren't PT_LOAD (type 1) are ignored. Each address written is
+// recorded in 'overlap' so that overlapping segments trip an assertion.
+// Segment 0 can push End_of_program up to the end of its memory image.
+void load_segment_from_program_header(uint8_t* elf_contents, int segment_index, size_t size, uint32_t offset, uint32_t e_ehsize, set<uint32_t>& overlap) {
+  // the fields read below span the first 24 bytes of the entry; don't read
+  // them from beyond the end of the file
+  if (offset > size || size - offset < 24)
+    raise << "Invalid binary; program header at offset " << offset << " extends past the end of the file\n" << die();
+  uint32_t p_type = u32_in(&elf_contents[offset]);
+  trace(90, "load") << "program header at offset " << offset << ": type " << p_type << end();
+  if (p_type != 1) {
+    trace(90, "load") << "ignoring segment at offset " << offset << " of non PT_LOAD type " << p_type << " (see http://refspecs.linuxbase.org/elf/elf.pdf)" << end();
+    return;
+  }
+  uint32_t p_offset = u32_in(&elf_contents[offset + 4]);
+  uint32_t p_vaddr = u32_in(&elf_contents[offset + 8]);
+  if (e_ehsize > p_vaddr) raise << "Invalid binary; program header overlaps ELF header\n" << die();
+  // unused: p_paddr
+  uint32_t p_filesz = u32_in(&elf_contents[offset + 16]);
+  uint32_t p_memsz = u32_in(&elf_contents[offset + 20]);
+  if (p_filesz != p_memsz)
+    raise << "Can't yet handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die();
+
+  // phrased so the 32-bit sum p_offset+p_filesz can't wrap around and sneak
+  // past the check
+  if (p_offset > size || p_filesz > size - p_offset)
+    raise << "Invalid binary; segment at offset " << offset << " is too large: wants to end at " << static_cast<uint64_t>(p_offset)+p_filesz << " but the file ends at " << size << '\n' << die();
+  if (p_memsz >= SEGMENT_ALIGNMENT) {
+    raise << "Code segment too small for SubX; for now please manually increase SEGMENT_ALIGNMENT.\n" << end();
+    return;
+  }
+  trace(90, "load") << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ')' << end();
+  Mem.push_back(vma(p_vaddr));
+  for (size_t i = 0;  i < p_filesz;  ++i) {
+    assert(overlap.find(p_vaddr+i) == overlap.end());
+    write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]);
+    overlap.insert(p_vaddr+i);
+  }
+  if (segment_index == 0 && End_of_program < p_vaddr+p_memsz)
+    End_of_program = p_vaddr+p_memsz;
+}
+
+:(before "End Includes")
+// Very primitive/fixed/insecure ELF segments for now.
+//   --- inaccessible:        0x00000000 -> 0x08047fff
+//   code:                    0x09000000 -> 0x09ffffff (specified in ELF binary)
+//   data:                    0x0a000000 -> 0x0affffff (specified in ELF binary)
+//                      --- heap gets mmap'd somewhere here ---
+//   stack:                   0xbdffffff -> 0xbd000000 (downward; not in ELF binary)
+//   argv hack:               0xbf000000 -> 0xbfffffff (not in ELF binary)
+//   --- reserved for kernel: 0xc0000000 -> ...
+const uint32_t START_HEAP        = 0x0b000000;
+const uint32_t END_HEAP          = 0xbd000000;
+const uint32_t STACK_SEGMENT     = 0xbd000000;
+const uint32_t AFTER_STACK       = 0xbe000000;
+const uint32_t ARGV_DATA_SEGMENT = 0xbf000000;
+// When updating the above memory map, don't forget to update `mmap`'s
+// implementation in the 'syscalls' layer.
+:(before "End Dump Info for Instruction")
+//? dump_stack();  // slow
+:(code)
+// Trace the stack, one 32-bit word per line: first the words from just below
+// AFTER_STACK down to just above ESP, then the word at ESP (marked), then the
+// 9 words immediately below ESP.
+void dump_stack() {
+  trace(Callstack_depth+1, "run") << "stack:" << end();
+  for (uint32_t a = AFTER_STACK-4;  a > Reg[ESP].u;  a -= 4)
+    trace(Callstack_depth+2, "run") << "  0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end();
+  trace(Callstack_depth+2, "run") << "  0x" << HEXWORD << Reg[ESP].u << " => 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << "  <=== ESP" << end();
+  for (uint32_t a = Reg[ESP].u-4;  a > Reg[ESP].u-40;  a -= 4)
+    trace(Callstack_depth+2, "run") << "  0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end();
+}
+
+// Assemble the 4 bytes at 'p' into a 32-bit value, least significant byte
+// first (little-endian).
+inline uint32_t u32_in(uint8_t* p) {
+  // widen before shifting: a plain uint8_t promotes to int, and shifting a
+  // byte >= 0x80 left by 24 would run into int's sign bit
+  return static_cast<uint32_t>(p[0])
+       | static_cast<uint32_t>(p[1]) << 8
+       | static_cast<uint32_t>(p[2]) << 16
+       | static_cast<uint32_t>(p[3]) << 24;
+}
+
+// Assemble the 2 bytes at 'p' into a 16-bit value, least significant byte
+// first (little-endian).
+inline uint16_t u16_in(uint8_t* p) {
+  const int low = p[0];
+  const int high = p[1];
+  return low | (high << 8);
+}
+
+:(before "End Types")
+struct perr {};
+:(code)
+// Streaming a perr appends ": " and the message for the current errno, or
+// nothing at all when errno is 0.
+ostream& operator<<(ostream& os, perr /*unused*/) {
+  if (errno == 0) return os;
+  os << ": " << strerror(errno);
+  return os;
+}
+
+:(before "End Includes")
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <unistd.h>
diff --git a/linux/bootstrap/013direct_addressing.cc b/linux/bootstrap/013direct_addressing.cc
new file mode 100644
index 00000000..cbdc25a8
--- /dev/null
+++ b/linux/bootstrap/013direct_addressing.cc
@@ -0,0 +1,1280 @@
+//: operating directly on a register
+
+:(before "End Initialize Op Names")
+put_new(Name, "01", "add r32 to rm32 (add)");
+
+:(code)
+void test_add_r32_to_r32() {
+  Reg[EAX].i = 0x10;  // destination operand (r/m32 field of the ModR/M byte)
+  Reg[EBX].i = 1;  // source operand (reg field of the ModR/M byte)
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     d8                                    \n" // add EBX to EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing 0x00000011\n"  // 0x10 + 1
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x01: {  // add r32 to r/m32
+  uint8_t modrm = next();
+  uint8_t arg2 = (modrm>>3)&0x7;  // middle 3 bits of ModR/M: source register
+  trace(Callstack_depth+1, "run") << "add " << rname(arg2) << " to r/m32" << end();
+  int32_t* signed_arg1 = effective_address(modrm);
+  int64_t signed_full_result = static_cast<int64_t>(*signed_arg1) + Reg[arg2].i;
+  int32_t signed_result = static_cast<int32_t>(signed_full_result);  // truncate the exact sum; a 32-bit signed add would be undefined behavior on overflow
+  SF = (signed_result < 0);
+  ZF = (signed_result == 0);
+  OF = (signed_result != signed_full_result);  // truncation changed the value
+  // set CF: same check on the unsigned interpretation of the operands
+  uint32_t unsigned_arg1 = static_cast<uint32_t>(*signed_arg1);
+  uint32_t unsigned_result = unsigned_arg1 + Reg[arg2].u;
+  uint64_t unsigned_full_result = static_cast<uint64_t>(unsigned_arg1) + Reg[arg2].u;
+  CF = (unsigned_result != unsigned_full_result);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  *signed_arg1 = signed_result;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end();
+  break;
+}
+
+:(code)
+void test_add_r32_to_r32_signed_overflow() {
+  Reg[EAX].i = INT32_MAX;  // adding 1 wraps to 0x80000000: signed overflow (OF) without an unsigned carry (CF)
+  Reg[EBX].i = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     d8                                    \n" // add EBX to EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=1; ZF=0; CF=0; OF=1\n"
+      "run: storing 0x80000000\n"
+  );
+}
+
+void test_add_r32_to_r32_unsigned_overflow() {
+  Reg[EAX].u = UINT32_MAX;  // adding 1 wraps to 0: unsigned carry (CF) without signed overflow (-1 + 1 == 0)
+  Reg[EBX].u = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     d8                                    \n" // add EBX to EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=0; ZF=1; CF=1; OF=0\n"
+      "run: storing 0x00000000\n"
+  );
+}
+
+void test_add_r32_to_r32_unsigned_and_signed_overflow() {
+  Reg[EAX].i = Reg[EBX].i = INT32_MIN;  // 0x80000000 + 0x80000000 wraps to 0: both CF and OF are expected
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     d8                                    \n" // add EBX to EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=0; ZF=1; CF=1; OF=1\n"
+      "run: storing 0x00000000\n"
+  );
+}
+
+:(code)
+// Implement tables 2-2 and 2-3 in the Intel manual, Volume 2.
+// We return a pointer so that instructions can write to multiple bytes in
+// 'Mem' at once.
+// beware: will eventually have side-effects
+int32_t* effective_address(uint8_t modrm) {
+  const uint8_t mod = (modrm>>6);  // top 2 bits: addressing mode
+  // ignore middle 3 'reg opcode' bits
+  const uint8_t rm = modrm & 0x7;  // bottom 3 bits: register number
+  if (mod == 3) {
+    // mod 3 is just register direct addressing
+    trace(Callstack_depth+1, "run") << "r/m32 is " << rname(rm) << end();
+    return &Reg[rm].i;  // points into the register file rather than 'Mem'
+  }
+  uint32_t addr = effective_address_number(modrm);  // every other mod is resolved to an address first
+  trace(Callstack_depth+1, "run") << "effective address contains 0x" << HEXWORD << read_mem_i32(addr) << end();
+  return mem_addr_i32(addr);
+}
+
+// beware: will eventually have side-effects
+uint32_t effective_address_number(uint8_t modrm) {
+  const uint8_t mod = (modrm>>6);  // top 2 bits: addressing mode
+  // ignore middle 3 'reg opcode' bits
+  const uint8_t rm = modrm & 0x7;
+  uint32_t addr = 0;  // expected to be assigned by mod-specific cases that other layers insert below
+  switch (mod) {
+  case 3:
+    // mod 3 is just register direct addressing
+    raise << "unexpected direct addressing mode\n" << end();  // a register has no memory address to return
+    return 0;
+  // End Mod Special-cases(addr)
+  default:
+    cerr << "unrecognized mod bits: " << NUM(mod) << '\n';
+    exit(1);
+  }
+  //: other mods are indirect, and they'll set addr appropriately
+  // Found effective_address(addr)
+  return addr;
+}
+
+string rname(uint8_t r) {
+  switch (r) {  // 3-bit register numbers as they appear in ModR/M bytes
+  case 0: return "EAX";
+  case 1: return "ECX";
+  case 2: return "EDX";
+  case 3: return "EBX";
+  case 4: return "ESP";
+  case 5: return "EBP";
+  case 6: return "ESI";
+  case 7: return "EDI";
+  default: raise << "invalid register " << NUM(r) << '\n' << end();  return "";  // NUM: report the number, not a raw character
+  }
+}
+
+//:: subtract
+
+:(before "End Initialize Op Names")
+put_new(Name, "29", "subtract r32 from rm32 (sub)");
+
+:(code)
+void test_subtract_r32_from_r32() {
+  Reg[EAX].i = 10;  // destination operand (r/m32); receives the difference
+  Reg[EBX].i = 1;  // source operand (reg field)
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  29     d8                                    \n"  // subtract EBX from EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract EBX from r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing 0x00000009\n"  // 10 - 1
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x29: {  // subtract r32 from r/m32
+  const uint8_t modrm = next();
+  const uint8_t arg2 = (modrm>>3)&0x7;  // middle 3 bits of ModR/M: source register
+  trace(Callstack_depth+1, "run") << "subtract " << rname(arg2) << " from r/m32" << end();
+  int32_t* signed_arg1 = effective_address(modrm);
+  int64_t signed_full_result = static_cast<int64_t>(*signed_arg1) - Reg[arg2].i;
+  int32_t signed_result = static_cast<int32_t>(signed_full_result);  // truncate the exact difference; a 32-bit signed subtract would be undefined behavior on overflow
+  SF = (signed_result < 0);
+  ZF = (signed_result == 0);
+  OF = (signed_result != signed_full_result);  // truncation changed the value
+  // set CF: a borrow makes the 64-bit unsigned difference differ from the 32-bit one
+  uint32_t unsigned_arg1 = static_cast<uint32_t>(*signed_arg1);
+  uint32_t unsigned_result = unsigned_arg1 - Reg[arg2].u;
+  uint64_t unsigned_full_result = static_cast<uint64_t>(unsigned_arg1) - Reg[arg2].u;
+  CF = (unsigned_result != unsigned_full_result);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  *signed_arg1 = signed_result;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end();
+  break;
+}
+
+:(code)
+void test_subtract_r32_from_r32_signed_overflow() {
+  Reg[EAX].i = INT32_MIN;  // INT32_MIN - INT32_MAX does not fit in 32 signed bits; it wraps to 1
+  Reg[EBX].i = INT32_MAX;  // unsigned view: 0x80000000 - 0x7fffffff needs no borrow, so CF stays clear
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  29     d8                                    \n"  // subtract EBX from EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract EBX from r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=0; ZF=0; CF=0; OF=1\n"
+      "run: storing 0x00000001\n"
+  );
+}
+
+void test_subtract_r32_from_r32_unsigned_overflow() {
+  Reg[EAX].i = 0;  // 0 - 1 borrows in unsigned arithmetic (CF) but is a valid signed result, -1
+  Reg[EBX].i = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  29     d8                                    \n"  // subtract EBX from EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract EBX from r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"
+      "run: storing 0xffffffff\n"
+  );
+}
+
+void test_subtract_r32_from_r32_signed_and_unsigned_overflow() {
+  Reg[EAX].i = 0;  // 0 - INT32_MIN is 2^31: too large for int32_t (OF), and 0 - 0x80000000 borrows (CF)
+  Reg[EBX].i = INT32_MIN;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  29     d8                                    \n"  // subtract EBX from EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract EBX from r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=1; ZF=0; CF=1; OF=1\n"
+      "run: storing 0x80000000\n"
+  );
+}
+
+//:: multiply
+
+:(before "End Initialize Op Names")
+put_new(Name, "f7", "negate/multiply/divide rm32 (with EAX and EDX if necessary) depending on subop (neg/mul/idiv)");
+
+:(code)
+void test_multiply_EAX_by_r32() {
+  Reg[EAX].i = 4;  // implicit operand; also receives the low 32 bits of the product
+  Reg[ECX].i = 3;  // explicit r/m32 operand
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  f7     e1                                    \n"  // multiply EAX by ECX
+      // ModR/M in binary: 11 (direct mode) 100 (subop mul) 001 (src ECX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is ECX\n"
+      "run: subop: multiply EAX by r/m32\n"
+      "run: storing 0x0000000c\n"  // 4 * 3
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xf7: {
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "operate on r/m32" << end();
+  int32_t* arg1 = effective_address(modrm);
+  const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
+  switch (subop) {
+  case 4: {  // mul unsigned EAX by r/m32
+    trace(Callstack_depth+1, "run") << "subop: multiply EAX by r/m32" << end();
+    const uint64_t result = static_cast<uint64_t>(Reg[EAX].u) * static_cast<uint32_t>(*arg1);  // exact 64-bit product
+    Reg[EAX].u = result & 0xffffffff;  // low half
+    Reg[EDX].u = result >> 32;  // high half
+    OF = (Reg[EDX].u != 0);  // set when the product does not fit in 32 bits
+    CF = OF;
+    trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();  // SF and ZF are reported but not modified here
+    trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[EAX].u << end();
+    break;
+  }
+  // End Op f7 Subops
+  default:
+    cerr << "unrecognized subop for opcode f7: " << NUM(subop) << '\n';
+    exit(1);
+  }
+  break;
+}
+
+//:
+
+:(before "End Initialize Op Names")
+put_new(Name_0f, "af", "multiply rm32 into r32 (imul)");
+
+:(code)
+void test_multiply_r32_into_r32() {
+  Reg[EAX].i = 4;
+  Reg[EBX].i = 2;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f af  d8                                    \n"  // multiply EAX into EBX
+      // ModR/M in binary: 11 (direct mode) 011 (dest EBX) 000 (src EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: multiply EBX by r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing 0x00000008\n"
+  );
+}
+
+:(before "End Two-Byte Opcodes Starting With 0f")
+case 0xaf: {  // multiply r32 by r/m32
+  const uint8_t modrm = next();
+  const uint8_t arg1 = (modrm>>3)&0x7;  // reg field: both an operand and the destination
+  trace(Callstack_depth+1, "run") << "multiply " << rname(arg1) << " by r/m32" << end();
+  const int32_t* arg2 = effective_address(modrm);
+  int64_t full_result = static_cast<int64_t>(Reg[arg1].i) * (*arg2);
+  int32_t result = static_cast<int32_t>(full_result);  // truncate the exact product; a 32-bit signed multiply would be undefined behavior on overflow
+  OF = (result != full_result);
+  CF = OF;
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();  // SF and ZF are reported but not modified here
+  Reg[arg1].i = result;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[arg1].i << end();
+  break;
+}
+
+//:: negate
+
+:(code)
+void test_negate_r32() {
+  Reg[EBX].i = 1;  // negated in place to -1 (0xffffffff)
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  f7     db                                    \n"  // negate EBX
+      // ModR/M in binary: 11 (direct mode) 011 (subop negate) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: negate\n"
+      "run: storing 0xffffffff\n"
+  );
+}
+
+:(before "End Op f7 Subops")
+case 3: {  // negate r/m32
+  trace(Callstack_depth+1, "run") << "subop: negate" << end();
+  // one case that can overflow: 0x80000000 has no positive counterpart, so the operand is left unchanged
+  if (static_cast<uint32_t>(*arg1) == 0x80000000) {
+    trace(Callstack_depth+1, "run") << "overflow" << end();
+    SF = true;  ZF = false;
+    OF = true;
+    CF = true;  // the operand is nonzero, and NEG sets CF for every nonzero operand
+    break;
+  }
+  int32_t result = -(*arg1);
+  SF = (result >> 31);
+  ZF = (result == 0);
+  OF = false;
+  CF = (*arg1 != 0);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  *arg1 = result;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << end();
+  break;
+}
+
+:(code)
+// negate can overflow in exactly one situation: INT32_MIN has no positive counterpart in 32 bits
+void test_negate_can_overflow() {
+  Reg[EBX].i = INT32_MIN;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  f7     db                                    \n"  // negate EBX
+      // ModR/M in binary: 11 (direct mode) 011 (subop negate) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: negate\n"
+      "run: overflow\n"
+  );
+}
+
+//:: divide with remainder
+
+void test_divide_EAX_by_rm32() {
+  Reg[EAX].u = 7;  // low half of the 64-bit dividend EDX:EAX
+  Reg[EDX].u = 0;  // high half of the dividend
+  Reg[ECX].i = 3;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  f7     f9                                    \n"  // divide EDX:EAX by ECX
+      // ModR/M in binary: 11 (direct mode) 111 (subop idiv) 001 (divisor ECX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is ECX\n"
+      "run: subop: divide EDX:EAX by r/m32, storing quotient in EAX and remainder in EDX\n"
+      "run: quotient: 0x00000002\n"
+      "run: remainder: 0x00000001\n"
+  );
+}
+
+:(before "End Op f7 Subops")
+case 7: {  // divide EDX:EAX by r/m32, storing quotient in EAX and remainder in EDX
+  trace(Callstack_depth+1, "run") << "subop: divide EDX:EAX by r/m32, storing quotient in EAX and remainder in EDX" << end();
+  int64_t dividend = static_cast<int64_t>((static_cast<uint64_t>(Reg[EDX].u) << 32) | Reg[EAX].u);  // EDX is the high half, EAX the low half
+  int32_t divisor = *arg1;
+  assert(divisor != 0);  // division by zero aborts the emulator instead of being emulated
+  Reg[EAX].i = dividend/divisor;  // quotient; NOTE(review): silently truncated to 32 bits if it does not fit
+  Reg[EDX].i = dividend%divisor;  // remainder
+  // flag state undefined
+  trace(Callstack_depth+1, "run") << "quotient: 0x" << HEXWORD << Reg[EAX].i << end();
+  trace(Callstack_depth+1, "run") << "remainder: 0x" << HEXWORD << Reg[EDX].i << end();
+  break;
+}
+
+:(code)
+void test_divide_EAX_by_negative_rm32() {
+  Reg[EAX].u = 7;
+  Reg[EDX].u = 0;
+  Reg[ECX].i = -3;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  f7     f9                                    \n"  // divide EDX:EAX by ECX
+      // ModR/M in binary: 11 (direct mode) 111 (subop idiv) 001 (divisor ECX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is ECX\n"
+      "run: subop: divide EDX:EAX by r/m32, storing quotient in EAX and remainder in EDX\n"
+      "run: quotient: 0xfffffffe\n"  // -2
+      "run: remainder: 0x00000001\n"  // same sign as the (positive) dividend
+  );
+}
+
+void test_divide_negative_EAX_by_rm32() {
+  Reg[EAX].i = -7;
+  Reg[EDX].i = -1;  // sign extend
+  Reg[ECX].i = 3;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  f7     f9                                    \n"  // divide EDX:EAX by ECX
+      // ModR/M in binary: 11 (direct mode) 111 (subop idiv) 001 (divisor ECX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is ECX\n"
+      "run: subop: divide EDX:EAX by r/m32, storing quotient in EAX and remainder in EDX\n"
+      "run: quotient: 0xfffffffe\n"  // -2
+      "run: remainder: 0xffffffff\n"  // -1, same sign as dividend (EDX:EAX)
+  );
+}
+
+void test_divide_negative_EDX_EAX_by_rm32() {
+  Reg[EAX].i = 0;  // lower 32 bits are clear
+  Reg[EDX].i = -7;
+  Reg[ECX].i = 0x40000000;  // 2^30 (largest power of 2 that fits in a positive int32_t)
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  f7     f9                                    \n"  // divide EDX:EAX by ECX
+      // ModR/M in binary: 11 (direct mode) 111 (subop idiv) 001 (divisor ECX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is ECX\n"
+      "run: subop: divide EDX:EAX by r/m32, storing quotient in EAX and remainder in EDX\n"
+      "run: quotient: 0xffffffe4\n"  // (-7 << 32) / (1 << 30) = -7 << 2 = -28
+      "run: remainder: 0x00000000\n"
+  );
+}
+
+//:: shift left
+
+:(before "End Initialize Op Names")
+put_new(Name, "d3", "shift rm32 by CL bits depending on subop (sal/sar/shl/shr)");
+
+:(code)
+void test_shift_left_r32_with_cl() {
+  Reg[EBX].i = 13;  // 13 << 1 == 26 (0x1a)
+  Reg[ECX].i = 1;  // shift count; only the low 5 bits are used
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  d3     e3                                    \n"  // shift EBX left by CL bits
+      // ModR/M in binary: 11 (direct mode) 100 (subop shift left) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift left by CL bits\n"
+      "run: storing 0x0000001a\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xd3: {
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "operate on r/m32" << end();
+  int32_t* arg1 = effective_address(modrm);
+  const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
+  switch (subop) {
+  case 4: {  // shift left r/m32 by CL
+    trace(Callstack_depth+1, "run") << "subop: shift left by CL bits" << end();
+    uint8_t count = Reg[ECX].u & 0x1f;  // only the low 5 bits of ECX are used
+    // OF is only defined if count is 1
+    if (count == 1) {
+      bool msb = (*arg1 & 0x80000000) >> 1;
+      bool pnsb = (*arg1 & 0x40000000);
+      OF = (msb != pnsb);  // set when the shift changes the sign bit
+    }
+    int32_t result = static_cast<int32_t>(static_cast<uint32_t>(*arg1) << count);  // shift as unsigned: left-shifting a negative int is not portable
+    ZF = (result == 0);
+    SF = (result < 0);
+    if (count > 0) CF = (static_cast<uint32_t>(*arg1) << (count-1)) & 0x80000000;  // last bit shifted out; a zero count leaves CF alone (and would otherwise shift by -1)
+    trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+    *arg1 = result;
+    trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << end();
+    break;
+  }
+  // End Op d3 Subops
+  default:
+    cerr << "unrecognized subop for opcode d3: " << NUM(subop) << '\n';
+    exit(1);
+  }
+  break;
+}
+
+//:: shift right arithmetic
+
+:(code)
+void test_shift_right_arithmetic_r32_with_cl() {
+  Reg[EBX].i = 26;  // 26 >> 1 == 13 (0xd)
+  Reg[ECX].i = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  d3     fb                                    \n"  // shift EBX right by CL bits, while preserving sign
+      // ModR/M in binary: 11 (direct mode) 111 (subop shift right arithmetic) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while preserving sign\n"
+      "run: storing 0x0000000d\n"
+  );
+}
+
+:(before "End Op d3 Subops")
+case 7: {  // shift right r/m32 by CL, preserving sign
+  trace(Callstack_depth+1, "run") << "subop: shift right by CL bits, while preserving sign" << end();
+  uint8_t count = Reg[ECX].u & 0x1f;  // only the low 5 bits of ECX are used
+  *arg1 = (*arg1 >> count);  // signed shift; relies on the host compiler replicating the sign bit
+  ZF = (*arg1 == 0);
+  SF = (*arg1 < 0);
+  // OF is only defined if count is 1
+  if (count == 1) OF = false;
+  // CF undefined
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << end();
+  break;
+}
+
+:(code)
+void test_shift_right_arithmetic_odd_r32_with_cl() {
+  Reg[EBX].i = 27;  // odd: the low bit is shifted out and discarded
+  Reg[ECX].i = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  d3     fb                                    \n"  // shift EBX right by CL bits, while preserving sign
+      // ModR/M in binary: 11 (direct mode) 111 (subop shift right arithmetic) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while preserving sign\n"
+      // result: 13
+      "run: storing 0x0000000d\n"
+  );
+}
+
+void test_shift_right_arithmetic_negative_r32_with_cl() {
+  Reg[EBX].i = 0xfffffffd;  // -3; the sign bit is replicated, so the result stays negative
+  Reg[ECX].i = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  d3     fb                                    \n"  // shift EBX right by CL bits, while preserving sign
+      // ModR/M in binary: 11 (direct mode) 111 (subop shift right arithmetic) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while preserving sign\n"
+      // result: -2
+      "run: storing 0xfffffffe\n"
+  );
+}
+
+//:: shift right logical
+
+:(code)
+void test_shift_right_logical_r32_with_cl() {
+  Reg[EBX].i = 26;  // positive value: logical and arithmetic right shifts agree
+  Reg[ECX].i = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  d3     eb                                    \n"  // shift EBX right by CL bits, while padding zeroes
+      // ModR/M in binary: 11 (direct mode) 101 (subop shift right logical) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while padding zeroes\n"
+      // result: 13
+      "run: storing 0x0000000d\n"
+  );
+}
+
+:(before "End Op d3 Subops")
+case 5: {  // shift right r/m32 by CL, padding zeroes
+  trace(Callstack_depth+1, "run") << "subop: shift right by CL bits, while padding zeroes" << end();
+  uint8_t count = Reg[ECX].u & 0x1f;  // only the low 5 bits of ECX are used
+  // OF is only defined if count is 1
+  if (count == 1) {
+    // unlike left shifts, a logical right shift sets OF to the
+    // most-significant bit of the original operand
+    OF = (*arg1 & 0x80000000) != 0;
+  }
+  uint32_t* uarg1 = reinterpret_cast<uint32_t*>(arg1);  // shift as unsigned so zeroes are shifted in
+  *uarg1 = (*uarg1 >> count);
+  ZF = (*uarg1 == 0);
+  // result is always positive by definition
+  SF = false;
+  // CF undefined
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << end();
+  break;
+}
+
+:(code)
+void test_shift_right_logical_odd_r32_with_cl() {
+  Reg[EBX].i = 27;  // odd: the low bit is shifted out and discarded
+  Reg[ECX].i = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  d3     eb                                    \n"  // shift EBX right by CL bits, while padding zeroes
+      // ModR/M in binary: 11 (direct mode) 101 (subop shift right logical) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while padding zeroes\n"
+      // result: 13
+      "run: storing 0x0000000d\n"
+  );
+}
+
+void test_shift_right_logical_negative_r32_with_cl() {
+  Reg[EBX].i = 0xfffffffd;  // top bit set; a logical shift fills it with 0 instead of copying the sign
+  Reg[ECX].i = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  d3     eb                                    \n"  // shift EBX right by CL bits, while padding zeroes
+      // ModR/M in binary: 11 (direct mode) 101 (subop shift right logical) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while padding zeroes\n"
+      "run: storing 0x7ffffffe\n"
+  );
+}
+
+//:: and
+
+:(before "End Initialize Op Names")
+put_new(Name, "21", "rm32 = bitwise AND of r32 with rm32 (and)");
+
+:(code)
+void test_and_r32_with_r32() {
+  Reg[EAX].i = 0x0a0b0c0d;
+  Reg[EBX].i = 0x000000ff;  // mask that keeps only the low byte of EAX
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  21     d8                                    \n"  // and EBX with destination EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: and EBX with r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing 0x0000000d\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x21: {  // r/m32 = r/m32 AND r32
+  const uint8_t modrm_byte = next();
+  const uint8_t src = (modrm_byte >> 3) & 0x7;  // reg field names the source register
+  trace(Callstack_depth+1, "run") << "and " << rname(src) << " with r/m32" << end();
+  // The operands are held as signed ints, but AND works bit by bit, so
+  // signedness does not affect the stored result.
+  int32_t* dest = effective_address(modrm_byte);
+  *dest = *dest & Reg[src].i;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *dest << end();
+  ZF = (*dest == 0);
+  SF = (*dest < 0);  // sign bit of the result
+  OF = false;  // this op always clears OF and CF
+  CF = false;
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:: or
+
+:(before "End Initialize Op Names")
+put_new(Name, "09", "rm32 = bitwise OR of r32 with rm32 (or)");
+
+:(code)
+void test_or_r32_with_r32() {
+  Reg[EAX].i = 0x0a0b0c0d;
+  Reg[EBX].i = 0xa0b0c0d0;  // shares no set bits with EAX, so OR interleaves the two nibble patterns
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  09     d8                                    \n"  // or EBX with destination EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: or EBX with r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing 0xaabbccdd\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x09: {  // r/m32 = r/m32 OR r32
+  const uint8_t modrm_byte = next();
+  const uint8_t src = (modrm_byte >> 3) & 0x7;  // reg field names the source register
+  trace(Callstack_depth+1, "run") << "or " << rname(src) << " with r/m32" << end();
+  // The operands are held as signed ints, but OR works bit by bit, so
+  // signedness does not affect the stored result.
+  int32_t* dest = effective_address(modrm_byte);
+  *dest = *dest | Reg[src].i;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *dest << end();
+  ZF = (*dest == 0);
+  SF = (*dest < 0);  // sign bit of the result
+  OF = false;  // this op always clears OF and CF
+  CF = false;
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:: xor
+
+:(before "End Initialize Op Names")
+put_new(Name, "31", "rm32 = bitwise XOR of r32 with rm32 (xor)");
+
+:(code)
+void test_xor_r32_with_r32() {
+  Reg[EAX].i = 0x0a0b0c0d;
+  Reg[EBX].i = 0xaabbc0d0;  // shares bits 0x0a0b0000 with EAX; XOR clears exactly those shared bits
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  31     d8                                    \n"  // xor EBX with destination EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: xor EBX with r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing 0xa0b0ccdd\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x31: {  // r/m32 = r/m32 XOR r32
+  const uint8_t modrm_byte = next();
+  const uint8_t src = (modrm_byte >> 3) & 0x7;  // reg field names the source register
+  trace(Callstack_depth+1, "run") << "xor " << rname(src) << " with r/m32" << end();
+  // The operands are held as signed ints, but XOR works bit by bit, so
+  // signedness does not affect the stored result.
+  int32_t* dest = effective_address(modrm_byte);
+  *dest = *dest ^ Reg[src].i;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *dest << end();
+  ZF = (*dest == 0);
+  SF = (*dest < 0);  // sign bit of the result
+  OF = false;  // this op always clears OF and CF
+  CF = false;
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:: not
+
+:(code)
+void test_not_r32() {
+  Reg[EBX].i = 0x0f0f00ff;  // every bit is flipped in place, giving 0xf0f0ff00
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  f7     d3                                    \n"  // not EBX
+      // ModR/M in binary: 11 (direct mode) 010 (subop not) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: not\n"
+      "run: storing 0xf0f0ff00\n"
+  );
+}
+
+:(before "End Op f7 Subops")
+case 2: {  // not r/m32
+  trace(Callstack_depth+1, "run") << "subop: not" << end();
+  *arg1 = ~(*arg1);  // flip every bit of the operand in place
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << end();
+  // no flags affected
+  break;
+}
+
+//:: compare (cmp)
+
+:(before "End Initialize Op Names")
+put_new(Name, "39", "compare: set SF if rm32 < r32 (cmp)");
+
+:(code)
+void test_compare_r32_with_r32_greater() {
+  Reg[EAX].i = 0x0a0b0c0d;  // left-hand side; greater than EBX both signed and unsigned, so no flag is set
+  Reg[EBX].i = 0x0a0b0c07;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  39     d8                                    \n"  // compare EAX with EBX
+      // ModR/M in binary: 11 (direct mode) 011 (rhs EBX) 000 (lhs EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare r/m32 with EBX\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=0; ZF=0; CF=0; OF=0\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x39: {  // set SF if r/m32 < r32
+  const uint8_t modrm = next();
+  const uint8_t reg2 = (modrm>>3)&0x7;  // middle 3 bits of ModR/M: right-hand register
+  trace(Callstack_depth+1, "run") << "compare r/m32 with " << rname(reg2) << end();
+  const int32_t* signed_arg1 = effective_address(modrm);
+  const int64_t signed_full_difference = static_cast<int64_t>(*signed_arg1) - Reg[reg2].i;
+  const int32_t signed_difference = static_cast<int32_t>(signed_full_difference);  // truncate the exact difference; a 32-bit signed subtract would be undefined behavior on overflow
+  SF = (signed_difference < 0);
+  ZF = (signed_difference == 0);
+  OF = (signed_difference != signed_full_difference);  // truncation changed the value
+  // set CF: a borrow makes the 64-bit unsigned difference differ from the 32-bit one
+  const uint32_t unsigned_arg1 = static_cast<uint32_t>(*signed_arg1);
+  const uint32_t unsigned_difference = unsigned_arg1 - Reg[reg2].u;
+  const uint64_t unsigned_full_difference = static_cast<uint64_t>(unsigned_arg1) - Reg[reg2].u;
+  CF = (unsigned_difference != unsigned_full_difference);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();  // only flags change; neither operand is written
+  break;
+}
+
+:(code)
+void test_compare_r32_with_r32_lesser_unsigned_and_signed() {
+  Reg[EAX].i = 0x0a0b0c07;  // smaller than EBX both ways: the difference is negative (SF) and borrows (CF)
+  Reg[EBX].i = 0x0a0b0c0d;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  39     d8                                    \n"  // compare EAX with EBX
+      // ModR/M in binary: 11 (direct mode) 011 (rhs EBX) 000 (lhs EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare r/m32 with EBX\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"
+  );
+}
+
+void test_compare_r32_with_r32_lesser_unsigned_and_signed_due_to_overflow() {
+  Reg[EAX].i = INT32_MAX;  // INT32_MAX - INT32_MIN overflows, so SF is set even though EAX > EBX as signed values
+  Reg[EBX].i = INT32_MIN;  // as unsigned, 0x7fffffff < 0x80000000, hence the borrow (CF)
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  39     d8                                    \n"  // compare EAX with EBX
+      // ModR/M in binary: 11 (direct mode) 011 (rhs EBX) 000 (lhs EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare r/m32 with EBX\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=1; ZF=0; CF=1; OF=1\n"
+  );
+}
+
+void test_compare_r32_with_r32_lesser_signed() {
+  Reg[EAX].i = -1;  // -1 < 1 as signed (SF), but 0xffffffff > 1 as unsigned (no CF)
+  Reg[EBX].i = 1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  39     d8                                    \n"  // compare EAX with EBX
+      // ModR/M in binary: 11 (direct mode) 011 (rhs EBX) 000 (lhs EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare r/m32 with EBX\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=1; ZF=0; CF=0; OF=0\n"
+  );
+}
+
+void test_compare_r32_with_r32_lesser_unsigned() {
+  Reg[EAX].i = 1;  // 1 > -1 as signed (no SF), but 1 < 0xffffffff as unsigned (CF)
+  Reg[EBX].i = -1;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  39     d8                                    \n"  // compare EAX with EBX
+      // ModR/M in binary: 11 (direct mode) 011 (rhs EBX) 000 (lhs EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare r/m32 with EBX\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=0; ZF=0; CF=1; OF=0\n"
+  );
+}
+
+void test_compare_r32_with_r32_equal() {
+  Reg[EAX].i = 0x0a0b0c0d;  // identical operands: the difference is 0, so only ZF is set
+  Reg[EBX].i = 0x0a0b0c0d;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  39     d8                                    \n"  // compare EAX and EBX
+      // ModR/M in binary: 11 (direct mode) 011 (rhs EBX) 000 (lhs EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare r/m32 with EBX\n"
+      "run: r/m32 is EAX\n"
+      "run: SF=0; ZF=1; CF=0; OF=0\n"
+  );
+}
+
+//:: copy (mov)
+
+:(before "End Initialize Op Names")
+// Human-readable description for opcode 89, keyed by its hex spelling.
+put_new(Name, "89", "copy r32 to rm32 (mov)");
+
+:(code)
+void test_copy_r32_to_r32() {
+  // Opcode 89 in direct mode: the value in EBX is expected to be stored
+  // into EAX.
+  Reg[EBX].i = 0xaf;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  89     d8                                    \n"  // copy EBX to EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy EBX to r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing 0x000000af\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x89: {  // copy r32 to r/m32
+  // Bits 3-5 of the ModR/M byte pick the source register; the destination
+  // (register or memory word) is resolved by effective_address().
+  const uint8_t modrm = next();
+  const uint8_t src_reg = (modrm >> 3) & 0x7;
+  trace(Callstack_depth+1, "run") << "copy " << rname(src_reg) << " to r/m32" << end();
+  int32_t* const target = effective_address(modrm);
+  // A single 32-bit store through the pointer; relies on the host's int32_t
+  // being 4 bytes wide, like the emulated word.
+  const int32_t value = Reg[src_reg].i;
+  *target = value;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *target << end();
+  break;
+}
+
+//:: xchg
+
+:(before "End Initialize Op Names")
+// Human-readable description for opcode 87, keyed by its hex spelling.
+put_new(Name, "87", "swap the contents of r32 and rm32 (xchg)");
+
+:(code)
+void test_xchg_r32_with_r32() {
+  // Opcode 87 in direct mode: EAX and EBX are expected to trade values, with
+  // one "storing" trace line per side of the exchange.
+  Reg[EBX].i = 0xaf;
+  Reg[EAX].i = 0x2e;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  87     d8                                    \n"  // exchange EBX with EAX
+      // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: exchange EBX with r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing 0x000000af in r/m32\n"
+      "run: storing 0x0000002e in EBX\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x87: {  // exchange r32 with r/m32
+  // Swap the register named by bits 3-5 of ModR/M with the operand that
+  // effective_address() resolves from the same byte.
+  const uint8_t modrm = next();
+  const uint8_t other = (modrm >> 3) & 0x7;
+  trace(Callstack_depth+1, "run") << "exchange " << rname(other) << " with r/m32" << end();
+  int32_t* const rm_operand = effective_address(modrm);
+  // Snapshot both sides first, then write them back crossed over.
+  const int32_t old_rm_value = *rm_operand;
+  const int32_t old_reg_value = Reg[other].i;
+  *rm_operand = old_reg_value;
+  Reg[other].i = old_rm_value;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *rm_operand << " in r/m32" << end();
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[other].i << " in " << rname(other) << end();
+  break;
+}
+
+//:: increment
+
+:(before "End Initialize Op Names")
+// Opcodes 40-47: one description per register; the low 3 bits of the opcode
+// select the register that gets incremented.
+put_new(Name, "40", "increment EAX (inc)");
+put_new(Name, "41", "increment ECX (inc)");
+put_new(Name, "42", "increment EDX (inc)");
+put_new(Name, "43", "increment EBX (inc)");
+put_new(Name, "44", "increment ESP (inc)");
+put_new(Name, "45", "increment EBP (inc)");
+put_new(Name, "46", "increment ESI (inc)");
+put_new(Name, "47", "increment EDI (inc)");
+
+:(code)
+void test_increment_r32() {
+  // Opcode 41 takes no ModR/M byte; ECX is expected to go from 0x1f to 0x20.
+  Reg[ECX].u = 0x1f;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  41                                           \n"  // increment ECX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: increment ECX\n"
+      "run: storing value 0x00000020\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x40:
+case 0x41:
+case 0x42:
+case 0x43:
+case 0x44:
+case 0x45:
+case 0x46:
+case 0x47: {  // increment r32
+  // The register index is encoded in the low 3 bits of the opcode itself.
+  const uint8_t target = op & 0x7;
+  trace(Callstack_depth+1, "run") << "increment " << rname(target) << end();
+  // Unsigned view of the register, so wrapping past 0xffffffff is well-defined.
+  Reg[target].u += 1;
+  trace(Callstack_depth+1, "run") << "storing value 0x" << HEXWORD << Reg[target].u << end();
+  break;
+}
+
+:(before "End Initialize Op Names")
+// Opcode ff is a family: the middle 3 bits of its ModR/M byte (the subop)
+// choose the actual operation.
+put_new(Name, "ff", "increment/decrement/jump/push/call rm32 based on subop (inc/dec/jmp/push/call)");
+
+:(code)
+void test_increment_rm32() {
+  // Opcode ff with subop 0 in direct mode: EAX is expected to go from 0x20
+  // to 0x21.
+  Reg[EAX].u = 0x20;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  ff     c0                                    \n"  // increment EAX
+      // ModR/M in binary: 11 (direct mode) 000 (subop inc) 000 (EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: increment r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing value 0x00000021\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xff: {
+  // Opcode ff multiplexes several operations; the subop field of the ModR/M
+  // byte picks one. Only subop 0 (increment) is handled right here; further
+  // subop cases are spliced in by other fragments just above the marker
+  // comment that closes the inner switch, and they share this scope's modrm.
+  const uint8_t modrm = next();
+  const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
+  switch (subop) {
+    case 0: {  // increment r/m32
+      trace(Callstack_depth+1, "run") << "increment r/m32" << end();
+      int32_t* arg = effective_address(modrm);
+      ++*arg;
+      trace(Callstack_depth+1, "run") << "storing value 0x" << HEXWORD << *arg << end();
+      break;
+    }
+    default:
+      // Any subop without a case of its own is fatal: report it and exit.
+      cerr << "unrecognized subop for ff: " << HEXBYTE << NUM(subop) << '\n';
+      exit(1);
+    // End Op ff Subops
+  }
+  break;
+}
+
+//:: decrement
+
+:(before "End Initialize Op Names")
+// Opcodes 48-4f: one description per register; the low 3 bits of the opcode
+// select the register that gets decremented.
+put_new(Name, "48", "decrement EAX (dec)");
+put_new(Name, "49", "decrement ECX (dec)");
+put_new(Name, "4a", "decrement EDX (dec)");
+put_new(Name, "4b", "decrement EBX (dec)");
+put_new(Name, "4c", "decrement ESP (dec)");
+put_new(Name, "4d", "decrement EBP (dec)");
+put_new(Name, "4e", "decrement ESI (dec)");
+put_new(Name, "4f", "decrement EDI (dec)");
+
+:(code)
+void test_decrement_r32() {
+  // Opcode 49 takes no ModR/M byte; ECX is expected to go from 0x1f to 0x1e.
+  Reg[ECX].u = 0x1f;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  49                                           \n"  // decrement ECX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: decrement ECX\n"
+      "run: storing value 0x0000001e\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x48:
+case 0x49:
+case 0x4a:
+case 0x4b:
+case 0x4c:
+case 0x4d:
+case 0x4e:
+case 0x4f: {  // decrement r32
+  // The register index is encoded in the low 3 bits of the opcode itself.
+  const uint8_t target = op & 0x7;
+  trace(Callstack_depth+1, "run") << "decrement " << rname(target) << end();
+  // Unsigned view of the register, so wrapping below zero is well-defined.
+  Reg[target].u -= 1;
+  trace(Callstack_depth+1, "run") << "storing value 0x" << HEXWORD << Reg[target].u << end();
+  break;
+}
+
+:(code)
+void test_decrement_rm32() {
+  // Opcode ff with subop 1 in direct mode: EAX is expected to go from 0x20
+  // to 0x1f.
+  Reg[EAX].u = 0x20;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  ff     c8                                    \n"  // decrement EAX
+      // ModR/M in binary: 11 (direct mode) 001 (subop dec) 000 (EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: decrement r/m32\n"
+      "run: r/m32 is EAX\n"
+      "run: storing value 0x0000001f\n"
+  );
+}
+
+:(before "End Op ff Subops")
+case 1: {  // decrement r/m32
+  // Subop 1 of opcode ff; modrm comes from the enclosing ff handler.
+  trace(Callstack_depth+1, "run") << "decrement r/m32" << end();
+  int32_t* const operand = effective_address(modrm);
+  --(*operand);
+  trace(Callstack_depth+1, "run") << "storing value 0x" << HEXWORD << *operand << end();
+  break;
+}
+
+//:: push
+
+:(before "End Initialize Op Names")
+// Opcodes 50-57: one description per register; the low 3 bits of the opcode
+// select the register whose value is pushed.
+put_new(Name, "50", "push EAX to stack (push)");
+put_new(Name, "51", "push ECX to stack (push)");
+put_new(Name, "52", "push EDX to stack (push)");
+put_new(Name, "53", "push EBX to stack (push)");
+put_new(Name, "54", "push ESP to stack (push)");
+put_new(Name, "55", "push EBP to stack (push)");
+put_new(Name, "56", "push ESI to stack (push)");
+put_new(Name, "57", "push EDI to stack (push)");
+
+:(code)
+void test_push_r32() {
+  // Opcode 53 pushes EBX: ESP is expected to drop by 4 (0xbd000008 ->
+  // 0xbd000004) before the value 0xa is written.
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000008;
+  Reg[EBX].i = 0x0000000a;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  53                                           \n"  // push EBX to stack
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: push EBX\n"
+      "run: decrementing ESP to 0xbd000004\n"
+      "run: pushing value 0x0000000a\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x50:
+case 0x51:
+case 0x52:
+case 0x53:
+case 0x54:
+case 0x55:
+case 0x56:
+case 0x57: {  // push r32 to stack
+  // The register index is encoded in the low 3 bits of the opcode itself;
+  // the actual stack manipulation is delegated to push().
+  const uint8_t src = op & 0x7;
+  trace(Callstack_depth+1, "run") << "push " << rname(src) << end();
+//?   cerr << "push: " << NUM(src) << ": " << Reg[src].u << " => " << Reg[ESP].u << '\n';
+  push(Reg[src].u);
+  break;
+}
+
+//:: pop
+
+:(before "End Initialize Op Names")
+// Opcodes 58-5f: one description per register; the low 3 bits of the opcode
+// select the register that receives the popped value.
+put_new(Name, "58", "pop top of stack to EAX (pop)");
+put_new(Name, "59", "pop top of stack to ECX (pop)");
+put_new(Name, "5a", "pop top of stack to EDX (pop)");
+put_new(Name, "5b", "pop top of stack to EBX (pop)");
+put_new(Name, "5c", "pop top of stack to ESP (pop)");
+put_new(Name, "5d", "pop top of stack to EBP (pop)");
+put_new(Name, "5e", "pop top of stack to ESI (pop)");
+put_new(Name, "5f", "pop top of stack to EDI (pop)");
+
+:(code)
+void test_pop_r32() {
+  // Opcode 5b pops into EBX: the word at ESP (0xa, written below) is
+  // expected to be read first, then ESP rises by 4 to 0xbd00000c.
+  // NOTE(review): the data segment at 0x2000 is not referenced by the pop
+  // (ESP points at 0xbd000008); it looks like a leftover -- confirm.
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000008;
+  write_mem_i32(0xbd000008, 0x0000000a);  // ..before this write
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "  5b                                           \n"  // pop stack to EBX
+      "== data 0x2000\n"  // data segment
+      "0a 00 00 00\n"  // 0xa
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: pop into EBX\n"
+      "run: popping value 0x0000000a\n"
+      "run: incrementing ESP to 0xbd00000c\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x58:
+case 0x59:
+case 0x5a:
+case 0x5b:
+case 0x5c:
+case 0x5d:
+case 0x5e:
+case 0x5f: {  // pop stack into r32
+  // The register index is encoded in the low 3 bits of the opcode itself;
+  // pop() reads the word at ESP and adjusts ESP.
+  const uint8_t dest = op & 0x7;
+  trace(Callstack_depth+1, "run") << "pop into " << rname(dest) << end();
+//?   cerr << "pop from " << Reg[ESP].u << '\n';
+  const uint32_t popped = pop();
+  Reg[dest].u = popped;
+//?   cerr << "=> " << NUM(dest) << ": " << Reg[dest].u << '\n';
+  break;
+}
+:(code)
+uint32_t pop() {
+  // Reads the 32-bit word ESP points at, then moves ESP up by one word.
+  // Both steps are traced. Asserts that the new ESP is still below
+  // AFTER_STACK. Returns the word that was read.
+  const uint32_t top_of_stack = read_mem_u32(Reg[ESP].u);
+  trace(Callstack_depth+1, "run") << "popping value 0x" << HEXWORD << top_of_stack << end();
+  Reg[ESP].u = Reg[ESP].u + 4;
+  trace(Callstack_depth+1, "run") << "incrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end();
+  assert(Reg[ESP].u < AFTER_STACK);
+  return top_of_stack;
+}
+
+:(before "End Includes")
+#include <climits>
diff --git a/linux/bootstrap/014indirect_addressing.cc b/linux/bootstrap/014indirect_addressing.cc
new file mode 100644
index 00000000..a96b6ebb
--- /dev/null
+++ b/linux/bootstrap/014indirect_addressing.cc
@@ -0,0 +1,1005 @@
+//: operating on memory at the address provided by some register
+//: we'll now start providing data in a separate segment
+
+void test_add_r32_to_mem_at_rm32() {
+  // Opcode 01 in indirect mode: EBX (0x10) is added to the word at the
+  // address held in EAX (0x2000, initially 1), so 0x11 is expected there.
+  Reg[EBX].i = 0x10;
+  Reg[EAX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     18                                    \n"  // add EBX to *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0x00000011\n"
+  );
+}
+
+:(before "End Mod Special-cases(addr)")
+case 0:  // indirect addressing
+  // With mod == 0 the operand lives in memory. Unless a more specific rm
+  // case is spliced into the inner switch by another fragment, the address
+  // is simply the current (unsigned) value of register rm.
+  switch (rm) {
+  default:  // address in register
+    trace(Callstack_depth+1, "run") << "effective address is 0x" << HEXWORD << Reg[rm].u << " (" << rname(rm) << ")" << end();
+    addr = Reg[rm].u;
+    break;
+  // End Mod 0 Special-cases(addr)
+  }
+  break;
+
+//:
+
+:(before "End Initialize Op Names")
+// Human-readable description for opcode 03, keyed by its hex spelling.
+put_new(Name, "03", "add rm32 to r32 (add)");
+
+:(code)
+void test_add_mem_at_rm32_to_r32() {
+  // Opcode 03 in indirect mode: the word at the address in EAX (1) is added
+  // to EBX (0x10), so 0x11 is expected to be stored in EBX.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0x10;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  03     18                                    \n"  // add *EAX to EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add r/m32 to EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0x00000011\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x03: {  // add r/m32 to r32
+  // Adds the operand resolved from ModR/M to the register named by bits 3-5
+  // of ModR/M, sets SF/ZF/OF/CF, traces them, then stores the sum in the
+  // register.
+  const uint8_t modrm = next();
+  const uint8_t arg1 = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "add r/m32 to " << rname(arg1) << end();
+  const int32_t* signed_arg2 = effective_address(modrm);
+  // Compute the 32-bit sum on unsigned values: unsigned arithmetic wraps,
+  // whereas overflowing an int32_t addition is undefined behavior (and this
+  // handler is expected to run on overflowing inputs). The conversion back
+  // to int32_t reinterprets the same bit pattern on two's-complement hosts,
+  // which the .i/.u register views already assume.
+  const uint32_t unsigned_arg2 = static_cast<uint32_t>(*signed_arg2);
+  const uint32_t unsigned_result = Reg[arg1].u + unsigned_arg2;
+  const int32_t signed_result = static_cast<int32_t>(unsigned_result);
+  SF = (signed_result < 0);
+  ZF = (signed_result == 0);
+  // OF: the wrapped signed result differs from the exact 64-bit signed sum.
+  const int64_t signed_full_result = static_cast<int64_t>(Reg[arg1].i) + *signed_arg2;
+  OF = (signed_result != signed_full_result);
+  // CF: the wrapped unsigned result differs from the exact 64-bit unsigned sum.
+  const uint64_t unsigned_full_result = static_cast<uint64_t>(Reg[arg1].u) + unsigned_arg2;
+  CF = (unsigned_result != unsigned_full_result);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  Reg[arg1].i = signed_result;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[arg1].i << end();
+  break;
+}
+
+:(code)
+void test_add_mem_at_rm32_to_r32_signed_overflow() {
+  // INT32_MAX + 1 wraps to 0x80000000: OF and SF are expected to be set,
+  // CF clear (no unsigned carry).
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = INT32_MAX;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  03     18                                    \n" // add *EAX to EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add r/m32 to EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0x00000001\n"
+      "run: SF=1; ZF=0; CF=0; OF=1\n"
+      "run: storing 0x80000000\n"
+  );
+}
+
+void test_add_mem_at_rm32_to_r32_unsigned_overflow() {
+  // UINT32_MAX + 1 wraps to 0: CF and ZF are expected to be set, OF clear
+  // (as signed numbers this is -1 + 1, which does not overflow).
+  Reg[EAX].u = 0x2000;
+  Reg[EBX].u = UINT32_MAX;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  03     18                                    \n" // add *EAX to EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add r/m32 to EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0x00000001\n"
+      "run: SF=0; ZF=1; CF=1; OF=0\n"
+      "run: storing 0x00000000\n"
+  );
+}
+
+void test_add_mem_at_rm32_to_r32_unsigned_and_signed_overflow() {
+  // INT32_MIN + INT32_MIN wraps to 0 both ways: CF, OF and ZF are all
+  // expected to be set.
+  Reg[EAX].u = 0x2000;
+  Reg[EBX].i = INT32_MIN;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  03     18                                    \n" // add *EAX to EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "00 00 00 80\n"  // INT32_MIN
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add r/m32 to EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0x80000000\n"
+      "run: SF=0; ZF=1; CF=1; OF=1\n"
+      "run: storing 0x00000000\n"
+  );
+}
+
+//:: subtract
+
+:(code)
+void test_subtract_r32_from_mem_at_rm32() {
+  // Opcode 29 in indirect mode: EBX (1) is subtracted from the word at the
+  // address in EAX (0xa), so 9 is expected to be stored there.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 1;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  29     18                                    \n"  // subtract EBX from *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX)
+      "== data 0x2000\n"
+      "0a 00 00 00\n"  // 0xa
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract EBX from r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0x00000009\n"
+  );
+}
+
+//:
+
+:(before "End Initialize Op Names")
+// Human-readable description for opcode 2b, keyed by its hex spelling.
+put_new(Name, "2b", "subtract rm32 from r32 (sub)");
+
+:(code)
+void test_subtract_mem_at_rm32_from_r32() {
+  // Opcode 2b in indirect mode: the word at the address in EAX (1) is
+  // subtracted from EBX (10), so 9 is expected to be stored in EBX.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 10;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  2b     18                                    \n"  // subtract *EAX from EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract r/m32 from EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0x00000009\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x2b: {  // subtract r/m32 from r32
+  // Subtracts the operand resolved from ModR/M from the register named by
+  // bits 3-5 of ModR/M, sets SF/ZF/OF/CF, traces them, then stores the
+  // difference in the register.
+  const uint8_t modrm = next();
+  const uint8_t arg1 = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "subtract r/m32 from " << rname(arg1) << end();
+  const int32_t* signed_arg2 = effective_address(modrm);
+  // Compute the 32-bit difference on unsigned values: unsigned arithmetic
+  // wraps, whereas overflowing an int32_t subtraction is undefined behavior
+  // (and this handler is expected to run on overflowing inputs). The
+  // conversion back to int32_t reinterprets the same bit pattern on
+  // two's-complement hosts, which the .i/.u register views already assume.
+  const uint32_t unsigned_arg2 = static_cast<uint32_t>(*signed_arg2);
+  const uint32_t unsigned_result = Reg[arg1].u - unsigned_arg2;
+  const int32_t signed_result = static_cast<int32_t>(unsigned_result);
+  SF = (signed_result < 0);
+  ZF = (signed_result == 0);
+  // OF: the wrapped signed result differs from the exact 64-bit signed difference.
+  const int64_t signed_full_result = static_cast<int64_t>(Reg[arg1].i) - *signed_arg2;
+  OF = (signed_result != signed_full_result);
+  // CF: a borrow makes the 64-bit unsigned difference wrap to a huge value
+  // that no longer matches the 32-bit one.
+  const uint64_t unsigned_full_result = static_cast<uint64_t>(Reg[arg1].u) - unsigned_arg2;
+  CF = (unsigned_result != unsigned_full_result);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  Reg[arg1].i = signed_result;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[arg1].i << end();
+  break;
+}
+
+:(code)
+void test_subtract_mem_at_rm32_from_r32_signed_overflow() {
+  // INT32_MIN - INT32_MAX wraps to 1: only OF is expected to be set.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = INT32_MIN;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  2b     18                                    \n"  // subtract *EAX from EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "ff ff ff 7f\n"  // INT32_MAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract r/m32 from EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0x7fffffff\n"
+      "run: SF=0; ZF=0; CF=0; OF=1\n"
+      "run: storing 0x00000001\n"
+  );
+}
+
+void test_subtract_mem_at_rm32_from_r32_unsigned_overflow() {
+  // 0 - 1 borrows as an unsigned operation: CF and SF are expected to be
+  // set, OF clear, and 0xffffffff stored.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  2b     18                                    \n"  // subtract *EAX from EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract r/m32 from EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0x00000001\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"
+      "run: storing 0xffffffff\n"
+  );
+}
+
+void test_subtract_mem_at_rm32_from_r32_signed_and_unsigned_overflow() {
+  // 0 - INT32_MIN overflows as signed and borrows as unsigned: SF, CF and
+  // OF are all expected to be set, and 0x80000000 stored.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  2b     18                                    \n"  // subtract *EAX from EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "00 00 00 80\n"  // INT32_MIN
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract r/m32 from EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0x80000000\n"
+      "run: SF=1; ZF=0; CF=1; OF=1\n"
+      "run: storing 0x80000000\n"
+  );
+}
+
+//:: and
+:(code)
+void test_and_r32_with_mem_at_rm32() {
+  // Opcode 21 in indirect mode: the word at the address in EAX (0x0a0b0c0d)
+  // is ANDed with EBX (0xff), so 0x0d is expected to be stored there.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0xff;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  21     18                                    \n"  // and EBX with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX)
+      "== data 0x2000\n"
+      "0d 0c 0b 0a\n"  // 0x0a0b0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: and EBX with r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0x0000000d\n"
+  );
+}
+
+//:
+
+:(before "End Initialize Op Names")
+// Human-readable description for opcode 23, keyed by its hex spelling.
+put_new(Name, "23", "r32 = bitwise AND of r32 with rm32 (and)");
+
+:(code)
+void test_and_mem_at_rm32_with_r32() {
+  // Opcode 23 in indirect mode: EBX (0x0a0b0c0d) is ANDed with the word at
+  // the address in EAX (0xff), so 0x0d is expected to be stored in EBX.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0x0a0b0c0d;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  23     18                                    \n"  // and *EAX with EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "ff 00 00 00\n"  // 0xff
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: and r/m32 with EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0x0000000d\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x23: {  // and r/m32 with r32
+  // ANDs the operand resolved from ModR/M into the register named by bits
+  // 3-5 of ModR/M, then derives SF/ZF from the result and clears CF/OF.
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm >> 3) & 0x7;
+  trace(Callstack_depth+1, "run") << "and r/m32 with " << rname(dest) << end();
+  // Signed operands are used although bitwise ops are conceptually unsigned;
+  // the resulting bit pattern is the same either way.
+  const int32_t* const operand = effective_address(modrm);
+  Reg[dest].i = Reg[dest].i & *operand;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[dest].i << end();
+  SF = (Reg[dest].i >> 31);
+  ZF = (Reg[dest].i == 0);
+  CF = false;
+  OF = false;
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:: or
+
+:(code)
+void test_or_r32_with_mem_at_rm32() {
+  // Opcode 09 in indirect mode: the word at the address in EAX (0x0a0b0c0d)
+  // is ORed with EBX (0xa0b0c0d0), so 0xaabbccdd is expected there.
+  // NOTE(review): the instruction line ends in a stray '#' inside the string
+  // literal -- presumably tolerated by the loader; confirm.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0xa0b0c0d0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  09     18                                   #\n"  // or EBX with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX)
+      "== data 0x2000\n"
+      "0d 0c 0b 0a\n"  // 0x0a0b0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: or EBX with r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0xaabbccdd\n"
+  );
+}
+
+//:
+
+:(before "End Initialize Op Names")
+// Human-readable description for opcode 0b, keyed by its hex spelling.
+put_new(Name, "0b", "r32 = bitwise OR of r32 with rm32 (or)");
+
+:(code)
+void test_or_mem_at_rm32_with_r32() {
+  // Opcode 0b in indirect mode: EBX (0xa0b0c0d0) is ORed with the word at
+  // the address in EAX (0x0a0b0c0d), so 0xaabbccdd is expected in EBX.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0xa0b0c0d0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0b     18                                    \n"  // or *EAX with EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "0d 0c 0b 0a\n"  // 0x0a0b0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: or r/m32 with EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0xaabbccdd\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x0b: {  // or r/m32 with r32
+  // ORs the operand resolved from ModR/M into the register named by bits
+  // 3-5 of ModR/M, then derives SF/ZF from the result and clears CF/OF.
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm >> 3) & 0x7;
+  trace(Callstack_depth+1, "run") << "or r/m32 with " << rname(dest) << end();
+  // Signed operands are used although bitwise ops are conceptually unsigned;
+  // the resulting bit pattern is the same either way.
+  const int32_t* const operand = effective_address(modrm);
+  Reg[dest].i = Reg[dest].i | *operand;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[dest].i << end();
+  SF = (Reg[dest].i >> 31);
+  ZF = (Reg[dest].i == 0);
+  CF = false;
+  OF = false;
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:: xor
+
+:(code)
+void test_xor_r32_with_mem_at_rm32() {
+  // Opcode 31 in indirect mode: the word at the address in EAX (0xaabb0c0d)
+  // is XORed with EBX (0xa0b0c0d0). The operands share set bits in the top
+  // two bytes, so the expected 0x0a0bccdd differs from what OR would give.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0xa0b0c0d0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  31     18                                    \n"  // xor EBX with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX)
+      "== data 0x2000\n"
+      "0d 0c bb aa\n"  // 0xaabb0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: xor EBX with r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0x0a0bccdd\n"
+  );
+}
+
+//:
+
+:(before "End Initialize Op Names")
+// Human-readable description for opcode 33, keyed by its hex spelling.
+put_new(Name, "33", "r32 = bitwise XOR of r32 with rm32 (xor)");
+
+:(code)
+void test_xor_mem_at_rm32_with_r32() {
+  // Opcode 33 in indirect mode: EBX (0xa0b0c0d0) is XORed with the word at
+  // the address in EAX (0xaabb0c0d). The operands deliberately share set bits
+  // in the top two bytes so that XOR (0x0a0bccdd) and OR (0xaabbccdd) give
+  // different answers; with disjoint bits the two are indistinguishable.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0xa0b0c0d0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  33     18                                    \n"  // xor *EAX with EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "0d 0c bb aa\n"  // 0xaabb0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: xor r/m32 with EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0x0a0bccdd\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x33: {  // xor r/m32 with r32
+  // XORs the operand resolved from ModR/M into the register named by bits
+  // 3-5 of ModR/M, then derives SF/ZF from the result and clears CF/OF.
+  const uint8_t modrm = next();
+  const uint8_t arg1 = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "xor r/m32 with " << rname(arg1) << end();
+  // bitwise ops technically operate on unsigned numbers, but it makes no
+  // difference
+  const int32_t* signed_arg2 = effective_address(modrm);
+  Reg[arg1].i ^= *signed_arg2;  // exclusive or; '|=' here would compute OR instead
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[arg1].i << end();
+  SF = (Reg[arg1].i >> 31);
+  ZF = (Reg[arg1].i == 0);
+  CF = false;
+  OF = false;
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:: not
+
+:(code)
+void test_not_of_mem_at_rm32() {
+  // Opcode f7 with subop 2 in indirect mode: every bit of the word at the
+  // address in EBX (0x0f0f00ff) is expected to be flipped, giving 0xf0f0ff00.
+  Reg[EBX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  f7     13                                    \n"  // not *EBX
+      // ModR/M in binary: 00 (indirect mode) 010 (subop not) 011 (dest EBX)
+      "== data 0x2000\n"
+      "ff 00 0f 0f\n"  // 0x0f0f00ff
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: subop: not\n"
+      "run: storing 0xf0f0ff00\n"
+  );
+}
+
+//:: compare (cmp)
+
+:(code)
+void test_compare_mem_at_rm32_with_r32_greater() {
+  // Opcode 39 in indirect mode: the word at the address in EAX (0x0a0b0c0d)
+  // is greater than EBX (0x0a0b0c07), so no flag is expected to be set.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0x0a0b0c07;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  39     18                                    \n"  // compare *EAX with EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (rhs EBX) 000 (lhs EAX)
+      "== data 0x2000\n"
+      "0d 0c 0b 0a\n"  // 0x0a0b0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare r/m32 with EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: SF=0; ZF=0; CF=0; OF=0\n"
+  );
+}
+
+:(code)
+void test_compare_mem_at_rm32_with_r32_lesser() {
+  // Opcode 39 in indirect mode: the word at the address in EAX (0x0a0b0c07)
+  // is less than EBX (0x0a0b0c0d), so SF and CF are expected to be set.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0x0a0b0c0d;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  39     18                                    \n"  // compare *EAX with EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (rhs EBX) 000 (lhs EAX)
+      "== data 0x2000\n"
+      "07 0c 0b 0a\n"  // 0x0a0b0c07
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare r/m32 with EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"
+  );
+}
+
+:(code)
+void test_compare_mem_at_rm32_with_r32_equal() {
+  // Opcode 39 in indirect mode: the word at the address in EAX equals EBX,
+  // so only ZF is expected to be set.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0x0a0b0c0d;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  39     18                                    \n"  // compare *EAX and EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (rhs EBX) 000 (lhs EAX)
+      "== data 0x2000\n"
+      "0d 0c 0b 0a\n"  // 0x0a0b0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare r/m32 with EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: SF=0; ZF=1; CF=0; OF=0\n"
+  );
+}
+
+//:
+
+:(before "End Initialize Op Names")
+// Human-readable description for opcode 3b, keyed by its hex spelling.
+put_new(Name, "3b", "compare: set SF if r32 < rm32 (cmp)");
+
+:(code)
+void test_compare_r32_with_mem_at_rm32_greater() {
+  // Opcode 3b in indirect mode: EBX (0x0a0b0c0d) is greater than the word at
+  // the address in EAX (0x0a0b0c07), so no flag is expected to be set.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0x0a0b0c0d;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3b     18                                    \n"  // compare EBX with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (lhs EBX) 000 (rhs EAX)
+      "== data 0x2000\n"
+      "07 0c 0b 0a\n"  // 0x0a0b0c07
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EBX with r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: SF=0; ZF=0; CF=0; OF=0\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x3b: {  // set SF if r32 < r/m32
+  // Compares the register named by bits 3-5 of ModR/M with the operand
+  // resolved from ModR/M by computing (register - operand) and setting
+  // SF/ZF/OF/CF from it. Neither operand is modified.
+  const uint8_t modrm = next();
+  const uint8_t reg1 = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "compare " << rname(reg1) << " with r/m32" << end();
+  const int32_t* signed_arg2 = effective_address(modrm);
+  // Compute the 32-bit difference on unsigned values: unsigned arithmetic
+  // wraps, whereas overflowing an int32_t subtraction is undefined behavior
+  // (and this handler is expected to run on overflowing inputs). The
+  // conversion back to int32_t reinterprets the same bit pattern on
+  // two's-complement hosts, which the .i/.u register views already assume.
+  const uint32_t unsigned_arg2 = static_cast<uint32_t>(*signed_arg2);
+  const uint32_t unsigned_difference = Reg[reg1].u - unsigned_arg2;
+  const int32_t signed_difference = static_cast<int32_t>(unsigned_difference);
+  SF = (signed_difference < 0);
+  ZF = (signed_difference == 0);
+  // OF: the wrapped signed result differs from the exact 64-bit signed difference.
+  const int64_t full_signed_difference = static_cast<int64_t>(Reg[reg1].i) - *signed_arg2;
+  OF = (signed_difference != full_signed_difference);
+  // CF: a borrow makes the 64-bit unsigned difference wrap to a huge value
+  // that no longer matches the 32-bit one.
+  const uint64_t full_unsigned_difference = static_cast<uint64_t>(Reg[reg1].u) - unsigned_arg2;
+  CF = (unsigned_difference != full_unsigned_difference);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+:(code)
+void test_compare_r32_with_mem_at_rm32_lesser_unsigned_and_signed() {
+  // EBX (0x0a0b0c07) is less than the word at the address in EAX
+  // (0x0a0b0c0d) both as signed and unsigned: SF and CF are expected.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0x0a0b0c07;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3b     18                                    \n"  // compare EBX with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (lhs EBX) 000 (rhs EAX)
+      "== data 0x2000\n"
+      "0d 0c 0b 0a\n"  // 0x0a0b0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EBX with r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0x0a0b0c0d\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"
+  );
+}
+
+void test_compare_r32_with_mem_at_rm32_lesser_unsigned_and_signed_due_to_overflow() {
+  // EBX = INT32_MAX compared with INT32_MIN in memory: the subtraction
+  // overflows as signed and borrows as unsigned, so SF, CF and OF are all
+  // expected to be set.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = INT32_MAX;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3b     18                                    \n"  // compare EBX with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (lhs EBX) 000 (rhs EAX)
+      "== data 0x2000\n"
+      "00 00 00 80\n"  // INT32_MIN
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EBX with r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0x80000000\n"
+      "run: SF=1; ZF=0; CF=1; OF=1\n"
+  );
+}
+
+void test_compare_r32_with_mem_at_rm32_lesser_signed() {
+  // EBX = -1 compared with 1 in memory: smaller as a signed number only, so
+  // SF is expected to be set while CF and OF stay clear.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = -1;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3b     18                                    \n"  // compare EBX with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (lhs EBX) 000 (rhs EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EBX with r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0x00000001\n"
+      "run: SF=1; ZF=0; CF=0; OF=0\n"
+  );
+}
+
+void test_compare_r32_with_mem_at_rm32_lesser_unsigned() {
+  // EBX = 1 compared with 0xffffffff in memory: smaller as an unsigned
+  // number only, so just CF is expected to be set.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 1;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3b     18                                    \n"  // compare EBX with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (lhs EBX) 000 (rhs EAX)
+      "== data 0x2000\n"
+      "ff ff ff ff\n"  // -1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EBX with r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: effective address contains 0xffffffff\n"
+      "run: SF=0; ZF=0; CF=1; OF=0\n"
+  );
+}
+
+void test_compare_r32_with_mem_at_rm32_equal() {
+  // EBX equals the word at the address in EAX, so only ZF is expected to be
+  // set.
+  Reg[EAX].i = 0x2000;
+  Reg[EBX].i = 0x0a0b0c0d;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3b     18                                    \n"  // compare EBX with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (lhs EBX) 000 (rhs EAX)
+      "== data 0x2000\n"
+      "0d 0c 0b 0a\n"  // 0x0a0b0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EBX with r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: SF=0; ZF=1; CF=0; OF=0\n"
+  );
+}
+
+//:: copy (mov)
+
+void test_copy_r32_to_mem_at_rm32() {  // expects EBX's value (0xaf) to be stored at the address held in EAX (0x60)
+  Reg[EBX].i = 0xaf;
+  Reg[EAX].i = 0x60;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  89     18                                    \n"  // copy EBX to *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy EBX to r/m32\n"
+      "run: effective address is 0x00000060 (EAX)\n"
+      "run: storing 0x000000af\n"
+  );
+}
+
+//:
+
+:(before "End Initialize Op Names")
+put_new(Name, "8b", "copy rm32 to r32 (mov)");  // description recorded for opcode 8b
+
+:(code)
+void test_copy_mem_at_rm32_to_r32() {  // expects the word at *EAX (0xaf) to be loaded into EBX
+  Reg[EAX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  8b     18                                    \n"  // copy *EAX to EBX
+      "== data 0x2000\n"
+      "af 00 00 00\n"  // 0xaf
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy r/m32 to EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0x000000af\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x8b: {  // copy r/m32 to r32: the ModR/M reg bits name the destination register
+  const uint8_t modrm = next();
+  const uint8_t dest_reg = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "copy r/m32 to " << rname(dest_reg) << end();
+  // read the source operand once, then store and report that same value
+  const int32_t value = *effective_address(modrm);
+  Reg[dest_reg].i = value;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << value << end();
+  break; }
+
+//:: jump
+
+:(code)
+void test_jump_mem_at_rm32() {  // expects a jump to the address stored at *EAX (0x8), so the b8 at 0x3 never runs
+  Reg[EAX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  ff     20                                    \n"  // jump to *EAX
+      // ModR/M in binary: 00 (indirect mode) 100 (jump to r/m32) 000 (src EAX)
+      "  b8                                 00 00 00 01\n"
+      "  b8                                 00 00 00 02\n"
+      "== data 0x2000\n"
+      "08 00 00 00\n"  // 0x8
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: ff\n"
+      "run: jump to r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: jumping to 0x00000008\n"
+      "run: 0x00000008 opcode: b8\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000003 opcode: b8");
+}
+
+:(before "End Op ff Subops")
+case 4: {  // jump: load EIP from the 32-bit value at r/m32
+  trace(Callstack_depth+1, "run") << "jump to r/m32" << end();
+  // the operand holds the absolute address to continue from
+  EIP = *effective_address(modrm);
+  trace(Callstack_depth+1, "run") << "jumping to 0x" << HEXWORD << EIP << end();
+  break;
+}
+
+//:: push
+
+:(code)
+void test_push_mem_at_rm32() {  // expects the word at *EAX (0xaf) to be pushed, with ESP dropping by 4
+  Reg[EAX].i = 0x2000;
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000014;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  ff     30                                    \n"  // push *EAX to stack
+      "== data 0x2000\n"
+      "af 00 00 00\n"  // 0xaf
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: push r/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: decrementing ESP to 0xbd000010\n"
+      "run: pushing value 0x000000af\n"
+  );
+}
+
+:(before "End Op ff Subops")
+case 6: {  // push the 32-bit value found at r/m32
+  trace(Callstack_depth+1, "run") << "push r/m32" << end();
+  const int32_t* operand = effective_address(modrm);
+  push(*operand);
+  break;
+}
+
+//:: pop
+
+:(before "End Initialize Op Names")
+put_new(Name, "8f", "pop top of stack to rm32 (pop)");  // description recorded for opcode 8f
+
+:(code)
+void test_pop_mem_at_rm32() {  // expects 0x30 to be popped into *EAX, with ESP rising by 4
+  Reg[EAX].i = 0x60;
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000000;
+  write_mem_i32(0xbd000000, 0x00000030);
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  8f     00                                    \n"  // pop stack into *EAX
+      // ModR/M in binary: 00 (indirect mode) 000 (pop r/m32) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: pop into r/m32\n"
+      "run: effective address is 0x00000060 (EAX)\n"
+      "run: popping value 0x00000030\n"
+      "run: incrementing ESP to 0xbd000004\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x8f: {  // pop stack into r/m32
+  const uint8_t modrm = next();
+  const uint8_t subop = (modrm>>3)&0x7;
+  // Only subop 0 (pop) exists for opcode 8f. Report anything else instead of
+  // silently treating it as a no-op, like opcodes 81 and c1 do.
+  if (subop != 0) {
+    cerr << "unrecognized subop for opcode 8f: " << NUM(subop) << '\n';
+    exit(1);
+  }
+  trace(Callstack_depth+1, "run") << "pop into r/m32" << end();
+  int32_t* dest = effective_address(modrm);
+  *dest = pop();  // Write multiple elements of vector<uint8_t> at once. Assumes sizeof(int) == 4 on the host as well.
+  break; }
+
+//:: special-case for loading address from disp32 rather than register
+
+:(code)
+void test_add_r32_to_mem_at_displacement() {  // mod 00 / rm 101: expects the address to come from disp32 (0x2000)
+  Reg[EBX].i = 0x10;  // source
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     1d            00 20 00 00             \n"  // add EBX to *0x2000
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 101 (dest in disp32)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is 0x00002000 (disp32)\n"
+      "run: storing 0x00000011\n"
+  );
+}
+
+:(before "End Mod 0 Special-cases(addr)")
+case 5:  // exception: mod 0b00 rm 0b101 => incoming disp32
+  addr = next32();  // no base register: the address itself is read from the instruction stream
+  trace(Callstack_depth+1, "run") << "effective address is 0x" << HEXWORD << addr << " (disp32)" << end();
+  break;
+
+//:
+
+:(code)
+void test_add_r32_to_mem_at_rm32_plus_disp8() {  // expects EAX (0x1ffe) + disp8 (2) to address the word at 0x2000
+  Reg[EBX].i = 0x10;  // source
+  Reg[EAX].i = 0x1ffe;  // dest
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     58            02                      \n"  // add EBX to *(EAX+2)
+      // ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 000 (dest EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is initially 0x00001ffe (EAX)\n"
+      "run: effective address is 0x00002000 (after adding disp8)\n"
+      "run: storing 0x00000011\n"
+  );
+}
+
+:(before "End Mod Special-cases(addr)")
+case 1: {  // indirect + disp8 addressing
+  switch (rm) {
+  default:
+    addr = Reg[rm].u;  // base address comes from the register selected by the rm bits
+    trace(Callstack_depth+1, "run") << "effective address is initially 0x" << HEXWORD << addr << " (" << rname(rm) << ")" << end();
+    break;
+  // End Mod 1 Special-cases(addr)
+  }
+  int8_t displacement = static_cast<int8_t>(next());  // signed: a disp8 byte of 0xff means -1
+  if (addr > 0) {
+    addr += displacement;
+    trace(Callstack_depth+1, "run") << "effective address is 0x" << HEXWORD << addr << " (after adding disp8)" << end();
+  }
+  else {  // zero base is treated as null: the disp8 byte was consumed above but is not applied
+    trace(Callstack_depth+1, "run") << "null address; skipping displacement" << end();
+  }
+  break;
+}
+
+:(code)
+void test_add_r32_to_mem_at_rm32_plus_negative_disp8() {  // disp8 0xff is -1: expects EAX (0x2001) - 1 to address 0x2000
+  Reg[EBX].i = 0x10;  // source
+  Reg[EAX].i = 0x2001;  // dest
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     58            ff                      \n"  // add EBX to *(EAX-1)
+      // ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 000 (dest EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is initially 0x00002001 (EAX)\n"
+      "run: effective address is 0x00002000 (after adding disp8)\n"
+      "run: storing 0x00000011\n"
+  );
+}
+
+//:
+
+:(code)
+void test_add_r32_to_mem_at_rm32_plus_disp32() {  // expects EAX (0x1ffe) + disp32 (2) to address the word at 0x2000
+  Reg[EBX].i = 0x10;  // source
+  Reg[EAX].i = 0x1ffe;  // dest
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     98            02 00 00 00             \n"  // add EBX to *(EAX+2)
+      // ModR/M in binary: 10 (indirect+disp32 mode) 011 (src EBX) 000 (dest EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is initially 0x00001ffe (EAX)\n"
+      "run: effective address is 0x00002000 (after adding disp32)\n"
+      "run: storing 0x00000011\n"
+  );
+}
+
+:(before "End Mod Special-cases(addr)")
+case 2: {  // indirect + disp32 addressing
+  switch (rm) {
+  default:
+    addr = Reg[rm].u;  // base address comes from the register selected by the rm bits
+    trace(Callstack_depth+1, "run") << "effective address is initially 0x" << HEXWORD << addr << " (" << rname(rm) << ")" << end();
+    break;
+  // End Mod 2 Special-cases(addr)
+  }
+  int32_t displacement = static_cast<int32_t>(next32());  // signed: 0xffffffff means -1
+  if (addr > 0) {
+    addr += displacement;
+    trace(Callstack_depth+1, "run") << "effective address is 0x" << HEXWORD << addr << " (after adding disp32)" << end();
+  }
+  else {  // zero base is treated as null: the disp32 was consumed above but is not applied
+    trace(Callstack_depth+1, "run") << "null address; skipping displacement" << end();
+  }
+  break;
+}
+
+:(code)
+void test_add_r32_to_mem_at_rm32_plus_negative_disp32() {  // disp32 0xffffffff is -1: expects EAX (0x2001) - 1 to address 0x2000
+  Reg[EBX].i = 0x10;  // source
+  Reg[EAX].i = 0x2001;  // dest
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     98            ff ff ff ff             \n"  // add EBX to *(EAX-1)
+      // ModR/M in binary: 10 (indirect+disp32 mode) 011 (src EBX) 000 (dest EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is initially 0x00002001 (EAX)\n"
+      "run: effective address is 0x00002000 (after adding disp32)\n"
+      "run: storing 0x00000011\n"
+  );
+}
+
+//:: copy address (lea)
+
+:(before "End Initialize Op Names")
+put_new(Name, "8d", "copy address in rm32 into r32 (lea)");  // description recorded for opcode 8d
+
+:(code)
+void test_copy_address() {  // lea with a plain register operand: expects the effective address to be EAX's value (0x2000)
+  Reg[EAX].u = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  8d     18                                    \n"  // copy address in EAX into EBX
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy address into EBX\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x8d: {  // lea: store the operand's address (not its contents) in r32
+  const uint8_t modrm = next();
+  const uint8_t dest_reg = (modrm>>3)&0x7;  // ModR/M reg bits pick the destination
+  trace(Callstack_depth+1, "run") << "copy address into " << rname(dest_reg) << end();
+  Reg[dest_reg].u = effective_address_number(modrm);
+  break;
+}
diff --git a/linux/bootstrap/015immediate_addressing.cc b/linux/bootstrap/015immediate_addressing.cc
new file mode 100644
index 00000000..17025e70
--- /dev/null
+++ b/linux/bootstrap/015immediate_addressing.cc
@@ -0,0 +1,1311 @@
+//: instructions that (immediately) contain an argument to act with
+
+:(before "End Initialize Op Names")
+put_new(Name, "05", "add imm32 to EAX (add)");  // description recorded for opcode 05
+
+:(before "End Single-Byte Opcodes")
+case 0x05: {  // add imm32 to EAX, updating SF/ZF/OF/CF
+  const int32_t imm = next32();
+  trace(Callstack_depth+1, "run") << "add imm32 0x" << HEXWORD << imm << " to EAX" << end();
+  const int32_t sum = Reg[EAX].i + imm;
+  const int64_t wide_sum = static_cast<int64_t>(Reg[EAX].i) + imm;
+  SF = (sum < 0);
+  ZF = (sum == 0);
+  OF = (sum != wide_sum);  // signed overflow: the 32-bit sum disagrees with the 64-bit one
+  // carry: redo the addition on the unsigned views and compare widths again
+  const uint32_t uimm = static_cast<uint32_t>(imm);
+  const uint32_t usum = Reg[EAX].u + uimm;
+  const uint64_t wide_usum = static_cast<uint64_t>(Reg[EAX].u) + uimm;
+  CF = (usum != wide_usum);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  Reg[EAX].i = sum;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[EAX].i << end();
+  break;
+}
+
+:(code)
+void test_add_imm32_to_EAX_signed_overflow() {  // INT32_MAX + 1: expects wrap to 0x80000000 with SF and OF set
+  Reg[EAX].i = INT32_MAX;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  05                                 01 00 00 00 \n" // add 1 to EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add imm32 0x00000001 to EAX\n"
+      "run: SF=1; ZF=0; CF=0; OF=1\n"
+      "run: storing 0x80000000\n"
+  );
+}
+
+void test_add_imm32_to_EAX_unsigned_overflow() {  // UINT32_MAX + 1: expects wrap to 0 with ZF and CF set
+  Reg[EAX].u = UINT32_MAX;
+  Reg[EBX].u = 1;  // NOTE(review): the opcode 05 handler only reads EAX; this EBX setup looks unneeded
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  05                                 01 00 00 00 \n" // add 1 to EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add imm32 0x00000001 to EAX\n"
+      "run: SF=0; ZF=1; CF=1; OF=0\n"
+      "run: storing 0x00000000\n"
+  );
+}
+
+void test_add_imm32_to_EAX_unsigned_and_signed_overflow() {  // INT32_MIN + INT32_MIN: expects 0 with ZF, CF and OF set
+  Reg[EAX].i = INT32_MIN;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  05                                 00 00 00 80 \n" // add 0x80000000 to EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add imm32 0x80000000 to EAX\n"
+      "run: SF=0; ZF=1; CF=1; OF=1\n"
+      "run: storing 0x00000000\n"
+  );
+}
+
+//:
+
+:(before "End Initialize Op Names")
+put_new(Name, "81", "combine rm32 with imm32 based on subop (add/sub/and/or/xor/cmp)");  // description recorded for opcode 81
+
+:(code)
+void test_add_imm32_to_r32() {  // direct mode: expects EBX (1) + 0x0d0c0b0a to be stored as 0x0d0c0b0b
+  Reg[EBX].i = 1;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     c3                          0a 0b 0c 0d\n"  // add 0x0d0c0b0a to EBX
+      // ModR/M in binary: 11 (direct mode) 000 (subop add) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: subop add\n"
+      "run: storing 0x0d0c0b0b\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x81: {  // combine r/m32 with imm32
+  trace(Callstack_depth+1, "run") << "combine r/m32 with imm32" << end();
+  const uint8_t modrm = next();
+  int32_t* signed_arg1 = effective_address(modrm);  // destination operand; also the left-hand input
+  const int32_t signed_arg2 = next32();  // read after the operand is resolved, before dispatching on subop
+  trace(Callstack_depth+1, "run") << "imm32 is 0x" << HEXWORD << signed_arg2 << end();
+  const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
+  switch (subop) {
+  case 0: {  // add imm32 to r/m32
+    trace(Callstack_depth+1, "run") << "subop add" << end();
+    int32_t signed_result = *signed_arg1 + signed_arg2;
+    SF = (signed_result < 0);
+    ZF = (signed_result == 0);
+    int64_t signed_full_result = static_cast<int64_t>(*signed_arg1) + signed_arg2;
+    OF = (signed_result != signed_full_result);  // overflow: 32-bit and 64-bit signed sums disagree
+    // set CF
+    uint32_t unsigned_arg1 = static_cast<uint32_t>(*signed_arg1);
+    uint32_t unsigned_arg2 = static_cast<uint32_t>(signed_arg2);
+    uint32_t unsigned_result = unsigned_arg1 + unsigned_arg2;
+    uint64_t unsigned_full_result = static_cast<uint64_t>(unsigned_arg1) + unsigned_arg2;
+    CF = (unsigned_result != unsigned_full_result);  // carry: 32-bit and 64-bit unsigned sums disagree
+    trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+    *signed_arg1 = signed_result;
+    trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end();
+    break;
+  }
+  // End Op 81 Subops
+  default:  // no case matched this subop: report it and stop
+    cerr << "unrecognized subop for opcode 81: " << NUM(subop) << '\n';
+    exit(1);
+  }
+  break;
+}
+
+:(code)
+void test_add_imm32_to_r32_signed_overflow() {  // INT32_MAX + 1 in EBX: expects wrap to 0x80000000 with SF and OF set
+  Reg[EBX].i = INT32_MAX;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     c3                          01 00 00 00\n"  // add 1 to EBX
+      // ModR/M in binary: 11 (direct mode) 000 (subop add) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x00000001\n"
+      "run: subop add\n"
+      "run: SF=1; ZF=0; CF=0; OF=1\n"
+      "run: storing 0x80000000\n"
+  );
+}
+
+void test_add_imm32_to_r32_unsigned_overflow() {  // UINT32_MAX + 1 in EBX: expects wrap to 0 with ZF and CF set
+  Reg[EBX].u = UINT32_MAX;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     c3                          01 00 00 00\n"  // add 1 to EBX
+      // ModR/M in binary: 11 (direct mode) 000 (subop add) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x00000001\n"
+      "run: subop add\n"
+      "run: SF=0; ZF=1; CF=1; OF=0\n"
+      "run: storing 0x00000000\n"
+  );
+}
+
+void test_add_imm32_to_r32_unsigned_and_signed_overflow() {  // INT32_MIN + INT32_MIN in EBX: expects 0 with ZF, CF and OF set
+  Reg[EBX].i = INT32_MIN;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     c3                          00 00 00 80\n"  // add 0x80000000 to EBX
+      // ModR/M in binary: 11 (direct mode) 000 (subop add) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x80000000\n"
+      "run: subop add\n"
+      "run: SF=0; ZF=1; CF=1; OF=1\n"
+      "run: storing 0x00000000\n"
+  );
+}
+
+//:
+
+:(code)
+void test_add_imm32_to_mem_at_rm32() {  // indirect mode: expects the word at *EBX (1) + 0x0d0c0b0a to be stored as 0x0d0c0b0b
+  Reg[EBX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     03                          0a 0b 0c 0d \n"  // add 0x0d0c0b0a to *EBX
+      // ModR/M in binary: 00 (indirect mode) 000 (subop add) 011 (dest EBX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: subop add\n"
+      "run: storing 0x0d0c0b0b\n"
+  );
+}
+
+//:: subtract
+
+:(before "End Initialize Op Names")
+put_new(Name, "2d", "subtract imm32 from EAX (sub)");  // description recorded for opcode 2d
+
+:(code)
+void test_subtract_imm32_from_EAX() {  // 0x0d0c0baa - 0x0d0c0b0a: expects 0xa0 to be stored
+  Reg[EAX].i = 0x0d0c0baa;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  2d                                 0a 0b 0c 0d \n"  // subtract 0x0d0c0b0a from EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract imm32 0x0d0c0b0a from EAX\n"
+      "run: storing 0x000000a0\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x2d: {  // subtract imm32 from EAX, updating SF/ZF/OF/CF
+  const int32_t imm = next32();
+  trace(Callstack_depth+1, "run") << "subtract imm32 0x" << HEXWORD << imm << " from EAX" << end();
+  const int32_t diff = Reg[EAX].i - imm;
+  const int64_t wide_diff = static_cast<int64_t>(Reg[EAX].i) - imm;
+  SF = (diff < 0);
+  ZF = (diff == 0);
+  OF = (diff != wide_diff);  // signed overflow: 32-bit and 64-bit differences disagree
+  // borrow: redo the subtraction on the unsigned views and compare widths again
+  const uint32_t uimm = static_cast<uint32_t>(imm);
+  const uint32_t udiff = Reg[EAX].u - uimm;
+  const uint64_t wide_udiff = static_cast<uint64_t>(Reg[EAX].u) - uimm;
+  CF = (udiff != wide_udiff);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  Reg[EAX].i = diff;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[EAX].i << end();
+  break;
+}
+
+:(code)
+void test_subtract_imm32_from_EAX_signed_overflow() {  // INT32_MIN - 1: expects wrap to INT32_MAX with only OF set
+  Reg[EAX].i = INT32_MIN;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  2d                                 01 00 00 00 \n"  // subtract 1 from EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract imm32 0x00000001 from EAX\n"
+      "run: SF=0; ZF=0; CF=0; OF=1\n"
+      "run: storing 0x7fffffff\n"  // INT32_MAX
+  );
+}
+
+void test_subtract_imm32_from_EAX_unsigned_overflow() {  // 0 - 1: expects 0xffffffff with SF and CF set
+  Reg[EAX].i = 0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  2d                                 01 00 00 00 \n"  // subtract 1 from EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract imm32 0x00000001 from EAX\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"
+      "run: storing 0xffffffff\n"
+  );
+}
+
+void test_subtract_imm32_from_EAX_signed_and_unsigned_overflow() {  // 0 - INT32_MIN: expects 0x80000000 with SF, CF and OF set
+  Reg[EAX].i = 0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  2d                                 00 00 00 80 \n"  // subtract INT32_MIN from EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract imm32 0x80000000 from EAX\n"
+      "run: SF=1; ZF=0; CF=1; OF=1\n"
+      "run: storing 0x80000000\n"
+  );
+}
+
+//:
+
+void test_subtract_imm32_from_mem_at_rm32() {  // indirect mode: expects the word at *EBX (0xa) - 1 to be stored as 9
+  Reg[EBX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     2b                          01 00 00 00 \n"  // subtract 1 from *EBX
+      // ModR/M in binary: 00 (indirect mode) 101 (subop subtract) 011 (dest EBX)
+      "== data 0x2000\n"
+      "0a 00 00 00\n"  // 0xa
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: imm32 is 0x00000001\n"
+      "run: subop subtract\n"
+      "run: storing 0x00000009\n"
+  );
+}
+
+:(before "End Op 81 Subops")
+case 5: {  // subtract imm32 from r/m32, updating SF/ZF/OF/CF
+  trace(Callstack_depth+1, "run") << "subop subtract" << end();
+  const int32_t diff = *signed_arg1 - signed_arg2;
+  const int64_t wide_diff = static_cast<int64_t>(*signed_arg1) - signed_arg2;
+  SF = (diff < 0);
+  ZF = (diff == 0);
+  OF = (diff != wide_diff);  // signed overflow: 32-bit and 64-bit differences disagree
+  // borrow: redo the subtraction on the unsigned views and compare widths again
+  const uint32_t ulhs = static_cast<uint32_t>(*signed_arg1);
+  const uint32_t urhs = static_cast<uint32_t>(signed_arg2);
+  const uint32_t udiff = ulhs - urhs;
+  const uint64_t wide_udiff = static_cast<uint64_t>(ulhs) - urhs;
+  CF = (udiff != wide_udiff);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  *signed_arg1 = diff;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end();
+  break;
+}
+
+:(code)
+void test_subtract_imm32_from_mem_at_rm32_signed_overflow() {  // INT32_MIN - INT32_MAX in memory: expects 1 with only OF set
+  Reg[EBX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     2b                          ff ff ff 7f \n"  // subtract INT32_MAX from *EBX
+      // ModR/M in binary: 00 (indirect mode) 101 (subop subtract) 011 (dest EBX)
+      "== data 0x2000\n"
+      "00 00 00 80\n"  // INT32_MIN
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: effective address contains 0x80000000\n"
+      "run: imm32 is 0x7fffffff\n"
+      "run: subop subtract\n"
+      "run: SF=0; ZF=0; CF=0; OF=1\n"
+      "run: storing 0x00000001\n"
+  );
+}
+
+void test_subtract_imm32_from_mem_at_rm32_unsigned_overflow() {  // 0 - 1 in memory: expects 0xffffffff with SF and CF set
+  Reg[EBX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     2b                          01 00 00 00 \n"  // subtract 1 from *EBX
+      // ModR/M in binary: 00 (indirect mode) 101 (subop subtract) 011 (dest EBX)
+      "== data 0x2000\n"
+      "00 00 00 00\n"  // 0
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: effective address contains 0x00000000\n"
+      "run: imm32 is 0x00000001\n"
+      "run: subop subtract\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"
+      "run: storing 0xffffffff\n"
+  );
+}
+
+void test_subtract_imm32_from_mem_at_rm32_signed_and_unsigned_overflow() {  // 0 - INT32_MIN in memory: expects 0x80000000 with SF, CF and OF set
+  Reg[EBX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     2b                          00 00 00 80 \n"  // subtract INT32_MIN from *EBX
+      // ModR/M in binary: 00 (indirect mode) 101 (subop subtract) 011 (dest EBX)
+      "== data 0x2000\n"
+      "00 00 00 00\n"  // 0
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: effective address contains 0x00000000\n"
+      "run: imm32 is 0x80000000\n"
+      "run: subop subtract\n"
+      "run: SF=1; ZF=0; CF=1; OF=1\n"
+      "run: storing 0x80000000\n"
+  );
+}
+
+//:
+
+void test_subtract_imm32_from_r32() {  // direct mode: expects EBX (10) - 1 to be stored as 9
+  Reg[EBX].i = 10;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     eb                          01 00 00 00 \n"  // subtract 1 from EBX
+      // ModR/M in binary: 11 (direct mode) 101 (subop subtract) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x00000001\n"
+      "run: subop subtract\n"
+      "run: storing 0x00000009\n"
+  );
+}
+
+//:: shift left
+
+:(before "End Initialize Op Names")
+put_new(Name, "c1", "shift rm32 by imm8 bits depending on subop (sal/sar/shl/shr)");  // description recorded for opcode c1
+
+:(code)
+void test_shift_left_r32_with_imm8() {  // 13 << 1: expects 26 (0x1a) in EBX; the trace text says "CL" although the count is imm8
+  Reg[EBX].i = 13;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c1     e3                          01          \n"  // shift EBX left by 1 bit
+      // ModR/M in binary: 11 (direct mode) 100 (subop shift left) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift left by CL bits\n"
+      "run: storing 0x0000001a\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xc1: {  // shift r/m32 by an imm8 count; the ModR/M subop picks the kind of shift
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "operate on r/m32" << end();
+  int32_t* arg1 = effective_address(modrm);
+  const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
+  switch (subop) {
+  case 4: {  // shift left r/m32 by imm8 bits (the trace text below still says "CL")
+    trace(Callstack_depth+1, "run") << "subop: shift left by CL bits" << end();
+    uint8_t count = next() & 0x1f;  // count is read from the instruction stream; only its low 5 bits are used
+    // OF is only defined if count is 1
+    if (count == 1) {
+      bool msb = (*arg1 & 0x80000000) >> 1;  // converted to bool, so the '>> 1' is moot: true iff bit 31 is set
+      bool pnsb = (*arg1 & 0x40000000);  // true iff bit 30 is set
+      OF = (msb != pnsb);  // the sign bit would change when shifting left by one
+    }
+    *arg1 = (*arg1 << count);
+    ZF = (*arg1 == 0);
+    SF = (*arg1 < 0);
+    // CF is left unchanged here. NOTE(review): x86 sets it to the last bit shifted out -- confirm whether to model that
+    trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+    trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << end();
+    break;
+  }
+  // End Op c1 Subops
+  default:  // no case matched this subop: report it and stop
+    cerr << "unrecognized subop for opcode c1: " << NUM(subop) << '\n';
+    exit(1);
+  }
+  break;
+}
+
+//:: shift right arithmetic
+
+:(code)
+void test_shift_right_arithmetic_r32_with_imm8() {  // 26 >> 1: expects 13 (0xd) in EBX
+  Reg[EBX].i = 26;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c1     fb                          01          \n"  // shift EBX right by 1 bit
+      // ModR/M in binary: 11 (direct mode) 111 (subop shift right arithmetic) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while preserving sign\n"
+      "run: storing 0x0000000d\n"
+  );
+}
+
+:(before "End Op c1 Subops")
+case 7: {  // shift right r/m32 by imm8 bits, preserving sign (the trace text still says "CL")
+  trace(Callstack_depth+1, "run") << "subop: shift right by CL bits, while preserving sign" << end();
+  uint8_t count = next() & 0x1f;
+  int32_t result = (*arg1 >> count);
+  ZF = (result == 0);  // flags describe the shifted result, not the original operand (e.g. 1>>1 is zero)
+  SF = (result < 0);
+  // OF is only defined if count is 1
+  if (count == 1) OF = false;
+  // CF is the last bit shifted out; leave it alone for count 0 rather than shift by -1 (undefined behaviour)
+  if (count > 0) CF = ((*arg1 >> (count-1)) & 0x1);
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  *arg1 = result;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << end();
+  break;
+}
+
+:(code)
+void test_shift_right_arithmetic_odd_r32_with_imm8() {  // 27 >> 1: the low bit is dropped, expects 13
+  Reg[EBX].i = 27;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c1     fb                          01          \n"  // shift EBX right by 1 bit
+      // ModR/M in binary: 11 (direct mode) 111 (subop shift right arithmetic) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while preserving sign\n"
+      // result: 13
+      "run: storing 0x0000000d\n"
+  );
+}
+
+:(code)
+void test_shift_right_arithmetic_negative_r32_with_imm8() {  // -3 >> 1 with the sign kept: expects -2 (0xfffffffe)
+  Reg[EBX].i = 0xfffffffd;  // -3
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c1     fb                          01          \n"  // shift EBX right by 1 bit, while preserving sign
+      // ModR/M in binary: 11 (direct mode) 111 (subop shift right arithmetic) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while preserving sign\n"
+      // result: -2
+      "run: storing 0xfffffffe\n"
+  );
+}
+
+//:: shift right logical
+
+:(code)
+void test_shift_right_logical_r32_with_imm8() {  // 26 >> 1 with zero fill: expects 13 (0xd) in EBX
+  Reg[EBX].i = 26;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c1     eb                          01          \n"  // shift EBX right by 1 bit, while padding zeroes
+      // ModR/M in binary: 11 (direct mode) 101 (subop shift right logical) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while padding zeroes\n"
+      "run: storing 0x0000000d\n"
+  );
+}
+
+:(before "End Op c1 Subops")
+case 5: {  // shift right r/m32 by imm8 bits, padding zeroes (the trace text still says "CL")
+  trace(Callstack_depth+1, "run") << "subop: shift right by CL bits, while padding zeroes" << end();
+  uint8_t count = next() & 0x1f;
+  // OF is only defined if count is 1
+  if (count == 1) {
+    // For a logical right shift, OF is the most-significant bit of the
+    // original operand (comparing the top two bits is the shift-left rule).
+    OF = ((*arg1 & 0x80000000) != 0);
+  }
+  uint32_t* uarg1 = reinterpret_cast<uint32_t*>(arg1);  // unsigned view so that >> fills with zeroes
+  *uarg1 = (*uarg1 >> count);
+  ZF = (*uarg1 == 0);
+  // result is always positive by definition
+  SF = false;
+  // CF undefined
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << end();
+  break;
+}
+
+:(code)
+void test_shift_right_logical_odd_r32_with_imm8() {  // 27 >> 1 with zero fill: the low bit is dropped, expects 13
+  Reg[EBX].i = 27;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c1     eb                          01          \n"  // shift EBX right by 1 bit, while padding zeroes
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while padding zeroes\n"
+      // result: 13
+      "run: storing 0x0000000d\n"
+  );
+}
+
+:(code)
+void test_shift_right_logical_negative_r32_with_imm8() {  // 0xfffffffd >> 1 with zero fill: expects 0x7ffffffe (sign bit cleared)
+  Reg[EBX].i = 0xfffffffd;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c1     eb                          01          \n"  // shift EBX right by 1 bit, while padding zeroes
+      // ModR/M in binary: 11 (direct mode) 101 (subop shift right logical) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: operate on r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: subop: shift right by CL bits, while padding zeroes\n"
+      "run: storing 0x7ffffffe\n"
+  );
+}
+
+//:: and
+
+:(before "End Initialize Op Names")
+put_new(Name, "25", "EAX = bitwise AND of imm32 with EAX (and)");  // description recorded for opcode 25
+
+:(code)
+void test_and_EAX_with_imm32() {  // 0xff & 0x0d0c0b0a: expects 0x0a in EAX
+  Reg[EAX].i = 0xff;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  25                                 0a 0b 0c 0d \n"  // and 0x0d0c0b0a with EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: and imm32 0x0d0c0b0a with EAX\n"
+      "run: storing 0x0000000a\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x25: {  // EAX &= imm32
+  // AND yields the same bits whether the operands are viewed as signed or
+  // unsigned, so the signed view of the register is used directly
+  const int32_t mask = next32();
+  trace(Callstack_depth+1, "run") << "and imm32 0x" << HEXWORD << mask << " with EAX" << end();
+  Reg[EAX].i = Reg[EAX].i & mask;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[EAX].i << end();
+  OF = false;  // this op always clears OF...
+  CF = false;  // ...and CF
+  ZF = (Reg[EAX].i == 0);
+  SF = (Reg[EAX].i >> 31);  // top bit of the result
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:
+
+:(code)
+void test_and_imm32_with_mem_at_rm32() {
+  Reg[EBX].i = 0x2000;  // EBX holds the address of the data segment below
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     23                          0a 0b 0c 0d \n"  // and 0x0d0c0b0a with *EBX
+      // ModR/M in binary: 00 (indirect mode) 100 (subop and) 011 (dest EBX)
+      "== data 0x2000\n"
+      "ff 00 00 00\n"  // 0xff, least significant byte first
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: subop and\n"
+      "run: storing 0x0000000a\n"  // 0xff & 0x0d0c0b0a
+  );
+}
+
+:(before "End Op 81 Subops")
+case 4: {  // subop 4 of opcode 81: r/m32 &= imm32
+  trace(Callstack_depth+1, "run") << "subop and" << end();
+  // AND yields the same bits whether the operands are viewed as signed or
+  // unsigned, so the signed views are used directly
+  *signed_arg1 = *signed_arg1 & signed_arg2;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end();
+  OF = false;  // this op always clears OF...
+  CF = false;  // ...and CF
+  ZF = (*signed_arg1 == 0);
+  SF = (*signed_arg1 >> 31);  // top bit of the result
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:
+
+:(code)
+void test_and_imm32_with_r32() {
+  Reg[EBX].i = 0xff;  // register operand this time (direct mode), not memory
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     e3                          0a 0b 0c 0d \n"  // and 0x0d0c0b0a with EBX
+      // ModR/M in binary: 11 (direct mode) 100 (subop and) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: subop and\n"
+      "run: storing 0x0000000a\n"  // 0xff & 0x0d0c0b0a
+  );
+}
+
+//:: or
+
+:(before "End Initialize Op Names")
+put_new(Name, "0d", "EAX = bitwise OR of imm32 with EAX (or)");  // description for opcode 0d, implemented by 'case 0x0d'
+
+:(code)
+void test_or_EAX_with_imm32() {
+  Reg[EAX].i = 0xd0c0b0a0;  // shares no set bits with the immediate, so OR merges the nibbles
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0d                                 0a 0b 0c 0d \n"  // or 0x0d0c0b0a with EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: or imm32 0x0d0c0b0a with EAX\n"
+      "run: storing 0xddccbbaa\n"  // 0xd0c0b0a0 | 0x0d0c0b0a
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x0d: {  // EAX |= imm32
+  // OR yields the same bits whether the operands are viewed as signed or
+  // unsigned, so the signed view of the register is used directly
+  const int32_t bits = next32();
+  trace(Callstack_depth+1, "run") << "or imm32 0x" << HEXWORD << bits << " with EAX" << end();
+  Reg[EAX].i = Reg[EAX].i | bits;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[EAX].i << end();
+  OF = false;  // this op always clears OF...
+  CF = false;  // ...and CF
+  ZF = (Reg[EAX].i == 0);
+  SF = (Reg[EAX].i >> 31);  // top bit of the result
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:
+
+:(code)
+void test_or_imm32_with_mem_at_rm32() {
+  Reg[EBX].i = 0x2000;  // EBX holds the address of the data segment below
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     0b                          0a 0b 0c 0d \n"  // or 0x0d0c0b0a with *EBX
+      // ModR/M in binary: 00 (indirect mode) 001 (subop or) 011 (dest EBX)
+      "== data 0x2000\n"
+      "a0 b0 c0 d0\n"  // 0xd0c0b0a0, least significant byte first
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: subop or\n"
+      "run: storing 0xddccbbaa\n"  // 0xd0c0b0a0 | 0x0d0c0b0a
+  );
+}
+
+:(before "End Op 81 Subops")
+case 1: {  // subop 1 of opcode 81: r/m32 |= imm32
+  trace(Callstack_depth+1, "run") << "subop or" << end();
+  // OR yields the same bits whether the operands are viewed as signed or
+  // unsigned, so the signed views are used directly
+  *signed_arg1 = *signed_arg1 | signed_arg2;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end();
+  OF = false;  // this op always clears OF...
+  CF = false;  // ...and CF
+  ZF = (*signed_arg1 == 0);
+  SF = (*signed_arg1 >> 31);  // top bit of the result
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+:(code)
+void test_or_imm32_with_r32() {
+  Reg[EBX].i = 0xd0c0b0a0;  // register operand this time (direct mode), not memory
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     cb                          0a 0b 0c 0d \n"  // or 0x0d0c0b0a with EBX
+      // ModR/M in binary: 11 (direct mode) 001 (subop or) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: subop or\n"
+      "run: storing 0xddccbbaa\n"  // 0xd0c0b0a0 | 0x0d0c0b0a
+  );
+}
+
+//:: xor
+
+:(before "End Initialize Op Names")
+put_new(Name, "35", "EAX = bitwise XOR of imm32 with EAX (xor)");  // description for opcode 35, implemented by 'case 0x35'
+
+:(code)
+void test_xor_EAX_with_imm32() {
+  Reg[EAX].i = 0xddccb0a0;  // upper half overlaps the immediate's set bits, lower half doesn't
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  35                                 0a 0b 0c 0d \n"  // xor 0x0d0c0b0a with EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: xor imm32 0x0d0c0b0a with EAX\n"
+      "run: storing 0xd0c0bbaa\n"  // 0xddccb0a0 ^ 0x0d0c0b0a
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x35: {  // EAX ^= imm32
+  // XOR yields the same bits whether the operands are viewed as signed or
+  // unsigned, so the signed view of the register is used directly
+  const int32_t toggles = next32();
+  trace(Callstack_depth+1, "run") << "xor imm32 0x" << HEXWORD << toggles << " with EAX" << end();
+  Reg[EAX].i = Reg[EAX].i ^ toggles;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[EAX].i << end();
+  OF = false;  // this op always clears OF...
+  CF = false;  // ...and CF
+  ZF = (Reg[EAX].i == 0);
+  SF = (Reg[EAX].i >> 31);  // top bit of the result
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+//:
+
+:(code)
+void test_xor_imm32_with_mem_at_rm32() {
+  Reg[EBX].i = 0x2000;  // EBX holds the address of the data segment below
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     33                          0a 0b 0c 0d \n"  // xor 0x0d0c0b0a with *EBX
+      // ModR/M in binary: 00 (indirect mode) 110 (subop xor) 011 (dest EBX)
+      "== data 0x2000\n"
+      "a0 b0 c0 d0\n"  // 0xd0c0b0a0, least significant byte first
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: subop xor\n"
+      "run: storing 0xddccbbaa\n"  // 0xd0c0b0a0 ^ 0x0d0c0b0a (no shared bits, so same as OR)
+  );
+}
+
+:(before "End Op 81 Subops")
+case 6: {  // subop 6 of opcode 81: r/m32 ^= imm32
+  trace(Callstack_depth+1, "run") << "subop xor" << end();
+  // XOR yields the same bits whether the operands are viewed as signed or
+  // unsigned, so the signed views are used directly
+  *signed_arg1 = *signed_arg1 ^ signed_arg2;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end();
+  OF = false;  // this op always clears OF...
+  CF = false;  // ...and CF
+  ZF = (*signed_arg1 == 0);
+  SF = (*signed_arg1 >> 31);  // top bit of the result
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+:(code)
+void test_xor_imm32_with_r32() {
+  Reg[EBX].i = 0xd0c0b0a0;  // register operand this time (direct mode), not memory
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     f3                          0a 0b 0c 0d \n"  // xor 0x0d0c0b0a with EBX
+      // ModR/M in binary: 11 (direct mode) 110 (subop xor) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: subop xor\n"
+      "run: storing 0xddccbbaa\n"  // 0xd0c0b0a0 ^ 0x0d0c0b0a
+  );
+}
+
+//:: compare (cmp)
+
+:(before "End Initialize Op Names")
+put_new(Name, "3d", "compare: set SF if EAX < imm32 (cmp)");  // description for opcode 3d; 'case 0x3d' also updates ZF, CF and OF
+
+:(code)
+void test_compare_EAX_with_imm32_greater() {
+  Reg[EAX].i = 0x0d0c0b0a;  // 3 more than the immediate
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3d                                 07 0b 0c 0d \n"  // compare EAX with 0x0d0c0b07
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EAX with imm32 0x0d0c0b07\n"
+      "run: SF=0; ZF=0; CF=0; OF=0\n"  // EAX - imm32 is small and positive: no flag is set
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x3d: {  // compare EAX with imm32: set SF/ZF/OF/CF as for EAX - imm32; EAX itself is not written
+  const int32_t signed_arg1 = Reg[EAX].i;
+  const int32_t signed_arg2 = next32();
+  trace(Callstack_depth+1, "run") << "compare EAX with imm32 0x" << HEXWORD << signed_arg2 << end();
+  const uint32_t unsigned_arg1 = static_cast<uint32_t>(signed_arg1);
+  const uint32_t unsigned_arg2 = static_cast<uint32_t>(signed_arg2);
+  const uint32_t unsigned_difference = unsigned_arg1 - unsigned_arg2;  // unsigned subtraction wraps modulo 2^32
+  const int32_t signed_difference = static_cast<int32_t>(unsigned_difference);  // same 32 bits; subtracting as int32_t would be undefined behavior on overflow
+  SF = (signed_difference < 0);
+  ZF = (signed_difference == 0);
+  const int64_t full_signed_difference = static_cast<int64_t>(signed_arg1) - signed_arg2;  // exact result: cannot overflow in 64 bits
+  OF = (signed_difference != full_signed_difference);  // the 32-bit signed result lost information
+  const uint64_t full_unsigned_difference = static_cast<uint64_t>(unsigned_arg1) - unsigned_arg2;
+  CF = (unsigned_difference != full_unsigned_difference);  // a borrow was needed, i.e. arg1 < arg2 when unsigned
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+:(code)
+void test_compare_EAX_with_imm32_lesser_unsigned_and_signed() {
+  Reg[EAX].i = 0x0a0b0c07;  // 6 less than the immediate; both values are positive
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3d                                 0d 0c 0b 0a \n"  // compare EAX with 0x0a0b0c0d
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EAX with imm32 0x0a0b0c0d\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"  // negative difference (SF) that also needs a borrow (CF)
+  );
+}
+
+void test_compare_EAX_with_imm32_lesser_unsigned_and_signed_due_to_overflow() {
+  Reg[EAX].i = INT32_MAX;  // INT32_MAX - INT32_MIN does not fit in 32 signed bits
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3d                                 00 00 00 80\n"  // compare EAX with INT32_MIN
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EAX with imm32 0x80000000\n"
+      "run: SF=1; ZF=0; CF=1; OF=1\n"  // SF is set only because the signed result overflowed (OF)
+  );
+}
+
+void test_compare_EAX_with_imm32_lesser_signed() {
+  Reg[EAX].i = -1;  // less than 1 when signed, but 0xffffffff when unsigned
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3d                                 01 00 00 00\n"  // compare EAX with 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EAX with imm32 0x00000001\n"
+      "run: SF=1; ZF=0; CF=0; OF=0\n"  // negative difference, but no unsigned borrow
+  );
+}
+
+void test_compare_EAX_with_imm32_lesser_unsigned() {
+  Reg[EAX].i = 1;  // greater than -1 when signed, but less than 0xffffffff when unsigned
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3d                                 ff ff ff ff\n"  // compare EAX with -1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EAX with imm32 0xffffffff\n"
+      "run: SF=0; ZF=0; CF=1; OF=0\n"  // positive signed difference (2), but an unsigned borrow
+  );
+}
+
+void test_compare_EAX_with_imm32_equal() {
+  Reg[EAX].i = 0x0d0c0b0a;  // identical to the immediate
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  3d                                 0a 0b 0c 0d \n"  // compare 0x0d0c0b0a with EAX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare EAX with imm32 0x0d0c0b0a\n"
+      "run: SF=0; ZF=1; CF=0; OF=0\n"  // zero difference sets only ZF
+  );
+}
+
+//:
+
+void test_compare_imm32_with_r32_greater() {
+  Reg[EBX].i = 0x0d0c0b0a;  // 3 more than the immediate
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     fb                          07 0b 0c 0d \n"  // compare 0x0d0c0b07 with EBX
+      // ModR/M in binary: 11 (direct mode) 111 (subop compare) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x0d0c0b07\n"
+      "run: SF=0; ZF=0; CF=0; OF=0\n"  // EBX - imm32 is small and positive: no flag is set
+  );
+}
+
+:(before "End Op 81 Subops")
+case 7: {  // subop 7 of opcode 81: set SF/ZF/OF/CF as for r/m32 - imm32; the operand itself is not written
+  trace(Callstack_depth+1, "run") << "subop compare" << end();
+  const uint32_t unsigned_arg1 = static_cast<uint32_t>(*signed_arg1);
+  const uint32_t unsigned_arg2 = static_cast<uint32_t>(signed_arg2);
+  const uint32_t unsigned_difference = unsigned_arg1 - unsigned_arg2;  // unsigned subtraction wraps modulo 2^32
+  const int32_t signed_difference = static_cast<int32_t>(unsigned_difference);  // same 32 bits; subtracting as int32_t would be undefined behavior on overflow
+  SF = (signed_difference < 0);
+  ZF = (signed_difference == 0);
+  const int64_t full_signed_difference = static_cast<int64_t>(*signed_arg1) - signed_arg2;  // exact result: cannot overflow in 64 bits
+  OF = (signed_difference != full_signed_difference);  // the 32-bit signed result lost information
+  const uint64_t full_unsigned_difference = static_cast<uint64_t>(unsigned_arg1) - unsigned_arg2;
+  CF = (unsigned_difference != full_unsigned_difference);  // a borrow was needed, i.e. arg1 < arg2 when unsigned
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
+
+:(code)
+void test_compare_rm32_with_imm32_lesser_unsigned_and_signed() {
+  Reg[EAX].i = 0x0a0b0c07;  // 6 less than the immediate; both values are positive
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     f8                          0d 0c 0b 0a \n"  // compare EAX with 0x0a0b0c0d
+      // ModR/M in binary: 11 (direct mode) 111 (subop compare) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EAX\n"
+      "run: imm32 is 0x0a0b0c0d\n"
+      "run: subop compare\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"  // negative difference (SF) that also needs a borrow (CF)
+  );
+}
+
+void test_compare_rm32_with_imm32_lesser_unsigned_and_signed_due_to_overflow() {
+  Reg[EAX].i = INT32_MAX;  // INT32_MAX - INT32_MIN does not fit in 32 signed bits
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     f8                          00 00 00 80\n"  // compare EAX with INT32_MIN
+      // ModR/M in binary: 11 (direct mode) 111 (subop compare) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EAX\n"
+      "run: imm32 is 0x80000000\n"
+      "run: subop compare\n"
+      "run: SF=1; ZF=0; CF=1; OF=1\n"  // SF is set only because the signed result overflowed (OF)
+  );
+}
+
+void test_compare_rm32_with_imm32_lesser_signed() {
+  Reg[EAX].i = -1;  // less than 1 when signed, but 0xffffffff when unsigned
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     f8                          01 00 00 00\n"  // compare EAX with 1
+      // ModR/M in binary: 11 (direct mode) 111 (subop compare) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EAX\n"
+      "run: imm32 is 0x00000001\n"
+      "run: subop compare\n"
+      "run: SF=1; ZF=0; CF=0; OF=0\n"  // negative difference, but no unsigned borrow
+  );
+}
+
+void test_compare_rm32_with_imm32_lesser_unsigned() {
+  Reg[EAX].i = 1;  // greater than -1 when signed, but less than 0xffffffff when unsigned
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     f8                          ff ff ff ff\n"  // compare EAX with -1
+      // ModR/M in binary: 11 (direct mode) 111 (subop compare) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EAX\n"
+      "run: imm32 is 0xffffffff\n"
+      "run: subop compare\n"
+      "run: SF=0; ZF=0; CF=1; OF=0\n"  // positive signed difference (2), but an unsigned borrow
+  );
+}
+
+:(code)
+void test_compare_imm32_with_r32_equal() {
+  Reg[EBX].i = 0x0d0c0b0a;  // identical to the immediate
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     fb                          0a 0b 0c 0d \n"  // compare 0x0d0c0b0a with EBX
+      // ModR/M in binary: 11 (direct mode) 111 (subop compare) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: r/m32 is EBX\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: SF=0; ZF=1; CF=0; OF=0\n"  // zero difference sets only ZF
+  );
+}
+
+:(code)
+void test_compare_imm32_with_mem_at_rm32_greater() {
+  Reg[EBX].i = 0x2000;  // EBX holds the address of the data segment below
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     3b                          07 0b 0c 0d \n"  // compare 0x0d0c0b07 with *EBX
+      // ModR/M in binary: 00 (indirect mode) 111 (subop compare) 011 (dest EBX)
+      "== data 0x2000\n"
+      "0a 0b 0c 0d\n"  // 0x0d0c0b0a, 3 more than the immediate
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: imm32 is 0x0d0c0b07\n"
+      "run: SF=0; ZF=0; CF=0; OF=0\n"  // *EBX - imm32 is small and positive: no flag is set
+  );
+}
+
+:(code)
+void test_compare_imm32_with_mem_at_rm32_lesser() {
+  Reg[EAX].i = 0x2000;  // EAX holds the address of the data segment below
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     38                          0a 0b 0c 0d \n"  // compare 0x0d0c0b0a with *EAX
+      // ModR/M in binary: 00 (indirect mode) 111 (subop compare) 000 (dest EAX)
+      "== data 0x2000\n"
+      "07 0b 0c 0d\n"  // 0x0d0c0b07, 3 less than the immediate
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: SF=1; ZF=0; CF=1; OF=0\n"  // negative difference (SF) that also needs a borrow (CF)
+  );
+}
+
+:(code)
+void test_compare_imm32_with_mem_at_rm32_equal() {
+  Reg[EBX].i = 0x2000;  // EBX holds the address of the data segment below
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  81     3b                          0a 0b 0c 0d \n"  // compare 0x0d0c0b0a with *EBX
+      // ModR/M in binary: 00 (indirect mode) 111 (subop compare) 011 (dest EBX)
+      "== data 0x2000\n"
+      "0a 0b 0c 0d\n"  // 0x0d0c0b0a
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: combine r/m32 with imm32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: imm32 is 0x0d0c0b0a\n"
+      "run: subop compare\n"
+      "run: SF=0; ZF=1; CF=0; OF=0\n"  // zero difference sets only ZF
+  );
+}
+
+//:: copy (mov)
+
+:(before "End Initialize Op Names")
+// b8 defined earlier to copy imm32 to EAX; b9..bf below share one handler that picks the register from the opcode's low 3 bits
+put_new(Name, "b9", "copy imm32 to ECX (mov)");
+put_new(Name, "ba", "copy imm32 to EDX (mov)");
+put_new(Name, "bb", "copy imm32 to EBX (mov)");
+put_new(Name, "bc", "copy imm32 to ESP (mov)");
+put_new(Name, "bd", "copy imm32 to EBP (mov)");
+put_new(Name, "be", "copy imm32 to ESI (mov)");
+put_new(Name, "bf", "copy imm32 to EDI (mov)");
+
+:(code)
+void test_copy_imm32_to_r32() {
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  bb                                 0a 0b 0c 0d \n"  // copy 0x0d0c0b0a to EBX
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy imm32 0x0d0c0b0a to EBX\n"  // destination comes from the opcode itself: 0xbb & 0x7 == 3
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xb9:
+case 0xba:
+case 0xbb:
+case 0xbc:
+case 0xbd:
+case 0xbe:
+case 0xbf: {  // r32 = imm32, with the destination register encoded in the opcode
+  const int32_t imm = next32();
+  const uint8_t target = op & 0x7;  // low 3 bits of the opcode select the register
+  trace(Callstack_depth+1, "run") << "copy imm32 0x" << HEXWORD << imm << " to " << rname(target) << end();
+  Reg[target].i = imm;
+  break;
+}
+
+//:
+
+:(before "End Initialize Op Names")
+put_new(Name, "c7", "copy imm32 to rm32 with subop 0 (mov)");  // description for opcode c7; 'case 0xc7' rejects every other subop
+
+:(code)
+void test_copy_imm32_to_mem_at_rm32() {
+  Reg[EBX].i = 0x60;  // destination address for the store
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c7     03                          0a 0b 0c 0d \n"  // copy 0x0d0c0b0a to *EBX
+      // ModR/M in binary: 00 (indirect mode) 000 (subop) 011 (dest EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy imm32 to r/m32\n"
+      "run: effective address is 0x00000060 (EBX)\n"
+      "run: imm32 is 0x0d0c0b0a\n"  // the immediate is decoded after the effective address
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xc7: {  // copy imm32 to r/m32
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "copy imm32 to r/m32" << end();
+  const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
+  if (subop != 0) {  // any other subop aborts the whole program
+    cerr << "unrecognized subop for opcode c7: " << NUM(subop) << " (only 0/copy currently implemented)\n";
+    exit(1);
+  }
+  int32_t* dest = effective_address(modrm);  // resolve the destination before reading the immediate
+  const int32_t src = next32();
+  trace(Callstack_depth+1, "run") << "imm32 is 0x" << HEXWORD << src << end();
+  *dest = src;  // Write multiple elements of vector<uint8_t> at once. Assumes sizeof(int) == 4 on the host as well.
+  break;
+}
+
+//:: push
+
+:(before "End Initialize Op Names")
+put_new(Name, "68", "push imm32 to stack (push)");  // description for opcode 68, implemented by 'case 0x68'
+
+:(code)
+void test_push_imm32() {
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000014;  // stack pointer inside the segment just added
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  68                                 af 00 00 00 \n"  // push 0xaf to stack
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: push imm32 0x000000af\n"
+      "run: ESP is now 0xbd000010\n"  // 4 bytes below the initial ESP
+      "run: contents at ESP: 0x000000af\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x68: {  // push imm32 to stack
+  const uint32_t val = static_cast<uint32_t>(next32());  // immediate, reinterpreted as unsigned for push()
+  trace(Callstack_depth+1, "run") << "push imm32 0x" << HEXWORD << val << end();
+//?   cerr << "push: " << val << " => " << Reg[ESP].u << '\n';
+  push(val);
+  trace(Callstack_depth+1, "run") << "ESP is now 0x" << HEXWORD << Reg[ESP].u << end();
+  trace(Callstack_depth+1, "run") << "contents at ESP: 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << end();  // read back what is now at the top of the stack
+  break;
+}
+
+//:: multiply
+
+:(before "End Initialize Op Names")
+put_new(Name, "69", "multiply rm32 by imm32 and store result in r32 (imul)");  // description for opcode 69, implemented by 'case 0x69'
+
+:(code)
+void test_multiply_imm32() {
+  Reg[EAX].i = 2;  // destination; its old value must not affect the product
+  Reg[EBX].i = 3;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  69     c3                          04 00 00 00 \n"  // EAX = EBX * 4
+      // ModR/M in binary: 11 (direct) 000 (dest EAX) 011 (src EBX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: r/m32 is EBX\n"  // the r/m32 operand is decoded first, since the immediate comes last in the encoding
+      "run: multiply r/m32 by 0x00000004 and store result in EAX\n"
+      "run: storing 0x0000000c\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x69: {  // r32 = r/m32 * imm32 (signed)
+  const uint8_t modrm = next();
+  const uint8_t rdest = (modrm>>3)&0x7;  // middle 3 bits of ModR/M name the destination register
+  const int32_t* signed_arg1 = effective_address(modrm);  // must run before next32(): any SIB/displacement bytes precede the immediate
+  const int32_t val = next32();
+  trace(Callstack_depth+1, "run") << "multiply r/m32 by 0x" << HEXWORD << val << " and store result in " << rname(rdest) << end();
+  int64_t full_result = static_cast<int64_t>(*signed_arg1) * val;  // exact product: cannot overflow in 64 bits
+  int32_t result = static_cast<int32_t>(full_result);  // low 32 bits; multiplying as int32_t would be undefined behavior on overflow
+  OF = (result != full_result);  // the product did not fit in 32 signed bits
+  CF = OF;
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();  // SF and ZF are left untouched here
+  Reg[rdest].i = result;
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[rdest].i << end();
+  break;
+}
diff --git a/linux/bootstrap/016index_addressing.cc b/linux/bootstrap/016index_addressing.cc
new file mode 100644
index 00000000..f6c6f2aa
--- /dev/null
+++ b/linux/bootstrap/016index_addressing.cc
@@ -0,0 +1,155 @@
+//: operating on memory at the address provided by some register plus optional scale and offset
+
+:(code)
+void test_add_r32_to_mem_at_rm32_with_sib() {
+  Reg[EBX].i = 0x10;  // source
+  Reg[EAX].i = 0x2000;  // base register named by the SIB byte
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     1c      20                              \n"  // add EBX to *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 100 (dest in SIB)
+      // SIB in binary: 00 (scale 1) 100 (no index) 000 (base EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is initially 0x00002000 (EAX)\n"
+      "run: effective address is 0x00002000\n"  // unchanged: no index register to add
+      "run: storing 0x00000011\n"  // 1 + 0x10
+  );
+}
+
+:(before "End Mod 0 Special-cases(addr)")
+case 4:  // exception: mod 0b00 rm 0b100 => incoming SIB (scale-index-base) byte
+  addr = effective_address_from_sib(mod);  // with mod 0, a SIB base field of 0b101 means a disp32 follows instead of using EBP
+  break;
+:(code)
+uint32_t effective_address_from_sib(uint8_t mod) {  // consume the SIB byte and return base (or disp32) + index*scale
+  const uint8_t sib = next();  // bit layout, high to low: scale (2 bits), index (3 bits), base (3 bits)
+  const uint8_t base = sib&0x7;
+  uint32_t addr = 0;
+  if (base != EBP || mod != 0) {
+    addr = Reg[base].u;
+    trace(Callstack_depth+1, "run") << "effective address is initially 0x" << HEXWORD << addr << " (" << rname(base) << ")" << end();
+  }
+  else {
+    // base == EBP && mod == 0: the base field doesn't name a register; a disp32 follows the SIB byte
+    addr = next32();  // ignore base
+    trace(Callstack_depth+1, "run") << "effective address is initially 0x" << HEXWORD << addr << " (disp32)" << end();
+  }
+  const uint8_t index = (sib>>3)&0x7;
+  if (index == ESP) {
+    // ignore index and scale: an index field naming ESP means "no index"
+    trace(Callstack_depth+1, "run") << "effective address is 0x" << HEXWORD << addr << end();
+  }
+  else {
+    const uint8_t scale = (1 << (sib>>6));  // 1, 2, 4 or 8
+    addr += static_cast<uint32_t>(Reg[index].i)*scale;  // index is signed, but multiply as unsigned: wraps modulo 2^32 (same address bits) instead of undefined behavior on int overflow
+    trace(Callstack_depth+1, "run") << "effective address is 0x" << HEXWORD << addr << " (after adding " << rname(index) << "*" << NUM(scale) << ")" << end();
+  }
+  return addr;
+}
+
+:(code)
+void test_add_r32_to_mem_at_base_r32_index_r32() {
+  Reg[EBX].i = 0x10;  // source
+  Reg[EAX].i = 0x1ffe;  // dest base
+  Reg[ECX].i = 0x2;  // dest index; base + index == 0x2000
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     1c      08                              \n"  // add EBX to *(EAX+ECX)
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 100 (dest in SIB)
+      // SIB in binary: 00 (scale 1) 001 (index ECX) 000 (base EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is initially 0x00001ffe (EAX)\n"
+      "run: effective address is 0x00002000 (after adding ECX*1)\n"
+      "run: storing 0x00000011\n"  // 1 + 0x10
+  );
+}
+
+:(code)
+void test_add_r32_to_mem_at_displacement_using_sib() {
+  Reg[EBX].i = 0x10;  // source
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     1c      25    00 20 00 00               \n"  // add EBX to *0x2000
+      // ModR/M in binary: 00 (indirect mode) 011 (src EBX) 100 (dest in SIB)
+      // SIB in binary: 00 (scale 1) 100 (no index) 101 (not EBP but disp32)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is initially 0x00002000 (disp32)\n"  // the address comes straight from the instruction, not from a register
+      "run: effective address is 0x00002000\n"
+      "run: storing 0x00000011\n"  // 1 + 0x10
+  );
+}
+
+//:
+
+:(code)
+void test_add_r32_to_mem_at_base_r32_index_r32_plus_disp8() {
+  Reg[EBX].i = 0x10;  // source
+  Reg[EAX].i = 0x1ff9;  // dest base
+  Reg[ECX].i = 0x5;  // dest index; base + index + disp8 == 0x2000
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     5c      08    02                        \n"  // add EBX to *(EAX+ECX+2)
+      // ModR/M in binary: 01 (indirect+disp8 mode) 011 (src EBX) 100 (dest in SIB)
+      // SIB in binary: 00 (scale 1) 001 (index ECX) 000 (base EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is initially 0x00001ff9 (EAX)\n"
+      "run: effective address is 0x00001ffe (after adding ECX*1)\n"
+      "run: effective address is 0x00002000 (after adding disp8)\n"  // the displacement is applied after the SIB computation
+      "run: storing 0x00000011\n"  // 1 + 0x10
+  );
+}
+
+:(before "End Mod 1 Special-cases(addr)")
+case 4:  // exception: mod 0b01 rm 0b100 => incoming SIB (scale-index-base) byte
+  addr = effective_address_from_sib(mod);  // mod != 0, so the SIB base field always names a register here
+  break;
+
+//:
+
+:(code)
+void test_add_r32_to_mem_at_base_r32_index_r32_plus_disp32() {
+  Reg[EBX].i = 0x10;  // source
+  Reg[EAX].i = 0x1ff9;  // dest base
+  Reg[ECX].i = 0x5;  // dest index; base + index + disp32 == 0x2000
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  01     9c      08    02 00 00 00               \n"  // add EBX to *(EAX+ECX+2)
+      // ModR/M in binary: 10 (indirect+disp32 mode) 011 (src EBX) 100 (dest in SIB)
+      // SIB in binary: 00 (scale 1) 001 (index ECX) 000 (base EAX)
+      "== data 0x2000\n"
+      "01 00 00 00\n"  // 1
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add EBX to r/m32\n"
+      "run: effective address is initially 0x00001ff9 (EAX)\n"
+      "run: effective address is 0x00001ffe (after adding ECX*1)\n"
+      "run: effective address is 0x00002000 (after adding disp32)\n"  // the displacement is applied after the SIB computation
+      "run: storing 0x00000011\n"  // 1 + 0x10
+  );
+}
+
+:(before "End Mod 2 Special-cases(addr)")
+case 4:  // exception: mod 0b10 rm 0b100 => incoming SIB (scale-index-base) byte
+  addr = effective_address_from_sib(mod);  // mod != 0, so the SIB base field always names a register here
+  break;
diff --git a/linux/bootstrap/017jump_disp8.cc b/linux/bootstrap/017jump_disp8.cc
new file mode 100644
index 00000000..30e60a74
--- /dev/null
+++ b/linux/bootstrap/017jump_disp8.cc
@@ -0,0 +1,407 @@
+//: jump to 8-bit offset
+
+//:: jump
+
+:(before "End Initialize Op Names")
+put_new(Name, "eb", "jump disp8 bytes away (jmp)");
+
+:(code)
+void test_jump_disp8() {  // eb jumps unconditionally, disp8 bytes past the end of the jump instruction
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  eb                   05                        \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x3: skipped
+      "  05                                 00 00 00 02 \n"  // at 0x8: jump target (0x3 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: eb\n"
+      "run: jump 5\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000003 opcode: 05");
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xeb: {  // jmp: unconditional jump by a signed 8-bit displacement
+  const int8_t offset = static_cast<int>(next());  // const, like every other jump handler; int8_t keeps the displacement signed
+  trace(Callstack_depth+1, "run") << "jump " << NUM(offset) << end();  // NUM() prints the byte as a number, not a character
+  EIP += offset;  // EIP already points past the displacement byte, so this is relative to the next instruction
+  break;
+}
+
+//:: jump if equal/zero
+
+:(before "End Initialize Op Names")
+put_new(Name, "74", "jump disp8 bytes away if equal, if ZF is set (jcc/jz/je)");
+
+:(code)
+void test_je_disp8_success() {  // ZF set => 74 takes the jump
+  ZF = true;  // the condition 74 tests for
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  74                   05                        \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x3: skipped
+      "  05                                 00 00 00 02 \n"  // at 0x8: jump target (0x3 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 74\n"
+      "run: jump 5\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000003 opcode: 05");
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x74: {  // je/jz: jump disp8 if ZF
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (!ZF) break;  // condition false: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_je_disp8_fail() {  // ZF clear => 74 does not jump
+  ZF = false;  // condition for 74 is not met
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  74                   05                        \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x3: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0x8
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 74\n"
+      "run: 0x00000003 opcode: 05\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if not equal/not zero
+
+:(before "End Initialize Op Names")
+put_new(Name, "75", "jump disp8 bytes away if not equal, if ZF is not set (jcc/jnz/jne)");
+
+:(code)
+void test_jne_disp8_success() {  // ZF clear => 75 takes the jump
+  ZF = false;  // the condition 75 tests for
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  75                   05                        \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x3: skipped
+      "  05                                 00 00 00 02 \n"  // at 0x8: jump target (0x3 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 75\n"
+      "run: jump 5\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000003 opcode: 05");
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x75: {  // jne/jnz: jump disp8 if !ZF
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (ZF) break;  // condition false: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jne_disp8_fail() {  // ZF set => 75 does not jump
+  ZF = true;  // condition for 75 is not met
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  75                   05                        \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x3: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0x8
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 75\n"
+      "run: 0x00000003 opcode: 05\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if greater
+
+:(before "End Initialize Op Names")
+put_new(Name, "7f", "jump disp8 bytes away if greater, if ZF is unset and SF == OF (jcc/jg/jnle)");
+put_new(Name, "77", "jump disp8 bytes away if greater (addr, float), if ZF is unset and CF is unset (jcc/ja/jnbe)");
+
+:(code)
+void test_jg_disp8_success() {  // !ZF and SF == OF => 7f takes the jump
+  ZF = false;
+  SF = false;
+  OF = false;  // SF == OF, and ZF is clear: "greater" holds
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  7f                   05                        \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x3: skipped
+      "  05                                 00 00 00 02 \n"  // at 0x8: jump target (0x3 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 7f\n"
+      "run: jump 5\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000003 opcode: 05");
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x7f: {  // jg/jnle: jump disp8 if !ZF and SF == OF
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (ZF || SF != OF) break;  // not greater: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+case 0x77: {  // ja/jnbe: jump disp8 if !CF and !ZF (the addr/float flavor of "greater")
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (CF || ZF) break;  // not greater: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jg_disp8_fail() {  // SF != OF => 7f does not jump
+  ZF = false;
+  SF = true;
+  OF = false;  // SF != OF: "greater" does not hold
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  7f                   05                        \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x3: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0x8
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 7f\n"
+      "run: 0x00000003 opcode: 05\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if greater or equal
+
+:(before "End Initialize Op Names")
+put_new(Name, "7d", "jump disp8 bytes away if greater or equal, if SF == OF (jcc/jge/jnl)");
+put_new(Name, "73", "jump disp8 bytes away if greater or equal (addr, float), if CF is unset (jcc/jae/jnb)");
+
+:(code)
+void test_jge_disp8_success() {  // SF == OF => 7d takes the jump
+  SF = false;
+  OF = false;  // SF == OF: "greater or equal" holds
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  7d                   05                        \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x3: skipped
+      "  05                                 00 00 00 02 \n"  // at 0x8: jump target (0x3 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 7d\n"
+      "run: jump 5\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000003 opcode: 05");
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x7d: {  // jge/jnl: jump disp8 if SF == OF
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (SF != OF) break;  // lesser: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+case 0x73: {  // jae/jnb: jump disp8 if !CF (the addr/float flavor of "greater or equal")
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (CF) break;  // lesser: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jge_disp8_fail() {  // SF != OF => 7d does not jump
+  SF = true;
+  OF = false;  // SF != OF: "greater or equal" does not hold
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  7d                   05                        \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x3: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0x8
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 7d\n"
+      "run: 0x00000003 opcode: 05\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if lesser
+
+:(before "End Initialize Op Names")
+put_new(Name, "7c", "jump disp8 bytes away if lesser, if SF != OF (jcc/jl/jnge)");
+put_new(Name, "72", "jump disp8 bytes away if lesser (addr, float), if CF is set (jcc/jb/jnae)");
+
+:(code)
+void test_jl_disp8_success() {  // SF != OF => 7c takes the jump
+  ZF = false;  // not consulted by the 7c handler
+  SF = true;
+  OF = false;  // SF != OF: "lesser" holds
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  7c                   05                        \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x3: skipped
+      "  05                                 00 00 00 02 \n"  // at 0x8: jump target (0x3 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 7c\n"
+      "run: jump 5\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000003 opcode: 05");
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x7c: {  // jl/jnge: jump disp8 if SF != OF
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (SF == OF) break;  // not lesser: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+case 0x72: {  // jb/jnae: jump disp8 if CF (the addr/float flavor of "lesser")
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (!CF) break;  // not lesser: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jl_disp8_fail() {  // SF == OF => 7c does not jump
+  ZF = false;  // not consulted by the 7c handler
+  SF = false;
+  OF = false;  // SF == OF: "lesser" does not hold
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  7c                   05                        \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x3: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0x8
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 7c\n"
+      "run: 0x00000003 opcode: 05\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if lesser or equal
+
+:(before "End Initialize Op Names")
+put_new(Name, "7e", "jump disp8 bytes away if lesser or equal, if ZF is set or SF != OF (jcc/jle/jng)");
+put_new(Name, "76", "jump disp8 bytes away if lesser or equal (addr, float), if ZF is set or CF is set (jcc/jbe/jna)");
+
+:(code)
+void test_jle_disp8_equal() {  // ZF set => 7e takes the jump via its "equal" half
+  ZF = true;  // enough on its own for 7e
+  SF = false;
+  OF = false;  // SF == OF, so the "lesser" half does not hold
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  7e                   05                        \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x3: skipped
+      "  05                                 00 00 00 02 \n"  // at 0x8: jump target (0x3 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 7e\n"
+      "run: jump 5\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000003 opcode: 05");
+}
+
+:(code)
+void test_jle_disp8_lesser() {  // SF != OF => 7e takes the jump via its "lesser" half
+  ZF = false;  // the "equal" half does not hold
+  SF = true;
+  OF = false;  // SF != OF: enough on its own for 7e
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  7e                   05                        \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x3: skipped
+      "  05                                 00 00 00 02 \n"  // at 0x8: jump target (0x3 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 7e\n"
+      "run: jump 5\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000003 opcode: 05");
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x7e: {  // jle/jng: jump disp8 if ZF or SF != OF
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (!ZF && SF == OF) break;  // greater: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+case 0x76: {  // jbe/jna: jump disp8 if ZF or CF (the addr/float flavor of "lesser or equal")
+  // The displacement byte is consumed even when the jump is not taken.
+  const int8_t disp8 = static_cast<int>(next());
+  if (!ZF && !CF) break;  // greater: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << NUM(disp8) << end();
+  EIP += disp8;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jle_disp8_greater() {  // !ZF and SF == OF => 7e does not jump
+  ZF = false;  // the "equal" half does not hold
+  SF = false;
+  OF = false;  // SF == OF: the "lesser" half does not hold either
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  7e                   05                        \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x3: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0x8
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 7e\n"
+      "run: 0x00000003 opcode: 05\n"
+      "run: 0x00000008 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
diff --git a/linux/bootstrap/018jump_disp32.cc b/linux/bootstrap/018jump_disp32.cc
new file mode 100644
index 00000000..e77bc584
--- /dev/null
+++ b/linux/bootstrap/018jump_disp32.cc
@@ -0,0 +1,407 @@
+//: jump to 32-bit offset
+
+//:: jump
+
+:(before "End Initialize Op Names")
+put_new(Name, "e9", "jump disp32 bytes away (jmp)");
+
+:(code)
+void test_jump_disp32() {  // e9 jumps unconditionally, disp32 bytes past the end of the jump instruction
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  e9                   05 00 00 00               \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x6: skipped
+      "  05                                 00 00 00 02 \n"  // at 0xb: jump target (0x6 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: e9\n"
+      "run: jump 5\n"
+      "run: 0x0000000b opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000006 opcode: 05");
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xe9: {  // jmp: unconditional jump by a signed 32-bit displacement
+  const int32_t disp32 = next32();  // reading the operand leaves EIP at the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+
+//:: jump if equal/zero
+
+:(before "End Initialize Op Names")
+put_new(Name_0f, "84", "jump disp32 bytes away if equal, if ZF is set (jcc/jz/je)");
+
+:(code)
+void test_je_disp32_success() {  // ZF set => 0f 84 takes the jump
+  ZF = true;  // the condition 0f 84 tests for
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 84                05 00 00 00               \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x7: skipped
+      "  05                                 00 00 00 02 \n"  // at 0xc: jump target (0x7 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"  // only the 0f escape byte shows up on this trace line
+      "run: jump 5\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000007 opcode: 05");
+}
+
+:(before "End Two-Byte Opcodes Starting With 0f")
+case 0x84: {  // je/jz: jump disp32 if ZF
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (!ZF) break;  // condition false: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_je_disp32_fail() {  // ZF clear => 0f 84 does not jump
+  ZF = false;  // condition for 0f 84 is not met
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 84                05 00 00 00               \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x7: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0xc
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: 0x00000007 opcode: 05\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if not equal/not zero
+
+:(before "End Initialize Op Names")
+put_new(Name_0f, "85", "jump disp32 bytes away if not equal, if ZF is not set (jcc/jnz/jne)");
+
+:(code)
+void test_jne_disp32_success() {  // ZF clear => 0f 85 takes the jump
+  ZF = false;  // the condition 0f 85 tests for
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 85                05 00 00 00               \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x7: skipped
+      "  05                                 00 00 00 02 \n"  // at 0xc: jump target (0x7 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"  // only the 0f escape byte shows up on this trace line
+      "run: jump 5\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000007 opcode: 05");
+}
+
+:(before "End Two-Byte Opcodes Starting With 0f")
+case 0x85: {  // jne/jnz: jump disp32 if !ZF
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (ZF) break;  // condition false: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jne_disp32_fail() {  // ZF set => 0f 85 does not jump
+  ZF = true;  // condition for 0f 85 is not met
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 85                05 00 00 00               \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x7: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0xc
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: 0x00000007 opcode: 05\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if greater
+
+:(before "End Initialize Op Names")
+put_new(Name_0f, "8f", "jump disp32 bytes away if greater, if ZF is unset and SF == OF (jcc/jg/jnle)");
+put_new(Name_0f, "87", "jump disp32 bytes away if greater (addr, float), if ZF is unset and CF is unset (jcc/ja/jnbe)");
+
+:(code)
+void test_jg_disp32_success() {  // !ZF and SF == OF => 0f 8f takes the jump
+  ZF = false;
+  SF = false;
+  OF = false;  // SF == OF, and ZF is clear: "greater" holds
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 8f                05 00 00 00               \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x7: skipped
+      "  05                                 00 00 00 02 \n"  // at 0xc: jump target (0x7 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: jump 5\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000007 opcode: 05");
+}
+
+:(before "End Two-Byte Opcodes Starting With 0f")
+case 0x8f: {  // jg/jnle: jump disp32 if !ZF and SF == OF
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (ZF || SF != OF) break;  // not greater: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+case 0x87: {  // ja/jnbe: jump disp32 if !CF and !ZF (the addr/float flavor of "greater")
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (CF || ZF) break;  // not greater: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jg_disp32_fail() {  // SF != OF => 0f 8f does not jump
+  ZF = false;
+  SF = true;
+  OF = false;  // SF != OF: "greater" does not hold
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 8f                05 00 00 00               \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x7: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0xc
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: 0x00000007 opcode: 05\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if greater or equal
+
+:(before "End Initialize Op Names")
+put_new(Name_0f, "8d", "jump disp32 bytes away if greater or equal, if SF == OF (jcc/jge/jnl)");
+put_new(Name_0f, "83", "jump disp32 bytes away if greater or equal (addr, float), if CF is unset (jcc/jae/jnb)");
+
+:(code)
+void test_jge_disp32_success() {  // SF == OF => 0f 8d takes the jump
+  SF = false;
+  OF = false;  // SF == OF: "greater or equal" holds
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 8d                05 00 00 00               \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x7: skipped
+      "  05                                 00 00 00 02 \n"  // at 0xc: jump target (0x7 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: jump 5\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000007 opcode: 05");
+}
+
+:(before "End Two-Byte Opcodes Starting With 0f")
+case 0x8d: {  // jge/jnl: jump disp32 if SF == OF
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (SF != OF) break;  // lesser: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+case 0x83: {  // jae/jnb: jump disp32 if !CF (the addr/float flavor of "greater or equal")
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (CF) break;  // lesser: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jge_disp32_fail() {  // SF != OF => 0f 8d does not jump
+  SF = true;
+  OF = false;  // SF != OF: "greater or equal" does not hold
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 8d                05 00 00 00               \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x7: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0xc
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: 0x00000007 opcode: 05\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if lesser
+
+:(before "End Initialize Op Names")
+put_new(Name_0f, "8c", "jump disp32 bytes away if lesser, if SF != OF (jcc/jl/jnge)");
+put_new(Name_0f, "82", "jump disp32 bytes away if lesser (addr, float), if CF is set (jcc/jb/jnae)");
+
+:(code)
+void test_jl_disp32_success() {  // SF != OF => 0f 8c takes the jump
+  ZF = false;  // not consulted by the 0f 8c handler
+  SF = true;
+  OF = false;  // SF != OF: "lesser" holds
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 8c                05 00 00 00               \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x7: skipped
+      "  05                                 00 00 00 02 \n"  // at 0xc: jump target (0x7 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: jump 5\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000007 opcode: 05");
+}
+
+:(before "End Two-Byte Opcodes Starting With 0f")
+case 0x8c: {  // jl/jnge: jump disp32 if SF != OF
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (SF == OF) break;  // not lesser: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+case 0x82: {  // jb/jnae: jump disp32 if CF (the addr/float flavor of "lesser")
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (!CF) break;  // not lesser: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jl_disp32_fail() {  // SF == OF => 0f 8c does not jump
+  ZF = false;  // not consulted by the 0f 8c handler
+  SF = false;
+  OF = false;  // SF == OF: "lesser" does not hold
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 8c                05 00 00 00               \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x7: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0xc
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: 0x00000007 opcode: 05\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
+
+//:: jump if lesser or equal
+
+:(before "End Initialize Op Names")
+put_new(Name_0f, "8e", "jump disp32 bytes away if lesser or equal, if ZF is set or SF != OF (jcc/jle/jng)");
+put_new(Name_0f, "86", "jump disp32 bytes away if lesser or equal (addr, float), if ZF is set or CF is set (jcc/jbe/jna)");
+
+:(code)
+void test_jle_disp32_equal() {  // ZF set => 0f 8e takes the jump via its "equal" half
+  ZF = true;  // enough on its own for 0f 8e
+  SF = false;
+  OF = false;  // SF == OF, so the "lesser" half does not hold
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 8e                05 00 00 00               \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x7: skipped
+      "  05                                 00 00 00 02 \n"  // at 0xc: jump target (0x7 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: jump 5\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000007 opcode: 05");
+}
+
+:(code)
+void test_jle_disp32_lesser() {  // SF != OF => 0f 8e takes the jump via its "lesser" half
+  ZF = false;  // the "equal" half does not hold
+  SF = true;
+  OF = false;  // SF != OF: enough on its own for 0f 8e
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 8e                05 00 00 00               \n"  // skip 1 instruction
+      "  05                                 00 00 00 01 \n"  // at 0x7: skipped
+      "  05                                 00 00 00 02 \n"  // at 0xc: jump target (0x7 + 5)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: jump 5\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000007 opcode: 05");
+}
+
+:(before "End Two-Byte Opcodes Starting With 0f")
+case 0x8e: {  // jle/jng: jump disp32 if ZF or SF != OF
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (!ZF && SF == OF) break;  // greater: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+case 0x86: {  // jbe/jna: jump disp32 if ZF or CF (the addr/float flavor of "lesser or equal")
+  // The whole displacement is consumed even when the jump is not taken.
+  const int32_t disp32 = next32();
+  if (!ZF && !CF) break;  // greater: continue with the next instruction
+  trace(Callstack_depth+1, "run") << "jump " << disp32 << end();
+  EIP += disp32;  // relative to the byte after this instruction
+  break;
+}
+
+:(code)
+void test_jle_disp32_greater() {  // !ZF and SF == OF => 0f 8e does not jump
+  ZF = false;  // the "equal" half does not hold
+  SF = false;
+  OF = false;  // SF == OF: the "lesser" half does not hold either
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 8e                05 00 00 00               \n"  // would skip 1 instruction, but the condition is false here
+      "  05                                 00 00 00 01 \n"  // at 0x7: runs because the jump is not taken
+      "  05                                 00 00 00 02 \n"  // at 0xc
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000001 opcode: 0f\n"
+      "run: 0x00000007 opcode: 05\n"
+      "run: 0x0000000c opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: jump 5");
+}
diff --git a/linux/bootstrap/019functions.cc b/linux/bootstrap/019functions.cc
new file mode 100644
index 00000000..2fde10ef
--- /dev/null
+++ b/linux/bootstrap/019functions.cc
@@ -0,0 +1,122 @@
+//:: call
+
+:(before "End Initialize Op Names")
+put_new(Name, "e8", "call disp32 (call)");
+
+:(code)
+void test_call_disp32() {  // e8 pushes the return address, then jumps disp32 bytes past it
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000064;  // stack pointer inside the region allocated above
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  e8                                 a0 00 00 00 \n"  // call function at offset 0xa0 from the next EIP
+      // next EIP is 6
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: call imm32 0x000000a0\n"
+      "run: decrementing ESP to 0xbd000060\n"
+      "run: pushing value 0x00000006\n"  // return address: the byte after the 5-byte call
+      "run: jumping to 0x000000a6\n"  // 0x6 + 0xa0
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xe8: {  // call disp32 relative to next EIP
+  const int32_t offset = next32();  // after this read, EIP holds the return address
+  ++Callstack_depth;  // NOTE(review): bumped before the traces here, but after them in the 'ff /2' call handler -- confirm which is intended
+  trace(Callstack_depth+1, "run") << "call imm32 0x" << HEXWORD << offset << end();
+//?   cerr << "push: EIP: " << EIP << " => " << Reg[ESP].u << '\n';
+  push(EIP);  // save the return address on the stack
+  EIP += offset;  // target = return address + disp32
+  trace(Callstack_depth+1, "run") << "jumping to 0x" << HEXWORD << EIP << end();
+  break;
+}
+
+//:
+
+:(code)
+void test_call_r32() {  // ff /2 with mod 3: the call target is the value held in a register
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000064;  // stack pointer inside the region allocated above
+  Reg[EBX].u = 0x000000a0;  // absolute address to call
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  ff     d3                                      \n"  // call function offset at EBX
+      // next EIP is 3
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: call to r/m32\n"
+      "run: r/m32 is EBX\n"
+      "run: decrementing ESP to 0xbd000060\n"
+      "run: pushing value 0x00000003\n"  // return address: the byte after the 2-byte call
+      "run: jumping to 0x000000a0\n"  // EBX used as-is, not added to EIP
+  );
+}
+
+:(before "End Op ff Subops")
+case 2: {  // call function pointer at r/m32
+  trace(Callstack_depth+1, "run") << "call to r/m32" << end();
+  const int32_t* offset = effective_address(modrm);  // despite the name, this points at an absolute target address (see 'EIP = *offset')
+  push(EIP);  // return address; pushed only after the operand has been decoded
+  EIP = *offset;  // absolute jump, unlike the EIP-relative 'e8' call
+  trace(Callstack_depth+1, "run") << "jumping to 0x" << HEXWORD << EIP << end();
+  ++Callstack_depth;  // NOTE(review): the 'e8' handler bumps the depth before its traces; here it happens after -- confirm
+  break;
+}
+
+:(code)
+void test_call_mem_at_rm32() {  // ff /2 with mod 0: the call target is read from memory at *EBX
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000064;  // stack pointer inside the region allocated above
+  Reg[EBX].u = 0x2000;  // address of the word holding the call target
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  ff     13                                      \n"  // call function offset at *EBX
+      // next EIP is 3
+      "== data 0x2000\n"
+      "a0 00 00 00\n"  // 0xa0
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: call to r/m32\n"
+      "run: effective address is 0x00002000 (EBX)\n"
+      "run: decrementing ESP to 0xbd000060\n"
+      "run: pushing value 0x00000003\n"  // return address: the byte after the 2-byte call
+      "run: jumping to 0x000000a0\n"  // the word stored at 0x2000
+  );
+}
+
+//:: ret
+
+:(before "End Initialize Op Names")
+put_new(Name, "c3", "return from most recent unfinished call (ret)");
+
+:(code)
+void test_ret() {  // c3 pops the return address off the stack into EIP
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000064;  // stack pointer inside the region allocated above
+  write_mem_u32(Reg[ESP].u, 0x10);  // plant return address 0x10 at the top of the stack
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c3                                           \n"  // return
+      "== data 0x2000\n"  // NOTE(review): nothing in this test appears to read this segment -- confirm it is needed
+      "10 00 00 00\n"  // 0x10
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: return\n"
+      "run: popping value 0x00000010\n"  // the value planted with write_mem_u32 above
+      "run: jumping to 0x00000010\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xc3: {  // return from a call
+  trace(Callstack_depth+1, "run") << "return" << end();  // traced before the depth is decremented
+  --Callstack_depth;  // mirrors the ++Callstack_depth in the call handlers
+  EIP = pop();  // resume at the address on top of the stack
+  trace(Callstack_depth+1, "run") << "jumping to 0x" << HEXWORD << EIP << end();  // traced at the decremented depth
+  break;
+}
diff --git a/linux/bootstrap/020byte_addressing.cc b/linux/bootstrap/020byte_addressing.cc
new file mode 100644
index 00000000..399e9199
--- /dev/null
+++ b/linux/bootstrap/020byte_addressing.cc
@@ -0,0 +1,272 @@
+//: SubX mostly deals with instructions operating on 32-bit operands, but we
+//: still need to deal with raw bytes for strings and so on.
+
+//: Unfortunately the register encodings when dealing with bytes are a mess.
+//: We need a special case for them.
+:(code)
+string rname_8bit(uint8_t r) {  // name of the 8-bit register encoded by r (0-7), for trace messages
+  switch (r) {
+  case 0: return "AL";  // lowest byte of EAX
+  case 1: return "CL";  // lowest byte of ECX
+  case 2: return "DL";  // lowest byte of EDX
+  case 3: return "BL";  // lowest byte of EBX
+  case 4: return "AH";  // second lowest byte of EAX
+  case 5: return "CH";  // second lowest byte of ECX
+  case 6: return "DH";  // second lowest byte of EDX
+  case 7: return "BH";  // second lowest byte of EBX
+  default: raise << "invalid 8-bit register " << NUM(r) << '\n' << end();  return "";  // NUM: print the number, not a raw char
+  }
+}
+
+uint8_t* effective_byte_address(uint8_t modrm) {
+  uint8_t mod = (modrm>>6);
+  uint8_t rm = modrm & 0x7;
+  if (mod == 3) {
+    // select an 8-bit register
+    trace(Callstack_depth+1, "run") << "r/m8 is " << rname_8bit(rm) << end();
+    return reg_8bit(rm);
+  }
+  // the rest is as usual
+  return mem_addr_u8(effective_address_number(modrm));
+}
+
+uint8_t* reg_8bit(uint8_t rm) {
+  uint8_t* result = reinterpret_cast<uint8_t*>(&Reg[rm & 0x3].i);  // _L register
+  if (rm & 0x4)
+    ++result;  // _H register;  assumes host is little-endian
+  return result;
+}
+
+:(before "End Initialize Op Names")
+put_new(Name, "88", "copy r8 to r8/m8-at-r32");
+
+:(code)
+void test_copy_r8_to_mem_at_rm32() {
+  Reg[EBX].i = 0x224488ab;
+  Reg[EAX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  88     18                                      \n"  // copy BL to the byte at *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (src BL) 000 (dest EAX)
+      "== data 0x2000\n"
+      "f0 cc bb aa\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy BL to r8/m8-at-r32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0xab\n"
+  );
+  CHECK_EQ(0xaabbccab, read_mem_u32(0x2000));
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x88: {  // copy r8 to r/m8
+  const uint8_t modrm = next();
+  const uint8_t rsrc = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "copy " << rname_8bit(rsrc) << " to r8/m8-at-r32" << end();
+  // only a single byte is transferred; the bytes around the destination are left untouched
+  uint8_t* dest = effective_byte_address(modrm);
+  const uint8_t* src = reg_8bit(rsrc);
+  *dest = *src;  // single-byte copy from the 8-bit source register
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*dest) << end();
+  break;
+}
+
+//:
+
+:(before "End Initialize Op Names")
+put_new(Name, "8a", "copy r8/m8-at-r32 to r8");
+
+:(code)
+void test_copy_mem_at_rm32_to_r8() {
+  Reg[EBX].i = 0xaabbcc0f;  // one nibble each of lowest byte set to all 0s and all 1s, to maximize value of this test
+  Reg[EAX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  8a     18                                      \n"  // copy just the byte at *EAX to BL
+      // ModR/M in binary: 00 (indirect mode) 011 (dest EBX) 000 (src EAX)
+      "== data 0x2000\n"
+      "ab ff ff ff\n"  // 0xab with more data in following bytes
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy r8/m8-at-r32 to BL\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0xab\n"
+      // remaining bytes of EBX are *not* cleared
+      "run: EBX now contains 0xaabbccab\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x8a: {  // copy r/m8 to r8
+  const uint8_t modrm = next();
+  const uint8_t rdest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "copy r8/m8-at-r32 to " << rname_8bit(rdest) << end();
+  // only one byte of the destination register changes; its other bytes are preserved
+  const uint8_t* src = effective_byte_address(modrm);
+  uint8_t* dest = reg_8bit(rdest);
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*src) << end();
+  *dest = *src;  // single-byte copy into the selected _L or _H register
+  const uint8_t rdest_32bit = rdest & 0x3;
+  trace(Callstack_depth+1, "run") << rname(rdest_32bit) << " now contains 0x" << HEXWORD << Reg[rdest_32bit].u << end();
+  break;
+}
+
+:(code)
+void test_cannot_copy_byte_to_ESP_EBP_ESI_EDI() {
+  Reg[ESI].u = 0xaabbccdd;
+  Reg[EBX].u = 0x11223344;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  8a     f3                                      \n"  // copy BL (r/m8 in direct mode) to 8-bit register '6'
+      // ModR/M in binary: 11 (direct mode) 110 (dest 8-bit 'register 6') 011 (src BL)
+  );
+  CHECK_TRACE_CONTENTS(
+      // ensure 8-bit register '6' is DH, not ESI
+      "run: copy r8/m8-at-r32 to DH\n"
+      "run: storing 0x44\n"
+  );
+  // ensure ESI is unchanged
+  CHECK_EQ(Reg[ESI].u, 0xaabbccdd);
+}
+
+//:
+
+:(before "End Initialize Op Names")
+put_new(Name, "c6", "copy imm8 to r8/m8-at-r32 with subop 0 (mov)");
+
+:(code)
+void test_copy_imm8_to_mem_at_rm32() {
+  Reg[EAX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  c6     00                          dd          \n"  // copy to the byte at *EAX
+      // ModR/M in binary: 00 (indirect mode) 000 (unused) 000 (dest EAX)
+      "== data 0x2000\n"
+      "f0 cc bb aa\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy imm8 to r8/m8-at-r32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0xdd\n"
+  );
+  CHECK_EQ(0xaabbccdd, read_mem_u32(0x2000));
+}
+
+:(before "End Single-Byte Opcodes")
+case 0xc6: {  // copy imm8 to r/m8
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "copy imm8 to r8/m8-at-r32" << end();
+  const uint8_t subop = (modrm>>3)&0x7;  // middle 3 'reg opcode' bits
+  if (subop != 0) {
+    cerr << "unrecognized subop for opcode c6: " << NUM(subop) << " (only 0/copy currently implemented)\n";
+    exit(1);
+  }
+  // x86 places any SIB/displacement bytes before the immediate, so decode the destination before fetching imm8
+  uint8_t* dest = effective_byte_address(modrm);
+  const uint8_t src = next();
+  trace(Callstack_depth+1, "run") << "imm8 is 0x" << HEXBYTE << NUM(src) << end();
+  *dest = src;  // single-byte store; neighbouring bytes are untouched
+  trace(Callstack_depth+1, "run") << "storing 0x" << HEXBYTE << NUM(*dest) << end();
+  break;
+}
+
+//:: set flags (setcc)
+
+:(before "End Initialize Op Names")
+put_new(Name_0f, "94", "set r8/m8-at-rm32 to 1 if equal, if ZF is set, 0 otherwise (setcc/setz/sete)");
+put_new(Name_0f, "95", "set r8/m8-at-rm32 to 1 if not equal, if ZF is not set, 0 otherwise (setcc/setnz/setne)");
+put_new(Name_0f, "9f", "set r8/m8-at-rm32 to 1 if greater, if ZF is unset and SF == OF, 0 otherwise (setcc/setg/setnle)");
+put_new(Name_0f, "97", "set r8/m8-at-rm32 to 1 if greater (addr, float), if ZF is unset and CF is unset, 0 otherwise (setcc/seta/setnbe)");
+put_new(Name_0f, "9d", "set r8/m8-at-rm32 to 1 if greater or equal, if SF == OF, 0 otherwise (setcc/setge/setnl)");
+put_new(Name_0f, "93", "set r8/m8-at-rm32 to 1 if greater or equal (addr, float), if CF is unset, 0 otherwise (setcc/setae/setnb)");
+put_new(Name_0f, "9c", "set r8/m8-at-rm32 to 1 if lesser, if SF != OF, 0 otherwise (setcc/setl/setnge)");
+put_new(Name_0f, "92", "set r8/m8-at-rm32 to 1 if lesser (addr, float), if CF is set, 0 otherwise (setcc/setb/setnae)");
+put_new(Name_0f, "9e", "set r8/m8-at-rm32 to 1 if lesser or equal, if ZF is set or SF != OF, 0 otherwise (setcc/setle/setng)");
+put_new(Name_0f, "96", "set r8/m8-at-rm32 to 1 if lesser or equal (addr, float), if ZF is set or CF is set, 0 otherwise (setcc/setbe/setna)");
+
+:(before "End Two-Byte Opcodes Starting With 0f")
+case 0x94: {  // set r8/m8-at-rm32 if ZF
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = ZF;
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
+case 0x95: {  // set r8/m8-at-rm32 if !ZF
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = !ZF;
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
+case 0x9f: {  // set r8/m8-at-rm32 if !ZF and SF == OF (setg)
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = !ZF && SF == OF;
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
+case 0x97: {  // set r8/m8-at-rm32 if !CF and !ZF
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = (!CF && !ZF);
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
+case 0x9d: {  // set r8/m8-at-rm32 if SF == OF (setge)
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = (SF == OF);
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
+case 0x93: {  // set r8/m8-at-rm32 if !CF
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = !CF;
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
+case 0x9c: {  // set r8/m8-at-rm32 if SF != OF (setl)
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = (SF != OF);
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
+case 0x92: {  // set r8/m8-at-rm32 if CF
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = CF;
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
+case 0x9e: {  // set r8/m8-at-rm32 if ZF or SF != OF (setle)
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = (ZF || SF != OF);
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
+case 0x96: {  // set r8/m8-at-rm32 if ZF or CF
+  const uint8_t modrm = next();
+  trace(Callstack_depth+1, "run") << "set r8/m8-at-rm32" << end();
+  uint8_t* dest = effective_byte_address(modrm);
+  *dest = (ZF || CF);
+  trace(Callstack_depth+1, "run") << "storing " << NUM(*dest) << end();
+  break;
+}
diff --git a/linux/bootstrap/021div.cc b/linux/bootstrap/021div.cc
new file mode 100644
index 00000000..15ed89d8
--- /dev/null
+++ b/linux/bootstrap/021div.cc
@@ -0,0 +1,38 @@
+//: helper for division operations: sign-extend EAX into EDX
+
+:(before "End Initialize Op Names")
+put_new(Name, "99", "sign-extend EAX into EDX (cdq)");
+
+:(code)
+void test_cdq() {
+  Reg[EAX].i = 10;
+  run(
+      "== code 0x1\n"
+      "99\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: sign-extend EAX into EDX\n"
+      "run: EDX is now 0x00000000\n"
+  );
+}
+
+:(before "End Single-Byte Opcodes")
+case 0x99: {  // sign-extend EAX into EDX
+  trace(Callstack_depth+1, "run") << "sign-extend EAX into EDX" << end();
+  Reg[EDX].i = (Reg[EAX].i < 0) ? -1 : 0;
+  trace(Callstack_depth+1, "run") << "EDX is now 0x" << HEXWORD << Reg[EDX].u << end();
+  break;
+}
+
+:(code)
+void test_cdq_negative() {
+  Reg[EAX].i = -10;
+  run(
+      "== code 0x1\n"
+      "99\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: sign-extend EAX into EDX\n"
+      "run: EDX is now 0xffffffff\n"
+  );
+}
diff --git a/linux/bootstrap/022float.cc b/linux/bootstrap/022float.cc
new file mode 100644
index 00000000..7b313c25
--- /dev/null
+++ b/linux/bootstrap/022float.cc
@@ -0,0 +1,519 @@
+//: floating-point operations
+
+//:: copy
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "10", "copy xm32 to x32 (movss)");
+put_new(Name_f3_0f, "11", "copy x32 to xm32 (movss)");
+
+:(code)
+void test_copy_x32_to_x32() {
+  Xmm[3] = 0.5;
+  run(
+      "== code 0x1\n"  // code segment
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 11 d8                                    \n"  // copy XMM3 to XMM0
+      // ModR/M in binary: 11 (direct mode) 011 (src XMM3) 000 (dest XMM0)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy XMM3 to x/m32\n"
+      "run: x/m32 is XMM0\n"
+      "run: storing 0.5\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x10: {  // copy x/m32 to x32
+  const uint8_t modrm = next();
+  const uint8_t rdest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "copy x/m32 to " << Xname[rdest] << end();
+  float* src = effective_address_float(modrm);
+  Xmm[rdest] = *src;  // Write multiple elements of vector<uint8_t> at once. Assumes sizeof(float) == 4 on the host as well.
+  trace(Callstack_depth+1, "run") << "storing " << Xmm[rdest] << end();
+  break;
+}
+case 0x11: {  // copy x32 to x/m32
+  const uint8_t modrm = next();
+  const uint8_t rsrc = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "copy " << Xname[rsrc] << " to x/m32" << end();
+  float* dest = effective_address_float(modrm);
+  *dest = Xmm[rsrc];  // Write multiple elements of vector<uint8_t> at once. Assumes sizeof(float) == 4 on the host as well.
+  trace(Callstack_depth+1, "run") << "storing " << *dest << end();
+  break;
+}
+
+:(code)
+void test_copy_x32_to_mem_at_xm32() {
+  Xmm[3] = 0.5;
+  Reg[EAX].i = 0x60;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 11 18                                    \n"  // copy XMM3 to *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (src XMM3) 000 (dest EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy XMM3 to x/m32\n"
+      "run: effective address is 0x00000060 (EAX)\n"
+      "run: storing 0.5\n"
+  );
+}
+
+void test_copy_mem_at_xm32_to_x32() {
+  Reg[EAX].i = 0x2000;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 10 18                                    \n"  // copy *EAX to XMM3
+      "== data 0x2000\n"
+      "00 00 00 3f\n"  // 0x3f000000 = 0.5
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: copy x/m32 to XMM3\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: storing 0.5\n"
+  );
+}
+
+//:: convert to floating point
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "2a", "convert integer to floating-point (cvtsi2ss)");
+
+:(code)
+void test_cvtsi2ss() {
+  Reg[EAX].i = 10;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 2a c0                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 000 (EAX)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: convert r/m32 to XMM0\n"
+      "run: r/m32 is EAX\n"
+      "run: XMM0 is now 10\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x2a: {  // convert integer to float
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "convert r/m32 to " << Xname[dest] << end();
+  const int32_t* src = effective_address(modrm);
+  Xmm[dest] = *src;
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+//:: convert floating point to int
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "2d", "convert floating-point to int (cvtss2si)");
+put_new(Name_f3_0f, "2c", "truncate floating-point to int (cvttss2si)");
+
+:(code)
+void test_cvtss2si() {
+  Xmm[0] = 9.8;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 2d c0                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: convert x/m32 to EAX\n"
+      "run: x/m32 is XMM0\n"
+      "run: EAX is now 0x0000000a\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x2d: {  // convert float to integer, rounding (contrast with the truncating 0x2c)
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "convert x/m32 to " << rname(dest) << end();
+  const float* src = effective_address_float(modrm);
+  Reg[dest].i = rint(*src);  // current rounding mode (default: nearest, ties to even) like cvtss2si; round() would push ties away from zero
+  trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
+  break;
+}
+
+:(code)
+void test_cvttss2si() {
+  Xmm[0] = 9.8;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 2c c0                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: truncate x/m32 to EAX\n"
+      "run: x/m32 is XMM0\n"
+      "run: EAX is now 0x00000009\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x2c: {  // truncate float to integer
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "truncate x/m32 to " << rname(dest) << end();
+  const float* src = effective_address_float(modrm);
+  Reg[dest].i = trunc(*src);
+  trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
+  break;
+}
+
+//:: add
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "58", "add floats (addss)");
+
+:(code)
+void test_addss() {
+  Xmm[0] = 3.0;
+  Xmm[1] = 2.0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 58 c1                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: add x/m32 to XMM0\n"
+      "run: x/m32 is XMM1\n"
+      "run: XMM0 is now 5\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x58: {  // add x/m32 to x32
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "add x/m32 to " << Xname[dest] << end();
+  const float* src = effective_address_float(modrm);
+  Xmm[dest] += *src;
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+//:: subtract
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "5c", "subtract floats (subss)");
+
+:(code)
+void test_subss() {
+  Xmm[0] = 3.0;
+  Xmm[1] = 2.0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 5c c1                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: subtract x/m32 from XMM0\n"
+      "run: x/m32 is XMM1\n"
+      "run: XMM0 is now 1\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x5c: {  // subtract x/m32 from x32
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "subtract x/m32 from " << Xname[dest] << end();
+  const float* src = effective_address_float(modrm);
+  Xmm[dest] -= *src;
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+//:: multiply
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "59", "multiply floats (mulss)");
+
+:(code)
+void test_mulss() {
+  Xmm[0] = 3.0;
+  Xmm[1] = 2.0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 59 c1                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: multiply XMM0 by x/m32\n"
+      "run: x/m32 is XMM1\n"
+      "run: XMM0 is now 6\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x59: {  // multiply x32 by x/m32
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "multiply " << Xname[dest] << " by x/m32" << end();
+  const float* src = effective_address_float(modrm);
+  Xmm[dest] *= *src;
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+//:: divide
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "5e", "divide floats (divss)");
+
+:(code)
+void test_divss() {
+  Xmm[0] = 3.0;
+  Xmm[1] = 2.0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 5e c1                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: divide XMM0 by x/m32\n"
+      "run: x/m32 is XMM1\n"
+      "run: XMM0 is now 1.5\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x5e: {  // divide x32 by x/m32
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "divide " << Xname[dest] << " by x/m32" << end();
+  const float* src = effective_address_float(modrm);
+  Xmm[dest] /= *src;
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+//:: min
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "5d", "minimum of two floats (minss)");
+
+:(code)
+void test_minss() {
+  Xmm[0] = 3.0;
+  Xmm[1] = 2.0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 5d c1                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: minimum of XMM0 and x/m32\n"
+      "run: x/m32 is XMM1\n"
+      "run: XMM0 is now 2\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x5d: {  // minimum of x32, x/m32
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "minimum of " << Xname[dest] << " and x/m32" << end();
+  const float* src = effective_address_float(modrm);
+  Xmm[dest] = (Xmm[dest] < *src) ? Xmm[dest] : *src;  // minss keeps dest only when strictly smaller; on NaN or a +0/-0 tie the source wins
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+//:: max
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "5f", "maximum of two floats (maxss)");
+
+:(code)
+void test_maxss() {
+  Xmm[0] = 3.0;
+  Xmm[1] = 2.0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 5f c1                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: maximum of XMM0 and x/m32\n"
+      "run: x/m32 is XMM1\n"
+      "run: XMM0 is now 3\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x5f: {  // maximum of x32, x/m32
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "maximum of " << Xname[dest] << " and x/m32" << end();
+  const float* src = effective_address_float(modrm);
+  Xmm[dest] = (Xmm[dest] > *src) ? Xmm[dest] : *src;  // maxss keeps dest only when strictly larger; on NaN or a +0/-0 tie the source wins
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+//:: reciprocal
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "53", "reciprocal of float (rcpss)");
+
+:(code)
+void test_rcpss() {
+  Xmm[1] = 2.0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 53 c1                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: reciprocal of x/m32 into XMM0\n"
+      "run: x/m32 is XMM1\n"
+      "run: XMM0 is now 0.5\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x53: {  // reciprocal of x/m32 into x32
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "reciprocal of x/m32 into " << Xname[dest] << end();
+  const float* src = effective_address_float(modrm);
+  Xmm[dest] = 1.0 / *src;
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+//:: square root
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "51", "square root of float (sqrtss)");
+
+:(code)
+void test_sqrtss() {
+  Xmm[1] = 2.0;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 51 c1                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: square root of x/m32 into XMM0\n"
+      "run: x/m32 is XMM1\n"
+      "run: XMM0 is now 1.41421\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x51: {  // square root of x/m32 into x32
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "square root of x/m32 into " << Xname[dest] << end();
+  const float* src = effective_address_float(modrm);
+  Xmm[dest] = sqrt(*src);
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+:(before "End Includes")
+#include <math.h>
+
+//:: inverse square root
+
+:(before "End Initialize Op Names")
+put_new(Name_f3_0f, "52", "inverse square root of float (rsqrtss)");
+
+:(code)
+void test_rsqrtss() {
+  Xmm[1] = 0.01;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "f3 0f 52 c1                                    \n"
+      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: inverse square root of x/m32 into XMM0\n"
+      "run: x/m32 is XMM1\n"
+      "run: XMM0 is now 10\n"
+  );
+}
+
+:(before "End Three-Byte Opcodes Starting With f3 0f")
+case 0x52: {  // inverse square root of x/m32 into x32
+  const uint8_t modrm = next();
+  const uint8_t dest = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "inverse square root of x/m32 into " << Xname[dest] << end();
+  const float* src = effective_address_float(modrm);
+  Xmm[dest] = 1.0 / sqrt(*src);
+  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+  break;
+}
+
+:(code)
+float* effective_address_float(uint8_t modrm) {
+  const uint8_t mod = (modrm>>6);
+  // ignore middle 3 'reg opcode' bits
+  const uint8_t rm = modrm & 0x7;
+  if (mod == 3) {
+    // mod 3 is just register direct addressing
+    trace(Callstack_depth+1, "run") << "x/m32 is " << Xname[rm] << end();
+    return &Xmm[rm];
+  }
+  uint32_t addr = effective_address_number(modrm);
+  trace(Callstack_depth+1, "run") << "effective address contains " << read_mem_f32(addr) << end();
+  return mem_addr_f32(addr);
+}
+
+//: compare
+
+:(before "End Initialize Op Names")
+put_new(Name_0f, "2f", "compare: set CF if x32 < xm32 (comiss)");
+
+:(code)
+void test_compare_x32_with_mem_at_rm32() {
+  Reg[EAX].i = 0x2000;
+  Xmm[3] = 0.5;
+  run(
+      "== code 0x1\n"
+      // op     ModR/M  SIB   displacement  immediate
+      "  0f 2f  18                                    \n"  // compare XMM3 with *EAX
+      // ModR/M in binary: 00 (indirect mode) 011 (lhs XMM3) 000 (rhs EAX)
+      "== data 0x2000\n"
+      "00 00 00 00\n"  // 0x00000000 = 0.0
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: compare XMM3 with x/m32\n"
+      "run: effective address is 0x00002000 (EAX)\n"
+      "run: SF=0; ZF=0; CF=0; OF=0\n"
+  );
+}
+
+:(before "End Two-Byte Opcodes Starting With 0f")
+case 0x2f: {  // set CF if x32 < x/m32
+  const uint8_t modrm = next();
+  const uint8_t reg1 = (modrm>>3)&0x7;
+  trace(Callstack_depth+1, "run") << "compare " << Xname[reg1] << " with x/m32" << end();
+  const float* arg2 = effective_address_float(modrm);
+  // Flag settings per the Intel manual: greater -> ZF=0,CF=0; less -> CF=1; equal -> ZF=1; unordered (a NaN operand) -> ZF=1,CF=1.
+  // See also https://stackoverflow.com/questions/7057501/x86-assembler-floating-point-compare/7057771#7057771
+  SF = OF = false;
+  ZF = !(Xmm[reg1] < *arg2) && !(Xmm[reg1] > *arg2);  // equal, or unordered (every comparison with NaN is false)
+  CF = !(Xmm[reg1] >= *arg2);  // strictly less, or unordered
+  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+  break;
+}
diff --git a/linux/bootstrap/029syscalls.cc b/linux/bootstrap/029syscalls.cc
new file mode 100644
index 00000000..84a6af34
--- /dev/null
+++ b/linux/bootstrap/029syscalls.cc
@@ -0,0 +1,126 @@
+:(before "End Initialize Op Names")
+put_new(Name, "cd", "software interrupt (int)");
+
+:(before "End Single-Byte Opcodes")
+case 0xcd: {  // int imm8 (software interrupt)
+  trace(Callstack_depth+1, "run") << "syscall" << end();
+  uint8_t code = next();  // interrupt vector; only 0x80 is handled below
+  if (code != 0x80) {
+    raise << "Unimplemented interrupt code " << HEXBYTE << NUM(code) << '\n' << end();  // NUM: print as a number, not a raw char
+    raise << "  Only `int 80h` supported for now.\n" << end();
+    break;
+  }
+  process_int80();
+  break;
+}
+
+:(code)
+void process_int80() {  // dispatch on the syscall number in EAX; arguments are taken from EBX, ECX and EDX, the result goes back into EAX
+  switch (Reg[EAX].u) {
+  case 1:  // exit
+    exit(/*exit code*/Reg[EBX].u);
+    break;
+  case 3:  // read
+    trace(Callstack_depth+1, "run") << "read: " << Reg[EBX].u << ' ' << Reg[ECX].u << ' ' << Reg[EDX].u << end();
+    Reg[EAX].i = read(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
+    trace(Callstack_depth+1, "run") << "result: " << Reg[EAX].i << end();
+    if (Reg[EAX].i == -1) raise << "read: " << strerror(errno) << '\n' << end();
+    break;
+  case 4:  // write
+    trace(Callstack_depth+1, "run") << "write: " << Reg[EBX].u << ' ' << Reg[ECX].u << ' ' << Reg[EDX].u << end();
+    trace(Callstack_depth+1, "run") << Reg[ECX].u << " => " << mem_addr_string(Reg[ECX].u, Reg[EDX].u) << end();
+    Reg[EAX].i = write(/*file descriptor*/Reg[EBX].u, /*memory buffer*/mem_addr_u8(Reg[ECX].u), /*size*/Reg[EDX].u);
+    trace(Callstack_depth+1, "run") << "result: " << Reg[EAX].i << end();
+    if (Reg[EAX].i == -1) raise << "write: " << strerror(errno) << '\n' << end();
+    break;
+  case 5: {  // open
+    check_flags(ECX);
+    check_mode(EDX);
+    trace(Callstack_depth+1, "run") << "open: " << Reg[EBX].u << ' ' << Reg[ECX].u << end();
+    trace(Callstack_depth+1, "run") << Reg[EBX].u << " => " << mem_addr_kernel_string(Reg[EBX].u) << end();
+    Reg[EAX].i = open(/*filename*/mem_addr_kernel_string(Reg[EBX].u), /*flags*/Reg[ECX].u, /*mode*/0640);  // EDX is only validated above; the host file always gets 0640
+    trace(Callstack_depth+1, "run") << "result: " << Reg[EAX].i << end();
+    if (Reg[EAX].i == -1) raise << "open: " << strerror(errno) << '\n' << end();
+    break;
+  }
+  case 6:  // close
+    trace(Callstack_depth+1, "run") << "close: " << Reg[EBX].u << end();
+    Reg[EAX].i = close(/*file descriptor*/Reg[EBX].u);
+    trace(Callstack_depth+1, "run") << "result: " << Reg[EAX].i << end();
+    if (Reg[EAX].i == -1) raise << "close: " << strerror(errno) << '\n' << end();
+    break;
+  case 8:  // creat
+    check_mode(ECX);
+    trace(Callstack_depth+1, "run") << "creat: " << Reg[EBX].u << end();
+    trace(Callstack_depth+1, "run") << Reg[EBX].u << " => " << mem_addr_kernel_string(Reg[EBX].u) << end();
+    Reg[EAX].i = creat(/*filename*/mem_addr_kernel_string(Reg[EBX].u), /*mode*/0640);  // ECX is only validated above; the host file always gets 0640
+    trace(Callstack_depth+1, "run") << "result: " << Reg[EAX].i << end();
+    if (Reg[EAX].i == -1) raise << "creat: " << strerror(errno) << '\n' << end();
+    break;
+  case 10:  // unlink
+    trace(Callstack_depth+1, "run") << "unlink: " << Reg[EBX].u << end();
+    trace(Callstack_depth+1, "run") << Reg[EBX].u << " => " << mem_addr_kernel_string(Reg[EBX].u) << end();
+    Reg[EAX].i = unlink(/*filename*/mem_addr_kernel_string(Reg[EBX].u));
+    trace(Callstack_depth+1, "run") << "result: " << Reg[EAX].i << end();
+    if (Reg[EAX].i == -1) raise << "unlink: " << strerror(errno) << '\n' << end();
+    break;
+  case 38:  // rename
+    trace(Callstack_depth+1, "run") << "rename: " << Reg[EBX].u << " -> " << Reg[ECX].u << end();
+    trace(Callstack_depth+1, "run") << Reg[EBX].u << " => " << mem_addr_kernel_string(Reg[EBX].u) << end();
+    trace(Callstack_depth+1, "run") << Reg[ECX].u << " => " << mem_addr_kernel_string(Reg[ECX].u) << end();
+    Reg[EAX].i = rename(/*old filename*/mem_addr_kernel_string(Reg[EBX].u), /*new filename*/mem_addr_kernel_string(Reg[ECX].u));
+    trace(Callstack_depth+1, "run") << "result: " << Reg[EAX].i << end();
+    if (Reg[EAX].i == -1) raise << "rename: " << strerror(errno) << '\n' << end();
+    break;
+  case 90:  // mmap: allocate memory outside existing segment allocations
+    trace(Callstack_depth+1, "run") << "mmap: allocate new segment" << end();
+    // Ignore most arguments for now: address hint, protection flags, sharing flags, fd, offset.
+    // We only support anonymous maps.
+    Reg[EAX].u = new_segment(/*length*/read_mem_u32(Reg[EBX].u+0x4));  // EBX points at an argument block in memory; the length is its second word
+    trace(Callstack_depth+1, "run") << "result: " << Reg[EAX].u << end();
+    break;
+  case 0xa2:  // nanosleep
+    cerr << "not sleeping\n";  // deliberately a no-op; EAX is left unchanged
+    break;
+  default:
+    raise << HEXWORD << EIP << ": unimplemented syscall " << Reg[EAX].u << '\n' << end();
+  }
+}
+
+// SubX is oblivious to file permissions, directories, symbolic links, terminals, and much else besides.
+// Also ignoring any concurrency considerations for now.
+void check_flags(int reg) {  // abort unless Reg[reg] holds plain O_RDONLY or O_WRONLY
+  uint32_t flags = Reg[reg].u;
+  if (flags != (flags & O_ACCMODE)) {  // any bit outside the access mode (O_CREAT, O_APPEND, ...) is unsupported
+    cerr << HEXWORD << EIP << ": most POSIX flags to the open() syscall are not supported. Just O_RDONLY and O_WRONLY for now. Zero concurrent access support.\n";
+    exit(1);
+  }
+  if (flags != O_RDONLY && flags != O_WRONLY) {  // O_RDONLY is 0, so it can't be tested with '&'; compare the access mode directly
+    cerr << HEXWORD << EIP << ": can't open a file for both reading and writing at once. See http://man7.org/linux/man-pages/man2/open.2.html.\n";
+    exit(1);
+  }
+}
+
+void check_mode(int reg) {  // abort unless Reg[reg] holds the single file mode accepted here
+  if (Reg[reg].u != 0600) {  // octal 0600 is the 0x180 quoted in the message below
+    cerr << HEXWORD << EIP << ": SubX is oblivious to file permissions; register " << reg << " must be 0x180.\n";
+    exit(1);
+  }
+}
+
+:(before "End Globals")
+// Very primitive/fixed/insecure mmap segments for now.
+uint32_t Segments_allocated_above = END_HEAP;
+:(code)
+// always allocate multiples of the segment size
+uint32_t new_segment(uint32_t length) {  // reserve `length` bytes below every earlier segment; returns the new segment's start address
+  assert(length > 0);
+  uint32_t result = (Segments_allocated_above - length) & 0xff000000;  // same number of zeroes as SEGMENT_ALIGNMENT
+  if (length >= Segments_allocated_above - START_HEAP || result <= START_HEAP) {  // first test catches unsigned wraparound in the subtraction above
+    raise << "Allocated too many segments; the VM ran out of memory. "
+          << "Maybe SEGMENT_ALIGNMENT can be smaller?\n" << die();
+  }
+  Mem.push_back(vma(result, result+length));
+  Segments_allocated_above = result;  // the next segment goes below this one
+  return result;
+}
diff --git a/linux/bootstrap/030translate.cc b/linux/bootstrap/030translate.cc
new file mode 100644
index 00000000..3c4c7f2f
--- /dev/null
+++ b/linux/bootstrap/030translate.cc
@@ -0,0 +1,206 @@
+//: After that lengthy prelude to define an x86 emulator, we are now ready to
+//: start translating SubX notation.
+
+//: Translator workflow: read 'source' file. Run a series of transforms on it,
+//: each passing through what it doesn't understand. The final program should
+//: be just machine code, suitable to emulate, or to write to an ELF binary.
+
+:(before "End Main")
+if (is_equal(argv[1], "translate")) {  // handles 'bootstrap translate file1 file2 ... -o output'
+  // Outside of tests, traces must be explicitly requested.
+  if (Trace_file.is_open()) Trace_stream = new trace_stream;
+  reset();
+  // Begin bootstrap translate
+  program p;
+  string output_filename;
+  for (int i = /*skip 'bootstrap translate'*/2;  i < argc;  ++i) {
+    if (is_equal(argv[i], "-o")) {
+      ++i;  // the output filename is the argument right after '-o'
+      if (i >= argc) {
+        print_translate_usage();
+        cerr << "'-o' must be followed by a filename to write results to\n";
+        exit(1);
+      }
+      output_filename = argv[i];  // if '-o' is repeated, the last one wins
+    }
+    else {  // every other argument is an input file; all of them are parsed into the same program 'p'
+      trace(2, "parse") << argv[i] << end();
+      ifstream fin(argv[i]);
+      if (!fin) {
+        cerr << "could not open " << argv[i] << '\n';
+        return 1;
+      }
+      parse(fin, p);
+      if (trace_contains_errors()) return 1;  // stop at the first file that fails to parse
+    }
+  }
+  if (p.segments.empty()) {
+    print_translate_usage();
+    cerr << "nothing to do; must provide at least one file to read\n";
+    exit(1);
+  }
+  if (output_filename.empty()) {
+    print_translate_usage();
+    cerr << "must provide a filename to write to using '-o'\n";
+    exit(1);
+  }
+  trace(2, "transform") << "begin" << end();
+  transform(p);
+  if (trace_contains_errors()) return 1;  // nothing has been written yet at this point
+  trace(2, "translate") << "begin" << end();
+  save_elf(p, output_filename);
+  if (trace_contains_errors()) {
+    unlink(output_filename.c_str());  // remove the output file if writing it raised errors
+    return 1;
+  }
+  // End bootstrap translate
+  return 0;
+}
+
+:(code)
+void transform(program& p) {
+  // End transform(program& p)
+}
+
+void print_translate_usage() {
+  cerr << "Usage: bootstrap translate file1 file2 ... -o output\n";
+}
+
+// write out a program to a bare-bones ELF file
+void save_elf(const program& p, const string& filename) {
+  ofstream out(filename.c_str(), ios::binary);
+  save_elf(p, out);
+  out.close();
+}
+
+void save_elf(const program& p, ostream& out) {  // writes the ELF headers to 'out', then the bytes of every segment
+  // validation: stay consistent with the self-hosted translator
+  if (p.entry == 0) {  // nothing is written to 'out' when validation fails
+    raise << "no 'Entry' label found\n" << end();
+    return;
+  }
+  if (find(p, "data") == NULL) {
+    raise << "must include a 'data' segment\n" << end();
+    return;
+  }
+  // processing
+  write_elf_header(out, p);
+  for (size_t i = 0;  i < p.segments.size();  ++i)
+    write_segment(p.segments.at(i), out);  // same segment order as the program headers emitted by write_elf_header
+}
+
+void write_elf_header(ostream& out, const program& p) {  // emits the ELF file header, then one program header per segment
+  char c = '\0';
+#define O(X)  c = (X); out.write(&c, sizeof(c))
+// host is required to be little-endian (emit() copies the variable's in-memory bytes verbatim)
+#define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
+  //// ehdr
+  // e_ident
+  O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
+    O(0x1);  // 32-bit format
+    O(0x1);  // little-endian
+    O(0x1); O(0x0);  // presumably the ELF version byte (1) followed by a zero byte -- check against the ELF spec
+  for (size_t i = 0;  i < 8;  ++i) { O(0x0); }  // e_ident is now 16 bytes long
+  // e_type
+  O(0x02); O(0x00);  // 2 is ET_EXEC (executable file) in the ELF spec
+  // e_machine
+  O(0x03); O(0x00);  // 3 is EM_386 in the ELF spec
+  // e_version
+  O(0x01); O(0x00); O(0x00); O(0x00);
+  // e_entry
+  uint32_t e_entry = p.entry;
+  // Override e_entry
+  emit(e_entry);
+  // e_phoff -- immediately after ELF header
+  uint32_t e_phoff = 0x34;
+  emit(e_phoff);
+  // e_shoff; unused
+  uint32_t dummy32 = 0;
+  emit(dummy32);
+  // e_flags; unused
+  emit(dummy32);
+  // e_ehsize
+  uint16_t e_ehsize = 0x34;
+  emit(e_ehsize);
+  // e_phentsize
+  uint16_t e_phentsize = 0x20;
+  emit(e_phentsize);
+  // e_phnum
+  uint16_t e_phnum = SIZE(p.segments);
+  emit(e_phnum);
+  // e_shentsize
+  uint16_t dummy16 = 0x0;
+  emit(dummy16);
+  // e_shnum
+  emit(dummy16);
+  // e_shstrndx
+  emit(dummy16);
+
+  uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;  // file offset of the first segment's bytes
+  for (int i = 0;  i < SIZE(p.segments);  ++i) {
+    const segment& curr = p.segments.at(i);
+    //// phdr
+    // p_type
+    uint32_t p_type = 0x1;  // 1 is PT_LOAD (loadable segment) in the ELF spec
+    emit(p_type);
+    // p_offset
+    emit(p_offset);
+    // p_vaddr
+    uint32_t p_start = curr.start;
+    emit(p_start);
+    // p_paddr
+    emit(p_start);
+    // p_filesz
+    uint32_t size = num_words(curr);  // write_segment emits one byte per word, so this is also the size in bytes
+    assert(p_offset + size < SEGMENT_ALIGNMENT);
+    emit(size);
+    // p_memsz
+    emit(size);
+    // p_flags
+    uint32_t p_flags = (curr.name == "code") ? /*r-x*/0x5 : /*rw-*/0x6;
+    emit(p_flags);
+
+    // p_align
+    // "As the system creates or augments a process image, it logically copies
+    // a file's segment to a virtual memory segment.  When—and if— the system
+    // physically reads the file depends on the program's execution behavior,
+    // system load, and so on.  A process does not require a physical page
+    // unless it references the logical page during execution, and processes
+    // commonly leave many pages unreferenced. Therefore delaying physical
+    // reads frequently obviates them, improving system performance. To obtain
+    // this efficiency in practice, executable and shared object files must
+    // have segment images whose file offsets and virtual addresses are
+    // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
+    uint32_t p_align = 0x1000;  // default page size on linux
+    emit(p_align);
+    if (p_offset % p_align != p_start % p_align) {  // the congruence requirement quoted above
+      raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
+      return;  // NOTE(review): this skips the #undef lines below only textually; the remaining program headers are not written
+    }
+
+    // prepare for next segment
+    p_offset += size;  // segments are laid out back to back in the file
+  }
+#undef O
+#undef emit
+}
+
+void write_segment(const segment& s, ostream& out) {  // emits the bytes of 's' to 'out', one per word, in line order
+  for (int row = 0;  row < SIZE(s.lines);  ++row) {
+    const vector<word>& bytes = s.lines.at(row).words;
+    for (int col = 0;  col < SIZE(bytes);  ++col) {
+      const uint8_t raw = hex_byte(bytes.at(col).data);  // only the data matters now; metadata has served its purpose
+      out.write(reinterpret_cast<const char*>(&raw), sizeof(raw));
+    }
+  }
+}
+
+uint32_t num_words(const segment& s) {  // total number of words across every line of 's'
+  uint32_t count = 0;
+  for (int k = SIZE(s.lines)-1;  k >= 0;  --k)
+    count += SIZE(s.lines.at(k).words);
+  return count;
+}
+
+:(before "End Includes")
+using std::ios;
diff --git a/linux/bootstrap/031transforms.cc b/linux/bootstrap/031transforms.cc
new file mode 100644
index 00000000..5f13b697
--- /dev/null
+++ b/linux/bootstrap/031transforms.cc
@@ -0,0 +1,12 @@
+:(before "End Types")
+typedef void (*transform_fn)(program&);
+:(before "End Globals")
+vector<transform_fn> Transform;
+
+:(before "End transform(program& p)")
+for (int t = 0;  t < SIZE(Transform);  ++t)
+  (*Transform.at(t))(p);
+
+:(before "End One-time Setup")
+// Begin Transforms
+// End Transforms
diff --git a/linux/bootstrap/032operands.cc b/linux/bootstrap/032operands.cc
new file mode 100644
index 00000000..8c163932
--- /dev/null
+++ b/linux/bootstrap/032operands.cc
@@ -0,0 +1,641 @@
+//: Metadata for fields of an x86 instruction.
+//:
+//: The x86 instruction set is variable-length, and how a byte is interpreted
+//: affects later instruction boundaries. A lot of the pain in programming
+//: machine code stems from computer and programmer going out of sync on what
+//: a byte means. The miscommunication is usually not immediately caught, and
+//: metastasizes at runtime into kilobytes of misinterpreted instructions.
+//:
+//: To mitigate these issues, we'll start programming in terms of logical
+//: arguments rather than physical bytes. Some arguments are smaller than a
+//: byte, and others may consist of multiple bytes. This layer will correctly
+//: pack and order the bytes corresponding to the arguments in an instruction.
+
+:(before "End Help Texts")
+put_new(Help, "instructions",
+  "Each x86 instruction consists of an instruction or opcode and some number\n"
+  "of arguments.\n"
+  "Each argument has a type. An instruction won't have more than one argument of\n"
+  "any type.\n"
+  "Each instruction has some set of allowed argument types. It'll reject others.\n"
+  "The complete list of argument types: mod, subop, r32 (integer register),\n"
+  "rm32 (integer register or memory), x32 (floating point register),\n"
+  "xm32 (floating point register or memory), scale, index, base, disp8, disp16,\n"
+  "disp32,imm8,imm32.\n"
+  "Each of these has its own help page. Try reading 'bootstrap help mod' next.\n"
+);
+:(before "End Help Contents")
+cerr << "  instructions\n";
+
+:(before "Running Test Program")
+transform(p);
+if (trace_contains_errors()) return;
+
+:(code)
+void test_pack_immediate_constants() {
+  run(
+      "== code 0x1\n"
+      "bb  0x2a/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'bb 0x2a/imm32'\n"
+      "transform: instruction after packing: 'bb 2a 00 00 00'\n"
+      "run: copy imm32 0x0000002a to EBX\n"
+  );
+}
+
+//: complete set of valid argument types
+
+:(before "End Globals")
+set<string> Instruction_arguments;
+:(before "End One-time Setup")
+Instruction_arguments.insert("subop");
+Instruction_arguments.insert("mod");
+Instruction_arguments.insert("rm32");
+Instruction_arguments.insert("xm32");
+Instruction_arguments.insert("base");
+Instruction_arguments.insert("index");
+Instruction_arguments.insert("scale");
+Instruction_arguments.insert("r32");
+Instruction_arguments.insert("x32");
+Instruction_arguments.insert("disp8");
+Instruction_arguments.insert("disp16");
+Instruction_arguments.insert("disp32");
+Instruction_arguments.insert("imm8");
+Instruction_arguments.insert("imm32");
+
+:(before "End Help Texts")
+init_argument_type_help();
+:(code)
+void init_argument_type_help() {
+  put(Help, "mod",
+    "2-bit argument controlling the _addressing mode_ of many instructions,\n"
+    "to determine how to compute the _effective address_ to look up memory at\n"
+    "based on the 'rm32' argument and potentially others.\n"
+    "\n"
+    "If mod = 3, just operate on the contents of the register specified by rm32\n"
+    "            (direct mode).\n"
+    "If mod = 2, effective address is usually* rm32 + disp32\n"
+    "            (indirect mode with displacement).\n"
+    "If mod = 1, effective address is usually* rm32 + disp8\n"
+    "            (indirect mode with displacement).\n"
+    "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
+    "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
+    "     Using it as an address gets more involved. For more details,\n"
+    "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
+    "\n"
+    "For complete details, spend some time with two tables in the IA-32 software\n"
+    "developer's manual that are also included in this repo:\n"
+    "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
+    "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
+  );
+  put(Help, "subop",
+    "Additional 3-bit argument for determining the instruction when the opcode\n"
+    "is 81, 8f, d3, f7 or ff.\n"
+    "Can't coexist with argument of type 'r32' in a single instruction, because\n"
+    "the two use the same bits.\n"
+  );
+  put(Help, "r32",
+    "3-bit argument specifying an integer register argument used directly,\n"
+    "without any further addressing modes.\n"
+  );
+  put(Help, "x32",
+    "3-bit argument specifying a floating-point register argument used directly,\n"
+    "without any further addressing modes.\n"
+  );
+  put(Help, "rm32",
+    "32-bit value in an integer register or memory. The precise details of its\n"
+    "construction depend on the eponymous 3-bit 'rm32' argument, the 'mod' argument,\n"
+    "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
+    "and a displacement ('disp8' or 'disp32').\n"
+    "\n"
+    "For complete details, spend some time with two tables in the IA-32 software\n"
+    "developer's manual that are also included in this repo:\n"
+    "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
+    "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
+  );
+  put(Help, "xm32",
+    "32-bit value in a floating-point register or memory. The precise details of its\n"
+    "construction depend on the eponymous 3-bit 'xm32' argument, the 'mod' argument,\n"
+    "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
+    "and a displacement ('disp8' or 'disp32').\n"
+    "\n"
+    "For complete details, spend some time with two tables in the IA-32 software\n"
+    "developer's manual that are also included in this repo:\n"
+    "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
+    "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
+    "\n"
+    "One subtlety here: while /xm32 refers to floating-point registers in direct mode\n"
+    "(when /mod is 3), other addressing modes to construct memory addresses use integer registers\n"
+    "(just like /rm32). Other than direct mode, its behavior is identical to /rm32.\n"
+  );
+  put(Help, "base",
+    "Additional 3-bit argument (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
+    "register containing an address to look up.\n"
+    "This address may be further modified by 'index' and 'scale' arguments.\n"
+    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
+    "For complete details, spend some time with the IA-32 software developer's manual,\n"
+    "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
+    "It is included in this repository as 'sib.pdf'.\n"
+  );
+  put(Help, "index",
+    "Optional 3-bit argument (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
+    "the 'base' argument to compute the 'effective address' at which to look up memory.\n"
+    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
+    "For complete details, spend some time with the IA-32 software developer's manual,\n"
+    "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
+    "It is included in this repository as 'sib.pdf'.\n"
+  );
+  put(Help, "scale",
+    "Optional 2-bit argument (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
+    "power of 2 to be multiplied to the 'index' argument before adding the result to\n"
+    "the 'base' argument to compute the _effective address_ to operate on.\n"
+    "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
+    "\n"
+    "When scale is 0, use index unmodified.\n"
+    "When scale is 1, multiply index by 2.\n"
+    "When scale is 2, multiply index by 4.\n"
+    "When scale is 3, multiply index by 8.\n"
+    "\n"
+    "For complete details, spend some time with the IA-32 software developer's manual,\n"
+    "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
+    "It is included in this repository as 'sib.pdf'.\n"
+  );
+  put(Help, "disp8",
+    "8-bit value to be added in many instructions.\n"
+  );
+  put(Help, "disp16",
+    "16-bit value to be added in many instructions.\n"
+    "Currently not used in any SubX instructions.\n"
+  );
+  put(Help, "disp32",
+    "32-bit value to be added in many instructions.\n"
+  );
+  put(Help, "imm8",
+    "8-bit value for many instructions.\n"
+  );
+  put(Help, "imm32",
+    "32-bit value for many instructions.\n"
+  );
+}
+
+//:: transform packing arguments into bytes in the right order
+
+:(after "Begin Transforms")
+Transform.push_back(pack_arguments);
+
+:(code)
+void pack_arguments(program& p) {  // transform: turn the tagged arguments of every code instruction into plain bytes
+  if (p.segments.empty() || find(p, "code") == NULL) return;  // without a code segment there's nothing to pack (and nothing safe to dereference below)
+  segment& code = *find(p, "code");
+  // Pack Operands(segment code)
+  trace(3, "transform") << "-- pack arguments" << end();
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    line& inst = code.lines.at(i);
+    if (all_hex_bytes(inst)) continue;  // already just bytes; leave the line alone
+    trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
+    pack_arguments(inst);
+    trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
+  }
+}
+
+void pack_arguments(line& inst) {  // rebuilds the words of 'inst' in the right order: opcodes, modrm, sib, disp, imm
+  line new_inst;
+  add_opcodes(inst, new_inst);
+  add_modrm_byte(inst, new_inst);  // adds a byte only if 'inst' has a mod/rm32/r32/xm32/x32/subop argument
+  add_sib_byte(inst, new_inst);  // adds a byte only if 'inst' has a scale/index/base argument
+  add_disp_bytes(inst, new_inst);
+  add_imm_bytes(inst, new_inst);
+  inst.words.swap(new_inst.words);  // only the words are replaced; any other state in 'inst' is kept
+}
+
+// Copy the opcode of 'in' to 'out': 1 word normally, 2 when the first is 0f/f2/f3, 3 for 'f2 0f' and 'f3 0f'.
+void add_opcodes(const line& in, line& out) {
+  const string first = in.words.at(0).data;
+  out.words.push_back(in.words.at(0));
+  if (first != "0f" && first != "f2" && first != "f3") return;
+  out.words.push_back(in.words.at(1));
+  if (first != "0f" && in.words.at(1).data == "0f")
+    out.words.push_back(in.words.at(2));
+}
+
+void add_modrm_byte(const line& in, line& out) {  // appends one ModR/M byte to 'out' if 'in' has any argument stored in it
+  uint8_t mod=0, reg_subop=0, rm32=0;  // fields that 'in' doesn't mention stay 0
+  bool emit = false;
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_argument_metadata(curr, "mod")) {
+      mod = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_argument_metadata(curr, "rm32")) {
+      rm32 = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_argument_metadata(curr, "r32")) {
+      reg_subop = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_argument_metadata(curr, "xm32")) {  // occupies the same bits as rm32
+      rm32 = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_argument_metadata(curr, "x32")) {  // occupies the same bits as r32
+      reg_subop = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_argument_metadata(curr, "subop")) {  // occupies the same bits as r32
+      reg_subop = hex_byte(curr.data);
+      emit = true;
+    }
+  }
+  if (emit)
+    out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));  // mod in bits 7-6, reg/subop in bits 5-3, rm in bits 2-0; values aren't range-checked here
+}
+
+void add_sib_byte(const line& in, line& out) {  // appends one SIB byte to 'out' if 'in' has a scale, index or base argument
+  uint8_t scale=0, index=0, base=0;  // fields that 'in' doesn't mention stay 0
+  bool emit = false;
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_argument_metadata(curr, "scale")) {
+      scale = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_argument_metadata(curr, "index")) {
+      index = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_argument_metadata(curr, "base")) {
+      base = hex_byte(curr.data);
+      emit = true;
+    }
+  }
+  if (emit)
+    out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));  // scale in bits 7-6, index in bits 5-3, base in bits 2-0; values aren't range-checked here
+}
+
+void add_disp_bytes(const line& in, line& out) {  // appends every displacement argument of 'in' to 'out' at its tagged width
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_argument_metadata(curr, "disp8"))
+      emit_hex_bytes(out, curr, 1);
+    else if (has_argument_metadata(curr, "disp16"))  // a word matches at most one argument type, so these tests are mutually exclusive
+      emit_hex_bytes(out, curr, 2);
+    else if (has_argument_metadata(curr, "disp32"))
+      emit_hex_bytes(out, curr, 4);
+  }
+}
+
+// Append every immediate argument of 'in' to 'out':
+// 1 byte for imm8, 4 bytes for imm32.
+void add_imm_bytes(const line& in, line& out) {
+  for (int k = 0;  k < SIZE(in.words);  ++k) {
+    const word& arg = in.words.at(k);
+    const int width = has_argument_metadata(arg, "imm8") ? 1 : has_argument_metadata(arg, "imm32") ? 4 : 0;
+    if (width > 0) emit_hex_bytes(out, arg, width);
+  }
+}
+
+// Append argument 'w' to 'out'. Multi-byte numbers are split into 'num' separate bytes;
+// single-byte numbers and non-numbers stay a single word so their metadata survives.
+void emit_hex_bytes(line& out, const word& w, int num) {
+  assert(num <= 4);
+  const bool numeric = looks_like_hex_int(w.data);
+  if (numeric && num != 1)
+    return emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
+  out.words.push_back(w);  // preserve existing metadata
+  if (numeric)
+    out.words.back().data = hex_byte_to_string(parse_int(w.data));
+}
+
+// Append the low 'num' bytes of 'val' to 'out' as separate words,
+// least significant byte first.
+void emit_hex_bytes(line& out, uint32_t val, int num) {
+  assert(num <= 4);
+  for (int shift = 0;  shift < num*8;  shift += 8)
+    out.words.push_back(hex_byte_text((val >> shift) & 0xff));
+}
+
+word hex_byte_text(uint8_t val) {
+  word result;
+  result.data = hex_byte_to_string(val);
+  result.original = result.data+"/auto";
+  return result;
+}
+
+string hex_byte_to_string(uint8_t val) {
+  ostringstream out;
+  // uint8_t prints without padding, but int8_t will expand to 32 bits again
+  out << HEXBYTE << NUM(val);
+  return out.str();
+}
+
+// Join the 'data' of each word with single spaces (the 'original' text with metadata is not used).
+string to_string(const vector<word>& in) {
+  ostringstream joined;
+  for (int k = 0;  k < SIZE(in);  ++k) {
+    joined << (k == 0 ? "" : " ") << in.at(k).data;
+  }
+  return joined.str();
+}
+
+:(before "End Unit Tests")
+void test_preserve_metadata_when_emitting_single_byte() {
+  word in;
+  in.data = "f0";
+  in.original = "f0/foo";
+  line out;
+  emit_hex_bytes(out, in, 1);
+  CHECK_EQ(out.words.at(0).data, "f0");
+  CHECK_EQ(out.words.at(0).original, "f0/foo");
+}
+
+:(code)
+void test_pack_disp8() {
+  run(
+      "== code 0x1\n"
+      "74 2/disp8\n"  // jump 2 bytes away if ZF is set
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction '74 2/disp8'\n"
+      "transform: instruction after packing: '74 02'\n"
+  );
+}
+
+void test_pack_disp8_negative() {
+  transform(
+      "== code 0x1\n"
+      // running this will cause an infinite loop
+      "74 -1/disp8\n"  // jump 1 byte before if ZF is set
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction '74 -1/disp8'\n"
+      "transform: instruction after packing: '74 ff'\n"
+  );
+}
+
+void test_pack_rm32_direct() {
+  run(
+      "== code 0x1\n"
+      // instruction                     effective address                                                   operand     displacement    immediate\n"
+      // op          subop               mod             rm32          base        index         scale       r32\n"
+      // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
+      "  01                              3/mod/direct    3/rm32/ebx                                          0/r32/eax                                \n"  // add EAX to EBX
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction '01 3/mod/direct 3/rm32/ebx 0/r32/eax'\n"
+      "transform: instruction after packing: '01 c3'\n"
+  );
+}
+
+void test_pack_rm32_indirect() {
+  transform(
+      "== code 0x1\n"
+      // instruction                     effective address                                                   operand     displacement    immediate\n"
+      // op          subop               mod             rm32          base        index         scale       r32\n"
+      // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
+      "  01                              0/mod/indirect  3/rm32/ebx                                          0/r32/eax                                \n"  // add EAX to *EBX
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction '01 0/mod/indirect 3/rm32/ebx 0/r32/eax'\n"
+      "transform: instruction after packing: '01 03'\n"
+  );
+}
+
+void test_pack_x32() {
+  run(
+      "== code 0x1\n"
+      // instruction                     effective address                                                   operand     displacement    immediate\n"
+      // op          subop               mod             rm32          base        index         scale       r32\n"
+      // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
+      "  f3 0f 2a                        3/mod/direct    3/rm32/ebx                                          1/x32                                    \n"  // convert EBX to XMM1
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'f3 0f 2a 3/mod/direct 3/rm32/ebx 1/x32'\n"
+      "transform: instruction after packing: 'f3 0f 2a cb'\n"
+  );
+}
+
+void test_pack_xm32_direct() {
+  transform(
+      "== code 0x1\n"
+      // instruction                     effective address                                                   operand     displacement    immediate\n"
+      // op          subop               mod             rm32          base        index         scale       r32\n"
+      // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
+      "  f3 0f 5e                        3/mod/direct    3/xm32                                              1/x32                                    \n"  // divide XMM1 by XMM3
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'f3 0f 5e 3/mod/direct 3/xm32 1/x32'\n"
+      "transform: instruction after packing: 'f3 0f 5e cb'\n"
+  );
+}
+
+void test_pack_xm32_indirect() {
+  transform(
+      "== code 0x1\n"
+      // instruction                     effective address                                                   operand     displacement    immediate\n"
+      // op          subop               mod             rm32          base        index         scale       r32\n"
+      // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
+      "  f3 0f 5e                        0/mod/indirect  3/rm32/ebx                                          1/x32                                    \n"  // divide XMM1 by *EBX
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'f3 0f 5e 0/mod/indirect 3/rm32/ebx 1/x32'\n"
+      "transform: instruction after packing: 'f3 0f 5e 0b'\n"
+  );
+}
+
+//: helper for scenario
+void transform(const string& text_bytes) {
+  program p;
+  istringstream in(text_bytes);
+  parse(in, p);
+  if (trace_contains_errors()) return;
+  transform(p);
+}
+
+void test_pack_modrm_imm32() {
+  run(
+      "== code 0x1\n"
+      // instruction                     effective address                                                   operand     displacement    immediate\n"
+      // op          subop               mod             rm32          base        index         scale       r32\n"
+      // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
+      "  81          0/add/subop         3/mod/direct    3/rm32/ebx                                                                      1/imm32      \n"  // add 1 to EBX
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction '81 0/add/subop 3/mod/direct 3/rm32/ebx 1/imm32'\n"
+      "transform: instruction after packing: '81 c3 01 00 00 00'\n"
+  );
+}
+
+void test_pack_imm32_large() {
+  run(
+      "== code 0x1\n"
+      "b9  0x080490a7/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'b9 0x080490a7/imm32'\n"
+      "transform: instruction after packing: 'b9 a7 90 04 08'\n"
+  );
+}
+
+void test_pack_immediate_constants_hex() {
+  run(
+      "== code 0x1\n"
+      "b9  0x2a/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'b9 0x2a/imm32'\n"
+      "transform: instruction after packing: 'b9 2a 00 00 00'\n"
+      "run: copy imm32 0x0000002a to ECX\n"
+  );
+}
+
+void test_pack_silently_ignores_non_hex() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "b9  foo/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'b9 foo/imm32'\n"
+      // no change (we're just not printing metadata to the trace)
+      "transform: instruction after packing: 'b9 foo'\n"
+  );
+}
+
+void test_pack_flags_bad_hex() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "b9  0xfoo/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: not a number: 0xfoo\n"
+  );
+}
+
+void test_pack_flags_uppercase_hex() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "b9 0xAb/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: uppercase hex not allowed: 0xAb\n"
+  );
+}
+
+//:: helpers
+
+bool all_hex_bytes(const line& inst) {  // true if every word of 'inst' is a bare hex byte (vacuously true for no words)
+  int k = 0;
+  while (k < SIZE(inst.words) && is_hex_byte(inst.words.at(k)))
+    ++k;
+  return k == SIZE(inst.words);
+}
+
+// A word is a bare hex byte if it:
+//   - carries no argument-type metadata,
+//   - is exactly two characters long, and
+//   - consists solely of lowercase hex digits.
+bool is_hex_byte(const word& curr) {
+  return !contains_any_argument_metadata(curr)
+      && SIZE(curr.data) == 2
+      && curr.data.find_first_not_of("0123456789abcdef") == string::npos;
+}
+
+bool contains_any_argument_metadata(const word& word) {  // does any metadata tag of 'word' name a known argument type?
+  bool found = false;
+  for (int k = 0;  !found && k < SIZE(word.metadata);  ++k)
+    found = Instruction_arguments.count(word.metadata.at(k)) > 0;
+  return found;
+}
+
+// Does 'inst' contain a word whose argument type is 'm'?
+// Scanning stops at the second such word: that is a conflict, which
+// raises an error and makes the answer false.
+// Returns true only when exactly one word matches.
+bool has_argument_metadata(const line& inst, const string& m) {
+  int matches = 0;
+  for (int k = 0;  k < SIZE(inst.words) && matches < 2;  ++k)
+    if (has_argument_metadata(inst.words.at(k), m)) ++matches;
+  if (matches < 2) return matches == 1;
+  raise << "'" << to_string(inst) << "' has conflicting " << m << " arguments\n" << end();
+  return false;
+}
+
+bool has_argument_metadata(const word& w, const string& m) {  // is 'm' the one recognized argument type tagged on 'w'?
+  bool result = false;
+  bool metadata_found = false;  // have we already seen a recognized argument type on 'w'?
+  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
+    const string& curr = w.metadata.at(i);
+    if (Instruction_arguments.find(curr) == Instruction_arguments.end()) continue;  // ignore unrecognized metadata
+    if (metadata_found) {  // a second recognized type on the same word: raise an error and report no match
+      raise << "'" << w.original << "' has conflicting argument types; it should have only one\n" << end();
+      return false;
+    }
+    metadata_found = true;
+    result = (curr == m);  // keep scanning so a later conflicting type is still detected
+  }
+  return result;
+}
+
+word metadata(const line& inst, const string& m) {  // returns a copy of the first word of 'inst' whose argument type is 'm'
+  for (int i = 0;  i < SIZE(inst.words);  ++i)
+    if (has_argument_metadata(inst.words.at(i), m)) return inst.words.at(i);
+  assert(false);  // callers must only ask for argument types that 'inst' contains
+  return word();  // only reachable when asserts are compiled out; avoids falling off the end of a non-void function
+}
+
+bool looks_like_hex_int(const string& s) {  // cheap syntactic guess: only the first character of 's' is inspected
+  if (s.empty()) return false;
+  if (s.at(0) == '-' || s.at(0) == '+') return true;  // anything starting with a sign counts, even a lone sign
+  if (isdigit(s.at(0))) return true;  // includes '0x' prefix
+  // End looks_like_hex_int(s) Detectors
+  return false;
+}
+
+string to_string(const line& inst) {
+  ostringstream out;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    if (i > 0) out << ' ';
+    out << inst.words.at(i).original;
+  }
+  return out.str();
+}
+
+int32_t parse_int(const string& s) {  // parses 's' as hexadecimal, with or without '0x'; raises an error and returns 0 on failure
+  if (s.empty()) return 0;  // empty input yields 0 without raising an error
+  if (contains_uppercase(s)) {
+    raise << "uppercase hex not allowed: " << s << '\n' << end();
+    return 0;
+  }
+  istringstream in(s);
+  in >> std::hex;  // every number is read in base 16
+  if (s.at(0) == '-') {  // negative literals are read as signed
+    int32_t result = 0;
+    in >> result;
+    if (!in || !in.eof()) {  // reject unparseable input as well as trailing characters
+      raise << "not a number: " << s << '\n' << end();
+      return 0;
+    }
+    return result;
+  }
+  uint32_t uresult = 0;  // everything else is read as unsigned so that values like 0xffffffff fit
+  in >> uresult;
+  if (!in || !in.eof()) {  // reject unparseable input as well as trailing characters
+    raise << "not a number: " << s << '\n' << end();
+    return 0;
+  }
+  return static_cast<int32_t>(uresult);  // reinterpret the 32 bits as signed
+}
+:(before "End Unit Tests")
+void test_parse_int() {
+  CHECK_EQ(0, parse_int("0"));
+  CHECK_EQ(0, parse_int("0x0"));
+  CHECK_EQ(0, parse_int("0x0"));
+  CHECK_EQ(16, parse_int("10"));  // hex always
+  CHECK_EQ(-1, parse_int("-1"));
+  CHECK_EQ(-1, parse_int("0xffffffff"));
+}
diff --git a/linux/bootstrap/033check_operands.cc b/linux/bootstrap/033check_operands.cc
new file mode 100644
index 00000000..c764fc61
--- /dev/null
+++ b/linux/bootstrap/033check_operands.cc
@@ -0,0 +1,786 @@
+//: Since we're tagging arguments with their types, let's start checking these
+//: argument types for each instruction.
+
+// An opcode that takes an imm8 must be given one.
+void test_check_missing_imm8_argument() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "cd\n"  // interrupt ??
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: 'cd' (software interrupt): missing imm8 argument\n"
+  );
+}
+
+:(before "Pack Operands(segment code)")
+// validate argument types first; give up on the segment if anything is wrong
+check_arguments(code);
+if (trace_contains_errors()) return;
+
+:(code)
+// Check the argument types of each instruction in 'code' in order, giving up
+// once the trace contains errors.
+void check_arguments(const segment& code) {
+  trace(3, "transform") << "-- check arguments" << end();
+  const int num_lines = SIZE(code.lines);
+  for (int curr = 0;  curr < num_lines;  ++curr) {
+    check_arguments(code.lines.at(curr));
+    if (trace_contains_errors()) break;  // stop at the first mal-formed instruction
+  }
+}
+
+// Check a single instruction, dispatching on its first word: '0f' and 'f3'
+// start multi-byte opcodes that have their own checks.
+void check_arguments(const line& inst) {
+  const word op = preprocess_op(inst.words.at(0));
+  if (op.data == "0f")
+    check_arguments_0f(inst);
+  else if (op.data == "f3")
+    check_arguments_f3(inst);
+  else
+    check_arguments(inst, op);
+}
+
+// Canonicalize an opcode word for table lookups: lowercase, no '0x' prefix,
+// and a single digit padded to two. Operates on a copy of the word.
+word preprocess_op(word/*copy*/ op) {
+  string canonical = tolower(op.data.c_str());
+  // opcodes can't be negative, so there's no sign to handle
+  if (starts_with(canonical, "0x"))
+    canonical.erase(0, 2);
+  if (SIZE(canonical) == 1)
+    canonical = "0"+canonical;
+  op.data = canonical;
+  return op;
+}
+
+// '0xf' and '0f' canonicalize to the same opcode.
+void test_preprocess_op() {
+  word w1;  w1.data = "0xf";
+  word w2;  w2.data = "0f";
+  CHECK_EQ(preprocess_op(w1).data, preprocess_op(w2).data);
+}
+
+//: To check the arguments for an opcode, we'll track the permitted arguments
+//: for each supported opcode in a bitvector. That way we can often compute the
+//: 'received' argument bitvector for each instruction's arguments and compare
+//: it with the 'expected' bitvector.
+//:
+//: The 'expected' and 'received' bitvectors can be different; the MODRM bit
+//: in the 'expected' bitvector maps to multiple 'received' argument types in
+//: an instruction. We deal in expected bitvectors throughout.
+
+:(before "End Types")
+// Argument types an opcode can expect. Each enumerator doubles as a bit
+// position in the bitvectors stored in Permitted_arguments.
+enum expected_argument_type {
+  // start from the least significant bit
+  MODRM,  // more complex, may also involve disp8 or disp32
+  SUBOP,  // bit 1
+  DISP8,  // bit 2
+  DISP16,  // bit 3
+  DISP32,  // bit 4
+  IMM8,  // bit 5
+  IMM32,  // bit 6
+  NUM_OPERAND_TYPES  // count of the above; not an argument type itself
+};
+:(before "End Globals")
+vector<string> Operand_type_name;  // bit position -> lowercase name of argument type
+map<string, expected_argument_type> Operand_type;  // lowercase name of argument type -> bit position
+:(before "End One-time Setup")
+init_op_types();  // fill in Operand_type_name and Operand_type
+:(code)
+// Populate Operand_type_name and Operand_type, naming each argument type
+// after its lowercased enumerator ("modrm", "subop", and so on).
+void init_op_types() {
+  assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8);
+  Operand_type_name.resize(NUM_OPERAND_TYPES);
+  #define REGISTER(type) { const string name = tolower(#type);  Operand_type_name.at(type) = name;  put(Operand_type, name, type); }
+  REGISTER(MODRM);
+  REGISTER(SUBOP);
+  REGISTER(DISP8);
+  REGISTER(DISP16);
+  REGISTER(DISP32);
+  REGISTER(IMM8);
+  REGISTER(IMM32);
+  #undef REGISTER
+}
+
+:(before "End Globals")
+map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments;  // single-byte opcode -> argument types it takes, one bit per expected_argument_type
+const uint8_t INVALID_OPERANDS = 0xff;  // no instruction uses all the argument types
+:(before "End One-time Setup")
+init_permitted_arguments();  // fill in the tables of permitted argument types
+:(code)
+// Record which argument types each supported single-byte opcode takes.
+// Each value is a bitvector indexed by expected_argument_type: bit 0 is modrm,
+// bit 1 subop, bit 2 disp8, bit 3 disp16, bit 4 disp32, bit 5 imm8 and bit 6
+// imm32. The small tables in the comments below spell out the bits for each
+// class of opcodes, most significant bit first.
+// No opcode in this function sets the disp16 bit.
+void init_permitted_arguments() {
+  //// Class A: just op, no arguments
+  // halt
+  put(Permitted_arguments, "f4", 0x00);
+  // inc
+  put(Permitted_arguments, "40", 0x00);
+  put(Permitted_arguments, "41", 0x00);
+  put(Permitted_arguments, "42", 0x00);
+  put(Permitted_arguments, "43", 0x00);
+  put(Permitted_arguments, "44", 0x00);
+  put(Permitted_arguments, "45", 0x00);
+  put(Permitted_arguments, "46", 0x00);
+  put(Permitted_arguments, "47", 0x00);
+  // dec
+  put(Permitted_arguments, "48", 0x00);
+  put(Permitted_arguments, "49", 0x00);
+  put(Permitted_arguments, "4a", 0x00);
+  put(Permitted_arguments, "4b", 0x00);
+  put(Permitted_arguments, "4c", 0x00);
+  put(Permitted_arguments, "4d", 0x00);
+  put(Permitted_arguments, "4e", 0x00);
+  put(Permitted_arguments, "4f", 0x00);
+  // push
+  put(Permitted_arguments, "50", 0x00);
+  put(Permitted_arguments, "51", 0x00);
+  put(Permitted_arguments, "52", 0x00);
+  put(Permitted_arguments, "53", 0x00);
+  put(Permitted_arguments, "54", 0x00);
+  put(Permitted_arguments, "55", 0x00);
+  put(Permitted_arguments, "56", 0x00);
+  put(Permitted_arguments, "57", 0x00);
+  // pop
+  put(Permitted_arguments, "58", 0x00);
+  put(Permitted_arguments, "59", 0x00);
+  put(Permitted_arguments, "5a", 0x00);
+  put(Permitted_arguments, "5b", 0x00);
+  put(Permitted_arguments, "5c", 0x00);
+  put(Permitted_arguments, "5d", 0x00);
+  put(Permitted_arguments, "5e", 0x00);
+  put(Permitted_arguments, "5f", 0x00);
+  // sign-extend EAX into EDX
+  put(Permitted_arguments, "99", 0x00);
+  // return
+  put(Permitted_arguments, "c3", 0x00);
+
+  //// Class B: just op and disp8
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     0     0      |0       1     0     0
+
+  // jump
+  put(Permitted_arguments, "eb", 0x04);
+  put(Permitted_arguments, "72", 0x04);
+  put(Permitted_arguments, "73", 0x04);
+  put(Permitted_arguments, "74", 0x04);
+  put(Permitted_arguments, "75", 0x04);
+  put(Permitted_arguments, "76", 0x04);
+  put(Permitted_arguments, "77", 0x04);
+  put(Permitted_arguments, "7c", 0x04);
+  put(Permitted_arguments, "7d", 0x04);
+  put(Permitted_arguments, "7e", 0x04);
+  put(Permitted_arguments, "7f", 0x04);
+
+  //// Class D: just op and disp32
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     0     1      |0       0     0     0
+  put(Permitted_arguments, "e8", 0x10);  // call
+  put(Permitted_arguments, "e9", 0x10);  // jump
+
+  //// Class E: just op and imm8
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     1     0      |0       0     0     0
+  put(Permitted_arguments, "cd", 0x20);  // software interrupt
+
+  //// Class F: just op and imm32
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  1     0     0      |0       0     0     0
+  put(Permitted_arguments, "05", 0x40);  // add
+  put(Permitted_arguments, "2d", 0x40);  // subtract
+  put(Permitted_arguments, "25", 0x40);  // and
+  put(Permitted_arguments, "0d", 0x40);  // or
+  put(Permitted_arguments, "35", 0x40);  // xor
+  put(Permitted_arguments, "3d", 0x40);  // compare
+  put(Permitted_arguments, "68", 0x40);  // push
+  // copy
+  put(Permitted_arguments, "b8", 0x40);
+  put(Permitted_arguments, "b9", 0x40);
+  put(Permitted_arguments, "ba", 0x40);
+  put(Permitted_arguments, "bb", 0x40);
+  put(Permitted_arguments, "bc", 0x40);
+  put(Permitted_arguments, "bd", 0x40);
+  put(Permitted_arguments, "be", 0x40);
+  put(Permitted_arguments, "bf", 0x40);
+
+  //// Class M: using ModR/M byte
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     0     0      |0       0     0     1
+
+  // add
+  put(Permitted_arguments, "01", 0x01);
+  put(Permitted_arguments, "03", 0x01);
+  // subtract
+  put(Permitted_arguments, "29", 0x01);
+  put(Permitted_arguments, "2b", 0x01);
+  // and
+  put(Permitted_arguments, "21", 0x01);
+  put(Permitted_arguments, "23", 0x01);
+  // or
+  put(Permitted_arguments, "09", 0x01);
+  put(Permitted_arguments, "0b", 0x01);
+  // xor
+  put(Permitted_arguments, "31", 0x01);
+  put(Permitted_arguments, "33", 0x01);
+  // compare
+  put(Permitted_arguments, "39", 0x01);
+  put(Permitted_arguments, "3b", 0x01);
+  // copy
+  put(Permitted_arguments, "88", 0x01);
+  put(Permitted_arguments, "89", 0x01);
+  put(Permitted_arguments, "8a", 0x01);
+  put(Permitted_arguments, "8b", 0x01);
+  // swap
+  put(Permitted_arguments, "87", 0x01);
+  // copy address (lea)
+  put(Permitted_arguments, "8d", 0x01);
+
+  //// Class N: op, ModR/M and subop (not r32)
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     0     0      |0       0     1     1
+  put(Permitted_arguments, "8f", 0x03);  // pop
+  put(Permitted_arguments, "d3", 0x03);  // shift
+  put(Permitted_arguments, "f7", 0x03);  // test/not/mul/div
+  put(Permitted_arguments, "ff", 0x03);  // jump/push/call
+
+  //// Class O: op, ModR/M, subop (not r32) and imm8
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     1     0      |0       0     1     1
+  put(Permitted_arguments, "c1", 0x23);  // combine
+  put(Permitted_arguments, "c6", 0x23);  // copy
+
+  //// Class P: op, ModR/M, subop (not r32) and imm32
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  1     0     0      |0       0     1     1
+  put(Permitted_arguments, "81", 0x43);  // combine
+  put(Permitted_arguments, "c7", 0x43);  // copy
+
+  //// Class Q: op, ModR/M and imm32
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  1     0     0      |0       0     0     1
+  put(Permitted_arguments, "69", 0x41);  // multiply
+
+  // End Init Permitted Operands
+}
+
+// Bit helpers; 'bit' is a bit position (0 is least significant).
+// None of them modifies 'bitvector': HAS tests a bit, while SET and CLEAR
+// evaluate to a new value with the bit turned on or off.
+#define HAS(bitvector, bit)  ((bitvector) & (1 << (bit)))
+#define SET(bitvector, bit)  ((bitvector) | (1 << (bit)))
+#define CLEAR(bitvector, bit)  ((bitvector) & (~(1 << (bit))))
+
+// Check 'inst' against the argument types permitted for single-byte opcode 'op'.
+// Opcodes with the MODRM bit get additional ModR/M-specific checks.
+// Does nothing if 'op' isn't a hex byte.
+void check_arguments(const line& inst, const word& op) {
+  if (!is_hex_byte(op)) return;
+  const uint8_t expected = get(Permitted_arguments, op.data);
+  if (!HAS(expected, MODRM)) {
+    compare_bitvector(inst, expected, maybe_name(op));
+    return;
+  }
+  check_arguments_modrm(inst, op);
+  compare_bitvector_modrm(inst, expected, maybe_name(op));
+}
+
+//: Many instructions can be checked just by comparing bitvectors.
+
+// Compare the argument types found in 'inst' against the 'expected' bitvector
+// for its opcode, raising one error for each argument type that is missing or
+// unexpected. 'maybe_op_name' is spliced into the error messages.
+void compare_bitvector(const line& inst, uint8_t expected, const string& maybe_op_name) {
+  // deliberately programming in raw hex; we'll raise a warning elsewhere
+  if (all_hex_bytes(inst) && has_arguments(inst)) return;
+  const uint8_t received = compute_expected_argument_bitvector(inst);
+  if (trace_contains_errors()) return;  // duplicate argument type
+  if (received == expected) return;  // all good with this instruction
+  // only the low NUM_OPERAND_TYPES bits are examined; settings in unused bits are ignored
+  for (int bit = 0;  bit < NUM_OPERAND_TYPES;  ++bit) {
+    const int got = (received >> bit) & 0x1;
+    const int want = (expected >> bit) & 0x1;
+    if (got == want) continue;  // all good with this argument
+    // no early exit: report every problem with this instruction
+    const string& optype = Operand_type_name.at(bit);
+    if (want)
+      raise << "'" << to_string(inst) << "'" << maybe_op_name << ": missing " << optype << " argument\n" << end();
+    else
+      raise << "'" << to_string(inst) << "'" << maybe_op_name << ": unexpected " << optype << " argument\n" << end();
+  }
+}
+
+// Return the name of single-byte opcode 'op' formatted as " (name)", with
+// everything from the first " (" in the stored name dropped.
+// Returns "" if 'op' isn't a hex byte or has no entry in Name.
+string maybe_name(const word& op) {
+  if (!is_hex_byte(op) || !contains_key(Name, op.data)) return "";
+  const string& full_name = get(Name, op.data);
+  const size_t paren = full_name.find(" (");  // npos (keep everything) if there's no parenthetical
+  return " ("+full_name.substr(0, paren)+')';
+}
+
+// OR together the expected bits for every word of 'inst' after the first.
+// Returns INVALID_OPERANDS once the trace contains errors.
+uint32_t compute_expected_argument_bitvector(const line& inst) {
+  set<string> seen;
+  uint32_t result = 0;
+  for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {
+    const word& arg = inst.words.at(i);
+    result |= expected_bit_for_received_argument(arg, seen, inst);
+    if (trace_contains_errors()) return INVALID_OPERANDS;  // duplicate argument type
+  }
+  return result;
+}
+
+// Does 'inst' have any words after its opcode word(s)?
+bool has_arguments(const line& inst) {
+  const int num_opcode_words = first_argument(inst);
+  return num_opcode_words < SIZE(inst.words);
+}
+
+// Return the index of the first word of 'inst' after its opcode words: 2 after
+// a leading '0f', 2 or 3 after a leading 'f2' or 'f3', and 1 otherwise.
+// The result may equal or exceed SIZE(inst.words) if there are no arguments.
+int first_argument(const line& inst) {
+  const string& first = inst.words.at(0).data;
+  if (first == "0f") return 2;
+  if (first == "f2" || first == "f3") {
+    // a lone prefix has no second word to inspect; don't index past the end
+    if (SIZE(inst.words) > 1 && inst.words.at(1).data == "0f")
+      return 3;
+    else
+      return 2;
+  }
+  return 1;
+}
+
+// Scan the metadata of 'w' and return the expected bit corresponding to any argument type.
+// The pieces of an effective address (mod, rm32, r32, xm32, x32, scale, index, base) all map to the single modrm bit.
+// Returns 0 if 'w' has no recognized argument type.
+// Also raise an error and return INVALID_OPERANDS if metadata contains multiple argument types,
+// or an argument type already recorded in 'instruction_arguments' by an earlier word of 'inst'.
+uint32_t expected_bit_for_received_argument(const word& w, set<string>& instruction_arguments, const line& inst) {
+  uint32_t bv = 0;
+  bool found = false;
+  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
+    string/*copy*/ curr = w.metadata.at(i);
+    string expected_metadata = curr;
+    if (curr == "mod" || curr == "rm32" || curr == "r32" || curr == "xm32" || curr == "x32" || curr == "scale" || curr == "index" || curr == "base")
+      expected_metadata = "modrm";
+    else if (!contains_key(Operand_type, curr)) continue;  // ignore unrecognized metadata
+    if (found) {
+      raise << "'" << w.original << "' has conflicting argument types; it should have only one\n" << end();
+      return INVALID_OPERANDS;
+    }
+    if (instruction_arguments.find(curr) != instruction_arguments.end()) {
+      raise << "'" << to_string(inst) << "': duplicate " << curr << " argument\n" << end();
+      return INVALID_OPERANDS;
+    }
+    instruction_arguments.insert(curr);  // record the specific type rather than 'modrm', so say mod and rm32 can coexist
+    bv = (1 << get(Operand_type, expected_metadata));
+    found = true;
+  }
+  return bv;
+}
+
+// A single word may not be tagged with two argument types.
+void test_conflicting_argument_type() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "cd/software-interrupt 80/imm8/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '80/imm8/imm32' has conflicting argument types; it should have only one\n"
+  );
+}
+
+//: Instructions computing effective addresses have more complex rules, so
+//: we'll hard-code a common set of instruction-decoding rules.
+
+// Instructions with a ModR/M byte must specify a mod.
+void test_check_missing_mod_argument() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "81 0/add/subop       3/rm32/ebx 1/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod argument\n"
+  );
+}
+
+// Extra checks for instructions with a ModR/M byte:
+//   - mod and rm32 (or xm32) must be present
+//   - a few opcodes need a subop, and must not have an r32 or x32
+//   - when rm32 is 4, base and index must be present unless mod is 3 (direct
+//     mode), in which case they must be absent
+// 'op' is used to special-case opcodes and to name the instruction in errors.
+void check_arguments_modrm(const line& inst, const word& op) {
+  if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
+  check_argument_metadata_present(inst, "mod", op);
+  if (!has_argument_metadata(inst, "rm32") && !has_argument_metadata(inst, "xm32"))
+    raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing rm32 (or xm32) argument\n" << end();
+  // no check for r32; some instructions don't use it; just assume it's 0 if missing
+  if (op.data == "81" || op.data == "8f" || op.data == "f7" || op.data == "ff") {  // keep sync'd with 'help subop'
+    check_argument_metadata_present(inst, "subop", op);
+    check_argument_metadata_absent(inst, "r32", op, "should be replaced by subop");
+    check_argument_metadata_absent(inst, "x32", op, "should be replaced by subop");
+  }
+  if (trace_contains_errors()) return;  // mod and rm32/xm32 are known to exist below this point
+  if (metadata_m32(inst).data != "4") return;  // compares text, so only a literal '4' triggers the checks below
+  // SIB byte checks
+  uint8_t mod = hex_byte(metadata(inst, "mod").data);
+  if (mod != /*direct*/3) {
+    check_argument_metadata_present(inst, "base", op);
+    check_argument_metadata_present(inst, "index", op);  // otherwise why go to SIB?
+  }
+  else {
+    check_argument_metadata_absent(inst, "base", op, "direct mode");
+    check_argument_metadata_absent(inst, "index", op, "direct mode");
+  }
+  // no check for scale; 0 (2**0 = 1) by default
+}
+
+// Return the first word of 'inst' tagged with rm32 or xm32.
+// Assert-fails if there's none, so callers must check beforehand.
+word metadata_m32(const line& inst) {
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    const word& curr = inst.words.at(i);
+    const bool is_m32 = has_argument_metadata(curr, "rm32") || has_argument_metadata(curr, "xm32");
+    if (is_m32) return curr;
+  }
+  assert(false);
+}
+
+// same as compare_bitvector, with one additional exception for modrm-based
+// instructions: they may use an extra displacement on occasion
+//   mod 0 with rm32 5: disp32
+//   mod 1: disp8
+//   mod 2: disp32
+// Any other mod leaves 'expected' unchanged.
+void compare_bitvector_modrm(const line& inst, uint8_t expected, const string& maybe_op_name) {
+  if (all_hex_bytes(inst) && has_arguments(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
+  uint8_t bitvector = compute_expected_argument_bitvector(inst);
+  if (trace_contains_errors()) return;  // duplicate argument type
+  // update 'expected' bitvector for the additional exception
+  if (has_argument_metadata(inst, "mod")) {
+    int32_t mod = parse_int(metadata(inst, "mod").data);
+    switch (mod) {
+    case 0:
+      if (has_argument_metadata(inst, "rm32") && parse_int(metadata(inst, "rm32").data) == 5)
+        expected |= (1<<DISP32);
+      break;
+    case 1:
+      expected |= (1<<DISP8);
+      break;
+    case 2:
+      expected |= (1<<DISP32);
+      break;
+    }
+  }
+  if (bitvector == expected) return;  // all good with this instruction
+  // walk both bitvectors from the least significant bit, one argument type at a time
+  for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
+//?     cerr << "comparing for modrm " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
+    if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this argument
+    const string& optype = Operand_type_name.at(i);
+    if ((bitvector & 0x1) > (expected & 0x1))
+      raise << "'" << to_string(inst) << "'" << maybe_op_name << ": unexpected " << optype << " argument\n" << end();
+    else
+      raise << "'" << to_string(inst) << "'" << maybe_op_name << ": missing " << optype << " argument\n" << end();
+    // continue giving all errors for a single instruction
+  }
+  // ignore settings in any unused bits
+}
+
+void check_argument_metadata_present(const line& inst, const string& type, const word& op) {
+  if (!has_argument_metadata(inst, type))
+    raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": missing " << type << " argument\n" << end();
+}
+
+void check_argument_metadata_absent(const line& inst, const string& type, const word& op, const string& msg) {
+  if (has_argument_metadata(inst, type))
+    raise << "'" << to_string(inst) << "'" << maybe_name(op) << ": unexpected " << type << " argument (" << msg << ")\n" << end();
+}
+
+// mod 1 makes a disp8 argument legal for a ModR/M instruction.
+void test_modrm_with_displacement() {
+  Reg[EAX].u = 0x1;
+  transform(
+      "== code 0x1\n"
+      // just avoid null pointer
+      "8b/copy 1/mod/lookup+disp8 0/rm32/EAX 2/r32/EDX 4/disp8\n"  // copy *(EAX+4) to EDX
+  );
+  CHECK_TRACE_COUNT("error", 0);
+}
+
+// mod 1 requires a disp8 argument.
+void test_check_missing_disp8() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX\n"  // missing disp8
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '89/copy 1/mod/lookup+disp8 0/rm32/EAX 1/r32/ECX' (copy r32 to rm32): missing disp8 argument\n"
+  );
+}
+
+// mod 0 with rm32 5 requires a disp32 argument.
+void test_check_missing_disp32() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX\n"  // missing disp32
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX' (copy rm32 to r32): missing disp32 argument\n"
+  );
+}
+
+// Two words of an instruction can't both be tagged mod.
+void test_conflicting_arguments_in_modrm_instruction() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "01/add 0/mod 3/mod\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '01/add 0/mod 3/mod' has conflicting mod arguments\n"
+  );
+}
+
+// A word in a ModR/M instruction may not be tagged with two argument types.
+void test_conflicting_argument_type_modrm() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "01/add 0/mod 3/rm32/r32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '3/rm32/r32' has conflicting argument types; it should have only one\n"
+  );
+}
+
+// Instructions with a ModR/M byte must specify an rm32 (or xm32).
+void test_check_missing_rm32_argument() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "81 0/add/subop 0/mod            1/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 (or xm32) argument\n"
+  );
+}
+
+// Opcode 81 must be given a subop.
+void test_check_missing_subop_argument() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "81             0/mod 3/rm32/ebx 1/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop argument\n"
+  );
+}
+
+// rm32 of 4 outside direct mode requires a base argument.
+void test_check_missing_base_argument() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base argument\n"
+  );
+}
+
+// rm32 of 4 outside direct mode requires an index argument in addition to a base.
+void test_check_missing_index_argument() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index argument\n"
+  );
+}
+
+// Providing index and scale doesn't make up for a missing base.
+void test_check_missing_base_argument_2() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base argument\n"
+  );
+}
+
+// mod 0 with rm32 0 takes no displacement, so a disp8 is an error.
+void test_check_extra_displacement() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 4/disp8' (copy r32 to rm32): unexpected disp8 argument\n"
+  );
+}
+
+// The same argument type may not appear twice in an instruction.
+void test_check_duplicate_argument() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '89/copy 0/mod/indirect 0/rm32/EAX 1/r32/ECX 1/r32': duplicate r32 argument\n"
+  );
+}
+
+// In direct mode (mod 3) an rm32 of 4 is just a register, so no base or index is required.
+void test_check_base_argument_not_needed_in_direct_mode() {
+  run(
+      "== code 0x1\n"
+      "81 0/add/subop 3/mod/direct 4/rm32/use-sib 1/imm32\n"  // label mod 3 correctly; it used to say 'indirect'
+  );
+  CHECK_TRACE_COUNT("error", 0);
+}
+
+// Opcodes that take no arguments reject ModR/M arguments.
+void test_extra_modrm() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "59/pop-to-ECX  3/mod/direct 1/rm32/ECX 4/r32/ESP\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '59/pop-to-ECX 3/mod/direct 1/rm32/ECX 4/r32/ESP' (pop top of stack to ECX): unexpected modrm argument\n"
+  );
+}
+
+//:: similarly handle multi-byte opcodes
+
+// Check an instruction whose first word is '0f'; the opcode proper is its
+// second word. Raises an error if that word is missing or isn't in Name_0f.
+void check_arguments_0f(const line& inst) {
+  assert(inst.words.at(0).data == "0f");
+  if (SIZE(inst.words) == 1) {
+    raise << "opcode '0f' requires a second opcode\n" << end();
+    return;
+  }
+  const word op = preprocess_op(inst.words.at(1));
+  if (contains_key(Name_0f, op.data)) {
+    check_arguments_0f(inst, op);
+    return;
+  }
+  raise << "unknown 2-byte opcode '0f " << op.data << "'\n" << end();
+}
+
+// Check an instruction whose first word is 'f3'. The opcode proper is the
+// second word, or the third word if the second is '0f'.
+// Raises an error if the opcode is missing or unknown.
+void check_arguments_f3(const line& inst) {
+  assert(inst.words.at(0).data == "f3");
+  if (SIZE(inst.words) == 1) {
+    raise << "opcode 'f3' requires a second opcode\n" << end();
+    return;
+  }
+  word op = preprocess_op(inst.words.at(1));
+  if (op.data == "0f") {
+    // don't index past the end of a truncated instruction
+    if (SIZE(inst.words) == 2) {
+      raise << "opcode 'f3 0f' requires a third opcode\n" << end();
+      return;
+    }
+    word op2 = preprocess_op(inst.words.at(2));
+    // report unknown opcodes here rather than leaving them to the table lookup further down
+    if (!contains_key(Permitted_arguments_f3_0f, op2.data)) {
+      raise << "unknown 3-byte opcode 'f3 0f " << op2.data << "'\n" << end();
+      return;
+    }
+    check_arguments_f3_0f(inst, op2);
+    return;
+  }
+  if (!contains_key(Name_f3, op.data)) {
+    raise << "unknown 2-byte opcode 'f3 " << op.data << "'\n" << end();
+    return;
+  }
+  check_arguments_f3(inst, op);
+}
+
+// Two-byte opcodes get their arguments checked as well: '0f 84' requires a disp32.
+void test_check_missing_disp32_argument() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "  0f 84  # jmp if ZF to ??\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '0f 84' (jump disp32 bytes away if equal, if ZF is set): missing disp32 argument\n"
+  );
+}
+
+// A two-byte opcode with a ModR/M byte may take a displacement: mod 2 permits a disp32.
+void test_0f_opcode_with_modrm() {
+  transform(
+      "== code 0x1\n"
+      "0f af/multiply 2/mod/*+disp32 5/rm32/ebp 8/disp32 0/r32\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN_ERRORS();
+}
+
+:(before "End Globals")
+map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_0f;  // second byte of a '0f' opcode -> argument types it takes
+:(before "End Init Permitted Operands")
+//// Class D: just op and disp32
+//  imm32 imm8  disp32 |disp16  disp8 subop modrm
+//  0     0     1      |0       0     0     0
+// ('0f 84' is a conditional jump; presumably so are the rest -- confirm against Name_0f)
+put_new(Permitted_arguments_0f, "82", 0x10);
+put_new(Permitted_arguments_0f, "83", 0x10);
+put_new(Permitted_arguments_0f, "84", 0x10);
+put_new(Permitted_arguments_0f, "85", 0x10);
+put_new(Permitted_arguments_0f, "86", 0x10);
+put_new(Permitted_arguments_0f, "87", 0x10);
+put_new(Permitted_arguments_0f, "8c", 0x10);
+put_new(Permitted_arguments_0f, "8d", 0x10);
+put_new(Permitted_arguments_0f, "8e", 0x10);
+put_new(Permitted_arguments_0f, "8f", 0x10);
+
+//// Class M: using ModR/M byte
+//  imm32 imm8  disp32 |disp16  disp8 subop modrm
+//  0     0     0      |0       0     0     1
+put_new(Permitted_arguments_0f, "2f", 0x01);  // compare floats
+put_new(Permitted_arguments_0f, "af", 0x01);  // multiply ints
+// setcc
+put_new(Permitted_arguments_0f, "92", 0x01);
+put_new(Permitted_arguments_0f, "93", 0x01);
+put_new(Permitted_arguments_0f, "94", 0x01);
+put_new(Permitted_arguments_0f, "95", 0x01);
+put_new(Permitted_arguments_0f, "96", 0x01);
+put_new(Permitted_arguments_0f, "97", 0x01);
+put_new(Permitted_arguments_0f, "9c", 0x01);
+put_new(Permitted_arguments_0f, "9d", 0x01);
+put_new(Permitted_arguments_0f, "9e", 0x01);
+put_new(Permitted_arguments_0f, "9f", 0x01);
+
+:(before "End Globals")
+map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_f3;  // second byte of an 'f3' opcode -> argument types it takes
+map</*op*/string, /*bitvector*/uint8_t> Permitted_arguments_f3_0f;  // third byte of an 'f3 0f' opcode -> argument types it takes
+:(before "End Init Permitted Operands")
+// All the 'f3 0f' opcodes listed here take just a ModR/M byte.
+//// Class M: using ModR/M byte
+//  imm32 imm8  disp32 |disp16  disp8 subop modrm
+//  0     0     0      |0       0     0     1
+put_new(Permitted_arguments_f3_0f, "10", 0x01);  // copy xm32 to x32
+put_new(Permitted_arguments_f3_0f, "11", 0x01);  // copy x32 to xm32
+put_new(Permitted_arguments_f3_0f, "2a", 0x01);  // convert-to-float
+put_new(Permitted_arguments_f3_0f, "2c", 0x01);  // truncate-to-int
+put_new(Permitted_arguments_f3_0f, "2d", 0x01);  // convert-to-int
+put_new(Permitted_arguments_f3_0f, "51", 0x01);  // square root
+put_new(Permitted_arguments_f3_0f, "52", 0x01);  // inverse square root
+put_new(Permitted_arguments_f3_0f, "53", 0x01);  // reciprocal
+put_new(Permitted_arguments_f3_0f, "58", 0x01);  // add floats
+put_new(Permitted_arguments_f3_0f, "59", 0x01);  // multiply floats
+put_new(Permitted_arguments_f3_0f, "5c", 0x01);  // subtract floats
+put_new(Permitted_arguments_f3_0f, "5d", 0x01);  // minimum of floats
+put_new(Permitted_arguments_f3_0f, "5e", 0x01);  // divide floats
+put_new(Permitted_arguments_f3_0f, "5f", 0x01);  // maximum of floats
+
+:(code)
+// Check 'inst' against the argument types permitted for the two-byte opcode
+// '0f <op>'. Opcodes with the MODRM bit get the ModR/M-specific checks.
+void check_arguments_0f(const line& inst, const word& op) {
+  const uint8_t expected = get(Permitted_arguments_0f, op.data);
+  if (!HAS(expected, MODRM)) {
+    compare_bitvector(inst, CLEAR(expected, MODRM), maybe_name_0f(op));
+    return;
+  }
+  check_arguments_modrm(inst, op);
+  compare_bitvector_modrm(inst, expected, maybe_name_0f(op));
+}
+
+// Check 'inst' against the argument types permitted for the two-byte opcode
+// 'f3 <op>'. Opcodes with the MODRM bit get the ModR/M-specific checks.
+void check_arguments_f3(const line& inst, const word& op) {
+  const uint8_t expected = get(Permitted_arguments_f3, op.data);
+  if (!HAS(expected, MODRM)) {
+    compare_bitvector(inst, CLEAR(expected, MODRM), maybe_name_f3(op));
+    return;
+  }
+  check_arguments_modrm(inst, op);
+  compare_bitvector_modrm(inst, expected, maybe_name_f3(op));
+}
+
+// Check 'inst' against the argument types permitted for the three-byte opcode
+// 'f3 0f <op>'. Opcodes with the MODRM bit get the ModR/M-specific checks.
+void check_arguments_f3_0f(const line& inst, const word& op) {
+  const uint8_t expected = get(Permitted_arguments_f3_0f, op.data);
+  if (!HAS(expected, MODRM)) {
+    compare_bitvector(inst, CLEAR(expected, MODRM), maybe_name_f3_0f(op));
+    return;
+  }
+  check_arguments_modrm(inst, op);
+  compare_bitvector_modrm(inst, expected, maybe_name_f3_0f(op));
+}
+
+// Like maybe_name, but for the second byte of a '0f' opcode: returns its name
+// from Name_0f formatted as " (name)", or "" if it has none.
+string maybe_name_0f(const word& op) {
+  if (!is_hex_byte(op) || !contains_key(Name_0f, op.data)) return "";
+  const string& full_name = get(Name_0f, op.data);
+  const size_t paren = full_name.find(" (");  // strip stuff in parens from the name
+  return " ("+full_name.substr(0, paren)+')';
+}
+
+// Like maybe_name, but for the second byte of an 'f3' opcode: returns its name
+// from Name_f3 formatted as " (name)", or "" if it has none.
+string maybe_name_f3(const word& op) {
+  if (!is_hex_byte(op) || !contains_key(Name_f3, op.data)) return "";
+  const string& full_name = get(Name_f3, op.data);
+  const size_t paren = full_name.find(" (");  // strip stuff in parens from the name
+  return " ("+full_name.substr(0, paren)+')';
+}
+
+// Like maybe_name, but for the third byte of an 'f3 0f' opcode: returns its
+// name from Name_f3_0f formatted as " (name)", or "" if it has none.
+string maybe_name_f3_0f(const word& op) {
+  if (!is_hex_byte(op) || !contains_key(Name_f3_0f, op.data)) return "";
+  const string& full_name = get(Name_f3_0f, op.data);
+  const size_t paren = full_name.find(" (");  // strip stuff in parens from the name
+  return " ("+full_name.substr(0, paren)+')';
+}
+
+// Return a lowercase copy of the null-terminated string 's'.
+string tolower(const char* s) {
+  ostringstream out;
+  for (/*nada*/;  *s;  ++s)
+    out << static_cast<char>(tolower(static_cast<unsigned char>(*s)));  // plain char may be negative, which the library tolower doesn't permit
+  return out.str();
+}
+
+#undef HAS
+#undef SET
+#undef CLEAR
+
+:(before "End Includes")
+#include<cctype>
diff --git a/linux/bootstrap/034check_operand_bounds.cc b/linux/bootstrap/034check_operand_bounds.cc
new file mode 100644
index 00000000..efc3385e
--- /dev/null
+++ b/linux/bootstrap/034check_operand_bounds.cc
@@ -0,0 +1,143 @@
+//:: Check that the different arguments of an instruction aren't too large for their bitfields.
+
+// mod has a bound of 4, so a value of 4 doesn't fit.
+void test_check_bitfield_sizes() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "01/add 4/mod 3/rm32 1/r32\n"  // add ECX to EBX
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '4/mod' too large to fit in bitfield mod\n"
+  );
+}
+
+:(before "End Globals")
+map<string, uint32_t> Operand_bound;  // argument type -> exclusive upper bound on its value
+:(before "End One-time Setup")
+// Each bound is exclusive and a power of 2.
+// NOTE(review): there are no entries for xm32 or x32, so those arguments aren't bounds-checked here -- confirm that's intended.
+put_new(Operand_bound, "subop", 1<<3);
+put_new(Operand_bound, "mod", 1<<2);
+put_new(Operand_bound, "rm32", 1<<3);
+put_new(Operand_bound, "base", 1<<3);
+put_new(Operand_bound, "index", 1<<3);
+put_new(Operand_bound, "scale", 1<<2);
+put_new(Operand_bound, "r32", 1<<3);
+put_new(Operand_bound, "disp8", 1<<8);
+put_new(Operand_bound, "disp16", 1<<16);
+// no bound needed for disp32
+put_new(Operand_bound, "imm8", 1<<8);
+// no bound needed for imm32
+
+:(before "Pack Operands(segment code)")
+// make sure argument values fit their bitfields; give up on the segment if any don't
+check_argument_bounds(code);
+if (trace_contains_errors()) return;
+:(code)
+// Check each argument of each instruction in 'code' against the bound for its
+// type. All arguments of an instruction are checked before we give up on errors.
+void check_argument_bounds(const segment& code) {
+  trace(3, "transform") << "-- check argument bounds" << end();
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    const line& inst = code.lines.at(i);
+    int j = first_argument(inst);
+    while (j < SIZE(inst.words)) {
+      check_argument_bounds(inst.words.at(j));
+      ++j;
+    }
+    if (trace_contains_errors()) break;  // stop at the first mal-formed instruction
+  }
+}
+
+// Raise an error if the value of 'w' doesn't fit the bound for any argument type in its metadata.
+// Non-negative values of disp8 and disp16 must stay below half the bound, since
+// those fields are signed; other types may use the whole range below the bound.
+// Negative values of any type may go down to minus half the bound.
+// Words that don't look like numbers are skipped.
+void check_argument_bounds(const word& w) {
+  for (map<string, uint32_t>::iterator p = Operand_bound.begin();  p != Operand_bound.end();  ++p) {
+    if (!has_argument_metadata(w, p->first)) continue;
+    if (!looks_like_hex_int(w.data)) continue;  // later transforms are on their own to do their own bounds checking
+    int32_t x = parse_int(w.data);
+    if (x >= 0) {
+      if (p->first == "disp8" || p->first == "disp16") {
+        if (static_cast<uint32_t>(x) >= p->second/2)
+          raise << "'" << w.original << "' too large to fit in signed bitfield " << p->first << '\n' << end();
+      }
+      else {
+        if (static_cast<uint32_t>(x) >= p->second)
+          raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
+      }
+    }
+    else {
+      // hacky? assuming bound is a power of 2
+      if (x < -1*static_cast<int32_t>(p->second/2))
+        raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();  // same wording as the positive case, though here the value is too negative
+    }
+  }
+}
+
+// imm8 may use its full unsigned range: 0xff fits.
+void test_check_bitfield_sizes_for_imm8() {
+  run(
+      "== code 0x1\n"
+      "c1/shift 4/subop/left 3/mod/direct 1/rm32/ECX 0xff/imm8"  // shift ECX left
+  );
+  CHECK(!trace_contains_errors());
+}
+
+// 0x100 is one past the largest imm8.
+void test_check_bitfield_sizes_for_imm8_error() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "c1/shift 4/subop/left 3/mod/direct 1/rm32/ECX 0x100/imm8"  // shift ECX left
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '0x100/imm8' too large to fit in bitfield imm8\n"
+  );
+}
+
+// Negative imm8 values are permitted down to -0x80.
+void test_check_bitfield_sizes_for_negative_imm8() {
+  run(
+      "== code 0x1\n"
+      "c1/shift 4/subop/left 3/mod/direct 1/rm32/ECX -0x80/imm8"  // shift ECX left
+  );
+  CHECK(!trace_contains_errors());
+}
+
+// -0x81 is one past the most negative imm8.
+void test_check_bitfield_sizes_for_negative_imm8_error() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "c1/shift 4/subop/left 3/mod/direct 1/rm32/ECX -0x81/imm8"  // shift ECX left
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '-0x81/imm8' too large to fit in bitfield imm8\n"
+  );
+}
+
+// disp8 is signed: 0x7f is the largest positive value that fits.
+void test_check_bitfield_sizes_for_disp8() {
+  // not bothering to run
+  transform(
+      "== code 0x1\n"
+      "01/add 1/mod/*+disp8 3/rm32 1/r32 0x7f/disp8\n"  // add ECX to *(EBX+0x7f)
+  );
+  CHECK(!trace_contains_errors());
+}
+
+// disp8 is signed: 0x80 is too large when written as a positive number.
+void test_check_bitfield_sizes_for_disp8_error() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "01/add 1/mod/*+disp8 3/rm32 1/r32 0x80/disp8\n"  // add ECX to *(EBX+0x80)
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '0x80/disp8' too large to fit in signed bitfield disp8\n"
+  );
+}
+
+// Negative disp8 values are permitted down to -0x80.
+void test_check_bitfield_sizes_for_negative_disp8() {
+  // not bothering to run
+  transform(
+      "== code 0x1\n"
+      "01/add 1/mod/*+disp8 3/rm32 1/r32 -0x80/disp8\n"  // add ECX to *(EBX-0x80)
+  );
+  CHECK(!trace_contains_errors());
+}
+
+// -0x81 is one past the most negative disp8.
+void test_check_bitfield_sizes_for_negative_disp8_error() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "01/add 1/mod/*+disp8 3/rm32 1/r32 -0x81/disp8\n"  // add ECX to *(EBX-0x81)
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '-0x81/disp8' too large to fit in bitfield disp8\n"
+  );
+}
diff --git a/linux/bootstrap/035compute_segment_address.cc b/linux/bootstrap/035compute_segment_address.cc
new file mode 100644
index 00000000..5c627a7b
--- /dev/null
+++ b/linux/bootstrap/035compute_segment_address.cc
@@ -0,0 +1,86 @@
+//: ELF binaries have finicky rules about the precise alignment each segment
+//: should start at. They depend on the amount of code in a program.
+//: We shouldn't expect people to adjust segment addresses every time they make
+//: a change to their programs.
+//: Let's start taking the given segment addresses as guidelines, and adjust
+//: them as necessary.
+//: This gives up a measure of control in placing code and data.
+
+void test_segment_name() {
+  run(
+      "== code 0x09000000\n"
+      "05/add-to-EAX  0x0d0c0b0a/imm32\n"
+      // code starts at 0x09000000 + p_offset, which is 0x54 (0x34 ehdr + one 0x20 phdr) for a single-segment binary
+  );
+  CHECK_TRACE_CONTENTS(
+      "load: 0x09000054 -> 05\n"  // opcode lands at the adjusted segment start
+      "load: 0x09000055 -> 0a\n"  // imm32 follows, least-significant byte first
+      "load: 0x09000056 -> 0b\n"
+      "load: 0x09000057 -> 0c\n"
+      "load: 0x09000058 -> 0d\n"
+      "run: add imm32 0x0d0c0b0a to EAX\n"
+      "run: storing 0x0d0c0b0a\n"
+  );
+}
+
+//: compute segment address
+
+:(before "End Transforms")
+Transform.push_back(compute_segment_starts);
+
+:(code)
+void compute_segment_starts(program& p) {
+  trace(3, "transform") << "-- compute segment addresses" << end();  // this transform rewrites the start of each user-space segment in p
+  uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
+  for (size_t i = 0;  i < p.segments.size();  ++i) {
+    segment& curr = p.segments.at(i);
+    if (curr.start >= 0x08000000) {
+      // valid address for user space, so assume we're creating a real ELF binary, not just running a test
+      curr.start &= 0xfffff000;  // same number of zeros as the p_align used when emitting the ELF binary
+      curr.start |= (p_offset & 0xfff);  // low 12 bits of the address are taken from the running offset
+      trace(99, "transform") << "segment " << i << " begins at address 0x" << HEXWORD << curr.start << end();
+    }
+    p_offset += size_of(curr);  // advance past this segment's bytes whether or not its start was adjusted
+    assert(p_offset < SEGMENT_ALIGNMENT);  // for now we get less and less available space in each successive segment
+  }
+}
+
+uint32_t size_of(const segment& s) {
+  const int num_lines = SIZE(s.lines);
+  uint32_t total = 0;  // bytes occupied by the lines visited so far
+  for (int l = 0;  l != num_lines;  ++l) total += num_bytes(s.lines.at(l));
+  return total;
+}
+
+// Assumes all bitfields are packed.
+uint32_t num_bytes(const line& inst) {
+  const int num_words = SIZE(inst.words);
+  uint32_t total = 0;  // each word contributes whatever size_of(word) reports for it
+  for (int w = 0;  w != num_words;  ++w) total += size_of(inst.words.at(w));
+  return total;
+}
+
+int size_of(const word& w) {
+  if (has_argument_metadata(w, "disp32") || has_argument_metadata(w, "imm32"))
+    return 4;  // 32-bit arguments are counted as four bytes
+  else if (has_argument_metadata(w, "disp16"))
+    return 2;  // 16-bit displacements are counted as two bytes
+  // End size_of(word w) Special-cases
+  else
+    return 1;  // NOTE(review): /imm16 also lands here, yet the data-segment rewrite emits 2 bytes for it -- confirm
+}
+
+//: Dependencies:
+//: - We'd like to compute segment addresses before setting up global variables,
+//:   because computing addresses for global variables requires knowing where
+//:   the data segment starts.
+//: - We'd like to finish expanding labels before computing segment addresses,
+//:   because it would make computing the sizes of segments more self-contained
+//:   (num_bytes).
+//:
+//: Decision: compute segment addresses before expanding labels, by being
+//: aware in this layer of certain argument types that will eventually occupy
+//: multiple bytes.
+//:
+//: The later layer that expands labels hooks into size_of(word), which
+//: num_bytes() relies on, to teach this layer that labels occupy zero space.
diff --git a/linux/bootstrap/036labels.cc b/linux/bootstrap/036labels.cc
new file mode 100644
index 00000000..72d11da5
--- /dev/null
+++ b/linux/bootstrap/036labels.cc
@@ -0,0 +1,429 @@
+//: Labels are defined by ending names with a ':'. This layer will compute
+//: displacements for labels, and compute the offset for instructions using them.
+//:
+//: We won't check this, but our convention will be that jump targets will
+//: start with a '$', while functions will not. Function names will never be
+//: jumped to, and jump targets will never be called.
+
+//: We're introducing non-number names for the first time, so it's worth
+//: laying down some ground rules all transforms will follow, so things don't
+//: get too confusing:
+//:   - if it starts with a digit, it's treated as a number. If it can't be
+//:     parsed as hex it will raise an error.
+//:   - if it starts with '-' it's treated as a number.
+//:   - if it starts with '0x' it's treated as a number.
+//:   - if it's two characters long, it can't be a name. Either it's a hex
+//:     byte, or it raises an error.
+//: That's it. Names can start with any non-digit that isn't a dash. They can
+//: be a single character long. 'a' is not a hex number, it's a variable.
+//: Later layers may add more conventions partitioning the space of names. But
+//: the above rules will remain inviolate.
+
+//: One special label is 'Entry', the address to start running the program at.
+//: It can be non-unique; the last declaration overrides earlier ones.
+//: It must exist in a program. Otherwise we don't know where to start running
+//: programs.
+
+void test_Entry_label() {
+  run(
+      "== code 0x1\n"
+      "05 0x0d0c0b0a/imm32\n"  // occupies addresses 1-5; must not be executed
+      "Entry:\n"
+      "05 0x0d0c0b0a/imm32\n"  // address 6: execution begins here
+  );
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000006 opcode: 05\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000001 opcode: 05");
+}
+
+:(before "End looks_like_hex_int(s) Detectors")
+if (SIZE(s) == 2) return true;
+
+:(code)
+void test_pack_immediate_ignores_single_byte_nondigit_argument() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "b9/copy  a/imm32\n"  // a single non-digit character is a name, not a hex number
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'b9/copy a/imm32'\n"
+      // no change (we're just not printing metadata to the trace)
+      "transform: instruction after packing: 'b9 a'\n"
+  );
+}
+
+void test_pack_immediate_ignores_3_hex_digit_argument() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "b9/copy  aaa/imm32\n"  // no leading digit, '-' or '0x', and not 2 characters long: treated as a name
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'b9/copy aaa/imm32'\n"
+      // no change (we're just not printing metadata to the trace)
+      "transform: instruction after packing: 'b9 aaa'\n"
+  );
+}
+
+void test_pack_immediate_ignores_non_hex_argument() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "b9/copy xxx/imm32\n"  // not hex at all, so packing must leave it untouched
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: packing instruction 'b9/copy xxx/imm32'\n"
+      // no change (we're just not printing metadata to the trace)
+      "transform: instruction after packing: 'b9 xxx'\n"
+  );
+}
+
+//: a helper we'll find handy later
+void check_valid_name(const string& s) {
+  if (s.empty()) {  // raises an error for every naming rule that s (a label or variable name) violates
+    raise << "empty name!\n" << end();
+    return;  // nothing left to inspect
+  }
+  if (s.at(0) == '-')
+    raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
+  if (s.substr(0, 2) == "0x") {
+    raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
+    return;  // skip the remaining checks once this one has fired
+  }
+  if (isdigit(static_cast<unsigned char>(s.at(0))))  // cast first: isdigit() on a negative char (non-ASCII byte) is undefined
+    raise << "'" << s << "' starts with a digit, and so can be confused with a number; use a different name.\n" << end();
+  if (SIZE(s) == 2)
+    raise << "'" << s << "' is two characters long, which can look like raw hex bytes at a glance; use a different name\n" << end();
+}
+
+//: Now that that's done, let's start using names as labels.
+
+void test_map_label() {
+  transform(
+      "== code 0x1\n"
+      "loop:\n"
+      "  05  0x0d0c0b0a/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: label 'loop' is at address 1\n"  // the label takes the address of the segment start
+  );
+}
+
+:(before "End Transforms")
+Transform.push_back(rewrite_labels);
+:(code)
+void rewrite_labels(program& p) {
+  trace(3, "transform") << "-- rewrite labels" << end();  // this transform turns label definitions/uses in the code segment into raw bytes
+  if (p.segments.empty()) return;
+  segment* code = find(p, "code");
+  if (!code) return;  // find() hands back a pointer that other callers test before use; don't dereference a null one
+  map<string, int32_t> byte_index;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
+  compute_byte_indices_for_labels(*code, byte_index);
+  if (trace_contains_errors()) return;
+  drop_labels(*code);
+  if (trace_contains_errors()) return;
+  replace_labels_with_displacements(*code, byte_index);
+  if (contains_key(byte_index, "Entry")) p.entry = code->start + get(byte_index, "Entry");  // byte_index is relative to the segment start
+}
+
+void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
+  int current_byte = 0;  // offset from code.start of the word being visited
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    const line& inst = code.lines.at(i);
+    if (Source_lines_file.is_open() && !inst.original.empty() && /*not a label*/ *inst.words.at(0).data.rbegin() != ':')
+      Source_lines_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << inst.original << '\n';
+    for (int j = 0;  j < SIZE(inst.words);  ++j) {
+      const word& curr = inst.words.at(j);
+      // hack: if we have any argument metadata left after previous transforms,
+      // deduce its size
+      // Maybe we should just move this transform to before instruction
+      // packing, and deduce the size of *all* arguments. But then we'll also
+      // have to deal with bitfields.
+      if (has_argument_metadata(curr, "disp32") || has_argument_metadata(curr, "imm32")) {
+        if (*curr.data.rbegin() == ':')
+          raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
+        current_byte += 4;
+      }
+      else if (has_argument_metadata(curr, "disp16")) {
+        if (*curr.data.rbegin() == ':')
+          raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
+        current_byte += 2;
+      }
+      // automatically handle /disp8 and /imm8 here
+      else if (*curr.data.rbegin() != ':') {
+        ++current_byte;  // any other non-label word is counted as a single byte
+      }
+      else {
+        string label = drop_last(curr.data);  // strip the trailing ':'
+        // ensure labels look sufficiently different from raw hex
+        check_valid_name(label);
+        if (trace_contains_errors()) return;
+        if (contains_any_argument_metadata(curr))
+          raise << "'" << to_string(inst) << "': label definition (':') not allowed in argument\n" << end();
+        if (j > 0)
+          raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
+        if (Labels_file.is_open())
+          Labels_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';  // written before the duplicate check below
+        if (contains_key(byte_index, label) && label != "Entry") {
+          raise << "duplicate label '" << label << "'\n" << end();
+          return;
+        }
+        put(byte_index, label, current_byte);  // stored relative to the segment start; users add code.start when they need an address
+        trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
+        // no modifying current_byte; label definitions won't be in the final binary
+      }
+    }
+  }
+}
+
+:(before "End Globals")
+bool Dump_debug_info = false;  // currently used only by 'bootstrap translate'
+ofstream Labels_file;  // when open, receives one "0x<address> <name>" line per label or global variable
+ofstream Source_lines_file;  // when open, receives one "0x<address> <original source line>" line per instruction
+:(before "End Commandline Options")
+else if (is_equal(*arg, "--debug")) {
+  Dump_debug_info = true;
+  // End --debug Settings
+}
+//: wait to open "labels" for writing until we're sure we aren't trying to read it
+:(after "Begin bootstrap translate")
+if (Dump_debug_info) {
+  cerr << "saving address->label information to 'labels'\n";
+  Labels_file.open("labels");
+  cerr << "saving address->source information to 'source_lines'\n";
+  Source_lines_file.open("source_lines");
+}
+:(before "End bootstrap translate")
+if (Dump_debug_info) {
+  Labels_file.close();
+  Source_lines_file.close();
+}
+
+:(code)
+void drop_labels(segment& code) {
+  // erase-remove on each line: words that is_label() accepts are definitions, not data
+  for (int l = 0;  l != SIZE(code.lines);  ++l) {
+    vector<word>& words = code.lines.at(l).words;
+    words.erase(remove_if(words.begin(), words.end(), is_label), words.end());
+  }
+}
+
+bool is_label(const word& w) {
+  return !w.data.empty() && *w.data.rbegin() == ':';  // a definition ends in ':'; dereferencing rbegin() of an empty string is undefined
+}
+
+void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
+  int32_t byte_index_next_instruction_starts_at = 0;  // displacements are measured from the end of the current instruction
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    line& inst = code.lines.at(i);
+    byte_index_next_instruction_starts_at += num_bytes(inst);
+    line new_inst;  // rebuilt copy of inst in which label references become raw bytes
+    for (int j = 0;  j < SIZE(inst.words);  ++j) {
+      const word& curr = inst.words.at(j);
+      if (contains_key(byte_index, curr.data)) {
+        int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
+        int32_t absolute_address = code.start + get(byte_index, curr.data);
+        if (has_argument_metadata(curr, "disp8")) {
+          if (displacement > 0x7f || displacement < -0x80)  // signed 8-bit range is [-0x80, 0x7f]
+            raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 signed bits\n" << end();
+          else
+            emit_hex_bytes(new_inst, displacement, 1);
+        }
+        else if (has_argument_metadata(curr, "disp16")) {
+          if (displacement > 0x7fff || displacement < -0x8000)  // signed 16-bit range is [-0x8000, 0x7fff]
+            raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 signed bits\n" << end();
+          else
+            emit_hex_bytes(new_inst, displacement, 2);
+        }
+        else if (has_argument_metadata(curr, "disp32")) {
+          if (is_far_jump_or_call(new_inst))  // inspects the words emitted so far, i.e. the opcode bytes
+            emit_hex_bytes(new_inst, displacement, 4);
+          else
+            emit_hex_bytes(new_inst, absolute_address, 4);
+        } else if (has_argument_metadata(curr, "imm32")) {
+          emit_hex_bytes(new_inst, absolute_address, 4);
+        }
+      }  // NOTE(review): a label reference with any other metadata emits nothing and silently vanishes -- confirm intended
+      else {
+        new_inst.words.push_back(curr);  // not a known label: copy through unchanged
+      }
+    }
+    inst.words.swap(new_inst.words);
+    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
+  }
+}
+
+bool is_far_jump_or_call(const line& inst) {
+  if (inst.words.empty()) return false;  // no opcode yet (label reference is the first word); .at(0) would throw
+  string first_opcode = inst.words.at(0).data;
+  if (first_opcode == "e8" || first_opcode == "e9") return true;  // true => caller emits a relative displacement, not an address
+  if (SIZE(inst.words) < 2 || first_opcode != "0f") return false;
+  string second_opcode = inst.words.at(1).data;
+  return starts_with(second_opcode, "8");  // two-byte opcodes 0f 8x
+}
+
+string data_to_string(const line& inst) {
+  ostringstream result;
+  const int num_words = SIZE(inst.words);
+  for (int k = 0;  k != num_words;  ++k) {
+    // one space between words, none before the first; metadata is left out
+    result << (k == 0 ? "" : " ") << inst.words.at(k).data;
+  }
+  return result.str();
+}
+
+string drop_last(const string& s) {
+  return s.empty() ? s : s.substr(0, s.size()-1);  // all but the final character; "" stays "" rather than decrementing end() past begin()
+}
+
+//: Label definitions must be the first word on a line. No jumping inside
+//: instructions.
+//: They should also be the only word on a line.
+//: However, you can absolutely have multiple labels map to the same address,
+//: as long as they're on separate lines.
+
+void test_multiple_labels_at() {
+  transform(
+      "== code 0x1\n"
+      // address 1
+      "loop:\n"
+      " $loop2:\n"
+      // address 1 (labels take up no space)
+      "    05  0x0d0c0b0a/imm32\n"
+      // address 6
+      "    eb  $loop2/disp8\n"
+      // address 8
+      "    eb  $loop3/disp8\n"
+      // address 0xa
+      " $loop3:\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: label 'loop' is at address 1\n"
+      "transform: label '$loop2' is at address 1\n"
+      "transform: label '$loop3' is at address a\n"
+      // first jump is to -7: target 1 minus next-instruction address 8, encoded as f9
+      "transform: instruction after transform: 'eb f9'\n"
+      // second jump is to 0 (fall through): target 0xa minus next-instruction address 0xa
+      "transform: instruction after transform: 'eb 00'\n"
+  );
+}
+
+void test_loading_label_as_imm32() {
+  transform(
+      "== code 0x1\n"
+      "label:\n"
+      "  be/copy-to-ESI  label/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: label 'label' is at address 1\n"
+      "transform: instruction after transform: 'be 01 00 00 00'\n"  // imm32 receives the label's absolute address, low byte first
+  );
+}
+
+void test_duplicate_label() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "loop:\n"
+      "loop:\n"  // second definition of the same name
+      "    05  0x0d0c0b0a/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: duplicate label 'loop'\n"
+  );
+}
+
+void test_label_too_short() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "xz:\n"  // two-character names are reserved for hex bytes
+      "  05  0x0d0c0b0a/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: 'xz' is two characters long, which can look like raw hex bytes at a glance; use a different name\n"
+  );
+}
+
+void test_label_hex() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "0xab:\n"  // a '0x' prefix always means a number, so it can't name a label
+      "  05  0x0d0c0b0a/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '0xab' looks like a hex number; use a different name\n"
+  );
+}
+
+void test_label_negative_hex() {
+  Hide_errors = true;
+  transform(
+      "== code 0x1\n"
+      "-a:\n"  // a leading '-' always means a number, so it can't name a label
+      "    05  0x0d0c0b0a/imm32\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: '-a' starts with '-', which can be confused with a negative number; use a different name\n"
+  );
+}
+
+//: As said up top, the 'Entry' label is special.
+//: It can be non-unique; the last declaration overrides earlier ones.
+//: It must exist in a program. Otherwise we don't know where to start running
+//: programs.
+
+void test_duplicate_Entry_label() {
+  transform(
+      "== code 0x1\n"
+      "Entry:\n"
+      "Entry:\n"  // 'Entry' is exempt from the duplicate-label check
+      "    05  0x0d0c0b0a/imm32\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN_ERRORS();
+}
+
+// This test could do with some refactoring.
+// We're duplicating the flow inside `bootstrap translate`, but without
+// reading/writing files.
+// We can't just use run(string) because most of our tests allow programs
+// without 'Entry' labels, as a convenience.
+void test_programs_without_Entry_label() {
+  Hide_errors = true;
+  program p;
+  istringstream in(
+      "== code 0x1\n"
+      "05 0x0d0c0b0a/imm32\n"
+      "05 0x0d0c0b0a/imm32\n"
+  );
+  parse(in, p);
+  transform(p);
+  ostringstream dummy;  // the emitted bytes are discarded; only the trace is checked
+  save_elf(p, dummy);
+  CHECK_TRACE_CONTENTS(
+      "error: no 'Entry' label found\n"
+  );
+}
+
+//: now that we have labels, we need to adjust segment size computation to
+//: ignore them.
+
+void test_segment_size_ignores_labels() {
+  transform(
+      "== code 0x09000074\n"
+      "  05/add  0x0d0c0b0a/imm32\n"  // 5 bytes
+      "foo:\n"                        // 0 bytes
+      "== data 0x0a000000\n"
+      "bar:\n"
+      "  00\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: segment 1 begins at address 0x0a000079\n"  // 0x74 of headers + 5 code bytes; the label adds nothing
+  );
+}
+
+:(before "End size_of(word w) Special-cases")
+else if (is_label(w))
+  return 0;
diff --git a/linux/bootstrap/037global_variables.cc b/linux/bootstrap/037global_variables.cc
new file mode 100644
index 00000000..b8306d16
--- /dev/null
+++ b/linux/bootstrap/037global_variables.cc
@@ -0,0 +1,305 @@
+//: Global variables.
+//:
+//: Global variables are just labels in the data segment.
+//: However, they can only be used in imm32 arguments, or in disp32 arguments of
+//: instructions that also have a /mod argument. They can't be used with jump and call instructions.
+//:
+//: This layer has much the same structure as rewriting labels.
+
+:(code)
+void test_global_variable() {
+  run(
+      "== code 0x1\n"
+      "b9  x/imm32\n"
+      "== data 0x2000\n"
+      "x:\n"  // first thing in the data segment, so it sits at the segment start
+      "  00 00 00 00\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: global variable 'x' is at address 0x00002000\n"
+  );
+}
+
+:(before "End Transforms")
+Transform.push_back(rewrite_global_variables);
+:(code)
+void rewrite_global_variables(program& p) {
+  trace(3, "transform") << "-- rewrite global variables" << end();
+  // Begin rewrite_global_variables
+  map<string, uint32_t> address;  // variable name -> absolute address
+  compute_addresses_for_global_variables(p, address);
+  if (trace_contains_errors()) return;  // don't rewrite anything if a definition was rejected
+  drop_global_variables(p);  // definitions are no longer needed once their addresses are recorded
+  replace_global_variables_with_addresses(p, address);
+}
+
+void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
+  for (int s = 0;  s != SIZE(p.segments);  ++s) {
+    const segment& seg = p.segments.at(s);
+    if (seg.name != "code") compute_addresses_for_global_variables(seg, address);  // every segment except "code" can define globals
+  }
+}
+
+void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
+  int current_address = s.start;  // absolute address of the word being visited
+  for (int i = 0;  i < SIZE(s.lines);  ++i) {
+    const line& inst = s.lines.at(i);
+    for (int j = 0;  j < SIZE(inst.words);  ++j) {
+      const word& curr = inst.words.at(j);
+      if (*curr.data.rbegin() != ':') {
+        current_address += size_of(curr);  // ordinary data word: just advance
+      }
+      else {
+        string variable = drop_last(curr.data);  // strip the trailing ':'
+        // ensure variables look sufficiently different from raw hex
+        check_valid_name(variable);
+        if (trace_contains_errors()) return;
+        if (j > 0)
+          raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
+        if (Labels_file.is_open())
+          Labels_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';  // written before the duplicate check below
+        if (contains_key(address, variable)) {
+          raise << "duplicate global '" << variable << "'\n" << end();
+          return;
+        }
+        put(address, variable, current_address);
+        trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
+        // no modifying current_address; global variable definitions won't be in the final binary
+      }
+    }
+  }
+}
+
+void drop_global_variables(program& p) {
+  for (int s = 0;  s != SIZE(p.segments);  ++s) {
+    segment& seg = p.segments.at(s);
+    if (seg.name != "code") drop_labels(seg);  // same ':'-suffixed syntax as labels, so the same helper removes them
+  }
+}
+
+void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
+  if (p.segments.empty()) return;
+  // hand each segment to the rewriter for its kind
+  const int num_segments = SIZE(p.segments);
+  for (int s = 0;  s < num_segments;  ++s) {
+    segment& seg = p.segments.at(s);
+    if (seg.name != "code") replace_global_variables_in_data_segment(seg, address);
+    else replace_global_variables_in_code_segment(seg, address);
+  }
+}
+
+void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    line& inst = code.lines.at(i);
+    line new_inst;  // rebuilt copy of inst with global names replaced by 4 address bytes
+    for (int j = 0;  j < SIZE(inst.words);  ++j) {
+      const word& curr = inst.words.at(j);
+      if (!contains_key(address, curr.data)) {
+        if (!looks_like_hex_int(curr.data))  // neither a known global nor a number: report it, but still copy it through
+          raise << "missing reference to global '" << curr.data << "'\n" << end();
+        new_inst.words.push_back(curr);
+        continue;
+      }
+      if (!valid_use_of_global_variable(curr)) {
+        raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
+        return;  // abandons the rest of the segment; this line is left unmodified
+      }
+      emit_hex_bytes(new_inst, get(address, curr.data), 4);
+    }
+    inst.words.swap(new_inst.words);
+    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
+  }
+}
+
+void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
+  for (int i = 0;  i < SIZE(data.lines);  ++i) {
+    line& l = data.lines.at(i);
+    line new_l;  // rebuilt copy of l with names and sized literals expanded into bytes
+    for (int j = 0;  j < SIZE(l.words);  ++j) {
+      const word& curr = l.words.at(j);
+      if (!contains_key(address, curr.data)) {
+        if (looks_like_hex_int(curr.data)) {
+          if (has_argument_metadata(curr, "imm32"))
+            emit_hex_bytes(new_l, curr, 4);
+          else if (has_argument_metadata(curr, "imm16"))
+            emit_hex_bytes(new_l, curr, 2);  // NOTE(review): size_of(word) counts /imm16 as 1 byte, so later addresses may be off -- confirm
+          else if (has_argument_metadata(curr, "imm8"))
+            emit_hex_bytes(new_l, curr, 1);
+          else if (has_argument_metadata(curr, "disp8"))
+            raise << "can't use /disp8 in a non-code segment\n" << end();
+          else if (has_argument_metadata(curr, "disp16"))
+            raise << "can't use /disp16 in a non-code segment\n" << end();
+          else if (has_argument_metadata(curr, "disp32"))
+            raise << "can't use /disp32 in a non-code segment\n" << end();
+          else
+            new_l.words.push_back(curr);  // plain number without size metadata: copy through unchanged
+        }
+        else {
+          raise << "missing reference to global '" << curr.data << "'\n" << end();
+          new_l.words.push_back(curr);
+        }
+        continue;
+      }
+      trace(99, "transform") << curr.data << " maps to " << HEXWORD << get(address, curr.data) << end();
+      emit_hex_bytes(new_l, get(address, curr.data), 4);  // a global's name always expands to its 4-byte address here
+    }
+    l.words.swap(new_l.words);
+    trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
+  }
+}
+
+bool valid_use_of_global_variable(const word& curr) {
+  if (has_argument_metadata(curr, "imm32")) return true;  // loading a global's address as an immediate is always fine
+  // End Valid Uses Of Global Variable(curr)
+  return false;  // anything not explicitly allowed above is rejected
+}
+
+//:: a more complex sanity check for how we use global variables
+//: requires first saving some data early before we pack arguments
+
+:(after "Begin Transforms")
+Transform.push_back(correlate_disp32_with_mod);
+:(code)
+void correlate_disp32_with_mod(program& p) {
+  if (p.segments.empty()) return;
+  segment* code = find(p, "code");
+  if (!code) return;  // find() hands back a pointer that other callers test before use; don't dereference a null one
+  for (int i = 0;  i < SIZE(code->lines);  ++i) {
+    line& inst = code->lines.at(i);
+    for (int j = 0;  j < SIZE(inst.words);  ++j) {
+      word& curr = inst.words.at(j);
+      if (has_argument_metadata(curr, "disp32") && has_argument_metadata(inst, "mod"))
+        curr.metadata.push_back("has_mod");  // remember on the disp32 word that its instruction had a /mod argument
+    }
+  }
+}
+
+:(before "End Valid Uses Of Global Variable(curr)")
+if (has_argument_metadata(curr, "disp32"))
+  return has_metadata(curr, "has_mod");
+// todo: more sophisticated check, to ensure we don't use global variable
+// addresses as a real displacement added to other arguments.
+
+:(code)
+bool has_metadata(const word& w, const string& m) {
+  int k = 0;
+  while (k < SIZE(w.metadata) && w.metadata.at(k) != m) ++k;  // stop at the first exact match
+  return k < SIZE(w.metadata);  // running off the end means m is absent
+}
+
+void test_global_variable_disallowed_in_jump() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "eb/jump  x/disp8\n"  // x is defined in the data segment below, not as a code label
+      "== data 0x2000\n"
+      "x:\n"
+      "  00 00 00 00\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: 'eb/jump x/disp8': can't refer to global variable 'x'\n"
+      // sub-optimal error message; should be
+//?       "error: can't jump to data (variable 'x')\n"
+  );
+}
+
+void test_global_variable_disallowed_in_call() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "e8/call  x/disp32\n"  // disp32 on an instruction without a /mod argument is not a valid use of a global
+      "== data 0x2000\n"
+      "x:\n"
+      "  00 00 00 00\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: 'e8/call x/disp32': can't refer to global variable 'x'\n"
+      // sub-optimal error message; should be
+//?       "error: can't call to the data segment ('x')\n"
+  );
+}
+
+void test_global_variable_in_data_segment() {
+  run(
+      "== code 0x1\n"
+      "b9  x/imm32\n"
+      "== data 0x2000\n"
+      "x:\n"
+      "  y/imm32\n"  // occupies 4 bytes, which puts y at 0x2004
+      "y:\n"
+      "  00 00 00 00\n"
+  );
+  // check that we loaded 'x' with the address of 'y'
+  CHECK_TRACE_CONTENTS(
+      "load: 0x00002000 -> 04\n"
+      "load: 0x00002001 -> 20\n"
+      "load: 0x00002002 -> 00\n"
+      "load: 0x00002003 -> 00\n"
+  );
+  CHECK_TRACE_COUNT("error", 0);
+}
+
+void test_raw_number_with_imm32_in_data_segment() {
+  run(
+      "== code 0x1\n"
+      "b9  x/imm32\n"
+      "== data 0x2000\n"
+      "x:\n"
+      "  1/imm32\n"
+  );
+  // check that 'x' holds the value 1, widened to 4 bytes
+  CHECK_TRACE_CONTENTS(
+      "load: 0x00002000 -> 01\n"
+      "load: 0x00002001 -> 00\n"
+      "load: 0x00002002 -> 00\n"
+      "load: 0x00002003 -> 00\n"
+  );
+  CHECK_TRACE_COUNT("error", 0);
+}
+
+void test_duplicate_global_variable() {
+  Hide_errors = true;
+  run(
+      "== code 0x1\n"
+      "40/increment-EAX\n"
+      "== data 0x2000\n"
+      "x:\n"
+      "x:\n"  // second definition of the same name
+      "  00\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: duplicate global 'x'\n"
+  );
+}
+
+void test_global_variable_disp32_with_modrm() {
+  run(
+      "== code 0x1\n"
+      "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32\n"  // has a /mod argument, so disp32 may name a global
+      "== data 0x2000\n"
+      "x:\n"
+      "  00 00 00 00\n"
+  );
+  CHECK_TRACE_COUNT("error", 0);
+}
+
+void test_global_variable_disp32_with_call() {
+  transform(
+      "== code 0x1\n"
+      "foo:\n"
+      "  e8/call bar/disp32\n"  // bar is a code label here, so a disp32 call raises no error
+      "bar:\n"
+  );
+  CHECK_TRACE_COUNT("error", 0);
+}
+
+string to_full_string(const line& in) {
+  ostringstream result;
+  for (int w = 0;  w < SIZE(in.words);  ++w) {
+    const word& curr = in.words.at(w);
+    result << (w > 0 ? " " : "") << curr.data;  // words are separated by single spaces
+    for (int m = 0;  m < SIZE(curr.metadata);  ++m)
+      result << '/' << curr.metadata.at(m);  // each metadata entry follows its word, introduced by '/'
+  }
+  return result.str();
+}
diff --git a/linux/bootstrap/038literal_strings.cc b/linux/bootstrap/038literal_strings.cc
new file mode 100644
index 00000000..b0b3c13f
--- /dev/null
+++ b/linux/bootstrap/038literal_strings.cc
@@ -0,0 +1,362 @@
+//: Allow instructions to mention literals directly.
+//:
+//: This layer will transparently move them to the segment named 'data', which
+//: therefore needs to exist (even if empty) whenever literals are used.
+
+void test_transform_literal_string() {
+  run(
+      "== code 0x1\n"
+      "b8/copy  \"test\"/imm32\n"  // string literal used directly as an operand
+      "== data 0x2000\n"  // need an empty segment
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: -- move literal strings to data segment\n"
+      "transform: adding global variable '__subx_global_1' containing \"test\"\n"
+      "transform: line after transform: 'b8 __subx_global_1'\n"  // the literal was replaced by the generated name
+  );
+}
+
+//: We don't rely on any transforms running in previous layers, but this layer
+//: knows about labels and global variables and will emit them for previous
+//: layers to transform.
+:(after "Begin Transforms")
+Transform.push_back(transform_literal_strings);
+
+:(before "End Globals")
+int Next_auto_global = 1;  // numeric suffix for the next generated '__subx_global_N' name
+:(before "End Reset")
+Next_auto_global = 1;  // numbering restarts at 1 on every reset
+:(code)
+void transform_literal_strings(program& p) {
+  trace(3, "transform") << "-- move literal strings to data segment" << end();
+  if (p.segments.empty()) return;
+  vector<line> new_lines;  // definitions of the generated globals; appended to 'data' at the end
+  for (int s = 0;  s < SIZE(p.segments);  ++s) {
+    segment& seg = p.segments.at(s);
+    trace(99, "transform") << "segment '" << seg.name << "'" << end();
+    for (int i = 0;  i < SIZE(seg.lines);  ++i) {
+      line& inst = seg.lines.at(i);
+      for (int j = 0;  j < SIZE(inst.words);  ++j) {
+        word& curr = inst.words.at(j);
+        if (curr.data.empty() || curr.data.at(0) != '"') continue;  // not a string literal (at(0) would throw on an empty word)
+        ostringstream global_name;
+        global_name << "__subx_global_" << Next_auto_global++;
+        add_global_to_data_segment(global_name.str(), curr, new_lines);
+        curr.data = global_name.str();  // the word now refers to the literal by its generated name
+      }
+      trace(99, "transform") << "line after transform: '" << data_to_string(inst) << "'" << end();
+    }
+  }
+  segment* data = find(p, "data");
+  if (data)
+    data->lines.insert(data->lines.end(), new_lines.begin(), new_lines.end());
+  else if (!new_lines.empty())  // don't silently drop the definitions of globals we just started referring to
+    raise << "string literals need a 'data' segment to be moved into\n" << end();
+}
+
+void add_global_to_data_segment(const string& name, const word& value, vector<line>& out) {
+  trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
+  const int num_chars = SIZE(value.data) - /*both quotes*/2;
+  out.push_back(label(name));  // label, so the literal can be referred to by name
+  out.push_back(line());  // 4-byte length prefix of a size-prefixed array
+  emit_hex_bytes(out.back(), num_chars, 4/*bytes*/);
+  // payload: one word per character, annotated with the character itself
+  out.push_back(line());
+  line& payload = out.back();
+  for (int offset = 0;  offset < num_chars;  ++offset) {
+    const char c = value.data.at(/*skip start quote*/1 + offset);
+    word byte;
+    byte.data = hex_byte_to_string(c);
+    byte.metadata.push_back(string(1, c));
+    payload.words.push_back(byte);
+  }
+}
+
+//: Within strings, whitespace is significant. So we need to redo our instruction
+//: parsing.
+
+void test_instruction_with_string_literal() {
+  parse_instruction_character_by_character(
+      "a \"abc  def\" z\n"  // two spaces inside string
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: a\n"
+      "parse2: word: \"abc  def\"\n"  // both spaces survive inside the literal
+      "parse2: word: z\n"
+  );
+  // no other words
+  CHECK_TRACE_COUNT("parse2", 3);
+}
+
+void test_string_literal_in_data_segment() {
+  run(
+      "== code 0x1\n"
+      "b8/copy  X/imm32\n"
+      "== data 0x2000\n"
+      "X:\n"
+      "\"test\"/imm32\n"  // string literal written inside the data segment itself
+  );
+  CHECK_TRACE_CONTENTS(
+      "transform: -- move literal strings to data segment\n"
+      "transform: adding global variable '__subx_global_1' containing \"test\"\n"
+      "transform: line after transform: '__subx_global_1'\n"  // replaced by a generated name here as well
+  );
+}
+
+void test_string_literal_with_missing_quote() {
+  Hide_errors = true;  // this test expects an error trace (checked below)
+  run(
+      "== code 0x1\n"
+      "b8/copy  \"test/imm32\n"  // closing quote is missing
+      "== data 0x2000\n"
+  );
+  CHECK_TRACE_CONTENTS(
+      "error: unclosed string in: b8/copy  \"test/imm32"
+  );
+}
+
+:(before "End Line Parsing Special-cases(line_data -> l)")
+if (line_data.find('"') != string::npos) {  // can cause false-positives, but we can handle them
+  parse_instruction_character_by_character(line_data, l);  // appends at most one parsed line to 'l'
+  continue;  // NOTE(review): presumably skips the default parsing of this line -- confirm at the insertion point
+}
+
+:(code)
+void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
+  if (line_data.find('\n') != string::npos  && line_data.find('\n') != line_data.size()-1) {  // a newline is tolerated only as the very last character
+    raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
+    return;
+  }
+  // Split line_data into words, keeping each "..." literal (spaces included) as a single word; append the resulting line to 'out'.
+  istringstream in(line_data);
+  in >> std::noskipws;  // whitespace is significant inside strings, so 'in >> c' must not skip it
+  line result;
+  result.original = line_data;
+  // add tokens (words or strings) one by one
+  while (has_data(in)) {
+    skip_whitespace(in);
+    if (!has_data(in)) break;
+    char c = in.get();  // first character of the next token
+    if (c == '#') break;  // comment; drop rest of line
+    if (c == ':') break;  // line metadata; skip for now
+    if (c == '.') {
+      if (!has_data(in)) break;  // comment token at end of line
+      if (isspace(in.peek()))
+        continue;  // '.' followed by space is comment token; skip
+    }
+    result.words.push_back(word());  // filled in below via result.words.back()
+    if (c == '"') {
+      // string literal; slurp everything between quotes into data
+      ostringstream d;
+      d << c;  // the opening quote is kept as part of data
+      while (true) {
+        if (!has_data(in)) {
+          raise << "unclosed string in: " << line_data << end();
+          return;  // 'out' is left untouched
+        }
+        in >> c;
+        if (c == '\\') {
+          in >> c;  // the escaped character
+          if (c == 'n') d << '\n';
+          else if (c == '"') d << '"';
+          else if (c == '\\') d << '\\';
+          else {
+            raise << "parse_instruction_character_by_character: unknown escape sequence '\\" << c << "'\n" << end();
+            return;
+          }
+          continue;  // so that an escaped quote doesn't terminate the string below
+        } else {
+          d << c;
+        }
+        if (c == '"') break;  // unescaped closing quote (already appended to d)
+      }
+      result.words.back().data = d.str();
+      result.words.back().original = d.str();
+      // slurp metadata
+      ostringstream m;
+      while (!isspace(in.peek()) && has_data(in)) {  // peek can sometimes trigger eof(), so do it first
+        in >> c;
+        if (c == '/') {
+          if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
+          m.str("");  // start accumulating the next metadata entry
+        }
+        else {
+          m << c;
+        }
+      }
+      if (!m.str().empty()) result.words.back().metadata.push_back(m.str());  // flush the final entry
+    }
+    else {
+      // not a string literal; slurp all characters until whitespace
+      ostringstream w;
+      w << c;
+      while (!isspace(in.peek()) && has_data(in)) {  // peek can sometimes trigger eof(), so do it first
+        in >> c;
+        w << c;
+      }
+      parse_word(w.str(), result.words.back());
+    }
+    trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
+  }
+  if (!result.words.empty())
+    out.push_back(result);  // lines without any words are not emitted
+}
+
+void skip_whitespace(istream& in) {
+  // consume leading whitespace; stops at the first non-space character or when input runs out
+  for (;  has_data(in) && isspace(in.peek());  in.get())
+    ;
+}
+
+void skip_comment(istream& in) {
+  if (!has_data(in) || in.peek() != '#') return;  // not at a comment
+  do {  // first pass drops the '#'; later passes drop the rest, leaving any newline unread
+    in.get();
+  } while (has_data(in) && in.peek() != '\n');
+}
+
+line label(string s) {
+  line result;
+  result.words.resize(1);  // a label line holds exactly one word: the name followed by ':'
+  result.words.front().data = s + ":";
+  return result;
+}
+
+// helper for tests: parses a single line and discards the result, leaving only the 'parse2' trace to check
+void parse_instruction_character_by_character(const string& line_data) {
+  vector<line> out;  // throwaway
+  parse_instruction_character_by_character(line_data, out);
+}
+
+void test_parse2_comment_token_in_middle() {
+  parse_instruction_character_by_character(
+      "a . z\n"  // lone '.' between two words
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: a\n"
+      "parse2: word: z\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
+  // no other words
+  CHECK_TRACE_COUNT("parse2", 2);
+}
+
+void test_parse2_word_starting_with_dot() {
+  parse_instruction_character_by_character(
+      "a .b c\n"  // '.' immediately followed by a non-space character
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: a\n"
+      "parse2: word: .b\n"  // kept as an ordinary word, dot included
+      "parse2: word: c\n"
+  );
+}
+
+void test_parse2_comment_token_at_start() {
+  parse_instruction_character_by_character(
+      ". a b\n"  // lone '.' opens the line
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: a\n"
+      "parse2: word: b\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
+}
+
+void test_parse2_comment_token_at_end() {
+  parse_instruction_character_by_character(
+      "a b .\n"  // lone '.' closes the line
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: a\n"
+      "parse2: word: b\n"
+  );
+  CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
+}
+
+void test_parse2_word_starting_with_dot_at_start() {
+  parse_instruction_character_by_character(
+      ".a b c\n"  // the first word itself begins with '.'
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: .a\n"
+      "parse2: word: b\n"
+      "parse2: word: c\n"
+  );
+}
+
+void test_parse2_metadata() {
+  parse_instruction_character_by_character(
+      ".a b/c d\n"  // 'c' is metadata attached to 'b'
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: .a\n"
+      "parse2: word: b /c\n"
+      "parse2: word: d\n"
+  );
+}
+
+void test_parse2_string_with_metadata() {
+  parse_instruction_character_by_character(
+      "a \"bc  def\"/disp32 g\n"  // metadata directly after the closing quote
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: a\n"
+      "parse2: word: \"bc  def\" /disp32\n"
+      "parse2: word: g\n"
+  );
+}
+
+void test_parse2_string_with_metadata_at_end() {
+  parse_instruction_character_by_character(
+      "a \"bc  def\"/disp32\n"  // the string and its metadata are the last thing on the line
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: a\n"
+      "parse2: word: \"bc  def\" /disp32\n"
+  );
+}
+
+void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
+  parse_instruction_character_by_character(
+      "68/push \"test\"/f"  // no newline, which is how calls from parse() will look
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: 68 /push\n"
+      "parse2: word: \"test\" /f\n"  // metadata is still picked up when input ends right after it
+  );
+}
+
+//: Make sure slashes inside strings don't trigger adding stuff from inside the
+//: string to metadata.
+
+void test_parse2_string_containing_slashes() {
+  parse_instruction_character_by_character(
+      "a \"bc/def\"/disp32\n"  // only the slash after the closing quote starts metadata
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: \"bc/def\" /disp32\n"
+  );
+}
+
+void test_instruction_with_string_literal_with_escaped_quote() {
+  parse_instruction_character_by_character(
+      "\"a\\\"b\"\n"  // escaped quote inside string
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: \"a\"b\"\n"  // the backslash is dropped; the quote stays inside the word
+  );
+  // no other words
+  CHECK_TRACE_COUNT("parse2", 1);
+}
+
+void test_instruction_with_string_literal_with_escaped_backslash() {
+  parse_instruction_character_by_character(
+      "\"a\\\\b\"\n"  // escaped backslash inside string
+  );
+  CHECK_TRACE_CONTENTS(
+      "parse2: word: \"a\\b\"\n"  // the two-character escape collapses to a single backslash
+  );
+  // no other words
+  CHECK_TRACE_COUNT("parse2", 1);
+}
diff --git a/linux/bootstrap/039debug.cc b/linux/bootstrap/039debug.cc
new file mode 100644
index 00000000..411818ff
--- /dev/null
+++ b/linux/bootstrap/039debug.cc
@@ -0,0 +1,175 @@
+//:: Some helpers for debugging.
+
+//: Load the 'labels' and 'source_lines' files generated during
+//: 'bootstrap --debug translate' when running 'bootstrap --trace run'.
+//: (It'll only affect the trace.)
+
+:(before "End Globals")
+map</*address*/uint32_t, string> Symbol_name;  // filled from the 'labels' file; used only by 'bootstrap run'
+map</*address*/uint32_t, string> Source_line;  // filled from the 'source_lines' file; used only by 'bootstrap run'
+:(before "End --trace Settings")
+load_labels();
+load_source_lines();
+:(code)
+void load_labels() {
+  ifstream fin("labels");
+  if (fin.fail()) return;  // no 'labels' file; nothing to load
+  fin >> std::hex;  // addresses are written in hex
+  // Fill Symbol_name from entries of the form '<address> <name>'. The loop tests
+  // the extraction itself: checking only for remaining input lets a final failed
+  // read (e.g. trailing whitespace before EOF) register an empty label at address 0.
+  uint32_t addr = 0;
+  string name;
+  while (fin >> addr >> name)
+    put(Symbol_name, addr, name);
+}
+
+void load_source_lines() {
+  ifstream fin("source_lines");
+  if (fin.fail()) return;  // no 'source_lines' file; nothing to load
+  fin >> std::hex;  // addresses are written in hex
+  // Fill Source_line from lines of the form '<address> <source text>'. Stop at the first address that
+  // fails to parse (e.g. trailing whitespace before EOF) rather than recording an empty line at address 0.
+  for (uint32_t addr = 0;  fin >> addr; ) {
+    string line;
+    getline(fin, line);  // rest of the line, possibly empty
+    put(Source_line, addr, hacky_squeeze_out_whitespace(line));
+  }
+}
+
+:(after "Run One Instruction")
+if (contains_key(Symbol_name, EIP))  // EIP sits exactly on a known label
+  trace(Callstack_depth, "run") << "== label " << get(Symbol_name, EIP) << end();
+if (contains_key(Source_line, EIP))
+  trace(Callstack_depth, "run") << "inst: " << get(Source_line, EIP) << end();
+else
+  // no source line info; do what you can
+  trace(Callstack_depth, "run") << "inst: " << debug_info(EIP) << end();
+
+:(code)
+string debug_info(uint32_t inst_address) {
+  ostringstream desc;  // calls are described by their target; anything else by its opcode byte alone
+  const uint8_t opcode = read_mem_u8(inst_address);
+  if (opcode == 0xe8) {
+    const int32_t disp = read_mem_i32(inst_address+/*skip op*/1);
+    const uint32_t target = inst_address+/*inst length*/5+disp;  // displacement counts from the end of the call
+    if (contains_key(Symbol_name, target))
+      desc << "e8/call " << get(Symbol_name, target);
+    else
+      desc << "e8/call 0x" << HEXWORD << target;
+  }
+  else
+    desc << HEXBYTE << NUM(opcode);
+  return desc.str();
+}
+
+//: If a label starts with '$watch-', make a note of the effective address
+//: computed by the next instruction. Start dumping out its contents to the
+//: trace after every subsequent instruction.
+
+:(after "Run One Instruction")
+dump_watch_points();
+:(before "End Globals")
+map<string, uint32_t> Watch_points;  // '$watch-...' label name -> address being watched
+:(before "End Reset")
+Watch_points.clear();
+:(code)
+void dump_watch_points() {
+  map<string, uint32_t>::iterator wp = Watch_points.begin();
+  if (wp != Watch_points.end()) trace(Callstack_depth, "dbg") << "watch points:" << end();
+  for (;  wp != Watch_points.end();  ++wp)  // one trace line per watch: name, address, current 32-bit contents
+    trace(Callstack_depth, "dbg") << "  " << wp->first << ": " << HEXWORD << wp->second << " -> " << HEXWORD << read_mem_u32(wp->second) << end();
+}
+
+:(before "End Globals")
+string Watch_this_effective_address;  // name of the '$watch-' label at the current instruction, or "" if none
+:(after "Run One Instruction")
+Watch_this_effective_address = "";  // cleared first, so it is only set for an instruction sitting at such a label
+if (contains_key(Symbol_name, EIP) && starts_with(get(Symbol_name, EIP), "$watch-"))
+  Watch_this_effective_address = get(Symbol_name, EIP);
+:(after "Found effective_address(addr)")
+if (!Watch_this_effective_address.empty()) {
+  dbg << "now watching " << HEXWORD << addr << " for " << Watch_this_effective_address << end();
+  put(Watch_points, Watch_this_effective_address, addr);  // dump_watch_points() reports every entry stored here
+}
+
+//: If a label starts with '$dump-stack', dump out to the trace n bytes on
+//: either side of ESP.
+
+:(after "Run One Instruction")
+if (contains_key(Symbol_name, EIP) && starts_with(get(Symbol_name, EIP), "$dump-stack")) {  // prefix match, so suffixed labels trigger it too
+  dump_stack(64);  // n = 64
+}
+:(code)
+void dump_stack(int n) {
+  // Trace the stack in rows of four 32-bit words around ESP, with brackets around the word ESP points at.
+  const uint32_t sp = Reg[ESP].u;
+  const uint32_t first_row = ((sp-n)&0xfffffff0);  // n bytes below ESP, rounded down to a 16-byte row
+  dbg << "stack:" << end();
+  for (uint32_t row = first_row;  row < first_row+n*2 && row < AFTER_STACK;  row+=16) {  // never start a row at or past AFTER_STACK
+    ostringstream out;
+    out << HEXWORD << row << ":";
+    for (uint32_t addr = row;  addr != row+16;  addr+=4) {
+      const bool at_sp = (addr == sp);
+      out << ' ' << (at_sp ? '[' : ' ');
+      out << HEXWORD << read_mem_u32(addr);
+      out << (at_sp ? ']' : ' ');
+    }
+    dbg << out.str() << end();
+  }
+}
+
+//: Special label that dumps regions of memory.
+//: Not a general mechanism; by the time you get here you're willing to hack
+//: on the emulator.
+:(after "Run One Instruction")
+if (contains_key(Symbol_name, EIP) && get(Symbol_name, EIP) == "$dump-stream-at-EAX")  // exact match, not just a prefix
+  dump_stream_at(Reg[EAX].u);  // the value in EAX is used as the stream's start address
+:(code)
+void dump_stream_at(uint32_t stream_start) {
+  const int32_t len = read_mem_i32(stream_start + /*offset of the length field*/8);
+  dbg << "stream length: " << std::dec << len << end();
+  for (int offset = 0;  offset < len + 12;  ++offset)  // the first 12 bytes, then 'len' more; one trace line per byte
+    dbg << "0x" << HEXWORD << (stream_start+offset) << ": " << HEXBYTE << NUM(read_mem_u8(stream_start+offset)) << end();
+}
+
+//: helpers
+
+:(code)
+string hacky_squeeze_out_whitespace(const string& s) {
+  // Trim s, then collapse runs of spaces/dots into single spaces (details below). First, strip whitespace at start.
+  string::const_iterator first = s.begin();
+  while (first != s.end() && isspace(static_cast<unsigned char>(*first)))  // cast: isspace() is undefined for negative chars (non-ASCII bytes)
+    ++first;
+  if (first == s.end()) return "";
+
+  // strip whitespace at end
+  string::const_iterator last = s.end()-1;  // safe: at least one non-space character exists by now
+  while (last != s.begin() && isspace(static_cast<unsigned char>(*last)))
+    --last;
+  ++last;
+
+  // replace runs of spaces/dots with single space until comment or string
+  // TODO:
+  //   leave alone dots not surrounded by whitespace
+  //   leave alone '#' within word
+  //   leave alone '"' within word
+  //   squeeze spaces after end of string
+  ostringstream out;
+  bool previous_was_space = false;
+  bool in_comment_or_string = false;
+  for (string::const_iterator curr = first;  curr != last;  ++curr) {
+    if (in_comment_or_string)
+      out << *curr;
+    else if (isspace(static_cast<unsigned char>(*curr)) || *curr == '.')
+      previous_was_space = true;
+    else {
+      if (previous_was_space)
+        out << ' ';
+      out << *curr;
+      previous_was_space = false;
+      if (*curr == '#' || *curr == '"') in_comment_or_string = true;
+    }
+  }
+  return out.str();
+}
diff --git a/linux/bootstrap/040tests.cc b/linux/bootstrap/040tests.cc
new file mode 100644
index 00000000..0586249b
--- /dev/null
+++ b/linux/bootstrap/040tests.cc
@@ -0,0 +1,95 @@
+//: Automatically aggregate functions starting with 'test-' into a test suite
+//: called 'run-tests'. Running this function will run all tests.
+//:
+//: This is actually SubX's first (trivial) compiler. We generate all the code
+//: needed for the 'run-tests' function.
+//:
+//: By convention, temporary functions needed by tests will start with
+//: '_test-'.
+
+//: We don't rely on any transforms running in previous layers, but this layer
+//: knows about labels and will emit labels for previous layers to transform.
+:(after "Begin Transforms")
+Transform.push_back(create_test_function);  // added to the list of transforms applied to each program
+
+:(code)
+void test_run_test() {
+  Mem.push_back(vma(0xbd000000));  // manually allocate memory
+  Reg[ESP].u = 0xbd000100;  // stack pointer placed 0x100 above the address passed to vma()
+  run(
+      "== code 0x1\n"  // code segment
+      "main:\n"
+      "  e8/call run-tests/disp32\n"  // 5 bytes
+      "  f4/halt\n"                   // 1 byte
+      "test-foo:\n"  // offset 7
+      "  01 d8\n"  // just some unique instruction: add EBX to EAX
+      "  c3/return\n"
+  );
+  // check that code in test-foo ran (implicitly called by run-tests, which the program above never defines itself)
+  CHECK_TRACE_CONTENTS(
+      "run: 0x00000007 opcode: 01\n"
+  );
+}
+
+void create_test_function(program& p) {
+  // Append a 'run-tests' function to the code segment that calls every function whose label starts with 'test-'.
+  segment* code = p.segments.empty() ? NULL : find(p, "code");
+  if (!code) return;  // no code segment (e.g. a data-only program): nothing to aggregate, and nothing to dereference
+  trace(3, "transform") << "-- create 'run-tests'" << end();
+  vector<line> new_insts;  // one 'call' per test label, in source order
+  for (int i = 0;  i < SIZE(code->lines);  ++i) {
+    const line& inst = code->lines.at(i);
+    for (int j = 0;  j < SIZE(inst.words);  ++j) {
+      const word& curr = inst.words.at(j);
+      if (curr.data.empty() || *curr.data.rbegin() != ':') continue;  // not a label
+      if (!starts_with(curr.data, "test-")) continue;
+      new_insts.push_back(call(drop_last(curr.data)));  // drop the trailing ':' to get the function name
+    }
+  }
+  if (new_insts.empty()) return;  // no tests found
+  code->lines.push_back(label("run-tests"));
+  code->lines.insert(code->lines.end(), new_insts.begin(), new_insts.end());
+  code->lines.push_back(ret());
+}
+
+string to_string(const segment& s) {
+  ostringstream result;  // one output line per source line, words separated by single spaces
+  for (int l = 0;  l < SIZE(s.lines);  ++l) {
+    const char* separator = "";
+    for (int w = 0;  w < SIZE(s.lines.at(l).words);  ++w) {
+      result << separator << to_string(s.lines.at(l).words.at(w));
+      separator = " ";
+    }
+    result << '\n';
+  }
+  return result.str();
+}
+
+line call(string s) {
+  line inst;  // the instruction 'e8/call <s>/disp32'
+  inst.words.push_back(call());  // opcode word
+  inst.words.push_back(disp32(s));  // target, to be resolved as a 32-bit displacement
+  return inst;
+}
+
+word call() {
+  word opcode;  // reads as 'e8/call'
+  opcode.metadata.push_back("call");
+  opcode.data = "e8";
+  return opcode;
+}
+
+word disp32(string s) {
+  word operand;  // reads as '<s>/disp32'
+  operand.metadata.push_back("disp32");
+  operand.data = s;
+  return operand;
+}
+
+line ret() {
+  line inst;  // the single-word instruction 'c3/return'
+  inst.words.resize(1);
+  inst.words.front().metadata.push_back("return");
+  inst.words.front().data = "c3";
+  return inst;
+}
diff --git a/linux/bootstrap/README.md b/linux/bootstrap/README.md
new file mode 100644
index 00000000..fdc3213a
--- /dev/null
+++ b/linux/bootstrap/README.md
@@ -0,0 +1,7 @@
+This tool is 2 things:
+
+a) An emulator for SubX, the subset of the 32-bit x86 instruction set used by
+Mu.
+
+b) A second translator for SubX programs that emits identical binaries to the
+self-hosting versions in the parent directory.
diff --git a/linux/bootstrap/bootstrap b/linux/bootstrap/bootstrap
new file mode 100755
index 00000000..b3e6cc60
--- /dev/null
+++ b/linux/bootstrap/bootstrap
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Run SubX VM, first compiling if necessary.
+set -e
+
+$(dirname $0)/build  &&  $(dirname $0)/bootstrap_bin "$@"
diff --git a/linux/bootstrap/build b/linux/bootstrap/build
new file mode 100755
index 00000000..de5b432d
--- /dev/null
+++ b/linux/bootstrap/build
@@ -0,0 +1,105 @@
+#!/bin/sh
+# returns 0 on successful build or nothing to build
+# non-zero exit status only on error during building
+set -e  # stop immediately on error
+
+cd "$(dirname "$0")"  # quoted: the checkout may live under a path containing spaces
+
+# [0-9]*.cc -> bootstrap.cc -> bootstrap_bin
+# (layers)   |               |
+#          tangle           $CXX
+
+# can also be called with a layer to only build until
+#   $ ./build --until 050
+UNTIL_LAYER=${2:-zzz}  # $1 is not inspected; 'zzz' sorts after every digit-prefixed layer, i.e. build everything
+
+# we use two mechanisms to speed up rebuilds:
+# - older_than: run a command if the output is older than any of the inputs
+# - update: if a command is quick to run, always run it but update the result only on any change
+#
+# avoid combining both mechanisms to generate a single file
+# otherwise you'll see spurious messages about files being updated
+# risk: a file may unnecessarily update without changes, causing unnecessary work downstream
+
+test "$CXX" || export CXX=c++  # honor $CXX from the environment, defaulting to c++
+test "$CXXFLAGS" || export CXXFLAGS="-g -O3 -std=c++98"  # CI has an ancient version; don't expect recent dialects
+export CXXFLAGS="$CXXFLAGS -Wall -Wextra -fno-strict-aliasing"  # always appended, even to caller-provided flags
+
+# return 1 if $1 is older than _any_ of the remaining args
+older_than() {
+  local target=$1
+  shift
+  if [ ! -e $target ]
+  then
+#?     echo "$target doesn't exist"
+    echo "updating $target" >&2
+    return 0  # success
+  fi
+  local f
+  for f in $*
+  do
+    if [ $f -nt $target ]
+    then
+      echo "updating $target" >&2
+      return 0  # success
+    fi
+  done
+  return 1  # failure
+}
+
+# redirect to $1, unless it's already identical
+update() {
+  if [ ! -e "$1" ]
+  then
+    cat > "$1"  # first run: take stdin as is
+  else
+    cat > "$1.tmp"
+    diff -q "$1" "$1.tmp" >/dev/null  &&  rm "$1.tmp"  ||  mv "$1.tmp" "$1"  # identical: leave $1 untouched; else replace it
+  fi
+}
+
+update_cp() {
+  # copy file $1 into directory $2, unless $2/$1 already exists
+  # and is at least as new as $1
+  if [ ! -e "$2/$1" ] || [ "$1" -nt "$2/$1" ]
+  then
+    # missing or stale: both cases need the same copy
+    cp "$1" "$2"
+  fi
+}
+
+noisy_cd() {
+  cd "$1"
+  echo "-- $(pwd)" >&2  # announce the new working directory on stderr
+}
+
+older_than tools/enumerate tools/enumerate.cc && {
+  $CXX $CXXFLAGS tools/enumerate.cc -o tools/enumerate
+}
+
+older_than tools/tangle tools/tangle.cc && {
+  noisy_cd tools
+    grep -h "^[^ #].*) {" tangle.cc  |sed 's/ {.*/;/'  |update tangle.function_list
+    grep -h "^[[:space:]]*void test_" tangle.cc  |sed 's/^\s*void \(.*\)() {$/\1,/'  |update tangle.test_list
+    grep -h "^\s*void test_" tangle.cc  |sed 's/^\s*void \(.*\)() {.*/"\1",/'  |update tangle.test_name_list
+    $CXX $CXXFLAGS tangle.cc -o tangle
+    ./tangle test
+  noisy_cd ..  # back to the parent directory: { } is not a subshell, and the steps below use paths relative to it
+}
+
+LAYERS=$(tools/enumerate --until "$UNTIL_LAYER"  |grep '\.cc$')  # only names ending in a literal '.cc'
+older_than bootstrap.cc $LAYERS tools/enumerate tools/tangle && {
+  # no update here; rely on 'update' calls downstream
+  tools/tangle $LAYERS  > bootstrap.cc
+}
+
+grep -h "^[^[:space:]#].*) {$" bootstrap.cc  |grep -v ":.*("  |sed 's/ {.*/;/'  |update function_list  # a prototype for each unindented line ending in ') {'
+grep -h "^\s*void test_" bootstrap.cc  |sed 's/^\s*void \(.*\)() {.*/\1,/'  |update test_list  # test function names, comma-terminated
+grep -h "^\s*void test_" bootstrap.cc  |sed 's/^\s*void \(.*\)() {.*/"\1",/'  |update test_name_list  # the same names as quoted strings
+
+older_than bootstrap_bin bootstrap.cc *_list && {
+  $CXX $CXXFLAGS bootstrap.cc -o bootstrap_bin
+  echo
+}
+
+exit 0
diff --git a/linux/bootstrap/build_and_test_until b/linux/bootstrap/build_and_test_until
new file mode 100755
index 00000000..6f0782e1
--- /dev/null
+++ b/linux/bootstrap/build_and_test_until
@@ -0,0 +1,18 @@
+#!/bin/sh
+# Run tests for just a subset of layers.
+#
+# Usage:
+#   build_and_test_until [file prefix] [test name]
+# Provide the second arg to run just a single test.
+set -e
+
+# clean previous builds if they were building until a different layer
+touch .until
+PREV_UNTIL=`cat .until`
+if [ "$PREV_UNTIL" != "$1" ]  # quoted so the comparison stays well-formed when no prefix is given
+then
+  ./clean top-level
+  echo "$1" > .until
+fi
+
+./build --until "$1"  &&  ./bootstrap_bin test $2  # $2 left unquoted on purpose: no argument is passed when it's absent
diff --git a/linux/bootstrap/clean b/linux/bootstrap/clean
new file mode 100755
index 00000000..ebd37d70
--- /dev/null
+++ b/linux/bootstrap/clean
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -e
+
+set -v  # echo each command below as the shell reads it
+rm -rf bootstrap.cc bootstrap_bin* *_list
+rm -rf .until
+rm -rf tools/enumerate tools/tangle tools/*_list tools/*.dSYM
diff --git a/linux/bootstrap/test_layers b/linux/bootstrap/test_layers
new file mode 100755
index 00000000..eda1150c
--- /dev/null
+++ b/linux/bootstrap/test_layers
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Repeatedly build only up to each successive layer, and run all tests built.
+#
+# Assumes .subx files all come after .cc files.
+
+set -e
+
+cd "$(dirname "$0")"
+# add C++ files one at a time
+for f in [0-9]*cc
+do
+  echo "=== bootstrap $f"
+  ./build_and_test_until "$f"
+done
+
+# build everything one last time
+./clean
+./build  # rebuild from scratch with every layer included
diff --git a/linux/bootstrap/tools/enumerate.cc b/linux/bootstrap/tools/enumerate.cc
new file mode 100644
index 00000000..2777c407
--- /dev/null
+++ b/linux/bootstrap/tools/enumerate.cc
@@ -0,0 +1,26 @@
+#include<assert.h>
+#include<cstdlib>
+#include<dirent.h>
+#include<vector>
+using std::vector;
+#include<string>
+using std::string;
+#include<iostream>
+using std::cout;
+
+int main(int argc, const char* argv[]) {
+  assert(argc == 3);
+  assert(string(argv[1]) == "--until");
+  string last_file(argv[2]);
+  // Print, in alphabetical order, the names in the current directory that start with a digit, stopping after last_file.
+  dirent** files;
+  int num_files = scandir(".", &files, NULL, alphasort);
+  for (int i = 0; i < num_files; ++i) {
+    string curr_file = files[i]->d_name;
+    if (!isdigit(static_cast<unsigned char>(curr_file.at(0)))) continue;  // cast: isdigit() is undefined for negative chars (non-ASCII names)
+    if (!last_file.empty() && curr_file > last_file) break;
+    cout << curr_file << '\n';
+  }
+  // don't bother freeing files
+  return 0;
+}
diff --git a/linux/bootstrap/tools/tangle.cc b/linux/bootstrap/tools/tangle.cc
new file mode 100644
index 00000000..c63db5df
--- /dev/null
+++ b/linux/bootstrap/tools/tangle.cc
@@ -0,0 +1,1077 @@
+// Reorder a file based on directives starting with ':(' (tangle directives).
+// Insert #line directives to preserve line numbers in the original.
+// Clear lines starting with '//:' (tangle comments).
+
+#include<assert.h>
+#include<cstdlib>
+#include<cstring>
+
+#include<vector>
+using std::vector;
+#include<list>
+using std::list;
+#include<utility>
+using std::pair;
+
+#include<string>
+using std::string;
+
+#include<iostream>
+using std::istream;
+using std::ostream;
+using std::cin;
+using std::cout;
+using std::cerr;
+
+#include<sstream>
+using std::istringstream;
+using std::ostringstream;
+
+#include<fstream>
+using std::ifstream;
+
+#include <locale>
+using std::isspace;  // unicode-aware
+
+//// Core data structures
+
+// One line of text plus its provenance (file name and line number), so that
+// #line directives can still be emitted after lines have been reordered.
+struct Line {
+  string filename;
+  size_t line_number;
+  string contents;
+
+  // No provenance: empty filename, line number 0.
+  Line() :filename(), line_number(0), contents() {}
+  Line(const string& text) :filename(), line_number(0), contents(text) {}
+
+  // Text with explicit provenance.
+  Line(const string& text, const string& f, const size_t& l)
+    :filename(f), line_number(l), contents(text) {}
+
+  // New text that takes over the provenance of an existing line.
+  Line(const string& text, const Line& origin)
+    :filename(origin.filename), line_number(origin.line_number), contents(text) {}
+};
+
+// EMIT(lines, out): write each Line's contents to 'out', with a #line directive at the start and
+// wherever the filename changes or line numbers stop being consecutive. No-op for an empty list.
+// A macro because 'out' is re-expanded at every use: Trace_stream->stream(...) starts a new trace line each time.
+#define EMIT(lines, out) if (!lines.empty()) { \
+  string last_file = lines.begin()->filename; \
+  size_t last_line = lines.begin()->line_number-1; \
+  out << line_directive(lines.begin()->line_number, lines.begin()->filename) << '\n'; \
+  for (list<Line>::const_iterator p = lines.begin(); p != lines.end(); ++p) { \
+    if (last_file != p->filename || last_line != p->line_number-1) \
+      out << line_directive(p->line_number, p->filename) << '\n'; \
+    out << p->contents << '\n'; \
+    last_file = p->filename; \
+    last_line = p->line_number; \
+  } \
+}
+
+//// Traces and white-box tests
+
+bool Passed = true;  // cleared by a failing check; setup() sets it again before each test
+// Number of failed checks so far; run_tests() prints and returns it.
+long Num_failures = 0;
+// CHECK(X): if X is false, count a failure, print its location to cerr and return from the enclosing function.
+#define CHECK(X) \
+  if (!(X)) { \
+    ++Num_failures; \
+    cerr << "\nF " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << '\n'; \
+    Passed = false; \
+    return; \
+  }
+// CHECK_EQ(X, Y): same as CHECK, for X == Y; on failure it also prints the value of X.
+#define CHECK_EQ(X, Y) \
+  if ((X) != (Y)) { \
+    ++Num_failures; \
+    cerr << "\nF " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << " == " << #Y << '\n'; \
+    cerr << "  got " << (X) << '\n';  /* BEWARE: multiple eval */ \
+    Passed = false; \
+    return; \
+  }
+
+bool Hide_warnings = false;  // set by tests: 'raise' then writes to the "warn" trace layer and 'die' doesn't exit
+// Collects trace lines, each tagged with a layer label, so that tests can inspect them.
+struct trace_stream {
+  vector<pair<string, string> > past_lines;  // [(layer label, line)]
+  ostringstream* curr_stream;  // accumulator for current line; NULL between lines
+  string curr_layer;
+  trace_stream() :curr_stream(NULL) {}
+  ~trace_stream() { delete curr_stream; }  // deleting NULL is a no-op
+  // Finish any line in progress and start accumulating a new one in 'layer'.
+  ostringstream& stream(string layer) {
+    newline();
+    curr_stream = new ostringstream;
+    curr_layer = layer;
+    return *curr_stream;
+  }
+
+  // Move the line in progress, minus trailing CR/LF, to past_lines. Call before touching curr_stream or curr_layer.
+  void newline() {
+    if (!curr_stream) return;
+    string curr_contents = curr_stream->str();
+    curr_contents.erase(curr_contents.find_last_not_of("\r\n")+1);
+    past_lines.push_back(pair<string, string>(curr_layer, curr_contents));
+    delete curr_stream;
+    curr_stream = NULL;
+  }
+
+  // Every line of 'layer' formatted as "layer: line\n"; missing layer = everything.
+  string readable_contents(string layer) {
+    newline();
+    ostringstream output;
+    for (vector<pair<string, string> >::iterator p = past_lines.begin(); p != past_lines.end(); ++p)
+      if (layer.empty() || layer == p->first)
+        output << p->first << ": " << with_newline(p->second);
+    return output.str();
+  }
+
+  // s with a '\n' appended unless it already ends in one. An empty s used to read s[size_t(-1)].
+  string with_newline(string s) {
+    return (s.empty() || s[s.size()-1] != '\n') ? s+'\n' : s;
+  }
+};
+
+trace_stream* Trace_stream = NULL;  // allocated by lease_tracer / CLEAR_TRACE; NULL means tracing is off
+// trace(layer) << ...: start a trace line. With no Trace_stream the unparenthesized ?: discards the whole '<<' chain.
+// Top-level helper. IMPORTANT: can't nest.
+#define trace(layer)  !Trace_stream ? cerr /*print nothing*/ : Trace_stream->stream(layer)
+// Warnings should go straight to cerr by default since calls to trace() have
+// some unfriendly constraints (they delay printing, they can't nest)
+#define raise  ((!Trace_stream || !Hide_warnings) ? cerr /*do print*/ : Trace_stream->stream("warn")) << __FILE__ << ":" << __LINE__ << " "
+
+// raise << die exits after printing -- unless Hide_warnings is set.
+struct die {};  // empty tag type: streaming one runs the operator below
+ostream& operator<<(ostream& os, __attribute__((unused)) die) {
+  if (Hide_warnings) return os;  // tests set this so a warning can be checked without exiting
+  os << "dying\n";
+  exit(1);  // does not return, hence no return statement after it
+}
+
+#define CLEAR_TRACE  delete Trace_stream, Trace_stream = new trace_stream;  // drop every trace line recorded so far
+// DUMP(layer): print the trace lines of 'layer' (all layers if it is empty) to cerr.
+#define DUMP(layer)  cerr << Trace_stream->readable_contents(layer)
+
+// Scope guard for the global Trace_stream: allocated on construction, freed and reset to NULL on destruction.
+struct lease_tracer {
+  lease_tracer() { Trace_stream = new trace_stream; }
+  ~lease_tracer() { delete Trace_stream;  Trace_stream = NULL; }
+};
+// Declares a guard local, so tracing lasts until the enclosing scope exits.
+#define START_TRACING_UNTIL_END_OF_SCOPE  lease_tracer leased_tracer;
+
+// Split s on every occurrence of delim; always returns at least one element.
+// An empty delim never matches (it used to loop forever), so s comes back whole.
+vector<string> split(string s, string delim) {
+  vector<string> result;
+  string::size_type begin = 0;
+  if (!delim.empty()) {
+    for (string::size_type end = s.find(delim); end != string::npos; end = s.find(delim, begin)) {
+      result.push_back(string(s, begin, end-begin));
+      begin = end+delim.size();
+    }
+  }
+  result.push_back(string(s, begin, string::npos));  // whatever follows the last delim
+  return result;
+}
+
+// Check that the non-empty lines of 'expected' occur, in order but not necessarily adjacent, among the
+// trace lines of 'layer' (empty layer == everything). On failure: count it, print the first missing
+// line and the trace to cerr, clear Passed and return false. FUNCTION/FILE/LINE locate the caller.
+bool check_trace_contents(string FUNCTION, string FILE, int LINE, string layer, string expected) {
+  vector<string> expected_lines = split(expected, "\n");
+  size_t curr_expected_line = 0;
+  while (curr_expected_line < expected_lines.size() && expected_lines[curr_expected_line].empty())
+    ++curr_expected_line;
+  if (curr_expected_line == expected_lines.size()) return true;  // only blank lines expected
+  Trace_stream->newline();  // flush the line in progress so it can be matched too
+  for (vector<pair<string, string> >::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) {
+    if (!layer.empty() && layer != p->first) continue;
+    if (p->second != expected_lines[curr_expected_line]) continue;
+    ++curr_expected_line;
+    while (curr_expected_line < expected_lines.size() && expected_lines[curr_expected_line].empty())
+      ++curr_expected_line;
+    if (curr_expected_line == expected_lines.size()) return true;
+  }
+
+  ++Num_failures;
+  cerr << "\nF " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << expected_lines[curr_expected_line] << "] in trace:\n";
+  DUMP(layer);
+  Passed = false;
+  return false;
+}
+
+#define CHECK_TRACE_CONTENTS(...)  check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__)
+
+// Count the trace lines of 'layer' equal to 'line'; an empty 'line' matches every line of that layer.
+int trace_count(string layer, string line) {
+  Trace_stream->newline();
+  const bool match_any = (line == "");
+  long matches = 0;
+  for (vector<pair<string, string> >::iterator t = Trace_stream->past_lines.begin(); t != Trace_stream->past_lines.end(); ++t)
+    if (t->first == layer && (match_any || t->second == line))
+      ++matches;
+  return matches;
+}
+
+#define CHECK_TRACE_WARNS()  CHECK(trace_count("warn", "") > 0)  // fail the test unless at least one warning was traced
+#define CHECK_TRACE_DOESNT_WARN() \
+  if (trace_count("warn", "") > 0) {  /* was trace_count("warn"): there is no one-argument form */ \
+    ++Num_failures; \
+    cerr << "\nF " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected warnings\n"; \
+    DUMP("warn"); \
+    Passed = false; \
+    return; \
+  }
+
+bool trace_doesnt_contain(string layer, string line) {  // true when trace_count finds no such line in 'layer'
+  const int occurrences = trace_count(layer, line);
+  return occurrences == 0;
+}
+#define CHECK_TRACE_DOESNT_CONTAIN(...)  CHECK(trace_doesnt_contain(__VA_ARGS__))
+
+// Tests for trace infrastructure
+
+void test_trace_check_compares() {
+  CHECK_TRACE_CONTENTS("test layer", "");  // an all-blank expectation passes even on an empty trace
+  trace("test layer") << "foo";
+  CHECK_TRACE_CONTENTS("test layer", "foo");
+}
+
+void test_trace_check_filters_layers() {  // layer 1's line is found although layer 2 has lines too
+  trace("test layer 1") << "foo";
+  trace("test layer 2") << "bar";
+  CHECK_TRACE_CONTENTS("test layer 1", "foo");
+}
+
+void test_trace_check_ignores_other_lines() {  // trace lines beyond the expected ones are fine
+  trace("test layer 1") << "foo";
+  trace("test layer 1") << "bar";
+  CHECK_TRACE_CONTENTS("test layer 1", "foo");
+}
+
+void test_trace_check_always_finds_empty_lines() {  // nothing traced, nothing non-blank expected
+  CHECK_TRACE_CONTENTS("test layer 1", "");
+}
+
+void test_trace_check_treats_empty_layers_as_wildcards() {
+  trace("test layer 1") << "foo";
+  CHECK_TRACE_CONTENTS("", "foo");  // "" as the layer matches lines from any layer
+}
+
+void test_trace_check_multiple_lines_at_once() {  // one expectation may list several lines, one per '\n'
+  trace("test layer 1") << "foo";
+  trace("test layer 2") << "bar";
+  CHECK_TRACE_CONTENTS("", "foo\n"
+                           "bar\n");
+}
+
+void test_trace_check_always_finds_empty_lines2() {  // blank expected lines are skipped, so this passes on an empty trace
+  CHECK_TRACE_CONTENTS("test layer 1", "\n\n\n");
+}
+
+void test_trace_orders_across_layers() {  // lines from different layers are matched in the order they were traced
+  trace("test layer 1") << "foo";
+  trace("test layer 2") << "bar";
+  trace("test layer 1") << "qux";
+  CHECK_TRACE_CONTENTS("", "foo\n"
+                           "bar\n"
+                           "qux\n");
+}
+
+void test_trace_supports_count() {  // identical lines are counted once per occurrence
+  trace("test layer 1") << "foo";
+  trace("test layer 1") << "foo";
+  CHECK_EQ(trace_count("test layer 1", "foo"), 2);
+}
+
+//// helpers
+
+// can't check trace because trace methods call 'split'
+
+void test_split_returns_at_least_one_elem() {  // empty input still yields one (empty) element
+  vector<string> result = split("", ",");
+  CHECK_EQ(result.size(), 1);
+  CHECK_EQ(result[0], "");
+}
+
+void test_split_returns_entire_input_when_no_delim() {  // no delimiter present: the input is the only element
+  vector<string> result = split("abc", ",");
+  CHECK_EQ(result.size(), 1);
+  CHECK_EQ(result[0], "abc");
+}
+
+void test_split_works() {  // one delimiter, two elements
+  vector<string> result = split("abc,def", ",");
+  CHECK_EQ(result.size(), 2);
+  CHECK_EQ(result[0], "abc");
+  CHECK_EQ(result[1], "def");
+}
+
+void test_split_works2() {  // two delimiters, three elements
+  vector<string> result = split("abc,def,ghi", ",");
+  CHECK_EQ(result.size(), 3);
+  CHECK_EQ(result[0], "abc");
+  CHECK_EQ(result[1], "def");
+  CHECK_EQ(result[2], "ghi");
+}
+
+void test_split_handles_multichar_delim() {  // the delimiter is matched as a whole string, not per character
+  vector<string> result = split("abc,,def,,ghi", ",,");
+  CHECK_EQ(result.size(), 3);
+  CHECK_EQ(result[0], "abc");
+  CHECK_EQ(result[1], "def");
+  CHECK_EQ(result[2], "ghi");
+}
+
+//// Core program
+
+#include "tangle.function_list"
+
+// Build a '#line N' directive, followed by the filename in double quotes when there is one.
+string line_directive(size_t line_number, string filename) {
+  ostringstream directive;
+  directive << "#line " << line_number;
+  if (!filename.empty())
+    directive << " \"" << filename << '"';
+  return directive.str();
+}
+
+string Toplevel = "run";
+
+// Entry point: any "test" argument runs the self-tests; otherwise every argument is a file to tangle.
+int main(int argc, const char* argv[]) {
+  const bool testing = flag("test", argc, argv);
+  return testing ? run_tests() : tangle(argc, argv);
+}
+
+bool flag(const string& flag, int argc, const char* argv[]) {  // does any argument after argv[0] equal 'flag'?
+  int i = 1;
+  while (i < argc && flag != argv[i])
+    ++i;
+  return i < argc;
+}
+
+void setup() {  // run before each test: start out passing, with warnings un-hidden
+  Passed = true;
+  Hide_warnings = false;
+}
+
+// Run after each test: un-hide warnings and print a progress dot if every check passed.
+// (A failing check has already printed its own report.)
+void verify() {
+  Hide_warnings = false;
+  if (Passed)
+    cerr << ".";
+}
+
+// Tangle the files argv[1..] in order and print the result to cout. Returns 1 if a file can't be opened (it used to hang).
+int tangle(int argc, const char* argv[]) {
+  list<Line> result;
+  for (int i = 1; i < argc; ++i) {
+    Toplevel = "run";
+    ifstream in(argv[i]);
+    if (!in) { cerr << "tangle: can't open " << argv[i] << '\n';  return 1; }
+    tangle(in, argv[i], result);
+  }
+  EMIT(result, cout);
+  return 0;
+}
+
+// Append the lines of 'in' to 'out': ':(' directives are applied to 'out' via process_next_hunk and
+// '//:' tangle comments are dropped (both still advance line_number). Loops while in.good() rather
+// than !in.eof(): a stream that never opened fails every read without reaching EOF, so the old
+// condition never ended. When tracing, the whole of 'out' is then emitted to the "tangle" layer.
+void tangle(istream& in, const string& filename, list<Line>& out) {
+  string curr_line;
+  size_t line_number = 1;
+
+  while (in.good()) {
+    getline(in, curr_line);  // after a trailing newline this reads one last empty line, as before
+    const bool is_directive = starts_with(curr_line, ":(");
+    if (!is_directive && !starts_with(curr_line, "//:"))
+      out.push_back(Line(curr_line, filename, line_number));
+    ++line_number;
+    if (is_directive)
+      process_next_hunk(in, trim(curr_line), filename, line_number, out);
+  }
+
+  // Trace all line contents, inserting directives just at discontinuities.
+  if (!Trace_stream) return;
+  EMIT(out, Trace_stream->stream("tangle"));
+}
+
+// just for tests: tangle with an empty filename, so #line directives carry only a number
+void tangle(istream& in, list<Line>& out) {
+  tangle(in, "", out);
+}
+// Apply one directive: read its hunk (the lines up to the next ':(' or EOF) and splice it into 'out' as the command says.
+void process_next_hunk(istream& in, const string& directive, const string& filename, size_t& line_number, list<Line>& out) {
+  istringstream directive_stream(directive.substr(2));  // length of ":("
+  string cmd = next_tangle_token(directive_stream);
+
+  // first slurp all lines until next directive
+  list<Line> hunk;
+  {
+    string curr_line;
+    while (!in.eof()) {
+      std::streampos old = in.tellg();
+      getline(in, curr_line);
+      if (starts_with(curr_line, ":(")) {
+        in.seekg(old);  // un-read the directive so the caller's loop sees it next
+        break;
+      }
+      if (starts_with(curr_line, "//:")) {
+        // tangle comments
+        ++line_number;
+        continue;
+      }
+      hunk.push_back(Line(curr_line, filename, line_number));
+      ++line_number;
+    }
+  }
+
+  if (cmd == "code") {
+    out.insert(out.end(), hunk.begin(), hunk.end());  // plain append, no target needed
+    return;
+  }
+
+  if (cmd == "before" || cmd == "after" || cmd == "replace" || cmd == "replace{}" || cmd == "delete" || cmd == "delete{}") {
+    list<Line>::iterator target = locate_target(out, directive_stream);
+    if (target == out.end()) {
+      raise << "couldn't find target " << directive << '\n' << die();
+      return;
+    }
+
+    indent_all(hunk, target);  // the hunk takes on the target line's indentation
+
+    if (cmd == "before") {
+      out.splice(target, hunk);
+    }
+    else if (cmd == "after") {
+      ++target;
+      out.splice(target, hunk);
+    }
+    else if (cmd == "replace" || cmd == "delete") {
+      out.splice(target, hunk);  // hunk goes in first, then the target line itself is removed
+      out.erase(target);
+    }
+    else if (cmd == "replace{}" || cmd == "delete{}") {
+      if (find_trim(hunk, ":OLD_CONTENTS") == hunk.end()) {
+        out.splice(target, hunk);
+        out.erase(target, balancing_curly(target));  // drop the target line through its balancing '}'
+      }
+      else {
+        list<Line>::iterator next = balancing_curly(target);
+        list<Line> old_version;
+        old_version.splice(old_version.begin(), out, target, next);
+        old_version.pop_back();  old_version.pop_front();  // contents only please, not surrounding curlies
+
+        list<Line>::iterator new_pos = find_trim(hunk, ":OLD_CONTENTS");
+        indent_all(old_version, new_pos);
+        hunk.splice(new_pos, old_version);  // old block contents take the place of the :OLD_CONTENTS marker
+        hunk.erase(new_pos);
+        out.splice(next, hunk);
+      }
+    }
+    return;
+  }
+
+  raise << "unknown directive " << cmd << '\n' << die();
+}
+
+// Parse the rest of a directive -- PAT | PAT following PAT2 | PAT then PAT2 -- and return the
+// first matching line of 'out', or out.end() when a pattern is missing or nothing matches.
+list<Line>::iterator locate_target(list<Line>& out, istream& directive_stream) {
+  const string first_pat = next_tangle_token(directive_stream);
+  if (first_pat.empty()) return out.end();
+
+  const string keyword = next_tangle_token(directive_stream);
+  if (keyword.empty())
+    return find_substr(out, first_pat);
+
+  // first way to do nested pattern: pattern 'following' intermediate
+  if (keyword == "following") {
+    const string anchor_pat = next_tangle_token(directive_stream);
+    if (anchor_pat.empty()) return out.end();
+    list<Line>::iterator anchor = find_substr(out, anchor_pat);
+    return (anchor == out.end()) ? out.end() : find_substr(out, anchor, first_pat);
+  }
+  // second way to do nested pattern: intermediate 'then' pattern
+  if (keyword == "then") {
+    list<Line>::iterator anchor = find_substr(out, first_pat);
+    if (anchor == out.end()) return out.end();
+    const string second_pat = next_tangle_token(directive_stream);
+    return second_pat.empty() ? out.end() : find_substr(out, anchor, second_pat);
+  }
+  raise << "unknown keyword in directive: " << keyword << '\n';
+  return out.end();
+}
+
+// Prefix every non-empty line of l with the leading whitespace of the exemplar line.
+void indent_all(list<Line>& l, list<Line>::iterator exemplar) {
+  const string prefix = indent(exemplar->contents);
+  for (list<Line>::iterator line = l.begin(); line != l.end(); ++line)
+    if (!line->contents.empty())
+      line->contents = prefix + line->contents;
+}
+
+// Read the next token of a directive: a double-quoted string (quotes stripped) or a bare word.
+string next_tangle_token(istream& in) {
+  in >> std::noskipws;  // the slurp_* helpers extract chars and must see whitespace
+  skip_whitespace(in);
+  ostringstream token;
+  const bool quoted = (in.peek() == '"');
+  if (quoted) slurp_tangle_string(in, token);
+  else slurp_word(in, token);
+  return token.str();
+}
+
+// Copy a double-quoted string from in to out without its quotes. A backslash
+// makes the character after it literal, so \" does not end the string.
+// Stops at the closing quote or when the input runs out.
+void slurp_tangle_string(istream& in, ostream& out) {
+  in.get();  // the opening quote
+  for (char ch; in >> ch; ) {
+    if (ch == '"') break;
+    if (ch == '\\') {
+      // skip backslash and save next character unconditionally
+      in >> ch;
+    }
+    out << ch;
+  }
+}
+
+void slurp_word(istream& in, ostream& out) {  // copy chars from in to out, stopping before whitespace or ')'
+  char c;
+  while (in >> c) {
+    if (isspace(static_cast<unsigned char>(c)) || c == ')') {  // cast: a negative char is undefined for isspace
+      in.putback(c);
+      break;
+    }
+    out << c;
+  }
+}
+
+void skip_whitespace(istream& in) {  // consume whitespace up to the next other character or EOF
+  for (int next = in.peek(); isspace(next); next = in.peek())
+    in.get();
+}
+
+// Count '{' and '}' line by line from curr; return the position just past the line where they first balance.
+list<Line>::iterator balancing_curly(list<Line>::iterator curr) {
+  long depth = 0;
+  for (;;) {
+    const string& text = curr->contents;
+    for (string::size_type i = 0; i < text.size(); ++i)
+      depth += (text[i] == '{') - (text[i] == '}');
+    ++curr;
+    // no guard so far against unbalanced curly, including inside comments or strings
+    if (depth == 0) return curr;
+  }
+}
+
+list<Line>::iterator find_substr(list<Line>& in, const string& pat) {  // first line containing pat, or in.end()
+  list<Line>::iterator curr = in.begin();
+  while (curr != in.end() && curr->contents.find(pat) == string::npos)
+    ++curr;
+  return curr;
+}
+
+list<Line>::iterator find_substr(list<Line>& in, list<Line>::iterator p, const string& pat) {  // as above, searching from p (inclusive)
+  const list<Line>::iterator stop = in.end();
+  while (p != stop && p->contents.find(pat) == string::npos)
+    ++p;
+  return p;
+}
+
+list<Line>::iterator find_trim(list<Line>& in, const string& pat) {  // first line equal to pat once trimmed, or in.end()
+  list<Line>::iterator curr = in.begin();
+  while (curr != in.end() && trim(curr->contents) != pat)
+    ++curr;
+  return curr;
+}
+
+string escape(string s) {  // backslash-escape every backslash, double quote and newline in s
+  s = replace_all(s, "\\", "\\\\");
+  s = replace_all(s, "\"", "\\\"");
+  s = replace_all(s, "\n", "\\n");  // the pattern was "", which matches at every position and never finished
+  return s;
+}
+
+string replace_all(string s, const string& a, const string& b) {  // every occurrence of a -> b; an empty a matches nothing
+  for (size_t pos = a.empty() ? string::npos : s.find(a); pos != string::npos; pos = s.find(a, pos+b.size()))
+    s.replace(pos, a.size(), b);
+  return s;
+}
+
+// does s start with pat, after skipping whitespace?
+// pat can't start with whitespace; an empty or all-whitespace s never matches
+bool starts_with(const string& s, const string& pat) {
+  for (size_t pos = 0; pos < s.size(); ++pos)
+    if (!isspace(static_cast<unsigned char>(s.at(pos))))  // cast: a negative char is undefined for isspace
+      return s.compare(pos, pat.size(), pat) == 0;
+  return false;
+}
+
+string indent(const string& s) {  // leading whitespace of s; "" when s is empty or all whitespace
+  for (size_t pos = 0; pos < s.size(); ++pos)
+    if (!isspace(static_cast<unsigned char>(s.at(pos))))  // cast: a negative char is undefined for isspace
+      return s.substr(0, pos);
+  return "";
+}
+
+// Drop up to n leading whitespace characters from s.
+string strip_indent(const string& s, size_t n) {
+  string::const_iterator curr = s.begin();
+  while (curr != s.end() && n > 0 && isspace(static_cast<unsigned char>(*curr))) {  // cast: negative char is undefined for isspace
+    ++curr;
+    --n;
+  }
+  return string(curr, s.end());
+}
+
+// Copy of s without its leading and trailing whitespace.
+string trim(const string& s) {
+  string::const_iterator first = s.begin();
+  while (first != s.end() && isspace(static_cast<unsigned char>(*first)))  // cast: a negative char is undefined for isspace
+    ++first;
+  if (first == s.end()) return "";
+  string::const_iterator last = s.end() - 1;  // '--s.end()' needs a modifiable temporary, which isn't guaranteed
+  while (last != s.begin() && isspace(static_cast<unsigned char>(*last)))
+    --last;
+  ++last;
+  return string(first, last);
+}
+
+const Line& front(const list<Line>& l) {  // first element; l must not be empty
+  assert(!l.empty());
+  return *l.begin();
+}
+
+//// Tests for tangle
+
+void test_tangle() {  // ':(before X)' puts the hunk just ahead of the first line containing X
+  istringstream src("a\n"
+                    "b\n"
+                    "c\n"
+                    ":(before b)\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "d\n"
+                                 "b\n"
+                                 "c\n");
+}
+
+void test_tangle_with_linenumber() {  // #line directives appear only where line numbers jump
+  istringstream src("a\n"
+                    "b\n"
+                    "c\n"
+                    ":(before b)\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "#line 1\n"
+                                 "a\n"
+                                 "#line 5\n"
+                                 "d\n"
+                                 "#line 2\n"
+                                 "b\n"
+                                 "c\n");
+  // consecutive lines need no directive of their own
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "#line 3");
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "#line 4");
+}
+
+void test_tangle_linenumbers_with_filename() {  // a non-empty filename is quoted into the directive
+  istringstream src("a\n"
+                    "b\n"
+                    "c\n"
+                    ":(before b)\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, "foo", tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "#line 5 \"foo\"\n"
+                                 "d\n"
+                                 "b\n"
+                                 "c\n");
+}
+
+void test_tangle_line_numbers_with_multiple_filenames() {  // a hunk from a second file keeps that file's name and numbering
+  istringstream src1("a\n"
+                     "b\n"
+                     "c");
+  list<Line> tangled;
+  tangle(src1, "foo", tangled);
+  CLEAR_TRACE;
+  istringstream src2(":(before b)\n"
+                     "d\n");
+  tangle(src2, "bar", tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "#line 2 \"bar\"\n"
+                                 "d\n"
+                                 "#line 2 \"foo\"\n"
+                                 "b\n"
+                                 "c\n");
+}
+
+void test_tangle_linenumbers_with_multiple_directives() {  // directive lines themselves count toward line numbers
+  istringstream src1("a\n"
+                     "b\n"
+                     "c");
+  list<Line> tangled;
+  tangle(src1, "foo", tangled);
+  CLEAR_TRACE;
+  istringstream src2(":(before b)\n"
+                     "d\n"
+                     ":(before c)\n"
+                     "e");
+  tangle(src2, "bar", tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "#line 2 \"bar\"\n"
+                                 "d\n"
+                                 "#line 2 \"foo\"\n"
+                                 "b\n"
+                                 "#line 4 \"bar\"\n"
+                                 "e\n"
+                                 "#line 3 \"foo\"\n"
+                                 "c\n");
+}
+
+void test_tangle_with_multiple_filenames_after() {  // same as the 'before' case, with the hunk placed after its target
+  istringstream src1("a\n"
+                     "b\n"
+                     "c");
+  list<Line> tangled;
+  tangle(src1, "foo", tangled);
+  CLEAR_TRACE;
+  istringstream src2(":(after b)\n"
+                     "d\n");
+  tangle(src2, "bar", tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "b\n"
+                                 "#line 2 \"bar\"\n"
+                                 "d\n"
+                                 "#line 3 \"foo\"\n"
+                                 "c\n");
+}
+
+void test_tangle_skip_tanglecomments() {  // '//:' lines never reach the output
+  istringstream src("a\n"
+                    "b\n"
+                    "c\n"
+                    "//: 1\n"
+                    "//: 2\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "b\n"
+                                 "c\n"
+                                 "\n"
+                                 "\n"
+                                 "d\n");
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "//: 1");
+}
+
+void test_tangle_with_tanglecomments_and_directive() {  // skipped '//:' lines still count toward line numbers
+  istringstream src("a\n"
+                    "//: 1\n"
+                    "b\n"
+                    "c\n"
+                    ":(before b)\n"
+                    "d\n"
+                    ":(code)\n"
+                    "e\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "#line 6\n"
+                                 "d\n"
+                                 "#line 3\n"
+                                 "b\n"
+                                 "c\n"
+                                 "#line 8\n"
+                                 "e\n");
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "//: 1");
+}
+
+void test_tangle_with_tanglecomments_inside_directive() {  // a '//:' line inside a hunk is dropped but still numbered
+  istringstream src("a\n"
+                    "//: 1\n"
+                    "b\n"
+                    "c\n"
+                    ":(before b)\n"
+                    "//: abc\n"
+                    "d\n"
+                    ":(code)\n"
+                    "e\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "#line 7\n"
+                                 "d\n"
+                                 "#line 3\n"
+                                 "b\n"
+                                 "c\n"
+                                 "#line 9\n"
+                                 "e\n");
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "//: 1");
+}
+
+void test_tangle_with_multiword_directives() {  // a quoted waypoint may contain spaces
+  istringstream src("a b\n"
+                    "c\n"
+                    ":(after \"a b\")\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a b\n"
+                                 "d\n"
+                                 "c\n");
+}
+
+void test_tangle_with_quoted_multiword_directives() {  // \" inside a quoted waypoint stands for a literal quote
+  istringstream src("a \"b\"\n"
+                    "c\n"
+                    ":(after \"a \\\"b\\\"\")\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a \"b\"\n"
+                                 "d\n"
+                                 "c\n");
+}
+
+void test_tangle2() {  // ':(after X)' puts the hunk right behind the first line containing X
+  istringstream src("a\n"
+                    "b\n"
+                    "c\n"
+                    ":(after b)\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "b\n"
+                                 "d\n"
+                                 "c\n");
+}
+
+void test_tangle_at_end() {  // 'after' works when the target is the last line so far
+  istringstream src("a\n"
+                    "b\n"
+                    "c\n"
+                    ":(after c)\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "b\n"
+                                 "c\n"
+                                 "d\n");
+}
+
+void test_tangle_indents_hunks_correctly() {  // the hunk takes on the target line's indentation
+  istringstream src("a\n"
+                    "  b\n"
+                    "c\n"
+                    ":(after b)\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "  b\n"
+                                 "  d\n"
+                                 "c\n");
+}
+
+void test_tangle_warns_on_missing_target() {  // a directive with no waypoint at all
+  Hide_warnings = true;
+  istringstream src(":(before)\n"
+                    "abc def\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_WARNS();
+}
+
+void test_tangle_warns_on_unknown_target() {  // a waypoint that matches no line
+  Hide_warnings = true;
+  istringstream src(":(before \"foo\")\n"
+                    "abc def\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_WARNS();
+}
+
+void test_tangle_delete_range_of_lines() {  // 'delete{}' removes the header line through its balancing '}'
+  istringstream in("a\n"
+                   "b {\n"
+                   "c\n"
+                   "}\n"
+                   ":(delete{} \"b\")\n");
+  list<Line> dummy;
+  tangle(in, dummy);
+  CHECK_TRACE_CONTENTS("tangle", "a\n");
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "b {");  // whole-line match: the old "b" could never be present
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "c");
+}
+
+void test_tangle_replace() {  // 'replace' swaps the single target line for the hunk
+  istringstream src("a\n"
+                    "b\n"
+                    "c\n"
+                    ":(replace b)\n"
+                    "d\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "d\n"
+                                 "c\n");
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "b");
+}
+
+void test_tangle_replace_range_of_lines() {  // 'replace{}' swaps a whole braced block for the hunk
+  istringstream src("a\n"
+                    "b {\n"
+                    "c\n"
+                    "}\n"
+                    ":(replace{} \"b\")\n"
+                    "d\n"
+                    "e\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "d\n"
+                                 "e\n");
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "b {");
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "c");
+}
+
+void test_tangle_replace_tracks_old_lines() {  // ':OLD_CONTENTS' in the hunk stands for the old block's inner lines
+  istringstream src("a\n"
+                    "b {\n"
+                    "c\n"
+                    "}\n"
+                    ":(replace{} \"b\")\n"
+                    "d\n"
+                    ":OLD_CONTENTS\n"
+                    "e\n");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "d\n"
+                                 "c\n"
+                                 "e\n");
+  CHECK_TRACE_DOESNT_CONTAIN("tangle", "b {");
+}
+
+void test_tangle_nested_patterns() {  // 'X then Y': the first Y at or after the first X
+  istringstream src("a\n"
+                    "c\n"
+                    "b\n"
+                    "c\n"
+                    "d\n"
+                    ":(after \"b\" then \"c\")\n"
+                    "e");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "c\n"
+                                 "b\n"
+                                 "c\n"
+                                 "e\n"
+                                 "d\n");
+}
+
+void test_tangle_nested_patterns2() {  // 'Y following X': the same search, written target-first
+  istringstream src("a\n"
+                    "c\n"
+                    "b\n"
+                    "c\n"
+                    "d\n"
+                    ":(after \"c\" following \"b\")\n"
+                    "e");
+  list<Line> tangled;
+  tangle(src, tangled);
+  CHECK_TRACE_CONTENTS("tangle", "a\n"
+                                 "c\n"
+                                 "b\n"
+                                 "c\n"
+                                 "e\n"
+                                 "d\n");
+}
+
+// todo: include line numbers in tangle errors
+
+void test_trim() {  // empty, all-space, and leading/trailing/both-sided padding around 1- and 2-char words
+  CHECK_EQ(trim(""), "");
+  CHECK_EQ(trim(" "), "");
+  CHECK_EQ(trim("  "), "");
+  CHECK_EQ(trim("a"), "a");
+  CHECK_EQ(trim(" a"), "a");
+  CHECK_EQ(trim("  a"), "a");
+  CHECK_EQ(trim("  ab"), "ab");
+  CHECK_EQ(trim("a "), "a");
+  CHECK_EQ(trim("a  "), "a");
+  CHECK_EQ(trim("ab  "), "ab");
+  CHECK_EQ(trim(" a "), "a");
+  CHECK_EQ(trim("  a  "), "a");
+  CHECK_EQ(trim("  ab  "), "ab");
+}
+
+void test_strip_indent() {  // n smaller than, equal to and larger than the available indentation
+  CHECK_EQ(strip_indent("", 0), "");
+  CHECK_EQ(strip_indent("", 1), "");
+  CHECK_EQ(strip_indent("", 3), "");
+  CHECK_EQ(strip_indent(" ", 0), " ");
+  CHECK_EQ(strip_indent(" a", 0), " a");
+  CHECK_EQ(strip_indent(" ", 1), "");
+  CHECK_EQ(strip_indent(" a", 1), "a");
+  CHECK_EQ(strip_indent(" ", 2), "");
+  CHECK_EQ(strip_indent(" a", 2), "a");
+  CHECK_EQ(strip_indent("  ", 0), "  ");
+  CHECK_EQ(strip_indent("  a", 0), "  a");
+  CHECK_EQ(strip_indent("  ", 1), " ");
+  CHECK_EQ(strip_indent("  a", 1), " a");
+  CHECK_EQ(strip_indent("  ", 2), "");
+  CHECK_EQ(strip_indent("  a", 2), "a");
+  CHECK_EQ(strip_indent("  ", 3), "");
+  CHECK_EQ(strip_indent("  a", 3), "a");
+}
+
+//// Test harness
+
+typedef void (*test_fn)(void);  // a test takes and returns nothing; it reports through the CHECK* macros
+// Every test function, in the order run_tests() calls them.
+const test_fn Tests[] = {
+  #include "tangle.test_list"  // auto-generated; see 'build*' scripts
+};
+
+// Names for each element of the 'Tests' global, respectively.
+const string Test_names[] = {
+  #include "tangle.test_name_list"  // auto-generated; see 'build*' scripts
+};
+
+// Run every entry of Tests, each in its own trace scope between setup() and verify(), then print a
+// failure summary to cerr. Returns the number of failed checks (0 when everything passed).
+int run_tests() {
+  const unsigned long num_tests = sizeof(Tests)/sizeof(Tests[0]);
+  for (unsigned long t = 0; t < num_tests; ++t) {
+    START_TRACING_UNTIL_END_OF_SCOPE;  // a fresh Trace_stream for each test
+    setup();
+    Tests[t]();
+    verify();
+  }
+  cerr << '\n';
+  if (Num_failures > 0) {
+    const char* plural = (Num_failures > 1) ? "s" : "";
+    cerr << Num_failures << " failure" << plural << '\n';
+  }
+  return Num_failures;
+}
diff --git a/linux/bootstrap/tools/tangle.readme.md b/linux/bootstrap/tools/tangle.readme.md
new file mode 100644
index 00000000..be61d40e
--- /dev/null
+++ b/linux/bootstrap/tools/tangle.readme.md
@@ -0,0 +1,112 @@
+A [Literate Programming](https://en.wikipedia.org/wiki/Literate_programming)
+tool that converts Mu's layers into compilable form.
+
+Mu's tangling directives differ from Knuth's classic implementation. The
+classical approach starts out with labeled subsystems that are initially
+empty, and adds code to them using two major directives:
+
+```
+<name> ≡
+<code>
+```
+
+```
+<name> +≡
+<code>
+```
+
+_(`<code>` can span multiple lines.)_
+
+This approach is best suited for top-down exposition.
+
+On the other hand, Mu's tangling directives are better suited for a cleaned-up
+history of a codebase. Subsystems start out with a simple skeleton of the core
+of the program. Later versions then tell a story of the evolution of the
+program, with each version colocating all the code related to new features.
+
+Read more:
+* http://akkartik.name/post/wart-layers
+* http://akkartik.name/post/literate-programming
+* https://github.com/akkartik/mu/blob/master/000organization.cc
+
+## directives
+
+Add code to a project:
+
+```
+:(code)
+<code>
+```
+
+Insert code before a specific line:
+
+```
+:(before <waypoint>)
+<code>
+```
+
+Here `<waypoint>` is a substring matching a single line in the codebase. (We
+never use regular expressions.) Surround the substring in `"` quotes if it
+spans multiple words.
+
+Insert code _after_ a specific line:
+
+```
+:(after <waypoint>)
+<code>
+```
+
+Delete a specific previously-added line (because it's not needed in a newer
+version):
+
+```
+:(delete <line>)
+```
+
+Delete a block of code starting with a given header and surrounded by `{` and
+`}`:
+
+```
+:(delete{} <header>)
+```
+
+_(Caveat: doesn't directly support C's `do`..`while` loops.)_
+
+Replace a specific line with new code:
+
+```
+:(replace <line>)
+<code>
+```
+
+This is equivalent to:
+```
+:(before <line>)
+<code>
+:(delete <line>)
+```
+_(Assuming `<code>` did not insert a new line matching the substring `<line>`.)_
+
+Replace a block of code with another:
+
+```
+:(replace{} <header>)
+<code>
+```
+
+Insert code before or after a substring pattern that isn't quite a unique
+waypoint in the whole codebase:
+
+```
+:(before <line> following <waypoint>)
+<code>
+:(after <line> following <waypoint>)
+<code>
+```
+
+```
+:(before <waypoint> then <line>)
+<code>
+:(after <waypoint> then <line>)
+<code>
+```