From aaf24db4aeca73e985437d065b36815677716694 Mon Sep 17 00:00:00 2001
From: Kartik Agaram <vc@akkartik.com>
Date: Sat, 4 Aug 2018 22:38:23 -0700
Subject: 4482

---
 subx/010---vm.cc      | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++
 subx/010vm.cc         | 242 --------------------------------------------------
 subx/028translate.cc  | 159 +++++++++++++++++++++++++++++++++
 subx/029transforms.cc |  65 ++++++++++++++
 subx/029translate.cc  | 224 ----------------------------------------------
 5 files changed, 466 insertions(+), 466 deletions(-)
 create mode 100644 subx/010---vm.cc
 delete mode 100644 subx/010vm.cc
 create mode 100644 subx/028translate.cc
 create mode 100644 subx/029transforms.cc
 delete mode 100644 subx/029translate.cc

diff --git a/subx/010---vm.cc b/subx/010---vm.cc
new file mode 100644
index 00000000..c467255b
--- /dev/null
+++ b/subx/010---vm.cc
@@ -0,0 +1,242 @@
+//: Core data structures for simulating the SubX VM (subset of an x86 processor)
+//:
+//: At the lowest level ("level 1") of abstraction, SubX executes x86
+//: instructions provided in the form of an array of bytes, loaded into memory
+//: starting at a specific address.
+
+//:: registers
+//: assume segment registers are hard-coded to 0
+//: no floating-point, MMX, etc. yet
+
+:(before "End Types")
+enum {
+  EAX,
+  ECX,
+  EDX,
+  EBX,
+  ESP,
+  EBP,
+  ESI,
+  EDI,
+  NUM_INT_REGISTERS,
+};
+union reg {
+  int32_t i;
+  uint32_t u;
+};
+:(before "End Globals")
+reg Reg[NUM_INT_REGISTERS] = { {0} };
+uint32_t EIP = 1;  // preserve null pointer
+:(before "End Reset")
+bzero(Reg, sizeof(Reg));
+EIP = 1;  // preserve null pointer
+
+:(before "End Help Contents")
+cerr << "  registers\n";
+:(before "End Help Texts")
+put(Help, "registers",
+  "SubX currently supports eight 32-bit integer registers: R0 to R7.\n"
+  "R4 (ESP) contains the top of the stack.\n"
+  "\n"
+  "There's also a register for the address of the currently executing\n"
+  "instruction. It is modified by jumps.\n"
+  "\n"
+  "Various instructions modify one or more of three 1-bit 'flag' registers,\n"
+  "as a side-effect:\n"
+  "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
+  "  reset if not.\n"
+  "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
+  "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
+  "The flag bits are read by conditional jumps.\n"
+  "\n"
+  "We don't support non-integer (floating-point) registers yet.\n"
+);
+
+:(before "End Globals")
+// the subset of x86 flag registers we care about
+bool SF = false;  // sign flag
+bool ZF = false;  // zero flag
+bool OF = false;  // overflow flag
+:(before "End Reset")
+SF = ZF = OF = false;
+
+//: how the flag registers are updated after each instruction
+
+:(before "End Includes")
+// Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
+// result in 'arg1', then update flags.
+// beware: no side-effects in args
+#define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
+  /* arg1 and arg2 must be signed */ \
+  int64_t tmp = arg1 op arg2; \
+  arg1 = arg1 op arg2; \
+  trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
+  SF = (arg1 < 0); \
+  ZF = (arg1 == 0); \
+  OF = (arg1 != tmp); \
+}
+
+// Combine 'arg1' and 'arg2' with bitwise operation 'op' and store the result
+// in 'arg1', then update flags.
+#define BINARY_BITWISE_OP(op, arg1, arg2) { \
+  /* arg1 and arg2 must be unsigned */ \
+  arg1 = arg1 op arg2; \
+  trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
+  SF = (arg1 >> 31); \
+  ZF = (arg1 == 0); \
+  OF = false; \
+}
+
+//:: simulated RAM
+
+:(before "End Globals")
+vector<uint8_t> Mem;
+uint32_t Mem_offset = 0;
+uint32_t End_of_program = 0;
+:(before "End Reset")
+Mem.clear();
+Mem.resize(1024);
+Mem_offset = 0;
+End_of_program = 0;
+:(code)
+// These helpers depend on Mem being laid out contiguously (so you can't use a
+// map, etc.) and on the host also being little-endian.
+inline uint8_t read_mem_u8(uint32_t addr) {
+  return Mem.at(addr-Mem_offset);
+}
+inline int8_t read_mem_i8(uint32_t addr) {
+  return static_cast<int8_t>(Mem.at(addr-Mem_offset));
+}
+inline uint32_t read_mem_u32(uint32_t addr) {
+  return *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
+}
+inline int32_t read_mem_i32(uint32_t addr) {
+  return *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
+}
+
+inline uint8_t* mem_addr_u8(uint32_t addr) {
+  return &Mem.at(addr-Mem_offset);
+}
+inline int8_t* mem_addr_i8(uint32_t addr) {
+  return reinterpret_cast<int8_t*>(&Mem.at(addr-Mem_offset));
+}
+inline uint32_t* mem_addr_u32(uint32_t addr) {
+  return reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
+}
+inline int32_t* mem_addr_i32(uint32_t addr) {
+  return reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
+}
+
+inline void write_mem_u8(uint32_t addr, uint8_t val) {
+  Mem.at(addr-Mem_offset) = val;
+}
+inline void write_mem_i8(uint32_t addr, int8_t val) {
+  Mem.at(addr-Mem_offset) = static_cast<uint8_t>(val);
+}
+inline void write_mem_u32(uint32_t addr, uint32_t val) {
+  *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset)) = val;
+}
+inline void write_mem_i32(uint32_t addr, int32_t val) {
+  *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset)) = val;
+}
+
+//:: core interpreter loop
+
+:(code)
+// skeleton of how x86 instructions are decoded
+void run_one_instruction() {
+  uint8_t op=0, op2=0, op3=0;
+  trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
+//?   dump_registers();
+//?   cerr << "inst: 0x" << EIP << " => ";
+  op = next();
+//?   cerr << HEXBYTE << NUM(op) << '\n';
+  switch (op) {
+  case 0xf4:  // hlt
+    EIP = End_of_program;
+    break;
+  // End Single-Byte Opcodes
+  case 0x0f:
+    switch(op2 = next()) {
+    // End Two-Byte Opcodes Starting With 0f
+    default:
+      cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
+      DUMP("");
+      exit(1);
+    }
+    break;
+  case 0xf3:
+    switch(op2 = next()) {
+    // End Two-Byte Opcodes Starting With f3
+    case 0x0f:
+      switch(op3 = next()) {
+      // End Three-Byte Opcodes Starting With f3 0f
+      default:
+        cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
+        DUMP("");
+        exit(1);
+      }
+      break;
+    default:
+      cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
+      DUMP("");
+      exit(1);
+    }
+    break;
+  default:
+    cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
+    DUMP("");
+    exit(1);
+  }
+}
+
+inline uint8_t next() {
+  return read_mem_u8(EIP++);
+}
+
+void dump_registers() {
+  for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
+    if (i > 0) cerr << "; ";
+    cerr << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
+  }
+  cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
+}
+
+//: start tracking supported opcodes
+:(before "End Globals")
+map</*op*/string, string> name;
+map</*op*/string, string> name_0f;
+map</*op*/string, string> name_f3;
+map</*op*/string, string> name_f3_0f;
+:(before "End One-time Setup")
+init_op_names();
+:(code)
+void init_op_names() {
+  put(name, "f4", "halt");
+  // End Initialize Op Names(name)
+}
+
+:(before "End Help Special-cases(key)")
+if (key == "opcodes") {
+  cerr << "Opcodes currently supported by SubX:\n";
+  for (map<string, string>::iterator p = name.begin();  p != name.end();  ++p)
+    cerr << "  " << p->first << ": " << p->second << '\n';
+  for (map<string, string>::iterator p = name_0f.begin();  p != name_0f.end();  ++p)
+    cerr << "  0f " << p->first << ": " << p->second << '\n';
+  for (map<string, string>::iterator p = name_f3.begin();  p != name_f3.end();  ++p)
+    cerr << "  f3 " << p->first << ": " << p->second << '\n';
+  for (map<string, string>::iterator p = name_f3_0f.begin();  p != name_f3_0f.end();  ++p)
+    cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
+  cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
+  return 0;
+}
+:(before "End Help Contents")
+cerr << "  opcodes\n";
+
+:(before "End Includes")
+#include <iomanip>
+#define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
+#define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
+// ugly that iostream doesn't print uint8_t as an integer
+#define NUM(X) static_cast<int>(X)
+#include <stdint.h>
diff --git a/subx/010vm.cc b/subx/010vm.cc
deleted file mode 100644
index c467255b..00000000
--- a/subx/010vm.cc
+++ /dev/null
@@ -1,242 +0,0 @@
-//: Core data structures for simulating the SubX VM (subset of an x86 processor)
-//:
-//: At the lowest level ("level 1") of abstraction, SubX executes x86
-//: instructions provided in the form of an array of bytes, loaded into memory
-//: starting at a specific address.
-
-//:: registers
-//: assume segment registers are hard-coded to 0
-//: no floating-point, MMX, etc. yet
-
-:(before "End Types")
-enum {
-  EAX,
-  ECX,
-  EDX,
-  EBX,
-  ESP,
-  EBP,
-  ESI,
-  EDI,
-  NUM_INT_REGISTERS,
-};
-union reg {
-  int32_t i;
-  uint32_t u;
-};
-:(before "End Globals")
-reg Reg[NUM_INT_REGISTERS] = { {0} };
-uint32_t EIP = 1;  // preserve null pointer
-:(before "End Reset")
-bzero(Reg, sizeof(Reg));
-EIP = 1;  // preserve null pointer
-
-:(before "End Help Contents")
-cerr << "  registers\n";
-:(before "End Help Texts")
-put(Help, "registers",
-  "SubX currently supports eight 32-bit integer registers: R0 to R7.\n"
-  "R4 (ESP) contains the top of the stack.\n"
-  "\n"
-  "There's also a register for the address of the currently executing\n"
-  "instruction. It is modified by jumps.\n"
-  "\n"
-  "Various instructions modify one or more of three 1-bit 'flag' registers,\n"
-  "as a side-effect:\n"
-  "- the sign flag (SF): usually set if an arithmetic result is negative, or\n"
-  "  reset if not.\n"
-  "- the zero flag (ZF): usually set if a result is zero, or reset if not.\n"
-  "- the overflow flag (OF): usually set if an arithmetic result overflows.\n"
-  "The flag bits are read by conditional jumps.\n"
-  "\n"
-  "We don't support non-integer (floating-point) registers yet.\n"
-);
-
-:(before "End Globals")
-// the subset of x86 flag registers we care about
-bool SF = false;  // sign flag
-bool ZF = false;  // zero flag
-bool OF = false;  // overflow flag
-:(before "End Reset")
-SF = ZF = OF = false;
-
-//: how the flag registers are updated after each instruction
-
-:(before "End Includes")
-// Combine 'arg1' and 'arg2' with arithmetic operation 'op' and store the
-// result in 'arg1', then update flags.
-// beware: no side-effects in args
-#define BINARY_ARITHMETIC_OP(op, arg1, arg2) { \
-  /* arg1 and arg2 must be signed */ \
-  int64_t tmp = arg1 op arg2; \
-  arg1 = arg1 op arg2; \
-  trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
-  SF = (arg1 < 0); \
-  ZF = (arg1 == 0); \
-  OF = (arg1 != tmp); \
-}
-
-// Combine 'arg1' and 'arg2' with bitwise operation 'op' and store the result
-// in 'arg1', then update flags.
-#define BINARY_BITWISE_OP(op, arg1, arg2) { \
-  /* arg1 and arg2 must be unsigned */ \
-  arg1 = arg1 op arg2; \
-  trace(90, "run") << "storing 0x" << HEXWORD << arg1 << end(); \
-  SF = (arg1 >> 31); \
-  ZF = (arg1 == 0); \
-  OF = false; \
-}
-
-//:: simulated RAM
-
-:(before "End Globals")
-vector<uint8_t> Mem;
-uint32_t Mem_offset = 0;
-uint32_t End_of_program = 0;
-:(before "End Reset")
-Mem.clear();
-Mem.resize(1024);
-Mem_offset = 0;
-End_of_program = 0;
-:(code)
-// These helpers depend on Mem being laid out contiguously (so you can't use a
-// map, etc.) and on the host also being little-endian.
-inline uint8_t read_mem_u8(uint32_t addr) {
-  return Mem.at(addr-Mem_offset);
-}
-inline int8_t read_mem_i8(uint32_t addr) {
-  return static_cast<int8_t>(Mem.at(addr-Mem_offset));
-}
-inline uint32_t read_mem_u32(uint32_t addr) {
-  return *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
-}
-inline int32_t read_mem_i32(uint32_t addr) {
-  return *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
-}
-
-inline uint8_t* mem_addr_u8(uint32_t addr) {
-  return &Mem.at(addr-Mem_offset);
-}
-inline int8_t* mem_addr_i8(uint32_t addr) {
-  return reinterpret_cast<int8_t*>(&Mem.at(addr-Mem_offset));
-}
-inline uint32_t* mem_addr_u32(uint32_t addr) {
-  return reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset));
-}
-inline int32_t* mem_addr_i32(uint32_t addr) {
-  return reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset));
-}
-
-inline void write_mem_u8(uint32_t addr, uint8_t val) {
-  Mem.at(addr-Mem_offset) = val;
-}
-inline void write_mem_i8(uint32_t addr, int8_t val) {
-  Mem.at(addr-Mem_offset) = static_cast<uint8_t>(val);
-}
-inline void write_mem_u32(uint32_t addr, uint32_t val) {
-  *reinterpret_cast<uint32_t*>(&Mem.at(addr-Mem_offset)) = val;
-}
-inline void write_mem_i32(uint32_t addr, int32_t val) {
-  *reinterpret_cast<int32_t*>(&Mem.at(addr-Mem_offset)) = val;
-}
-
-//:: core interpreter loop
-
-:(code)
-// skeleton of how x86 instructions are decoded
-void run_one_instruction() {
-  uint8_t op=0, op2=0, op3=0;
-  trace(90, "run") << "inst: 0x" << HEXWORD << EIP << end();
-//?   dump_registers();
-//?   cerr << "inst: 0x" << EIP << " => ";
-  op = next();
-//?   cerr << HEXBYTE << NUM(op) << '\n';
-  switch (op) {
-  case 0xf4:  // hlt
-    EIP = End_of_program;
-    break;
-  // End Single-Byte Opcodes
-  case 0x0f:
-    switch(op2 = next()) {
-    // End Two-Byte Opcodes Starting With 0f
-    default:
-      cerr << "unrecognized second opcode after 0f: " << HEXBYTE << NUM(op2) << '\n';
-      DUMP("");
-      exit(1);
-    }
-    break;
-  case 0xf3:
-    switch(op2 = next()) {
-    // End Two-Byte Opcodes Starting With f3
-    case 0x0f:
-      switch(op3 = next()) {
-      // End Three-Byte Opcodes Starting With f3 0f
-      default:
-        cerr << "unrecognized third opcode after f3 0f: " << HEXBYTE << NUM(op3) << '\n';
-        DUMP("");
-        exit(1);
-      }
-      break;
-    default:
-      cerr << "unrecognized second opcode after f3: " << HEXBYTE << NUM(op2) << '\n';
-      DUMP("");
-      exit(1);
-    }
-    break;
-  default:
-    cerr << "unrecognized opcode: " << HEXBYTE << NUM(op) << '\n';
-    DUMP("");
-    exit(1);
-  }
-}
-
-inline uint8_t next() {
-  return read_mem_u8(EIP++);
-}
-
-void dump_registers() {
-  for (int i = 0;  i < NUM_INT_REGISTERS;  ++i) {
-    if (i > 0) cerr << "; ";
-    cerr << "  " << i << ": " << std::hex << std::setw(8) << std::setfill('_') << Reg[i].u;
-  }
-  cerr << " -- SF: " << SF << "; ZF: " << ZF << "; OF: " << OF << '\n';
-}
-
-//: start tracking supported opcodes
-:(before "End Globals")
-map</*op*/string, string> name;
-map</*op*/string, string> name_0f;
-map</*op*/string, string> name_f3;
-map</*op*/string, string> name_f3_0f;
-:(before "End One-time Setup")
-init_op_names();
-:(code)
-void init_op_names() {
-  put(name, "f4", "halt");
-  // End Initialize Op Names(name)
-}
-
-:(before "End Help Special-cases(key)")
-if (key == "opcodes") {
-  cerr << "Opcodes currently supported by SubX:\n";
-  for (map<string, string>::iterator p = name.begin();  p != name.end();  ++p)
-    cerr << "  " << p->first << ": " << p->second << '\n';
-  for (map<string, string>::iterator p = name_0f.begin();  p != name_0f.end();  ++p)
-    cerr << "  0f " << p->first << ": " << p->second << '\n';
-  for (map<string, string>::iterator p = name_f3.begin();  p != name_f3.end();  ++p)
-    cerr << "  f3 " << p->first << ": " << p->second << '\n';
-  for (map<string, string>::iterator p = name_f3_0f.begin();  p != name_f3_0f.end();  ++p)
-    cerr << "  f3 0f " << p->first << ": " << p->second << '\n';
-  cerr << "Run `subx help instructions` for details on words like 'r32' and 'disp8'.\n";
-  return 0;
-}
-:(before "End Help Contents")
-cerr << "  opcodes\n";
-
-:(before "End Includes")
-#include <iomanip>
-#define HEXBYTE  std::hex << std::setw(2) << std::setfill('0')
-#define HEXWORD  std::hex << std::setw(8) << std::setfill('0')
-// ugly that iostream doesn't print uint8_t as an integer
-#define NUM(X) static_cast<int>(X)
-#include <stdint.h>
diff --git a/subx/028translate.cc b/subx/028translate.cc
new file mode 100644
index 00000000..f3e30126
--- /dev/null
+++ b/subx/028translate.cc
@@ -0,0 +1,159 @@
+//: The bedrock level 1 of abstraction is now done, and we're going to start
+//: building levels above it that make programming in x86 machine code a
+//: little more ergonomic.
+//:
+//: All levels will be "pass through by default". Whatever they don't
+//: understand they will silently pass through to lower levels.
+//:
+//: Since raw hex bytes of machine code are always possible to inject, SubX is
+//: not a language, and we aren't building a compiler. This is something
+//: deliberately leakier. Levels are more for improving auditing, checks and
+//: error messages rather than for hiding low-level details.
+
+//: Translator workflow: read 'source' file. Run a series of transforms on it,
+//: each passing through what it doesn't understand. The final program should
+//: be just machine code, suitable to write to an ELF binary.
+//:
+//: Higher levels usually transform code on the basis of metadata.
+
+:(before "End Main")
+if (is_equal(argv[1], "translate")) {
+  START_TRACING_UNTIL_END_OF_SCOPE;
+  assert(argc > 3);
+  program p;
+  ifstream fin(argv[2]);
+  if (!fin) {
+    cerr << "could not open " << argv[2] << '\n';
+    return 1;
+  }
+  parse(fin, p);
+  if (trace_contains_errors()) return 1;
+  transform(p);
+  if (trace_contains_errors()) return 1;
+  save_elf(p, argv[3]);
+  if (trace_contains_errors()) unlink(argv[3]);
+  return 0;
+}
+
+:(code)
+// write out a program to a bare-bones ELF file
+void save_elf(const program& p, const char* filename) {
+  ofstream out(filename, ios::binary);
+  write_elf_header(out, p);
+  for (size_t i = 0;  i < p.segments.size();  ++i)
+    write_segment(p.segments.at(i), out);
+  out.close();
+}
+
+void write_elf_header(ostream& out, const program& p) {
+  char c = '\0';
+#define O(X)  c = (X); out.write(&c, sizeof(c))
+// host is required to be little-endian
+#define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
+  //// ehdr
+  // e_ident
+  O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
+    O(0x1);  // 32-bit format
+    O(0x1);  // little-endian
+    O(0x1); O(0x0);
+  for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
+  // e_type
+  O(0x02); O(0x00);
+  // e_machine
+  O(0x03); O(0x00);
+  // e_version
+  O(0x01); O(0x00); O(0x00); O(0x00);
+  // e_entry
+  int e_entry = p.segments.at(0).start;  // convention
+  emit(e_entry);
+  // e_phoff -- immediately after ELF header
+  int e_phoff = 0x34;
+  emit(e_phoff);
+  // e_shoff; unused
+  int dummy32 = 0;
+  emit(dummy32);
+  // e_flags; unused
+  emit(dummy32);
+  // e_ehsize
+  uint16_t e_ehsize = 0x34;
+  emit(e_ehsize);
+  // e_phentsize
+  uint16_t e_phentsize = 0x20;
+  emit(e_phentsize);
+  // e_phnum
+  uint16_t e_phnum = SIZE(p.segments);
+  emit(e_phnum);
+  // e_shentsize
+  uint16_t dummy16 = 0x0;
+  emit(dummy16);
+  // e_shnum
+  emit(dummy16);
+  // e_shstrndx
+  emit(dummy16);
+
+  uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
+  for (int i = 0;  i < SIZE(p.segments);  ++i) {
+    //// phdr
+    // p_type
+    uint32_t p_type = 0x1;
+    emit(p_type);
+    // p_offset
+    emit(p_offset);
+    // p_vaddr
+    emit(p.segments.at(i).start);
+    // p_paddr
+    emit(p.segments.at(i).start);
+    // p_filesz
+    uint32_t size = size_of(p.segments.at(i));
+    assert(size < SEGMENT_SIZE);
+    emit(size);
+    // p_memsz
+    emit(size);
+    // p_flags
+    uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6;  // convention: only first segment is code
+    emit(p_flags);
+
+    // p_align
+    // "As the system creates or augments a process image, it logically copies
+    // a file's segment to a virtual memory segment.  When—and if— the system
+    // physically reads the file depends on the program's execution behavior,
+    // system load, and so on.  A process does not require a physical page
+    // unless it references the logical page during execution, and processes
+    // commonly leave many pages unreferenced. Therefore delaying physical
+    // reads frequently obviates them, improving system performance. To obtain
+    // this efficiency in practice, executable and shared object files must
+    // have segment images whose file offsets and virtual addresses are
+    // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
+    uint32_t p_align = 0x1000;  // default page size on linux
+    emit(p_align);
+    if (p_offset % p_align != p.segments.at(i).start % p_align) {
+      raise << "segment starting at 0x" << HEXWORD << p.segments.at(i).start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p.segments.at(i).start % p_align) << '\n' << end();
+      return;
+    }
+
+    // prepare for next segment
+    p_offset += size;
+  }
+#undef O
+#undef emit
+}
+
+void write_segment(const segment& s, ostream& out) {
+  for (int i = 0;  i < SIZE(s.lines);  ++i) {
+    const vector<word>& w = s.lines.at(i).words;
+    for (int j = 0;  j < SIZE(w);  ++j) {
+      uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
+      out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
+    }
+  }
+}
+
+uint32_t size_of(const segment& s) {
+  uint32_t sum = 0;
+  for (int i = 0;  i < SIZE(s.lines);  ++i)
+    sum += SIZE(s.lines.at(i).words);
+  return sum;
+}
+
+:(before "End Includes")
+using std::ios;
diff --git a/subx/029transforms.cc b/subx/029transforms.cc
new file mode 100644
index 00000000..9546ea91
--- /dev/null
+++ b/subx/029transforms.cc
@@ -0,0 +1,65 @@
+//: Ordering transforms is a well-known hard problem when building compilers.
+//: In our case we also have the additional notion of layers. The ordering of
+//: layers can have nothing in common with the ordering of transforms when
+//: SubX is tangled and run. This can be confusing for readers, particularly
+//: if later layers start inserting transforms at arbitrary points between
+//: transforms introduced earlier. Over time adding transforms can get harder
+//: and harder, having to meet the constraints of everything that's come
+//: before. It's worth thinking about organization up-front so the ordering is
+//: easy to hold in our heads, and it's obvious where to add a new transform.
+//: Some constraints:
+//:
+//:   1. Layers force us to build SubX bottom-up; since we want to be able to
+//:   build and run SubX after stopping loading at any layer, the overall
+//:   organization has to be to introduce primitives before we start using
+//:   them.
+//:
+//:   2. Transforms usually need to be run top-down, converting high-level
+//:   representations to low-level ones so that low-level layers can be
+//:   oblivious to them.
+//:
+//:   3. When running we'd often like new representations to be checked before
+//:   they are transformed away. The whole reason for new representations is
+//:   often to add new kinds of automatic checking for our machine code
+//:   programs.
+//:
+//: Putting these constraints together, we'll use the following broad
+//: organization:
+//:
+//:   a) We'll divide up our transforms into "levels", each level consisting
+//:   of multiple transforms, and dealing in some new set of representational
+//:   ideas. Levels will be added in reverse order to the one their transforms
+//:   will be run in.
+//:
+//:     To run all transforms:
+//:       Load transforms for level n
+//:       Load transforms for level n-1
+//:       ...
+//:       Load transforms for level 2
+//:       Run code at level 1
+//:
+//:   b) *Within* a level we'll usually introduce transforms in the order
+//:   they're run in.
+//:
+//:     To run transforms for level n:
+//:       Perform transform of layer l
+//:       Perform transform of layer l+1
+//:       ...
+//:
+//:   c) Within a level it's often most natural to introduce a new
+//:   representation by showing how it's transformed to the level below. To
+//:   make such exceptions more obvious checks usually won't be first-class
+//:   transforms; instead code that keeps the program unmodified will run
+//:   within transforms before they mutate the program.
+//:
+//:     Level l transforms programs
+//:     Level l+1 inserts checks to run *before* the transform of level l runs
+//:
+//: This may all seem abstract, but will hopefully make sense over time. The
+//: goals are basically to always have a working program after any layer, to
+//: have the order of layers make narrative sense, and to order transforms
+//: correctly at runtime.
+
+:(before "End One-time Setup")
+// Begin Transforms
+// End Transforms
diff --git a/subx/029translate.cc b/subx/029translate.cc
deleted file mode 100644
index 1d5e940e..00000000
--- a/subx/029translate.cc
+++ /dev/null
@@ -1,224 +0,0 @@
-//: The bedrock level 1 of abstraction is now done, and we're going to start
-//: building levels above it that make programming in x86 machine code a
-//: little more ergonomic.
-//:
-//: All levels will be "pass through by default". Whatever they don't
-//: understand they will silently pass through to lower levels.
-//:
-//: Since raw hex bytes of machine code are always possible to inject, SubX is
-//: not a language, and we aren't building a compiler. This is something
-//: deliberately leakier. Levels are more for improving auditing, checks and
-//: error messages rather than for hiding low-level details.
-
-//: Translator workflow: read 'source' file. Run a series of transforms on it,
-//: each passing through what it doesn't understand. The final program should
-//: be just machine code, suitable to write to an ELF binary.
-//:
-//: Higher levels usually transform code on the basis of metadata.
-
-:(before "End Main")
-if (is_equal(argv[1], "translate")) {
-  START_TRACING_UNTIL_END_OF_SCOPE;
-  assert(argc > 3);
-  program p;
-  ifstream fin(argv[2]);
-  if (!fin) {
-    cerr << "could not open " << argv[2] << '\n';
-    return 1;
-  }
-  parse(fin, p);
-  if (trace_contains_errors()) return 1;
-  transform(p);
-  if (trace_contains_errors()) return 1;
-  save_elf(p, argv[3]);
-  if (trace_contains_errors()) unlink(argv[3]);
-  return 0;
-}
-
-//: Ordering transforms is a well-known hard problem when building compilers.
-//: In our case we also have the additional notion of layers. The ordering of
-//: layers can have nothing in common with the ordering of transforms when
-//: SubX is tangled and run. This can be confusing for readers, particularly
-//: if later layers start inserting transforms at arbitrary points between
-//: transforms introduced earlier. Over time adding transforms can get harder
-//: and harder, having to meet the constraints of everything that's come
-//: before. It's worth thinking about organization up-front so the ordering is
-//: easy to hold in our heads, and it's obvious where to add a new transform.
-//: Some constraints:
-//:
-//:   1. Layers force us to build SubX bottom-up; since we want to be able to
-//:   build and run SubX after stopping loading at any layer, the overall
-//:   organization has to be to introduce primitives before we start using
-//:   them.
-//:
-//:   2. Transforms usually need to be run top-down, converting high-level
-//:   representations to low-level ones so that low-level layers can be
-//:   oblivious to them.
-//:
-//:   3. When running we'd often like new representations to be checked before
-//:   they are transformed away. The whole reason for new representations is
-//:   often to add new kinds of automatic checking for our machine code
-//:   programs.
-//:
-//: Putting these constraints together, we'll use the following broad
-//: organization:
-//:
-//:   a) We'll divide up our transforms into "levels", each level consisting
-//:   of multiple transforms, and dealing in some new set of representational
-//:   ideas. Levels will be added in reverse order to the one their transforms
-//:   will be run in.
-//:
-//:     To run all transforms:
-//:       Load transforms for level n
-//:       Load transforms for level n-1
-//:       ...
-//:       Load transforms for level 2
-//:       Run code at level 1
-//:
-//:   b) *Within* a level we'll usually introduce transforms in the order
-//:   they're run in.
-//:
-//:     To run transforms for level n:
-//:       Perform transform of layer l
-//:       Perform transform of layer l+1
-//:       ...
-//:
-//:   c) Within a level it's often most natural to introduce a new
-//:   representation by showing how it's transformed to the level below. To
-//:   make such exceptions more obvious checks usually won't be first-class
-//:   transforms; instead code that keeps the program unmodified will run
-//:   within transforms before they mutate the program.
-//:
-//:     Level l transforms programs
-//:     Level l+1 inserts checks to run *before* the transform of level l runs
-//:
-//: This may all seem abstract, but will hopefully make sense over time. The
-//: goals are basically to always have a working program after any layer, to
-//: have the order of layers make narrative sense, and to order transforms
-//: correctly at runtime.
-:(before "End One-time Setup")
-// Begin Transforms
-// End Transforms
-
-:(code)
-// write out a program to a bare-bones ELF file
-void save_elf(const program& p, const char* filename) {
-  ofstream out(filename, ios::binary);
-  write_elf_header(out, p);
-  for (size_t i = 0;  i < p.segments.size();  ++i)
-    write_segment(p.segments.at(i), out);
-  out.close();
-}
-
-void write_elf_header(ostream& out, const program& p) {
-  char c = '\0';
-#define O(X)  c = (X); out.write(&c, sizeof(c))
-// host is required to be little-endian
-#define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
-  //// ehdr
-  // e_ident
-  O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
-    O(0x1);  // 32-bit format
-    O(0x1);  // little-endian
-    O(0x1); O(0x0);
-  for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
-  // e_type
-  O(0x02); O(0x00);
-  // e_machine
-  O(0x03); O(0x00);
-  // e_version
-  O(0x01); O(0x00); O(0x00); O(0x00);
-  // e_entry
-  int e_entry = p.segments.at(0).start;  // convention
-  emit(e_entry);
-  // e_phoff -- immediately after ELF header
-  int e_phoff = 0x34;
-  emit(e_phoff);
-  // e_shoff; unused
-  int dummy32 = 0;
-  emit(dummy32);
-  // e_flags; unused
-  emit(dummy32);
-  // e_ehsize
-  uint16_t e_ehsize = 0x34;
-  emit(e_ehsize);
-  // e_phentsize
-  uint16_t e_phentsize = 0x20;
-  emit(e_phentsize);
-  // e_phnum
-  uint16_t e_phnum = SIZE(p.segments);
-  emit(e_phnum);
-  // e_shentsize
-  uint16_t dummy16 = 0x0;
-  emit(dummy16);
-  // e_shnum
-  emit(dummy16);
-  // e_shstrndx
-  emit(dummy16);
-
-  uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
-  for (int i = 0;  i < SIZE(p.segments);  ++i) {
-    //// phdr
-    // p_type
-    uint32_t p_type = 0x1;
-    emit(p_type);
-    // p_offset
-    emit(p_offset);
-    // p_vaddr
-    emit(p.segments.at(i).start);
-    // p_paddr
-    emit(p.segments.at(i).start);
-    // p_filesz
-    uint32_t size = size_of(p.segments.at(i));
-    assert(size < SEGMENT_SIZE);
-    emit(size);
-    // p_memsz
-    emit(size);
-    // p_flags
-    uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6;  // convention: only first segment is code
-    emit(p_flags);
-
-    // p_align
-    // "As the system creates or augments a process image, it logically copies
-    // a file's segment to a virtual memory segment.  When—and if— the system
-    // physically reads the file depends on the program's execution behavior,
-    // system load, and so on.  A process does not require a physical page
-    // unless it references the logical page during execution, and processes
-    // commonly leave many pages unreferenced. Therefore delaying physical
-    // reads frequently obviates them, improving system performance. To obtain
-    // this efficiency in practice, executable and shared object files must
-    // have segment images whose file offsets and virtual addresses are
-    // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
-    uint32_t p_align = 0x1000;  // default page size on linux
-    emit(p_align);
-    if (p_offset % p_align != p.segments.at(i).start % p_align) {
-      raise << "segment starting at 0x" << HEXWORD << p.segments.at(i).start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p.segments.at(i).start % p_align) << '\n' << end();
-      return;
-    }
-
-    // prepare for next segment
-    p_offset += size;
-  }
-#undef O
-#undef emit
-}
-
-void write_segment(const segment& s, ostream& out) {
-  for (int i = 0;  i < SIZE(s.lines);  ++i) {
-    const vector<word>& w = s.lines.at(i).words;
-    for (int j = 0;  j < SIZE(w);  ++j) {
-      uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
-      out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
-    }
-  }
-}
-
-uint32_t size_of(const segment& s) {
-  uint32_t sum = 0;
-  for (int i = 0;  i < SIZE(s.lines);  ++i)
-    sum += SIZE(s.lines.at(i).words);
-  return sum;
-}
-
-:(before "End Includes")
-using std::ios;
-- 
cgit 1.4.1-2-gfad0