10 files changed, 920 insertions, 121 deletions
diff --git a/subx/022check_instruction.cc b/subx/022check_instruction.cc
new file mode 100644
index 00000000..afa6329f
--- /dev/null
+++ b/subx/022check_instruction.cc
@@ -0,0 +1,613 @@
+//: Beginning of "level 2": tagging bytes with metadata around what field of
+//: an x86 instruction they're for.
+//:
+//: The x86 instruction set is variable-length, and how a byte is interpreted
+//: affects later instruction boundaries. A lot of the pain in programming machine code
+//: stems from computer and programmer going out of sync on what a byte
+//: means. The miscommunication is usually not immediately caught, and
+//: metastasizes at runtime into kilobytes of misinterpreted instructions.
+//: Tagging bytes with what the programmer expects them to be interpreted as
+//: helps the computer catch miscommunication immediately.
+//:
+//: This is one way SubX is going to be different from a 'language': we
+//: typically think of languages as less verbose than machine code. Here we're
+//: making machine code *more* verbose.
+//:
+//: ---
+//:
+//: While we're here, we'll also improve a couple of other things:
+//:
+//: a) Machine code often packs logically separate operands into bitfields of
+//: a single byte. We'll start writing out each operand separately, and the
+//: translator will construct the right bytes out of operands.
+//:
+//: SubX now gets still more verbose. What used to be a single byte, say 'c3',
+//: can now expand to '3/mod 0/subop 3/rm32'.
+//:
+//: b) Since each operand is tagged, we can loosen ordering restrictions and
+//: allow writing out the operands in any order, like keyword arguments.
+//:
+//: c) Operand values can be expressed in either decimal or hex (when prefixed
+//: with '0x'. Raw 2-character hex bytes without the '0x' are only valid when
+//: tagged without any operand metadata. (This may be a bad idea.)
+//:
+//: Coda: the actual opcodes (1-3 bytes) will continue to be at the start of
+//: each line, in hex, and untagged. The x86 instruction set is a mess, and
+//: instructions don't admit good names.
+
+:(before "End Help Texts")
+put(Help, "instructions",
+  "Each x86 instruction consists of an instruction or opcode and some number of operands.\n"
+  "Each operand has a type. An instruction won't have more than one operand of any type.\n"
+  "Each instruction has some set of allowed operand types. It'll reject others.\n"
+  "The complete list of operand types: mod, subop, r32 (register), rm32 (register or memory), scale, index, base, disp8, disp16, disp32, imm8, imm32.\n"
+  "Each of these has its own help page. Try reading 'subx help mod' next.\n"
+);
+:(before "End Help Contents")
+cerr << "  instructions\n";
+
+//:: Check for 'syntax errors'; missing or unexpected operands.
+
+:(scenario check_missing_imm8_operand)
+% Hide_errors = true;
+== 0x1
+# opcode        ModR/M                    SIB                   displacement    immediate
+# instruction   mod, reg, Reg/Mem bits    scale, index, base
+# 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
+  cd                                                                                          # int ??
++error: 'cd' (software interrupt): missing imm8 operand
+
+:(before "End One-time Setup")
+Transform.push_back(check_operands);
+
+:(code)
+void check_operands(/*const*/ program& p) {
+  if (p.segments.empty()) return;
+  const segment& seg = p.segments.at(0);
+  for (int i = 0;  i < SIZE(seg.lines);  ++i) {
+    check_operands(seg.lines.at(i));
+    if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
+  }
+}
+
+void check_operands(const line& inst) {
+  uint8_t op = hex_byte(inst.words.at(0).data);
+  if (trace_contains_errors()) return;
+  if (op == 0x0f) {
+    check_operands_0f(inst);
+    return;
+  }
+  if (op == 0xf3) {
+    check_operands_f3(inst);
+    return;
+  }
+  if (!contains_key(name, op)) {
+    raise << "unknown opcode '" << HEXBYTE << NUM(op) << "'\n" << end();
+    return;
+  }
+  check_operands(op, inst);
+}
+
+//: To check the operands for an opcode, we'll track the permitted operands
+//: for each supported opcode in a bitvector. That way we can often compute the
+//: bitvector for each instruction's operands and compare it with the expected.
+
+:(before "End Types")
+enum operand_type {
+  // start from the least significant bit
+  MODRM,  // more complex, may also involve disp8 or disp32
+  SUBOP,
+  DISP8,
+  DISP16,
+  DISP32,
+  IMM8,
+  IMM32,
+  NUM_OPERAND_TYPES
+};
+:(before "End Globals")
+vector<string> Operand_type_name;
+map<string, operand_type> Operand_type;
+:(before "End One-time Setup")
+init_op_types();
+:(code)
+void init_op_types() {
+  assert(NUM_OPERAND_TYPES <= /*bits in a uint8_t*/8);
+  Operand_type_name.resize(NUM_OPERAND_TYPES);
+  #define DEF(type) Operand_type_name.at(type) = tolower(#type), put(Operand_type, tolower(#type), type);
+  DEF(MODRM);
+  DEF(SUBOP);
+  DEF(DISP8);
+  DEF(DISP16);
+  DEF(DISP32);
+  DEF(IMM8);
+  DEF(IMM32);
+  #undef DEF
+}
+
+:(before "End Globals")
+map</*op*/uint8_t, /*bitvector*/uint8_t> Permitted_operands;
+const uint8_t INVALID_OPERANDS = 0xff;  // no instruction uses all the operands
+:(before "End One-time Setup")
+init_permitted_operands();
+:(code)
+void init_permitted_operands() {
+  //// Class A: just op, no operands
+  // halt
+  put(Permitted_operands, 0xf4, 0x00);
+  // push
+  put(Permitted_operands, 0x50, 0x00);
+  put(Permitted_operands, 0x51, 0x00);
+  put(Permitted_operands, 0x52, 0x00);
+  put(Permitted_operands, 0x53, 0x00);
+  put(Permitted_operands, 0x54, 0x00);
+  put(Permitted_operands, 0x55, 0x00);
+  put(Permitted_operands, 0x56, 0x00);
+  put(Permitted_operands, 0x57, 0x00);
+  // pop
+  put(Permitted_operands, 0x58, 0x00);
+  put(Permitted_operands, 0x59, 0x00);
+  put(Permitted_operands, 0x5a, 0x00);
+  put(Permitted_operands, 0x5b, 0x00);
+  put(Permitted_operands, 0x5c, 0x00);
+  put(Permitted_operands, 0x5d, 0x00);
+  put(Permitted_operands, 0x5e, 0x00);
+  put(Permitted_operands, 0x5f, 0x00);
+  // return
+  put(Permitted_operands, 0xc3, 0x00);
+
+  //// Class B: just op and disp8
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     0     0      |0       1     0     0
+
+  // jump
+  put(Permitted_operands, 0xeb, 0x04);
+  put(Permitted_operands, 0x74, 0x04);
+  put(Permitted_operands, 0x75, 0x04);
+  put(Permitted_operands, 0x7c, 0x04);
+  put(Permitted_operands, 0x7d, 0x04);
+  put(Permitted_operands, 0x7e, 0x04);
+  put(Permitted_operands, 0x7f, 0x04);
+
+  //// Class C: just op and disp16
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     0     0      |1       0     0     0
+  put(Permitted_operands, 0xe8, 0x08);  // jump
+
+  //// Class D: just op and disp32
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     0     1      |0       0     0     0
+  put(Permitted_operands, 0xe9, 0x10);  // call
+
+  //// Class E: just op and imm8
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     1     0      |0       0     0     0
+  put(Permitted_operands, 0xcd, 0x20);  // software interrupt
+
+  //// Class F: just op and imm32
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  1     0     0      |0       0     0     0
+  put(Permitted_operands, 0x05, 0x40);  // add
+  put(Permitted_operands, 0x2d, 0x40);  // subtract
+  put(Permitted_operands, 0x25, 0x40);  // and
+  put(Permitted_operands, 0x0d, 0x40);  // or
+  put(Permitted_operands, 0x35, 0x40);  // xor
+  put(Permitted_operands, 0x3d, 0x40);  // compare
+  put(Permitted_operands, 0x68, 0x40);  // push
+  // copy
+  put(Permitted_operands, 0xb8, 0x40);
+  put(Permitted_operands, 0xb9, 0x40);
+  put(Permitted_operands, 0xba, 0x40);
+  put(Permitted_operands, 0xbb, 0x40);
+  put(Permitted_operands, 0xbc, 0x40);
+  put(Permitted_operands, 0xbd, 0x40);
+  put(Permitted_operands, 0xbe, 0x40);
+  put(Permitted_operands, 0xbf, 0x40);
+
+  //// Class M: using ModR/M byte
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     0     0      |0       0     0     1
+
+  // add
+  put(Permitted_operands, 0x01, 0x01);
+  put(Permitted_operands, 0x03, 0x01);
+  // subtract
+  put(Permitted_operands, 0x29, 0x01);
+  put(Permitted_operands, 0x2b, 0x01);
+  // and
+  put(Permitted_operands, 0x21, 0x01);
+  put(Permitted_operands, 0x23, 0x01);
+  // or
+  put(Permitted_operands, 0x09, 0x01);
+  put(Permitted_operands, 0x0b, 0x01);
+  // complement
+  put(Permitted_operands, 0xf7, 0x01);
+  // xor
+  put(Permitted_operands, 0x31, 0x01);
+  put(Permitted_operands, 0x33, 0x01);
+  // compare
+  put(Permitted_operands, 0x39, 0x01);
+  put(Permitted_operands, 0x3b, 0x01);
+  // copy
+  put(Permitted_operands, 0x89, 0x01);
+  put(Permitted_operands, 0x8b, 0x01);
+  // swap
+  put(Permitted_operands, 0x87, 0x01);
+  // pop
+  put(Permitted_operands, 0x8f, 0x01);
+
+  //// Class O: op, ModR/M and subop (not r32)
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  0     0     0      |0       0     1     1
+  put(Permitted_operands, 0xff, 0x03);  // jump/push/call
+
+  //// Class N: op, ModR/M and imm32
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  1     0     0      |0       0     0     1
+  put(Permitted_operands, 0xc7, 0x41);  // copy
+
+  //// Class P: op, ModR/M, subop (not r32) and imm32
+  //  imm32 imm8  disp32 |disp16  disp8 subop modrm
+  //  1     0     0      |0       0     1     1
+  put(Permitted_operands, 0x81, 0x43);  // combine
+}
+
+:(before "End Includes")
+#define HAS(bitvector, bit)  ((bitvector) & (1 << (bit)))
+#define SET(bitvector, bit)  ((bitvector) | (1 << (bit)))
+#define CLEAR(bitvector, bit)  ((bitvector) & (~(1 << (bit))))
+
+:(code)
+void check_operands(uint8_t op, const line& inst) {
+  uint8_t expected_bitvector = get(Permitted_operands, op);
+  if (HAS(expected_bitvector, MODRM))
+    check_operands_modrm(inst, op);
+  compare_bitvector(op, inst, CLEAR(expected_bitvector, MODRM));
+}
+
+//: Many instructions can be checked just by comparing bitvectors.
+
+void compare_bitvector(uint8_t op, const line& inst, uint8_t expected) {
+  if (all_raw_hex_bytes(inst) && has_operands(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
+  uint8_t bitvector = compute_operand_bitvector(inst);
+  if (trace_contains_errors()) return;  // duplicate operand type
+  if (bitvector == expected) return;  // all good with this instruction
+  for (int i = 0;  i < NUM_OPERAND_TYPES;  ++i, bitvector >>= 1, expected >>= 1) {
+//?     cerr << "comparing " << HEXBYTE << NUM(bitvector) << " with " << NUM(expected) << '\n';
+    if ((bitvector & 0x1) == (expected & 0x1)) continue;  // all good with this operand
+    const string& optype = Operand_type_name.at(i);
+    if ((bitvector & 0x1) > (expected & 0x1))
+      raise << "'" << to_string(inst) << "' (" << get(name, op) << "): unexpected " << optype << " operand\n" << end();
+    else
+      raise << "'" << to_string(inst) << "' (" << get(name, op) << "): missing " << optype << " operand\n" << end();
+    // continue giving all errors for a single instruction
+  }
+  // ignore settings in any unused bits
+}
+
+uint32_t compute_operand_bitvector(const line& inst) {
+  uint32_t bitvector = 0;
+  for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {
+    bitvector = bitvector | bitvector_for_operand(inst.words.at(i));
+    if (trace_contains_errors()) return INVALID_OPERANDS;  // duplicate operand type
+  }
+  return bitvector;
+}
+
+bool has_operands(const line& inst) {
+  return SIZE(inst.words) > first_operand(inst);
+}
+
+int first_operand(const line& inst) {
+  if (inst.words.at(0).data == "0f") return 2;
+  if (inst.words.at(0).data == "f3") {
+    if (inst.words.at(1).data == "0f")
+      return 3;
+    else
+      return 2;
+  }
+  return 1;
+}
+
+bool all_raw_hex_bytes(const line& inst) {
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    const word& curr = inst.words.at(i);
+    if (contains_any_operand_metadata(curr))
+      return false;
+    if (SIZE(curr.data) != 2)
+      return false;
+    if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
+      return false;
+  }
+  return true;
+}
+
+bool contains_any_operand_metadata(const word& word) {
+  for (int i = 0;  i < SIZE(word.metadata);  ++i)
+    if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
+      return true;
+  return false;
+}
+
+// Scan the metadata of 'w' and return the bit corresponding to any operand type.
+// Also raise an error if metadata contains multiple operand types.
+uint32_t bitvector_for_operand(const word& w) {
+  uint32_t bv = 0;
+  bool found = false;
+  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
+    const string& curr = w.metadata.at(i);
+    if (!contains_key(Operand_type, curr)) continue;  // ignore unrecognized metadata
+    if (found) {
+      raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
+      return INVALID_OPERANDS;
+    }
+    bv = (1 << get(Operand_type, curr));
+    found = true;
+  }
+  return bv;
+}
+
+:(scenario conflicting_operand_type)
+% Hide_errors = true;
+== 0x1
+cd/software-interrupt 80/imm8/imm32
++error: '80/imm8/imm32' has conflicting operand types; it should have only one
+
+//: Instructions computing effective addresses have more complex rules, so
+//: we'll hard-code a common set of instruction-decoding rules.
+
+:(scenario check_missing_mod_operand)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop       3/rm32/ebx 1/imm32
++error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
+
+:(before "End Globals")
+set<string> Instruction_operands;
+:(before "End One-time Setup")
+Instruction_operands.insert("subop");
+Instruction_operands.insert("mod");
+Instruction_operands.insert("rm32");
+Instruction_operands.insert("base");
+Instruction_operands.insert("index");
+Instruction_operands.insert("scale");
+Instruction_operands.insert("r32");
+Instruction_operands.insert("disp8");
+Instruction_operands.insert("disp16");
+Instruction_operands.insert("disp32");
+Instruction_operands.insert("imm8");
+Instruction_operands.insert("imm32");
+
+:(code)
+void check_operands_modrm(const line& inst, uint8_t op) {
+  if (all_raw_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
+  check_metadata_present(inst, "mod", op);
+  check_metadata_present(inst, "rm32", op);
+  // no check for r32; some instructions don't use it; just assume it's 0 if missing
+  if (op == 0x81 || op == 0x8f || op == 0xff) {  // keep sync'd with 'help subop'
+    check_metadata_present(inst, "subop", op);
+    check_metadata_absent(inst, "r32", op, "should be replaced by subop");
+  }
+  if (trace_contains_errors()) return;
+  if (metadata(inst, "rm32").data != "4") return;
+  // SIB byte checks
+  uint8_t mod = hex_byte(metadata(inst, "mod").data);
+  if (mod != /*direct*/3) {
+    check_metadata_present(inst, "base", op);
+    check_metadata_present(inst, "index", op);  // otherwise why go to SIB?
+  }
+  else {
+    check_metadata_absent(inst, "base", op, "direct mode");
+    check_metadata_absent(inst, "index", op, "direct mode");
+  }
+  // no check for scale; 0 (2**0 = 1) by default
+}
+
+void check_metadata_present(const line& inst, const string& type, uint8_t op) {
+  if (!has_metadata(inst, type))
+    raise << "'" << to_string(inst) << "' (" << get(name, op) << "): missing " << type << " operand\n" << end();
+}
+
+void check_metadata_absent(const line& inst, const string& type, uint8_t op, const string& msg) {
+  if (has_metadata(inst, type))
+    raise << "'" << to_string(inst) << "' (" << get(name, op) << "): unexpected " << type << " operand (" << msg << ")\n" << end();
+}
+
+bool has_metadata(const line& inst, const string& m) {
+  bool result = false;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    if (!has_metadata(inst.words.at(i), m)) continue;
+    if (result) {
+      raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
+      return false;
+    }
+    result = true;
+  }
+  return result;
+}
+
+bool has_metadata(const word& w, const string& m) {
+  bool result = false;
+  bool metadata_found = false;
+  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
+    const string& curr = w.metadata.at(i);
+    if (!contains_key(Instruction_operands, curr)) continue;  // ignore unrecognized metadata
+    if (metadata_found) {
+      raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
+      return false;
+    }
+    metadata_found = true;
+    result = (curr == m);
+  }
+  return result;
+}
+
+word metadata(const line& inst, const string& m) {
+  for (int i = 0;  i < SIZE(inst.words);  ++i)
+    if (has_metadata(inst.words.at(i), m))
+      return inst.words.at(i);
+  assert(false);
+}
+
+:(scenario conflicting_operands_in_modrm_instruction)
+% Hide_errors = true;
+== 0x1
+01/add 0/mod 3/mod
++error: '01/add 0/mod 3/mod' has conflicting mod operands
+
+:(scenario conflicting_operand_type_modrm)
+% Hide_errors = true;
+== 0x1
+01/add 0/mod 3/rm32/r32
++error: '3/rm32/r32' has conflicting operand types; it should have only one
+
+:(scenario check_missing_rm32_operand)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop 0/mod            1/imm32
++error: '81 0/add/subop 0/mod 1/imm32' (combine rm32 with imm32 based on subop): missing rm32 operand
+
+:(scenario check_missing_subop_operand)
+% Hide_errors = true;
+== 0x1
+81             0/mod 3/rm32/ebx 1/imm32
++error: '81 0/mod 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing subop operand
+
+:(scenario check_missing_base_operand)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32
++error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
+
+:(scenario check_missing_index_operand)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32
++error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 0/base 1/imm32' (combine rm32 with imm32 based on subop): missing index operand
+
+:(scenario check_missing_base_operand_2)
+% Hide_errors = true;
+== 0x1
+81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32
++error: '81 0/add/subop 0/mod/indirect 4/rm32/use-sib 2/index 3/scale 1/imm32' (combine rm32 with imm32 based on subop): missing base operand
+
+:(scenario check_base_operand_not_needed_in_direct_mode)
+== 0x1
+81 0/add/subop 3/mod/indirect 4/rm32/use-sib 1/imm32
+$error: 0
+
+//:: similarly handle multi-byte opcodes
+
+:(code)
+void check_operands_0f(const line& inst) {
+  assert(inst.words.at(0).data == "0f");
+  if (SIZE(inst.words) == 1) {
+    raise << "no 2-byte opcode specified starting with '0f'\n" << end();
+    return;
+  }
+  uint8_t op = hex_byte(inst.words.at(1).data);
+  if (!contains_key(name_0f, op)) {
+    raise << "unknown 2-byte opcode '0f " << HEXBYTE << NUM(op) << "'\n" << end();
+    return;
+  }
+  check_operands_0f(op, inst);
+}
+
+void check_operands_f3(const line& /*unused*/) {
+  raise << "no supported opcodes starting with f3\n" << end();
+}
+
+void check_operands_0f(uint8_t /*op*/, const line& /*inst*/) {
+}
+
+string to_string(const line& inst) {
+  ostringstream out;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    if (i > 0) out << ' ';
+    out << inst.words.at(i).original;
+  }
+  return out.str();
+}
+
+string tolower(const char* s) {
+  ostringstream out;
+  for (/*nada*/;  *s;  ++s)
+    out << static_cast<char>(tolower(*s));
+  return out.str();
+}
+
+//:: docs on each operand type
+
+:(before "End Help Texts")
+init_operand_type_help();
+:(code)
+void init_operand_type_help() {
+  put(Help, "mod",
+    "2-bit operand controlling the _addressing mode_ of many instructions,\n"
+    "to determine how to compute the _effective address_ to look up memory at\n"
+    "based on the 'rm32' operand and potentially others.\n"
+    "\n"
+    "If mod = 3, just operate on the contents of the register specified by rm32 (direct mode).\n"
+    "If mod = 2, effective address is usually* rm32 + disp32 (indirect mode with displacement).\n"
+    "If mod = 1, effective address is usually* rm32 + disp8 (indirect mode with displacement).\n"
+    "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
+    "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP). Using it as an address gets more involved.\n"
+    "     For more details, try reading the help pages for 'base', 'index' and 'scale'.)\n"
+    "\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
+    "\"32-bit addressing forms with the ModR/M byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "subop",
+    "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
+    "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
+  );
+  put(Help, "r32",
+    "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
+  );
+  put(Help, "rm32",
+    "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
+    "\"32-bit addressing forms with the ModR/M byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "base",
+    "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
+    "This address may be further modified by 'index' and 'scale' operands.\n"
+    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
+    "\"32-bit addressing forms with the SIB byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "index",
+    "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
+    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
+    "\"32-bit addressing forms with the SIB byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "scale",
+    "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
+    "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
+    "\"32-bit addressing forms with the SIB byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "disp8",
+    "8-bit value to be added in many instructions.\n"
+  );
+  put(Help, "disp16",
+    "16-bit value to be added in many instructions.\n"
+  );
+  put(Help, "disp32",
+    "32-bit value to be added in many instructions.\n"
+  );
+  put(Help, "imm8",
+    "8-bit value for many instructions.\n"
+  );
+  put(Help, "imm32",
+    "32-bit value for many instructions.\n"
+  );
+}
+
+:(before "End Includes")
+#include<cctype>
diff --git a/subx/023check_operand_sizes.cc b/subx/023check_operand_sizes.cc
index 5d2ec456..cec625af 100644
--- a/subx/023check_operand_sizes.cc
+++ b/subx/023check_operand_sizes.cc
@@ -37,26 +37,25 @@ void check_operand_bounds(/*const*/ program& p) {
 }
 
 void check_operand_bounds(const word& w) {
-  for (map<string, uint32_t>::iterator p = Operand_bound.begin();  p != Operand_bound.end();  ++p)
-    if (has_metadata(w, p->first))
-      if (parse_int(w.data) >= p->second)
-        raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
-}
-
-int first_operand(const line& inst) {
-  if (inst.words.at(0).data == "0f") return 2;
-  if (inst.words.at(0).data == "f3") {
-    if (inst.words.at(1).data == "0f")
-      return 3;
-    else
-      return 2;
+  for (map<string, uint32_t>::iterator p = Operand_bound.begin();  p != Operand_bound.end();  ++p) {
+    if (has_metadata(w, p->first)) {
+      int32_t x = parse_int(w.data);
+      if (x >= 0) {
+        if (static_cast<uint32_t>(x) >= p->second)
+          raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
+      }
+      else {
+        // hacky? assuming bound is a power of 2
+        if (x < -1*static_cast<int32_t>(p->second/2))
+          raise << "'" << w.original << "' too large to fit in bitfield " << p->first << '\n' << end();
+      }
+    }
   }
-  return 1;
 }
 
-uint32_t parse_int(const string& s) {
+int32_t parse_int(const string& s) {
   istringstream in(s);
-  uint32_t result = 0;
+  int32_t result = 0;
   if (starts_with(s, "0x"))
     in >> std::hex;
   in >> result;
@@ -66,53 +65,3 @@ uint32_t parse_int(const string& s) {
   }
   return result;
 }
-
-void check_metadata_present(const line& inst, const string& type, uint8_t op) {
-  if (!has_metadata(inst, type))
-    raise << "'" << to_string(inst) << "' (" << get(name, op) << "): missing " << type << " operand\n" << end();
-}
-
-bool has_metadata(const line& inst, const string& m) {
-  bool result = false;
-  for (int i = 0;  i < SIZE(inst.words);  ++i) {
-    if (!has_metadata(inst.words.at(i), m)) continue;
-    if (result) {
-      raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
-      return false;
-    }
-    result = true;
-  }
-  return result;
-}
-
-bool has_metadata(const word& w, const string& m) {
-  bool result = false;
-  bool metadata_found = false;
-  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
-    const string& curr = w.metadata.at(i);
-    if (!contains_key(Operand_bound, curr)) continue;  // ignore unrecognized metadata
-    if (metadata_found) {
-      raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
-      return false;
-    }
-    metadata_found = true;
-    result = (curr == m);
-  }
-  return result;
-}
-
-word metadata(const line& inst, const string& m) {
-  for (int i = 0;  i < SIZE(inst.words);  ++i)
-    if (has_metadata(inst.words.at(i), m))
-      return inst.words.at(i);
-  assert(false);
-}
-
-string to_string(const line& inst) {
-  ostringstream out;
-  for (int i = 0;  i < SIZE(inst.words);  ++i) {
-    if (i > 0) out << ' ';
-    out << inst.words.at(i).original;
-  }
-  return out.str();
-}
diff --git a/subx/024pack_instructions.cc b/subx/024pack_instructions.cc
new file mode 100644
index 00000000..92e557ae
--- /dev/null
+++ b/subx/024pack_instructions.cc
@@ -0,0 +1,175 @@
+//: Operands can refer to bitfields smaller than a byte. This layer packs
+//: operands into their containing bytes in the right order.
+
+:(scenario pack_immediate_constants)
+== 0x1
+# instruction             effective address                                         operand displacement    immediate
+# op          subop       mod           rm32          base      index     scale     r32
+# 1-3 bytes                                                                                 0/1/2/4 bytes   0/1/2/4 bytes
+  bb                                                                                                        42/imm32      # copy 42 to EBX
++translate: packing instruction 'bb 42/imm32'
++translate: instruction after packing: 'bb 2a 00 00 00'
++run: copy imm32 0x0000002a to EBX
+
+:(scenario pack_disp8)
+== 0x1
+74 2/disp8  # jump 2 bytes away if ZF is set
++translate: packing instruction '74 2/disp8'
++translate: instruction after packing: '74 02'
+
+:(scenarios transform)
+:(scenario pack_disp8_negative)
+== 0x1
+# running this will cause an infinite loop
+74 -1/disp8  # jump 1 byte before if ZF is set
++translate: packing instruction '74 -1/disp8'
++translate: instruction after packing: '74 ff'
+:(scenarios run)
+
+:(scenario pack_modrm_imm32)
+== 0x1
+# instruction             effective address                                         operand displacement    immediate
+# op          subop       mod           rm32          base      index     scale     r32
+# 1-3 bytes                                                                                 0/1/2/4 bytes   0/1/2/4 bytes
+  81          0/add/subop 3/mod/direct  3/ebx/rm32                                                          1/imm32       # add 1 to EBX
++translate: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
++translate: instruction after packing: '81 c3 01 00 00 00'
+
+:(scenario pack_imm32_large)
+== 0x1
+b9 0x080490a7/imm32  # copy to ECX
++translate: packing instruction 'b9 0x080490a7/imm32'
++translate: instruction after packing: 'b9 a7 90 04 08'
+
+:(before "End One-time Setup")
+Transform.push_back(pack_instructions);
+
+:(code)
+void pack_instructions(program& p) {
+  if (p.segments.empty()) return;
+  segment& seg = p.segments.at(0);
+  for (int i = 0;  i < SIZE(seg.lines);  ++i) {
+    line& inst = seg.lines.at(i);
+    if (all_raw_hex_bytes(inst)) continue;
+    trace(99, "translate") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
+    pack_instruction(inst);
+    trace(99, "translate") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
+  }
+}
+
+void pack_instruction(line& inst) {
+  line new_inst;
+  add_opcodes(inst, new_inst);
+  add_modrm_byte(inst, new_inst);
+  add_sib_byte(inst, new_inst);
+  add_disp_bytes(inst, new_inst);
+  add_imm_bytes(inst, new_inst);
+  inst.words.swap(new_inst.words);
+}
+
+void add_opcodes(const line& in, line& out) {
+  out.words.push_back(in.words.at(0));
+  if (in.words.at(0).data == "0f" || in.words.at(0).data == "f3")
+    out.words.push_back(in.words.at(1));
+  if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
+    out.words.push_back(in.words.at(2));
+}
+
+void add_modrm_byte(const line& in, line& out) {
+  uint8_t mod=0, reg_subop=0, rm32=0;
+  bool emit = false;
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_metadata(curr, "mod"))
+      mod = hex_byte(curr.data), emit = true;
+    else if (has_metadata(curr, "rm32"))
+      rm32 = hex_byte(curr.data), emit = true;
+    else if (has_metadata(curr, "r32"))
+      reg_subop = hex_byte(curr.data), emit = true;
+    else if (has_metadata(curr, "subop"))
+      reg_subop = hex_byte(curr.data), emit = true;
+  }
+  if (emit)
+    out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
+}
+
+void add_sib_byte(const line& in, line& out) {
+  uint8_t scale=0, index=0, base=0;
+  bool emit = false;
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_metadata(curr, "scale"))
+      scale = hex_byte(curr.data), emit = true;
+    else if (has_metadata(curr, "index"))
+      index = hex_byte(curr.data), emit = true;
+    else if (has_metadata(curr, "base"))
+      base = hex_byte(curr.data), emit = true;
+  }
+  if (emit)
+    out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
+}
+
+void add_disp_bytes(const line& in, line& out) {
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_metadata(curr, "disp8"))
+      emit_hex_bytes(out, curr, 1);
+    else if (has_metadata(curr, "disp32"))
+      emit_hex_bytes(out, curr, 4);
+  }
+}
+
+void add_imm_bytes(const line& in, line& out) {
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_metadata(curr, "imm8"))
+      emit_hex_bytes(out, curr, 1);
+    else if (has_metadata(curr, "imm32"))
+      emit_hex_bytes(out, curr, 4);
+  }
+}
+
+void emit_hex_bytes(line& out, const word& w, int num) {
+  assert(num <= 4);
+  uint32_t val = static_cast<uint32_t>(parse_int(w.data));
+  for (int i = 0;  i < num;  ++i) {
+    out.words.push_back(hex_byte_text(val & 0xff));
+    val = val >> 8;
+  }
+}
+
+word hex_byte_text(uint8_t val) {
+  ostringstream out;
+  out << HEXBYTE << NUM(val);
+  word result;
+  result.data = out.str();
+  return result;
+}
+
+string to_string(const vector<word>& in) {
+  ostringstream out;
+  for (int i = 0;  i < SIZE(in);  ++i) {
+    if (i > 0) out << ' ';
+    out << in.at(i).data;
+  }
+  return out.str();
+}
+
+// helper
+void transform(const string& text_bytes) {
+  program p;
+  istringstream in(text_bytes);
+  parse(in, p);
+  if (trace_contains_errors()) return;
+  transform(p);
+}
+
+:(scenario pack_immediate_constants_hex)
+== 0x1
+# instruction             effective address                                         operand displacement    immediate
+# op          subop       mod           rm32          base      index     scale     r32
+# 1-3 bytes                                                                                 0/1/2/4 bytes   0/1/2/4 bytes
+  bb                                                                                                        0x2a/imm32    # copy 42 to EBX
++translate: packing instruction 'bb 0x2a/imm32'
++translate: instruction after packing: 'bb 2a 00 00 00'
++run: copy imm32 0x0000002a to EBX
diff --git a/subx/025non_code_segment.cc b/subx/025non_code_segment.cc
new file mode 100644
index 00000000..bf21d253
--- /dev/null
+++ b/subx/025non_code_segment.cc
@@ -0,0 +1,28 @@
+//: Raise an error when operand metadata is used in non-code segments.
+
+:(scenario operand_metadata_outside_code_segment)
+% Hide_errors = true;
+== 0x1  # code segment
+cd 128/imm8
+== 0x1000  # data segment
+cd 12/imm8
++error: 12/imm8: metadata imm8 is only allowed in the (first) code segment
+
+:(before "End One-time Setup")
+Transform.push_back(check_operands_in_non_code_segments);
+:(code)
+void check_operands_in_non_code_segments(/*const*/ program& p) {
+  if (p.segments.empty()) return;
+  for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i) {
+    const segment& seg = p.segments.at(i);
+    for (int j = 0;  j < SIZE(seg.lines);  ++j) {
+      const line& l = seg.lines.at(j);
+      for (int k = 0;  k < SIZE(l.words);  ++k) {
+        const word& w = l.words.at(k);
+        for (map<string, uint32_t>::iterator p = Operand_bound.begin();  p != Operand_bound.end();  ++p)
+          if (has_metadata(w, p->first))
+            raise << w.original << ": metadata " << p->first << " is only allowed in the (first) code segment\n" << end();
+      }
+    }
+  }
+}
diff --git a/subx/ex2.subx b/subx/ex2.subx
index 4708a8b5..21c0ec54 100644
--- a/subx/ex2.subx
+++ b/subx/ex2.subx
@@ -8,13 +8,13 @@
 #   2
 
 == 0x08048054  # code segment, after leaving room for ELF header
-# opcode        ModR/M                    SIB                   displacement    immediate
-# instruction   mod, reg, Reg/Mem bits    scale, index, base
-# 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
-  bb                                                                            1/imm32       # copy 1 to EBX
-  81            c3                                                              1/imm32       # add 1 to EBX
+# instruction             effective address                                         operand displacement    immediate
+# op          subop       mod           rm32          base      index     scale     r32
+# 1-3 bytes                                                                                 0/1/2/4 bytes   0/1/2/4 bytes
+  bb                                                                                                        1/imm32       # copy 1 to EBX
+  81          0/add/subop 3/mod/direct  3/ebx/rm32                                                          1/imm32       # add 1 to EBX
   # exit(EBX)
-  b8                                                                            1/imm32       # copy 1 to EAX
-  cd                                                                            128/imm8      # int 80h
+  b8                                                                                                        1/imm32       # copy 1 to EAX
+  cd                                                                                                        128/imm8      # int 80h
 
 # vim:ft=subx
diff --git a/subx/ex3.subx b/subx/ex3.subx
index 8560167d..9b5391a8 100644
--- a/subx/ex3.subx
+++ b/subx/ex3.subx
@@ -8,29 +8,29 @@
 #   55
 
 == 0x08048054  # code segment, after leaving room for ELF header
-# opcode        ModR/M                    SIB                   displacement    immediate
-# instruction   mod, reg, Reg/Mem bits    scale, index, base
-# 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
+# instruction                     effective address                                         operand     displacement    immediate
+# op          subop               mod           rm32          base      index     scale     r32
+# 1-3 bytes                                                                                             0/1/2/4 bytes   0/1/2/4 bytes
   # result: EBX = 0
 # 0: e_entry = 0x08048054
-  bb                                                                            0/imm32       # copy 0 to EBX
+  bb                                                                                                                    0/imm32       # copy 0 to EBX
   # counter: ECX = 1
-  b9                                                                            1/imm32       # copy 1 to ECX
+  b9                                                                                                                    1/imm32       # copy 1 to ECX
 
 # 10: loop: 0x0804805e
   # while (ECX <= 10)
-  81            f9                                                              10/imm32      # compare ECX, 10/imm
-  7f                                                            0a                            # jump-if-greater exit (+10)
+  81            7/subop/compare   3/mod/direct  1/rm32/ecx                                                              10/imm32      # compare ECX, 10/imm
+  7f                                                                                                    10/disp8                      # jump-if-greater exit
   # EBX += ECX
-  01            cb                                                                            # add ECX to EBX
+  01                              3/mod/direct  3/rm32/ebx                                  1/r32/ecx                                 # add ECX to EBX
   # ECX++
-  81            c1                                                              1/imm32       # add 1 to ECX
+  81            0/subop/add       3/mod/direct  1/rm32/ecx                                                              1/imm32       # add 1 to ECX
   # loop
-  eb                                                            ee                            # jump loop (-18; -00010010; 11101110)
+  eb                                                                                                    -18/disp8                     # jump loop
 
 # 28: exit: 0x08048070
   # exit(EBX)
-  b8                                                                            1/imm32       # copy 1 to EAX
-  cd                                                                            128/imm8      # int 80h
+  b8                                                                                                                    1/imm32       # copy 1 to EAX
+  cd                                                                                                                    0x80/imm8     # int 80h
 
 # vim:ft=subx:nowrap
diff --git a/subx/ex4.subx b/subx/ex4.subx
index af0ef419..2d841a81 100644
--- a/subx/ex4.subx
+++ b/subx/ex4.subx
@@ -5,37 +5,37 @@
 #   $ subx run ex4
 
 == 0x08048074  # code segment, after leaving room for ELF header and segment headers
-# opcode        ModR/M                    SIB                   displacement    immediate
-# instruction   mod, reg, Reg/Mem bits    scale, index, base
-# 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
+# instruction                     effective address                                         operand     displacement    immediate
+# op          subop               mod           rm32          base      index     scale     r32
+# 1-3 bytes                                                                                             0/1/2/4 bytes   0/1/2/4 bytes
 
   ## read(stdin, x, 1)
   # fd = 0 (stdin)
-  bb                                                                            0/imm32       # copy 0 to EBX
+  bb                                                                                                                    0/imm32       # copy 0 to EBX
   # set location to write to
-  b9                                                                            a7 90 04 08   # copy 0x080490a7 to ECX
+  b9                                                                                                                    0x080490a7/imm32  # copy to ECX
   # size = 1 character
-  ba                                                                            1/imm32       # copy 1 to EDX
+  ba                                                                                                                    1/imm32       # copy 1 to EDX
   # syscall = read
-  b8                                                                            3/imm32       # copy 3 to EAX
+  b8                                                                                                                    3/imm32       # copy 3 to EAX
   # call
-  cd                                                                            128/imm8      # int 80h
+  cd                                                                                                                    128/imm8      # int 80h
 
   ## write(stdout, x, 1)
   # fd = 1 (stdout)
-  bb                                                                            1/imm32       # copy 1 to EBX
+  bb                                                                                                                    1/imm32       # copy 1 to EBX
   # set location to write to
-  b9                                                                            a7 90 04 08   # copy 0x080490a7 to ECX
+  b9                                                                                                                    0x080490a7/imm32  # copy to ECX
   # size = 1 character
-  ba                                                                            1/imm32       # copy 1 to EDX
+  ba                                                                                                                    1/imm32       # copy 1 to EDX
   # syscall = write
-  b8                                                                            4/imm32       # copy 3 to EAX
+  b8                                                                                                                    4/imm32       # copy 4 to EAX
   # call
-  cd                                                                            128/imm8      # int 80h
+  cd                                                                                                                    128/imm8      # int 80h
 
   ## exit(EBX)
-  b8                                                                            1/imm32       # copy 1 to EAX
-  cd                                                                            128/imm8      # int 80h
+  b8                                                                                                                    1/imm32       # copy 1 to EAX
+  cd                                                                                                                    128/imm8      # int 80h
 
 == 0x080490a7
 00 00 00 00  # space for read() to write to
diff --git a/subx/ex5.subx b/subx/ex5.subx
index be7266a4..b74af463 100644
--- a/subx/ex5.subx
+++ b/subx/ex5.subx
@@ -5,47 +5,44 @@
 #   $ subx run ex5
 
 == 0x08048054  # code segment, after leaving room for ELF header and segment headers
-# opcode        ModR/M                    SIB                   displacement    immediate
-# instruction   mod, reg, Reg/Mem bits    scale, index, base
-# 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
+# instruction                     effective address                                         operand     displacement    immediate
+# op          subop               mod           rm32          base      index     scale     r32
+# 1-3 bytes                                                                                             0/1/2/4 bytes   0/1/2/4 bytes
 
 ## function main
   # prolog
-  55                                                                                          # push EBP
-  89            e5                                                                            # copy ESP to EBP
-                # ModR/M: 11 (direct mode) 100 (src ESP) 101 (dest EBP)
+  55                                                                                                                                  # push EBP
+  89                              3/mod/direct  5/rm32/EBP                                  4/r32/ESP                                 # copy ESP to EBP
   # allocate x on the stack
-  81            ec                                                              4/imm32       # subtract 4 bytes from ESP
-                # ModR/M: 11 (direct mode) 101 (subtract imm32) 100 (dest EBP)
+  81            5/subop/subtract  3/mod/direct  4/rm32/ESP                                                              4/imm32       # subtract 4 bytes from ESP
 
   ## read(stdin, x, 1)
   # fd = 0 (stdin)
-  bb                                                                            0/imm32       # copy 0 to EBX
+  bb                                                                                                                    0/imm32       # copy 0 to EBX
   # set location to read character to
-  89            e9                                                                            # copy EBP to ECX
-                # ModR/M: 11 (direct mode) 101 (src EBP) 001 (dest ECX)
+  89                              3/mod/direct  1/rm32/ECX                                  5/r32/EBP                                 # copy EBP to ECX
   # size = 1 character
-  ba                                                                            1/imm32       # copy 1 to EDX
+  ba                                                                                                                    1/imm32       # copy 1 to EDX
   # syscall = read
-  b8                                                                            3/imm32       # copy 3 to EAX
+  b8                                                                                                                    3/imm32       # copy 3 to EAX
   # call
-  cd                                                                            128/imm8      # int 80h
+  cd                                                                                                                    0x80/imm8     # int 80h
 
   ## write(stdout, x, 1)
   # fd = 1 (stdout)
-  bb                                                                            1/imm32       # copy 1 to EBX
+  bb                                                                                                                    1/imm32       # copy 1 to EBX
   # set location of character to write out
-  89            e9                                                                            # copy EBP to ECX
+  89                              3/mod/direct  1/rm32/ECX                                  5/r32/EBP                                 # copy EBP to ECX
                 # ModR/M: 11 (direct mode) 101 (src EBP) 001 (dest ECX)
   # size = 1 character
-  ba                                                                            1/imm32       # copy 1 to EDX
+  ba                                                                                                                    1/imm32       # copy 1 to EDX
   # syscall = write
-  b8                                                                            4/imm32       # copy 4 to EAX
+  b8                                                                                                                    4/imm32       # copy 4 to EAX
   # call
-  cd                                                                            128/imm8      # int 80h
+  cd                                                                                                                    0x80/imm8     # int 80h
 
   ## exit(EBX)
-  b8                                                                            1/imm32       # copy 1 to EAX
-  cd                                                                            128/imm8      # int 80h
+  b8                                                                                                                    1/imm32       # copy 1 to EAX
+  cd                                                                                                                    0x80/imm8     # int 80h
 
 # vim:ft=subx:nowrap
diff --git a/subx/ex6 b/subx/ex6
new file mode 100755
index 00000000..dd2219bb
--- /dev/null
+++ b/subx/ex6
Binary files differdiff --git a/subx/ex6.subx b/subx/ex6.subx
new file mode 100644
index 00000000..5e4f34f5
--- /dev/null
+++ b/subx/ex6.subx
@@ -0,0 +1,37 @@
+## print out a (global variable) string to stdout
+#
+# To run:
+#   $ subx translate ex6.subx ex6
+#   $ subx run ex6
+#   Hello, world!
+
+== 0x08048074  # code segment, after leaving room for ELF header and segment headers
+# instruction                     effective address                                         operand     displacement    immediate
+# op          subop               mod             rm32          base      index     scale     r32
+# 1-3 bytes                                                                                               0/1/2/4 bytes   0/1/2/4 bytes
+
+  ## write(stdout, x, 1)
+  # fd = 1 (stdout)
+  bb                                                                                                                      1/imm32           # copy 1 to EBX
+  # set location to write out
+  b9                                                                                                                      0x08049097/imm32  # copy to ECX
+  # set size
+  ba                                                                                                                      0x08049093/imm32  # copy to EDX
+  8b                              0/mod/indirect  2/rm32/edx                                  2/r32/edx                                     # copy *EDX to EDX
+  # syscall = write
+  b8                                                                                                                      4/imm32       # copy 4 to EAX
+  # call
+  cd                                                                                                                      0x80/imm8     # int 80h
+
+  ## exit(EBX)
+  b8                                                                                                                      1/imm32       # copy 1 to EAX
+  cd                                                                                                                      0x80/imm8     # int 80h
+
+== 0x08049093
+# size of string
+0e 00 00 00
+# string
+  48 65 6c 6c 6f 2c 20 77 6f 72 6c 64 21 0a
+# h  e  l  l  o  ,  ␣  w  o  r  l  d  !  newline
+
+# vim:ft=subx:nowrap