about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--subx/030---operands.cc431
-rw-r--r--subx/031check_operands.cc (renamed from subx/030check_operands.cc)237
-rw-r--r--subx/032check_operand_bounds.cc (renamed from subx/031check_operand_bounds.cc)28
-rw-r--r--subx/032pack_operands.cc205
-rw-r--r--subx/033non_code_segment.cc7
5 files changed, 445 insertions, 463 deletions
diff --git a/subx/030---operands.cc b/subx/030---operands.cc
new file mode 100644
index 00000000..1a4f0f10
--- /dev/null
+++ b/subx/030---operands.cc
@@ -0,0 +1,431 @@
+//: Beginning of "level 2": tagging bytes with metadata around what field of
+//: an x86 instruction they're for.
+//:
+//: The x86 instruction set is variable-length, and how a byte is interpreted
+//: affects later instruction boundaries. A lot of the pain in programming
+//: machine code stems from computer and programmer going out of sync on what
+//: a byte means. The miscommunication is usually not immediately caught, and
+//: metastasizes at runtime into kilobytes of misinterpreted instructions.
+//:
+//: To mitigate these issues, we'll start programming in terms of logical
+//: operands rather than physical bytes. Some operands are smaller than a
+//: byte, and others may consist of multiple bytes. This layer will correctly
+//: pack and order the bytes corresponding to the operands in an instruction.
+
+:(before "End Help Texts")
+put(Help, "instructions",
+  "Each x86 instruction consists of an instruction or opcode and some number\n"
+  "of operands.\n"
+  "Each operand has a type. An instruction won't have more than one operand of\n"
+  "any type.\n"
+  "Each instruction has some set of allowed operand types. It'll reject others.\n"
+  "The complete list of operand types: mod, subop, r32 (register), rm32\n"
+  "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
+  "imm32.\n"
+  "Each of these has its own help page. Try reading 'subx help mod' next.\n"
+);
+:(before "End Help Contents")
+cerr << "  instructions\n";
+
+:(scenario pack_immediate_constants)
+== 0x1
+# instruction                     effective address                                                   operand     displacement    immediate
+# op          subop               mod             rm32          base        index         scale       r32
+# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+  bb                                                                                                                              0x2a/imm32        # copy 42 to EBX
++transform: packing instruction 'bb 0x2a/imm32'
++transform: instruction after packing: 'bb 2a 00 00 00'
++run: copy imm32 0x0000002a to EBX
+
+//: complete set of valid operand types
+
+:(before "End Globals")
+set<string> Instruction_operands;
+:(before "End One-time Setup")
+Instruction_operands.insert("subop");
+Instruction_operands.insert("mod");
+Instruction_operands.insert("rm32");
+Instruction_operands.insert("base");
+Instruction_operands.insert("index");
+Instruction_operands.insert("scale");
+Instruction_operands.insert("r32");
+Instruction_operands.insert("disp8");
+Instruction_operands.insert("disp16");
+Instruction_operands.insert("disp32");
+Instruction_operands.insert("imm8");
+Instruction_operands.insert("imm32");
+
+:(before "End Help Texts")
+init_operand_type_help();
+:(code)
+void init_operand_type_help() {
+  put(Help, "mod",
+    "2-bit operand controlling the _addressing mode_ of many instructions,\n"
+    "to determine how to compute the _effective address_ to look up memory at\n"
+    "based on the 'rm32' operand and potentially others.\n"
+    "\n"
+    "If mod = 3, just operate on the contents of the register specified by rm32\n"
+    "            (direct mode).\n"
+    "If mod = 2, effective address is usually* rm32 + disp32\n"
+    "            (indirect mode with displacement).\n"
+    "If mod = 1, effective address is usually* rm32 + disp8\n"
+    "            (indirect mode with displacement).\n"
+    "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
+    "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
+    "     Using it as an address gets more involved. For more details,\n"
+    "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
+    "\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
+    "\"32-bit addressing forms with the ModR/M byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "subop",
+    "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
+    "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
+  );
+  put(Help, "r32",
+    "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
+  );
+  put(Help, "rm32",
+    "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
+    "\"32-bit addressing forms with the ModR/M byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "base",
+    "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
+    "This address may be further modified by 'index' and 'scale' operands.\n"
+    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
+    "\"32-bit addressing forms with the SIB byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "index",
+    "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
+    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
+    "\"32-bit addressing forms with the SIB byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "scale",
+    "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
+    "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
+    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
+    "\"32-bit addressing forms with the SIB byte\".\n"
+    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
+  );
+  put(Help, "disp8",
+    "8-bit value to be added in many instructions.\n"
+  );
+  put(Help, "disp16",
+    "16-bit value to be added in many instructions.\n"
+  );
+  put(Help, "disp32",
+    "32-bit value to be added in many instructions.\n"
+  );
+  put(Help, "imm8",
+    "8-bit value for many instructions.\n"
+  );
+  put(Help, "imm32",
+    "32-bit value for many instructions.\n"
+  );
+}
+
+//:: transform packing operands into bytes in the right order
+
+:(before "End Transforms")
+// Begin Level-2 Transforms
+Transform.push_back(pack_operands);
+// End Level-2 Transforms
+
+:(code)
+void pack_operands(program& p) {
+  if (p.segments.empty()) return;
+  segment& code = p.segments.at(0);
+  // Pack Operands(segment code)
+  trace(99, "transform") << "-- pack operands" << end();
+  for (int i = 0;  i < SIZE(code.lines);  ++i) {
+    line& inst = code.lines.at(i);
+    if (all_hex_bytes(inst)) continue;
+    trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
+    pack_operands(inst);
+    trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
+  }
+}
+
+void pack_operands(line& inst) {
+  line new_inst;
+  add_opcodes(inst, new_inst);
+  add_modrm_byte(inst, new_inst);
+  add_sib_byte(inst, new_inst);
+  add_disp_bytes(inst, new_inst);
+  add_imm_bytes(inst, new_inst);
+  inst.words.swap(new_inst.words);
+}
+
+void add_opcodes(const line& in, line& out) {
+  out.words.push_back(in.words.at(0));
+  if (in.words.at(0).data == "0f" || in.words.at(0).data == "f3")
+    out.words.push_back(in.words.at(1));
+  if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
+    out.words.push_back(in.words.at(2));
+}
+
+void add_modrm_byte(const line& in, line& out) {
+  uint8_t mod=0, reg_subop=0, rm32=0;
+  bool emit = false;
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_metadata(curr, "mod")) {
+      mod = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_metadata(curr, "rm32")) {
+      rm32 = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_metadata(curr, "r32")) {
+      reg_subop = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_metadata(curr, "subop")) {
+      reg_subop = hex_byte(curr.data);
+      emit = true;
+    }
+  }
+  if (emit)
+    out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
+}
+
+void add_sib_byte(const line& in, line& out) {
+  uint8_t scale=0, index=0, base=0;
+  bool emit = false;
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_metadata(curr, "scale")) {
+      scale = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_metadata(curr, "index")) {
+      index = hex_byte(curr.data);
+      emit = true;
+    }
+    else if (has_metadata(curr, "base")) {
+      base = hex_byte(curr.data);
+      emit = true;
+    }
+  }
+  if (emit)
+    out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
+}
+
+void add_disp_bytes(const line& in, line& out) {
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_metadata(curr, "disp8"))
+      emit_hex_bytes(out, curr, 1);
+    if (has_metadata(curr, "disp16"))
+      emit_hex_bytes(out, curr, 2);
+    else if (has_metadata(curr, "disp32"))
+      emit_hex_bytes(out, curr, 4);
+  }
+}
+
+void add_imm_bytes(const line& in, line& out) {
+  for (int i = 0;  i < SIZE(in.words);  ++i) {
+    const word& curr = in.words.at(i);
+    if (has_metadata(curr, "imm8"))
+      emit_hex_bytes(out, curr, 1);
+    else if (has_metadata(curr, "imm32"))
+      emit_hex_bytes(out, curr, 4);
+  }
+}
+
+void emit_hex_bytes(line& out, const word& w, int num) {
+  assert(num <= 4);
+  if (!is_hex_int(w.data)) {
+    out.words.push_back(w);
+    return;
+  }
+  emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
+}
+
+void emit_hex_bytes(line& out, uint32_t val, int num) {
+  assert(num <= 4);
+  for (int i = 0;  i < num;  ++i) {
+    out.words.push_back(hex_byte_text(val & 0xff));
+    val = val >> 8;
+  }
+}
+
+word hex_byte_text(uint8_t val) {
+  ostringstream out;
+  out << HEXBYTE << NUM(val);
+  word result;
+  result.data = out.str();
+  result.original = out.str()+"/auto";
+  return result;
+}
+
+string to_string(const vector<word>& in) {
+  ostringstream out;
+  for (int i = 0;  i < SIZE(in);  ++i) {
+    if (i > 0) out << ' ';
+    out << in.at(i).data;
+  }
+  return out.str();
+}
+
+:(scenario pack_disp8)
+== 0x1
+74 2/disp8  # jump 2 bytes away if ZF is set
++transform: packing instruction '74 2/disp8'
++transform: instruction after packing: '74 02'
+
+:(scenarios transform)
+:(scenario pack_disp8_negative)
+== 0x1
+# running this will cause an infinite loop
+74 -1/disp8  # jump 1 byte before if ZF is set
++transform: packing instruction '74 -1/disp8'
++transform: instruction after packing: '74 ff'
+:(scenarios run)
+
+//: helper for scenario
+:(code)
+void transform(const string& text_bytes) {
+  program p;
+  istringstream in(text_bytes);
+  parse(in, p);
+  if (trace_contains_errors()) return;
+  transform(p);
+}
+
+:(scenario pack_modrm_imm32)
+== 0x1
+# instruction                     effective address                                                   operand     displacement    immediate
+# op          subop               mod             rm32          base        index         scale       r32
+# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+  81          0/add/subop         3/mod/direct    3/ebx/rm32                                                                      1/imm32           # add 1 to EBX
++transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
++transform: instruction after packing: '81 c3 01 00 00 00'
+
+:(scenario pack_imm32_large)
+== 0x1
+b9 0x080490a7/imm32  # copy to ECX
++transform: packing instruction 'b9 0x080490a7/imm32'
++transform: instruction after packing: 'b9 a7 90 04 08'
+
+:(scenario pack_immediate_constants_hex)
+== 0x1
+# instruction                     effective address                                                   operand     displacement    immediate
+# op          subop               mod             rm32          base        index         scale       r32
+# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+  bb                                                                                                                              0x2a/imm32        # copy 42 to EBX
++transform: packing instruction 'bb 0x2a/imm32'
++transform: instruction after packing: 'bb 2a 00 00 00'
++run: copy imm32 0x0000002a to EBX
+
+:(scenarios transform)
+:(scenario pack_silently_ignores_non_hex)
+== 0x1
+# instruction                     effective address                                                   operand     displacement    immediate
+# op          subop               mod             rm32          base        index         scale       r32
+# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
+  bb                                                                                                                              foo/imm32         # copy foo to EBX
++transform: packing instruction 'bb foo/imm32'
+# no change (we're just not printing metadata to the trace)
++transform: instruction after packing: 'bb foo'
+$error: 0
+:(scenarios run)
+
+//:: helpers
+
+:(code)
+bool all_hex_bytes(const line& inst) {
+  for (int i = 0;  i < SIZE(inst.words);  ++i)
+    if (!is_hex_byte(inst.words.at(i)))
+      return false;
+  return true;
+}
+
+bool is_hex_byte(const word& curr) {
+  if (contains_any_operand_metadata(curr))
+    return false;
+  if (SIZE(curr.data) != 2)
+    return false;
+  if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
+    return false;
+  return true;
+}
+
+bool contains_any_operand_metadata(const word& word) {
+  for (int i = 0;  i < SIZE(word.metadata);  ++i)
+    if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
+      return true;
+  return false;
+}
+
+bool has_metadata(const line& inst, const string& m) {
+  bool result = false;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    if (!has_metadata(inst.words.at(i), m)) continue;
+    if (result) {
+      raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
+      return false;
+    }
+    result = true;
+  }
+  return result;
+}
+
+bool has_metadata(const word& w, const string& m) {
+  bool result = false;
+  bool metadata_found = false;
+  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
+    const string& curr = w.metadata.at(i);
+    if (!contains_key(Instruction_operands, curr)) continue;  // ignore unrecognized metadata
+    if (metadata_found) {
+      raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
+      return false;
+    }
+    metadata_found = true;
+    result = (curr == m);
+  }
+  return result;
+}
+
+word metadata(const line& inst, const string& m) {
+  for (int i = 0;  i < SIZE(inst.words);  ++i)
+    if (has_metadata(inst.words.at(i), m))
+      return inst.words.at(i);
+  assert(false);
+}
+
+bool is_hex_int(const string& s) {
+  if (s.empty()) return false;
+  size_t pos = 0;
+  if (s.at(0) == '-' || s.at(0) == '+') pos++;
+  if (s.substr(pos, pos+2) == "0x") pos += 2;
+  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
+}
+
+int32_t parse_int(const string& s) {
+  istringstream in(s);
+  int32_t result = 0;
+  in >> std::hex >> result;
+  if (!in || !in.eof()) {
+    raise << "not a number: " << s << '\n' << end();
+    return 0;
+  }
+  return result;
+}
+
+string to_string(const line& inst) {
+  ostringstream out;
+  for (int i = 0;  i < SIZE(inst.words);  ++i) {
+    if (i > 0) out << ' ';
+    out << inst.words.at(i).original;
+  }
+  return out.str();
+}
diff --git a/subx/030check_operands.cc b/subx/031check_operands.cc
index 74a44740..882a91db 100644
--- a/subx/030check_operands.cc
+++ b/subx/031check_operands.cc
@@ -1,56 +1,5 @@
-//: Beginning of "level 2": tagging bytes with metadata around what field of
-//: an x86 instruction they're for.
-//:
-//: The x86 instruction set is variable-length, and how a byte is interpreted
-//: affects later instruction boundaries. A lot of the pain in programming machine code
-//: stems from computer and programmer going out of sync on what a byte
-//: means. The miscommunication is usually not immediately caught, and
-//: metastasizes at runtime into kilobytes of misinterpreted instructions.
-//: Tagging bytes with what the programmer expects them to be interpreted as
-//: helps the computer catch miscommunication immediately.
-//:
-//: This is one way SubX is going to be different from a 'language': we
-//: typically think of languages as less verbose than machine code. Here we're
-//: making machine code *more* verbose.
-//:
-//: ---
-//:
-//: While we're here, we'll also improve a couple of other things in level 2:
-//:
-//: a) Machine code often packs logically separate operands into bitfields of
-//: a single byte. In a later layer (pack_operands) we'll start writing out
-//: each operand separately, and the translator will construct the right bytes
-//: out of operands.
-//:
-//: SubX now gets still more verbose. What used to be a single byte, say 'c3',
-//: can now expand to '3/mod 0/subop 3/rm32'.
-//:
-//: b) Since each operand is tagged, we can loosen ordering restrictions and
-//: allow writing out the operands in any order, like keyword arguments.
-//:
-//: The actual opcodes (first 1-3 bytes of each instruction) will continue to
-//: be at the start of each line. The x86 instruction set is a mess, and
-//: opcodes often don't admit good names.
-:(before "End Transforms")
-// Begin Level-2 Transforms
-// End Level-2 Transforms
-
-:(before "End Help Texts")
-put(Help, "instructions",
-  "Each x86 instruction consists of an instruction or opcode and some number\n"
-  "of operands.\n"
-  "Each operand has a type. An instruction won't have more than one operand of\n"
-  "any type.\n"
-  "Each instruction has some set of allowed operand types. It'll reject others.\n"
-  "The complete list of operand types: mod, subop, r32 (register), rm32\n"
-  "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
-  "imm32.\n"
-  "Each of these has its own help page. Try reading 'subx help mod' next.\n"
-);
-:(before "End Help Contents")
-cerr << "  instructions\n";
-
-//:: Check for 'syntax errors'; missing or unexpected operands.
+//: Since we're tagging operands with their types, let's start checking these
+//: operand types for each instruction.
 
 :(scenario check_missing_imm8_operand)
 % Hide_errors = true;
@@ -61,14 +10,13 @@ cerr << "  instructions\n";
   cd                                                                                                                                                # int ??
 +error: 'cd' (software interrupt): missing imm8 operand
 
-:(before "End Level-2 Transforms")
-Transform.push_back(check_operands);
+:(after "Pack Operands")
+check_operands(code);
+if (trace_contains_errors()) return;
 
 :(code)
-void check_operands(/*const*/ program& p) {
+void check_operands(const segment& code) {
   trace(99, "transform") << "-- check operands" << end();
-  if (p.segments.empty()) return;
-  const segment& code = p.segments.at(0);
   for (int i = 0;  i < SIZE(code.lines);  ++i) {
     check_operands(code.lines.at(i));
     if (trace_contains_errors()) return;  // stop at the first mal-formed instruction
@@ -303,16 +251,6 @@ string maybe_name(const word& op) {
   return " ("+get(name, op.data)+')';
 }
 
-bool is_hex_byte(const word& curr) {
-  if (contains_any_operand_metadata(curr))
-    return false;
-  if (SIZE(curr.data) != 2)
-    return false;
-  if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
-    return false;
-  return true;
-}
-
 uint32_t compute_operand_bitvector(const line& inst) {
   uint32_t bitvector = 0;
   for (int i = /*skip op*/1;  i < SIZE(inst.words);  ++i) {
@@ -337,20 +275,6 @@ int first_operand(const line& inst) {
   return 1;
 }
 
-bool all_hex_bytes(const line& inst) {
-  for (int i = 0;  i < SIZE(inst.words);  ++i)
-    if (!is_hex_byte(inst.words.at(i)))
-      return false;
-  return true;
-}
-
-bool contains_any_operand_metadata(const word& word) {
-  for (int i = 0;  i < SIZE(word.metadata);  ++i)
-    if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
-      return true;
-  return false;
-}
-
 // Scan the metadata of 'w' and return the bit corresponding to any operand type.
 // Also raise an error if metadata contains multiple operand types.
 uint32_t bitvector_for_operand(const word& w) {
@@ -384,22 +308,6 @@ cd/software-interrupt 80/imm8/imm32
 81 0/add/subop       3/rm32/ebx 1/imm32
 +error: '81 0/add/subop 3/rm32/ebx 1/imm32' (combine rm32 with imm32 based on subop): missing mod operand
 
-:(before "End Globals")
-set<string> Instruction_operands;
-:(before "End One-time Setup")
-Instruction_operands.insert("subop");
-Instruction_operands.insert("mod");
-Instruction_operands.insert("rm32");
-Instruction_operands.insert("base");
-Instruction_operands.insert("index");
-Instruction_operands.insert("scale");
-Instruction_operands.insert("r32");
-Instruction_operands.insert("disp8");
-Instruction_operands.insert("disp16");
-Instruction_operands.insert("disp32");
-Instruction_operands.insert("imm8");
-Instruction_operands.insert("imm32");
-
 :(code)
 void check_operands_modrm(const line& inst, const word& op) {
   if (all_hex_bytes(inst)) return;  // deliberately programming in raw hex; we'll raise a warning elsewhere
@@ -458,42 +366,6 @@ void check_metadata_absent(const line& inst, const string& type, const word& op,
     raise << "'" << to_string(inst) << "' (" << get(name, op.data) << "): unexpected " << type << " operand (" << msg << ")\n" << end();
 }
 
-bool has_metadata(const line& inst, const string& m) {
-  bool result = false;
-  for (int i = 0;  i < SIZE(inst.words);  ++i) {
-    if (!has_metadata(inst.words.at(i), m)) continue;
-    if (result) {
-      raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
-      return false;
-    }
-    result = true;
-  }
-  return result;
-}
-
-bool has_metadata(const word& w, const string& m) {
-  bool result = false;
-  bool metadata_found = false;
-  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
-    const string& curr = w.metadata.at(i);
-    if (!contains_key(Instruction_operands, curr)) continue;  // ignore unrecognized metadata
-    if (metadata_found) {
-      raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
-      return false;
-    }
-    metadata_found = true;
-    result = (curr == m);
-  }
-  return result;
-}
-
-word metadata(const line& inst, const string& m) {
-  for (int i = 0;  i < SIZE(inst.words);  ++i)
-    if (has_metadata(inst.words.at(i), m))
-      return inst.words.at(i);
-  assert(false);
-}
-
 :(scenarios transform)
 :(scenario modrm_with_displacement)
 % Reg[EAX].u = 0x1;
@@ -503,16 +375,6 @@ word metadata(const line& inst, const string& m) {
 $error: 0
 :(scenarios run)
 
-//: helper for scenario
-:(code)
-void transform(const string& text_bytes) {
-  program p;
-  istringstream in(text_bytes);
-  parse(in, p);
-  if (trace_contains_errors()) return;
-  transform(p);
-}
-
 :(scenario conflicting_operands_in_modrm_instruction)
 % Hide_errors = true;
 == 0x1
@@ -634,15 +496,6 @@ void compare_bitvector_0f(const line& inst, uint8_t expected, const word& op) {
   // ignore settings in any unused bits
 }
 
-string to_string(const line& inst) {
-  ostringstream out;
-  for (int i = 0;  i < SIZE(inst.words);  ++i) {
-    if (i > 0) out << ' ';
-    out << inst.words.at(i).original;
-  }
-  return out.str();
-}
-
 string tolower(const char* s) {
   ostringstream out;
   for (/*nada*/;  *s;  ++s)
@@ -654,83 +507,5 @@ string tolower(const char* s) {
 #undef SET
 #undef CLEAR
 
-//:: docs on each operand type
-
-:(before "End Help Texts")
-init_operand_type_help();
-:(code)
-void init_operand_type_help() {
-  put(Help, "mod",
-    "2-bit operand controlling the _addressing mode_ of many instructions,\n"
-    "to determine how to compute the _effective address_ to look up memory at\n"
-    "based on the 'rm32' operand and potentially others.\n"
-    "\n"
-    "If mod = 3, just operate on the contents of the register specified by rm32\n"
-    "            (direct mode).\n"
-    "If mod = 2, effective address is usually* rm32 + disp32\n"
-    "            (indirect mode with displacement).\n"
-    "If mod = 1, effective address is usually* rm32 + disp8\n"
-    "            (indirect mode with displacement).\n"
-    "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
-    "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
-    "     Using it as an address gets more involved. For more details,\n"
-    "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
-    "\n"
-    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
-    "\"32-bit addressing forms with the ModR/M byte\".\n"
-    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-  );
-  put(Help, "subop",
-    "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
-    "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
-  );
-  put(Help, "r32",
-    "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
-  );
-  put(Help, "rm32",
-    "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
-    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
-    "\"32-bit addressing forms with the ModR/M byte\".\n"
-    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-  );
-  put(Help, "base",
-    "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
-    "This address may be further modified by 'index' and 'scale' operands.\n"
-    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
-    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
-    "\"32-bit addressing forms with the SIB byte\".\n"
-    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-  );
-  put(Help, "index",
-    "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
-    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
-    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
-    "\"32-bit addressing forms with the SIB byte\".\n"
-    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-  );
-  put(Help, "scale",
-    "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
-    "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
-    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
-    "\"32-bit addressing forms with the SIB byte\".\n"
-    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
-  );
-  put(Help, "disp8",
-    "8-bit value to be added in many instructions.\n"
-  );
-  put(Help, "disp16",
-    "16-bit value to be added in many instructions.\n"
-  );
-  put(Help, "disp32",
-    "32-bit value to be added in many instructions.\n"
-  );
-  put(Help, "imm8",
-    "8-bit value for many instructions.\n"
-  );
-  put(Help, "imm32",
-    "32-bit value for many instructions.\n"
-  );
-}
-
 :(before "End Includes")
 #include<cctype>
diff --git a/subx/031check_operand_bounds.cc b/subx/032check_operand_bounds.cc
index b373a48b..7ed2732b 100644
--- a/subx/031check_operand_bounds.cc
+++ b/subx/032check_operand_bounds.cc
@@ -22,13 +22,12 @@ put(Operand_bound, "disp16", 1<<16);
 put(Operand_bound, "imm8", 1<<8);
 // no bound needed for imm32
 
-:(before "End Level-2 Transforms")
-Transform.push_back(check_operand_bounds);
+:(after "Pack Operands")
+check_operand_bounds(code);
+if (trace_contains_errors()) return;
 :(code)
-void check_operand_bounds(/*const*/ program& p) {
+void check_operand_bounds(const segment& code) {
   trace(99, "transform") << "-- check operand bounds" << end();
-  if (p.segments.empty()) return;
-  const segment& code = p.segments.at(0);
   for (int i = 0;  i < SIZE(code.lines);  ++i) {
     const line& inst = code.lines.at(i);
     for (int j = first_operand(inst);  j < SIZE(inst.words);  ++j)
@@ -53,22 +52,3 @@ void check_operand_bounds(const word& w) {
     }
   }
 }
-
-bool is_hex_int(const string& s) {
-  if (s.empty()) return false;
-  size_t pos = 0;
-  if (s.at(0) == '-' || s.at(0) == '+') pos++;
-  if (s.substr(pos, pos+2) == "0x") pos += 2;
-  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
-}
-
-int32_t parse_int(const string& s) {
-  istringstream in(s);
-  int32_t result = 0;
-  in >> std::hex >> result;
-  if (!in || !in.eof()) {
-    raise << "not a number: " << s << '\n' << end();
-    return 0;
-  }
-  return result;
-}
diff --git a/subx/032pack_operands.cc b/subx/032pack_operands.cc
deleted file mode 100644
index 771e063a..00000000
--- a/subx/032pack_operands.cc
+++ /dev/null
@@ -1,205 +0,0 @@
-//: Operands can refer to bitfields smaller than a byte. This layer packs
-//: operands into their containing bytes in the right order.
-
-:(scenario pack_immediate_constants)
-== 0x1
-# instruction                     effective address                                                   operand     displacement    immediate
-# op          subop               mod             rm32          base        index         scale       r32
-# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-  bb                                                                                                                              0x2a/imm32        # copy 42 to EBX
-+transform: packing instruction 'bb 0x2a/imm32'
-+transform: instruction after packing: 'bb 2a 00 00 00'
-+run: copy imm32 0x0000002a to EBX
-
-:(scenario pack_disp8)
-== 0x1
-74 2/disp8  # jump 2 bytes away if ZF is set
-+transform: packing instruction '74 2/disp8'
-+transform: instruction after packing: '74 02'
-
-:(scenarios transform)
-:(scenario pack_disp8_negative)
-== 0x1
-# running this will cause an infinite loop
-74 -1/disp8  # jump 1 byte before if ZF is set
-+transform: packing instruction '74 -1/disp8'
-+transform: instruction after packing: '74 ff'
-:(scenarios run)
-
-:(scenario pack_modrm_imm32)
-== 0x1
-# instruction                     effective address                                                   operand     displacement    immediate
-# op          subop               mod             rm32          base        index         scale       r32
-# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-  81          0/add/subop         3/mod/direct    3/ebx/rm32                                                                      1/imm32           # add 1 to EBX
-+transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
-+transform: instruction after packing: '81 c3 01 00 00 00'
-
-:(scenario pack_imm32_large)
-== 0x1
-b9 0x080490a7/imm32  # copy to ECX
-+transform: packing instruction 'b9 0x080490a7/imm32'
-+transform: instruction after packing: 'b9 a7 90 04 08'
-
-:(before "End Level-2 Transforms")
-Transform.push_back(pack_operands);
-
-:(code)
-void pack_operands(program& p) {
-  trace(99, "transform") << "-- pack operands" << end();
-  if (p.segments.empty()) return;
-  segment& code = p.segments.at(0);
-  for (int i = 0;  i < SIZE(code.lines);  ++i) {
-    line& inst = code.lines.at(i);
-    if (all_hex_bytes(inst)) continue;
-    trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
-    pack_operands(inst);
-    trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
-  }
-}
-
-void pack_operands(line& inst) {
-  line new_inst;
-  add_opcodes(inst, new_inst);
-  add_modrm_byte(inst, new_inst);
-  add_sib_byte(inst, new_inst);
-  add_disp_bytes(inst, new_inst);
-  add_imm_bytes(inst, new_inst);
-  inst.words.swap(new_inst.words);
-}
-
-void add_opcodes(const line& in, line& out) {
-  out.words.push_back(in.words.at(0));
-  if (in.words.at(0).data == "0f" || in.words.at(0).data == "f3")
-    out.words.push_back(in.words.at(1));
-  if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
-    out.words.push_back(in.words.at(2));
-}
-
-void add_modrm_byte(const line& in, line& out) {
-  uint8_t mod=0, reg_subop=0, rm32=0;
-  bool emit = false;
-  for (int i = 0;  i < SIZE(in.words);  ++i) {
-    const word& curr = in.words.at(i);
-    if (has_metadata(curr, "mod")) {
-      mod = hex_byte(curr.data);
-      emit = true;
-    }
-    else if (has_metadata(curr, "rm32")) {
-      rm32 = hex_byte(curr.data);
-      emit = true;
-    }
-    else if (has_metadata(curr, "r32")) {
-      reg_subop = hex_byte(curr.data);
-      emit = true;
-    }
-    else if (has_metadata(curr, "subop")) {
-      reg_subop = hex_byte(curr.data);
-      emit = true;
-    }
-  }
-  if (emit)
-    out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
-}
-
-void add_sib_byte(const line& in, line& out) {
-  uint8_t scale=0, index=0, base=0;
-  bool emit = false;
-  for (int i = 0;  i < SIZE(in.words);  ++i) {
-    const word& curr = in.words.at(i);
-    if (has_metadata(curr, "scale")) {
-      scale = hex_byte(curr.data);
-      emit = true;
-    }
-    else if (has_metadata(curr, "index")) {
-      index = hex_byte(curr.data);
-      emit = true;
-    }
-    else if (has_metadata(curr, "base")) {
-      base = hex_byte(curr.data);
-      emit = true;
-    }
-  }
-  if (emit)
-    out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
-}
-
-void add_disp_bytes(const line& in, line& out) {
-  for (int i = 0;  i < SIZE(in.words);  ++i) {
-    const word& curr = in.words.at(i);
-    if (has_metadata(curr, "disp8"))
-      emit_hex_bytes(out, curr, 1);
-    if (has_metadata(curr, "disp16"))
-      emit_hex_bytes(out, curr, 2);
-    else if (has_metadata(curr, "disp32"))
-      emit_hex_bytes(out, curr, 4);
-  }
-}
-
-void add_imm_bytes(const line& in, line& out) {
-  for (int i = 0;  i < SIZE(in.words);  ++i) {
-    const word& curr = in.words.at(i);
-    if (has_metadata(curr, "imm8"))
-      emit_hex_bytes(out, curr, 1);
-    else if (has_metadata(curr, "imm32"))
-      emit_hex_bytes(out, curr, 4);
-  }
-}
-
-void emit_hex_bytes(line& out, const word& w, int num) {
-  assert(num <= 4);
-  if (!is_hex_int(w.data)) {
-    out.words.push_back(w);
-    return;
-  }
-  emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
-}
-
-void emit_hex_bytes(line& out, uint32_t val, int num) {
-  assert(num <= 4);
-  for (int i = 0;  i < num;  ++i) {
-    out.words.push_back(hex_byte_text(val & 0xff));
-    val = val >> 8;
-  }
-}
-
-word hex_byte_text(uint8_t val) {
-  ostringstream out;
-  out << HEXBYTE << NUM(val);
-  word result;
-  result.data = out.str();
-  result.original = out.str()+"/auto";
-  return result;
-}
-
-string to_string(const vector<word>& in) {
-  ostringstream out;
-  for (int i = 0;  i < SIZE(in);  ++i) {
-    if (i > 0) out << ' ';
-    out << in.at(i).data;
-  }
-  return out.str();
-}
-
-:(scenario pack_immediate_constants_hex)
-== 0x1
-# instruction                     effective address                                                   operand     displacement    immediate
-# op          subop               mod             rm32          base        index         scale       r32
-# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-  bb                                                                                                                              0x2a/imm32        # copy 42 to EBX
-+transform: packing instruction 'bb 0x2a/imm32'
-+transform: instruction after packing: 'bb 2a 00 00 00'
-+run: copy imm32 0x0000002a to EBX
-
-:(scenarios transform)
-:(scenario pack_silently_ignores_non_hex)
-== 0x1
-# instruction                     effective address                                                   operand     displacement    immediate
-# op          subop               mod             rm32          base        index         scale       r32
-# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
-  bb                                                                                                                              foo/imm32         # copy foo to EBX
-+transform: packing instruction 'bb foo/imm32'
-# no change (we're just not printing metadata to the trace)
-+transform: instruction after packing: 'bb foo'
-$error: 0
-:(scenarios run)
diff --git a/subx/033non_code_segment.cc b/subx/033non_code_segment.cc
index 88b27d3b..829a61ca 100644
--- a/subx/033non_code_segment.cc
+++ b/subx/033non_code_segment.cc
@@ -8,10 +8,11 @@ cd 0x80/imm8
 cd 12/imm8
 +error: 12/imm8: metadata imm8 is only allowed in the (first) code segment
 
-:(before "End Level-2 Transforms")
-Transform.push_back(ensure_operands_only_in_code_segments);
+:(after "Pack Operands")
+ensure_operands_only_in_code_segments(p);
+if (trace_contains_errors()) return;
 :(code)
-void ensure_operands_only_in_code_segments(/*const*/ program& p) {
+void ensure_operands_only_in_code_segments(const program& p) {
   trace(99, "transform") << "-- ensure operands only in code segments" << end();
   if (p.segments.empty()) return;
   for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i) {