about summary refs log tree commit diff stats
path: root/arc/.traces/init-tagged-value
blob: af322df4811bb57d02530534b1d3c5cbf577eb9b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
c{0: 0 (((1 integer)) <- ((copy)) ((34 literal))) -- nil
c{0: 1 (((2 tagged-value-address)) <- ((init-tagged-value)) ((integer literal)) ((1 integer))) -- nil
c{0: 2 (((3 integer)) ((4 boolean)) <- ((maybe-coerce)) ((2 tagged-value-address) (deref)) ((integer literal))) -- nil
c{1: 0 ✓ (((1 integer)) <- ((copy)) ((34 literal)))
c{1: 1 ✓ (((2 tagged-value-address)) <- ((init-tagged-value)) ((integer literal)) ((1 integer)))
c{1: 2 ✓ (((3 integer)) ((4 boolean)) <- ((maybe-coerce)) ((2 tagged-value-address) (deref)) ((integer literal)))
cn0: convert-names in main
cn0: (((1 integer)) <- ((copy)) ((34 literal))) nil nil
cn0: checking arg ((34 literal))
cn0: checking oarg ((1 integer))
maybe-add: ((1 integer))
cn0: (((2 tagged-value-address)) <- ((init-tagged-value)) ((integer literal)) ((1 integer))) nil nil
cn0: checking arg ((integer literal))
cn0: checking arg ((1 integer))
maybe-add: ((1 integer))
cn0: checking oarg ((2 tagged-value-address))
maybe-add: ((2 tagged-value-address))
cn0: (((3 integer)) ((4 boolean)) <- ((maybe-coerce)) ((2 tagged-value-address) (deref)) ((integer literal))) nil nil
cn0: checking arg ((2 tagged-value-address) (deref))
maybe-add: ((2 tagged-value-address) (deref))
cn0: checking arg ((integer literal))
cn0: checking oarg ((3 integer))
maybe-add: ((3 integer))
cn0: checking oarg ((4 boolean))
maybe-add: ((4 boolean))
cn1: (((1 integer)) <- ((copy)) ((34 literal)))
cn1: (((2 tagged-value-address)) <- ((init-tagged-value)) ((integer literal)) ((1 integer)))
cn1: (((3 integer)) ((4 boolean)) <- ((maybe-coerce)) ((2 tagged-value-address) (deref)) ((integer literal)))
schedule: main
run: main 0: (((1 integer)) <- ((copy)) ((34 literal)))
run: main 0: 34 => ((1 integer))
mem: ((1 integer)): 1 <= 34
run: main 1: (((2 tagged-value-address)) <- ((init-tagged-value)) ((integer literal)) ((1 integer)))
mem: ((1 integer)) => 34
run: init-tagged-value/main 0: (((default-space space-address)) <- ((new)) ((space literal)) ((30 literal)))
run: init-tagged-value/main 0: 1000 => ((default-space space-address))
run: init-tagged-value/main 1: (((1 type)) <- ((next-input)))
arg: nil 0 (integer 34)
run: init-tagged-value/main 1: integer => ((1 type))
mem: ((1 type)): 1002 <= integer
run: init-tagged-value/main 2: (((2 integer)) <- ((sizeof)) ((1 type)))
mem: ((1 type)) => integer
run: init-tagged-value/main 2: 1 => ((2 integer))
mem: ((2 integer)): 1003 <= 1
run: init-tagged-value/main 3: (((3 boolean)) <- ((equal)) ((2 integer)) ((1 literal)))
mem: ((2 integer)) => 1
run: init-tagged-value/main 3: t => ((3 boolean))
mem: ((3 boolean)): 1004 <= t
run: init-tagged-value/main 4: (((assert)) ((3 boolean)))
mem: ((3 boolean)) => t
run: init-tagged-value/main 5: (((4 tagged-value-address)) <- ((new)) ((tagged-value literal)))
run: init-tagged-value/main 5: 1031 => ((4 tagged-value-address))
mem: ((4 tagged-value-address)): 1005 <= 1031
run: init-tagged-value/main 6: (((5 location)) <- ((get-address)) ((4 tagged-value-address) (deref)) ((0 offset)))
run: init-tagged-value/main 6: 1031 => ((5 location))
mem: ((5 location)): 1006 <= 1031
run: init-tagged-value/main 7: (((5 location) (deref)) <- ((copy)) ((1 type)))
mem: ((1 type)) => integer
run: init-tagged-value/main 7: integer => ((5 location) (deref))
mem: ((5 location) (deref)): 1031 <= integer
run: init-tagged-value/main 8: (((6 location)) <- ((get-address)) ((4 tagged-value-address) (deref)) ((1 offset)))
run: init-tagged-value/main 8: 1032 => ((6 location))
mem: ((6 location)): 1007 <= 1032
run: init-tagged-value/main 9: (((6 location) (deref)) <- ((next-input)))
arg: nil 1 (integer 34)
run: init-tagged-value/main 9: 34 => ((6 location) (deref))
mem: ((6 location) (deref)): 1032 <= 34
run: init-tagged-value/main 10: (((reply)) ((4 tagged-value-address)))
mem: ((4 tagged-value-address)) => 1031
run: main 1: 1031 => ((2 tagged-value-address))
mem: ((2 tagged-value-address)): 2 <= 1031
run: main 2: (((3 integer)) ((4 boolean)) <- ((maybe-coerce)) ((2 tagged-value-address) (deref)) ((integer literal)))
mem: ((2 tagged-value-address) (deref)) => #(tagged record (integer 34 . nil))
run: maybe-coerce/main 0: (((default-space space-address)) <- ((new)) ((space literal)) ((30 literal)))
run: maybe-coerce/main 0: 1033 => ((default-space space-address))
run: maybe-coerce/main 1: (((1 tagged-value-address)) <- ((new)) ((tagged-value literal)))
run: maybe-coerce/main 1: 1064 => ((1 tagged-value-address))
mem: ((1 tagged-value-address)): 1035 <= 1064
run: maybe-coerce/main 2: (((1 tagged-value-address) (deref)) <- ((next-input)))
arg: nil 0 (#(tagged record (integer 34 . nil)) integer)
run: maybe-coerce/main 2: #(tagged record (integer 34 . nil)) => ((1 tagged-value-address) (deref))
mem: ((1 tagged-value-address) (deref)): 1064 <= integer
mem: ((1 tagged-value-address) (deref)): 1065 <= 34
run: maybe-coerce/main 3: (((2 type)) <- ((next-input)))
arg: nil 1 (#(tagged record (integer 34 . nil)) integer)
run: maybe-coerce/main 3: integer => ((2 type))
mem: ((2 type)): 1036 <= integer
run: maybe-coerce/main 4: (((3 type)) <- ((get)) ((1 tagged-value-address) (deref)) ((0 offset)))
mem: ((1064 type) (raw)) => integer
run: maybe-coerce/main 4: integer => ((3 type))
mem: ((3 type)): 1037 <= integer
run: maybe-coerce/main 5: (((4 boolean)) <- ((equal)) ((3 type)) ((2 type)))
mem: ((3 type)) => integer
mem: ((2 type)) => integer
run: maybe-coerce/main 5: t => ((4 boolean))
mem: ((4 boolean)): 1038 <= t
run: maybe-coerce/main 6: (((jump-if)) ((4 boolean)) ((1 offset)))
mem: ((4 boolean)) => t
run: maybe-coerce/main 8: (((5 location)) <- ((get)) ((1 tagged-value-address) (deref)) ((1 offset)))
mem: ((1065 location) (raw)) => 34
run: maybe-coerce/main 8: 34 => ((5 location))
mem: ((5 location)): 1039 <= 34
run: maybe-coerce/main 9: (((reply)) ((5 location)) ((4 boolean)))
mem: ((5 location)) => 34
mem: ((4 boolean)) => t
run: main 2: 34 => ((3 integer))
mem: ((3 integer)): 3 <= 34
run: main 2: t => ((4 boolean))
mem: ((4 boolean)): 4 <= t
schedule: done with routine nil
#888888 } /* Comment.PreprocFile */ .highlight .c1 { color: #888888 } /* Comment.Single */ .highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */ .highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ .highlight .gr { color: #aa0000 } /* Generic.Error */ .highlight .gh { color: #333333 } /* Generic.Heading */ .highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ .highlight .go { color: #888888 } /* Generic.Output */ .highlight .gp { color: #555555 } /* Generic.Prompt */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #666666 } /* Generic.Subheading */ .highlight .gt { color: #aa0000 } /* Generic.Traceback */ .highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */ .highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */ .highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */ .highlight .kp { color: #008800 } /* Keyword.Pseudo */ .highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */ .highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */ .highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */ .highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */ .highlight .na { color: #336699 } /* Name.Attribute */ .highlight .nb { color: #003388 } /* Name.Builtin */ .highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */ .highlight .no { color: #003366; font-weight: bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */ .highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* 
Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* 
Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
//: Beginning of "level 2": tagging bytes with metadata around what field of
//: an x86 instruction they're for.
//:
//: The x86 instruction set is variable-length, and how a byte is interpreted
//: affects later instruction boundaries. A lot of the pain in programming
//: machine code stems from computer and programmer going out of sync on what
//: a byte means. The miscommunication is usually not immediately caught, and
//: metastasizes at runtime into kilobytes of misinterpreted instructions.
//:
//: To mitigate these issues, we'll start programming in terms of logical
//: operands rather than physical bytes. Some operands are smaller than a
//: byte, and others may consist of multiple bytes. This layer will correctly
//: pack and order the bytes corresponding to the operands in an instruction.

//: Help page 'instructions': an overview of how an instruction is written as
//: an opcode followed by typed operands. The operand types it lists are the
//: same ones inserted into Instruction_operands in this file.
:(before "End Help Texts")
put(Help, "instructions",
  "Each x86 instruction consists of an instruction or opcode and some number\n"
  "of operands.\n"
  "Each operand has a type. An instruction won't have more than one operand of\n"
  "any type.\n"
  "Each instruction has some set of allowed operand types. It'll reject others.\n"
  "The complete list of operand types: mod, subop, r32 (register), rm32\n"
  "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
  "imm32.\n"
  "Each of these has its own help page. Try reading 'subx help mod' next.\n"
);
//: Print the name of the page above wherever "End Help Contents" lives
//: (presumably the list of available help topics -- confirm in the help layer).
:(before "End Help Contents")
cerr << "  instructions\n";

:(scenario pack_immediate_constants)
== 0x1
bb  0x2a/imm32
+transform: packing instruction 'bb 0x2a/imm32'
+transform: instruction after packing: 'bb 2a 00 00 00'
+run: copy imm32 0x0000002a to EBX

//: complete set of valid operand types

//: Instruction_operands holds every metadata string that this layer treats as
//: an operand type. Metadata not in this set is ignored by the helpers below
//: (see has_operand_metadata and contains_any_operand_metadata).
:(before "End Globals")
set<string> Instruction_operands;
//: Fill in the set once at startup. Insertion order is irrelevant for a set;
//: the grouping below follows ModR/M fields, SIB fields, displacements,
//: immediates.
:(before "End One-time Setup")
Instruction_operands.insert("subop");
Instruction_operands.insert("mod");
Instruction_operands.insert("rm32");
Instruction_operands.insert("base");
Instruction_operands.insert("index");
Instruction_operands.insert("scale");
Instruction_operands.insert("r32");
Instruction_operands.insert("disp8");
Instruction_operands.insert("disp16");
Instruction_operands.insert("disp32");
Instruction_operands.insert("imm8");
Instruction_operands.insert("imm32");

//: Register one help page per operand type. The call is spliced in next to
//: the other help texts; the definition follows in its own code section.
:(before "End Help Texts")
init_operand_type_help();
:(code)
// Add a help page to Help for each of the twelve operand types in
// Instruction_operands: mod, subop, r32, rm32, base, index, scale, disp8,
// disp16, disp32, imm8 and imm32. The page name is the operand type itself.
void init_operand_type_help() {
  // -- operands that are packed into the ModR/M byte
  put(Help, "mod",
    "2-bit operand controlling the _addressing mode_ of many instructions,\n"
    "to determine how to compute the _effective address_ to look up memory at\n"
    "based on the 'rm32' operand and potentially others.\n"
    "\n"
    "If mod = 3, just operate on the contents of the register specified by rm32\n"
    "            (direct mode).\n"
    "If mod = 2, effective address is usually* rm32 + disp32\n"
    "            (indirect mode with displacement).\n"
    "If mod = 1, effective address is usually* rm32 + disp8\n"
    "            (indirect mode with displacement).\n"
    "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
    "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
    "     Using it as an address gets more involved. For more details,\n"
    "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
    "\n"
    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
    "\"32-bit addressing forms with the ModR/M byte\".\n"
    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "subop",
    "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
    "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
  );
  put(Help, "r32",
    "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
  );
  put(Help, "rm32",
    "32-bit value in register or memory. The precise details of its construction depend on the eponymous 3-bit\n"
    "'rm32' operand, the 'mod' operand, and also potentially the 'SIB' operands ('scale', 'index' and 'base')\n"
    "and a displacement ('disp8' or 'disp32').\n"
    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
    "\"32-bit addressing forms with the ModR/M byte\".\n"
    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  // -- operands that are packed into the SIB byte
  put(Help, "base",
    "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
    "This address may be further modified by 'index' and 'scale' operands.\n"
    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "index",
    "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "scale",
    "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
    "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    "  https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  // -- displacement operands
  put(Help, "disp8",
    "8-bit value to be added in many instructions.\n"
  );
  put(Help, "disp16",
    "16-bit value to be added in many instructions.\n"
  );
  put(Help, "disp32",
    "32-bit value to be added in many instructions.\n"
  );
  // -- immediate operands
  put(Help, "imm8",
    "8-bit value for many instructions.\n"
  );
  put(Help, "imm32",
    "32-bit value for many instructions.\n"
  );
}

//:: transform packing operands into bytes in the right order

//: Register pack_operands as a transform. The Begin/End comments below look
//: like waypoints for other layers to attach level-2 transforms to, so their
//: text should stay exactly as is.
:(after "Begin Transforms")
// Begin Level-2 Transforms
Transform.push_back(pack_operands);
// End Level-2 Transforms

:(code)
// Transform: rewrite every instruction in the first segment of 'p' so that
// its operands are packed into bytes. Lines that already consist purely of
// hex bytes are left alone. Does nothing if the program has no segments.
void pack_operands(program& p) {
  if (p.segments.empty()) return;
  // only segment 0 is processed here; it is treated as the code segment
  segment& code = p.segments.at(0);
  // NOTE(review): the next comment looks like a tangle waypoint that other
  // layers may insert code before/after; keep its text and the name 'code'.
  // Pack Operands(segment code)
  trace(99, "transform") << "-- pack operands" << end();
  for (int i = 0;  i < SIZE(code.lines);  ++i) {
    line& inst = code.lines.at(i);
    // nothing to pack if every word is already a bare 2-digit hex byte
    if (all_hex_bytes(inst)) continue;
    // trace the instruction as written (word.original), then as packed (word.data)
    trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
    pack_operands(inst);
    trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
  }
}

// Replace the words of a single instruction with their packed form.
// The packed words are assembled in a scratch line in a fixed order:
// opcode bytes, ModR/M byte, SIB byte, displacement bytes, immediate bytes.
void pack_operands(line& inst) {
  line packed;
  add_opcodes(inst, packed);     // 1-3 opcode bytes
  add_modrm_byte(inst, packed);  // mod, r32/subop, rm32
  add_sib_byte(inst, packed);    // scale, index, base
  add_disp_bytes(inst, packed);  // disp8/disp16/disp32
  add_imm_bytes(inst, packed);   // imm8/imm32
  // only the words are exchanged; everything else in 'inst' is untouched
  inst.words.swap(packed.words);
}

void add_opcodes(const line& in, line& out) {
  out.words.push_back(in.words.at(0));
  if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
    out.words.push_back(in.words.at(1));
  if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
    out.words.push_back(in.words.at(2));
  if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
    out.words.push_back(in.words.at(2));
}

void add_modrm_byte(const line& in, line& out) {
  uint8_t mod=0, reg_subop=0, rm32=0;
  bool emit = false;
  for (int i = 0;  i < SIZE(in.words);  ++i) {
    const word& curr = in.words.at(i);
    if (has_operand_metadata(curr, "mod")) {
      mod = hex_byte(curr.data);
      emit = true;
    }
    else if (has_operand_metadata(curr, "rm32")) {
      rm32 = hex_byte(curr.data);
      emit = true;
    }
    else if (has_operand_metadata(curr, "r32")) {
      reg_subop = hex_byte(curr.data);
      emit = true;
    }
    else if (has_operand_metadata(curr, "subop")) {
      reg_subop = hex_byte(curr.data);
      emit = true;
    }
  }
  if (emit)
    out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
}

void add_sib_byte(const line& in, line& out) {
  uint8_t scale=0, index=0, base=0;
  bool emit = false;
  for (int i = 0;  i < SIZE(in.words);  ++i) {
    const word& curr = in.words.at(i);
    if (has_operand_metadata(curr, "scale")) {
      scale = hex_byte(curr.data);
      emit = true;
    }
    else if (has_operand_metadata(curr, "index")) {
      index = hex_byte(curr.data);
      emit = true;
    }
    else if (has_operand_metadata(curr, "base")) {
      base = hex_byte(curr.data);
      emit = true;
    }
  }
  if (emit)
    out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
}

// Append the displacement operands of 'in' to 'out' in the order they appear:
// a disp8 word as 1 byte, a disp16 word as 2 bytes, a disp32 word as 4 bytes.
// Words without a displacement type are skipped.
void add_disp_bytes(const line& in, line& out) {
  for (int i = 0;  i < SIZE(in.words);  ++i) {
    const word& curr = in.words.at(i);
    // has_operand_metadata() accepts at most one operand type per word, so
    // the three cases are mutually exclusive and form a single else-if chain
    // (same shape as add_imm_bytes).
    if (has_operand_metadata(curr, "disp8"))
      emit_hex_bytes(out, curr, 1);
    else if (has_operand_metadata(curr, "disp16"))
      emit_hex_bytes(out, curr, 2);
    else if (has_operand_metadata(curr, "disp32"))
      emit_hex_bytes(out, curr, 4);
  }
}

void add_imm_bytes(const line& in, line& out) {
  for (int i = 0;  i < SIZE(in.words);  ++i) {
    const word& curr = in.words.at(i);
    if (has_operand_metadata(curr, "imm8"))
      emit_hex_bytes(out, curr, 1);
    else if (has_operand_metadata(curr, "imm32"))
      emit_hex_bytes(out, curr, 4);
  }
}

// Append operand 'w' to 'out' as 'num' bytes (at most 4).
//  - A numeric operand wider than one byte is split into 'num' separate
//    byte words.
//  - Anything else (a single-byte operand, or a word that doesn't look like
//    a number) is appended as one word that keeps the metadata of 'w'; if it
//    is numeric its data is normalized to a hex byte.
void emit_hex_bytes(line& out, const word& w, int num) {
  assert(num <= 4);
  const bool numeric = looks_like_hex_int(w.data);
  if (numeric && num != 1) {
    emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
    return;
  }
  out.words.push_back(w);  // copy the word whole so its metadata survives
  if (numeric)
    out.words.back().data = hex_byte_to_string(parse_int(w.data));
}

// Append the low 'num' bytes (at most 4) of 'val' to 'out', one word per
// byte, least significant byte first.
void emit_hex_bytes(line& out, uint32_t val, int num) {
  assert(num <= 4);
  for (int remaining = num;  remaining > 0;  --remaining) {
    out.words.push_back(hex_byte_text(val & 0xff));
    val >>= 8;  // expose the next byte
  }
}

// Build a word for a generated byte: 'data' is the byte in hex and
// 'original' is the same text tagged with '/auto'.
word hex_byte_text(uint8_t val) {
  const string text = hex_byte_to_string(val);
  word generated;
  generated.data = text;
  generated.original = text+"/auto";
  return generated;
}

// Format a single byte as text using the HEXBYTE stream formatting.
string hex_byte_to_string(uint8_t val) {
  ostringstream buf;
  // the parameter is deliberately unsigned: uint8_t prints without padding,
  // but int8_t will expand to 32 bits again
  buf << HEXBYTE << NUM(val);
  return buf.str();
}

// Join the 'data' of each word with single spaces (no metadata).
string to_string(const vector<word>& in) {
  ostringstream out;
  if (!in.empty())
    out << in.at(0).data;
  for (int i = 1;  i < SIZE(in);  ++i)
    out << ' ' << in.at(i).data;
  return out.str();
}

:(before "End Unit Tests")
// Emitting a word as a single byte must copy the word whole: both its data
// and its original text (including metadata like '/foo') are kept.
void test_preserve_metadata_when_emitting_single_byte() {
  word in;
  in.data = "f0";
  in.original = "f0/foo";
  line out;
  // num == 1 takes the path that pushes the word itself rather than new bytes
  emit_hex_bytes(out, in, 1);
  CHECK_EQ(out.words.at(0).data, "f0");
  CHECK_EQ(out.words.at(0).original, "f0/foo");
}

:(scenario pack_disp8)
== 0x1
74 2/disp8  # jump 2 bytes away if ZF is set
+transform: packing instruction '74 2/disp8'
+transform: instruction after packing: '74 02'

:(scenarios transform)
:(scenario pack_disp8_negative)
== 0x1
# running this will cause an infinite loop
74 -1/disp8  # jump 1 byte before if ZF is set
+transform: packing instruction '74 -1/disp8'
+transform: instruction after packing: '74 ff'
:(scenarios run)

//: helper for scenario
:(code)
// Scenario helper: parse 'text_bytes' into a program and run the transforms
// on it. The transforms are skipped if parsing left errors in the trace.
void transform(const string& text_bytes) {
  program p;
  istringstream in(text_bytes);
  parse(in, p);
  if (!trace_contains_errors())
    transform(p);
}

:(scenario pack_modrm_imm32)
== 0x1
# instruction                     effective address                                                   operand     displacement    immediate
# op          subop               mod             rm32          base        index         scale       r32
# 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
  81          0/add/subop         3/mod/direct    3/ebx/rm32                                                                      1/imm32           # add 1 to EBX
+transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
+transform: instruction after packing: '81 c3 01 00 00 00'

:(scenario pack_imm32_large)
== 0x1
b9  0x080490a7/imm32
+transform: packing instruction 'b9 0x080490a7/imm32'
+transform: instruction after packing: 'b9 a7 90 04 08'

:(scenario pack_immediate_constants_hex)
== 0x1
b9  0x2a/imm32
+transform: packing instruction 'b9 0x2a/imm32'
+transform: instruction after packing: 'b9 2a 00 00 00'
+run: copy imm32 0x0000002a to ECX

:(scenarios transform)
:(scenario pack_silently_ignores_non_hex)
% Hide_errors = true;
== 0x1
b9  foo/imm32
+transform: packing instruction 'b9 foo/imm32'
# no change (we're just not printing metadata to the trace)
+transform: instruction after packing: 'b9 foo'
:(scenarios run)

:(scenario pack_flags_bad_hex)
% Hide_errors = true;
== 0x1
b9  0xfoo/imm32
+error: not a number: 0xfoo

//:: helpers

:(code)
// True if every word of 'inst' is a bare hex byte (see is_hex_byte).
// A line without words counts as all hex bytes.
bool all_hex_bytes(const line& inst) {
  const int num_words = SIZE(inst.words);
  int i = 0;
  // advance past the leading run of hex bytes; stop at the first other word
  while (i < num_words && is_hex_byte(inst.words.at(i)))
    ++i;
  return i == num_words;
}

// True if 'curr' is already a plain byte: it carries no operand-type
// metadata and its data is exactly two hex digits (either case).
bool is_hex_byte(const word& curr) {
  return !contains_any_operand_metadata(curr)
      && SIZE(curr.data) == 2
      && curr.data.find_first_not_of("0123456789abcdefABCDEF") == string::npos;
}

// True if at least one metadata entry of the word names a recognized operand
// type, i.e. is a member of Instruction_operands.
bool contains_any_operand_metadata(const word& word) {
  const int num_tags = SIZE(word.metadata);
  for (int i = 0;  i < num_tags;  ++i) {
    if (Instruction_operands.count(word.metadata.at(i)) > 0)
      return true;
  }
  return false;
}

// True if exactly one word of 'inst' has operand type 'm'.
// If a second such word is found, an error is raised and the result is false.
bool has_operand_metadata(const line& inst, const string& m) {
  bool seen = false;
  for (int i = 0;  i < SIZE(inst.words);  ++i) {
    if (has_operand_metadata(inst.words.at(i), m)) {
      if (seen) {
        // second word of the same operand type: stop at once
        raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
        return false;
      }
      seen = true;
    }
  }
  return seen;
}

bool has_operand_metadata(const word& w, const string& m) {
  bool result = false;
  bool metadata_found = false;
  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
    const string& curr = w.metadata.at(i);
    if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;  // ignore unrecognized metadata
    if (metadata_found) {
      raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
      return false;
    }
    metadata_found = true;
    result = (curr == m);
  }
  return result;
}

// Return (a copy of) the first word of 'inst' whose operand type is 'm'.
// Callers are expected to ask only for operand types the instruction has;
// asking for a missing one trips the assertion.
word metadata(const line& inst, const string& m) {
  for (int i = 0;  i < SIZE(inst.words);  ++i)
    if (has_operand_metadata(inst.words.at(i), m))
      return inst.words.at(i);
  assert(false);
  // Reached only when assertions are compiled out. Return an empty word
  // rather than flowing off the end of a non-void function, which is
  // undefined behavior.
  return word();
}

// Cheap check on the first character only: does 's' start like a number?
// A leading sign or decimal digit qualifies; the rest of the string is not
// validated here. The empty string never qualifies.
bool looks_like_hex_int(const string& s) {
  if (s.empty()) return false;
  if (s.at(0) == '-' || s.at(0) == '+') return true;
  // isdigit() is undefined for negative values, which a plain char can hold
  // for non-ASCII bytes, so go through unsigned char first.
  if (isdigit(static_cast<unsigned char>(s.at(0)))) return true;  // includes '0x' prefix
  // End looks_like_hex_int(s) Detectors
  return false;
}

:(code)
// Join the 'original' text of each word of 'inst' with single spaces, i.e.
// render the instruction as it was written, metadata included.
string to_string(const line& inst) {
  ostringstream out;
  if (!inst.words.empty())
    out << inst.words.at(0).original;
  for (int i = 1;  i < SIZE(inst.words);  ++i)
    out << ' ' << inst.words.at(i).original;
  return out.str();
}