https://github.com/akkartik/mu/blob/main/linux/bootstrap/032operands.cc
  1 //: Metadata for fields of an x86 instruction.
  2 //:
  3 //: The x86 instruction set is variable-length, and how a byte is interpreted
  4 //: affects later instruction boundaries. A lot of the pain in programming
  5 //: machine code stems from computer and programmer going out of sync on what
  6 //: a byte means. The miscommunication is usually not immediately caught, and
  7 //: metastasizes at runtime into kilobytes of misinterpreted instructions.
  8 //:
  9 //: To mitigate these issues, we'll start programming in terms of logical
 10 //: arguments rather than physical bytes. Some arguments are smaller than a
 11 //: byte, and others may consist of multiple bytes. This layer will correctly
 12 //: pack and order the bytes corresponding to the arguments in an instruction.
 13 
 14 :(before "End Help Texts")
    // Text of the 'instructions' help page: an overview of how an instruction is
    // written, plus the names of all argument types (each has its own page).
 15 put_new(Help, "instructions",
 16   "Each x86 instruction consists of an instruction or opcode and some number\n"
 17   "of arguments.\n"
 18   "Each argument has a type. An instruction won't have more than one argument of\n"
 19   "any type.\n"
 20   "Each instruction has some set of allowed argument types. It'll reject others.\n"
 21   "The complete list of argument types: mod, subop, r32 (integer register),\n"
 22   "rm32 (integer register or memory), x32 (floating point register),\n"
 23   "xm32 (floating point register or memory), scale, index, base, disp8, disp16,\n"
 24   "disp32,imm8,imm32.\n"
 25   "Each of these has its own help page. Try reading 'bootstrap help mod' next.\n"
 26 );
 27 :(before "End Help Contents")
    // Mention the page in the printed list of help topics.
 28 cerr << "  instructions\n";
 29 
 30 :(before "Running Test Program")
    // Apply the transforms to the program before running it, and give up on
    // running if the trace contains any errors at that point.
 31 transform(p);
 32 if (trace_contains_errors()) return;
 33 
 34 :(code)
 35 void test_pack_immediate_constants() {
      // An /imm32 argument expands to 4 bytes, least-significant byte first
      // (0x2a -> 2a 00 00 00), and the packed instruction then runs.
 36   run(
 37       "== code 0x1\n"
 38       "bb  0x2a/imm32\n"
 39   );
 40   CHECK_TRACE_CONTENTS(
 41       "transform: packing instruction 'bb 0x2a/imm32'\n"
 42       "transform: instruction after packing: 'bb 2a 00 00 00'\n"
 43       "run: copy imm32 0x0000002a to EBX\n"
 44   );
 45 }
 46 
 47 //: complete set of valid argument types
 48 
 49 :(before "End Globals")
    // Names of all metadata tags that count as argument types. Metadata not in
    // this set is ignored when looking for an argument's type (see
    // has_argument_metadata), so words can carry free-form annotations as well.
 50 set<string> Instruction_arguments;
 51 :(before "End One-time Setup")
    // Pieces of the ModR/M byte.
 52 Instruction_arguments.insert("subop");
 53 Instruction_arguments.insert("mod");
 54 Instruction_arguments.insert("rm32");
 55 Instruction_arguments.insert("xm32");
    // Pieces of the SIB byte.
 56 Instruction_arguments.insert("base");
 57 Instruction_arguments.insert("index");
 58 Instruction_arguments.insert("scale");
    // Shares bits of the ModR/M byte with 'subop'.
 59 Instruction_arguments.insert("r32");
 60 Instruction_arguments.insert("x32");
    // Displacements and immediates of various widths.
 61 Instruction_arguments.insert("disp8");
 62 Instruction_arguments.insert("disp16");
 63 Instruction_arguments.insert("disp32");
 64 Instruction_arguments.insert("imm8");
 65 Instruction_arguments.insert("imm32");
 66 
 67 :(before "End Help Texts")
    // Install one help page per argument type.
 68 init_argument_type_help();
 69 :(code)
 70 void init_argument_type_help() {
      // Fills Help with one page per argument type, keyed by the type's name.
      // The keys match the names inserted into Instruction_arguments.
      //
      // -- arguments that make up the ModR/M byte
 71   put(Help, "mod",
 72     "2-bit argument controlling the _addressing mode_ of many instructions,\n"
 73     "to determine how to compute the _effective address_ to look up memory at\n"
 74     "based on the 'rm32' argument and potentially others.\n"
 75     "\n"
 76     "If mod = 3, just operate on the contents of the register specified by rm32\n"
 77     "            (direct mode)\n"
 78     "If mod = 2, effective address is usually* rm32 + disp32\n"
 79     "            (indirect mode with displacement)\n"
 80     "If mod = 1, effective address is usually* rm32 + disp8\n"
 81     "            (indirect mode with displacement)\n"
 82     "If mod = 0, effective address is usually* rm32\n"
 83     "            (indirect mode)\n"
 84     "\n"
 85     "* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
 86     "    Using it as an address gets more involved. For more details,\n"
 87     "    try reading the help pages for 'base', 'index' and 'scale'.\n"
 88     "\n"
 89     "For complete details, spend some time with two tables in the IA-32 software\n"
 90     "developer's manual that are also included in this repo:\n"
 91     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
 92     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
 93   );
 94   put(Help, "subop",
 95     "Additional 3-bit argument for determining the instruction when the opcode\n"
 96     "is 81, 8f, d3, f7 or ff.\n"
 97     "Can't coexist with argument of type 'r32' in a single instruction, because\n"
 98     "the two use the same bits.\n"
 99   );
100   put(Help, "r32",
101     "3-bit argument specifying an integer register argument used directly,\n"
102     "without any further addressing modes.\n"
103   );
104   put(Help, "x32",
105     "3-bit argument specifying a floating-point register argument used directly,\n"
106     "without any further addressing modes.\n"
107   );
108   put(Help, "rm32",
109     "32-bit value in an integer register or memory. The precise details of its\n"
110     "construction depend on the eponymous 3-bit 'rm32' argument, the 'mod' argument,\n"
111     "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
112     "and a displacement ('disp8' or 'disp32').\n"
113     "\n"
114     "For complete details, spend some time with two tables in the IA-32 software\n"
115     "developer's manual that are also included in this repo:\n"
116     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
117     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
118   );
119   put(Help, "xm32",
120     "32-bit value in a floating-point register or memory. The precise details of its\n"
121     "construction depend on the eponymous 3-bit 'xm32' argument, the 'mod' argument,\n"
122     "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
123     "and a displacement ('disp8' or 'disp32').\n"
124     "\n"
125     "For complete details, spend some time with two tables in the IA-32 software\n"
126     "developer's manual that are also included in this repo:\n"
127     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
128     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
129     "\n"
130     "One subtlety here: while /xm32 refers to floating-point registers in direct mode\n"
131     "(when /mod is 3), other addressing modes to construct memory addresses use integer registers\n"
132     "(just like /rm32). Other than direct mode, its behavior is identical to /rm32.\n"
133   );
      // -- arguments that make up the SIB byte
134   put(Help, "base",
135     "Additional 3-bit argument (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
136     "register containing an address to look up.\n"
137     "This address may be further modified by 'index' and 'scale' arguments.\n"
138     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
139     "For complete details, spend some time with the IA-32 software developer's manual,\n"
140     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
141     "It is included in this repository as 'sib.pdf'.\n"
142   );
143   put(Help, "index",
144     "Optional 3-bit argument (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
145     "the 'base' argument to compute the 'effective address' at which to look up memory.\n"
146     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
147     "For complete details, spend some time with the IA-32 software developer's manual,\n"
148     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
149     "It is included in this repository as 'sib.pdf'.\n"
150   );
151   put(Help, "scale",
152     "Optional 2-bit argument (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
153     "power of 2 to be multiplied to the 'index' argument before adding the result to\n"
154     "the 'base' argument to compute the _effective address_ to operate on.\n"
155     "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
156     "\n"
157     "When scale is 0, use index unmodified.\n"
158     "When scale is 1, multiply index by 2.\n"
159     "When scale is 2, multiply index by 4.\n"
160     "When scale is 3, multiply index by 8.\n"
161     "\n"
162     "For complete details, spend some time with the IA-32 software developer's manual,\n"
163     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
164     "It is included in this repository as 'sib.pdf'.\n"
165   );
      // -- displacements
166   put(Help, "disp8",
167     "8-bit value to be added in many instructions.\n"
168   );
169   put(Help, "disp16",
170     "16-bit value to be added in many instructions.\n"
171     "Currently not used in any SubX instructions.\n"
172   );
173   put(Help, "disp32",
174     "32-bit value to be added in many instructions.\n"
175   );
      // -- immediates
176   put(Help, "imm8",
177     "8-bit value for many instructions.\n"
178   );
179   put(Help, "imm32",
180     "32-bit value for many instructions.\n"
181   );
182 }
183 
184 //:: transform packing arguments into bytes in the right order
185 
186 :(after "Begin Transforms")
    // Add the argument-packing pass to the list of transforms.
187 Transform.push_back(pack_arguments);
188 
189 :(code)
190 void pack_arguments(program& p) {
191   if (p.segments.empty()) return;
192   segment& code = *find(p, "code");
193   // Pack Operands(segment code)
194   trace(3, "transform") << "-- pack arguments" << end();
195   for (int i = 0;  i < SIZE(code.lines);  ++i) {
196     line& inst = code.lines.at(i);
197     if (all_hex_bytes(inst)) continue;
198     trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
199     pack_arguments(inst);
200     trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
201   }
202 }
203 
204 void pack_arguments(line& inst) {
205   line new_inst;
206   add_opcodes(inst, new_inst);
207   add_modrm_byte(inst, new_inst);
208   add_sib_byte(inst, new_inst);
209   add_disp_bytes(inst, new_inst);
210   add_imm_bytes(inst, new_inst);
211   inst.words.swap(new_inst.words);
212 }
213 
214 void add_opcodes(const line& in, line& out) {
215   out.words.push_back(in.words.at(0));
216   if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
217     out.words.push_back(in.words.at(1));
218   if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
219     out.words.push_back(in.words.at(2));
220   if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
221     out.words.push_back(in.words.at(2));
222 }
223 
224 void add_modrm_byte(const line& in, line& out) {
225   uint8_t mod=0, reg_subop=0, rm32=0;
226   bool emit = false;
227   for (int i = 0;  i < SIZE(in.words);  ++i) {
228     const word& curr = in.words.at(i);
229     if (has_argument_metadata(curr, "mod")) {
230       mod = hex_byte(curr.data);
231       emit = true;
232     }
233     else if (has_argument_metadata(curr, "rm32")) {
234       rm32 = hex_byte(curr.data);
235       emit = true;
236     }
237     else if (has_argument_metadata(curr, "r32")) {
238       reg_subop = hex_byte(curr.data);
239       emit = true;
240     }
241     else if (has_argument_metadata(curr, "xm32")) {
242       rm32 = hex_byte(curr.data);
243       emit = true;
244     }
245     else if (has_argument_metadata(curr, "x32")) {
246       reg_subop = hex_byte(curr.data);
247       emit = true;
248     }
249     else if (has_argument_metadata(curr, "subop")) {
250       reg_subop = hex_byte(curr.data);
251       emit = true;
252     }
253   }
254   if (emit)
255     out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
256 }
257 
258 void add_sib_byte(const line& in, line& out) {
259   uint8_t scale=0, index=0, base=0;
260   bool emit = false;
261   for (int i = 0;  i < SIZE(in.words);  ++i) {
262     const word& curr = in.words.at(i);
263     if (has_argument_metadata(curr, "scale")) {
264       scale = hex_byte(curr.data);
265       emit = true;
266     }
267     else if (has_argument_metadata(curr, "index")) {
268       index = hex_byte(curr.data);
269       emit = true;
270     }
271     else if (has_argument_metadata(curr, "base")) {
272       base = hex_byte(curr.data);
273       emit = true;
274     }
275   }
276   if (emit)
277     out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
278 }
279 
280 void add_disp_bytes(const line& in, line& out) {
281   for (int i = 0;  i < SIZE(in.words);  ++i) {
282     const word& curr = in.words.at(i);
283     if (has_argument_metadata(curr, "disp8"))
284       emit_hex_bytes(out, curr, 1);
285     if (has_argument_metadata(curr, "disp16"))
286       emit_hex_bytes(out, curr, 2);
287     else if (has_argument_metadata(curr, "disp32"))
288       emit_hex_bytes(out, curr, 4);
289   }
290 }
291 
292 void add_imm_bytes(const line& in, line& out) {
293   for (int i = 0;  i < SIZE(in.words);  ++i) {
294     const word& curr = in.words.at(i);
295     if (has_argument_metadata(curr, "imm8"))
296       emit_hex_bytes(out, curr, 1);
297     else if (has_argument_metadata(curr, "imm32"))
298       emit_hex_bytes(out, curr, 4);
299   }
300 }
301 
302 void emit_hex_bytes(line& out, const word& w, int num) {
303   assert(num <= 4);
304   bool is_number = looks_like_hex_int(w.data);
305   if (num == 1 || !is_number) {
306     out.words.push_back(w);  // preserve existing metadata
307     if (is_number)
308       out.words.back().data = hex_byte_to_string(parse_int(w.data));
309     return;
310   }
311   emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
312 }
313 
314 void emit_hex_bytes(line& out, uint32_t val, int num) {
315   assert(num <= 4);
316   for (int i = 0;  i < num;  ++i) {
317     out.words.push_back(hex_byte_text(val & 0xff));
318     val = val >> 8;
319   }
320 }
321 
322 word hex_byte_text(uint8_t val) {
323   word result;
324   result.data = hex_byte_to_string(val);
325   result.original = result.data+"/auto";
326   return result;
327 }
328 
329 string hex_byte_to_string(uint8_t val) {
330   ostringstream out;
331   // uint8_t prints without padding, but int8_t will expand to 32 bits again
332   out << HEXBYTE << NUM(val);
333   return out.str();
334 }
335 
336 string to_string(const vector<word>& in) {
337   ostringstream out;
338   for (int i = 0;  i < SIZE(in);  ++i) {
339     if (i > 0) out << ' ';
340     out << in.at(i).data;
341   }
342   return out.str();
343 }
344 
345 :(before "End Unit Tests")
346 void test_preserve_metadata_when_emitting_single_byte() {
      // Emitting a word as a single byte copies the word itself, so text that
      // isn't part of its data ('/foo' here) survives in .original.
347   word in;
348   in.data = "f0";
349   in.original = "f0/foo";
350   line out;
351   emit_hex_bytes(out, in, 1);
352   CHECK_EQ(out.words.at(0).data, "f0");
353   CHECK_EQ(out.words.at(0).original, "f0/foo");
354 }
355 
356 :(code)
357 void test_pack_disp8() {
      // A /disp8 argument is packed into a single zero-padded byte (2 -> 02).
358   run(
359       "== code 0x1\n"
360       "74 2/disp8\n"  // jump 2 bytes away if ZF is set
361   );
362   CHECK_TRACE_CONTENTS(
363       "transform: packing instruction '74 2/disp8'\n"
364       "transform: instruction after packing: '74 02'\n"
365   );
366 }
367 
368 void test_pack_disp8_negative() {
      // A negative /disp8 is packed into a single byte (-1 -> ff).
      // Only the transforms are applied; the program isn't run.
369   transform(
370       "== code 0x1\n"
371       // running this will cause an infinite loop
372       "74 -1/disp8\n"  // jump 1 byte before if ZF is set
373   );
374   CHECK_TRACE_CONTENTS(
375       "transform: packing instruction '74 -1/disp8'\n"
376       "transform: instruction after packing: '74 ff'\n"
377   );
378 }
379 
380 void test_pack_rm32_direct() {
      // mod=3, r32=0, rm32=3 combine into the ModR/M byte (3<<6)|(0<<3)|3 = c3.
381   run(
382       "== code 0x1\n"
383       // instruction                     effective address                                                   operand     displacement    immediate\n"
384       // op          subop               mod             rm32          base        index         scale       r32\n"
385       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
386       "  01                              3/mod/direct    3/rm32/ebx                                          0/r32/eax                                \n"  // add EAX to EBX
387   );
388   CHECK_TRACE_CONTENTS(
389       "transform: packing instruction '01 3/mod/direct 3/rm32/ebx 0/r32/eax'\n"
390       "transform: instruction after packing: '01 c3'\n"
391   );
392 }
393 
394 void test_pack_rm32_indirect() {
      // mod=0, r32=0, rm32=3 combine into the ModR/M byte (0<<6)|(0<<3)|3 = 03.
      // Only the transforms are applied; the program isn't run.
395   transform(
396       "== code 0x1\n"
397       // instruction                     effective address                                                   operand     displacement    immediate\n"
398       // op          subop               mod             rm32          base        index         scale       r32\n"
399       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
400       "  01                              0/mod/indirect  3/rm32/ebx                                          0/r32/eax                                \n"  // add EAX to *EBX
401   );
402   CHECK_TRACE_CONTENTS(
403       "transform: packing instruction '01 0/mod/indirect 3/rm32/ebx 0/r32/eax'\n"
404       "transform: instruction after packing: '01 03'\n"
405   );
406 }
407 
408 void test_pack_x32() {
      // A 3-word opcode (f3 0f 2a) is copied through, and /x32 fills the same
      // bits of the ModR/M byte as /r32 would: (3<<6)|(1<<3)|3 = cb.
409   run(
410       "== code 0x1\n"
411       // instruction                     effective address                                                   operand     displacement    immediate\n"
412       // op          subop               mod             rm32          base        index         scale       r32\n"
413       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
414       "  f3 0f 2a                        3/mod/direct    3/rm32/ebx                                          1/x32                                    \n"  // convert EBX to XMM1
415   );
416   CHECK_TRACE_CONTENTS(
417       "transform: packing instruction 'f3 0f 2a 3/mod/direct 3/rm32/ebx 1/x32'\n"
418       "transform: instruction after packing: 'f3 0f 2a cb'\n"
419   );
420 }
421 
422 void test_pack_xm32_direct() {
      // /xm32 fills the same bits of the ModR/M byte as /rm32 would:
      // (3<<6)|(1<<3)|3 = cb.
      // Only the transforms are applied; the program isn't run.
423   transform(
424       "== code 0x1\n"
425       // instruction                     effective address                                                   operand     displacement    immediate\n"
426       // op          subop               mod             rm32          base        index         scale       r32\n"
427       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
428       "  f3 0f 5e                        3/mod/direct    3/xm32                                              1/x32                                    \n"  // divide XMM1 by XMM3
429   );
430   CHECK_TRACE_CONTENTS(
431       "transform: packing instruction 'f3 0f 5e 3/mod/direct 3/xm32 1/x32'\n"
432       "transform: instruction after packing: 'f3 0f 5e cb'\n"
433   );
434 }
435 
436 void test_pack_xm32_indirect() {
      // mod=0, x32=1, rm32=3 combine into the ModR/M byte (0<<6)|(1<<3)|3 = 0b.
      // Only the transforms are applied; the program isn't run.
      // NOTE(review): despite the test's name, the instruction below spells its
      // memory operand as /rm32 rather than /xm32 -- confirm that's intended.
437   transform(
438       "== code 0x1\n"
439       // instruction                     effective address                                                   operand     displacement    immediate\n"
440       // op          subop               mod             rm32          base        index         scale       r32\n"
441       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
442       "  f3 0f 5e                        0/mod/indirect  3/rm32/ebx                                          1/x32                                    \n"  // divide XMM1 by *EBX
443   );
444   CHECK_TRACE_CONTENTS(
445       "transform: packing instruction 'f3 0f 5e 0/mod/indirect 3/rm32/ebx 1/x32'\n"
446       "transform: instruction after packing: 'f3 0f 5e 0b'\n"
447   );
448 }
449 
450 //: helper for scenario
451 void transform(const string& text_bytes) {
452   program p;
453   istringstream in(text_bytes);
454   parse(in, p);
455   if (trace_contains_errors()) return;
456   transform(p);
457 }
458 
459 void test_pack_modrm_imm32() {
      // The ModR/M byte ((3<<6)|(0<<3)|3 = c3, with /subop in the middle bits)
      // comes before the 4 bytes of the immediate.
460   run(
461       "== code 0x1\n"
462       // instruction                     effective address                                                   operand     displacement    immediate\n"
463       // op          subop               mod             rm32          base        index         scale       r32\n"
464       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
465       "  81          0/add/subop         3/mod/direct    3/rm32/ebx                                                                      1/imm32      \n"  // add 1 to EBX
466   );
467   CHECK_TRACE_CONTENTS(
468       "transform: packing instruction '81 0/add/subop 3/mod/direct 3/rm32/ebx 1/imm32'\n"
469       "transform: instruction after packing: '81 c3 01 00 00 00'\n"
470   );
471 }
472 
473 void test_pack_imm32_large() {
      // All 4 bytes of an /imm32 are emitted least-significant byte first
      // (0x080490a7 -> a7 90 04 08).
474   run(
475       "== code 0x1\n"
476       "b9  0x080490a7/imm32\n"
477   );
478   CHECK_TRACE_CONTENTS(
479       "transform: packing instruction 'b9 0x080490a7/imm32'\n"
480       "transform: instruction after packing: 'b9 a7 90 04 08'\n"
481   );
482 }
483 
484 void test_pack_immediate_constants_hex() {
      // An /imm32 with a '0x' prefix is padded out to 4 bytes, and the packed
      // instruction then runs.
485   run(
486       "== code 0x1\n"
487       "b9  0x2a/imm32\n"
488   );
489   CHECK_TRACE_CONTENTS(
490       "transform: packing instruction 'b9 0x2a/imm32'\n"
491       "transform: instruction after packing: 'b9 2a 00 00 00'\n"
492       "run: copy imm32 0x0000002a to ECX\n"
493   );
494 }
495 
496 void test_pack_silently_ignores_non_hex() {
      // An argument that doesn't look like a number ('foo') passes through
      // packing as a single unchanged word.
497   Hide_errors = true;
498   transform(
499       "== code 0x1\n"
500       "b9  foo/imm32\n"
501   );
502   CHECK_TRACE_CONTENTS(
503       "transform: packing instruction 'b9 foo/imm32'\n"
504       // no change (we're just not printing metadata to the trace)
505       "transform: instruction after packing: 'b9 foo'\n"
506   );
507 }
508 
509 void test_pack_flags_bad_hex() {
      // An argument that starts like a number but doesn't parse as one is
      // reported as an error.
510   Hide_errors = true;
511   run(
512       "== code 0x1\n"
513       "b9  0xfoo/imm32\n"
514   );
515   CHECK_TRACE_CONTENTS(
516       "error: not a number: 0xfoo\n"
517   );
518 }
519 
520 void test_pack_flags_uppercase_hex() {
      // Hex digits must be lowercase; uppercase ones are reported as an error.
521   Hide_errors = true;
522   run(
523       "== code 0x1\n"
524       "b9 0xAb/imm32\n"
525   );
526   CHECK_TRACE_CONTENTS(
527       "error: uppercase hex not allowed: 0xAb\n"
528   );
529 }
530 
531 //:: helpers
532 
533 bool all_hex_bytes(const line& inst) {
534   for (int i = 0;  i < SIZE(inst.words);  ++i)
535     if (!is_hex_byte(inst.words.at(i)))
536       return false;
537   return true;
538 }
539 
540 bool is_hex_byte(const word& curr) {
541   if (contains_any_argument_metadata(curr))
542     return false;
543   if (SIZE(curr.data) != 2)
544     return false;
545   if (curr.data.find_first_not_of("0123456789abcdef") != string::npos)
546     return false;
547   return true;
548 }
549 
550 bool contains_any_argument_metadata(const word& word) {
551   for (int i = 0;  i < SIZE(word.metadata);  ++i)
552     if (Instruction_arguments.find(word.metadata.at(i)) != Instruction_arguments.end())
553       return true;
554   return false;
555 }
556 
557 bool has_argument_metadata(const line& inst, const string& m) {
558   bool result = false;
559   for (int i = 0;  i < SIZE(inst.words);  ++i) {
560     if (!has_argument_metadata(inst.words.at(i), m)) continue;
561     if (result) {
562       raise << "'" << to_string(inst) << "' has conflicting " << m << " arguments\n" << end();
563       return false;
564     }
565     result = true;
566   }
567   return result;
568 }
569 
570 bool has_argument_metadata(const word& w, const string& m) {
571   bool result = false;
572   bool metadata_found = false;
573   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
574     const string& curr = w.metadata.at(i);
575     if (Instruction_arguments.find(curr) == Instruction_arguments.end()) continue;  // ignore unrecognized metadata
576     if (metadata_found) {
577       raise << "'" << w.original << "' has conflicting argument types; it should have only one\n" << end();
578       return false;
579     }
580     metadata_found = true;
581     result = (curr == m);
582   }
583   return result;
584 }
585 
586 word metadata(const line& inst, const string& m) {
587   for (int i = 0;  i < SIZE(inst.words);  ++i)
588     if (has_argument_metadata(inst.words.at(i), m))
589       return inst.words.at(i);
590   assert(false);
591 }
592 
// Cheap check for whether 's' is meant to be a number: it starts with a sign
// or a decimal digit. Only the first character is examined; the rest is
// validated later, when the number is actually parsed.
bool looks_like_hex_int(const string& s) {
  if (s.empty()) return false;
  if (s.at(0) == '-' || s.at(0) == '+') return true;
  // isdigit() is undefined for negative arguments other than EOF, and plain
  // 'char' may be signed, so go through unsigned char to stay safe on
  // non-ASCII bytes.
  if (isdigit(static_cast<unsigned char>(s.at(0)))) return true;  // includes '0x' prefix
  // End looks_like_hex_int(s) Detectors
  return false;
}
600 
601 string to_string(const line& inst) {
602   ostringstream out;
603   for (int i = 0;  i < SIZE(inst.words);  ++i) {
604     if (i > 0) out << ' ';
605     out << inst.words.at(i).original;
606   }
607   return out.str();
608 }
609 
610 int32_t parse_int(const string& s) {
611   if (s.empty()) return 0;
612   if (contains_uppercase(s)) {
613     raise << "uppercase hex not allowed: " << s << '\n' << end();
614     return 0;
615   }
616   istringstream in(s);
617   in >> std::hex;
618   if (s.at(0) == '-') {
619     int32_t result = 0;
620     in >> result;
621     if (!in || !in.eof()) {
622       raise << "not a number: " << s << '\n' << end();
623       return 0;
624     }
625     return result;
626   }
627   uint32_t uresult = 0;
628   in >> uresult;
629   if (!in || !in.eof()) {
630     raise << "not a number: " << s << '\n' << end();
631     return 0;
632   }
633   return static_cast<int32_t>(uresult);
634 }
635 :(before "End Unit Tests")
636 void test_parse_int() {
      // Numbers are always read as hex, with or without a '0x' prefix.
637   CHECK_EQ(0, parse_int("0"));
638   CHECK_EQ(0, parse_int("0x0"));
      // NOTE(review): the next check repeats the previous one verbatim; perhaps
      // a different input was intended -- confirm.
639   CHECK_EQ(0, parse_int("0x0"));
640   CHECK_EQ(16, parse_int("10"));  // hex always
641   CHECK_EQ(-1, parse_int("-1"));
      // the largest unsigned 32-bit value wraps around to -1
642   CHECK_EQ(-1, parse_int("0xffffffff"));
643 }