https://github.com/akkartik/mu/blob/main/linux/bootstrap/032operands.cc
  1 //: Metadata for fields of an x86 instruction.
  2 //:
  3 //: The x86 instruction set is variable-length, and how a byte is interpreted
  4 //: affects later instruction boundaries. A lot of the pain in programming
  5 //: machine code stems from computer and programmer going out of sync on what
  6 //: a byte means. The miscommunication is usually not immediately caught, and
  7 //: metastasizes at runtime into kilobytes of misinterpreted instructions.
  8 //:
  9 //: To mitigate these issues, we'll start programming in terms of logical
 10 //: arguments rather than physical bytes. Some arguments are smaller than a
 11 //: byte, and others may consist of multiple bytes. This layer will correctly
 12 //: pack and order the bytes corresponding to the arguments in an instruction.
 13 
 14 :(before "End Help Texts")
 15 put_new(Help, "instructions",
 16   "Each x86 instruction consists of an instruction or opcode and some number\n"
 17   "of arguments.\n"
 18   "Each argument has a type. An instruction won't have more than one argument of\n"
 19   "any type.\n"
 20   "Each instruction has some set of allowed argument types. It'll reject others.\n"
 21   "The complete list of argument types: mod, subop, r32 (integer register),\n"
 22   "rm32 (integer register or memory), x32 (floating point register),\n"
 23   "xm32 (floating point register or memory), scale, index, base, disp8, disp16,\n"
 24   "disp32,imm8,imm32.\n"
 25   "Each of these has its own help page. Try reading 'bootstrap help mod' next.\n"
 26 );
 27 :(before "End Help Contents")
 28 cerr << "  instructions\n";
 29 
 30 :(before "Running Test Program")
 31 transform(p);
 32 if (trace_contains_errors()) return;
 33 
 34 :(code)
 35 void test_pack_immediate_constants() {
 36   run(
 37       "== code 0x1\n"
 38       "bb  0x2a/imm32\n"
 39   );
 40   CHECK_TRACE_CONTENTS(
 41       "transform: packing instruction 'bb 0x2a/imm32'\n"
 42       "transform: instruction after packing: 'bb 2a 00 00 00'\n"
 43       "run: copy imm32 0x0000002a to EBX\n"
 44   );
 45 }
 46 
 47 //: complete set of valid argument types
 48 
 49 :(before "End Globals")
 50 set<string> Instruction_arguments;
 51 :(before "End One-time Setup")
 52 Instruction_arguments.insert("subop");
 53 Instruction_arguments.insert("mod");
 54 Instruction_arguments.insert("rm32");
 55 Instruction_arguments.insert("xm32");
 56 Instruction_arguments.insert("base");
 57 Instruction_arguments.insert("index");
 58 Instruction_arguments.insert("scale");
 59 Instruction_arguments.insert("r32");
 60 Instruction_arguments.insert("x32");
 61 Instruction_arguments.insert("disp8");
 62 Instruction_arguments.insert("disp16");
 63 Instruction_arguments.insert("disp32");
 64 Instruction_arguments.insert("imm8");
 65 Instruction_arguments.insert("imm32");
 66 
 67 :(before "End Help Texts")
 68 init_argument_type_help();
 69 :(code)
 70 void init_argument_type_help() {
 71   put(Help, "mod",
 72     "2-bit argument controlling the _addressing mode_ of many instructions,\n"
 73     "to determine how to compute the _effective address_ to look up memory at\n"
 74     "based on the 'rm32' argument and potentially others.\n"
 75     "\n"
 76     "If mod = 3, just operate on the contents of the register specified by rm32\n"
 77     "            (direct mode).\n"
 78     "If mod = 2, effective address is usually* rm32 + disp32\n"
 79     "            (indirect mode with displacement).\n"
 80     "If mod = 1, effective address is usually* rm32 + disp8\n"
 81     "            (indirect mode with displacement).\n"
 82     "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
 83     "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
 84     "     Using it as an address gets more involved. For more details,\n"
 85     "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
 86     "\n"
 87     "For complete details, spend some time with two tables in the IA-32 software\n"
 88     "developer's manual that are also included in this repo:\n"
 89     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
 90     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
 91   );
 92   put(Help, "subop",
 93     "Additional 3-bit argument for determining the instruction when the opcode\n"
 94     "is 81, 8f, d3, f7 or ff.\n"
 95     "Can't coexist with argument of type 'r32' in a single instruction, because\n"
 96     "the two use the same bits.\n"
 97   );
 98   put(Help, "r32",
 99     "3-bit argument specifying an integer register argument used directly,\n"
100     "without any further addressing modes.\n"
101   );
102   put(Help, "x32",
103     "3-bit argument specifying a floating-point register argument used directly,\n"
104     "without any further addressing modes.\n"
105   );
106   put(Help, "rm32",
107     "32-bit value in an integer register or memory. The precise details of its\n"
108     "construction depend on the eponymous 3-bit 'rm32' argument, the 'mod' argument,\n"
109     "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
110     "and a displacement ('disp8' or 'disp32').\n"
111     "\n"
112     "For complete details, spend some time with two tables in the IA-32 software\n"
113     "developer's manual that are also included in this repo:\n"
114     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
115     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
116   );
117   put(Help, "xm32",
118     "32-bit value in a floating-point register or memory. The precise details of its\n"
119     "construction depend on the eponymous 3-bit 'xm32' argument, the 'mod' argument,\n"
120     "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
121     "and a displacement ('disp8' or 'disp32').\n"
122     "\n"
123     "For complete details, spend some time with two tables in the IA-32 software\n"
124     "developer's manual that are also included in this repo:\n"
125     "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
126     "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
127     "\n"
128     "One subtlety here: while /xm32 refers to floating-point registers in direct mode\n"
129     "(when /mod is 3), other addressing modes to construct memory addresses use integer registers\n"
130     "(just like /rm32). Other than direct mode, its behavior is identical to /rm32.\n"
131   );
132   put(Help, "base",
133     "Additional 3-bit argument (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
134     "register containing an address to look up.\n"
135     "This address may be further modified by 'index' and 'scale' arguments.\n"
136     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
137     "For complete details, spend some time with the IA-32 software developer's manual,\n"
138     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
139     "It is included in this repository as 'sib.pdf'.\n"
140   );
141   put(Help, "index",
142     "Optional 3-bit argument (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
143     "the 'base' argument to compute the 'effective address' at which to look up memory.\n"
144     "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
145     "For complete details, spend some time with the IA-32 software developer's manual,\n"
146     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
147     "It is included in this repository as 'sib.pdf'.\n"
148   );
149   put(Help, "scale",
150     "Optional 2-bit argument (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
151     "power of 2 to be multiplied to the 'index' argument before adding the result to\n"
152     "the 'base' argument to compute the _effective address_ to operate on.\n"
153     "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
154     "\n"
155     "When scale is 0, use index unmodified.\n"
156     "When scale is 1, multiply index by 2.\n"
157     "When scale is 2, multiply index by 4.\n"
158     "When scale is 3, multiply index by 8.\n"
159     "\n"
160     "For complete details, spend some time with the IA-32 software developer's manual,\n"
161     "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
162     "It is included in this repository as 'sib.pdf'.\n"
163   );
164   put(Help, "disp8",
165     "8-bit value to be added in many instructions.\n"
166   );
167   put(Help, "disp16",
168     "16-bit value to be added in many instructions.\n"
169     "Currently not used in any SubX instructions.\n"
170   );
171   put(Help, "disp32",
172     "32-bit value to be added in many instructions.\n"
173   );
174   put(Help, "imm8",
175     "8-bit value for many instructions.\n"
176   );
177   put(Help, "imm32",
178     "32-bit value for many instructions.\n"
179   );
180 }
181 
182 //:: transform packing arguments into bytes in the right order
183 
184 :(after "Begin Transforms")
185 Transform.push_back(pack_arguments);
186 
187 :(code)
188 void pack_arguments(program& p) {
189   if (p.segments.empty()) return;
190   segment& code = *find(p, "code");
191   // Pack Operands(segment code)
192   trace(3, "transform") << "-- pack arguments" << end();
193   for (int i = 0;  i < SIZE(code.lines);  ++i) {
194     line& inst = code.lines.at(i);
195     if (all_hex_bytes(inst)) continue;
196     trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
197     pack_arguments(inst);
198     trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
199   }
200 }
201 
202 void pack_arguments(line& inst) {
203   line new_inst;
204   add_opcodes(inst, new_inst);
205   add_modrm_byte(inst, new_inst);
206   add_sib_byte(inst, new_inst);
207   add_disp_bytes(inst, new_inst);
208   add_imm_bytes(inst, new_inst);
209   inst.words.swap(new_inst.words);
210 }
211 
212 void add_opcodes(const line& in, line& out) {
213   out.words.push_back(in.words.at(0));
214   if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
215     out.words.push_back(in.words.at(1));
216   if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
217     out.words.push_back(in.words.at(2));
218   if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
219     out.words.push_back(in.words.at(2));
220 }
221 
222 void add_modrm_byte(const line& in, line& out) {
223   uint8_t mod=0, reg_subop=0, rm32=0;
224   bool emit = false;
225   for (int i = 0;  i < SIZE(in.words);  ++i) {
226     const word& curr = in.words.at(i);
227     if (has_argument_metadata(curr, "mod")) {
228       mod = hex_byte(curr.data);
229       emit = true;
230     }
231     else if (has_argument_metadata(curr, "rm32")) {
232       rm32 = hex_byte(curr.data);
233       emit = true;
234     }
235     else if (has_argument_metadata(curr, "r32")) {
236       reg_subop = hex_byte(curr.data);
237       emit = true;
238     }
239     else if (has_argument_metadata(curr, "xm32")) {
240       rm32 = hex_byte(curr.data);
241       emit = true;
242     }
243     else if (has_argument_metadata(curr, "x32")) {
244       reg_subop = hex_byte(curr.data);
245       emit = true;
246     }
247     else if (has_argument_metadata(curr, "subop")) {
248       reg_subop = hex_byte(curr.data);
249       emit = true;
250     }
251   }
252   if (emit)
253     out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
254 }
255 
256 void add_sib_byte(const line& in, line& out) {
257   uint8_t scale=0, index=0, base=0;
258   bool emit = false;
259   for (int i = 0;  i < SIZE(in.words);  ++i) {
260     const word& curr = in.words.at(i);
261     if (has_argument_metadata(curr, "scale")) {
262       scale = hex_byte(curr.data);
263       emit = true;
264     }
265     else if (has_argument_metadata(curr, "index")) {
266       index = hex_byte(curr.data);
267       emit = true;
268     }
269     else if (has_argument_metadata(curr, "base")) {
270       base = hex_byte(curr.data);
271       emit = true;
272     }
273   }
274   if (emit)
275     out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
276 }
277 
278 void add_disp_bytes(const line& in, line& out) {
279   for (int i = 0;  i < SIZE(in.words);  ++i) {
280     const word& curr = in.words.at(i);
281     if (has_argument_metadata(curr, "disp8"))
282       emit_hex_bytes(out, curr, 1);
283     if (has_argument_metadata(curr, "disp16"))
284       emit_hex_bytes(out, curr, 2);
285     else if (has_argument_metadata(curr, "disp32"))
286       emit_hex_bytes(out, curr, 4);
287   }
288 }
289 
290 void add_imm_bytes(const line& in, line& out) {
291   for (int i = 0;  i < SIZE(in.words);  ++i) {
292     const word& curr = in.words.at(i);
293     if (has_argument_metadata(curr, "imm8"))
294       emit_hex_bytes(out, curr, 1);
295     else if (has_argument_metadata(curr, "imm32"))
296       emit_hex_bytes(out, curr, 4);
297   }
298 }
299 
300 void emit_hex_bytes(line& out, const word& w, int num) {
301   assert(num <= 4);
302   bool is_number = looks_like_hex_int(w.data);
303   if (num == 1 || !is_number) {
304     out.words.push_back(w);  // preserve existing metadata
305     if (is_number)
306       out.words.back().data = hex_byte_to_string(parse_int(w.data));
307     return;
308   }
309   emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
310 }
311 
312 void emit_hex_bytes(line& out, uint32_t val, int num) {
313   assert(num <= 4);
314   for (int i = 0;  i < num;  ++i) {
315     out.words.push_back(hex_byte_text(val & 0xff));
316     val = val >> 8;
317   }
318 }
319 
320 word hex_byte_text(uint8_t val) {
321   word result;
322   result.data = hex_byte_to_string(val);
323   result.original = result.data+"/auto";
324   return result;
325 }
326 
327 string hex_byte_to_string(uint8_t val) {
328   ostringstream out;
329   // uint8_t prints without padding, but int8_t will expand to 32 bits again
330   out << HEXBYTE << NUM(val);
331   return out.str();
332 }
333 
334 string to_string(const vector<word>& in) {
335   ostringstream out;
336   for (int i = 0;  i < SIZE(in);  ++i) {
337     if (i > 0) out << ' ';
338     out << in.at(i).data;
339   }
340   return out.str();
341 }
342 
343 :(before "End Unit Tests")
344 void test_preserve_metadata_when_emitting_single_byte() {
345   word in;
346   in.data = "f0";
347   in.original = "f0/foo";
348   line out;
349   emit_hex_bytes(out, in, 1);
350   CHECK_EQ(out.words.at(0).data, "f0");
351   CHECK_EQ(out.words.at(0).original, "f0/foo");
352 }
353 
354 :(code)
355 void test_pack_disp8() {
356   run(
357       "== code 0x1\n"
358       "74 2/disp8\n"  // jump 2 bytes away if ZF is set
359   );
360   CHECK_TRACE_CONTENTS(
361       "transform: packing instruction '74 2/disp8'\n"
362       "transform: instruction after packing: '74 02'\n"
363   );
364 }
365 
366 void test_pack_disp8_negative() {
367   transform(
368       "== code 0x1\n"
369       // running this will cause an infinite loop
370       "74 -1/disp8\n"  // jump 1 byte before if ZF is set
371   );
372   CHECK_TRACE_CONTENTS(
373       "transform: packing instruction '74 -1/disp8'\n"
374       "transform: instruction after packing: '74 ff'\n"
375   );
376 }
377 
378 void test_pack_rm32_direct() {
379   run(
380       "== code 0x1\n"
381       // instruction                     effective address                                                   operand     displacement    immediate\n"
382       // op          subop               mod             rm32          base        index         scale       r32\n"
383       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
384       "  01                              3/mod/direct    3/rm32/ebx                                          0/r32/eax                                \n"  // add EAX to EBX
385   );
386   CHECK_TRACE_CONTENTS(
387       "transform: packing instruction '01 3/mod/direct 3/rm32/ebx 0/r32/eax'\n"
388       "transform: instruction after packing: '01 c3'\n"
389   );
390 }
391 
392 void test_pack_rm32_indirect() {
393   transform(
394       "== code 0x1\n"
395       // instruction                     effective address                                                   operand     displacement    immediate\n"
396       // op          subop               mod             rm32          base        index         scale       r32\n"
397       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
398       "  01                              0/mod/indirect  3/rm32/ebx                                          0/r32/eax                                \n"  // add EAX to *EBX
399   );
400   CHECK_TRACE_CONTENTS(
401       "transform: packing instruction '01 0/mod/indirect 3/rm32/ebx 0/r32/eax'\n"
402       "transform: instruction after packing: '01 03'\n"
403   );
404 }
405 
406 void test_pack_x32() {
407   run(
408       "== code 0x1\n"
409       // instruction                     effective address                                                   operand     displacement    immediate\n"
410       // op          subop               mod             rm32          base        index         scale       r32\n"
411       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
412       "  f3 0f 2a                        3/mod/direct    3/rm32/ebx                                          1/x32                                    \n"  // convert EBX to XMM1
413   );
414   CHECK_TRACE_CONTENTS(
415       "transform: packing instruction 'f3 0f 2a 3/mod/direct 3/rm32/ebx 1/x32'\n"
416       "transform: instruction after packing: 'f3 0f 2a cb'\n"
417   );
418 }
419 
420 void test_pack_xm32_direct() {
421   transform(
422       "== code 0x1\n"
423       // instruction                     effective address                                                   operand     displacement    immediate\n"
424       // op          subop               mod             rm32          base        index         scale       r32\n"
425       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
426       "  f3 0f 5e                        3/mod/direct    3/xm32                                              1/x32                                    \n"  // divide XMM1 by XMM3
427   );
428   CHECK_TRACE_CONTENTS(
429       "transform: packing instruction 'f3 0f 5e 3/mod/direct 3/xm32 1/x32'\n"
430       "transform: instruction after packing: 'f3 0f 5e cb'\n"
431   );
432 }
433 
434 void test_pack_xm32_indirect() {
435   transform(
436       "== code 0x1\n"
437       // instruction                     effective address                                                   operand     displacement    immediate\n"
438       // op          subop               mod             rm32          base        index         scale       r32\n"
439       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
440       "  f3 0f 5e                        0/mod/indirect  3/rm32/ebx                                          1/x32                                    \n"  // divide XMM1 by *EBX
441   );
442   CHECK_TRACE_CONTENTS(
443       "transform: packing instruction 'f3 0f 5e 0/mod/indirect 3/rm32/ebx 1/x32'\n"
444       "transform: instruction after packing: 'f3 0f 5e 0b'\n"
445   );
446 }
447 
448 //: helper for scenario
449 void transform(const string& text_bytes) {
450   program p;
451   istringstream in(text_bytes);
452   parse(in, p);
453   if (trace_contains_errors()) return;
454   transform(p);
455 }
456 
457 void test_pack_modrm_imm32() {
458   run(
459       "== code 0x1\n"
460       // instruction                     effective address                                                   operand     displacement    immediate\n"
461       // op          subop               mod             rm32          base        index         scale       r32\n"
462       // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes\n"
463       "  81          0/add/subop         3/mod/direct    3/rm32/ebx                                                                      1/imm32      \n"  // add 1 to EBX
464   );
465   CHECK_TRACE_CONTENTS(
466       "transform: packing instruction '81 0/add/subop 3/mod/direct 3/rm32/ebx 1/imm32'\n"
467       "transform: instruction after packing: '81 c3 01 00 00 00'\n"
468   );
469 }
470 
471 void test_pack_imm32_large() {
472   run(
473       "== code 0x1\n"
474       "b9  0x080490a7/imm32\n"
475   );
476   CHECK_TRACE_CONTENTS(
477       "transform: packing instruction 'b9 0x080490a7/imm32'\n"
478       "transform: instruction after packing: 'b9 a7 90 04 08'\n"
479   );
480 }
481 
482 void test_pack_immediate_constants_hex() {
483   run(
484       "== code 0x1\n"
485       "b9  0x2a/imm32\n"
486   );
487   CHECK_TRACE_CONTENTS(
488       "transform: packing instruction 'b9 0x2a/imm32'\n"
489       "transform: instruction after packing: 'b9 2a 00 00 00'\n"
490       "run: copy imm32 0x0000002a to ECX\n"
491   );
492 }
493 
494 void test_pack_silently_ignores_non_hex() {
495   Hide_errors = true;
496   transform(
497       "== code 0x1\n"
498       "b9  foo/imm32\n"
499   );
500   CHECK_TRACE_CONTENTS(
501       "transform: packing instruction 'b9 foo/imm32'\n"
502       // no change (we're just not printing metadata to the trace)
503       "transform: instruction after packing: 'b9 foo'\n"
504   );
505 }
506 
507 void test_pack_flags_bad_hex() {
508   Hide_errors = true;
509   run(
510       "== code 0x1\n"
511       "b9  0xfoo/imm32\n"
512   );
513   CHECK_TRACE_CONTENTS(
514       "error: not a number: 0xfoo\n"
515   );
516 }
517 
518 void test_pack_flags_uppercase_hex() {
519   Hide_errors = true;
520   run(
521       "== code 0x1\n"
522       "b9 0xAb/imm32\n"
523   );
524   CHECK_TRACE_CONTENTS(
525       "error: uppercase hex not allowed: 0xAb\n"
526   );
527 }
528 
529 //:: helpers
530 
531 bool all_hex_bytes(const line& inst) {
532   for (int i = 0;  i < SIZE(inst.words);  ++i)
533     if (!is_hex_byte(inst.words.at(i)))
534       return false;
535   return true;
536 }
537 
538 bool is_hex_byte(const word& curr) {
539   if (contains_any_argument_metadata(curr))
540     return false;
541   if (SIZE(curr.data) != 2)
542     return false;
543   if (curr.data.find_first_not_of("0123456789abcdef") != string::npos)
544     return false;
545   return true;
546 }
547 
548 bool contains_any_argument_metadata(const word& word) {
549   for (int i = 0;  i < SIZE(word.metadata);  ++i)
550     if (Instruction_arguments.find(word.metadata.at(i)) != Instruction_arguments.end())
551       return true;
552   return false;
553 }
554 
555 bool has_argument_metadata(const line& inst, const string& m) {
556   bool result = false;
557   for (int i = 0;  i < SIZE(inst.words);  ++i) {
558     if (!has_argument_metadata(inst.words.at(i), m)) continue;
559     if (result) {
560       raise << "'" << to_string(inst) << "' has conflicting " << m << " arguments\n" << end();
561       return false;
562     }
563     result = true;
564   }
565   return result;
566 }
567 
568 bool has_argument_metadata(const word& w, const string& m) {
569   bool result = false;
570   bool metadata_found = false;
571   for (int i = 0;  i < SIZE(w.metadata);  ++i) {
572     const string& curr = w.metadata.at(i);
573     if (Instruction_arguments.find(curr) == Instruction_arguments.end()) continue;  // ignore unrecognized metadata
574     if (metadata_found) {
575       raise << "'" << w.original << "' has conflicting argument types; it should have only one\n" << end();
576       return false;
577     }
578     metadata_found = true;
579     result = (curr == m);
580   }
581   return result;
582 }
583 
584 word metadata(const line& inst, const string& m) {
585   for (int i = 0;  i < SIZE(inst.words);  ++i)
586     if (has_argument_metadata(inst.words.at(i), m))
587       return inst.words.at(i);
588   assert(false);
589 }
590 
591 bool looks_like_hex_int(const string& s) {
592   if (s.empty()) return false;
593   if (s.at(0) == '-' || s.at(0) == '+') return true;
594   if (isdigit(s.at(0))) return true;  // includes '0x' prefix
595   // End looks_like_hex_int(s) Detectors
596   return false;
597 }
598 
599 string to_string(const line& inst) {
600   ostringstream out;
601   for (int i = 0;  i < SIZE(inst.words);  ++i) {
602     if (i > 0) out << ' ';
603     out << inst.words.at(i).original;
604   }
605   return out.str();
606 }
607 
608 int32_t parse_int(const string& s) {
609   if (s.empty()) return 0;
610   if (contains_uppercase(s)) {
611     raise << "uppercase hex not allowed: " << s << '\n' << end();
612     return 0;
613   }
614   istringstream in(s);
615   in >> std::hex;
616   if (s.at(0) == '-') {
617     int32_t result = 0;
618     in >> result;
619     if (!in || !in.eof()) {
620       raise << "not a number: " << s << '\n' << end();
621       return 0;
622     }
623     return result;
624   }
625   uint32_t uresult = 0;
626   in >> uresult;
627   if (!in || !in.eof()) {
628     raise << "not a number: " << s << '\n' << end();
629     return 0;
630   }
631   return static_cast<int32_t>(uresult);
632 }
633 :(before "End Unit Tests")
634 void test_parse_int() {
635   CHECK_EQ(0, parse_int("0"));
636   CHECK_EQ(0, parse_int("0x0"));
637   CHECK_EQ(0, parse_int("0x0"));
638   CHECK_EQ(16, parse_int("10"));  // hex always
639   CHECK_EQ(-1, parse_int("-1"));
640   CHECK_EQ(-1, parse_int("0xffffffff"));
641 }