//: Labels are defined by ending names with a ':'. This layer will compute
//: displacements for labels, and compute the offset for instructions using them.
//:
//: We won't check this, but our convention will be that jump targets will
//: start with a '$', while functions will not. Function names will never be
//: jumped to, and jump targets will never be called.
//:
//: We're introducing non-number names for the first time, so it's worth
//: laying down some ground rules all transforms will follow, so things don't
//: get too confusing:
//:   - if it starts with a digit, it's treated as a number. If it can't be
//:     parsed as hex it will raise an error.
//:   - if it starts with '-' it's treated as a number.
//:   - if it starts with '0x' it's treated as a number.
//:   - if it's two characters long, it can't be a name. Either it's a hex
//:     byte, or it raises an error.
//: That's it. Names can start with any non-digit that isn't a dash. They can
//: be a single character long. 'a' is not a hex number, it's a variable.
//: Later layers may add more conventions partitioning the space of names. But
//: the above rules will remain inviolate.
//:
//: One special label is 'Entry', the address to start running the program at.
//: It can be non-unique; the last declaration overrides earlier ones.
//: It must exist in a program. Otherwise we don't know where to start running
//: programs.
void test_Entry_label() { run( "== code 0x1\n" "05 0x0d0c0b0a/imm32\n" "Entry:\n" "05 0x0d0c0b0a/imm32\n" ); CHECK_TRACE_CONTENTS( "run: 0x00000006 opcode: 05\n" ); CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000001 opcode: 05"); } :(before "End looks_like_hex_int(s) Detectors") if (SIZE(s) == 2) return true; :(code) void test_pack_immediate_ignores_single_byte_nondigit_operand() { Hide_errors = true; transform( "== code 0x1\n" "b9/copy a/imm32\n" ); CHECK_TRACE_CONTENTS( "transform: packing instruction 'b9/copy a/imm32'\n" // no change (we're just not printing metadata to the trace) "transform: instruction after packing: 'b9 a'\n" ); } void test_pack_immediate_ignores_3_hex_digit_operand() { Hide_errors = true; transform( "== code 0x1\n" "b9/copy aaa/imm32\n" ); CHECK_TRACE_CONTENTS( "transform: packing instruction 'b9/copy aaa/imm32'\n" // no change (we're just not printing metadata to the trace) "transform: instruction after packing: 'b9 aaa'\n" ); } void test_pack_immediate_ignores_non_hex_operand() { Hide_errors = true; transform( "== code 0x1\n" "b9/copy xxx/imm32\n" ); CHECK_TRACE_CONTENTS( "transform: packing instruction 'b9/copy xxx/imm32'\n" // no change (we're just not printing metadata to the trace) "transform: instruction after packing: 'b9 xxx'\n" ); } //: a helper we'll find handy later void check_valid_name(const string& s) { if (s.empty()) { raise << "empty name!\n" << end(); return; } if (s.at(0) == '-') raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end(); if (s.substr(0, 2) == "0x") { raise << "'" << s << "' looks like a hex number; use a different name\n" << end(); return; } if (isdigit(s.at(0))) raise << "'" << s << "' starts with a digit, and so can be confused with a number; use a different name.\n" << end(); if (SIZE(s) == 2) raise << "'" << s << "' is two characters long, which can look lik
*~
*.pyc
*.pyo
stuff/*
lacement << " to fit in 16 signed bits\n" << end(); else emit_hex_bytes(new_inst, displacement, 2); } else if (has_operand_metadata(curr, "disp32")) { emit_hex_bytes(new_inst, displacement, 4); } else if (has_operand_metadata(curr, "imm32")) { emit_hex_bytes(new_inst, code.start + get(byte_index, curr.data), 4); } } else { new_inst.words.push_back(curr); } } inst.words.swap(new_inst.words); trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end(); } } string data_to_string(const line& inst) { ostringstream out; for (int i = 0; i < SIZE(inst.words); ++i) { if (i > 0) out << ' '; out << inst.words.at(i).data; } return out.str(); } string drop_last(const string& s) { return string(s.begin(), --s.end()); } //: Label definitions must be the first word on a line. No jumping inside //: instructions. //: They should also be the only word on a line. //: However, you can absolutely have multiple labels map to the same address, //: as long as they're on separate lines. 
void test_multiple_labels_at() { transform( "== code 0x1\n" // address 1 "loop:\n" " $loop2:\n" // address 1 (labels take up no space) " 05 0x0d0c0b0a/imm32\n" // address 6 " eb $loop2/disp8\n" // address 8 " eb $loop3/disp8\n" // address 0xa " $loop3:\n" ); CHECK_TRACE_CONTENTS( "transform: label 'loop' is at address 1\n" "transform: label '$loop2' is at address 1\n" "transform: label '$loop3' is at address a\n" // first jump is to -7 "transform: instruction after transform: 'eb f9'\n" // second jump is to 0 (fall through) "transform: instruction after transform: 'eb 00'\n" ); } void test_loading_label_as_imm32() { transform( "== code 0x1\n" "label:\n" " be/copy-to-ESI label/imm32\n" ); CHECK_TRACE_CONTENTS( "transform: label 'label' is at address 1\n" "transform: instruction after transform: 'be 01 00 00 00'\n" ); } void test_duplicate_label() { Hide_errors = true; transform( "== code 0x1\n" "loop:\n" "loop:\n" " 05 0x0d0c0b0a/imm32\n" ); CHECK_TRACE_CONTENTS( "error: duplicate label 'loop'\n" ); } void test_label_too_short() { Hide_errors = true; transform( "== code 0x1\n" "xz:\n" " 05 0x0d0c0b0a/imm32\n" ); CHECK_TRACE_CONTENTS( "error: 'xz' is two characters long, which can look like raw hex bytes at a glance; use a different name\n" ); } void test_label_hex() { Hide_errors = true; transform( "== code 0x1\n" "0xab:\n" " 05 0x0d0c0b0a/imm32\n" ); CHECK_TRACE_CONTENTS( "error: '0xab' looks like a hex number; use a different name\n" ); } void test_label_negative_hex() { Hide_errors = true; transform( "== code 0x1\n" "-a:\n" " 05 0x0d0c0b0a/imm32\n" ); CHECK_TRACE_CONTENTS( "error: '-a' starts with '-', which can be confused with a negative number; use a different name\n" ); } //: As said up top, the 'Entry' label is special. //: It can be non-unique; the last declaration overrides earlier ones. //: It must exist in a program. Otherwise we don't know where to start running //: programs. 
void test_duplicate_Entry_label() { transform( "== code 0x1\n" "Entry:\n" "Entry:\n" " 05 0x0d0c0b0a/imm32\n" ); CHECK_TRACE_DOESNT_CONTAIN_ERRORS(); } // This test could do with some refactoring. // We're duplicating the flow inside `bootstrap translate`, but without // reading/writing files. // We can't just use run(string) because most of our tests allow programs // without 'Entry' labels, as a convenience. void test_programs_without_Entry_label() { Hide_errors = true; program p; istringstream in( "== code 0x1\n" "05 0x0d0c0b0a/imm32\n" "05 0x0d0c0b0a/imm32\n" ); parse(in, p); transform(p); ostringstream dummy; save_elf(p, dummy); CHECK_TRACE_CONTENTS( "error: no 'Entry' label found\n" ); } //: now that we have labels, we need to adjust segment size computation to //: ignore them. void test_segment_size_ignores_labels() { transform( "== code 0x09000074\n" " 05/add 0x0d0c0b0a/imm32\n" // 5 bytes "foo:\n" // 0 bytes "== data 0x0a000000\n" "bar:\n" " 00\n" ); CHECK_TRACE_CONTENTS( "transform: segment 1 begins at address 0x0a000079\n" ); } :(before "End size_of(word w) Special-cases") else if (is_label(w)) return 0;