Mu - subx/035labels.cc

From 6e1eeeebfb453fa7c871869c19375ce60fbd7413 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sat, 27 Jul 2019 16:01:55 -0700 Subject: 5485 - promote SubX to top-level --- html/035labels.cc.html | 448 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 448 insertions(+) create mode 100644 html/035labels.cc.html (limited to 'html/035labels.cc.html') diff --git a/html/035labels.cc.html b/html/035labels.cc.html new file mode 100644 index 00000000..b143a3f8 --- /dev/null +++ b/html/035labels.cc.html @@ -0,0 +1,448 @@ + + + + +Mu - subx/035labels.cc + + + + + + + + + + +https://github.com/akkartik/mu/blob/master/subx/035labels.cc +
+  1 //: Labels are defined by ending names with a ':'. This layer will compute
+  2 //: displacements for labels, and compute the offset for instructions using them.
+  3 //:
+  4 //: We won't check this, but our convention will be that jump targets will
+  5 //: start with a '$', while functions will not. Function names will never be
+  6 //: jumped to, and jump targets will never be called.
+  7 
+  8 //: We're introducing non-number names for the first time, so it's worth
+  9 //: laying down some ground rules all transforms will follow, so things don't
+ 10 //: get too confusing:
+ 11 //:   - if it starts with a digit, it's treated as a number. If it can't be
+ 12 //:     parsed as hex it will raise an error.
+ 13 //:   - if it starts with '-' it's treated as a number.
+ 14 //:   - if it starts with '0x' it's treated as a number.
+ 15 //:   - if it's two characters long, it can't be a name. Either it's a hex
+ 16 //:     byte, or it raises an error.
+ 17 //: That's it. Names can start with any non-digit that isn't a dash. They can
+ 18 //: be a single character long. 'a' is not a hex number, it's a variable.
+ 19 //: Later layers may add more conventions partitioning the space of names. But
+ 20 //: the above rules will remain inviolate.
+ 21 
+ 22 //: One special label: the address to start running the program at.
+ 23 
+ 24 void test_entry_label() {
+ 25   run(
+ 26       "== code 0x1\n"
+ 27       "05 0x0d0c0b0a/imm32\n"
+ 28       "Entry:\n"
+ 29       "05 0x0d0c0b0a/imm32\n"
+ 30   );
+ 31   CHECK_TRACE_CONTENTS(
+ 32       "run: 0x00000006 opcode: 05\n"
+ 33   );
+ 34   CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000001 opcode: 05");
+ 35 }
+ 36 
+ 37 :(before "End Globals")
+ 38 uint32_t Entry_address = 0;
+ 39 :(before "End Reset")
+ 40 Entry_address = 0;
+ 41 :(before "End Initialize EIP")
+ 42 if (Entry_address) EIP = Entry_address;
+ 43 :(after "Override e_entry")
+ 44 if (Entry_address) e_entry = Entry_address;
+ 45 
+ 46 :(before "End looks_like_hex_int(s) Detectors")
+ 47 if (SIZE(s) == 2) return true;
+ 48 
+ 49 :(code)
+ 50 void test_pack_immediate_ignores_single_byte_nondigit_operand() {
+ 51   Hide_errors = true;
+ 52   transform(
+ 53       "== code 0x1\n"
+ 54       "b9/copy  a/imm32\n"
+ 55   );
+ 56   CHECK_TRACE_CONTENTS(
+ 57       "transform: packing instruction 'b9/copy a/imm32'\n"
+ 58       // no change (we're just not printing metadata to the trace)
+ 59       "transform: instruction after packing: 'b9 a'\n"
+ 60   );
+ 61 }
+ 62 
+ 63 void test_pack_immediate_ignores_3_hex_digit_operand() {
+ 64   Hide_errors = true;
+ 65   transform(
+ 66       "== code 0x1\n"
+ 67       "b9/copy  aaa/imm32\n"
+ 68   );
+ 69   CHECK_TRACE_CONTENTS(
+ 70       "transform: packing instruction 'b9/copy aaa/imm32'\n"
+ 71       // no change (we're just not printing metadata to the trace)
+ 72       "transform: instruction after packing: 'b9 aaa'\n"
+ 73   );
+ 74 }
+ 75 
+ 76 void test_pack_immediate_ignores_non_hex_operand() {
+ 77   Hide_errors = true;
+ 78   transform(
+ 79       "== code 0x1\n"
+ 80       "b9/copy xxx/imm32\n"
+ 81   );
+ 82   CHECK_TRACE_CONTENTS(
+ 83       "transform: packing instruction 'b9/copy xxx/imm32'\n"
+ 84       // no change (we're just not printing metadata to the trace)
+ 85       "transform: instruction after packing: 'b9 xxx'\n"
+ 86   );
+ 87 }
+ 88 
+ 89 //: a helper we'll find handy later
+ 90 void check_valid_name(const string& s) {
+ 91   if (s.empty()) {
+ 92     raise << "empty name!\n" << end();
+ 93     return;
+ 94   }
+ 95   if (s.at(0) == '-')
+ 96     raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
+ 97   if (s.substr(0, 2) == "0x") {
+ 98     raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
+ 99     return;
+100   }
+101   if (isdigit(s.at(0)))
+102     raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
+103   if (SIZE(s) == 2)
+104     raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
+105 }
+106 
+107 //: Now that that's done, let's start using names as labels.
+108 
+109 void test_map_label() {
+110   transform(
+111       "== code 0x1\n"
+112       "loop:\n"
+113       "  05  0x0d0c0b0a/imm32\n"
+114   );
+115   CHECK_TRACE_CONTENTS(
+116       "transform: label 'loop' is at address 1\n"
+117   );
+118 }
+119 
+120 :(before "End Level-2 Transforms")
+121 Transform.push_back(rewrite_labels);
+122 :(code)
+123 void rewrite_labels(program& p) {
+124   trace(3, "transform") << "-- rewrite labels" << end();
+125   if (p.segments.empty()) return;
+126   segment& code = *find(p, "code");
+127   map<string, int32_t> byte_index;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
+128   compute_byte_indices_for_labels(code, byte_index);
+129   if (trace_contains_errors()) return;
+130   drop_labels(code);
+131   if (trace_contains_errors()) return;
+132   replace_labels_with_displacements(code, byte_index);
+133   if (contains_key(byte_index, "Entry"))
+134     Entry_address = code.start + get(byte_index, "Entry");
+135 }
+136 
+137 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
+138   int current_byte = 0;
+139   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+140     const line& inst = code.lines.at(i);
+141     if (Source_lines_file.is_open() && !inst.original.empty() && /*not a label*/ *inst.words.at(0).data.rbegin() != ':')
+142       Source_lines_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << inst.original << '\n';
+143     for (int j = 0;  j < SIZE(inst.words);  ++j) {
+144       const word& curr = inst.words.at(j);
+145       // hack: if we have any operand metadata left after previous transforms,
+146       // deduce its size
+147       // Maybe we should just move this transform to before instruction
+148       // packing, and deduce the size of *all* operands. But then we'll also
+149       // have to deal with bitfields.
+150       if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
+151         if (*curr.data.rbegin() == ':')
+152           raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
+153         current_byte += 4;
+154       }
+155       else if (has_operand_metadata(curr, "disp16")) {
+156         if (*curr.data.rbegin() == ':')
+157           raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
+158         current_byte += 2;
+159       }
+160       // automatically handle /disp8 and /imm8 here
+161       else if (*curr.data.rbegin() != ':') {
+162         ++current_byte;
+163       }
+164       else {
+165         string label = drop_last(curr.data);
+166         // ensure labels look sufficiently different from raw hex
+167         check_valid_name(label);
+168         if (trace_contains_errors()) return;
+169         if (contains_any_operand_metadata(curr))
+170           raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
+171         if (j > 0)
+172           raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
+173         if (Labels_file.is_open())
+174           Labels_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
+175         if (contains_key(byte_index, label) && label != "Entry") {
+176           raise << "duplicate label '" << label << "'\n" << end();
+177           return;
+178         }
+179         put(byte_index, label, current_byte);
+180         trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
+181         // no modifying current_byte; label definitions won't be in the final binary
+182       }
+183     }
+184   }
+185 }
+186 
+187 :(before "End Globals")
+188 bool Dump_debug_info = false;  // currently used only by 'subx translate'
+189 ofstream Labels_file;
+190 ofstream Source_lines_file;
+191 :(before "End Commandline Options")
+192 else if (is_equal(*arg, "--debug")) {
+193   Dump_debug_info = true;
+194   // End --debug Settings
+195 }
+196 //: wait to open "labels" for writing until we're sure we aren't trying to read it
+197 :(after "Begin subx translate")
+198 if (Dump_debug_info) {
+199   cerr << "saving address->label information to 'labels'\n";
+200   Labels_file.open("labels");
+201   cerr << "saving address->source information to 'source_lines'\n";
+202   Source_lines_file.open("source_lines");
+203 }
+204 :(before "End subx translate")
+205 if (Dump_debug_info) {
+206   Labels_file.close();
+207   Source_lines_file.close();
+208 }
+209 
+210 :(code)
+211 void drop_labels(segment& code) {
+212   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+213     line& inst = code.lines.at(i);
+214     vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
+215     inst.words.erase(new_end, inst.words.end());
+216   }
+217 }
+218 
+219 bool is_label(const word& w) {
+220   return *w.data.rbegin() == ':';
+221 }
+222 
+223 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
+224   int32_t byte_index_next_instruction_starts_at = 0;
+225   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+226     line& inst = code.lines.at(i);
+227     byte_index_next_instruction_starts_at += num_bytes(inst);
+228     line new_inst;
+229     for (int j = 0;  j < SIZE(inst.words);  ++j) {
+230       const word& curr = inst.words.at(j);
+231       if (contains_key(byte_index, curr.data)) {
+232         int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
+233         if (has_operand_metadata(curr, "disp8")) {
+234           if (displacement > 0x7f || displacement < -0x7f)
+235             raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 signed bits\n" << end();
+236           else
+237             emit_hex_bytes(new_inst, displacement, 1);
+238         }
+239         else if (has_operand_metadata(curr, "disp16")) {
+240           if (displacement > 0x7fff || displacement < -0x7fff)
+241             raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 signed bits\n" << end();
+242           else
+243             emit_hex_bytes(new_inst, displacement, 2);
+244         }
+245         else if (has_operand_metadata(curr, "disp32")) {
+246           emit_hex_bytes(new_inst, displacement, 4);
+247         } else if (has_operand_metadata(curr, "imm32")) {
+248           emit_hex_bytes(new_inst, code.start + get(byte_index, curr.data), 4);
+249         }
+250       }
+251       else {
+252         new_inst.words.push_back(curr);
+253       }
+254     }
+255     inst.words.swap(new_inst.words);
+256     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
+257   }
+258 }
+259 
+260 string data_to_string(const line& inst) {
+261   ostringstream out;
+262   for (int i = 0;  i < SIZE(inst.words);  ++i) {
+263     if (i > 0) out << ' ';
+264     out << inst.words.at(i).data;
+265   }
+266   return out.str();
+267 }
+268 
//: Return a copy of 's' without its final character.
//: An empty string yields an empty string (stepping back from end() of an
//: empty string would be undefined behavior).
std::string drop_last(const std::string& s) {
  if (s.empty()) return std::string();
  // 'end() - 1' rather than '--end()': the latter modifies a temporary, which
  // doesn't compile when the iterator is a plain pointer
  return std::string(s.begin(), s.end() - 1);
}
+272 
+273 //: Label definitions must be the first word on a line. No jumping inside
+274 //: instructions.
+275 //: They should also be the only word on a line.
+276 //: However, you can absolutely have multiple labels map to the same address,
+277 //: as long as they're on separate lines.
+278 
+279 void test_multiple_labels_at() {
+280   transform(
+281       "== code 0x1\n"
+282       // address 1
+283       "loop:\n"
+284       " $loop2:\n"
+285       // address 1 (labels take up no space)
+286       "    05  0x0d0c0b0a/imm32\n"
+287       // address 6
+288       "    eb  $loop2/disp8\n"
+289       // address 8
+290       "    eb  $loop3/disp8\n"
+291       // address 0xa
+292       " $loop3:\n"
+293   );
+294   CHECK_TRACE_CONTENTS(
+295       "transform: label 'loop' is at address 1\n"
+296       "transform: label '$loop2' is at address 1\n"
+297       "transform: label '$loop3' is at address a\n"
+298       // first jump is to -7
+299       "transform: instruction after transform: 'eb f9'\n"
+300       // second jump is to 0 (fall through)
+301       "transform: instruction after transform: 'eb 00'\n"
+302   );
+303 }
+304 
+305 void test_loading_label_as_imm32() {
+306   transform(
+307       "== code 0x1\n"
+308       "label:\n"
+309       "  be/copy-to-ESI  label/imm32\n"
+310   );
+311   CHECK_TRACE_CONTENTS(
+312       "transform: label 'label' is at address 1\n"
+313       "transform: instruction after transform: 'be 01 00 00 00'\n"
+314   );
+315 }
+316 
+317 void test_duplicate_label() {
+318   Hide_errors = true;
+319   transform(
+320       "== code 0x1\n"
+321       "loop:\n"
+322       "loop:\n"
+323       "    05  0x0d0c0b0a/imm32\n"
+324   );
+325   CHECK_TRACE_CONTENTS(
+326       "error: duplicate label 'loop'\n"
+327   );
+328 }
+329 
+330 void test_label_too_short() {
+331   Hide_errors = true;
+332   transform(
+333       "== code 0x1\n"
+334       "xz:\n"
+335       "  05  0x0d0c0b0a/imm32\n"
+336   );
+337   CHECK_TRACE_CONTENTS(
+338       "error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name\n"
+339   );
+340 }
+341 
+342 void test_label_hex() {
+343   Hide_errors = true;
+344   transform(
+345       "== code 0x1\n"
+346       "0xab:\n"
+347       "  05  0x0d0c0b0a/imm32\n"
+348   );
+349   CHECK_TRACE_CONTENTS(
+350       "error: '0xab' looks like a hex number; use a different name\n"
+351   );
+352 }
+353 
+354 void test_label_negative_hex() {
+355   Hide_errors = true;
+356   transform(
+357       "== code 0x1\n"
+358       "-a:\n"
+359       "    05  0x0d0c0b0a/imm32\n"
+360   );
+361   CHECK_TRACE_CONTENTS(
+362       "error: '-a' starts with '-', which can be confused with a negative number; use a different name\n"
+363   );
+364 }
+365 
+366 //: now that we have labels, we need to adjust segment size computation to
+367 //: ignore them.
+368 
+369 void test_segment_size_ignores_labels() {
+370   transform(
+371       "== code 0x09000074\n"
+372       "  05/add  0x0d0c0b0a/imm32\n"  // 5 bytes
+373       "foo:\n"                        // 0 bytes
+374       "== data 0x0a000000\n"
+375       "bar:\n"
+376       "  00\n"
+377   );
+378   CHECK_TRACE_CONTENTS(
+379       "transform: segment 1 begins at address 0x0a000079\n"
+380   );
+381 }
+382 
+383 :(before "End size_of(word w) Special-cases")
+384 else if (is_label(w))
+385   return 0;
+
+ + + -- cgit 1.4.1-2-gfad0