1 //: Labels are defined by ending names with a ':'. This layer will compute
  2 //: displacements for labels, and compute the offset for instructions using them.
  3 //:
  4 //: We won't check this, but our convention will be that jump targets will
  5 //: start with a '$', while functions will not. Function names will never be
  6 //: jumped to, and jump targets will never be called.
  7 
  8 //: We're introducing non-number names for the first time, so it's worth
  9 //: laying down some ground rules all transforms will follow, so things don't
 10 //: get too confusing:
 11 //:   - if it starts with a digit, it's treated as a number. If it can't be
 12 //:     parsed as hex it will raise an error.
 13 //:   - if it starts with '-' it's treated as a number.
 14 //:   - if it starts with '0x' it's treated as a number.
 15 //:   - if it's two characters long, it can't be a name. Either it's a hex
 16 //:     byte, or it raises an error.
 17 //: That's it. Names can start with any non-digit that isn't a dash. They can
 18 //: be a single character long. 'a' is not a hex number, it's a variable.
 19 //: Later layers may add more conventions partitioning the space of names. But
 20 //: the above rules will remain inviolate.
 21 
 22 :(before "End looks_like_hex_int(s) Detectors")
 23 if (SIZE(s) == 2) return true;  // every two-character word is claimed as a hex byte, so it can never be a name
 24 
 25 :(scenarios transform)
 26 :(scenario pack_immediate_ignores_single_byte_nondigit_operand)
 27 % Hide_errors = true;
 28 == 0x1
 29 b9/copy  a/imm32
 30 +transform: packing instruction 'b9/copy a/imm32'
 31 # no change (we're just not printing metadata to the trace)
 32 +transform: instruction after packing: 'b9 a'
 33 
 34 :(scenario pack_immediate_ignores_3_hex_digit_operand)
 35 % Hide_errors = true;
 36 == 0x1
 37 b9/copy  aaa/imm32
 38 +transform: packing instruction 'b9/copy aaa/imm32'
 39 # no change (we're just not printing metadata to the trace)
 40 +transform: instruction after packing: 'b9 aaa'
 41 
 42 :(scenario pack_immediate_ignores_non_hex_operand)
 43 % Hide_errors = true;
 44 == 0x1
 45 b9/copy xxx/imm32
 46 +transform: packing instruction 'b9/copy xxx/imm32'
 47 # no change (we're just not printing metadata to the trace)
 48 +transform: instruction after packing: 'b9 xxx'
 49 
 50 //: a helper we'll find handy later
 51 :(code)
 52 void check_valid_name(const string& s) {
 53   if (s.empty()) {
 54     raise << "empty name!\n" << end();
 55     return;
 56   }
 57   if (s.at(0) == '-')
 58     raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
 59   if (s.substr(0, 2) == "0x") {
 60     raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
 61     return;
 62   }
 63   if (isdigit(s.at(0)))
 64     raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
 65   if (SIZE(s) == 2)
 66     raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
 67 }
 68 
 69 //: Now that that's done, let's start using names as labels.
 70 
 71 :(scenario map_label)
 72 == 0x1
 73 loop:
 74   05  0x0d0c0b0a/imm32
 75 +transform: label 'loop' is at address 1
 76 
 77 :(before "End Level-2 Transforms")
 78 Transform.push_back(rewrite_labels);  // register label rewriting as one of the level-2 transforms
 79 :(code)
 80 void rewrite_labels(program& p) {
 81   trace(99, "transform") << "-- rewrite labels" << end();
 82   if (p.segments.empty()) return;
 83   segment& code = p.segments.at(0);
 84   map<string, int32_t> byte_index;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
 85   compute_byte_indices_for_labels(code, byte_index);
 86   if (trace_contains_errors()) return;
 87   drop_labels(code);
 88   if (trace_contains_errors()) return;
 89   replace_labels_with_displacements(code, byte_index);
 90 }
 91 
 92 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
 93   int current_byte = 0;
 94   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 95     const line& inst = code.lines.at(i);
 96     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 97       const word& curr = inst.words.at(j);
 98       // hack: if we have any operand metadata left after previous transforms,
 99       // deduce its size
100       // Maybe we should just move this transform to before instruction
101       // packing, and deduce the size of *all* operands. But then we'll also
102       // have to deal with bitfields.
103       if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
104         if (*curr.data.rbegin() == ':')
105           raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
106         current_byte += 4;
107       }
108       // automatically handle /disp8 and /imm8 here
109       else if (*curr.data.rbegin() != ':') {
110         ++current_byte;
111       }
112       else {
113         string label = drop_last(curr.data);
114         // ensure labels look sufficiently different from raw hex
115         check_valid_name(label);
116         if (trace_contains_errors()) return;
117         if (contains_any_operand_metadata(curr))
118           raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
119         if (j > 0)
120           raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
121         if (Map_file.is_open())
122           Map_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
123         put(byte_index, label, current_byte);
124         trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
125         // no modifying current_byte; label definitions won't be in the final binary
126       }
127     }
128   }
129 }
130 
131 :(before "End Globals")
132 bool Dump_map = false;  // set by the --map commandline option; currently used only by 'subx translate'
133 ofstream Map_file;  // while open, receives one "address label" line per label definition
134 :(before "End Commandline Options")
135 else if (is_equal(*arg, "--map")) {
136   Dump_map = true;
137   // End --map Settings
138 }
139 //: wait to open "map" for writing until we're sure we aren't trying to read it
140 :(after "Begin subx translate")
141 if (Dump_map)
142   Map_file.open("map");
143 :(before "End subx translate")
144 if (Dump_map)
145   Map_file.close();
146 
147 :(code)
148 void drop_labels(segment& code) {
149   for (int i = 0;  i < SIZE(code.lines);  ++i) {
150     line& inst = code.lines.at(i);
151     vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
152     inst.words.erase(new_end, inst.words.end());
153   }
154 }
155 
156 bool is_label(const word& w) {
157   return *w.data.rbegin() == ':';
158 }
159 
160 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
161   int32_t byte_index_next_instruction_starts_at = 0;
162   for (int i = 0;  i < SIZE(code.lines);  ++i) {
163     line& inst = code.lines.at(i);
164     byte_index_next_instruction_starts_at += num_bytes(inst);
165     line new_inst;
166     for (int j = 0;  j < SIZE(inst.words);  ++j) {
167       const word& curr = inst.words.at(j);
168       if (contains_key(byte_index, curr.data)) {
169         int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
170         if (has_operand_metadata(curr, "disp8")) {
171           if (displacement > 0xff || displacement < -0x7f)
172             raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end();
173           else
174             emit_hex_bytes(new_inst, displacement, 1);
175         }
176         else if (has_operand_metadata(curr, "disp16")) {
177           if (displacement > 0xffff || displacement < -0x7fff)
178             raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end();
179           else
180             emit_hex_bytes(new_inst, displacement, 2);
181         }
182         else if (has_operand_metadata(curr, "disp32")) {
183           emit_hex_bytes(new_inst, displacement, 4);
184         }
185       }
186       else {
187         new_inst.words.push_back(curr);
188       }
189     }
190     inst.words.swap(new_inst.words);
191     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
192   }
193 }
194 
195 string data_to_string(const line& inst) {
196   ostringstream out;
197   for (int i = 0;  i < SIZE(inst.words);  ++i) {
198     if (i > 0) out << ' ';
199     out << inst.words.at(i).data;
200   }
201   return out.str();
202 }
203 
// Return a copy of 's' without its final character.
// An empty string is returned unchanged, since there is nothing to drop.
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
207 
208 //: Label definitions must be the first word on a line. No jumping inside
209 //: instructions.
210 //: They should also be the only word on a line.
211 //: However, you can absolutely have multiple labels map to the same address,
212 //: as long as they're on separate lines.
213 
214 :(scenario multiple_labels_at)
215 == 0x1
216 # address 1
217 loop:
218  $loop2:
219 # address 1 (labels take up no space)
220     05  0x0d0c0b0a/imm32
221 # address 6
222     eb  $loop2/disp8
223 # address 8
224     eb  $loop3/disp8
225 # address 0xa
226  $loop3:
227 +transform: label 'loop' is at address 1
228 +transform: label '$loop2' is at address 1
229 +transform: label '$loop3' is at address a
230 # first jump is to -7
231 +transform: instruction after transform: 'eb f9'
232 # second jump is to 0 (fall through)
233 +transform: instruction after transform: 'eb 00'
234 
235 :(scenario label_too_short)
236 % Hide_errors = true;
237 == 0x1
238 xz:
239   05  0x0d0c0b0a/imm32
240 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
241 
242 :(scenario label_hex)
243 % Hide_errors = true;
244 == 0x1
245 0xab:
246   05  0x0d0c0b0a/imm32
247 +error: '0xab' looks like a hex number; use a different name
248 
249 :(scenario label_negative_hex)
250 % Hide_errors = true;
251 == 0x1
252  -a:  # indent to avoid looking like a trace_should_not_contain command for this scenario
253     05  0x0d0c0b0a/imm32
254 +error: '-a' starts with '-', which can be confused with a negative number; use a different name
255 
256 //: now that we have labels, we need to adjust segment size computation to
257 //: ignore them.
258 
259 :(scenario segment_size_ignores_labels)
260 == code  # 0x09000074
261   05/add  0x0d0c0b0a/imm32  # 5 bytes
262 foo:                      # 0 bytes
263 == data  # 0x0a000079
264 bar:
265   00
266 +transform: segment 1 begins at address 0x0a000079
267 
268 :(before "End num_bytes(curr) Special-cases")
269 else if (is_label(curr))
270   ;  // don't count it: label definitions won't be in the final binary