1 //: Labels are defined by ending names with a ':'. This layer will compute
  2 //: displacements for labels, and compute the offset for instructions using them.
  3 //:
  4 //: We won't check this, but our convention will be that jump targets will
  5 //: start with a '$', while functions will not. Function names will never be
  6 //: jumped to, and jump targets will never be called.
  7 
  8 //: We're introducing non-number names for the first time, so it's worth
  9 //: laying down some ground rules all transforms will follow, so things don't
 10 //: get too confusing:
 11 //:   - if it starts with a digit, it's treated as a number. If it can't be
 12 //:     parsed as hex it will raise an error.
 13 //:   - if it starts with '-' it's treated as a number.
 14 //:   - if it starts with '0x' it's treated as a number.
 15 //:   - if it's two characters long, it can't be a name. Either it's a hex
 16 //:     byte, or it raises an error.
 17 //: That's it. Names can start with any non-digit that isn't a dash. They can
 18 //: be a single character long. 'a' is not a hex number, it's a variable.
 19 //: Later layers may add more conventions partitioning the space of names. But
 20 //: the above rules will remain inviolate.
 21 void check_valid_name(const string& s) {
 22   if (s.empty()) {
 23     raise << "empty name!\n" << end();
 24     return;
 25   }
 26   if (s.at(0) == '-')
 27     raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
 28   if (s.substr(0, 2) == "0x") {
 29     raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
 30     return;
 31   }
 32   if (isdigit(s.at(0)))
 33     raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
 34   if (SIZE(s) == 2)
 35     raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
 36 }
 37 
 38 :(scenarios transform)
 39 :(scenario map_label)
 40 == 0x1
 41           # instruction                     effective address                                                   operand     displacement    immediate
 42           # op          subop               mod             rm32          base        index         scale       r32
 43           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
 44 loop:
 45             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
 46 +transform: label 'loop' is at address 1
 47 
//: Append rewrite_labels to the list of transforms, at the point marked
//: "End Level-2 Transforms".
:(before "End Level-2 Transforms")
Transform.push_back(rewrite_labels);
 50 :(code)
 51 void rewrite_labels(program& p) {
 52   trace(99, "transform") << "-- rewrite labels" << end();
 53   if (p.segments.empty()) return;
 54   segment& code = p.segments.at(0);
 55   map<string, int32_t> byte_index;  // values are unsigned, but we're going to do subtractions on them so they need to fit in 31 bits
 56   compute_byte_indices_for_labels(code, byte_index);
 57   if (trace_contains_errors()) return;
 58   drop_labels(code);
 59   if (trace_contains_errors()) return;
 60   replace_labels_with_displacements(code, byte_index);
 61 }
 62 
 63 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
 64   int current_byte = 0;
 65   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 66     const line& inst = code.lines.at(i);
 67     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 68       const word& curr = inst.words.at(j);
 69       // hack: if we have any operand metadata left after previous transforms,
 70       // deduce its size
 71       // Maybe we should just move this transform to before instruction
 72       // packing, and deduce the size of *all* operands. But then we'll also
 73       // have to deal with bitfields.
 74       if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
 75         if (*curr.data.rbegin() == ':')
 76           raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
 77         current_byte += 4;
 78       }
 79       // automatically handle /disp8 and /imm8 here
 80       else if (*curr.data.rbegin() != ':') {
 81         ++current_byte;
 82       }
 83       else {
 84         string label = drop_last(curr.data);
 85         // ensure labels look sufficiently different from raw hex
 86         check_valid_name(label);
 87         if (trace_contains_errors()) return;
 88         if (contains_any_operand_metadata(curr))
 89           raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
 90         if (j > 0)
 91           raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
 92         put(byte_index, label, current_byte);
 93         trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
 94         // no modifying current_byte; label definitions won't be in the final binary
 95       }
 96     }
 97   }
 98 }
 99 
100 void drop_labels(segment& code) {
101   for (int i = 0;  i < SIZE(code.lines);  ++i) {
102     line& inst = code.lines.at(i);
103     vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
104     inst.words.erase(new_end, inst.words.end());
105   }
106 }
107 
108 bool is_label(const word& w) {
109   return *w.data.rbegin() == ':';
110 }
111 
112 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
113   int32_t byte_index_next_instruction_starts_at = 0;
114   for (int i = 0;  i < SIZE(code.lines);  ++i) {
115     line& inst = code.lines.at(i);
116     byte_index_next_instruction_starts_at += num_bytes(inst);
117     line new_inst;
118     for (int j = 0;  j < SIZE(inst.words);  ++j) {
119       const word& curr = inst.words.at(j);
120       if (contains_key(byte_index, curr.data)) {
121         int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
122         if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
123           if (displacement > 0xff || displacement < -0x7f)
124             raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end();
125           else
126             emit_hex_bytes(new_inst, displacement, 1);
127         }
128         else if (has_metadata(curr, "disp16")) {
129           if (displacement > 0xffff || displacement < -0x7fff)
130             raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end();
131           else
132             emit_hex_bytes(new_inst, displacement, 2);
133         }
134         else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
135           emit_hex_bytes(new_inst, displacement, 4);
136         }
137       }
138       else {
139         new_inst.words.push_back(curr);
140       }
141     }
142     inst.words.swap(new_inst.words);
143     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
144   }
145 }
146 
147 string data_to_string(const line& inst) {
148   ostringstream out;
149   for (int i = 0;  i < SIZE(inst.words);  ++i) {
150     if (i > 0) out << ' ';
151     out << inst.words.at(i).data;
152   }
153   return out.str();
154 }
155 
// Return a copy of 's' without its final character.
// An empty string is returned unchanged. The previous form decremented the
// temporary returned by end(), which is undefined for an empty string and
// doesn't compile where the iterator is a raw pointer.
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
159 
160 //: Label definitions must be the first word on a line. No jumping inside
161 //: instructions.
162 //: They should also be the only word on a line.
163 //: However, you can absolutely have multiple labels map to the same address,
164 //: as long as they're on separate lines.
165 
166 :(scenario multiple_labels_at)
167 == 0x1
168           # instruction                     effective address                                                   operand     displacement    immediate
169           # op          subop               mod             rm32          base        index         scale       r32
170           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
171 # address 1
172 loop:
173  $loop2:
174 # address 1 (labels take up no space)
175             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
176 # address 6
177             eb                                                                                                              $loop2/disp8
178 # address 8
179             eb                                                                                                              $loop3/disp8
180 # address 0xa
181  $loop3:
182 +transform: label 'loop' is at address 1
183 +transform: label '$loop2' is at address 1
184 +transform: label '$loop3' is at address a
185 # first jump is to -7
186 +transform: instruction after transform: 'eb f9'
187 # second jump is to 0 (fall through)
188 +transform: instruction after transform: 'eb 00'
189 
190 :(scenario label_too_short)
191 % Hide_errors = true;
192 == 0x1
193           # instruction                     effective address                                                   operand     displacement    immediate
194           # op          subop               mod             rm32          base        index         scale       r32
195           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
196 xz:
197             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
198 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
199 
200 :(scenario label_hex)
201 % Hide_errors = true;
202 == 0x1
203           # instruction                     effective address                                                   operand     displacement    immediate
204           # op          subop               mod             rm32          base        index         scale       r32
205           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
206 0xab:
207             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
208 +error: '0xab' looks like a hex number; use a different name
209 
210 :(scenario label_negative_hex)
211 % Hide_errors = true;
212 == 0x1
213           # instruction                     effective address                                                   operand     displacement    immediate
214           # op          subop               mod             rm32          base        index         scale       r32
215           # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
216  -a:  # indent to avoid looking like a trace_should_not_contain command for this scenario
217             05                                                                                                                              0x0d0c0b0a/imm32  # add to EAX
218 +error: '-a' starts with '-', which can be confused with a negative number; use a different name