1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 :(before "End looks_like_hex_int(s) Detectors")
23 if (SIZE(s) == 2) return true;
24
25 :(scenarios transform)
26 :(scenario pack_immediate_ignores_single_byte_nondigit_operand)
27 % Hide_errors = true;
28 == 0x1
29 b9/copy a/imm32
30 +transform: packing instruction 'b9/copy a/imm32'
31
32 +transform: instruction after packing: 'b9 a'
33
34 :(scenario pack_immediate_ignores_3_hex_digit_operand)
35 % Hide_errors = true;
36 == 0x1
37 b9/copy aaa/imm32
38 +transform: packing instruction 'b9/copy aaa/imm32'
39
40 +transform: instruction after packing: 'b9 aaa'
41
42 :(scenario pack_immediate_ignores_non_hex_operand)
43 % Hide_errors = true;
44 == 0x1
45 b9/copy xxx/imm32
46 +transform: packing instruction 'b9/copy xxx/imm32'
47
48 +transform: instruction after packing: 'b9 xxx'
49
50
51 :(code)
52 void check_valid_name(const string& s) {
53 if (s.empty()) {
54 raise << "empty name!\n" << end();
55 return;
56 }
57 if (s.at(0) == '-')
58 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
59 if (s.substr(0, 2) == "0x") {
60 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
61 return;
62 }
63 if (isdigit(s.at(0)))
64 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
65 if (SIZE(s) == 2)
66 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
67 }
68
69
70
71 :(scenario map_label)
72 == 0x1
73
74
75
76 loop:
77 05 0x0d0c0b0a/imm32
78 +transform: label 'loop' is at address 1
79
80 :(before "End Level-2 Transforms")
81 Transform.push_back(rewrite_labels);
82 :(code)
83 void rewrite_labels(program& p) {
84 trace(99, "transform") << "-- rewrite labels" << end();
85 if (p.segments.empty()) return;
86 segment& code = p.segments.at(0);
87 map<string, int32_t> byte_index;
88 compute_byte_indices_for_labels(code, byte_index);
89 if (trace_contains_errors()) return;
90 drop_labels(code);
91 if (trace_contains_errors()) return;
92 replace_labels_with_displacements(code, byte_index);
93 }
94
95 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
96 int current_byte = 0;
97 for (int i = 0; i < SIZE(code.lines); ++i) {
98 const line& inst = code.lines.at(i);
99 for (int j = 0; j < SIZE(inst.words); ++j) {
100 const word& curr = inst.words.at(j);
101
102
103
104
105
106 if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
107 if (*curr.data.rbegin() == ':')
108 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
109 current_byte += 4;
110 }
111
112 else if (*curr.data.rbegin() != ':') {
113 ++current_byte;
114 }
115 else {
116 string label = drop_last(curr.data);
117
118 check_valid_name(label);
119 if (trace_contains_errors()) return;
120 if (contains_any_operand_metadata(curr))
121 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
122 if (j > 0)
123 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
124 if (Dump_map)
125 cerr << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
126 put(byte_index, label, current_byte);
127 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
128
129 }
130 }
131 }
132 }
133
134 :(before "End Globals")
135 bool Dump_map = false;
136 :(before "End Commandline Options")
137 else if (is_equal(*arg, "--map")) {
138 Dump_map = true;
139 }
140
141 :(code)
142 void drop_labels(segment& code) {
143 for (int i = 0; i < SIZE(code.lines); ++i) {
144 line& inst = code.lines.at(i);
145 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
146 inst.words.erase(new_end, inst.words.end());
147 }
148 }
149
150 bool is_label(const word& w) {
151 return *w.data.rbegin() == ':';
152 }
153
154 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
155 int32_t byte_index_next_instruction_starts_at = 0;
156 for (int i = 0; i < SIZE(code.lines); ++i) {
157 line& inst = code.lines.at(i);
158 byte_index_next_instruction_starts_at += num_bytes(inst);
159 line new_inst;
160 for (int j = 0; j < SIZE(inst.words); ++j) {
161 const word& curr = inst.words.at(j);
162 if (contains_key(byte_index, curr.data)) {
163 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
164 if (has_operand_metadata(curr, "disp8")) {
165 if (displacement > 0xff || displacement < -0x7f)
166 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end();
167 else
168 emit_hex_bytes(new_inst, displacement, 1);
169 }
170 else if (has_operand_metadata(curr, "disp16")) {
171 if (displacement > 0xffff || displacement < -0x7fff)
172 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end();
173 else
174 emit_hex_bytes(new_inst, displacement, 2);
175 }
176 else if (has_operand_metadata(curr, "disp32")) {
177 emit_hex_bytes(new_inst, displacement, 4);
178 }
179 }
180 else {
181 new_inst.words.push_back(curr);
182 }
183 }
184 inst.words.swap(new_inst.words);
185 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
186 }
187 }
188
189 string data_to_string(const line& inst) {
190 ostringstream out;
191 for (int i = 0; i < SIZE(inst.words); ++i) {
192 if (i > 0) out << ' ';
193 out << inst.words.at(i).data;
194 }
195 return out.str();
196 }
197
// Return a copy of 's' without its final character.
// An empty string is returned unchanged rather than stepping an iterator
// before begin(), which is undefined.
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
201
202
203
204
205
206
207
208 :(scenario multiple_labels_at)
209 == 0x1
210
211
212
213
214 loop:
215 $loop2:
216
217 05 0x0d0c0b0a/imm32
218
219 eb $loop2/disp8
220
221 eb $loop3/disp8
222
223 $loop3:
224 +transform: label 'loop' is at address 1
225 +transform: label '$loop2' is at address 1
226 +transform: label '$loop3' is at address a
227
228 +transform: instruction after transform: 'eb f9'
229
230 +transform: instruction after transform: 'eb 00'
231
232 :(scenario label_too_short)
233 % Hide_errors = true;
234 == 0x1
235
236
237
238 xz:
239 05 0x0d0c0b0a/imm32
240 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
241
242 :(scenario label_hex)
243 % Hide_errors = true;
244 == 0x1
245
246
247
248 0xab:
249 05 0x0d0c0b0a/imm32
250 +error: '0xab' looks like a hex number; use a different name
251
252 :(scenario label_negative_hex)
253 % Hide_errors = true;
254 == 0x1
255
256
257
258 -a:
259 05 0x0d0c0b0a/imm32
260 +error: '-a' starts with '-', which can be confused with a negative number; use a different name
261
262
263
264
265 :(scenario segment_size_ignores_labels)
266 == code
267 05/add 0x0d0c0b0a/imm32
268 foo:
269 == data
270 bar:
271 00
272 +transform: segment 1 begins at address 0x08049079
273
274 :(before "End num_bytes(curr) Special-cases")
275 else if (is_label(curr))
276 ;