https://github.com/akkartik/mu/blob/master/035labels.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 void test_Entry_label() {
28 run(
29 "== code 0x1\n"
30 "05 0x0d0c0b0a/imm32\n"
31 "Entry:\n"
32 "05 0x0d0c0b0a/imm32\n"
33 );
34 CHECK_TRACE_CONTENTS(
35 "run: 0x00000006 opcode: 05\n"
36 );
37 CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000001 opcode: 05");
38 }
39
40 :(before "End looks_like_hex_int(s) Detectors")
41 if (SIZE(s) == 2) return true;
42
43 :(code)
44 void test_pack_immediate_ignores_single_byte_nondigit_operand() {
45 Hide_errors = true;
46 transform(
47 "== code 0x1\n"
48 "b9/copy a/imm32\n"
49 );
50 CHECK_TRACE_CONTENTS(
51 "transform: packing instruction 'b9/copy a/imm32'\n"
52
53 "transform: instruction after packing: 'b9 a'\n"
54 );
55 }
56
57 void test_pack_immediate_ignores_3_hex_digit_operand() {
58 Hide_errors = true;
59 transform(
60 "== code 0x1\n"
61 "b9/copy aaa/imm32\n"
62 );
63 CHECK_TRACE_CONTENTS(
64 "transform: packing instruction 'b9/copy aaa/imm32'\n"
65
66 "transform: instruction after packing: 'b9 aaa'\n"
67 );
68 }
69
70 void test_pack_immediate_ignores_non_hex_operand() {
71 Hide_errors = true;
72 transform(
73 "== code 0x1\n"
74 "b9/copy xxx/imm32\n"
75 );
76 CHECK_TRACE_CONTENTS(
77 "transform: packing instruction 'b9/copy xxx/imm32'\n"
78
79 "transform: instruction after packing: 'b9 xxx'\n"
80 );
81 }
82
83
84 void check_valid_name(const string& s) {
85 if (s.empty()) {
86 raise << "empty name!\n" << end();
87 return;
88 }
89 if (s.at(0) == '-')
90 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
91 if (s.substr(0, 2) == "0x") {
92 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
93 return;
94 }
95 if (isdigit(s.at(0)))
96 raise << "'" << s << "' starts with a digit, and so can be confused with a number; use a different name.\n" << end();
97 if (SIZE(s) == 2)
98 raise << "'" << s << "' is two characters long, which can look like raw hex bytes at a glance; use a different name\n" << end();
99 }
100
101
102
103 void test_map_label() {
104 transform(
105 "== code 0x1\n"
106 "loop:\n"
107 " 05 0x0d0c0b0a/imm32\n"
108 );
109 CHECK_TRACE_CONTENTS(
110 "transform: label 'loop' is at address 1\n"
111 );
112 }
113
114 :(before "End Level-2 Transforms")
115 Transform.push_back(rewrite_labels);
116 :(code)
117 void rewrite_labels(program& p) {
118 trace(3, "transform") << "-- rewrite labels" << end();
119 if (p.segments.empty()) return;
120 segment& code = *find(p, "code");
121 map<string, int32_t> byte_index;
122 compute_byte_indices_for_labels(code, byte_index);
123 if (trace_contains_errors()) return;
124 drop_labels(code);
125 if (trace_contains_errors()) return;
126 replace_labels_with_displacements(code, byte_index);
127 if (contains_key(byte_index, "Entry"))
128 p.entry = code.start + get(byte_index, "Entry");
129 }
130
131 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
132 int current_byte = 0;
133 for (int i = 0; i < SIZE(code.lines); ++i) {
134 const line& inst = code.lines.at(i);
135 if (Source_lines_file.is_open() && !inst.original.empty() && *inst.words.at(0).data.rbegin() != ':')
136 Source_lines_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << inst.original << '\n';
137 for (int j = 0; j < SIZE(inst.words); ++j) {
138 const word& curr = inst.words.at(j);
139
140
141
142
143
144 if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
145 if (*curr.data.rbegin() == ':')
146 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
147 current_byte += 4;
148 }
149 else if (has_operand_metadata(curr, "disp16")) {
150 if (*curr.data.rbegin() == ':')
151 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
152 current_byte += 2;
153 }
154
155 else if (*curr.data.rbegin() != ':') {
156 ++current_byte;
157 }
158 else {
159 string label = drop_last(curr.data);
160
161 check_valid_name(label);
162 if (trace_contains_errors()) return;
163 if (contains_any_operand_metadata(curr))
164 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
165 if (j > 0)
166 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
167 if (Labels_file.is_open())
168 Labels_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
169 if (contains_key(byte_index, label) && label != "Entry") {
170 raise << "duplicate label '" << label << "'\n" << end();
171 return;
172 }
173 put(byte_index, label, current_byte);
174 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
175
176 }
177 }
178 }
179 }
180
181 :(before "End Globals")
182 bool Dump_debug_info = false;
183 ofstream Labels_file;
184 ofstream Source_lines_file;
185 :(before "End Commandline Options")
186 else if (is_equal(*arg, "--debug")) {
187 Dump_debug_info = true;
188
189 }
190
191 :(after "Begin subx translate")
192 if (Dump_debug_info) {
193 cerr << "saving address->label information to 'labels'\n";
194 Labels_file.open("labels");
195 cerr << "saving address->source information to 'source_lines'\n";
196 Source_lines_file.open("source_lines");
197 }
198 :(before "End subx translate")
199 if (Dump_debug_info) {
200 Labels_file.close();
201 Source_lines_file.close();
202 }
203
204 :(code)
205 void drop_labels(segment& code) {
206 for (int i = 0; i < SIZE(code.lines); ++i) {
207 line& inst = code.lines.at(i);
208 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
209 inst.words.erase(new_end, inst.words.end());
210 }
211 }
212
213 bool is_label(const word& w) {
214 return *w.data.rbegin() == ':';
215 }
216
217 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
218 int32_t byte_index_next_instruction_starts_at = 0;
219 for (int i = 0; i < SIZE(code.lines); ++i) {
220 line& inst = code.lines.at(i);
221 byte_index_next_instruction_starts_at += num_bytes(inst);
222 line new_inst;
223 for (int j = 0; j < SIZE(inst.words); ++j) {
224 const word& curr = inst.words.at(j);
225 if (contains_key(byte_index, curr.data)) {
226 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
227 if (has_operand_metadata(curr, "disp8")) {
228 if (displacement > 0x7f || displacement < -0x7f)
229 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 signed bits\n" << end();
230 else
231 emit_hex_bytes(new_inst, displacement, 1);
232 }
233 else if (has_operand_metadata(curr, "disp16")) {
234 if (displacement > 0x7fff || displacement < -0x7fff)
235 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 signed bits\n" << end();
236 else
237 emit_hex_bytes(new_inst, displacement, 2);
238 }
239 else if (has_operand_metadata(curr, "disp32")) {
240 emit_hex_bytes(new_inst, displacement, 4);
241 } else if (has_operand_metadata(curr, "imm32")) {
242 emit_hex_bytes(new_inst, code.start + get(byte_index, curr.data), 4);
243 }
244 }
245 else {
246 new_inst.words.push_back(curr);
247 }
248 }
249 inst.words.swap(new_inst.words);
250 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
251 }
252 }
253
254 string data_to_string(const line& inst) {
255 ostringstream out;
256 for (int i = 0; i < SIZE(inst.words); ++i) {
257 if (i > 0) out << ' ';
258 out << inst.words.at(i).data;
259 }
260 return out.str();
261 }
262
// Return a copy of `s` without its final character (used to strip the trailing
// ':' from a label definition). An empty string yields an empty string rather
// than stepping an iterator before begin().
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
266
267
268
269
270
271
272
273 void test_multiple_labels_at() {
274 transform(
275 "== code 0x1\n"
276
277 "loop:\n"
278 " $loop2:\n"
279
280 " 05 0x0d0c0b0a/imm32\n"
281
282 " eb $loop2/disp8\n"
283
284 " eb $loop3/disp8\n"
285
286 " $loop3:\n"
287 );
288 CHECK_TRACE_CONTENTS(
289 "transform: label 'loop' is at address 1\n"
290 "transform: label '$loop2' is at address 1\n"
291 "transform: label '$loop3' is at address a\n"
292
293 "transform: instruction after transform: 'eb f9'\n"
294
295 "transform: instruction after transform: 'eb 00'\n"
296 );
297 }
298
299 void test_loading_label_as_imm32() {
300 transform(
301 "== code 0x1\n"
302 "label:\n"
303 " be/copy-to-ESI label/imm32\n"
304 );
305 CHECK_TRACE_CONTENTS(
306 "transform: label 'label' is at address 1\n"
307 "transform: instruction after transform: 'be 01 00 00 00'\n"
308 );
309 }
310
311 void test_duplicate_label() {
312 Hide_errors = true;
313 transform(
314 "== code 0x1\n"
315 "loop:\n"
316 "loop:\n"
317 " 05 0x0d0c0b0a/imm32\n"
318 );
319 CHECK_TRACE_CONTENTS(
320 "error: duplicate label 'loop'\n"
321 );
322 }
323
324 void test_label_too_short() {
325 Hide_errors = true;
326 transform(
327 "== code 0x1\n"
328 "xz:\n"
329 " 05 0x0d0c0b0a/imm32\n"
330 );
331 CHECK_TRACE_CONTENTS(
332 "error: 'xz' is two characters long, which can look like raw hex bytes at a glance; use a different name\n"
333 );
334 }
335
336 void test_label_hex() {
337 Hide_errors = true;
338 transform(
339 "== code 0x1\n"
340 "0xab:\n"
341 " 05 0x0d0c0b0a/imm32\n"
342 );
343 CHECK_TRACE_CONTENTS(
344 "error: '0xab' looks like a hex number; use a different name\n"
345 );
346 }
347
348 void test_label_negative_hex() {
349 Hide_errors = true;
350 transform(
351 "== code 0x1\n"
352 "-a:\n"
353 " 05 0x0d0c0b0a/imm32\n"
354 );
355 CHECK_TRACE_CONTENTS(
356 "error: '-a' starts with '-', which can be confused with a negative number; use a different name\n"
357 );
358 }
359
360
361
362
363
364
365 void test_duplicate_Entry_label() {
366 transform(
367 "== code 0x1\n"
368 "Entry:\n"
369 "Entry:\n"
370 " 05 0x0d0c0b0a/imm32\n"
371 );
372 CHECK_TRACE_DOESNT_CONTAIN_ERRORS();
373 }
374
375
376
377
378
379
380 void test_programs_without_Entry_label() {
381 Hide_errors = true;
382 program p;
383 istringstream in(
384 "== code 0x1\n"
385 "05 0x0d0c0b0a/imm32\n"
386 "05 0x0d0c0b0a/imm32\n"
387 );
388 parse(in, p);
389 transform(p);
390 ostringstream dummy;
391 save_elf(p, dummy);
392 CHECK_TRACE_CONTENTS(
393 "error: no 'Entry' label found\n"
394 );
395 }
396
397
398
399
400 void test_segment_size_ignores_labels() {
401 transform(
402 "== code 0x09000074\n"
403 " 05/add 0x0d0c0b0a/imm32\n"
404 "foo:\n"
405 "== data 0x0a000000\n"
406 "bar:\n"
407 " 00\n"
408 );
409 CHECK_TRACE_CONTENTS(
410 "transform: segment 1 begins at address 0x0a000079\n"
411 );
412 }
413
414 :(before "End size_of(word w) Special-cases")
415 else if (is_label(w))
416 return 0;