https://github.com/akkartik/mu/blob/main/linux/bootstrap/036labels.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 void test_Entry_label() {
28 run(
29 "== code 0x1\n"
30 "05 0x0d0c0b0a/imm32\n"
31 "Entry:\n"
32 "05 0x0d0c0b0a/imm32\n"
33 );
34 CHECK_TRACE_CONTENTS(
35 "run: 0x00000006 opcode: 05\n"
36 );
37 CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000001 opcode: 05");
38 }
39
40 :(before "End looks_like_hex_int(s) Detectors")
41 if (SIZE(s) == 2) return true;
42
43 :(code)
44 void test_pack_immediate_ignores_single_byte_nondigit_argument() {
45 Hide_errors = true;
46 transform(
47 "== code 0x1\n"
48 "b9/copy a/imm32\n"
49 );
50 CHECK_TRACE_CONTENTS(
51 "transform: packing instruction 'b9/copy a/imm32'\n"
52
53 "transform: instruction after packing: 'b9 a'\n"
54 );
55 }
56
57 void test_pack_immediate_ignores_3_hex_digit_argument() {
58 Hide_errors = true;
59 transform(
60 "== code 0x1\n"
61 "b9/copy aaa/imm32\n"
62 );
63 CHECK_TRACE_CONTENTS(
64 "transform: packing instruction 'b9/copy aaa/imm32'\n"
65
66 "transform: instruction after packing: 'b9 aaa'\n"
67 );
68 }
69
70 void test_pack_immediate_ignores_non_hex_argument() {
71 Hide_errors = true;
72 transform(
73 "== code 0x1\n"
74 "b9/copy xxx/imm32\n"
75 );
76 CHECK_TRACE_CONTENTS(
77 "transform: packing instruction 'b9/copy xxx/imm32'\n"
78
79 "transform: instruction after packing: 'b9 xxx'\n"
80 );
81 }
82
83
84 void check_valid_name(const string& s) {
85 if (s.empty()) {
86 raise << "empty name!\n" << end();
87 return;
88 }
89 if (s.at(0) == '-')
90 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
91 if (s.substr(0, 2) == "0x") {
92 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
93 return;
94 }
95 if (isdigit(s.at(0)))
96 raise << "'" << s << "' starts with a digit, and so can be confused with a number; use a different name.\n" << end();
97 if (SIZE(s) == 2)
98 raise << "'" << s << "' is two characters long, which can look like raw hex bytes at a glance; use a different name\n" << end();
99 }
100
101
102
103 void test_map_label() {
104 transform(
105 "== code 0x1\n"
106 "loop:\n"
107 " 05 0x0d0c0b0a/imm32\n"
108 );
109 CHECK_TRACE_CONTENTS(
110 "transform: label 'loop' is at address 1\n"
111 );
112 }
113
114 :(before "End Transforms")
115 Transform.push_back(rewrite_labels);
116 :(code)
117 void rewrite_labels(program& p) {
118 trace(3, "transform") << "-- rewrite labels" << end();
119 if (p.segments.empty()) return;
120 segment& code = *find(p, "code");
121 map<string, int32_t> byte_index;
122 compute_byte_indices_for_labels(code, byte_index);
123 if (trace_contains_errors()) return;
124 drop_labels(code);
125 if (trace_contains_errors()) return;
126 replace_labels_with_displacements(code, byte_index);
127 if (contains_key(byte_index, "Entry"))
128 p.entry = code.start + get(byte_index, "Entry");
129 }
130
131 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
132 int current_byte = 0;
133 for (int i = 0; i < SIZE(code.lines); ++i) {
134 const line& inst = code.lines.at(i);
135 if (Source_lines_file.is_open() && !inst.original.empty() && *inst.words.at(0).data.rbegin() != ':')
136 Source_lines_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << inst.original << '\n';
137 for (int j = 0; j < SIZE(inst.words); ++j) {
138 const word& curr = inst.words.at(j);
139
140
141
142
143
144 if (has_argument_metadata(curr, "disp32") || has_argument_metadata(curr, "imm32")) {
145 if (*curr.data.rbegin() == ':')
146 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
147 current_byte += 4;
148 }
149 else if (has_argument_metadata(curr, "disp16")) {
150 if (*curr.data.rbegin() == ':')
151 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
152 current_byte += 2;
153 }
154
155 else if (*curr.data.rbegin() != ':') {
156 ++current_byte;
157 }
158 else {
159 string label = drop_last(curr.data);
160
161 check_valid_name(label);
162 if (trace_contains_errors()) return;
163 if (contains_any_argument_metadata(curr))
164 raise << "'" << to_string(inst) << "': label definition (':') not allowed in argument\n" << end();
165 if (j > 0)
166 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
167 if (Labels_file.is_open())
168 Labels_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
169 if (contains_key(byte_index, label) && label != "Entry") {
170 raise << "duplicate label '" << label << "'\n" << end();
171 return;
172 }
173 put(byte_index, label, current_byte);
174 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
175
176 }
177 }
178 }
179 }
180
181 :(before "End Globals")
182 bool Dump_debug_info = false;
183 ofstream Labels_file;
184 ofstream Source_lines_file;
185 :(before "End Commandline Options")
186 else if (is_equal(*arg, "--debug")) {
187 Dump_debug_info = true;
188
189 }
190
191 :(after "Begin bootstrap translate")
192 if (Dump_debug_info) {
193 cerr << "saving address->label information to 'labels'\n";
194 Labels_file.open("labels");
195 cerr << "saving address->source information to 'source_lines'\n";
196 Source_lines_file.open("source_lines");
197 }
198 :(before "End bootstrap translate")
199 if (Dump_debug_info) {
200 Labels_file.close();
201 Source_lines_file.close();
202 }
203
204 :(code)
205 void drop_labels(segment& code) {
206 for (int i = 0; i < SIZE(code.lines); ++i) {
207 line& inst = code.lines.at(i);
208 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
209 inst.words.erase(new_end, inst.words.end());
210 }
211 }
212
213 bool is_label(const word& w) {
214 return *w.data.rbegin() == ':';
215 }
216
217 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
218 int32_t byte_index_next_instruction_starts_at = 0;
219 for (int i = 0; i < SIZE(code.lines); ++i) {
220 line& inst = code.lines.at(i);
221 byte_index_next_instruction_starts_at += num_bytes(inst);
222 line new_inst;
223 for (int j = 0; j < SIZE(inst.words); ++j) {
224 const word& curr = inst.words.at(j);
225 if (contains_key(byte_index, curr.data)) {
226 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
227 int32_t absolute_address = code.start + get(byte_index, curr.data);
228 if (has_argument_metadata(curr, "disp8")) {
229 if (displacement > 0x7f || displacement < -0x7f)
230 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 signed bits\n" << end();
231 else
232 emit_hex_bytes(new_inst, displacement, 1);
233 }
234 else if (has_argument_metadata(curr, "disp16")) {
235 if (displacement > 0x7fff || displacement < -0x7fff)
236 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 signed bits\n" << end();
237 else
238 emit_hex_bytes(new_inst, displacement, 2);
239 }
240 else if (has_argument_metadata(curr, "disp32")) {
241 if (is_far_jump_or_call(new_inst))
242 emit_hex_bytes(new_inst, displacement, 4);
243 else
244 emit_hex_bytes(new_inst, absolute_address, 4);
245 } else if (has_argument_metadata(curr, "imm32")) {
246 emit_hex_bytes(new_inst, absolute_address, 4);
247 }
248 }
249 else {
250 new_inst.words.push_back(curr);
251 }
252 }
253 inst.words.swap(new_inst.words);
254 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
255 }
256 }
257
258 bool is_far_jump_or_call(const line& inst) {
259 string first_opcode = inst.words.at(0).data;
260 if (first_opcode == "e8" || first_opcode == "e9") return true;
261 if (SIZE(inst.words) < 2) return false;
262 if (first_opcode != "0f") return false;
263 string second_opcode = inst.words.at(1).data;
264 return starts_with(second_opcode, "8");
265 }
266
267 string data_to_string(const line& inst) {
268 ostringstream out;
269 for (int i = 0; i < SIZE(inst.words); ++i) {
270 if (i > 0) out << ' ';
271 out << inst.words.at(i).data;
272 }
273 return out.str();
274 }
275
276 string drop_last(const string& s) {
277 return string(s.begin(), --s.end());
278 }
279
280
281
282
283
284
285
286 void test_multiple_labels_at() {
287 transform(
288 "== code 0x1\n"
289
290 "loop:\n"
291 " $loop2:\n"
292
293 " 05 0x0d0c0b0a/imm32\n"
294
295 " eb $loop2/disp8\n"
296
297 " eb $loop3/disp8\n"
298
299 " $loop3:\n"
300 );
301 CHECK_TRACE_CONTENTS(
302 "transform: label 'loop' is at address 1\n"
303 "transform: label '$loop2' is at address 1\n"
304 "transform: label '$loop3' is at address a\n"
305
306 "transform: instruction after transform: 'eb f9'\n"
307
308 "transform: instruction after transform: 'eb 00'\n"
309 );
310 }
311
312 void test_loading_label_as_imm32() {
313 transform(
314 "== code 0x1\n"
315 "label:\n"
316 " be/copy-to-ESI label/imm32\n"
317 );
318 CHECK_TRACE_CONTENTS(
319 "transform: label 'label' is at address 1\n"
320 "transform: instruction after transform: 'be 01 00 00 00'\n"
321 );
322 }
323
324 void test_duplicate_label() {
325 Hide_errors = true;
326 transform(
327 "== code 0x1\n"
328 "loop:\n"
329 "loop:\n"
330 " 05 0x0d0c0b0a/imm32\n"
331 );
332 CHECK_TRACE_CONTENTS(
333 "error: duplicate label 'loop'\n"
334 );
335 }
336
337 void test_label_too_short() {
338 Hide_errors = true;
339 transform(
340 "== code 0x1\n"
341 "xz:\n"
342 " 05 0x0d0c0b0a/imm32\n"
343 );
344 CHECK_TRACE_CONTENTS(
345 "error: 'xz' is two characters long, which can look like raw hex bytes at a glance; use a different name\n"
346 );
347 }
348
349 void test_label_hex() {
350 Hide_errors = true;
351 transform(
352 "== code 0x1\n"
353 "0xab:\n"
354 " 05 0x0d0c0b0a/imm32\n"
355 );
356 CHECK_TRACE_CONTENTS(
357 "error: '0xab' looks like a hex number; use a different name\n"
358 );
359 }
360
361 void test_label_negative_hex() {
362 Hide_errors = true;
363 transform(
364 "== code 0x1\n"
365 "-a:\n"
366 " 05 0x0d0c0b0a/imm32\n"
367 );
368 CHECK_TRACE_CONTENTS(
369 "error: '-a' starts with '-', which can be confused with a negative number; use a different name\n"
370 );
371 }
372
373
374
375
376
377
378 void test_duplicate_Entry_label() {
379 transform(
380 "== code 0x1\n"
381 "Entry:\n"
382 "Entry:\n"
383 " 05 0x0d0c0b0a/imm32\n"
384 );
385 CHECK_TRACE_DOESNT_CONTAIN_ERRORS();
386 }
387
388
389
390
391
392
393 void test_programs_without_Entry_label() {
394 Hide_errors = true;
395 program p;
396 istringstream in(
397 "== code 0x1\n"
398 "05 0x0d0c0b0a/imm32\n"
399 "05 0x0d0c0b0a/imm32\n"
400 );
401 parse(in, p);
402 transform(p);
403 ostringstream dummy;
404 save_elf(p, dummy);
405 CHECK_TRACE_CONTENTS(
406 "error: no 'Entry' label found\n"
407 );
408 }
409
410
411
412
413 void test_segment_size_ignores_labels() {
414 transform(
415 "== code 0x09000074\n"
416 " 05/add 0x0d0c0b0a/imm32\n"
417 "foo:\n"
418 "== data 0x0a000000\n"
419 "bar:\n"
420 " 00\n"
421 );
422 CHECK_TRACE_CONTENTS(
423 "transform: segment 1 begins at address 0x0a000079\n"
424 );
425 }
426
427 :(before "End size_of(word w) Special-cases")
428 else if (is_label(w))
429 return 0;