https://github.com/akkartik/mu/blob/master/subx/035labels.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 void test_entry_label() {
25 run(
26 "== code 0x1\n"
27 "05 0x0d0c0b0a/imm32\n"
28 "Entry:\n"
29 "05 0x0d0c0b0a/imm32\n"
30 );
31 CHECK_TRACE_CONTENTS(
32 "run: 0x00000006 opcode: 05\n"
33 );
34 CHECK_TRACE_DOESNT_CONTAIN("run: 0x00000001 opcode: 05");
35 }
36
37 :(before "End Globals")
38 uint32_t Entry_address = 0;
39 :(before "End Reset")
40 Entry_address = 0;
41 :(before "End Initialize EIP")
42 if (Entry_address) EIP = Entry_address;
43 :(after "Override e_entry")
44 if (Entry_address) e_entry = Entry_address;
45
46 :(before "End looks_like_hex_int(s) Detectors")
47 if (SIZE(s) == 2) return true;
48
49 :(code)
50 void test_pack_immediate_ignores_single_byte_nondigit_operand() {
51 Hide_errors = true;
52 transform(
53 "== code 0x1\n"
54 "b9/copy a/imm32\n"
55 );
56 CHECK_TRACE_CONTENTS(
57 "transform: packing instruction 'b9/copy a/imm32'\n"
58
59 "transform: instruction after packing: 'b9 a'\n"
60 );
61 }
62
63 void test_pack_immediate_ignores_3_hex_digit_operand() {
64 Hide_errors = true;
65 transform(
66 "== code 0x1\n"
67 "b9/copy aaa/imm32\n"
68 );
69 CHECK_TRACE_CONTENTS(
70 "transform: packing instruction 'b9/copy aaa/imm32'\n"
71
72 "transform: instruction after packing: 'b9 aaa'\n"
73 );
74 }
75
76 void test_pack_immediate_ignores_non_hex_operand() {
77 Hide_errors = true;
78 transform(
79 "== code 0x1\n"
80 "b9/copy xxx/imm32\n"
81 );
82 CHECK_TRACE_CONTENTS(
83 "transform: packing instruction 'b9/copy xxx/imm32'\n"
84
85 "transform: instruction after packing: 'b9 xxx'\n"
86 );
87 }
88
89
90 void check_valid_name(const string& s) {
91 if (s.empty()) {
92 raise << "empty name!\n" << end();
93 return;
94 }
95 if (s.at(0) == '-')
96 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
97 if (s.substr(0, 2) == "0x") {
98 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
99 return;
100 }
101 if (isdigit(s.at(0)))
102 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
103 if (SIZE(s) == 2)
104 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
105 }
106
107
108
109 void test_map_label() {
110 transform(
111 "== code 0x1\n"
112 "loop:\n"
113 " 05 0x0d0c0b0a/imm32\n"
114 );
115 CHECK_TRACE_CONTENTS(
116 "transform: label 'loop' is at address 1\n"
117 );
118 }
119
120 :(before "End Level-2 Transforms")
121 Transform.push_back(rewrite_labels);
122 :(code)
123 void rewrite_labels(program& p) {
124 trace(3, "transform") << "-- rewrite labels" << end();
125 if (p.segments.empty()) return;
126 segment& code = *find(p, "code");
127 map<string, int32_t> byte_index;
128 compute_byte_indices_for_labels(code, byte_index);
129 if (trace_contains_errors()) return;
130 drop_labels(code);
131 if (trace_contains_errors()) return;
132 replace_labels_with_displacements(code, byte_index);
133 if (contains_key(byte_index, "Entry"))
134 Entry_address = code.start + get(byte_index, "Entry");
135 }
136
137 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
138 int current_byte = 0;
139 for (int i = 0; i < SIZE(code.lines); ++i) {
140 const line& inst = code.lines.at(i);
141 if (Source_lines_file.is_open() && !inst.original.empty() && *inst.words.at(0).data.rbegin() != ':')
142 Source_lines_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << inst.original << '\n';
143 for (int j = 0; j < SIZE(inst.words); ++j) {
144 const word& curr = inst.words.at(j);
145
146
147
148
149
150 if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
151 if (*curr.data.rbegin() == ':')
152 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
153 current_byte += 4;
154 }
155 else if (has_operand_metadata(curr, "disp16")) {
156 if (*curr.data.rbegin() == ':')
157 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
158 current_byte += 2;
159 }
160
161 else if (*curr.data.rbegin() != ':') {
162 ++current_byte;
163 }
164 else {
165 string label = drop_last(curr.data);
166
167 check_valid_name(label);
168 if (trace_contains_errors()) return;
169 if (contains_any_operand_metadata(curr))
170 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
171 if (j > 0)
172 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
173 if (Labels_file.is_open())
174 Labels_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
175 if (contains_key(byte_index, label) && label != "Entry") {
176 raise << "duplicate label '" << label << "'\n" << end();
177 return;
178 }
179 put(byte_index, label, current_byte);
180 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
181
182 }
183 }
184 }
185 }
186
187 :(before "End Globals")
188 bool Dump_debug_info = false;
189 ofstream Labels_file;
190 ofstream Source_lines_file;
191 :(before "End Commandline Options")
192 else if (is_equal(*arg, "--debug")) {
193 Dump_debug_info = true;
194
195 }
196
197 :(after "Begin subx translate")
198 if (Dump_debug_info) {
199 cerr << "saving address->label information to 'labels'\n";
200 Labels_file.open("labels");
201 cerr << "saving address->source information to 'source_lines'\n";
202 Source_lines_file.open("source_lines");
203 }
204 :(before "End subx translate")
205 if (Dump_debug_info) {
206 Labels_file.close();
207 Source_lines_file.close();
208 }
209
210 :(code)
211 void drop_labels(segment& code) {
212 for (int i = 0; i < SIZE(code.lines); ++i) {
213 line& inst = code.lines.at(i);
214 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
215 inst.words.erase(new_end, inst.words.end());
216 }
217 }
218
219 bool is_label(const word& w) {
220 return *w.data.rbegin() == ':';
221 }
222
223 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
224 int32_t byte_index_next_instruction_starts_at = 0;
225 for (int i = 0; i < SIZE(code.lines); ++i) {
226 line& inst = code.lines.at(i);
227 byte_index_next_instruction_starts_at += num_bytes(inst);
228 line new_inst;
229 for (int j = 0; j < SIZE(inst.words); ++j) {
230 const word& curr = inst.words.at(j);
231 if (contains_key(byte_index, curr.data)) {
232 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
233 if (has_operand_metadata(curr, "disp8")) {
234 if (displacement > 0x7f || displacement < -0x7f)
235 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 signed bits\n" << end();
236 else
237 emit_hex_bytes(new_inst, displacement, 1);
238 }
239 else if (has_operand_metadata(curr, "disp16")) {
240 if (displacement > 0x7fff || displacement < -0x7fff)
241 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 signed bits\n" << end();
242 else
243 emit_hex_bytes(new_inst, displacement, 2);
244 }
245 else if (has_operand_metadata(curr, "disp32")) {
246 emit_hex_bytes(new_inst, displacement, 4);
247 } else if (has_operand_metadata(curr, "imm32")) {
248 emit_hex_bytes(new_inst, code.start + get(byte_index, curr.data), 4);
249 }
250 }
251 else {
252 new_inst.words.push_back(curr);
253 }
254 }
255 inst.words.swap(new_inst.words);
256 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
257 }
258 }
259
260 string data_to_string(const line& inst) {
261 ostringstream out;
262 for (int i = 0; i < SIZE(inst.words); ++i) {
263 if (i > 0) out << ' ';
264 out << inst.words.at(i).data;
265 }
266 return out.str();
267 }
268
// Return a copy of 's' without its final character.
// An empty string is returned unchanged; the previous iterator arithmetic
// ('--s.end()') stepped before begin() in that case.
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
272
273
274
275
276
277
278
279 void test_multiple_labels_at() {
280 transform(
281 "== code 0x1\n"
282
283 "loop:\n"
284 " $loop2:\n"
285
286 " 05 0x0d0c0b0a/imm32\n"
287
288 " eb $loop2/disp8\n"
289
290 " eb $loop3/disp8\n"
291
292 " $loop3:\n"
293 );
294 CHECK_TRACE_CONTENTS(
295 "transform: label 'loop' is at address 1\n"
296 "transform: label '$loop2' is at address 1\n"
297 "transform: label '$loop3' is at address a\n"
298
299 "transform: instruction after transform: 'eb f9'\n"
300
301 "transform: instruction after transform: 'eb 00'\n"
302 );
303 }
304
305 void test_loading_label_as_imm32() {
306 transform(
307 "== code 0x1\n"
308 "label:\n"
309 " be/copy-to-ESI label/imm32\n"
310 );
311 CHECK_TRACE_CONTENTS(
312 "transform: label 'label' is at address 1\n"
313 "transform: instruction after transform: 'be 01 00 00 00'\n"
314 );
315 }
316
317 void test_duplicate_label() {
318 Hide_errors = true;
319 transform(
320 "== code 0x1\n"
321 "loop:\n"
322 "loop:\n"
323 " 05 0x0d0c0b0a/imm32\n"
324 );
325 CHECK_TRACE_CONTENTS(
326 "error: duplicate label 'loop'\n"
327 );
328 }
329
330 void test_label_too_short() {
331 Hide_errors = true;
332 transform(
333 "== code 0x1\n"
334 "xz:\n"
335 " 05 0x0d0c0b0a/imm32\n"
336 );
337 CHECK_TRACE_CONTENTS(
338 "error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name\n"
339 );
340 }
341
342 void test_label_hex() {
343 Hide_errors = true;
344 transform(
345 "== code 0x1\n"
346 "0xab:\n"
347 " 05 0x0d0c0b0a/imm32\n"
348 );
349 CHECK_TRACE_CONTENTS(
350 "error: '0xab' looks like a hex number; use a different name\n"
351 );
352 }
353
354 void test_label_negative_hex() {
355 Hide_errors = true;
356 transform(
357 "== code 0x1\n"
358 "-a:\n"
359 " 05 0x0d0c0b0a/imm32\n"
360 );
361 CHECK_TRACE_CONTENTS(
362 "error: '-a' starts with '-', which can be confused with a negative number; use a different name\n"
363 );
364 }
365
366
367
368
369 void test_segment_size_ignores_labels() {
370 transform(
371 "== code 0x09000074\n"
372 " 05/add 0x0d0c0b0a/imm32\n"
373 "foo:\n"
374 "== data 0x0a000000\n"
375 "bar:\n"
376 " 00\n"
377 );
378 CHECK_TRACE_CONTENTS(
379 "transform: segment 1 begins at address 0x0a000079\n"
380 );
381 }
382
383 :(before "End size_of(word w) Special-cases")
384 else if (is_label(w))
385 return 0;