https://github.com/akkartik/mu/blob/master/subx/035labels.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
//: A label called 'Entry' picks where execution begins: the trace must show
//: the instruction after the label (address 6) running, and must not show the
//: instruction before it (address 1) running.
:(scenario entry_label)
== 0x1
05 0x0d0c0b0a/imm32
Entry:
05 0x0d0c0b0a/imm32
+run: 0x00000006 opcode: 05
-run: 0x00000001 opcode: 05
31
:(before "End Globals")
// Absolute address of the 'Entry' label, set by rewrite_labels when the
// program defines one. 0 is used as "no Entry label seen".
uint32_t Entry_address = 0;
:(before "End Reset")
Entry_address = 0;
:(before "End Initialize EIP")
// Only override EIP when an Entry label was recorded.
if (Entry_address) EIP = Entry_address;
:(after "Override e_entry")
// Likewise only override e_entry when an Entry label was recorded.
if (Entry_address) e_entry = Entry_address;
40
:(before "End looks_like_hex_int(s) Detectors")
// Any two-character string is reported as looking like a hex int.
// (check_valid_name rejects two-character label names for the matching reason.)
if (SIZE(s) == 2) return true;
43
//: The scenarios below exercise 'transform' rather than a full run.
:(scenarios transform)
//: A one-character imm32 operand is expected to come out of packing as-is
//: ('b9 a') rather than expanded into bytes.
:(scenario pack_immediate_ignores_single_byte_nondigit_operand)
% Hide_errors = true;
== 0x1
b9/copy a/imm32
+transform: packing instruction 'b9/copy a/imm32'

+transform: instruction after packing: 'b9 a'

//: A three-character imm32 operand made of hex digits is likewise expected to
//: come out of packing as-is ('b9 aaa').
:(scenario pack_immediate_ignores_3_hex_digit_operand)
% Hide_errors = true;
== 0x1
b9/copy aaa/imm32
+transform: packing instruction 'b9/copy aaa/imm32'

+transform: instruction after packing: 'b9 aaa'

//: An imm32 operand with no hex digits at all is expected to come out of
//: packing as-is ('b9 xxx').
:(scenario pack_immediate_ignores_non_hex_operand)
% Hide_errors = true;
== 0x1
b9/copy xxx/imm32
+transform: packing instruction 'b9/copy xxx/imm32'

+transform: instruction after packing: 'b9 xxx'
68
69
70 :(code)
71 void check_valid_name(const string& s) {
72 if (s.empty()) {
73 raise << "empty name!\n" << end();
74 return;
75 }
76 if (s.at(0) == '-')
77 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
78 if (s.substr(0, 2) == "0x") {
79 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
80 return;
81 }
82 if (isdigit(s.at(0)))
83 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
84 if (SIZE(s) == 2)
85 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
86 }
87
88
89
//: A label defined at the very start of a segment that begins at 0x1 is
//: expected to be reported at address 1.
:(scenario map_label)
== 0x1
loop:
05 0x0d0c0b0a/imm32
+transform: label 'loop' is at address 1
95
:(before "End Level-2 Transforms")
// Add label rewriting to the list of transforms.
Transform.push_back(rewrite_labels);
98 :(code)
99 void rewrite_labels(program& p) {
100 trace(3, "transform") << "-- rewrite labels" << end();
101 if (p.segments.empty()) return;
102 segment& code = p.segments.at(0);
103 map<string, int32_t> byte_index;
104 compute_byte_indices_for_labels(code, byte_index);
105 if (trace_contains_errors()) return;
106 drop_labels(code);
107 if (trace_contains_errors()) return;
108 replace_labels_with_displacements(code, byte_index);
109 if (contains_key(byte_index, "Entry"))
110 Entry_address = code.start + get(byte_index, "Entry");
111 }
112
113 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
114 int current_byte = 0;
115 for (int i = 0; i < SIZE(code.lines); ++i) {
116 const line& inst = code.lines.at(i);
117 for (int j = 0; j < SIZE(inst.words); ++j) {
118 const word& curr = inst.words.at(j);
119
120
121
122
123
124 if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
125 if (*curr.data.rbegin() == ':')
126 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
127 current_byte += 4;
128 }
129 else if (has_operand_metadata(curr, "disp16")) {
130 if (*curr.data.rbegin() == ':')
131 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
132 current_byte += 2;
133 }
134
135 else if (*curr.data.rbegin() != ':') {
136 ++current_byte;
137 }
138 else {
139 string label = drop_last(curr.data);
140
141 check_valid_name(label);
142 if (trace_contains_errors()) return;
143 if (contains_any_operand_metadata(curr))
144 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
145 if (j > 0)
146 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
147 if (Map_file.is_open())
148 Map_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
149 if (contains_key(byte_index, label) && label != "Entry") {
150 raise << "duplicate label '" << label << "'\n" << end();
151 return;
152 }
153 put(byte_index, label, current_byte);
154 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
155
156 }
157 }
158 }
159 }
160
:(before "End Globals")
// Turned on by the '--map' commandline option below.
bool Dump_map = false;
// While open, compute_byte_indices_for_labels writes one
// "0x<address> <label>" line to this stream per label definition.
ofstream Map_file;
:(before "End Commandline Options")
else if (is_equal(*arg, "--map")) {
  Dump_map = true;

}

:(after "Begin subx translate")
// The output file is named "map"; the path is relative, so it depends on the
// current working directory.
if (Dump_map)
  Map_file.open("map");
:(before "End subx translate")
if (Dump_map)
  Map_file.close();
175 Map_file.close();
176
177 :(code)
178 void drop_labels(segment& code) {
179 for (int i = 0; i < SIZE(code.lines); ++i) {
180 line& inst = code.lines.at(i);
181 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
182 inst.words.erase(new_end, inst.words.end());
183 }
184 }
185
186 bool is_label(const word& w) {
187 return *w.data.rbegin() == ':';
188 }
189
190 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
191 int32_t byte_index_next_instruction_starts_at = 0;
192 for (int i = 0; i < SIZE(code.lines); ++i) {
193 line& inst = code.lines.at(i);
194 byte_index_next_instruction_starts_at += num_bytes(inst);
195 line new_inst;
196 for (int j = 0; j < SIZE(inst.words); ++j) {
197 const word& curr = inst.words.at(j);
198 if (contains_key(byte_index, curr.data)) {
199 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
200 if (has_operand_metadata(curr, "disp8")) {
201 if (displacement > 0xff || displacement < -0x7f)
202 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end();
203 else
204 emit_hex_bytes(new_inst, displacement, 1);
205 }
206 else if (has_operand_metadata(curr, "disp16")) {
207 if (displacement > 0xffff || displacement < -0x7fff)
208 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end();
209 else
210 emit_hex_bytes(new_inst, displacement, 2);
211 }
212 else if (has_operand_metadata(curr, "disp32")) {
213 emit_hex_bytes(new_inst, displacement, 4);
214 }
215 }
216 else {
217 new_inst.words.push_back(curr);
218 }
219 }
220 inst.words.swap(new_inst.words);
221 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
222 }
223 }
224
225 string data_to_string(const line& inst) {
226 ostringstream out;
227 for (int i = 0; i < SIZE(inst.words); ++i) {
228 if (i > 0) out << ' ';
229 out << inst.words.at(i).data;
230 }
231 return out.str();
232 }
233
// Return a copy of 's' without its final character.
// An empty 's' yields an empty string (decrementing end() of an empty string
// would be undefined).
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
237
238
239
240
241
242
243
//: Several labels may point at the same address, since label definitions take
//: up no bytes. With the segment starting at 0x1: 'loop' and '$loop2' are at
//: 1, the 5-byte '05 .../imm32' ends at 6, each 2-byte 'eb .../disp8' ends at
//: 8 and 0xa respectively, so '$loop3' is at 0xa.
//: Displacements are relative to the following instruction: the first jump
//: goes from 8 back to 1 (-7, i.e. f9); the second goes from 0xa to 0xa (00).
:(scenario multiple_labels_at)
== 0x1

loop:
$loop2:

05 0x0d0c0b0a/imm32

eb $loop2/disp8

eb $loop3/disp8

$loop3:
+transform: label 'loop' is at address 1
+transform: label '$loop2' is at address 1
+transform: label '$loop3' is at address a

+transform: instruction after transform: 'eb f9'

+transform: instruction after transform: 'eb 00'
264
//: Defining the same label twice is an error.
:(scenario duplicate_label)
% Hide_errors = true;
== 0x1
loop:
loop:
05 0x0d0c0b0a/imm32
+error: duplicate label 'loop'

//: Two-character label names are rejected by check_valid_name.
:(scenario label_too_short)
% Hide_errors = true;
== 0x1
xz:
05 0x0d0c0b0a/imm32
+error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name

//: Label names starting with "0x" are rejected by check_valid_name.
:(scenario label_hex)
% Hide_errors = true;
== 0x1
0xab:
05 0x0d0c0b0a/imm32
+error: '0xab' looks like a hex number; use a different name

//: Label names starting with '-' are rejected by check_valid_name.
:(scenario label_negative_hex)
% Hide_errors = true;
== 0x1
-a:
05 0x0d0c0b0a/imm32
+error: '-a' starts with '-', which can be confused with a negative number; use a different name
293
294
295
296
//: Checks where the second segment is placed when both segments contain
//: labels. Presumably the expected address only holds if labels contribute
//: nothing to segment size -- TODO confirm against the segment-layout layer.
:(scenario segment_size_ignores_labels)
== code
05/add 0x0d0c0b0a/imm32
foo:
== data
bar:
00
+transform: segment 1 begins at address 0x0a000079
305
:(before "End size_of(word w) Special-cases")
// A label definition contributes zero bytes to the size of a word.
else if (is_label(w))
  return 0;