https://github.com/akkartik/mu/blob/master/subx/035labels.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 :(before "End looks_like_hex_int(s) Detectors")
// Detector spliced into looks_like_hex_int(s): every two-character 's' is
// reported as hex-like. check_valid_name() rejects two-character label names
// with a message giving the same reason (they can look like raw hex bytes).
23 if (SIZE(s) == 2) return true;
24
25 :(scenarios transform)
26 :(scenario pack_immediate_ignores_single_byte_nondigit_operand)
27 % Hide_errors = true;
28 == 0x1
29 b9/copy a/imm32
30 +transform: packing instruction 'b9/copy a/imm32'
31
32 +transform: instruction after packing: 'b9 a'
33
34 :(scenario pack_immediate_ignores_3_hex_digit_operand)
35 % Hide_errors = true;
36 == 0x1
37 b9/copy aaa/imm32
38 +transform: packing instruction 'b9/copy aaa/imm32'
39
40 +transform: instruction after packing: 'b9 aaa'
41
42 :(scenario pack_immediate_ignores_non_hex_operand)
43 % Hide_errors = true;
44 == 0x1
45 b9/copy xxx/imm32
46 +transform: packing instruction 'b9/copy xxx/imm32'
47
48 +transform: instruction after packing: 'b9 xxx'
49
50
51 :(code)
52 void check_valid_name(const string& s) {
53 if (s.empty()) {
54 raise << "empty name!\n" << end();
55 return;
56 }
57 if (s.at(0) == '-')
58 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
59 if (s.substr(0, 2) == "0x") {
60 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
61 return;
62 }
63 if (isdigit(s.at(0)))
64 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
65 if (SIZE(s) == 2)
66 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
67 }
68
69
70
71 :(scenario map_label)
72 == 0x1
73 loop:
74 05 0x0d0c0b0a/imm32
75 +transform: label 'loop' is at address 1
76
77 :(before "End Level-2 Transforms")
// Append rewrite_labels to the Transform list, ahead of the
// "End Level-2 Transforms" waypoint.
78 Transform.push_back(rewrite_labels);
79 :(code)
80 void rewrite_labels(program& p) {
81 trace(99, "transform") << "-- rewrite labels" << end();
82 if (p.segments.empty()) return;
83 segment& code = p.segments.at(0);
84 map<string, int32_t> byte_index;
85 compute_byte_indices_for_labels(code, byte_index);
86 if (trace_contains_errors()) return;
87 drop_labels(code);
88 if (trace_contains_errors()) return;
89 replace_labels_with_displacements(code, byte_index);
90 }
91
92 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
93 int current_byte = 0;
94 for (int i = 0; i < SIZE(code.lines); ++i) {
95 const line& inst = code.lines.at(i);
96 for (int j = 0; j < SIZE(inst.words); ++j) {
97 const word& curr = inst.words.at(j);
98
99
100
101
102
103 if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
104 if (*curr.data.rbegin() == ':')
105 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
106 current_byte += 4;
107 }
108 else if (has_operand_metadata(curr, "disp16")) {
109 if (*curr.data.rbegin() == ':')
110 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
111 current_byte += 2;
112 }
113
114 else if (*curr.data.rbegin() != ':') {
115 ++current_byte;
116 }
117 else {
118 string label = drop_last(curr.data);
119
120 check_valid_name(label);
121 if (trace_contains_errors()) return;
122 if (contains_any_operand_metadata(curr))
123 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
124 if (j > 0)
125 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
126 if (Map_file.is_open())
127 Map_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
128 if (contains_key(byte_index, label)) {
129 raise << "duplicate label '" << label << "'\n" << end();
130 return;
131 }
132 put(byte_index, label, current_byte);
133 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
134
135 }
136 }
137 }
138 }
139
140 :(before "End Globals")
// Set when the '--map' commandline option is given.
141 bool Dump_map = false;
// Output stream that compute_byte_indices_for_labels() writes
// "0x<address> <label>" lines to whenever it is open.
142 ofstream Map_file;
143 :(before "End Commandline Options")
144 else if (is_equal(*arg, "--map")) {
145 Dump_map = true;
146
147 }
148
149 :(after "Begin subx translate")
// With '--map', label addresses go to a file named "map" (relative path).
150 if (Dump_map)
151 Map_file.open("map");
152 :(before "End subx translate")
153 if (Dump_map)
154 Map_file.close();
155
156 :(code)
157 void drop_labels(segment& code) {
158 for (int i = 0; i < SIZE(code.lines); ++i) {
159 line& inst = code.lines.at(i);
160 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
161 inst.words.erase(new_end, inst.words.end());
162 }
163 }
164
165 bool is_label(const word& w) {
166 return *w.data.rbegin() == ':';
167 }
168
169 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
170 int32_t byte_index_next_instruction_starts_at = 0;
171 for (int i = 0; i < SIZE(code.lines); ++i) {
172 line& inst = code.lines.at(i);
173 byte_index_next_instruction_starts_at += num_bytes(inst);
174 line new_inst;
175 for (int j = 0; j < SIZE(inst.words); ++j) {
176 const word& curr = inst.words.at(j);
177 if (contains_key(byte_index, curr.data)) {
178 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
179 if (has_operand_metadata(curr, "disp8")) {
180 if (displacement > 0xff || displacement < -0x7f)
181 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end();
182 else
183 emit_hex_bytes(new_inst, displacement, 1);
184 }
185 else if (has_operand_metadata(curr, "disp16")) {
186 if (displacement > 0xffff || displacement < -0x7fff)
187 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end();
188 else
189 emit_hex_bytes(new_inst, displacement, 2);
190 }
191 else if (has_operand_metadata(curr, "disp32")) {
192 emit_hex_bytes(new_inst, displacement, 4);
193 }
194 }
195 else {
196 new_inst.words.push_back(curr);
197 }
198 }
199 inst.words.swap(new_inst.words);
200 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
201 }
202 }
203
204 string data_to_string(const line& inst) {
205 ostringstream out;
206 for (int i = 0; i < SIZE(inst.words); ++i) {
207 if (i > 0) out << ' ';
208 out << inst.words.at(i).data;
209 }
210 return out.str();
211 }
212
// Return a copy of 's' without its final character (used to strip the trailing
// ':' from a label definition). An empty string is returned unchanged rather
// than stepping an iterator before begin(), which is undefined behavior.
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
216
217
218
219
220
221
222
223 :(scenario multiple_labels_at)
224 == 0x1
225
226 loop:
227 $loop2:
228
229 05 0x0d0c0b0a/imm32
230
231 eb $loop2/disp8
232
233 eb $loop3/disp8
234
235 $loop3:
236 +transform: label 'loop' is at address 1
237 +transform: label '$loop2' is at address 1
238 +transform: label '$loop3' is at address a
239
240 +transform: instruction after transform: 'eb f9'
241
242 +transform: instruction after transform: 'eb 00'
243
244 :(scenario duplicate_label)
245 % Hide_errors = true;
246 == 0x1
247 loop:
248 loop:
249 05 0x0d0c0b0a/imm32
250 +error: duplicate label 'loop'
251
252 :(scenario label_too_short)
253 % Hide_errors = true;
254 == 0x1
255 xz:
256 05 0x0d0c0b0a/imm32
257 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
258
259 :(scenario label_hex)
260 % Hide_errors = true;
261 == 0x1
262 0xab:
263 05 0x0d0c0b0a/imm32
264 +error: '0xab' looks like a hex number; use a different name
265
266 :(scenario label_negative_hex)
267 % Hide_errors = true;
268 == 0x1
269 -a:
270 05 0x0d0c0b0a/imm32
271 +error: '-a' starts with '-', which can be confused with a negative number; use a different name
272
273
274
275
276 :(scenario segment_size_ignores_labels)
277 == code
278 05/add 0x0d0c0b0a/imm32
279 foo:
280 == data
281 bar:
282 00
283 +transform: segment 1 begins at address 0x0a000079
284
285 :(before "End size_of(word w) Special-cases")
// Special case spliced into size_of(word w): a label definition contributes
// zero bytes (see scenario segment_size_ignores_labels).
286 else if (is_label(w))
287 return 0;