https://github.com/akkartik/mu/blob/master/subx/035labels.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
//: Extra detector spliced into looks_like_hex_int(s): any word that is exactly
//: two characters long is reported as looking like a hex int.
22 :(before "End looks_like_hex_int(s) Detectors")
23 if (SIZE(s) == 2) return true;
24
25 :(scenarios transform)
26 :(scenario pack_immediate_ignores_single_byte_nondigit_operand)
27 % Hide_errors = true;
28 == 0x1
29 b9/copy a/imm32
30 +transform: packing instruction 'b9/copy a/imm32'
31
32 +transform: instruction after packing: 'b9 a'
33
34 :(scenario pack_immediate_ignores_3_hex_digit_operand)
35 % Hide_errors = true;
36 == 0x1
37 b9/copy aaa/imm32
38 +transform: packing instruction 'b9/copy aaa/imm32'
39
40 +transform: instruction after packing: 'b9 aaa'
41
42 :(scenario pack_immediate_ignores_non_hex_operand)
43 % Hide_errors = true;
44 == 0x1
45 b9/copy xxx/imm32
46 +transform: packing instruction 'b9/copy xxx/imm32'
47
48 +transform: instruction after packing: 'b9 xxx'
49
50
51 :(code)
52 void check_valid_name(const string& s) {
53 if (s.empty()) {
54 raise << "empty name!\n" << end();
55 return;
56 }
57 if (s.at(0) == '-')
58 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
59 if (s.substr(0, 2) == "0x") {
60 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
61 return;
62 }
63 if (isdigit(s.at(0)))
64 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
65 if (SIZE(s) == 2)
66 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
67 }
68
69
70
71 :(scenario map_label)
72 == 0x1
73 loop:
74 05 0x0d0c0b0a/imm32
75 +transform: label 'loop' is at address 1
76
//: Append rewrite_labels to the Transform list at the "End Level-2 Transforms"
//: waypoint.
77 :(before "End Level-2 Transforms")
78 Transform.push_back(rewrite_labels);
79 :(code)
80 void rewrite_labels(program& p) {
81 trace(99, "transform") << "-- rewrite labels" << end();
82 if (p.segments.empty()) return;
83 segment& code = p.segments.at(0);
84 map<string, int32_t> byte_index;
85 compute_byte_indices_for_labels(code, byte_index);
86 if (trace_contains_errors()) return;
87 drop_labels(code);
88 if (trace_contains_errors()) return;
89 replace_labels_with_displacements(code, byte_index);
90 }
91
92 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
93 int current_byte = 0;
94 for (int i = 0; i < SIZE(code.lines); ++i) {
95 const line& inst = code.lines.at(i);
96 for (int j = 0; j < SIZE(inst.words); ++j) {
97 const word& curr = inst.words.at(j);
98
99
100
101
102
103 if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
104 if (*curr.data.rbegin() == ':')
105 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
106 current_byte += 4;
107 }
108 else if (has_operand_metadata(curr, "disp16")) {
109 if (*curr.data.rbegin() == ':')
110 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
111 current_byte += 2;
112 }
113
114 else if (*curr.data.rbegin() != ':') {
115 ++current_byte;
116 }
117 else {
118 string label = drop_last(curr.data);
119
120 check_valid_name(label);
121 if (trace_contains_errors()) return;
122 if (contains_any_operand_metadata(curr))
123 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
124 if (j > 0)
125 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
126 if (Map_file.is_open())
127 Map_file << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
128 put(byte_index, label, current_byte);
129 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
130
131 }
132 }
133 }
134 }
135
//: Support for the --map commandline option.
//: Dump_map records whether --map was passed; Map_file is the output stream
//: that is opened on the file "map" for the duration of 'subx translate'.
136 :(before "End Globals")
137 bool Dump_map = false;
138 ofstream Map_file;
//: Recognize the --map flag while parsing commandline options.
139 :(before "End Commandline Options")
140 else if (is_equal(*arg, "--map")) {
141 Dump_map = true;
142
143 }
144
//: Open the map file at the start of translation, only if --map was given.
145 :(after "Begin subx translate")
146 if (Dump_map)
147 Map_file.open("map");
//: Close the map file at the end of translation.
148 :(before "End subx translate")
149 if (Dump_map)
150 Map_file.close();
151
152 :(code)
153 void drop_labels(segment& code) {
154 for (int i = 0; i < SIZE(code.lines); ++i) {
155 line& inst = code.lines.at(i);
156 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
157 inst.words.erase(new_end, inst.words.end());
158 }
159 }
160
161 bool is_label(const word& w) {
162 return *w.data.rbegin() == ':';
163 }
164
165 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
166 int32_t byte_index_next_instruction_starts_at = 0;
167 for (int i = 0; i < SIZE(code.lines); ++i) {
168 line& inst = code.lines.at(i);
169 byte_index_next_instruction_starts_at += num_bytes(inst);
170 line new_inst;
171 for (int j = 0; j < SIZE(inst.words); ++j) {
172 const word& curr = inst.words.at(j);
173 if (contains_key(byte_index, curr.data)) {
174 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
175 if (has_operand_metadata(curr, "disp8")) {
176 if (displacement > 0xff || displacement < -0x7f)
177 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end();
178 else
179 emit_hex_bytes(new_inst, displacement, 1);
180 }
181 else if (has_operand_metadata(curr, "disp16")) {
182 if (displacement > 0xffff || displacement < -0x7fff)
183 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end();
184 else
185 emit_hex_bytes(new_inst, displacement, 2);
186 }
187 else if (has_operand_metadata(curr, "disp32")) {
188 emit_hex_bytes(new_inst, displacement, 4);
189 }
190 }
191 else {
192 new_inst.words.push_back(curr);
193 }
194 }
195 inst.words.swap(new_inst.words);
196 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
197 }
198 }
199
200 string data_to_string(const line& inst) {
201 ostringstream out;
202 for (int i = 0; i < SIZE(inst.words); ++i) {
203 if (i > 0) out << ' ';
204 out << inst.words.at(i).data;
205 }
206 return out.str();
207 }
208
// Return a copy of `s` without its final character.
// An empty string is returned unchanged; stepping back from end() of an empty
// string would be undefined behavior.
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
212
213
214
215
216
217
218
219 :(scenario multiple_labels_at)
220 == 0x1
221
222 loop:
223 $loop2:
224
225 05 0x0d0c0b0a/imm32
226
227 eb $loop2/disp8
228
229 eb $loop3/disp8
230
231 $loop3:
232 +transform: label 'loop' is at address 1
233 +transform: label '$loop2' is at address 1
234 +transform: label '$loop3' is at address a
235
236 +transform: instruction after transform: 'eb f9'
237
238 +transform: instruction after transform: 'eb 00'
239
240 :(scenario label_too_short)
241 % Hide_errors = true;
242 == 0x1
243 xz:
244 05 0x0d0c0b0a/imm32
245 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
246
247 :(scenario label_hex)
248 % Hide_errors = true;
249 == 0x1
250 0xab:
251 05 0x0d0c0b0a/imm32
252 +error: '0xab' looks like a hex number; use a different name
253
254 :(scenario label_negative_hex)
255 % Hide_errors = true;
256 == 0x1
257 -a:
258 05 0x0d0c0b0a/imm32
259 +error: '-a' starts with '-', which can be confused with a negative number; use a different name
260
261
262
263
264 :(scenario segment_size_ignores_labels)
265 == code
266 05/add 0x0d0c0b0a/imm32
267 foo:
268 == data
269 bar:
270 00
271 +transform: segment 1 begins at address 0x0a000079
272
//: Special case spliced into size_of(word w): a label definition (see
//: is_label) contributes 0 bytes to the size.
273 :(before "End size_of(word w) Special-cases")
274 else if (is_label(w))
275 return 0;