1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 void check_valid_name(const string& s) {
22 if (s.empty()) {
23 raise << "empty name!\n" << end();
24 return;
25 }
26 if (s.at(0) == '-')
27 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
28 if (s.substr(0, 2) == "0x") {
29 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
30 return;
31 }
32 if (isdigit(s.at(0)))
33 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
34 if (SIZE(s) == 2)
35 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
36 }
37
38 :(scenarios transform)
39 :(scenario map_label)
40 == 0x1
41
42
43
44 loop:
45 05 0x0d0c0b0a/imm32
46 +transform: label 'loop' is at address 1
47
48 :(before "End Level-2 Transforms")
49 Transform.push_back(rewrite_labels);
50 :(code)
51 void rewrite_labels(program& p) {
52 trace(99, "transform") << "-- rewrite labels" << end();
53 if (p.segments.empty()) return;
54 segment& code = p.segments.at(0);
55 map<string, int32_t> byte_index;
56 compute_byte_indices_for_labels(code, byte_index);
57 if (trace_contains_errors()) return;
58 drop_labels(code);
59 if (trace_contains_errors()) return;
60 replace_labels_with_displacements(code, byte_index);
61 }
62
63 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
64 int current_byte = 0;
65 for (int i = 0; i < SIZE(code.lines); ++i) {
66 const line& inst = code.lines.at(i);
67 for (int j = 0; j < SIZE(inst.words); ++j) {
68 const word& curr = inst.words.at(j);
69
70
71
72
73
74 if (has_operand_metadata(curr, "disp32") || has_operand_metadata(curr, "imm32")) {
75 if (*curr.data.rbegin() == ':')
76 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
77 current_byte += 4;
78 }
79
80 else if (*curr.data.rbegin() != ':') {
81 ++current_byte;
82 }
83 else {
84 string label = drop_last(curr.data);
85
86 check_valid_name(label);
87 if (trace_contains_errors()) return;
88 if (contains_any_operand_metadata(curr))
89 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
90 if (j > 0)
91 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
92 if (Dump_map)
93 cerr << "0x" << HEXWORD << (code.start + current_byte) << ' ' << label << '\n';
94 put(byte_index, label, current_byte);
95 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
96
97 }
98 }
99 }
100 }
101
102 :(before "End Globals")
103 bool Dump_map = false;
104 :(before "End Commandline Options")
105 else if (is_equal(*arg, "--map")) {
106 Dump_map = true;
107 }
108
109 :(code)
110 void drop_labels(segment& code) {
111 for (int i = 0; i < SIZE(code.lines); ++i) {
112 line& inst = code.lines.at(i);
113 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
114 inst.words.erase(new_end, inst.words.end());
115 }
116 }
117
118 bool is_label(const word& w) {
119 return *w.data.rbegin() == ':';
120 }
121
122 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
123 int32_t byte_index_next_instruction_starts_at = 0;
124 for (int i = 0; i < SIZE(code.lines); ++i) {
125 line& inst = code.lines.at(i);
126 byte_index_next_instruction_starts_at += num_bytes(inst);
127 line new_inst;
128 for (int j = 0; j < SIZE(inst.words); ++j) {
129 const word& curr = inst.words.at(j);
130 if (contains_key(byte_index, curr.data)) {
131 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
132 if (has_operand_metadata(curr, "disp8")) {
133 if (displacement > 0xff || displacement < -0x7f)
134 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end();
135 else
136 emit_hex_bytes(new_inst, displacement, 1);
137 }
138 else if (has_operand_metadata(curr, "disp16")) {
139 if (displacement > 0xffff || displacement < -0x7fff)
140 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end();
141 else
142 emit_hex_bytes(new_inst, displacement, 2);
143 }
144 else if (has_operand_metadata(curr, "disp32")) {
145 emit_hex_bytes(new_inst, displacement, 4);
146 }
147 }
148 else {
149 new_inst.words.push_back(curr);
150 }
151 }
152 inst.words.swap(new_inst.words);
153 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
154 }
155 }
156
157 string data_to_string(const line& inst) {
158 ostringstream out;
159 for (int i = 0; i < SIZE(inst.words); ++i) {
160 if (i > 0) out << ' ';
161 out << inst.words.at(i).data;
162 }
163 return out.str();
164 }
165
// Return a copy of 's' without its final character.
// An empty string is returned unchanged rather than stepping an iterator
// before begin(), which is undefined.
string drop_last(const string& s) {
  if (s.empty()) return s;
  return s.substr(0, s.size()-1);
}
169
170
171
172
173
174
175
176 :(scenario multiple_labels_at)
177 == 0x1
178
179
180
181
182 loop:
183 $loop2:
184
185 05 0x0d0c0b0a/imm32
186
187 eb $loop2/disp8
188
189 eb $loop3/disp8
190
191 $loop3:
192 +transform: label 'loop' is at address 1
193 +transform: label '$loop2' is at address 1
194 +transform: label '$loop3' is at address a
195
196 +transform: instruction after transform: 'eb f9'
197
198 +transform: instruction after transform: 'eb 00'
199
200 :(scenario label_too_short)
201 % Hide_errors = true;
202 == 0x1
203
204
205
206 xz:
207 05 0x0d0c0b0a/imm32
208 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
209
210 :(scenario label_hex)
211 % Hide_errors = true;
212 == 0x1
213
214
215
216 0xab:
217 05 0x0d0c0b0a/imm32
218 +error: '0xab' looks like a hex number; use a different name
219
220 :(scenario label_negative_hex)
221 % Hide_errors = true;
222 == 0x1
223
224
225
226 -a:
227 05 0x0d0c0b0a/imm32
228 +error: '-a' starts with '-', which can be confused with a negative number; use a different name
229
230
231
232
233 :(scenario segment_size_ignores_labels)
234 % Mem_offset = CODE_START;
235 == code
236 05/add 0x0d0c0b0a/imm32
237 foo:
238 == data
239 bar:
240 00
241 +transform: segment 1 begins at address 0x08049079
242
243 :(before "End num_bytes(curr) Special-cases")
244 else if (is_label(curr))
245 ;