1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Heuristic check for whether a word should be read as a number rather than
// as a name. A word counts as a number when it
//   - starts with '-',
//   - starts with a decimal digit, or
//   - is exactly two characters long.
// An empty string is never a number.
bool is_number(const string& s) {
  if (s.empty()) return false;  // s.at(0) below would throw on empty input
  const char first = s.at(0);
  if (first == '-') return true;
  // isdigit() is only defined for values representable as unsigned char
  if (isdigit(static_cast<unsigned char>(first))) return true;
  return s.size() == 2;
}
22 :(before "End Unit Tests")
23 void test_is_number() {
24 CHECK(!is_number("a"));
25 }
26 :(code)
27 void check_valid_name(const string& s) {
28 if (s.empty()) {
29 raise << "empty name!\n" << end();
30 return;
31 }
32 if (s.at(0) == '-')
33 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
34 if (s.substr(0, 2) == "0x") {
35 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
36 return;
37 }
38 if (isdigit(s.at(0)))
39 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
40 if (SIZE(s) == 2)
41 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
42 }
43
44 :(scenarios transform)
45 :(scenario map_label)
46 == 0x1
47
48
49
50 loop:
51 05 0x0d0c0b0a/imm32
52 +transform: label 'loop' is at address 1
53
54 :(before "End Level-2 Transforms")
// Register rewrite_labels in the list of transforms.
Transform.push_back(rewrite_labels);
56 :(code)
57 void rewrite_labels(program& p) {
58 trace(99, "transform") << "-- rewrite labels" << end();
59 if (p.segments.empty()) return;
60 segment& code = p.segments.at(0);
61
62 map<string, int32_t> address;
63 compute_addresses_for_labels(code, address);
64 if (trace_contains_errors()) return;
65 drop_labels(code);
66 if (trace_contains_errors()) return;
67 replace_labels_with_addresses(code, address);
68 }
69
70 void compute_addresses_for_labels(const segment& code, map<string, int32_t>& address) {
71 int current_byte = 0;
72 for (int i = 0; i < SIZE(code.lines); ++i) {
73 const line& inst = code.lines.at(i);
74 for (int j = 0; j < SIZE(inst.words); ++j) {
75 const word& curr = inst.words.at(j);
76
77
78
79
80
81 if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
82 if (*curr.data.rbegin() == ':')
83 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
84 current_byte += 4;
85 }
86
87 else if (*curr.data.rbegin() != ':') {
88 ++current_byte;
89 }
90 else {
91 string label = drop_last(curr.data);
92
93 check_valid_name(label);
94 if (trace_contains_errors()) return;
95 if (contains_any_operand_metadata(curr))
96 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
97 if (j > 0)
98 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
99 put(address, label, current_byte);
100 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
101
102 }
103 }
104 }
105 }
106
107 void drop_labels(segment& code) {
108 for (int i = 0; i < SIZE(code.lines); ++i) {
109 line& inst = code.lines.at(i);
110 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
111 inst.words.erase(new_end, inst.words.end());
112 }
113 }
114
115 bool is_label(const word& w) {
116 return *w.data.rbegin() == ':';
117 }
118
119 void replace_labels_with_addresses(segment& code, const map<string, int32_t>& address) {
120 int32_t byte_next_instruction_starts_at = 0;
121 for (int i = 0; i < SIZE(code.lines); ++i) {
122 line& inst = code.lines.at(i);
123 byte_next_instruction_starts_at += num_bytes(inst);
124 line new_inst;
125 for (int j = 0; j < SIZE(inst.words); ++j) {
126 const word& curr = inst.words.at(j);
127 if (contains_key(address, curr.data)) {
128 int32_t offset = static_cast<int32_t>(get(address, curr.data)) - byte_next_instruction_starts_at;
129 if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
130 if (offset > 0xff || offset < -0x7f)
131 raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 8 bits\n" << end();
132 else
133 emit_hex_bytes(new_inst, offset, 1);
134 }
135 else if (has_metadata(curr, "disp16")) {
136 if (offset > 0xffff || offset < -0x7fff)
137 raise << "'" << to_string(inst) << "': label too far away for distance " << std::hex << offset << " to fit in 16 bits\n" << end();
138 else
139 emit_hex_bytes(new_inst, offset, 2);
140 }
141 else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
142 emit_hex_bytes(new_inst, offset, 4);
143 }
144 }
145 else {
146 new_inst.words.push_back(curr);
147 }
148 }
149 inst.words.swap(new_inst.words);
150 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
151 }
152 }
153
154
155 uint32_t num_bytes(const line& inst) {
156 uint32_t sum = 0;
157 for (int i = 0; i < SIZE(inst.words); ++i) {
158 const word& curr = inst.words.at(i);
159 if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32"))
160 sum += 4;
161 else
162 sum++;
163 }
164 return sum;
165 }
166
167 string data_to_string(const line& inst) {
168 ostringstream out;
169 for (int i = 0; i < SIZE(inst.words); ++i) {
170 if (i > 0) out << ' ';
171 out << inst.words.at(i).data;
172 }
173 return out.str();
174 }
175
// Return a copy of 's' without its final character.
// An empty string is returned unchanged rather than stepping an iterator
// before begin().
string drop_last(const string& s) {
  if (s.empty()) return string();
  return s.substr(0, s.size()-1);
}
179
180
181
182
183
184
185
186 :(scenario multiple_labels_at)
187 == 0x1
188
189
190
191
192 loop:
193 $loop2:
194
195 05 0x0d0c0b0a/imm32
196
197 eb $loop2/disp8
198
199 eb $loop3/disp8
200
201 $loop3:
202 +transform: label 'loop' is at address 1
203 +transform: label '$loop2' is at address 1
204 +transform: label '$loop3' is at address a
205
206 +transform: instruction after transform: 'eb f9'
207
208 +transform: instruction after transform: 'eb 00'
209
210 :(scenario label_too_short)
211 % Hide_errors = true;
212 == 0x1
213
214
215
216 xz:
217 05 0x0d0c0b0a/imm32
218 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
219
220 :(scenario label_hex)
221 % Hide_errors = true;
222 == 0x1
223
224
225
226 0xab:
227 05 0x0d0c0b0a/imm32
228 +error: '0xab' looks like a hex number; use a different name
229
230 :(scenario label_negative_hex)
231 % Hide_errors = true;
232 == 0x1
233
234
235
236 -a:
237 05 0x0d0c0b0a/imm32
238 +error: '-a' starts with '-', which can be confused with a negative number; use a different name