1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 void check_valid_name(const string& s) {
22 if (s.empty()) {
23 raise << "empty name!\n" << end();
24 return;
25 }
26 if (s.at(0) == '-')
27 raise << "'" << s << "' starts with '-', which can be confused with a negative number; use a different name\n" << end();
28 if (s.substr(0, 2) == "0x") {
29 raise << "'" << s << "' looks like a hex number; use a different name\n" << end();
30 return;
31 }
32 if (isdigit(s.at(0)))
33 raise << "'" << s << "' starts with a digit, and so can be confused with a negative number; use a different name.\n" << end();
34 if (SIZE(s) == 2)
35 raise << "'" << s << "' is two characters long which can look like raw hex bytes at a glance; use a different name\n" << end();
36 }
37
38 :(scenarios transform)
39 :(scenario map_label)
40 == 0x1
41
42
43
44 loop:
45 05 0x0d0c0b0a/imm32
46 +transform: label 'loop' is at address 1
47
48 :(before "End Level-2 Transforms")
49 Transform.push_back(rewrite_labels);
50 :(code)
51 void rewrite_labels(program& p) {
52 trace(99, "transform") << "-- rewrite labels" << end();
53 if (p.segments.empty()) return;
54 segment& code = p.segments.at(0);
55 map<string, int32_t> byte_index;
56 compute_byte_indices_for_labels(code, byte_index);
57 if (trace_contains_errors()) return;
58 drop_labels(code);
59 if (trace_contains_errors()) return;
60 replace_labels_with_displacements(code, byte_index);
61 }
62
63 void compute_byte_indices_for_labels(const segment& code, map<string, int32_t>& byte_index) {
64 int current_byte = 0;
65 for (int i = 0; i < SIZE(code.lines); ++i) {
66 const line& inst = code.lines.at(i);
67 for (int j = 0; j < SIZE(inst.words); ++j) {
68 const word& curr = inst.words.at(j);
69
70
71
72
73
74 if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
75 if (*curr.data.rbegin() == ':')
76 raise << "'" << to_string(inst) << "': don't use ':' when jumping to labels\n" << end();
77 current_byte += 4;
78 }
79
80 else if (*curr.data.rbegin() != ':') {
81 ++current_byte;
82 }
83 else {
84 string label = drop_last(curr.data);
85
86 check_valid_name(label);
87 if (trace_contains_errors()) return;
88 if (contains_any_operand_metadata(curr))
89 raise << "'" << to_string(inst) << "': label definition (':') not allowed in operand\n" << end();
90 if (j > 0)
91 raise << "'" << to_string(inst) << "': labels can only be the first word in a line.\n" << end();
92 put(byte_index, label, current_byte);
93 trace(99, "transform") << "label '" << label << "' is at address " << (current_byte+code.start) << end();
94
95 }
96 }
97 }
98 }
99
100 void drop_labels(segment& code) {
101 for (int i = 0; i < SIZE(code.lines); ++i) {
102 line& inst = code.lines.at(i);
103 vector<word>::iterator new_end = remove_if(inst.words.begin(), inst.words.end(), is_label);
104 inst.words.erase(new_end, inst.words.end());
105 }
106 }
107
108 bool is_label(const word& w) {
109 return *w.data.rbegin() == ':';
110 }
111
112 void replace_labels_with_displacements(segment& code, const map<string, int32_t>& byte_index) {
113 int32_t byte_index_next_instruction_starts_at = 0;
114 for (int i = 0; i < SIZE(code.lines); ++i) {
115 line& inst = code.lines.at(i);
116 byte_index_next_instruction_starts_at += num_bytes(inst);
117 line new_inst;
118 for (int j = 0; j < SIZE(inst.words); ++j) {
119 const word& curr = inst.words.at(j);
120 if (contains_key(byte_index, curr.data)) {
121 int32_t displacement = static_cast<int32_t>(get(byte_index, curr.data)) - byte_index_next_instruction_starts_at;
122 if (has_metadata(curr, "disp8") || has_metadata(curr, "imm8")) {
123 if (displacement > 0xff || displacement < -0x7f)
124 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 8 bits\n" << end();
125 else
126 emit_hex_bytes(new_inst, displacement, 1);
127 }
128 else if (has_metadata(curr, "disp16")) {
129 if (displacement > 0xffff || displacement < -0x7fff)
130 raise << "'" << to_string(inst) << "': label too far away for displacement " << std::hex << displacement << " to fit in 16 bits\n" << end();
131 else
132 emit_hex_bytes(new_inst, displacement, 2);
133 }
134 else if (has_metadata(curr, "disp32") || has_metadata(curr, "imm32")) {
135 emit_hex_bytes(new_inst, displacement, 4);
136 }
137 }
138 else {
139 new_inst.words.push_back(curr);
140 }
141 }
142 inst.words.swap(new_inst.words);
143 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
144 }
145 }
146
147 string data_to_string(const line& inst) {
148 ostringstream out;
149 for (int i = 0; i < SIZE(inst.words); ++i) {
150 if (i > 0) out << ' ';
151 out << inst.words.at(i).data;
152 }
153 return out.str();
154 }
155
// Return a copy of 's' without its final character.
// An empty string has nothing to drop and is returned as an empty string
// (decrementing end() of an empty string would be undefined behavior).
string drop_last(const string& s) {
  if (s.empty()) return string();
  return s.substr(0, s.size()-1);
}
159
160
161
162
163
164
165
166 :(scenario multiple_labels_at)
167 == 0x1
168
169
170
171
172 loop:
173 $loop2:
174
175 05 0x0d0c0b0a/imm32
176
177 eb $loop2/disp8
178
179 eb $loop3/disp8
180
181 $loop3:
182 +transform: label 'loop' is at address 1
183 +transform: label '$loop2' is at address 1
184 +transform: label '$loop3' is at address a
185
186 +transform: instruction after transform: 'eb f9'
187
188 +transform: instruction after transform: 'eb 00'
189
190 :(scenario label_too_short)
191 % Hide_errors = true;
192 == 0x1
193
194
195
196 xz:
197 05 0x0d0c0b0a/imm32
198 +error: 'xz' is two characters long which can look like raw hex bytes at a glance; use a different name
199
200 :(scenario label_hex)
201 % Hide_errors = true;
202 == 0x1
203
204
205
206 0xab:
207 05 0x0d0c0b0a/imm32
208 +error: '0xab' looks like a hex number; use a different name
209
210 :(scenario label_negative_hex)
211 % Hide_errors = true;
212 == 0x1
213
214
215
216 -a:
217 05 0x0d0c0b0a/imm32
218 +error: '-a' starts with '-', which can be confused with a negative number; use a different name