https://github.com/akkartik/mu/blob/main/037global_variables.cc
1
2
3
4
5
6
7
8
9 :(code)
10 void test_global_variable() {
11 run(
12 "== code 0x1\n"
13 "b9 x/imm32\n"
14 "== data 0x2000\n"
15 "x:\n"
16 " 00 00 00 00\n"
17 );
18 CHECK_TRACE_CONTENTS(
19 "transform: global variable 'x' is at address 0x00002000\n"
20 );
21 }
22
23 :(before "End Transforms")
24 Transform.push_back(rewrite_global_variables);
25 :(code)
26 void rewrite_global_variables(program& p) {
27 trace(3, "transform") << "-- rewrite global variables" << end();
28
29 map<string, uint32_t> address;
30 compute_addresses_for_global_variables(p, address);
31 if (trace_contains_errors()) return;
32 drop_global_variables(p);
33 replace_global_variables_with_addresses(p, address);
34 }
35
36 void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
37 for (int i = 0; i < SIZE(p.segments); ++i) {
38 if (p.segments.at(i).name != "code")
39 compute_addresses_for_global_variables(p.segments.at(i), address);
40 }
41 }
42
43 void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
44 int current_address = s.start;
45 for (int i = 0; i < SIZE(s.lines); ++i) {
46 const line& inst = s.lines.at(i);
47 for (int j = 0; j < SIZE(inst.words); ++j) {
48 const word& curr = inst.words.at(j);
49 if (*curr.data.rbegin() != ':') {
50 current_address += size_of(curr);
51 }
52 else {
53 string variable = drop_last(curr.data);
54
55 check_valid_name(variable);
56 if (trace_contains_errors()) return;
57 if (j > 0)
58 raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
59 if (Labels_file.is_open())
60 Labels_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
61 if (contains_key(address, variable)) {
62 raise << "duplicate global '" << variable << "'\n" << end();
63 return;
64 }
65 put(address, variable, current_address);
66 trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
67
68 }
69 }
70 }
71 }
72
73 void drop_global_variables(program& p) {
74 for (int i = 0; i < SIZE(p.segments); ++i) {
75 if (p.segments.at(i).name != "code")
76 drop_labels(p.segments.at(i));
77 }
78 }
79
80 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
81 if (p.segments.empty()) return;
82 for (int i = 0; i < SIZE(p.segments); ++i) {
83 segment& curr = p.segments.at(i);
84 if (curr.name == "code")
85 replace_global_variables_in_code_segment(curr, address);
86 else
87 replace_global_variables_in_data_segment(curr, address);
88 }
89 }
90
91 void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
92 for (int i = 0; i < SIZE(code.lines); ++i) {
93 line& inst = code.lines.at(i);
94 line new_inst;
95 for (int j = 0; j < SIZE(inst.words); ++j) {
96 const word& curr = inst.words.at(j);
97 if (!contains_key(address, curr.data)) {
98 if (!looks_like_hex_int(curr.data))
99 raise << "missing reference to global '" << curr.data << "'\n" << end();
100 new_inst.words.push_back(curr);
101 continue;
102 }
103 if (!valid_use_of_global_variable(curr)) {
104 raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
105 return;
106 }
107 emit_hex_bytes(new_inst, get(address, curr.data), 4);
108 }
109 inst.words.swap(new_inst.words);
110 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
111 }
112 }
113
114 void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
115 for (int i = 0; i < SIZE(data.lines); ++i) {
116 line& l = data.lines.at(i);
117 line new_l;
118 for (int j = 0; j < SIZE(l.words); ++j) {
119 const word& curr = l.words.at(j);
120 if (!contains_key(address, curr.data)) {
121 if (looks_like_hex_int(curr.data)) {
122 if (has_argument_metadata(curr, "imm32"))
123 emit_hex_bytes(new_l, curr, 4);
124 else if (has_argument_metadata(curr, "imm16"))
125 emit_hex_bytes(new_l, curr, 2);
126 else if (has_argument_metadata(curr, "imm8"))
127 emit_hex_bytes(new_l, curr, 1);
128 else if (has_argument_metadata(curr, "disp8"))
129 raise << "can't use /disp8 in a non-code segment\n" << end();
130 else if (has_argument_metadata(curr, "disp16"))
131 raise << "can't use /disp16 in a non-code segment\n" << end();
132 else if (has_argument_metadata(curr, "disp32"))
133 raise << "can't use /disp32 in a non-code segment\n" << end();
134 else
135 new_l.words.push_back(curr);
136 }
137 else {
138 raise << "missing reference to global '" << curr.data << "'\n" << end();
139 new_l.words.push_back(curr);
140 }
141 continue;
142 }
143 trace(99, "transform") << curr.data << " maps to " << HEXWORD << get(address, curr.data) << end();
144 emit_hex_bytes(new_l, get(address, curr.data), 4);
145 }
146 l.words.swap(new_l.words);
147 trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
148 }
149 }
150
151 bool valid_use_of_global_variable(const word& curr) {
152 if (has_argument_metadata(curr, "imm32")) return true;
153
154 return false;
155 }
156
157
158
159
160 :(after "Begin Transforms")
161 Transform.push_back(correlate_disp32_with_mod);
162 :(code)
163 void correlate_disp32_with_mod(program& p) {
164 if (p.segments.empty()) return;
165 segment& code = *find(p, "code");
166 for (int i = 0; i < SIZE(code.lines); ++i) {
167 line& inst = code.lines.at(i);
168 for (int j = 0; j < SIZE(inst.words); ++j) {
169 word& curr = inst.words.at(j);
170 if (has_argument_metadata(curr, "disp32")
171 && has_argument_metadata(inst, "mod"))
172 curr.metadata.push_back("has_mod");
173 }
174 }
175 }
176
177 :(before "End Valid Uses Of Global Variable(curr)")
178 if (has_argument_metadata(curr, "disp32"))
179 return has_metadata(curr, "has_mod");
180
181
182
183 :(code)
184 bool has_metadata(const word& w, const string& m) {
185 for (int i = 0; i < SIZE(w.metadata); ++i)
186 if (w.metadata.at(i) == m) return true;
187 return false;
188 }
189
190 void test_global_variable_disallowed_in_jump() {
191 Hide_errors = true;
192 run(
193 "== code 0x1\n"
194 "eb/jump x/disp8\n"
195 "== data 0x2000\n"
196 "x:\n"
197 " 00 00 00 00\n"
198 );
199 CHECK_TRACE_CONTENTS(
200 "error: 'eb/jump x/disp8': can't refer to global variable 'x'\n"
201
202
203 );
204 }
205
206 void test_global_variable_disallowed_in_call() {
207 Hide_errors = true;
208 run(
209 "== code 0x1\n"
210 "e8/call x/disp32\n"
211 "== data 0x2000\n"
212 "x:\n"
213 " 00 00 00 00\n"
214 );
215 CHECK_TRACE_CONTENTS(
216 "error: 'e8/call x/disp32': can't refer to global variable 'x'\n"
217
218
219 );
220 }
221
222 void test_global_variable_in_data_segment() {
223 run(
224 "== code 0x1\n"
225 "b9 x/imm32\n"
226 "== data 0x2000\n"
227 "x:\n"
228 " y/imm32\n"
229 "y:\n"
230 " 00 00 00 00\n"
231 );
232
233 CHECK_TRACE_CONTENTS(
234 "load: 0x00002000 -> 04\n"
235 "load: 0x00002001 -> 20\n"
236 "load: 0x00002002 -> 00\n"
237 "load: 0x00002003 -> 00\n"
238 );
239 CHECK_TRACE_COUNT("error", 0);
240 }
241
242 void test_raw_number_with_imm32_in_data_segment() {
243 run(
244 "== code 0x1\n"
245 "b9 x/imm32\n"
246 "== data 0x2000\n"
247 "x:\n"
248 " 1/imm32\n"
249 );
250
251 CHECK_TRACE_CONTENTS(
252 "load: 0x00002000 -> 01\n"
253 "load: 0x00002001 -> 00\n"
254 "load: 0x00002002 -> 00\n"
255 "load: 0x00002003 -> 00\n"
256 );
257 CHECK_TRACE_COUNT("error", 0);
258 }
259
260 void test_duplicate_global_variable() {
261 Hide_errors = true;
262 run(
263 "== code 0x1\n"
264 "40/increment-EAX\n"
265 "== data 0x2000\n"
266 "x:\n"
267 "x:\n"
268 " 00\n"
269 );
270 CHECK_TRACE_CONTENTS(
271 "error: duplicate global 'x'\n"
272 );
273 }
274
275 void test_global_variable_disp32_with_modrm() {
276 run(
277 "== code 0x1\n"
278 "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32\n"
279 "== data 0x2000\n"
280 "x:\n"
281 " 00 00 00 00\n"
282 );
283 CHECK_TRACE_COUNT("error", 0);
284 }
285
286 void test_global_variable_disp32_with_call() {
287 transform(
288 "== code 0x1\n"
289 "foo:\n"
290 " e8/call bar/disp32\n"
291 "bar:\n"
292 );
293 CHECK_TRACE_COUNT("error", 0);
294 }
295
296 string to_full_string(const line& in) {
297 ostringstream out;
298 for (int i = 0; i < SIZE(in.words); ++i) {
299 if (i > 0) out << ' ';
300 out << in.words.at(i).data;
301 for (int j = 0; j < SIZE(in.words.at(i).metadata); ++j)
302 out << '/' << in.words.at(i).metadata.at(j);
303 }
304 return out.str();
305 }