https://github.com/akkartik/mu/blob/master/subx/036global_variables.cc
1
2
3
4
5
6
7
8
9 :(scenario global_variable)
10 == code
11 b9 x/imm32
12 == data
13 x:
14 00 00 00 00
15 +transform: global variable 'x' is at address 0x0a000079
16
17 :(before "End Level-2 Transforms")
18 Transform.push_back(rewrite_global_variables);
19 :(code)
20 void rewrite_global_variables(program& p) {
21 trace(3, "transform") << "-- rewrite global variables" << end();
22
23 map<string, uint32_t> address;
24 compute_addresses_for_global_variables(p, address);
25 if (trace_contains_errors()) return;
26 drop_global_variables(p);
27 replace_global_variables_with_addresses(p, address);
28 }
29
30 void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
31 for (int i = 1; i < SIZE(p.segments); ++i)
32 compute_addresses_for_global_variables(p.segments.at(i), address);
33 }
34
35 void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
36 int current_address = s.start;
37 for (int i = 0; i < SIZE(s.lines); ++i) {
38 const line& inst = s.lines.at(i);
39 for (int j = 0; j < SIZE(inst.words); ++j) {
40 const word& curr = inst.words.at(j);
41 if (*curr.data.rbegin() != ':') {
42 current_address += size_of(curr);
43 }
44 else {
45 string variable = drop_last(curr.data);
46
47 check_valid_name(variable);
48 if (trace_contains_errors()) return;
49 if (j > 0)
50 raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
51 if (Map_file.is_open())
52 Map_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
53 if (contains_key(address, variable)) {
54 raise << "duplicate global '" << variable << "'\n" << end();
55 return;
56 }
57 put(address, variable, current_address);
58 trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
59
60 }
61 }
62 }
63 }
64
65 void drop_global_variables(program& p) {
66 for (int i = 1; i < SIZE(p.segments); ++i)
67 drop_labels(p.segments.at(i));
68 }
69
70 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
71 if (p.segments.empty()) return;
72 replace_global_variables_in_code_segment(p.segments.at(0), address);
73 for (int i = 1; i < SIZE(p.segments); ++i)
74 replace_global_variables_in_data_segment(p.segments.at(i), address);
75 }
76
77 void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
78 for (int i = 0; i < SIZE(code.lines); ++i) {
79 line& inst = code.lines.at(i);
80 line new_inst;
81 for (int j = 0; j < SIZE(inst.words); ++j) {
82 const word& curr = inst.words.at(j);
83 if (!contains_key(address, curr.data)) {
84 if (!looks_like_hex_int(curr.data))
85 raise << "missing reference to global '" << curr.data << "'\n" << end();
86 new_inst.words.push_back(curr);
87 continue;
88 }
89 if (!valid_use_of_global_variable(curr)) {
90 raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
91 return;
92 }
93 emit_hex_bytes(new_inst, get(address, curr.data), 4);
94 }
95 inst.words.swap(new_inst.words);
96 trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
97 }
98 }
99
100 void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
101 for (int i = 0; i < SIZE(data.lines); ++i) {
102 line& l = data.lines.at(i);
103 line new_l;
104 for (int j = 0; j < SIZE(l.words); ++j) {
105 const word& curr = l.words.at(j);
106 if (!contains_key(address, curr.data)) {
107 if (looks_like_hex_int(curr.data)) {
108 if (has_operand_metadata(curr, "imm32"))
109 emit_hex_bytes(new_l, curr, 4);
110 else if (has_operand_metadata(curr, "imm16"))
111 emit_hex_bytes(new_l, curr, 2);
112 else if (has_operand_metadata(curr, "imm8"))
113 emit_hex_bytes(new_l, curr, 1);
114 else if (has_operand_metadata(curr, "disp8"))
115 raise << "can't use /disp8 in a non-code segment\n" << end();
116 else if (has_operand_metadata(curr, "disp16"))
117 raise << "can't use /disp16 in a non-code segment\n" << end();
118 else if (has_operand_metadata(curr, "disp32"))
119 raise << "can't use /disp32 in a non-code segment\n" << end();
120 else
121 new_l.words.push_back(curr);
122 }
123 else {
124 raise << "missing reference to global '" << curr.data << "'\n" << end();
125 new_l.words.push_back(curr);
126 }
127 continue;
128 }
129 trace(99, "transform") << curr.data << " maps to " << HEXWORD << get(address, curr.data) << end();
130 emit_hex_bytes(new_l, get(address, curr.data), 4);
131 }
132 l.words.swap(new_l.words);
133 trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
134 }
135 }
136
137 bool valid_use_of_global_variable(const word& curr) {
138 if (has_operand_metadata(curr, "imm32")) return true;
139
140 return false;
141 }
142
143
144
145
146 :(after "Begin Level-2 Transforms")
147 Transform.push_back(correlate_disp32_with_mod);
148 :(code)
149 void correlate_disp32_with_mod(program& p) {
150 if (p.segments.empty()) return;
151 segment& code = p.segments.at(0);
152 for (int i = 0; i < SIZE(code.lines); ++i) {
153 line& inst = code.lines.at(i);
154 for (int j = 0; j < SIZE(inst.words); ++j) {
155 word& curr = inst.words.at(j);
156 if (has_operand_metadata(curr, "disp32")
157 && has_operand_metadata(inst, "mod"))
158 curr.metadata.push_back("has_mod");
159 }
160 }
161 }
162
163 :(before "End Valid Uses Of Global Variable(curr)")
164 if (has_operand_metadata(curr, "disp32"))
165 return has_metadata(curr, "has_mod");
166
167
168
169 :(code)
170 bool has_metadata(const word& w, const string& m) {
171 for (int i = 0; i < SIZE(w.metadata); ++i)
172 if (w.metadata.at(i) == m) return true;
173 return false;
174 }
175
176 :(scenario global_variable_disallowed_in_jump)
177 % Hide_errors = true;
178 == code
179 eb/jump x/disp8
180 == data
181 x:
182 00 00 00 00
183 +error: 'eb/jump x/disp8': can't refer to global variable 'x'
184
185
186
187 :(scenario global_variable_disallowed_in_call)
188 % Hide_errors = true;
189 == code
190 e8/call x/disp32
191 == data
192 x:
193 00 00 00 00
194 +error: 'e8/call x/disp32': can't refer to global variable 'x'
195
196
197
198 :(scenario global_variable_in_data_segment)
199 == 0x1
200 b9 x/imm32
201 == 0x0a000000
202 x:
203 y/imm32
204 y:
205 00 00 00 00
206
207 +load: 0x0a000000 -> 04
208 +load: 0x0a000001 -> 00
209 +load: 0x0a000002 -> 00
210 +load: 0x0a000003 -> 0a
211 $error: 0
212
213 :(scenario raw_number_with_imm32_in_data_segment)
214 == 0x1
215 b9 x/imm32
216 == 0x0a000000
217 x:
218 1/imm32
219
220 +load: 0x0a000000 -> 01
221 +load: 0x0a000001 -> 00
222 +load: 0x0a000002 -> 00
223 +load: 0x0a000003 -> 00
224 $error: 0
225
226 :(scenario duplicate_global_variable)
227 % Hide_errors = true;
228 == 0x1
229 40/increment-EAX
230 == 0x0a000000
231 x:
232 x:
233 00
234 +error: duplicate global 'x'
235
236 :(scenario global_variable_disp32_with_modrm)
237 == code
238 8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32
239 == data
240 x:
241 00 00 00 00
242 $error: 0
243
244 :(scenarios transform)
245 :(scenario global_variable_disp32_with_call)
246 == code
247 foo:
248 e8/call bar/disp32
249 bar:
250 $error: 0
251
252 :(code)
253 string to_full_string(const line& in) {
254 ostringstream out;
255 for (int i = 0; i < SIZE(in.words); ++i) {
256 if (i > 0) out << ' ';
257 out << in.words.at(i).data;
258 for (int j = 0; j < SIZE(in.words.at(i).metadata); ++j)
259 out << '/' << in.words.at(i).metadata.at(j);
260 }
261 return out.str();
262 }