//: Source: https://github.com/akkartik/mu/blob/master/subx/036global_variables.cc
//: Global variables.
//:
//: Global variables are just labels in the data segment.
//: However, they can only be used in imm32 operands, or in disp32 operands of
//: instructions that also have a /mod operand. In particular, they can't be
//: used with jump and call instructions.
//:
//: This layer has much the same structure as rewriting labels.
  8 
  9 :(code)
 10 void test_global_variable() {
 11   run(
 12       "== code\n"
 13       "b9  x/imm32\n"
 14       "== data\n"
 15       "x:\n"
 16       "  00 00 00 00\n"
 17   );
 18   CHECK_TRACE_CONTENTS(
 19       "transform: global variable 'x' is at address 0x0a000079\n"
 20   );
 21 }
 22 
 23 :(before "End Level-2 Transforms")
 24 Transform.push_back(rewrite_global_variables);
 25 :(code)
 26 void rewrite_global_variables(program& p) {
 27   trace(3, "transform") << "-- rewrite global variables" << end();
 28   // Begin rewrite_global_variables
 29   map<string, uint32_t> address;
 30   compute_addresses_for_global_variables(p, address);
 31   if (trace_contains_errors()) return;
 32   drop_global_variables(p);
 33   replace_global_variables_with_addresses(p, address);
 34 }
 35 
 36 void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
 37   for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i)
 38     compute_addresses_for_global_variables(p.segments.at(i), address);
 39 }
 40 
 41 void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
 42   int current_address = s.start;
 43   for (int i = 0;  i < SIZE(s.lines);  ++i) {
 44     const line& inst = s.lines.at(i);
 45     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 46       const word& curr = inst.words.at(j);
 47       if (*curr.data.rbegin() != ':') {
 48         current_address += size_of(curr);
 49       }
 50       else {
 51         string variable = drop_last(curr.data);
 52         // ensure variables look sufficiently different from raw hex
 53         check_valid_name(variable);
 54         if (trace_contains_errors()) return;
 55         if (j > 0)
 56           raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
 57         if (Map_file.is_open())
 58           Map_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
 59         if (contains_key(address, variable)) {
 60           raise << "duplicate global '" << variable << "'\n" << end();
 61           return;
 62         }
 63         put(address, variable, current_address);
 64         trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
 65         // no modifying current_address; global variable definitions won't be in the final binary
 66       }
 67     }
 68   }
 69 }
 70 
 71 void drop_global_variables(program& p) {
 72   for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i)
 73     drop_labels(p.segments.at(i));
 74 }
 75 
 76 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
 77   if (p.segments.empty()) return;
 78   replace_global_variables_in_code_segment(p.segments.at(0), address);
 79   for (int i = /*skip code*/1;  i < SIZE(p.segments);  ++i)
 80     replace_global_variables_in_data_segment(p.segments.at(i), address);
 81 }
 82 
 83 void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
 84   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 85     line& inst = code.lines.at(i);
 86     line new_inst;
 87     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 88       const word& curr = inst.words.at(j);
 89       if (!contains_key(address, curr.data)) {
 90         if (!looks_like_hex_int(curr.data))
 91           raise << "missing reference to global '" << curr.data << "'\n" << end();
 92         new_inst.words.push_back(curr);
 93         continue;
 94       }
 95       if (!valid_use_of_global_variable(curr)) {
 96         raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
 97         return;
 98       }
 99       emit_hex_bytes(new_inst, get(address, curr.data), 4);
100     }
101     inst.words.swap(new_inst.words);
102     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
103   }
104 }
105 
106 void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
107   for (int i = 0;  i < SIZE(data.lines);  ++i) {
108     line& l = data.lines.at(i);
109     line new_l;
110     for (int j = 0;  j < SIZE(l.words);  ++j) {
111       const word& curr = l.words.at(j);
112       if (!contains_key(address, curr.data)) {
113         if (looks_like_hex_int(curr.data)) {
114           if (has_operand_metadata(curr, "imm32"))
115             emit_hex_bytes(new_l, curr, 4);
116           else if (has_operand_metadata(curr, "imm16"))
117             emit_hex_bytes(new_l, curr, 2);
118           else if (has_operand_metadata(curr, "imm8"))
119             emit_hex_bytes(new_l, curr, 1);
120           else if (has_operand_metadata(curr, "disp8"))
121             raise << "can't use /disp8 in a non-code segment\n" << end();
122           else if (has_operand_metadata(curr, "disp16"))
123             raise << "can't use /disp16 in a non-code segment\n" << end();
124           else if (has_operand_metadata(curr, "disp32"))
125             raise << "can't use /disp32 in a non-code segment\n" << end();
126           else
127             new_l.words.push_back(curr);
128         }
129         else {
130           raise << "missing reference to global '" << curr.data << "'\n" << end();
131           new_l.words.push_back(curr);
132         }
133         continue;
134       }
135       trace(99, "transform") << curr.data << " maps to " << HEXWORD << get(address, curr.data) << end();
136       emit_hex_bytes(new_l, get(address, curr.data), 4);
137     }
138     l.words.swap(new_l.words);
139     trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
140   }
141 }
142 
143 bool valid_use_of_global_variable(const word& curr) {
144   if (has_operand_metadata(curr, "imm32")) return true;
145   // End Valid Uses Of Global Variable(curr)
146   return false;
147 }
148 
149 //:: a more complex sanity check for how we use global variables
150 //: requires first saving some data early before we pack operands
151 
152 :(after "Begin Level-2 Transforms")
153 Transform.push_back(correlate_disp32_with_mod);
154 :(code)
155 void correlate_disp32_with_mod(program& p) {
156   if (p.segments.empty()) return;
157   segment& code = p.segments.at(0);
158   for (int i = 0;  i < SIZE(code.lines);  ++i) {
159     line& inst = code.lines.at(i);
160     for (int j = 0;  j < SIZE(inst.words);  ++j) {
161       word& curr = inst.words.at(j);
162       if (has_operand_metadata(curr, "disp32")
163           && has_operand_metadata(inst, "mod"))
164         curr.metadata.push_back("has_mod");
165     }
166   }
167 }
168 
169 :(before "End Valid Uses Of Global Variable(curr)")
170 if (has_operand_metadata(curr, "disp32"))
171   return has_metadata(curr, "has_mod");
172 // todo: more sophisticated check, to ensure we don't use global variable
173 // addresses as a real displacement added to other operands.
174 
175 :(code)
176 bool has_metadata(const word& w, const string& m) {
177   for (int i = 0;  i < SIZE(w.metadata);  ++i)
178     if (w.metadata.at(i) == m) return true;
179   return false;
180 }
181 
182 void test_global_variable_disallowed_in_jump() {
183   Hide_errors = true;
184   run(
185       "== code\n"
186       "eb/jump  x/disp8\n"
187       "== data\n"
188       "x:\n"
189       "  00 00 00 00\n"
190   );
191   CHECK_TRACE_CONTENTS(
192       "error: 'eb/jump x/disp8': can't refer to global variable 'x'\n"
193       // sub-optimal error message; should be
194 //?       "error: can't jump to data (variable 'x')\n"
195   );
196 }
197 
198 void test_global_variable_disallowed_in_call() {
199   Hide_errors = true;
200   run(
201       "== code\n"
202       "e8/call  x/disp32\n"
203       "== data\n"
204       "x:\n"
205       "  00 00 00 00\n"
206   );
207   CHECK_TRACE_CONTENTS(
208       "error: 'e8/call x/disp32': can't refer to global variable 'x'\n"
209       // sub-optimal error message; should be
210 //?       "error: can't call to the data segment ('x')\n"
211   );
212 }
213 
214 void test_global_variable_in_data_segment() {
215   run(
216       "== 0x1\n"
217       "b9  x/imm32\n"
218       "== 0x0a000000\n"
219       "x:\n"
220       "  y/imm32\n"
221       "y:\n"
222       "  00 00 00 00\n"
223   );
224   // check that we loaded 'x' with the address of 'y'
225   CHECK_TRACE_CONTENTS(
226       "load: 0x0a000000 -> 04\n"
227       "load: 0x0a000001 -> 00\n"
228       "load: 0x0a000002 -> 00\n"
229       "load: 0x0a000003 -> 0a\n"
230   );
231   CHECK_TRACE_COUNT("error", 0);
232 }
233 
234 void test_raw_number_with_imm32_in_data_segment() {
235   run(
236       "== 0x1\n"
237       "b9  x/imm32\n"
238       "== 0x0a000000\n"
239       "x:\n"
240       "  1/imm32\n"
241   );
242   // check that we loaded 'x' with the address of 1
243   CHECK_TRACE_CONTENTS(
244       "load: 0x0a000000 -> 01\n"
245       "load: 0x0a000001 -> 00\n"
246       "load: 0x0a000002 -> 00\n"
247       "load: 0x0a000003 -> 00\n"
248   );
249   CHECK_TRACE_COUNT("error", 0);
250 }
251 
252 void test_duplicate_global_variable() {
253   Hide_errors = true;
254   run(
255       "== 0x1\n"
256       "40/increment-EAX\n"
257       "== 0x0a000000\n"
258       "x:\n"
259       "x:\n"
260       "  00\n"
261   );
262   CHECK_TRACE_CONTENTS(
263       "error: duplicate global 'x'\n"
264   );
265 }
266 
267 void test_global_variable_disp32_with_modrm() {
268   run(
269       "== code\n"
270       "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32\n"
271       "== data\n"
272       "x:\n"
273       "  00 00 00 00\n"
274   );
275   CHECK_TRACE_COUNT("error", 0);
276 }
277 
278 void test_global_variable_disp32_with_call() {
279   transform(
280       "== code\n"
281       "foo:\n"
282       "  e8/call bar/disp32\n"
283       "bar:\n"
284   );
285   CHECK_TRACE_COUNT("error", 0);
286 }
287 
288 string to_full_string(const line& in) {
289   ostringstream out;
290   for (int i = 0;  i < SIZE(in.words);  ++i) {
291     if (i > 0) out << ' ';
292     out << in.words.at(i).data;
293     for (int j = 0;  j < SIZE(in.words.at(i).metadata);  ++j)
294       out << '/' << in.words.at(i).metadata.at(j);
295   }
296   return out.str();
297 }