https://github.com/akkartik/mu/blob/master/subx/036global_variables.cc
  1 //: Global variables.
  2 //:
  3 //: Global variables are just labels in the data segment.
  4 //: However, they can only be used in imm32 and not disp32 operands. And they
  5 //: can't be used with jump and call instructions.
  6 //:
  7 //: This layer has much the same structure as rewriting labels.
  8 
  9 :(code)
 10 void test_global_variable() {
 11   run(
 12       "== code 0x1\n"
 13       "b9  x/imm32\n"
 14       "== data 0x2000\n"
 15       "x:\n"
 16       "  00 00 00 00\n"
 17   );
 18   CHECK_TRACE_CONTENTS(
 19       "transform: global variable 'x' is at address 0x00002000\n"
 20   );
 21 }
 22 
 23 :(before "End Level-2 Transforms")
 24 Transform.push_back(rewrite_global_variables);
 25 :(code)
 26 void rewrite_global_variables(program& p) {
 27   trace(3, "transform") << "-- rewrite global variables" << end();
 28   // Begin rewrite_global_variables
 29   map<string, uint32_t> address;
 30   compute_addresses_for_global_variables(p, address);
 31   if (trace_contains_errors()) return;
 32   drop_global_variables(p);
 33   replace_global_variables_with_addresses(p, address);
 34 }
 35 
 36 void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
 37   for (int i = 0;  i < SIZE(p.segments);  ++i) {
 38     if (p.segments.at(i).name != "code")
 39       compute_addresses_for_global_variables(p.segments.at(i), address);
 40   }
 41 }
 42 
 43 void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
 44   int current_address = s.start;
 45   for (int i = 0;  i < SIZE(s.lines);  ++i) {
 46     const line& inst = s.lines.at(i);
 47     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 48       const word& curr = inst.words.at(j);
 49       if (*curr.data.rbegin() != ':') {
 50         current_address += size_of(curr);
 51       }
 52       else {
 53         string variable = drop_last(curr.data);
 54         // ensure variables look sufficiently different from raw hex
 55         check_valid_name(variable);
 56         if (trace_contains_errors()) return;
 57         if (j > 0)
 58           raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
 59         if (Labels_file.is_open())
 60           Labels_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
 61         if (contains_key(address, variable)) {
 62           raise << "duplicate global '" << variable << "'\n" << end();
 63           return;
 64         }
 65         put(address, variable, current_address);
 66         trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
 67         // no modifying current_address; global variable definitions won't be in the final binary
 68       }
 69     }
 70   }
 71 }
 72 
 73 void drop_global_variables(program& p) {
 74   for (int i = 0;  i < SIZE(p.segments);  ++i) {
 75     if (p.segments.at(i).name != "code")
 76       drop_labels(p.segments.at(i));
 77   }
 78 }
 79 
 80 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
 81   if (p.segments.empty()) return;
 82   for (int i = 0;  i < SIZE(p.segments);  ++i) {
 83     segment& curr = p.segments.at(i);
 84     if (curr.name == "code")
 85       replace_global_variables_in_code_segment(curr, address);
 86     else
 87       replace_global_variables_in_data_segment(curr, address);
 88   }
 89 }
 90 
 91 void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
 92   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 93     line& inst = code.lines.at(i);
 94     line new_inst;
 95     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 96       const word& curr = inst.words.at(j);
 97       if (!contains_key(address, curr.data)) {
 98         if (!looks_like_hex_int(curr.data))
 99           raise << "missing reference to global '" << curr.data << "'\n" << end();
100         new_inst.words.push_back(curr);
101         continue;
102       }
103       if (!valid_use_of_global_variable(curr)) {
104         raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
105         return;
106       }
107       emit_hex_bytes(new_inst, get(address, curr.data), 4);
108     }
109     inst.words.swap(new_inst.words);
110     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
111   }
112 }
113 
114 void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
115   for (int i = 0;  i < SIZE(data.lines);  ++i) {
116     line& l = data.lines.at(i);
117     line new_l;
118     for (int j = 0;  j < SIZE(l.words);  ++j) {
119       const word& curr = l.words.at(j);
120       if (!contains_key(address, curr.data)) {
121         if (looks_like_hex_int(curr.data)) {
122           if (has_operand_metadata(curr, "imm32"))
123             emit_hex_bytes(new_l, curr, 4);
124           else if (has_operand_metadata(curr, "imm16"))
125             emit_hex_bytes(new_l, curr, 2);
126           else if (has_operand_metadata(curr, "imm8"))
127             emit_hex_bytes(new_l, curr, 1);
128           else if (has_operand_metadata(curr, "disp8"))
129             raise << "can't use /disp8 in a non-code segment\n" << end();
130           else if (has_operand_metadata(curr, "disp16"))
131             raise << "can't use /disp16 in a non-code segment\n" << end();
132           else if (has_operand_metadata(curr, "disp32"))
133             raise << "can't use /disp32 in a non-code segment\n" << end();
134           else
135             new_l.words.push_back(curr);
136         }
137         else {
138           raise << "missing reference to global '" << curr.data << "'\n" << end();
139           new_l.words.push_back(curr);
140         }
141         continue;
142       }
143       trace(99, "transform") << curr.data << " maps to " << HEXWORD << get(address, curr.data) << end();
144       emit_hex_bytes(new_l, get(address, curr.data), 4);
145     }
146     l.words.swap(new_l.words);
147     trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
148   }
149 }
150 
151 bool valid_use_of_global_variable(const word& curr) {
152   if (has_operand_metadata(curr, "imm32")) return true;
153   // End Valid Uses Of Global Variable(curr)
154   return false;
155 }
156 
157 //:: a more complex sanity check for how we use global variables
158 //: requires first saving some data early before we pack operands
159 
160 :(after "Begin Level-2 Transforms")
161 Transform.push_back(correlate_disp32_with_mod);
162 :(code)
163 void correlate_disp32_with_mod(program& p) {
164   if (p.segments.empty()) return;
165   segment& code = *find(p, "code");
166   for (int i = 0;  i < SIZE(code.lines);  ++i) {
167     line& inst = code.lines.at(i);
168     for (int j = 0;  j < SIZE(inst.words);  ++j) {
169       word& curr = inst.words.at(j);
170       if (has_operand_metadata(curr, "disp32")
171           && has_operand_metadata(inst, "mod"))
172         curr.metadata.push_back("has_mod");
173     }
174   }
175 }
176 
177 :(before "End Valid Uses Of Global Variable(curr)")
178 if (has_operand_metadata(curr, "disp32"))
179   return has_metadata(curr, "has_mod");
180 // todo: more sophisticated check, to ensure we don't use global variable
181 // addresses as a real displacement added to other operands.
182 
183 :(code)
184 bool has_metadata(const word& w, const string& m) {
185   for (int i = 0;  i < SIZE(w.metadata);  ++i)
186     if (w.metadata.at(i) == m) return true;
187   return false;
188 }
189 
190 void test_global_variable_disallowed_in_jump() {
191   Hide_errors = true;
192   run(
193       "== code 0x1\n"
194       "eb/jump  x/disp8\n"
195       "== data 0x2000\n"
196       "x:\n"
197       "  00 00 00 00\n"
198   );
199   CHECK_TRACE_CONTENTS(
200       "error: 'eb/jump x/disp8': can't refer to global variable 'x'\n"
201       // sub-optimal error message; should be
202 //?       "error: can't jump to data (variable 'x')\n"
203   );
204 }
205 
206 void test_global_variable_disallowed_in_call() {
207   Hide_errors = true;
208   run(
209       "== code 0x1\n"
210       "e8/call  x/disp32\n"
211       "== data 0x2000\n"
212       "x:\n"
213       "  00 00 00 00\n"
214   );
215   CHECK_TRACE_CONTENTS(
216       "error: 'e8/call x/disp32': can't refer to global variable 'x'\n"
217       // sub-optimal error message; should be
218 //?       "error: can't call to the data segment ('x')\n"
219   );
220 }
221 
222 void test_global_variable_in_data_segment() {
223   run(
224       "== code 0x1\n"
225       "b9  x/imm32\n"
226       "== data 0x2000\n"
227       "x:\n"
228       "  y/imm32\n"
229       "y:\n"
230       "  00 00 00 00\n"
231   );
232   // check that we loaded 'x' with the address of 'y'
233   CHECK_TRACE_CONTENTS(
234       "load: 0x00002000 -> 04\n"
235       "load: 0x00002001 -> 20\n"
236       "load: 0x00002002 -> 00\n"
237       "load: 0x00002003 -> 00\n"
238   );
239   CHECK_TRACE_COUNT("error", 0);
240 }
241 
242 void test_raw_number_with_imm32_in_data_segment() {
243   run(
244       "== code 0x1\n"
245       "b9  x/imm32\n"
246       "== data 0x2000\n"
247       "x:\n"
248       "  1/imm32\n"
249   );
250   // check that we loaded 'x' with the address of 1
251   CHECK_TRACE_CONTENTS(
252       "load: 0x00002000 -> 01\n"
253       "load: 0x00002001 -> 00\n"
254       "load: 0x00002002 -> 00\n"
255       "load: 0x00002003 -> 00\n"
256   );
257   CHECK_TRACE_COUNT("error", 0);
258 }
259 
260 void test_duplicate_global_variable() {
261   Hide_errors = true;
262   run(
263       "== code 0x1\n"
264       "40/increment-EAX\n"
265       "== data 0x2000\n"
266       "x:\n"
267       "x:\n"
268       "  00\n"
269   );
270   CHECK_TRACE_CONTENTS(
271       "error: duplicate global 'x'\n"
272   );
273 }
274 
275 void test_global_variable_disp32_with_modrm() {
276   run(
277       "== code 0x1\n"
278       "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32\n"
279       "== data 0x2000\n"
280       "x:\n"
281       "  00 00 00 00\n"
282   );
283   CHECK_TRACE_COUNT("error", 0);
284 }
285 
286 void test_global_variable_disp32_with_call() {
287   transform(
288       "== code 0x1\n"
289       "foo:\n"
290       "  e8/call bar/disp32\n"
291       "bar:\n"
292   );
293   CHECK_TRACE_COUNT("error", 0);
294 }
295 
296 string to_full_string(const line& in) {
297   ostringstream out;
298   for (int i = 0;  i < SIZE(in.words);  ++i) {
299     if (i > 0) out << ' ';
300     out << in.words.at(i).data;
301     for (int j = 0;  j < SIZE(in.words.at(i).metadata);  ++j)
302       out << '/' << in.words.at(i).metadata.at(j);
303   }
304   return out.str();
305 }