https://github.com/akkartik/mu/blob/master/subx/036global_variables.cc
  1 //: Global variables.
  2 //:
  3 //: Global variables are just labels in the data segment.
  4 //: However, they can only be used in imm32 operands (or in disp32 operands
  5 //: of instructions with a /mod). They can't be used with jump and call instructions.
  6 //:
  7 //: This layer has much the same structure as rewriting labels.
  8 
  9 :(scenario global_variable)
 10 == code
 11 b9  x/imm32
 12 == data
 13 x:
 14   00 00 00 00
 15 +transform: global variable 'x' is at address 0x0a000079
 16 
:(before "End Level-2 Transforms")
// add the global-variable rewrite to the list of transforms
Transform.push_back(rewrite_global_variables);
 19 :(code)
// Transform that replaces each reference to a global variable in 'p' with
// the variable's 4-byte address.
// Works in three passes: compute the address of every variable defined in
// the non-code segments, drop the definitions, then substitute addresses for
// names. Stops after the first pass if the trace contains errors.
void rewrite_global_variables(program& p) {
  trace(99, "transform") << "-- rewrite global variables" << end();
  // Begin rewrite_global_variables
  map<string, uint32_t> address;  // variable name -> address
  compute_addresses_for_global_variables(p, address);
  // don't rewrite anything if errors have been raised so far
  if (trace_contains_errors()) return;
  drop_global_variables(p);
  replace_global_variables_with_addresses(p, address);
}
 29 
 30 void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
 31   for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i)
 32     compute_addresses_for_global_variables(p.segments.at(i), address);
 33 }
 34 
 35 void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
 36   int current_address = s.start;
 37   for (int i = 0;  i < SIZE(s.lines);  ++i) {
 38     const line& inst = s.lines.at(i);
 39     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 40       const word& curr = inst.words.at(j);
 41       if (*curr.data.rbegin() != ':') {
 42         current_address += size_of(curr);
 43       }
 44       else {
 45         string variable = drop_last(curr.data);
 46         // ensure variables look sufficiently different from raw hex
 47         check_valid_name(variable);
 48         if (trace_contains_errors()) return;
 49         if (j > 0)
 50           raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
 51         if (Map_file.is_open())
 52           Map_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
 53         if (contains_key(address, variable)) {
 54           raise << "duplicate global '" << variable << "'\n" << end();
 55           return;
 56         }
 57         put(address, variable, current_address);
 58         trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
 59         // no modifying current_address; global variable definitions won't be in the final binary
 60       }
 61     }
 62   }
 63 }
 64 
 65 void drop_global_variables(program& p) {
 66   for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i)
 67     drop_labels(p.segments.at(i));
 68 }
 69 
 70 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
 71   if (p.segments.empty()) return;
 72   replace_global_variables_in_code_segment(p.segments.at(0), address);
 73   for (int i = /*skip code*/1;  i < SIZE(p.segments);  ++i)
 74     replace_global_variables_in_data_segment(p.segments.at(i), address);
 75 }
 76 
 77 void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
 78   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 79     line& inst = code.lines.at(i);
 80     line new_inst;
 81     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 82       const word& curr = inst.words.at(j);
 83       if (!contains_key(address, curr.data)) {
 84         if (!looks_like_hex_int(curr.data))
 85           raise << "missing reference to global '" << curr.data << "'\n" << end();
 86         new_inst.words.push_back(curr);
 87         continue;
 88       }
 89       if (!valid_use_of_global_variable(curr)) {
 90         raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
 91         return;
 92       }
 93       emit_hex_bytes(new_inst, get(address, curr.data), 4);
 94     }
 95     inst.words.swap(new_inst.words);
 96     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
 97   }
 98 }
 99 
100 void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
101   for (int i = 0;  i < SIZE(data.lines);  ++i) {
102     line& l = data.lines.at(i);
103     line new_l;
104     for (int j = 0;  j < SIZE(l.words);  ++j) {
105       const word& curr = l.words.at(j);
106       if (!contains_key(address, curr.data)) {
107         if (looks_like_hex_int(curr.data)) {
108           if (has_operand_metadata(curr, "imm32"))
109             emit_hex_bytes(new_l, curr, 4);
110           else if (has_operand_metadata(curr, "imm16"))
111             emit_hex_bytes(new_l, curr, 2);
112           else if (has_operand_metadata(curr, "imm8"))
113             emit_hex_bytes(new_l, curr, 1);
114           else if (has_operand_metadata(curr, "disp8"))
115             raise << "can't use /disp8 in a non-code segment\n" << end();
116           else if (has_operand_metadata(curr, "disp16"))
117             raise << "can't use /disp16 in a non-code segment\n" << end();
118           else if (has_operand_metadata(curr, "disp32"))
119             raise << "can't use /disp32 in a non-code segment\n" << end();
120           else
121             new_l.words.push_back(curr);
122         }
123         else {
124           raise << "missing reference to global '" << curr.data << "'\n" << end();
125           new_l.words.push_back(curr);
126         }
127         continue;
128       }
129       trace(99, "transform") << curr.data << " maps to " << HEXWORD << get(address, curr.data) << end();
130       emit_hex_bytes(new_l, get(address, curr.data), 4);
131     }
132     l.words.swap(new_l.words);
133     trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
134   }
135 }
136 
// Decide whether the operand word 'curr' is allowed to name a global variable.
// An /imm32 operand always is. Other layers can add further accepted cases
// at the waypoint comment inside the body; anything not accepted is rejected.
bool valid_use_of_global_variable(const word& curr) {
  if (has_operand_metadata(curr, "imm32")) return true;
  // End Valid Uses Of Global Variable(curr)
  return false;
}
142 
143 //:: a more complex sanity check for how we use global variables
144 //: requires first saving some data early before we pack operands
145 
:(after "Begin Level-2 Transforms")
// add the disp32/mod tagging pass to the list of transforms; per the note
// above, it has to save its data before operands are packed
Transform.push_back(correlate_disp32_with_mod);
148 :(code)
149 void correlate_disp32_with_mod(program& p) {
150   if (p.segments.empty()) return;
151   segment& code = p.segments.at(0);
152   for (int i = 0;  i < SIZE(code.lines);  ++i) {
153     line& inst = code.lines.at(i);
154     for (int j = 0;  j < SIZE(inst.words);  ++j) {
155       word& curr = inst.words.at(j);
156       if (has_operand_metadata(curr, "disp32")
157           && has_operand_metadata(inst, "mod"))
158         curr.metadata.push_back("has_mod");
159     }
160   }
161 }
162 
:(before "End Valid Uses Of Global Variable(curr)")
// a /disp32 operand may name a global only if correlate_disp32_with_mod
// tagged it with "has_mod", i.e. its instruction has a /mod operand
if (has_operand_metadata(curr, "disp32"))
  return has_metadata(curr, "has_mod");
// todo: more sophisticated check, to ensure we don't use global variable
// addresses as a real displacement added to other operands.
168 
169 :(code)
170 bool has_metadata(const word& w, const string& m) {
171   for (int i = 0;  i < SIZE(w.metadata);  ++i)
172     if (w.metadata.at(i) == m) return true;
173   return false;
174 }
175 
176 :(scenario global_variable_disallowed_in_jump)
177 % Hide_errors = true;
178 == code
179 eb/jump  x/disp8
180 == data
181 x:
182   00 00 00 00
183 +error: 'eb/jump x/disp8': can't refer to global variable 'x'
184 # sub-optimal error message; should be
185 #? +error: can't jump to data (variable 'x')
186 
187 :(scenario global_variable_disallowed_in_call)
188 % Hide_errors = true;
189 == code
190 e8/call  x/disp32
191 == data
192 x:
193   00 00 00 00
194 +error: 'e8/call x/disp32': can't refer to global variable 'x'
195 # sub-optimal error message; should be
196 #? +error: can't call to the data segment ('x')
197 
198 :(scenario global_variable_in_data_segment)
199 == 0x1
200 b9  x/imm32
201 == 0x0a000000
202 x:
203   y/imm32
204 y:
205   00 00 00 00
206 # check that we loaded 'x' with the address of 'y'
207 +load: 0x0a000000 -> 04
208 +load: 0x0a000001 -> 00
209 +load: 0x0a000002 -> 00
210 +load: 0x0a000003 -> 0a
211 $error: 0
212 
213 :(scenario raw_number_with_imm32_in_data_segment)
214 == 0x1
215 b9  x/imm32
216 == 0x0a000000
217 x:
218   1/imm32
219 # check that 'x' holds the raw value 1
220 +load: 0x0a000000 -> 01
221 +load: 0x0a000001 -> 00
222 +load: 0x0a000002 -> 00
223 +load: 0x0a000003 -> 00
224 $error: 0
225 
226 :(scenario duplicate_global_variable)
227 % Hide_errors = true;
228 == 0x1
229 40/increment-EAX
230 == 0x0a000000
231 x:
232 x:
233   00
234 +error: duplicate global 'x'
235 
236 :(scenario global_variable_disp32_with_modrm)
237 == code
238 8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32
239 == data
240 x:
241   00 00 00 00
242 $error: 0
243 
244 :(scenarios transform)
245 :(scenario global_variable_disp32_with_call)
246 == code
247 foo:
248   e8/call bar/disp32
249 bar:
250 $error: 0
251 
252 :(code)
253 string to_full_string(const line& in) {
254   ostringstream out;
255   for (int i = 0;  i < SIZE(in.words);  ++i) {
256     if (i > 0) out << ' ';
257     out << in.words.at(i).data;
258     for (int j = 0;  j < SIZE(in.words.at(i).metadata);  ++j)
259       out << '/' << in.words.at(i).metadata.at(j);
260   }
261   return out.str();
262 }