https://github.com/akkartik/mu/blob/master/subx/036global_variables.cc
  1 //: Global variables.
  2 //:
  3 //: Global variables are just labels in the data segment.
//: However, they can only be used in imm32 operands, or in disp32 operands of
//: instructions that also have a mod operand. They can't be used with jump
//: and call instructions.
  6 //:
  7 //: This layer has much the same structure as rewriting labels.
  8 
  9 :(scenario global_variable)
 10 == code
 11 b9  x/imm32
 12 == data
 13 x:
 14   00 00 00 00
 15 +transform: global variable 'x' is at address 0x0a000079
 16 
 17 :(before "End Level-2 Transforms")
// register the global-variable rewriting pass in the list of transforms
Transform.push_back(rewrite_global_variables);
 19 :(code)
 20 void rewrite_global_variables(program& p) {
 21   trace(99, "transform") << "-- rewrite global variables" << end();
 22   map<string, uint32_t> address;
 23   compute_addresses_for_global_variables(p, address);
 24   if (trace_contains_errors()) return;
 25   drop_global_variables(p);
 26   replace_global_variables_with_addresses(p, address);
 27 }
 28 
 29 void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
 30   for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i)
 31     compute_addresses_for_global_variables(p.segments.at(i), address);
 32 }
 33 
 34 void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
 35   int current_address = s.start;
 36   for (int i = 0;  i < SIZE(s.lines);  ++i) {
 37     const line& inst = s.lines.at(i);
 38     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 39       const word& curr = inst.words.at(j);
 40       if (*curr.data.rbegin() != ':') {
 41         current_address += size_of(curr);
 42       }
 43       else {
 44         string variable = drop_last(curr.data);
 45         // ensure variables look sufficiently different from raw hex
 46         check_valid_name(variable);
 47         if (trace_contains_errors()) return;
 48         if (j > 0)
 49           raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
 50         if (Map_file.is_open())
 51           Map_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
 52         put(address, variable, current_address);
 53         trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
 54         // no modifying current_address; global variable definitions won't be in the final binary
 55       }
 56     }
 57   }
 58 }
 59 
 60 void drop_global_variables(program& p) {
 61   for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i)
 62     drop_labels(p.segments.at(i));
 63 }
 64 
 65 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
 66   if (p.segments.empty()) return;
 67   replace_global_variables_in_code_segment(p.segments.at(0), address);
 68   for (int i = /*skip code*/1;  i < SIZE(p.segments);  ++i)
 69     replace_global_variables_in_data_segment(p.segments.at(i), address);
 70 }
 71 
 72 void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
 73   for (int i = 0;  i < SIZE(code.lines);  ++i) {
 74     line& inst = code.lines.at(i);
 75     line new_inst;
 76     for (int j = 0;  j < SIZE(inst.words);  ++j) {
 77       const word& curr = inst.words.at(j);
 78       if (!contains_key(address, curr.data)) {
 79         if (!looks_like_hex_int(curr.data))
 80           raise << "missing reference to global '" << curr.data << "'\n" << end();
 81         new_inst.words.push_back(curr);
 82         continue;
 83       }
 84       if (!valid_use_of_global_variable(curr)) {
 85         raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
 86         return;
 87       }
 88       emit_hex_bytes(new_inst, get(address, curr.data), 4);
 89     }
 90     inst.words.swap(new_inst.words);
 91     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
 92   }
 93 }
 94 
 95 void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
 96   for (int i = 0;  i < SIZE(data.lines);  ++i) {
 97     line& l = data.lines.at(i);
 98     line new_l;
 99     for (int j = 0;  j < SIZE(l.words);  ++j) {
100       const word& curr = l.words.at(j);
101       if (!contains_key(address, curr.data)) {
102         if (!looks_like_hex_int(curr.data))
103           raise << "missing reference to global '" << curr.data << "'\n" << end();
104         new_l.words.push_back(curr);
105         continue;
106       }
107       trace(99, "transform") << curr.data << " maps to " << HEXWORD << get(address, curr.data) << end();
108       emit_hex_bytes(new_l, get(address, curr.data), 4);
109     }
110     l.words.swap(new_l.words);
111     trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
112   }
113 }
114 
// Decide whether the word 'curr' may legally refer to a global variable.
// A word tagged as an imm32 operand is always allowed. Any other use is
// rejected unless a check spliced in at the waypoint comment below returns
// first.
bool valid_use_of_global_variable(const word& curr) {
  if (has_operand_metadata(curr, "imm32")) return true;
  // End Valid Uses Of Global Variable(curr)
  return false;
}
120 
121 //:: a more complex sanity check for how we use global variables
122 //: requires first saving some data early before we pack operands
123 
124 :(after "Begin Level-2 Transforms")
// registered early so the 'has_mod' tags get saved before operands are packed
Transform.push_back(correlate_disp32_with_mod);
126 :(code)
127 void correlate_disp32_with_mod(program& p) {
128   if (p.segments.empty()) return;
129   segment& code = p.segments.at(0);
130   for (int i = 0;  i < SIZE(code.lines);  ++i) {
131     line& inst = code.lines.at(i);
132     for (int j = 0;  j < SIZE(inst.words);  ++j) {
133       word& curr = inst.words.at(j);
134       if (has_operand_metadata(curr, "disp32")
135           && has_operand_metadata(inst, "mod"))
136         curr.metadata.push_back("has_mod");
137     }
138   }
139 }
140 
141 :(before "End Valid Uses Of Global Variable(curr)")
// a disp32 reference is accepted only if correlate_disp32_with_mod tagged it,
// i.e. its instruction also has a mod operand
if (has_operand_metadata(curr, "disp32"))
  return has_metadata(curr, "has_mod");
// todo: more sophisticated check, to ensure we don't use global variable
// addresses as a real displacement added to other operands.
146 
147 :(code)
148 bool has_metadata(const word& w, const string& m) {
149   for (int i = 0;  i < SIZE(w.metadata);  ++i)
150     if (w.metadata.at(i) == m) return true;
151   return false;
152 }
153 
154 :(scenario global_variable_disallowed_in_jump)
155 % Hide_errors = true;
156 == code
157 eb/jump  x/disp8
158 == data
159 x:
160   00 00 00 00
161 +error: 'eb/jump x/disp8': can't refer to global variable 'x'
162 # sub-optimal error message; should be
163 #? +error: can't jump to data (variable 'x')
164 
165 :(scenario global_variable_disallowed_in_call)
166 % Hide_errors = true;
167 == code
168 e8/call  x/disp32
169 == data
170 x:
171   00 00 00 00
172 +error: 'e8/call x/disp32': can't refer to global variable 'x'
173 # sub-optimal error message; should be
174 #? +error: can't call to the data segment ('x')
175 
176 :(scenario global_variable_in_data_segment)
177 == 0x1
178 b9  x/imm32
179 == 0x0a000000
180 x:
181   y/imm32
182 y:
183   00 00 00 00
184 # check that we loaded 'x' with the address of 'y'
185 +load: 0x0a000000 -> 04
186 +load: 0x0a000001 -> 00
187 +load: 0x0a000002 -> 00
188 +load: 0x0a000003 -> 0a
189 $error: 0
190 
191 :(scenario disp32_data_with_modrm)
192 == code
193 8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32
194 == data
195 x:
196   00 00 00 00
197 $error: 0
198 
199 :(scenarios transform)
200 :(scenario disp32_data_with_call)
201 == code
202 foo:
203   e8/call bar/disp32
204 bar:
205 $error: 0
206 
207 :(code)
208 string to_full_string(const line& in) {
209   ostringstream out;
210   for (int i = 0;  i < SIZE(in.words);  ++i) {
211     if (i > 0) out << ' ';
212     out << in.words.at(i).data;
213     for (int j = 0;  j < SIZE(in.words.at(i).metadata);  ++j)
214       out << '/' << in.words.at(i).metadata.at(j);
215   }
216   return out.str();
217 }