From 3350c34a74844e21ea69077e01efff3bae64bdcd Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Tue, 23 Mar 2021 17:31:08 -0700 Subject: . --- html/linux/bootstrap/037global_variables.cc.html | 368 +++++++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100644 html/linux/bootstrap/037global_variables.cc.html (limited to 'html/linux/bootstrap/037global_variables.cc.html') diff --git a/html/linux/bootstrap/037global_variables.cc.html b/html/linux/bootstrap/037global_variables.cc.html new file mode 100644 index 00000000..ea37783f --- /dev/null +++ b/html/linux/bootstrap/037global_variables.cc.html @@ -0,0 +1,368 @@ + + + + +Mu - linux/bootstrap/037global_variables.cc + + + + + + + + + + +https://github.com/akkartik/mu/blob/main/linux/bootstrap/037global_variables.cc +
+  1 //: Global variables.
+  2 //:
+  3 //: Global variables are just labels in the data segment.
+  4 //: However, they can only be used in imm32 and not disp32 arguments. And they
+  5 //: can't be used with jump and call instructions.
+  6 //:
+  7 //: This layer has much the same structure as rewriting labels.
+  8 
+  9 :(code)
+ 10 void test_global_variable() {
+ 11   run(
+ 12       "== code 0x1\n"
+ 13       "b9  x/imm32\n"
+ 14       "== data 0x2000\n"
+ 15       "x:\n"
+ 16       "  00 00 00 00\n"
+ 17   );
+ 18   CHECK_TRACE_CONTENTS(
+ 19       "transform: global variable 'x' is at address 0x00002000\n"
+ 20   );
+ 21 }
+ 22 
+ 23 :(before "End Transforms")
+ 24 Transform.push_back(rewrite_global_variables);
+ 25 :(code)
+ 26 void rewrite_global_variables(program& p) {
+ 27   trace(3, "transform") << "-- rewrite global variables" << end();
+ 28   // Begin rewrite_global_variables
+ 29   map<string, uint32_t> address;
+ 30   compute_addresses_for_global_variables(p, address);
+ 31   if (trace_contains_errors()) return;
+ 32   drop_global_variables(p);
+ 33   replace_global_variables_with_addresses(p, address);
+ 34 }
+ 35 
+ 36 void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
+ 37   for (int i = 0;  i < SIZE(p.segments);  ++i) {
+ 38     if (p.segments.at(i).name != "code")
+ 39       compute_addresses_for_global_variables(p.segments.at(i), address);
+ 40   }
+ 41 }
+ 42 
+ 43 void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
+ 44   int current_address = s.start;
+ 45   for (int i = 0;  i < SIZE(s.lines);  ++i) {
+ 46     const line& inst = s.lines.at(i);
+ 47     for (int j = 0;  j < SIZE(inst.words);  ++j) {
+ 48       const word& curr = inst.words.at(j);
+ 49       if (*curr.data.rbegin() != ':') {
+ 50         current_address += size_of(curr);
+ 51       }
+ 52       else {
+ 53         string variable = drop_last(curr.data);
+ 54         // ensure variables look sufficiently different from raw hex
+ 55         check_valid_name(variable);
+ 56         if (trace_contains_errors()) return;
+ 57         if (j > 0)
+ 58           raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
+ 59         if (Labels_file.is_open())
+ 60           Labels_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
+ 61         if (contains_key(address, variable)) {
+ 62           raise << "duplicate global '" << variable << "'\n" << end();
+ 63           return;
+ 64         }
+ 65         put(address, variable, current_address);
+ 66         trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
+ 67         // no modifying current_address; global variable definitions won't be in the final binary
+ 68       }
+ 69     }
+ 70   }
+ 71 }
+ 72 
+ 73 void drop_global_variables(program& p) {
+ 74   for (int i = 0;  i < SIZE(p.segments);  ++i) {
+ 75     if (p.segments.at(i).name != "code")
+ 76       drop_labels(p.segments.at(i));
+ 77   }
+ 78 }
+ 79 
+ 80 void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
+ 81   if (p.segments.empty()) return;
+ 82   for (int i = 0;  i < SIZE(p.segments);  ++i) {
+ 83     segment& curr = p.segments.at(i);
+ 84     if (curr.name == "code")
+ 85       replace_global_variables_in_code_segment(curr, address);
+ 86     else
+ 87       replace_global_variables_in_data_segment(curr, address);
+ 88   }
+ 89 }
+ 90 
+ 91 void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
+ 92   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+ 93     line& inst = code.lines.at(i);
+ 94     line new_inst;
+ 95     for (int j = 0;  j < SIZE(inst.words);  ++j) {
+ 96       const word& curr = inst.words.at(j);
+ 97       if (!contains_key(address, curr.data)) {
+ 98         if (!looks_like_hex_int(curr.data))
+ 99           raise << "missing reference to global '" << curr.data << "'\n" << end();
+100         new_inst.words.push_back(curr);
+101         continue;
+102       }
+103       if (!valid_use_of_global_variable(curr)) {
+104         raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
+105         return;
+106       }
+107       emit_hex_bytes(new_inst, get(address, curr.data), 4);
+108     }
+109     inst.words.swap(new_inst.words);
+110     trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
+111   }
+112 }
+113 
+114 void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
+115   for (int i = 0;  i < SIZE(data.lines);  ++i) {
+116     line& l = data.lines.at(i);
+117     line new_l;
+118     for (int j = 0;  j < SIZE(l.words);  ++j) {
+119       const word& curr = l.words.at(j);
+120       if (!contains_key(address, curr.data)) {
+121         if (looks_like_hex_int(curr.data)) {
+122           if (has_argument_metadata(curr, "imm32"))
+123             emit_hex_bytes(new_l, curr, 4);
+124           else if (has_argument_metadata(curr, "imm16"))
+125             emit_hex_bytes(new_l, curr, 2);
+126           else if (has_argument_metadata(curr, "imm8"))
+127             emit_hex_bytes(new_l, curr, 1);
+128           else if (has_argument_metadata(curr, "disp8"))
+129             raise << "can't use /disp8 in a non-code segment\n" << end();
+130           else if (has_argument_metadata(curr, "disp16"))
+131             raise << "can't use /disp16 in a non-code segment\n" << end();
+132           else if (has_argument_metadata(curr, "disp32"))
+133             raise << "can't use /disp32 in a non-code segment\n" << end();
+134           else
+135             new_l.words.push_back(curr);
+136         }
+137         else {
+138           raise << "missing reference to global '" << curr.data << "'\n" << end();
+139           new_l.words.push_back(curr);
+140         }
+141         continue;
+142       }
+143       trace(99, "transform") << curr.data << " maps to " << HEXWORD << get(address, curr.data) << end();
+144       emit_hex_bytes(new_l, get(address, curr.data), 4);
+145     }
+146     l.words.swap(new_l.words);
+147     trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
+148   }
+149 }
+150 
+151 bool valid_use_of_global_variable(const word& curr) {
+152   if (has_argument_metadata(curr, "imm32")) return true;
+153   // End Valid Uses Of Global Variable(curr)
+154   return false;
+155 }
+156 
+157 //:: a more complex sanity check for how we use global variables
+158 //: requires first saving some data early before we pack arguments
+159 
+160 :(after "Begin Transforms")
+161 Transform.push_back(correlate_disp32_with_mod);
+162 :(code)
+163 void correlate_disp32_with_mod(program& p) {
+164   if (p.segments.empty()) return;
+165   segment& code = *find(p, "code");
+166   for (int i = 0;  i < SIZE(code.lines);  ++i) {
+167     line& inst = code.lines.at(i);
+168     for (int j = 0;  j < SIZE(inst.words);  ++j) {
+169       word& curr = inst.words.at(j);
+170       if (has_argument_metadata(curr, "disp32")
+171           && has_argument_metadata(inst, "mod"))
+172         curr.metadata.push_back("has_mod");
+173     }
+174   }
+175 }
+176 
+177 :(before "End Valid Uses Of Global Variable(curr)")
+178 if (has_argument_metadata(curr, "disp32"))
+179   return has_metadata(curr, "has_mod");
+180 // todo: more sophisticated check, to ensure we don't use global variable
+181 // addresses as a real displacement added to other arguments.
+182 
+183 :(code)
+184 bool has_metadata(const word& w, const string& m) {
+185   for (int i = 0;  i < SIZE(w.metadata);  ++i)
+186     if (w.metadata.at(i) == m) return true;
+187   return false;
+188 }
+189 
+190 void test_global_variable_disallowed_in_jump() {
+191   Hide_errors = true;
+192   run(
+193       "== code 0x1\n"
+194       "eb/jump  x/disp8\n"
+195       "== data 0x2000\n"
+196       "x:\n"
+197       "  00 00 00 00\n"
+198   );
+199   CHECK_TRACE_CONTENTS(
+200       "error: 'eb/jump x/disp8': can't refer to global variable 'x'\n"
+201       // sub-optimal error message; should be
+202 //?       "error: can't jump to data (variable 'x')\n"
+203   );
+204 }
+205 
+206 void test_global_variable_disallowed_in_call() {
+207   Hide_errors = true;
+208   run(
+209       "== code 0x1\n"
+210       "e8/call  x/disp32\n"
+211       "== data 0x2000\n"
+212       "x:\n"
+213       "  00 00 00 00\n"
+214   );
+215   CHECK_TRACE_CONTENTS(
+216       "error: 'e8/call x/disp32': can't refer to global variable 'x'\n"
+217       // sub-optimal error message; should be
+218 //?       "error: can't call to the data segment ('x')\n"
+219   );
+220 }
+221 
+222 void test_global_variable_in_data_segment() {
+223   run(
+224       "== code 0x1\n"
+225       "b9  x/imm32\n"
+226       "== data 0x2000\n"
+227       "x:\n"
+228       "  y/imm32\n"
+229       "y:\n"
+230       "  00 00 00 00\n"
+231   );
+232   // check that we loaded 'x' with the address of 'y'
+233   CHECK_TRACE_CONTENTS(
+234       "load: 0x00002000 -> 04\n"
+235       "load: 0x00002001 -> 20\n"
+236       "load: 0x00002002 -> 00\n"
+237       "load: 0x00002003 -> 00\n"
+238   );
+239   CHECK_TRACE_COUNT("error", 0);
+240 }
+241 
+242 void test_raw_number_with_imm32_in_data_segment() {
+243   run(
+244       "== code 0x1\n"
+245       "b9  x/imm32\n"
+246       "== data 0x2000\n"
+247       "x:\n"
+248       "  1/imm32\n"
+249   );
+250   // check that we loaded 'x' with the address of 1
+251   CHECK_TRACE_CONTENTS(
+252       "load: 0x00002000 -> 01\n"
+253       "load: 0x00002001 -> 00\n"
+254       "load: 0x00002002 -> 00\n"
+255       "load: 0x00002003 -> 00\n"
+256   );
+257   CHECK_TRACE_COUNT("error", 0);
+258 }
+259 
+260 void test_duplicate_global_variable() {
+261   Hide_errors = true;
+262   run(
+263       "== code 0x1\n"
+264       "40/increment-EAX\n"
+265       "== data 0x2000\n"
+266       "x:\n"
+267       "x:\n"
+268       "  00\n"
+269   );
+270   CHECK_TRACE_CONTENTS(
+271       "error: duplicate global 'x'\n"
+272   );
+273 }
+274 
+275 void test_global_variable_disp32_with_modrm() {
+276   run(
+277       "== code 0x1\n"
+278       "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32\n"
+279       "== data 0x2000\n"
+280       "x:\n"
+281       "  00 00 00 00\n"
+282   );
+283   CHECK_TRACE_COUNT("error", 0);
+284 }
+285 
+286 void test_global_variable_disp32_with_call() {
+287   transform(
+288       "== code 0x1\n"
+289       "foo:\n"
+290       "  e8/call bar/disp32\n"
+291       "bar:\n"
+292   );
+293   CHECK_TRACE_COUNT("error", 0);
+294 }
+295 
+296 string to_full_string(const line& in) {
+297   ostringstream out;
+298   for (int i = 0;  i < SIZE(in.words);  ++i) {
+299     if (i > 0) out << ' ';
+300     out << in.words.at(i).data;
+301     for (int j = 0;  j < SIZE(in.words.at(i).metadata);  ++j)
+302       out << '/' << in.words.at(i).metadata.at(j);
+303   }
+304   return out.str();
+305 }
+
+ + + -- cgit 1.4.1-2-gfad0