about summary refs log tree commit diff stats
path: root/subx/036global_variables.cc
blob: 846cd29190cf683f4d8e1d2468d5767465342606 (plain) (blame)
1
2
3
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
void returns_no_commands(void** state);
void returns_commands(void** state);
='n186' href='#n186'>186
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
//: Global variables.
//:
//: Global variables are just labels in the data segment.
//: However, they can only be used in imm32 and not disp32 operands. And they
//: can't be used with jump and call instructions.
//:
//: This layer has much the same structure as rewriting labels.

:(code)
void test_global_variable() {
  run(
      "== code\n"
      "b9  x/imm32\n"
      "== data\n"
      "x:\n"
      "  00 00 00 00\n"
  );
  CHECK_TRACE_CONTENTS(
      "transform: global variable 'x' is at address 0x0a000079\n"
  );
}

:(before "End Level-2 Transforms")
Transform.push_back(rewrite_global_variables);
:(code)
void rewrite_global_variables(program& p) {
  trace(3, "transform") << "-- rewrite global variables" << end();
  // Begin rewrite_global_variables
  map<string, uint32_t> address;
  compute_addresses_for_global_variables(p, address);
  if (trace_contains_errors()) return;
  drop_global_variables(p);
  replace_global_variables_with_addresses(p, address);
}

void compute_addresses_for_global_variables(const program& p, map<string, uint32_t>& address) {
  for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i)
    compute_addresses_for_global_variables(p.segments.at(i), address);
}

void compute_addresses_for_global_variables(const segment& s, map<string, uint32_t>& address) {
  int current_address = s.start;
  for (int i = 0;  i < SIZE(s.lines);  ++i) {
    const line& inst = s.lines.at(i);
    for (int j = 0;  j < SIZE(inst.words);  ++j) {
      const word& curr = inst.words.at(j);
      if (*curr.data.rbegin() != ':') {
        current_address += size_of(curr);
      }
      else {
        string variable = drop_last(curr.data);
        // ensure variables look sufficiently different from raw hex
        check_valid_name(variable);
        if (trace_contains_errors()) return;
        if (j > 0)
          raise << "'" << to_string(inst) << "': global variable names can only be the first word in a line.\n" << end();
        if (Map_file.is_open())
          Map_file << "0x" << HEXWORD << current_address << ' ' << variable << '\n';
        if (contains_key(address, variable)) {
          raise << "duplicate global '" << variable << "'\n" << end();
          return;
        }
        put(address, variable, current_address);
        trace(99, "transform") << "global variable '" << variable << "' is at address 0x" << HEXWORD << current_address << end();
        // no modifying current_address; global variable definitions won't be in the final binary
      }
    }
  }
}

void drop_global_variables(program& p) {
  for (int i = /*skip code segment*/1;  i < SIZE(p.segments);  ++i)
    drop_labels(p.segments.at(i));
}

void replace_global_variables_with_addresses(program& p, const map<string, uint32_t>& address) {
  if (p.segments.empty()) return;
  replace_global_variables_in_code_segment(p.segments.at(0), address);
  for (int i = /*skip code*/1;  i < SIZE(p.segments);  ++i)
    replace_global_variables_in_data_segment(p.segments.at(i), address);
}

void replace_global_variables_in_code_segment(segment& code, const map<string, uint32_t>& address) {
  for (int i = 0;  i < SIZE(code.lines);  ++i) {
    line& inst = code.lines.at(i);
    line new_inst;
    for (int j = 0;  j < SIZE(inst.words);  ++j) {
      const word& curr = inst.words.at(j);
      if (!contains_key(address, curr.data)) {
        if (!looks_like_hex_int(curr.data))
          raise << "missing reference to global '" << curr.data << "'\n" << end();
        new_inst.words.push_back(curr);
        continue;
      }
      if (!valid_use_of_global_variable(curr)) {
        raise << "'" << to_string(inst) << "': can't refer to global variable '" << curr.data << "'\n" << end();
        return;
      }
      emit_hex_bytes(new_inst, get(address, curr.data), 4);
    }
    inst.words.swap(new_inst.words);
    trace(99, "transform") << "instruction after transform: '" << data_to_string(inst) << "'" << end();
  }
}

void replace_global_variables_in_data_segment(segment& data, const map<string, uint32_t>& address) {
  for (int i = 0;  i < SIZE(data.lines);  ++i) {
    line& l = data.lines.at(i);
    line new_l;
    for (int j = 0;  j < SIZE(l.words);  ++j) {
      const word& curr = l.words.at(j);
      if (!contains_key(address, curr.data)) {
        if (looks_like_hex_int(curr.data)) {
          if (has_operand_metadata(curr, "imm32"))
            emit_hex_bytes(new_l, curr, 4);
          else if (has_operand_metadata(curr, "imm16"))
            emit_hex_bytes(new_l, curr, 2);
          else if (has_operand_metadata(curr, "imm8"))
            emit_hex_bytes(new_l, curr, 1);
          else if (has_operand_metadata(curr, "disp8"))
            raise << "can't use /disp8 in a non-code segment\n" << end();
          else if (has_operand_metadata(curr, "disp16"))
            raise << "can't use /disp16 in a non-code segment\n" << end();
          else if (has_operand_metadata(curr, "disp32"))
            raise << "can't use /disp32 in a non-code segment\n" << end();
          else
            new_l.words.push_back(curr);
        }
        else {
          raise << "missing reference to global '" << curr.data << "'\n" << end();
          new_l.words.push_back(curr);
        }
        continue;
      }
      trace(99, "transform") << curr.data << " maps to " << HEXWORD << get(address, curr.data) << end();
      emit_hex_bytes(new_l, get(address, curr.data), 4);
    }
    l.words.swap(new_l.words);
    trace(99, "transform") << "after transform: '" << data_to_string(l) << "'" << end();
  }
}

bool valid_use_of_global_variable(const word& curr) {
  if (has_operand_metadata(curr, "imm32")) return true;
  // End Valid Uses Of Global Variable(curr)
  return false;
}

//:: a more complex sanity check for how we use global variables
//: requires first saving some data early before we pack operands

:(after "Begin Level-2 Transforms")
Transform.push_back(correlate_disp32_with_mod);
:(code)
void correlate_disp32_with_mod(program& p) {
  if (p.segments.empty()) return;
  segment& code = p.segments.at(0);
  for (int i = 0;  i < SIZE(code.lines);  ++i) {
    line& inst = code.lines.at(i);
    for (int j = 0;  j < SIZE(inst.words);  ++j) {
      word& curr = inst.words.at(j);
      if (has_operand_metadata(curr, "disp32")
          && has_operand_metadata(inst, "mod"))
        curr.metadata.push_back("has_mod");
    }
  }
}

:(before "End Valid Uses Of Global Variable(curr)")
if (has_operand_metadata(curr, "disp32"))
  return has_metadata(curr, "has_mod");
// todo: more sophisticated check, to ensure we don't use global variable
// addresses as a real displacement added to other operands.

:(code)
bool has_metadata(const word& w, const string& m) {
  for (int i = 0;  i < SIZE(w.metadata);  ++i)
    if (w.metadata.at(i) == m) return true;
  return false;
}

void test_global_variable_disallowed_in_jump() {
  Hide_errors = true;
  run(
      "== code\n"
      "eb/jump  x/disp8\n"
      "== data\n"
      "x:\n"
      "  00 00 00 00\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: 'eb/jump x/disp8': can't refer to global variable 'x'\n"
      // sub-optimal error message; should be
//?       "error: can't jump to data (variable 'x')\n"
  );
}

void test_global_variable_disallowed_in_call() {
  Hide_errors = true;
  run(
      "== code\n"
      "e8/call  x/disp32\n"
      "== data\n"
      "x:\n"
      "  00 00 00 00\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: 'e8/call x/disp32': can't refer to global variable 'x'\n"
      // sub-optimal error message; should be
//?       "error: can't call to the data segment ('x')\n"
  );
}

void test_global_variable_in_data_segment() {
  run(
      "== 0x1\n"
      "b9  x/imm32\n"
      "== 0x0a000000\n"
      "x:\n"
      "  y/imm32\n"
      "y:\n"
      "  00 00 00 00\n"
  );
  // check that we loaded 'x' with the address of 'y'
  CHECK_TRACE_CONTENTS(
      "load: 0x0a000000 -> 04\n"
      "load: 0x0a000001 -> 00\n"
      "load: 0x0a000002 -> 00\n"
      "load: 0x0a000003 -> 0a\n"
  );
  CHECK_TRACE_COUNT("error", 0);
}

void test_raw_number_with_imm32_in_data_segment() {
  run(
      "== 0x1\n"
      "b9  x/imm32\n"
      "== 0x0a000000\n"
      "x:\n"
      "  1/imm32\n"
  );
  // check that we loaded 'x' with the address of 1
  CHECK_TRACE_CONTENTS(
      "load: 0x0a000000 -> 01\n"
      "load: 0x0a000001 -> 00\n"
      "load: 0x0a000002 -> 00\n"
      "load: 0x0a000003 -> 00\n"
  );
  CHECK_TRACE_COUNT("error", 0);
}

void test_duplicate_global_variable() {
  Hide_errors = true;
  run(
      "== 0x1\n"
      "40/increment-EAX\n"
      "== 0x0a000000\n"
      "x:\n"
      "x:\n"
      "  00\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: duplicate global 'x'\n"
  );
}

void test_global_variable_disp32_with_modrm() {
  run(
      "== code\n"
      "8b/copy 0/mod/indirect 5/rm32/.disp32 2/r32/EDX x/disp32\n"
      "== data\n"
      "x:\n"
      "  00 00 00 00\n"
  );
  CHECK_TRACE_COUNT("error", 0);
}

void test_global_variable_disp32_with_call() {
  transform(
      "== code\n"
      "foo:\n"
      "  e8/call bar/disp32\n"
      "bar:\n"
  );
  CHECK_TRACE_COUNT("error", 0);
}

string to_full_string(const line& in) {
  ostringstream out;
  for (int i = 0;  i < SIZE(in.words);  ++i) {
    if (i > 0) out << ' ';
    out << in.words.at(i).data;
    for (int j = 0;  j < SIZE(in.words.at(i).metadata);  ++j)
      out << '/' << in.words.at(i).metadata.at(j);
  }
  return out.str();
}