5670

author: Kartik Agaram <vc@akkartik.com> 2019-09-19 15:26:24 -0700
committer: Kartik Agaram <vc@akkartik.com> 2019-09-19 15:26:24 -0700
commit: dc8790941e39efb25c40de0420fdd4bce03f2761 (patch)
tree: e14e01797522c3027d5e343db50c4427950bb1e1 /030---operands.cc
parent: 68666b07b88fb4d72ff824550dd271b84520fdf5 (diff)
download: mu-dc8790941e39efb25c40de0420fdd4bce03f2761.tar.gz
1 files changed, 0 insertions, 539 deletions
diff --git a/030---operands.cc b/030---operands.cc
deleted file mode 100644
index 5203201e..00000000
--- a/030---operands.cc
+++ /dev/null
@@ -1,539 +0,0 @@
-//: Beginning of "level 2": tagging bytes with metadata around what field of
-//: an x86 instruction they're for.
-//:
-//: The x86 instruction set is variable-length, and how a byte is interpreted
-//: affects later instruction boundaries. A lot of the pain in programming
-//: machine code stems from computer and programmer going out of sync on what
-//: a byte means. The miscommunication is usually not immediately caught, and
-//: metastasizes at runtime into kilobytes of misinterpreted instructions.
-//:
-//: To mitigate these issues, we'll start programming in terms of logical
-//: operands rather than physical bytes. Some operands are smaller than a
-//: byte, and others may consist of multiple bytes. This layer will correctly
-//: pack and order the bytes corresponding to the operands in an instruction.
-
-:(before "End Help Texts")
-// Register the top-level 'instructions' help page: a summary of how an
-// instruction is made of an opcode plus typed operands, with a pointer to the
-// per-operand pages.
-put_new(Help, "instructions",
-  "Each x86 instruction consists of an instruction or opcode and some number\n"
-  "of operands.\n"
-  "Each operand has a type. An instruction won't have more than one operand of\n"
-  "any type.\n"
-  "Each instruction has some set of allowed operand types. It'll reject others.\n"
-  "The complete list of operand types: mod, subop, r32 (register), rm32\n"
-  "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
-  "imm32.\n"
-  "Each of these has its own help page. Try reading 'subx help mod' next.\n"
-);
-:(before "End Help Contents")
-// Advertise the page by name in the list of topics written to cerr.
-cerr << "  instructions\n";
-
-:(code)
-//: A word tagged imm32 is expanded into four bytes, least-significant byte
-//: first, and the packed instruction then runs as usual.
-void test_pack_immediate_constants() {
-  run(
-      "== code 0x1\n"
-      "bb  0x2a/imm32\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "transform: packing instruction 'bb 0x2a/imm32'\n"
-      "transform: instruction after packing: 'bb 2a 00 00 00'\n"
-      "run: copy imm32 0x0000002a to EBX\n"
-  );
-}
-
-//: complete set of valid operand types
-
-:(before "End Globals")
-// Names of all operand types that may tag a word in an instruction. Metadata
-// not in this set is treated as unrecognized by the helpers in this layer.
-set<string> Instruction_operands;
-:(before "End One-time Setup")
-// Fill in the twelve recognized operand types.
-Instruction_operands.insert("subop");
-Instruction_operands.insert("mod");
-Instruction_operands.insert("rm32");
-Instruction_operands.insert("base");
-Instruction_operands.insert("index");
-Instruction_operands.insert("scale");
-Instruction_operands.insert("r32");
-Instruction_operands.insert("disp8");
-Instruction_operands.insert("disp16");
-Instruction_operands.insert("disp32");
-Instruction_operands.insert("imm8");
-Instruction_operands.insert("imm32");
-
-:(before "End Help Texts")
-// Install the per-operand help pages defined below.
-init_operand_type_help();
-:(code)
-//: Store one help page in Help for each operand type: mod, subop, r32, rm32,
-//: base, index, scale, disp8, disp16, disp32, imm8 and imm32.
-void init_operand_type_help() {
-  put(Help, "mod",
-    "2-bit operand controlling the _addressing mode_ of many instructions,\n"
-    "to determine how to compute the _effective address_ to look up memory at\n"
-    "based on the 'rm32' operand and potentially others.\n"
-    "\n"
-    "If mod = 3, just operate on the contents of the register specified by rm32\n"
-    "            (direct mode).\n"
-    "If mod = 2, effective address is usually* rm32 + disp32\n"
-    "            (indirect mode with displacement).\n"
-    "If mod = 1, effective address is usually* rm32 + disp8\n"
-    "            (indirect mode with displacement).\n"
-    "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
-    "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
-    "     Using it as an address gets more involved. For more details,\n"
-    "     try reading the help pages for 'base', 'index' and 'scale'.)\n"
-    "\n"
-    "For complete details, spend some time with two tables in the IA-32 software\n"
-    "developer's manual that are also included in this repo:\n"
-    "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
-    "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
-  );
-  put(Help, "subop",
-    "Additional 3-bit operand for determining the instruction when the opcode\n"
-    "is 81, 8f, d3, f7 or ff.\n"
-    "Can't coexist with operand of type 'r32' in a single instruction, because\n"
-    "the two use the same bits.\n"
-  );
-  put(Help, "r32",
-    "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
-  );
-  put(Help, "rm32",
-    "32-bit value in register or memory. The precise details of its construction\n"
-    "depend on the eponymous 3-bit 'rm32' operand, the 'mod' operand, and also\n"
-    "potentially the 'SIB' operands ('scale', 'index' and 'base') and a displacement\n"
-    "('disp8' or 'disp32').\n"
-    "\n"
-    "For complete details, spend some time with two tables in the IA-32 software\n"
-    "developer's manual that are also included in this repo:\n"
-    "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
-    "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
-  );
-  put(Help, "base",
-    "Additional 3-bit operand (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
-    "register containing an address to look up.\n"
-    "This address may be further modified by 'index' and 'scale' operands.\n"
-    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
-    "For complete details, spend some time with the IA-32 software developer's manual,\n"
-    "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
-    "It is included in this repository as 'sib.pdf'.\n"
-  );
-  put(Help, "index",
-    "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
-    "the 'base' operand to compute the 'effective address' at which to look up memory.\n"
-    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
-    "For complete details, spend some time with the IA-32 software developer's manual,\n"
-    "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
-    "It is included in this repository as 'sib.pdf'.\n"
-  );
-  put(Help, "scale",
-    "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
-    "power of 2 to be multiplied to the 'index' operand before adding the result to\n"
-    "the 'base' operand to compute the _effective address_ to operate on.\n"
-    "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
-    "\n"
-    "When scale is 0, use index unmodified.\n"
-    "When scale is 1, multiply index by 2.\n"
-    "When scale is 2, multiply index by 4.\n"
-    "When scale is 3, multiply index by 8.\n"
-    "\n"
-    "For complete details, spend some time with the IA-32 software developer's manual,\n"
-    "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
-    "It is included in this repository as 'sib.pdf'.\n"
-  );
-  put(Help, "disp8",
-    "8-bit value to be added in many instructions.\n"
-  );
-  put(Help, "disp16",
-    "16-bit value to be added in many instructions.\n"
-    "Currently not used in any SubX instructions.\n"
-  );
-  put(Help, "disp32",
-    "32-bit value to be added in many instructions.\n"
-  );
-  put(Help, "imm8",
-    "8-bit value for many instructions.\n"
-  );
-  put(Help, "imm32",
-    "32-bit value for many instructions.\n"
-  );
-}
-
-//:: transform packing operands into bytes in the right order
-
-:(after "Begin Transforms")
-// Begin Level-2 Transforms
-Transform.push_back(pack_operands);  // append the operand-packing pass to the list of transforms
-// End Level-2 Transforms
-
-:(code)
-//: Transform pass: rewrite every instruction of the "code" segment so that its
-//: operands are packed into bytes. Lines that already consist solely of
-//: untagged two-digit hex bytes are left untouched. Each rewritten line is
-//: traced before and after packing.
-void pack_operands(program& p) {
-  if (p.segments.empty()) return;  // nothing to transform
-  // NOTE(review): the result of find() is dereferenced unchecked; presumably a
-  // "code" segment always exists once any segment does -- confirm.
-  segment& code = *find(p, "code");
-  // Pack Operands(segment code)
-  trace(3, "transform") << "-- pack operands" << end();
-  for (int i = 0;  i < SIZE(code.lines);  ++i) {
-    line& inst = code.lines.at(i);
-    if (all_hex_bytes(inst)) continue;  // already in final form
-    trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/inst) << "'" << end();
-    pack_operands(inst);
-    trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/inst.words) << "'" << end();
-  }
-}
-
-//: Replace the words of a single instruction with their packed form. The
-//: pieces are appended in a fixed order: opcode bytes, ModR/M byte, SIB byte,
-//: displacement bytes, immediate bytes.
-void pack_operands(line& inst) {
-  line packed;
-  add_opcodes(inst, packed);
-  add_modrm_byte(inst, packed);
-  add_sib_byte(inst, packed);
-  add_disp_bytes(inst, packed);
-  add_imm_bytes(inst, packed);
-  // only the words are exchanged; everything else in 'inst' stays as it was
-  inst.words.swap(packed.words);
-}
-
-//: Copy the opcode words of 'in' to 'out'.
-//:   - always the first word
-//:   - also the second word when the first is 0f, f2 or f3
-//:   - also the third word when the first two are 'f2 0f' or 'f3 0f'
-void add_opcodes(const line& in, line& out) {
-  const string first = in.words.at(0).data;
-  out.words.push_back(in.words.at(0));
-  const bool starts_with_prefix = (first == "f2" || first == "f3");
-  if (first != "0f" && !starts_with_prefix) return;  // single-word opcode
-  out.words.push_back(in.words.at(1));
-  if (starts_with_prefix && in.words.at(1).data == "0f")
-    out.words.push_back(in.words.at(2));
-}
-
-//: Combine the 'mod', 'r32'/'subop' and 'rm32' operands of 'in' into a single
-//: byte laid out as mod<<6 | reg<<3 | rm, and append it to 'out'. Operands
-//: that are absent contribute 0. Nothing is appended if none of the four
-//: operand types is present.
-void add_modrm_byte(const line& in, line& out) {
-  uint8_t mod_bits=0, reg_bits=0, rm_bits=0;
-  bool found = false;
-  for (int i = 0;  i < SIZE(in.words);  ++i) {
-    const word& w = in.words.at(i);
-    if (has_operand_metadata(w, "mod"))
-      mod_bits = hex_byte(w.data);
-    else if (has_operand_metadata(w, "rm32"))
-      rm_bits = hex_byte(w.data);
-    // r32 and subop occupy the same bits
-    else if (has_operand_metadata(w, "r32") || has_operand_metadata(w, "subop"))
-      reg_bits = hex_byte(w.data);
-    else
-      continue;  // not part of this byte
-    found = true;
-  }
-  if (!found) return;
-  out.words.push_back(hex_byte_text((mod_bits << 6) | (reg_bits << 3) | rm_bits));
-}
-
-//: Combine the 'scale', 'index' and 'base' operands of 'in' into a single byte
-//: laid out as scale<<6 | index<<3 | base, and append it to 'out'. Operands
-//: that are absent contribute 0. Nothing is appended if none of the three is
-//: present.
-void add_sib_byte(const line& in, line& out) {
-  uint8_t scale_bits=0, index_bits=0, base_bits=0;
-  bool found = false;
-  for (int i = 0;  i < SIZE(in.words);  ++i) {
-    const word& w = in.words.at(i);
-    if (has_operand_metadata(w, "scale"))
-      scale_bits = hex_byte(w.data);
-    else if (has_operand_metadata(w, "index"))
-      index_bits = hex_byte(w.data);
-    else if (has_operand_metadata(w, "base"))
-      base_bits = hex_byte(w.data);
-    else
-      continue;  // not part of this byte
-    found = true;
-  }
-  if (!found) return;
-  out.words.push_back(hex_byte_text((scale_bits << 6) | (index_bits << 3) | base_bits));
-}
-
-//: Append the bytes of every displacement operand in 'in' to 'out', in the
-//: order the operands appear: 1 byte for disp8, 2 for disp16, 4 for disp32.
-void add_disp_bytes(const line& in, line& out) {
-  for (int i = 0;  i < SIZE(in.words);  ++i) {
-    const word& curr = in.words.at(i);
-    // The three cases are mutually exclusive (a word is matched under at most
-    // one operand type), so chain them like add_imm_bytes does rather than
-    // re-querying a word that was already handled.
-    if (has_operand_metadata(curr, "disp8"))
-      emit_hex_bytes(out, curr, 1);
-    else if (has_operand_metadata(curr, "disp16"))
-      emit_hex_bytes(out, curr, 2);
-    else if (has_operand_metadata(curr, "disp32"))
-      emit_hex_bytes(out, curr, 4);
-  }
-}
-
-//: Append the bytes of every immediate operand in 'in' to 'out', in the order
-//: the operands appear: 1 byte for imm8, 4 for imm32.
-void add_imm_bytes(const line& in, line& out) {
-  for (int i = 0;  i < SIZE(in.words);  ++i) {
-    const word& w = in.words.at(i);
-    int width = 0;  // 0 => not an immediate
-    if (has_operand_metadata(w, "imm8"))
-      width = 1;
-    else if (has_operand_metadata(w, "imm32"))
-      width = 4;
-    if (width > 0)
-      emit_hex_bytes(out, w, width);
-  }
-}
-
-//: Append word 'w' to 'out' as 'num' bytes.
-//:   - A multi-byte number is split into 'num' separate byte words.
-//:   - Anything else stays a single word that keeps the metadata of 'w'; if it
-//:     is a number, its data is rewritten as one hex byte.
-void emit_hex_bytes(line& out, const word& w, int num) {
-  assert(num <= 4);
-  const bool numeric = looks_like_hex_int(w.data);
-  if (numeric && num != 1) {
-    emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
-    return;
-  }
-  out.words.push_back(w);  // copy carries the existing metadata along
-  if (numeric)
-    out.words.back().data = hex_byte_to_string(parse_int(w.data));
-}
-
-//: Append the low 'num' bytes of 'val' to 'out' as separate words, starting
-//: with the least-significant byte.
-void emit_hex_bytes(line& out, uint32_t val, int num) {
-  assert(num <= 4);
-  int remaining = num;
-  while (remaining-- > 0) {
-    out.words.push_back(hex_byte_text(val & 0xff));
-    val >>= 8;  // expose the next byte
-  }
-}
-
-//: Build a word for a generated byte: 'data' is the hex text of 'val', and
-//: 'original' is the same text with an "/auto" suffix.
-word hex_byte_text(uint8_t val) {
-  const string text = hex_byte_to_string(val);
-  word generated;
-  generated.data = text;
-  generated.original = text+"/auto";
-  return generated;
-}
-
-//: Render 'val' as text by streaming it through the project's HEXBYTE and NUM
-//: helpers. The tests in this layer expect two lowercase hex digits, e.g. "02"
-//: and "ff".
-string hex_byte_to_string(uint8_t val) {
-  ostringstream out;
-  // uint8_t prints without padding, but int8_t will expand to 32 bits again
-  out << HEXBYTE << NUM(val);
-  return out.str();
-}
-
-//: Join the 'data' of each word with single spaces, leaving out all metadata.
-string to_string(const vector<word>& in) {
-  ostringstream out;
-  const char* separator = "";
-  for (int i = 0;  i < SIZE(in);  ++i) {
-    out << separator << in.at(i).data;
-    separator = " ";  // every word after the first is preceded by a space
-  }
-  return out.str();
-}
-
-:(before "End Unit Tests")
-//: Emitting a word as a single byte keeps its 'original' text (and so its
-//: metadata) rather than replacing it with a generated word.
-void test_preserve_metadata_when_emitting_single_byte() {
-  word in;
-  in.data = "f0";
-  in.original = "f0/foo";
-  line out;
-  emit_hex_bytes(out, in, 1);
-  CHECK_EQ(out.words.at(0).data, "f0");
-  CHECK_EQ(out.words.at(0).original, "f0/foo");
-}
-
-:(code)
-//: A one-digit disp8 operand is padded to a two-digit hex byte.
-void test_pack_disp8() {
-  run(
-      "== code 0x1\n"
-      "74 2/disp8\n"  // jump 2 bytes away if ZF is set
-  );
-  CHECK_TRACE_CONTENTS(
-      "transform: packing instruction '74 2/disp8'\n"
-      "transform: instruction after packing: '74 02'\n"
-  );
-}
-
-//: A negative disp8 operand is packed as its low byte: -1 becomes ff.
-//: Only transforms are applied here; the program is never run.
-void test_pack_disp8_negative() {
-  transform(
-      "== code 0x1\n"
-      // running this will cause an infinite loop
-      "74 -1/disp8\n"  // jump 1 byte before if ZF is set
-  );
-  CHECK_TRACE_CONTENTS(
-      "transform: packing instruction '74 -1/disp8'\n"
-      "transform: instruction after packing: '74 ff'\n"
-  );
-}
-
-//: Helper for scenarios: parse 'text_bytes' into a program and apply the
-//: transforms to it. Transforms are skipped if parsing left errors in the
-//: trace.
-void transform(const string& text_bytes) {
-  program p;
-  istringstream in(text_bytes);
-  parse(in, p);
-  if (!trace_contains_errors())
-    transform(p);
-}
-
-//: subop, mod and rm32 are folded into one ModR/M byte (0, 3, 3 => c3), which
-//: is followed by the four bytes of the imm32 operand.
-void test_pack_modrm_imm32() {
-  run(
-      "== code 0x1\n"
-      // instruction                     effective address                                                   operand     displacement    immediate
-      // op          subop               mod             rm32          base        index         scale       r32
-      // 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      3 bits      0/1/2/4 bytes   0/1/2/4 bytes
-      "  81          0/add/subop         3/mod/direct    3/ebx/rm32                                                                      1/imm32      \n"  // add 1 to EBX
-  );
-  CHECK_TRACE_CONTENTS(
-      "transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'\n"
-      "transform: instruction after packing: '81 c3 01 00 00 00'\n"
-  );
-}
-
-//: All four bytes of a large imm32 are emitted, least-significant byte first.
-void test_pack_imm32_large() {
-  run(
-      "== code 0x1\n"
-      "b9  0x080490a7/imm32\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "transform: packing instruction 'b9 0x080490a7/imm32'\n"
-      "transform: instruction after packing: 'b9 a7 90 04 08'\n"
-  );
-}
-
-//: An imm32 written with a '0x' prefix is packed into four bytes and then
-//: executed; with opcode b9 the trace reports a copy to ECX.
-void test_pack_immediate_constants_hex() {
-  run(
-      "== code 0x1\n"
-      "b9  0x2a/imm32\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "transform: packing instruction 'b9 0x2a/imm32'\n"
-      "transform: instruction after packing: 'b9 2a 00 00 00'\n"
-      "run: copy imm32 0x0000002a to ECX\n"
-  );
-}
-
-//: An operand that doesn't look like a number passes through packing as a
-//: single unchanged word.
-void test_pack_silently_ignores_non_hex() {
-  Hide_errors = true;
-  transform(
-      "== code 0x1\n"
-      "b9  foo/imm32\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "transform: packing instruction 'b9 foo/imm32'\n"
-      // no change (we're just not printing metadata to the trace)
-      "transform: instruction after packing: 'b9 foo'\n"
-  );
-}
-
-//: An operand that starts like a number but doesn't parse as one is reported
-//: as an error.
-void test_pack_flags_bad_hex() {
-  Hide_errors = true;
-  run(
-      "== code 0x1\n"
-      "b9  0xfoo/imm32\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "error: not a number: 0xfoo\n"
-  );
-}
-
-//: Uppercase hex digits in a numeric operand are reported as an error.
-void test_pack_flags_uppercase_hex() {
-  Hide_errors = true;
-  run(
-      "== code 0x1\n"
-      "b9 0xAb/imm32\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "error: uppercase hex not allowed: 0xAb\n"
-  );
-}
-
-//:: helpers
-
-//: Does every word of 'inst' pass is_hex_byte? Trivially true for an
-//: instruction without words.
-bool all_hex_bytes(const line& inst) {
-  int i = 0;
-  // stop at the first word that isn't a plain hex byte
-  while (i < SIZE(inst.words) && is_hex_byte(inst.words.at(i)))
-    ++i;
-  return i == SIZE(inst.words);
-}
-
-//: A plain hex byte is a word with no operand-type metadata whose data is
-//: exactly two characters, both lowercase hex digits.
-bool is_hex_byte(const word& curr) {
-  return !contains_any_operand_metadata(curr)
-      && SIZE(curr.data) == 2
-      && curr.data.find_first_not_of("0123456789abcdef") == string::npos;
-}
-
-//: Is at least one metadata entry of the word a recognized operand type?
-bool contains_any_operand_metadata(const word& word) {
-  for (int i = 0;  i < SIZE(word.metadata);  ++i) {
-    const string& tag = word.metadata.at(i);
-    if (Instruction_operands.count(tag) > 0)
-      return true;
-  }
-  return false;
-}
-
-//: Does exactly one word of 'inst' carry operand type 'm'?
-//: Finding a second such word raises an error and yields false, without
-//: looking at the remaining words.
-bool has_operand_metadata(const line& inst, const string& m) {
-  bool seen = false;
-  for (int i = 0;  i < SIZE(inst.words);  ++i) {
-    if (has_operand_metadata(inst.words.at(i), m)) {
-      if (seen) {
-        raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
-        return false;
-      }
-      seen = true;
-    }
-  }
-  return seen;
-}
-
-//: Is 'm' the operand type of word 'w'?
-//: Metadata that isn't a recognized operand type is skipped. A word tagged
-//: with more than one recognized operand type raises an error and yields
-//: false, whatever 'm' is.
-bool has_operand_metadata(const word& w, const string& m) {
-  int operand_type_index = -1;  // position of the one recognized operand type, if any
-  for (int i = 0;  i < SIZE(w.metadata);  ++i) {
-    if (Instruction_operands.count(w.metadata.at(i)) == 0) continue;  // ignore unrecognized metadata
-    if (operand_type_index >= 0) {
-      raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
-      return false;
-    }
-    operand_type_index = i;
-  }
-  if (operand_type_index < 0) return false;
-  return w.metadata.at(operand_type_index) == m;
-}
-
-//: Return a copy of the first word of 'inst' that carries operand type 'm'.
-//: Callers are expected to ask only for operand types that are present; a
-//: missing one trips the assert.
-word metadata(const line& inst, const string& m) {
-  for (int i = 0;  i < SIZE(inst.words);  ++i)
-    if (has_operand_metadata(inst.words.at(i), m))
-      return inst.words.at(i);
-  assert(false);
-  // Reached only when asserts are compiled out. Return an empty word so that
-  // control never flows off the end of a non-void function.
-  return word();
-}
-
-//: Cheap check of whether 's' is meant to be a number: it starts with a sign
-//: or a decimal digit. Only the first character is examined; full validation
-//: is left to whoever parses the number.
-bool looks_like_hex_int(const string& s) {
-  if (s.empty()) return false;
-  if (s.at(0) == '-' || s.at(0) == '+') return true;
-  // isdigit() is undefined for negative arguments, which a plain char can hold
-  // for non-ASCII bytes; go through unsigned char first.
-  if (isdigit(static_cast<unsigned char>(s.at(0)))) return true;  // includes '0x' prefix
-  // End looks_like_hex_int(s) Detectors
-  return false;
-}
-
-//: Join the 'original' text of each word of 'inst' (metadata included) with
-//: single spaces.
-string to_string(const line& inst) {
-  ostringstream out;
-  const char* separator = "";
-  for (int i = 0;  i < SIZE(inst.words);  ++i) {
-    out << separator << inst.words.at(i).original;
-    separator = " ";  // every word after the first is preceded by a space
-  }
-  return out.str();
-}
-
-//: Parse 's' as a hexadecimal integer (a '0x' prefix is optional) and return
-//: it as a signed 32-bit value.
-//:   - An empty string yields 0.
-//:   - Text starting with '-' is read as a signed number; anything else is
-//:     read as unsigned and then reinterpreted, so "0xffffffff" yields -1.
-//:   - Uppercase letters, unparseable text and trailing characters each raise
-//:     an error and yield 0.
-int32_t parse_int(const string& s) {
-  if (s.empty()) return 0;
-  if (contains_uppercase(s)) {
-    raise << "uppercase hex not allowed: " << s << '\n' << end();
-    return 0;
-  }
-  istringstream in(s);
-  in >> std::hex;
-  int32_t value = 0;
-  if (s.at(0) == '-') {
-    in >> value;
-  }
-  else {
-    uint32_t unsigned_value = 0;
-    in >> unsigned_value;
-    value = static_cast<int32_t>(unsigned_value);
-  }
-  // the whole string must have been consumed by a successful read
-  if (!in || !in.eof()) {
-    raise << "not a number: " << s << '\n' << end();
-    return 0;
-  }
-  return value;
-}
-:(before "End Unit Tests")
-//: parse_int always reads hex, with or without a '0x' prefix, and returns a
-//: signed 32-bit result.
-void test_parse_int() {
-  CHECK_EQ(0, parse_int("0"));
-  CHECK_EQ(0, parse_int("0x0"));
-  CHECK_EQ(16, parse_int("0x10"));  // prefix followed by non-zero digits
-  CHECK_EQ(16, parse_int("10"));  // hex always
-  CHECK_EQ(-1, parse_int("-1"));
-  CHECK_EQ(-1, parse_int("0xffffffff"));
-}
author	Kartik Agaram <vc@akkartik.com>	2019-09-19 15:26:24 -0700
committer	Kartik Agaram <vc@akkartik.com>	2019-09-19 15:26:24 -0700
commit	dc8790941e39efb25c40de0420fdd4bce03f2761 (patch)
tree	e14e01797522c3027d5e343db50c4427950bb1e1 /030---operands.cc
parent	68666b07b88fb4d72ff824550dd271b84520fdf5 (diff)
download	mu-dc8790941e39efb25c40de0420fdd4bce03f2761.tar.gz