linux/bootstrap/032operands.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
//: Introduce a new transform to perform various checks in instructions before
//: we start running them. It'll be extensible, so that we can add checks for
//: new recipes as we extend 'run' to support them.
//:
//: Doing checking in a separate part complicates things, because the values
//: of variables in memory and the processor (current_recipe_name,
//: current_instruction) aren't available at checking time. If I had a more
//: sophisticated layer system I'd introduce the simpler version first and
//: transform it in a separate layer or set of layers.

:(before "End Checks")
// Append check_instruction to the Transform list.
Transform.push_back(check_instruction);  // idempotent

:(code)
void check_instruction(const recipe_ordinal r) {
  trace(101, "transform") << "--- perform checks for recipe " << get(Recipe, r).name << end();
  map<string, vector<type_ordinal> > metadata;
  for (int i = 0;  i < SIZE(get(Recipe, r).steps);  ++i) {
    instruction& inst = get(Recipe, r).steps.at(i);
    if (inst.is_label) continue;
    switch (inst.operation) {
      // Primitive Recipe Checks
      case COPY: {
        if (SIZE(inst.products) > SIZE(inst.ingredients)) {
          raise << maybe(get(Recipe, r).name) << "too many products in '" << to_original_string(inst) << "'\n" << end();
          break;
        }
        for (int i = 0;  i < SIZE(inst.products);  ++i) {
          if (!types_coercible(inst.products.at(i), inst.ingredients.at(i))) {
            raise << maybe(get(Recipe, r).name) << "can't copy '" << inst.ingredients.
//: Metadata for fields of an x86 instruction.
//:
//: The x86 instruction set is variable-length, and how a byte is interpreted
//: affects later instruction boundaries. A lot of the pain in programming
//: machine code stems from computer and programmer going out of sync on what
//: a byte means. The miscommunication is usually not immediately caught, and
//: metastasizes at runtime into kilobytes of misinterpreted instructions.
//:
//: To mitigate these issues, we'll start programming in terms of logical
//: arguments rather than physical bytes. Some arguments are smaller than a
//: byte, and others may consist of multiple bytes. This layer will correctly
//: pack and order the bytes corresponding to the arguments in an instruction.

:(before "End Help Texts")
// Help page for the topic "instructions": describes an instruction as an
// opcode plus typed arguments, and lists every argument type by name.
put_new(Help, "instructions",
  "Each x86 instruction consists of an instruction or opcode and some number\n"
  "of arguments.\n"
  "Each argument has a type. An instruction won't have more than one argument of\n"
  "any type.\n"
  "Each instruction has some set of allowed argument types. It'll reject others.\n"
  "The complete list of argument types: mod, subop, r32 (integer register),\n"
  "rm32 (integer register or memory), x32 (floating point register),\n"
  "xm32 (floating point register or memory), scale, index, base, disp8, disp16,\n"
  "disp32,imm8,imm32.\n"
  "Each of these has its own help page. Try reading 'bootstrap help mod' next.\n"
);
:(before "End Help Contents")
// Print the 'instructions' topic name on its own line to stderr.
cerr << "  instructions\n";

:(before "Running Test Program")
// Transform the program before it is run (presumably by applying the
// functions registered in Transform -- confirm against transform()).
transform(p);
// Don't go on to run anything if the trace now contains errors.
if (trace_contains_errors()) return;

:(code)
// An instruction written with an /imm32 argument should be packed into its
// opcode followed by the immediate as 4 bytes, low byte first
// ('bb 0x2a/imm32' -> 'bb 2a 00 00 00'), and then run as a copy of 0x2a to EBX.
void test_pack_immediate_constants() {
  run(
      "== code 0x1\n"
      "bb  0x2a/imm32\n"
  );
  // Expect one trace line for the instruction as written, one for its packed
  // bytes, and one from actually running it.
  CHECK_TRACE_CONTENTS(
      "transform: packing instruction 'bb 0x2a/imm32'\n"
      "transform: instruction after packing: 'bb 2a 00 00 00'\n"
      "run: copy imm32 0x0000002a to EBX\n"
  );
}

//: complete set of valid argument types

:(before "End Globals")
// Names of all valid argument types; filled in during one-time setup.
set<string> Instruction_arguments;
:(before "End One-time Setup")
// Populate the set with the 14 argument-type names. These are the same names
// listed in the 'instructions' help text.
Instruction_arguments.insert("subop");
Instruction_arguments.insert("mod");
Instruction_arguments.insert("rm32");
Instruction_arguments.insert("xm32");
Instruction_arguments.insert("base");
Instruction_arguments.insert("index");
Instruction_arguments.insert("scale");
Instruction_arguments.insert("r32");
Instruction_arguments.insert("x32");
Instruction_arguments.insert("disp8");
Instruction_arguments.insert("disp16");
Instruction_arguments.insert("disp32");
Instruction_arguments.insert("imm8");
Instruction_arguments.insert("imm32");

:(before "End Help Texts")
// Add one help page per argument type.
init_argument_type_help();
:(code)
// Store a help page in Help for each of the 14 argument types, keyed by the
// type's name (the same names inserted into Instruction_arguments).
// Takes no arguments and returns nothing; its only visible effect is the
// sequence of put() calls on Help.
void init_argument_type_help() {
  // Fields of the ModR/M byte: mod, subop/r32/x32, rm32/xm32.
  put(Help, "mod",
    "2-bit argument controlling the _addressing mode_ of many instructions,\n"
    "to determine how to compute the _effective address_ to look up memory at\n"
    "based on the 'rm32' argument and potentially others.\n"
    "\n"
    "If mod = 3, just operate on the contents of the register specified by rm32\n"
    "            (direct mode)\n"
    "If mod = 2, effective address is usually* rm32 + disp32\n"
    "            (indirect mode with displacement)\n"
    "If mod = 1, effective address is usually* rm32 + disp8\n"
    "            (indirect mode with displacement)\n"
    "If mod = 0, effective address is usually* rm32\n"
    "            (indirect mode)\n"
    "\n"
    "* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
    "    Using it as an address gets more involved. For more details,\n"
    "    try reading the help pages for 'base', 'index' and 'scale'.\n"
    "\n"
    "For complete details, spend some time with two tables in the IA-32 software\n"
    "developer's manual that are also included in this repo:\n"
    "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
    "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
  );
  put(Help, "subop",
    "Additional 3-bit argument for determining the instruction when the opcode\n"
    "is 81, 8f, d3, f7 or ff.\n"
    "Can't coexist with argument of type 'r32' in a single instruction, because\n"
    "the two use the same bits.\n"
  );
  put(Help, "r32",
    "3-bit argument specifying an integer register argument used directly,\n"
    "without any further addressing modes.\n"
  );
  put(Help, "x32",
    "3-bit argument specifying a floating-point register argument used directly,\n"
    "without any further addressing modes.\n"
  );
  put(Help, "rm32",
    "32-bit value in an integer register or memory. The precise details of its\n"
    "construction depend on the eponymous 3-bit 'rm32' argument, the 'mod' argument,\n"
    "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
    "and a displacement ('disp8' or 'disp32').\n"
    "\n"
    "For complete details, spend some time with two tables in the IA-32 software\n"
    "developer's manual that are also included in this repo:\n"
    "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
    "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
  );
  put(Help, "xm32",
    "32-bit value in a floating-point register or memory. The precise details of its\n"
    "construction depend on the eponymous 3-bit 'xm32' argument, the 'mod' argument,\n"
    "and also potentially the 'SIB' arguments ('scale', 'index' and 'base')\n"
    "and a displacement ('disp8' or 'disp32').\n"
    "\n"
    "For complete details, spend some time with two tables in the IA-32 software\n"
    "developer's manual that are also included in this repo:\n"
    "  - modrm.pdf: volume 2, table 2-2, \"32-bit addressing with the ModR/M byte.\".\n"
    "  - sib.pdf: volume 2, table 2-3, \"32-bit addressing with the SIB byte.\".\n"
    "\n"
    "One subtlety here: while /xm32 refers to floating-point registers in direct mode\n"
    "(when /mod is 3), other addressing modes to construct memory addresses use integer registers\n"
    "(just like /rm32). Other than direct mode, its behavior is identical to /rm32.\n"
  );
  // Fields of the SIB byte: base, index, scale.
  put(Help, "base",
    "Additional 3-bit argument (when 'rm32' is 4, unless 'mod' is 3) specifying the\n"
    "register containing an address to look up.\n"
    "This address may be further modified by 'index' and 'scale' arguments.\n"
    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details, spend some time with the IA-32 software developer's manual,\n"
    "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
    "It is included in this repository as 'sib.pdf'.\n"
  );
  put(Help, "index",
    "Optional 3-bit argument (when 'rm32' is 4 unless 'mod' is 3) that can be added to\n"
    "the 'base' argument to compute the 'effective address' at which to look up memory.\n"
    "  effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details, spend some time with the IA-32 software developer's manual,\n"
    "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
    "It is included in this repository as 'sib.pdf'.\n"
  );
  put(Help, "scale",
    "Optional 2-bit argument (when 'rm32' is 4 unless 'mod' is 3) that encodes a\n"
    "power of 2 to be multiplied to the 'index' argument before adding the result to\n"
    "the 'base' argument to compute the _effective address_ to operate on.\n"
    "  effective address = base + index * scale + displacement (disp8 or disp32)\n"
    "\n"
    "When scale is 0, use index unmodified.\n"
    "When scale is 1, multiply index by 2.\n"
    "When scale is 2, multiply index by 4.\n"
    "When scale is 3, multiply index by 8.\n"
    "\n"
    "For complete details, spend some time with the IA-32 software developer's manual,\n"
    "volume 2, table 2-3, \"32-bit addressing with the SIB byte\".\n"
    "It is included in this repository as 'sib.pdf'.\n"
  );
  // Displacements.
  put(Help, "disp8",
    "8-bit value to be added in many instructions.\n"
  );
  put(Help, "disp16",
    "16-bit value to be added in many instructions.\n"
    "Currently not used in any SubX instructions.\n"
  );
  put(Help, "disp32",
    "32-bit value to be added in many instructions.\n"
  );
  // Immediates.
  put(Help, "imm8",
    "8-bit value for many instructions.\n"
  );
  put(Help, "imm32",
    "32-bit value for many instructions.\n"
  );
}

//:: transform packing arguments into bytes in the right order

:(after "Begin Transforms")
// Add the argument-packing pass to the Transform list.
// NOTE(review): pack_arguments is overloaded; presumably the program& overload
// is the one selected here -- confirm against Transform's element type.
Transform.push_back(pack_arguments);

:(code)
// Transform pass over the "code" segment of a program: every line there is
// handed to pack_arguments(line&) and traced before and after, except lines
// for which all_hex_bytes() holds, which are skipped untouched.
// Does nothing when the program has no segments.
// NOTE(review): the result of find(p, "code") is dereferenced unconditionally;
// this assumes a segment named "code" exists whenever p has any segments --
// TODO confirm.
void pack_arguments(program& p) {
  if (p.segments.empty()) return;
  segment& code = *find(p, "code");
  // Pack Operands(segment code)
  trace(3, "transform") << "-- pack arguments" << end();
  for (int line_index = 0;  line_index < SIZE(code.lines);  ++line_index) {
    line& curr = code.lines.at(line_index);
    if (!all_hex_bytes(curr)) {
      trace(99, "transform") << "packing instruction '" << to_string(/*with metadata*/curr) << "'" << end();
      pack_arguments(curr);
      trace(99, "transform") << "instruction after packing: '" << to_string(/*without metadata*/curr.words) << "'" << end();
    }
  }
}

// Re-encode a single instruction in place.
// The packed form is built up in a scratch line by five helpers called in a
// fixed order (going by their names: opcodes, ModR/M byte, SIB byte,
// displacement bytes, immediate bytes). Afterwards only the words of 'inst'
// are replaced; no other field of 'inst' is touched here.
void pack_arguments(line& inst) {
  line packed;
  add_opcodes(inst, packed);
  add_modrm_byte(inst, packed);
  add_sib_byte(inst, packed);
  add_disp_bytes(inst, packed);
  add_imm_bytes(inst, packed);
  // Exchange contents rather than copy: 'inst' ends up with the packed words
  // and the scratch line takes the originals.
  packed.words.swap(inst.words);
}

void add_opcodes(const line& in, line& out) {
  out.words.push_back(in.words.at(0));
  if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
    out.words.push_back(in.words.at(1));
  if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
    out.words.push_back(in.words.at(2));
  if (in.words.at(0).data == "f2" && in.words