1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 :(before "End Help Texts")
16 put(Help, "instructions",
17 "Each x86 instruction consists of an instruction or opcode and some number\n"
18 "of operands.\n"
19 "Each operand has a type. An instruction won't have more than one operand of\n"
20 "any type.\n"
21 "Each instruction has some set of allowed operand types. It'll reject others.\n"
22 "The complete list of operand types: mod, subop, r32 (register), rm32\n"
23 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
24 "imm32.\n"
25 "Each of these has its own help page. Try reading 'subx help mod' next.\n"
26 );
27 :(before "End Help Contents")
28 cerr << " instructions\n";
29
30 :(scenario pack_immediate_constants)
31 == 0x1
32
33
34
35 bb 0x2a/imm32
36 +transform: packing instruction 'bb 0x2a/imm32'
37 +transform: instruction after packing: 'bb 2a 00 00 00'
38 +run: copy imm32 0x0000002a to EBX
39
40
41
42 :(before "End Globals")
43 set<string> Instruction_operands;
44 :(before "End One-time Setup")
45 Instruction_operands.insert("subop");
46 Instruction_operands.insert("mod");
47 Instruction_operands.insert("rm32");
48 Instruction_operands.insert("base");
49 Instruction_operands.insert("index");
50 Instruction_operands.insert("scale");
51 Instruction_operands.insert("r32");
52 Instruction_operands.insert("disp8");
53 Instruction_operands.insert("disp16");
54 Instruction_operands.insert("disp32");
55 Instruction_operands.insert("imm8");
56 Instruction_operands.insert("imm32");
57
58 :(before "End Help Texts")
59 init_operand_type_help();
60 :(code)
61 void init_operand_type_help() {
62 put(Help, "mod",
63 "2-bit operand controlling the _addressing mode_ of many instructions,\n"
64 "to determine how to compute the _effective address_ to look up memory at\n"
65 "based on the 'rm32' operand and potentially others.\n"
66 "\n"
67 "If mod = 3, just operate on the contents of the register specified by rm32\n"
68 " (direct mode).\n"
69 "If mod = 2, effective address is usually* rm32 + disp32\n"
70 " (indirect mode with displacement).\n"
71 "If mod = 1, effective address is usually* rm32 + disp8\n"
72 " (indirect mode with displacement).\n"
73 "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
74 "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
75 " Using it as an address gets more involved. For more details,\n"
76 " try reading the help pages for 'base', 'index' and 'scale'.)\n"
77 "\n"
78 "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
79 "\"32-bit addressing forms with the ModR/M byte\".\n"
80 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
81 );
82 put(Help, "subop",
83 "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
84 "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
85 );
86 put(Help, "r32",
87 "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
88 );
89 put(Help, "rm32",
90 "32-bit value in register or memory. The precise details of its construction depend on the eponymous 3-bit\n"
91 "'rm32' operand, the 'mod' operand, and also potentially the 'SIB' operands ('scale', 'index' and 'base')\n"
92 "and a displacement ('disp8' or 'disp32').\n"
93 "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
94 "\"32-bit addressing forms with the ModR/M byte\".\n"
95 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
96 );
97 put(Help, "base",
98 "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
99 "This address may be further modified by 'index' and 'scale' operands.\n"
100 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
101 "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
102 "\"32-bit addressing forms with the SIB byte\".\n"
103 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
104 );
105 put(Help, "index",
106 "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
107 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
108 "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
109 "\"32-bit addressing forms with the SIB byte\".\n"
110 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
111 );
112 put(Help, "scale",
113 "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
114 " effective address = base + index * scale + displacement (disp8 or disp32)\n"
115 "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
116 "\"32-bit addressing forms with the SIB byte\".\n"
117 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
118 );
119 put(Help, "disp8",
120 "8-bit value to be added in many instructions.\n"
121 );
122 put(Help, "disp16",
123 "16-bit value to be added in many instructions.\n"
124 );
125 put(Help, "disp32",
126 "32-bit value to be added in many instructions.\n"
127 );
128 put(Help, "imm8",
129 "8-bit value for many instructions.\n"
130 );
131 put(Help, "imm32",
132 "32-bit value for many instructions.\n"
133 );
134 }
135
136
137
138 :(after "Begin Transforms")
139
140 Transform.push_back(pack_operands);
141
142
143 :(code)
144 void pack_operands(program& p) {
145 if (p.segments.empty()) return;
146 segment& code = p.segments.at(0);
147
148 trace(99, "transform") << "-- pack operands" << end();
149 for (int i = 0; i < SIZE(code.lines); ++i) {
150 line& inst = code.lines.at(i);
151 if (all_hex_bytes(inst)) continue;
152 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
153 pack_operands(inst);
154 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
155 }
156 }
157
158 void pack_operands(line& inst) {
159 line new_inst;
160 add_opcodes(inst, new_inst);
161 add_modrm_byte(inst, new_inst);
162 add_sib_byte(inst, new_inst);
163 add_disp_bytes(inst, new_inst);
164 add_imm_bytes(inst, new_inst);
165 inst.words.swap(new_inst.words);
166 }
167
168 void add_opcodes(const line& in, line& out) {
169 out.words.push_back(in.words.at(0));
170 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
171 out.words.push_back(in.words.at(1));
172 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
173 out.words.push_back(in.words.at(2));
174 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
175 out.words.push_back(in.words.at(2));
176 }
177
178 void add_modrm_byte(const line& in, line& out) {
179 uint8_t mod=0, reg_subop=0, rm32=0;
180 bool emit = false;
181 for (int i = 0; i < SIZE(in.words); ++i) {
182 const word& curr = in.words.at(i);
183 if (has_operand_metadata(curr, "mod")) {
184 mod = hex_byte(curr.data);
185 emit = true;
186 }
187 else if (has_operand_metadata(curr, "rm32")) {
188 rm32 = hex_byte(curr.data);
189 emit = true;
190 }
191 else if (has_operand_metadata(curr, "r32")) {
192 reg_subop = hex_byte(curr.data);
193 emit = true;
194 }
195 else if (has_operand_metadata(curr, "subop")) {
196 reg_subop = hex_byte(curr.data);
197 emit = true;
198 }
199 }
200 if (emit)
201 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
202 }
203
204 void add_sib_byte(const line& in, line& out) {
205 uint8_t scale=0, index=0, base=0;
206 bool emit = false;
207 for (int i = 0; i < SIZE(in.words); ++i) {
208 const word& curr = in.words.at(i);
209 if (has_operand_metadata(curr, "scale")) {
210 scale = hex_byte(curr.data);
211 emit = true;
212 }
213 else if (has_operand_metadata(curr, "index")) {
214 index = hex_byte(curr.data);
215 emit = true;
216 }
217 else if (has_operand_metadata(curr, "base")) {
218 base = hex_byte(curr.data);
219 emit = true;
220 }
221 }
222 if (emit)
223 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
224 }
225
226 void add_disp_bytes(const line& in, line& out) {
227 for (int i = 0; i < SIZE(in.words); ++i) {
228 const word& curr = in.words.at(i);
229 if (has_operand_metadata(curr, "disp8"))
230 emit_hex_bytes(out, curr, 1);
231 if (has_operand_metadata(curr, "disp16"))
232 emit_hex_bytes(out, curr, 2);
233 else if (has_operand_metadata(curr, "disp32"))
234 emit_hex_bytes(out, curr, 4);
235 }
236 }
237
238 void add_imm_bytes(const line& in, line& out) {
239 for (int i = 0; i < SIZE(in.words); ++i) {
240 const word& curr = in.words.at(i);
241 if (has_operand_metadata(curr, "imm8"))
242 emit_hex_bytes(out, curr, 1);
243 else if (has_operand_metadata(curr, "imm32"))
244 emit_hex_bytes(out, curr, 4);
245 }
246 }
247
248 void emit_hex_bytes(line& out, const word& w, int num) {
249 assert(num <= 4);
250 if (num == 1 || !is_hex_int(w.data)) {
251 out.words.push_back(w);
252 if (is_hex_int(w.data))
253 out.words.back().data = hex_byte_to_string(parse_int(w.data));
254 return;
255 }
256 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
257 }
258
259 void emit_hex_bytes(line& out, uint32_t val, int num) {
260 assert(num <= 4);
261 for (int i = 0; i < num; ++i) {
262 out.words.push_back(hex_byte_text(val & 0xff));
263 val = val >> 8;
264 }
265 }
266
267 word hex_byte_text(uint8_t val) {
268 word result;
269 result.data = hex_byte_to_string(val);
270 result.original = result.data+"/auto";
271 return result;
272 }
273
274 string hex_byte_to_string(uint8_t val) {
275 ostringstream out;
276
277 out << HEXBYTE << NUM(val);
278 return out.str();
279 }
280
281 string to_string(const vector<word>& in) {
282 ostringstream out;
283 for (int i = 0; i < SIZE(in); ++i) {
284 if (i > 0) out << ' ';
285 out << in.at(i).data;
286 }
287 return out.str();
288 }
289
290 :(before "End Unit Tests")
291 void test_preserve_metadata_when_emitting_single_byte() {
292 word in;
293 in.data = "f0";
294 in.original = "f0/foo";
295 line out;
296 emit_hex_bytes(out, in, 1);
297 CHECK_EQ(out.words.at(0).data, "f0");
298 CHECK_EQ(out.words.at(0).original, "f0/foo");
299 }
300
301 :(scenario pack_disp8)
302 == 0x1
303 74 2/disp8
304 +transform: packing instruction '74 2/disp8'
305 +transform: instruction after packing: '74 02'
306
307 :(scenarios transform)
308 :(scenario pack_disp8_negative)
309 == 0x1
310
311 74 -1/disp8
312 +transform: packing instruction '74 -1/disp8'
313 +transform: instruction after packing: '74 ff'
314 :(scenarios run)
315
316
317 :(code)
318 void transform(const string& text_bytes) {
319 program p;
320 istringstream in(text_bytes);
321 parse(in, p);
322 if (trace_contains_errors()) return;
323 transform(p);
324 }
325
326 :(scenario pack_modrm_imm32)
327 == 0x1
328
329
330
331 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32
332 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
333 +transform: instruction after packing: '81 c3 01 00 00 00'
334
335 :(scenario pack_imm32_large)
336 == 0x1
337 b9 0x080490a7/imm32
338 +transform: packing instruction 'b9 0x080490a7/imm32'
339 +transform: instruction after packing: 'b9 a7 90 04 08'
340
341 :(scenario pack_immediate_constants_hex)
342 == 0x1
343
344
345
346 bb 0x2a/imm32
347 +transform: packing instruction 'bb 0x2a/imm32'
348 +transform: instruction after packing: 'bb 2a 00 00 00'
349 +run: copy imm32 0x0000002a to EBX
350
351 :(scenarios transform)
352 :(scenario pack_silently_ignores_non_hex)
353 == 0x1
354
355
356
357 bb foo/imm32
358 +transform: packing instruction 'bb foo/imm32'
359
360 +transform: instruction after packing: 'bb foo'
361 $error: 0
362 :(scenarios run)
363
364
365
366 :(code)
367 bool all_hex_bytes(const line& inst) {
368 for (int i = 0; i < SIZE(inst.words); ++i)
369 if (!is_hex_byte(inst.words.at(i)))
370 return false;
371 return true;
372 }
373
374 bool is_hex_byte(const word& curr) {
375 if (contains_any_operand_metadata(curr))
376 return false;
377 if (SIZE(curr.data) != 2)
378 return false;
379 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
380 return false;
381 return true;
382 }
383
384 bool contains_any_operand_metadata(const word& word) {
385 for (int i = 0; i < SIZE(word.metadata); ++i)
386 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
387 return true;
388 return false;
389 }
390
391 bool has_operand_metadata(const line& inst, const string& m) {
392 bool result = false;
393 for (int i = 0; i < SIZE(inst.words); ++i) {
394 if (!has_operand_metadata(inst.words.at(i), m)) continue;
395 if (result) {
396 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
397 return false;
398 }
399 result = true;
400 }
401 return result;
402 }
403
404 bool has_operand_metadata(const word& w, const string& m) {
405 bool result = false;
406 bool metadata_found = false;
407 for (int i = 0; i < SIZE(w.metadata); ++i) {
408 const string& curr = w.metadata.at(i);
409 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;
410 if (metadata_found) {
411 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
412 return false;
413 }
414 metadata_found = true;
415 result = (curr == m);
416 }
417 return result;
418 }
419
420 word metadata(const line& inst, const string& m) {
421 for (int i = 0; i < SIZE(inst.words); ++i)
422 if (has_operand_metadata(inst.words.at(i), m))
423 return inst.words.at(i);
424 assert(false);
425 }
426
// Is 's' a hexadecimal integer?
// Accepted form: an optional sign ('-' or '+'), then an optional "0x" prefix,
// then one or more hex digits (either case).
bool is_hex_int(const string& s) {
  if (s.empty()) return false;
  size_t pos = 0;
  if (s.at(0) == '-' || s.at(0) == '+') pos++;
  // Compare exactly two characters at 'pos'. (substr(pos, pos+2) would treat
  // pos+2 as a length, grabbing three characters after a sign and so failing
  // to recognize the prefix in something like "-0x1".)
  if (s.compare(pos, 2, "0x") == 0) pos += 2;
  // a sign and/or prefix with no digits after it is not a number
  if (pos >= s.size()) return false;
  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
}
434
435 :(code)
436 string to_string(const line& inst) {
437 ostringstream out;
438 for (int i = 0; i < SIZE(inst.words); ++i) {
439 if (i > 0) out << ' ';
440 out << inst.words.at(i).original;
441 }
442 return out.str();
443 }