1
2
3
4
5
6
7
8
9
10
11
12
13
14
:(before "End Help Texts")
// Store the help page for the topic "instructions" in Help. The page
// introduces opcodes and operands and lists every operand type.
put(Help, "instructions",
  "Each x86 instruction consists of an instruction or opcode and some number\n"
  "of operands.\n"
  "Each operand has a type. An instruction won't have more than one operand of\n"
  "any type.\n"
  "Each instruction has some set of allowed operand types. It'll reject others.\n"
  "The complete list of operand types: mod, subop, r32 (register), rm32\n"
  "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
  "imm32.\n"
  "Each of these has its own help page. Try reading 'subx help mod' next.\n"
);
:(before "End Help Contents")
// Print the topic name on stderr -- presumably as one entry in the list of
// available help topics; confirm at the "End Help Contents" waypoint.
cerr << " instructions\n";
29
30 :(scenario pack_immediate_constants)
31 == 0x1
32
33
34
35 bb 0x2a/imm32
36 +transform: packing instruction 'bb 0x2a/imm32'
37 +transform: instruction after packing: 'bb 2a 00 00 00'
38 +run: copy imm32 0x0000002a to EBX
39
40
41
42 :(before "End Globals")
43 set<string> Instruction_operands;
44 :(before "End One-time Setup")
45 Instruction_operands.insert("subop");
46 Instruction_operands.insert("mod");
47 Instruction_operands.insert("rm32");
48 Instruction_operands.insert("base");
49 Instruction_operands.insert("index");
50 Instruction_operands.insert("scale");
51 Instruction_operands.insert("r32");
52 Instruction_operands.insert("disp8");
53 Instruction_operands.insert("disp16");
54 Instruction_operands.insert("disp32");
55 Instruction_operands.insert("imm8");
56 Instruction_operands.insert("imm32");
57
:(before "End Help Texts")
// Store one help page per operand type alongside the other help texts.
init_operand_type_help();
:(code)
// Store a help page in Help for each operand type: mod, subop, r32, rm32,
// base, index, scale, disp8, disp16, disp32, imm8 and imm32. Each page is
// keyed by the operand type's name.
void init_operand_type_help() {
  put(Help, "mod",
    "2-bit operand controlling the _addressing mode_ of many instructions,\n"
    "to determine how to compute the _effective address_ to look up memory at\n"
    "based on the 'rm32' operand and potentially others.\n"
    "\n"
    "If mod = 3, just operate on the contents of the register specified by rm32\n"
    " (direct mode).\n"
    "If mod = 2, effective address is usually* rm32 + disp32\n"
    " (indirect mode with displacement).\n"
    "If mod = 1, effective address is usually* rm32 + disp8\n"
    " (indirect mode with displacement).\n"
    "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
    "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
    " Using it as an address gets more involved. For more details,\n"
    " try reading the help pages for 'base', 'index' and 'scale'.)\n"
    "\n"
    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
    "\"32-bit addressing forms with the ModR/M byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "subop",
    "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
    "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
  );
  put(Help, "r32",
    "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
  );
  put(Help, "rm32",
    "32-bit value in register or memory. The precise details of its construction depend on the eponymous 3-bit\n"
    "'rm32' operand, the 'mod' operand, and also potentially the 'SIB' operands ('scale', 'index' and 'base')\n"
    "and a displacement ('disp8' or 'disp32').\n"
    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
    "\"32-bit addressing forms with the ModR/M byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  // The three pages below ('base', 'index', 'scale') all describe the same
  // effective-address formula.
  put(Help, "base",
    "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
    "This address may be further modified by 'index' and 'scale' operands.\n"
    " effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "index",
    "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
    " effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "scale",
    "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
    " effective address = base + index * scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "disp8",
    "8-bit value to be added in many instructions.\n"
  );
  put(Help, "disp16",
    "16-bit value to be added in many instructions.\n"
  );
  put(Help, "disp32",
    "32-bit value to be added in many instructions.\n"
  );
  put(Help, "imm8",
    "8-bit value for many instructions.\n"
  );
  put(Help, "imm32",
    "32-bit value for many instructions.\n"
  );
}
135
136
137
:(after "Begin Transforms")

// Append the operand-packing pass, pack_operands(program&), to Transform.
Transform.push_back(pack_operands);
141
142
143 :(code)
144 void pack_operands(program& p) {
145 if (p.segments.empty()) return;
146 segment& code = p.segments.at(0);
147
148 trace(99, "transform") << "-- pack operands" << end();
149 for (int i = 0; i < SIZE(code.lines); ++i) {
150 line& inst = code.lines.at(i);
151 if (all_hex_bytes(inst)) continue;
152 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
153 pack_operands(inst);
154 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
155 }
156 }
157
158 void pack_operands(line& inst) {
159 line new_inst;
160 add_opcodes(inst, new_inst);
161 add_modrm_byte(inst, new_inst);
162 add_sib_byte(inst, new_inst);
163 add_disp_bytes(inst, new_inst);
164 add_imm_bytes(inst, new_inst);
165 inst.words.swap(new_inst.words);
166 }
167
168 void add_opcodes(const line& in, line& out) {
169 out.words.push_back(in.words.at(0));
170 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
171 out.words.push_back(in.words.at(1));
172 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
173 out.words.push_back(in.words.at(2));
174 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
175 out.words.push_back(in.words.at(2));
176 }
177
178 void add_modrm_byte(const line& in, line& out) {
179 uint8_t mod=0, reg_subop=0, rm32=0;
180 bool emit = false;
181 for (int i = 0; i < SIZE(in.words); ++i) {
182 const word& curr = in.words.at(i);
183 if (has_operand_metadata(curr, "mod")) {
184 mod = hex_byte(curr.data);
185 emit = true;
186 }
187 else if (has_operand_metadata(curr, "rm32")) {
188 rm32 = hex_byte(curr.data);
189 emit = true;
190 }
191 else if (has_operand_metadata(curr, "r32")) {
192 reg_subop = hex_byte(curr.data);
193 emit = true;
194 }
195 else if (has_operand_metadata(curr, "subop")) {
196 reg_subop = hex_byte(curr.data);
197 emit = true;
198 }
199 }
200 if (emit)
201 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
202 }
203
204 void add_sib_byte(const line& in, line& out) {
205 uint8_t scale=0, index=0, base=0;
206 bool emit = false;
207 for (int i = 0; i < SIZE(in.words); ++i) {
208 const word& curr = in.words.at(i);
209 if (has_operand_metadata(curr, "scale")) {
210 scale = hex_byte(curr.data);
211 emit = true;
212 }
213 else if (has_operand_metadata(curr, "index")) {
214 index = hex_byte(curr.data);
215 emit = true;
216 }
217 else if (has_operand_metadata(curr, "base")) {
218 base = hex_byte(curr.data);
219 emit = true;
220 }
221 }
222 if (emit)
223 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
224 }
225
226 void add_disp_bytes(const line& in, line& out) {
227 for (int i = 0; i < SIZE(in.words); ++i) {
228 const word& curr = in.words.at(i);
229 if (has_operand_metadata(curr, "disp8"))
230 emit_hex_bytes(out, curr, 1);
231 if (has_operand_metadata(curr, "disp16"))
232 emit_hex_bytes(out, curr, 2);
233 else if (has_operand_metadata(curr, "disp32"))
234 emit_hex_bytes(out, curr, 4);
235 }
236 }
237
238 void add_imm_bytes(const line& in, line& out) {
239 for (int i = 0; i < SIZE(in.words); ++i) {
240 const word& curr = in.words.at(i);
241 if (has_operand_metadata(curr, "imm8"))
242 emit_hex_bytes(out, curr, 1);
243 else if (has_operand_metadata(curr, "imm32"))
244 emit_hex_bytes(out, curr, 4);
245 }
246 }
247
248 void emit_hex_bytes(line& out, const word& w, int num) {
249 assert(num <= 4);
250 bool is_number = looks_like_hex_int(w.data);
251 if (num == 1 || !is_number) {
252 out.words.push_back(w);
253 if (is_number)
254 out.words.back().data = hex_byte_to_string(parse_int(w.data));
255 return;
256 }
257 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
258 }
259
260 void emit_hex_bytes(line& out, uint32_t val, int num) {
261 assert(num <= 4);
262 for (int i = 0; i < num; ++i) {
263 out.words.push_back(hex_byte_text(val & 0xff));
264 val = val >> 8;
265 }
266 }
267
268 word hex_byte_text(uint8_t val) {
269 word result;
270 result.data = hex_byte_to_string(val);
271 result.original = result.data+"/auto";
272 return result;
273 }
274
275 string hex_byte_to_string(uint8_t val) {
276 ostringstream out;
277
278 out << HEXBYTE << NUM(val);
279 return out.str();
280 }
281
282 string to_string(const vector<word>& in) {
283 ostringstream out;
284 for (int i = 0; i < SIZE(in); ++i) {
285 if (i > 0) out << ' ';
286 out << in.at(i).data;
287 }
288 return out.str();
289 }
290
:(before "End Unit Tests")
// Emitting a word as a single byte must append a copy of the word itself,
// so that its original text (here "f0/foo") survives alongside the data.
void test_preserve_metadata_when_emitting_single_byte() {
  word in;
  in.data = "f0";
  in.original = "f0/foo";
  line out;
  emit_hex_bytes(out, in, 1);
  CHECK_EQ(out.words.at(0).data, "f0");
  CHECK_EQ(out.words.at(0).original, "f0/foo");
}
301
302 :(scenario pack_disp8)
303 == 0x1
304 74 2/disp8
305 +transform: packing instruction '74 2/disp8'
306 +transform: instruction after packing: '74 02'
307
308 :(scenarios transform)
309 :(scenario pack_disp8_negative)
310 == 0x1
311
312 74 -1/disp8
313 +transform: packing instruction '74 -1/disp8'
314 +transform: instruction after packing: '74 ff'
315 :(scenarios run)
316
317
318 :(code)
319 void transform(const string& text_bytes) {
320 program p;
321 istringstream in(text_bytes);
322 parse(in, p);
323 if (trace_contains_errors()) return;
324 transform(p);
325 }
326
327 :(scenario pack_modrm_imm32)
328 == 0x1
329
330
331
332 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32
333 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
334 +transform: instruction after packing: '81 c3 01 00 00 00'
335
336 :(scenario pack_imm32_large)
337 == 0x1
338 b9 0x080490a7/imm32
339 +transform: packing instruction 'b9 0x080490a7/imm32'
340 +transform: instruction after packing: 'b9 a7 90 04 08'
341
342 :(scenario pack_immediate_constants_hex)
343 == 0x1
344
345
346
347 bb 0x2a/imm32
348 +transform: packing instruction 'bb 0x2a/imm32'
349 +transform: instruction after packing: 'bb 2a 00 00 00'
350 +run: copy imm32 0x0000002a to EBX
351
352 :(scenarios transform)
353 :(scenario pack_silently_ignores_non_hex)
354 % Hide_errors = true;
355 == 0x1
356
357
358
359 bb foo/imm32
360 +transform: packing instruction 'bb foo/imm32'
361
362 +transform: instruction after packing: 'bb foo'
363 :(scenarios run)
364
365 :(scenario pack_flags_bad_hex)
366 % Hide_errors = true;
367 == 0x1
368
369
370
371 bb 0xfoo/imm32
372 +error: not a number: 0xfoo
373
374
375
376 :(code)
377 bool all_hex_bytes(const line& inst) {
378 for (int i = 0; i < SIZE(inst.words); ++i)
379 if (!is_hex_byte(inst.words.at(i)))
380 return false;
381 return true;
382 }
383
384 bool is_hex_byte(const word& curr) {
385 if (contains_any_operand_metadata(curr))
386 return false;
387 if (SIZE(curr.data) != 2)
388 return false;
389 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
390 return false;
391 return true;
392 }
393
394 bool contains_any_operand_metadata(const word& word) {
395 for (int i = 0; i < SIZE(word.metadata); ++i)
396 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
397 return true;
398 return false;
399 }
400
401 bool has_operand_metadata(const line& inst, const string& m) {
402 bool result = false;
403 for (int i = 0; i < SIZE(inst.words); ++i) {
404 if (!has_operand_metadata(inst.words.at(i), m)) continue;
405 if (result) {
406 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
407 return false;
408 }
409 result = true;
410 }
411 return result;
412 }
413
414 bool has_operand_metadata(const word& w, const string& m) {
415 bool result = false;
416 bool metadata_found = false;
417 for (int i = 0; i < SIZE(w.metadata); ++i) {
418 const string& curr = w.metadata.at(i);
419 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;
420 if (metadata_found) {
421 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
422 return false;
423 }
424 metadata_found = true;
425 result = (curr == m);
426 }
427 return result;
428 }
429
430 word metadata(const line& inst, const string& m) {
431 for (int i = 0; i < SIZE(inst.words); ++i)
432 if (has_operand_metadata(inst.words.at(i), m))
433 return inst.words.at(i);
434 assert(false);
435 }
436
// Cheap check for whether 's' should be treated as a number: it must be
// non-empty and start with a sign ('-' or '+') or a decimal digit. A leading
// "0x" therefore qualifies; the rest of the string is not examined here.
bool looks_like_hex_int(const string& s) {
  if (s.empty()) return false;
  if (s.at(0) == '-' || s.at(0) == '+') return true;
  // isdigit() is only defined for values representable as unsigned char (or
  // EOF), so convert first; a plain char may be negative for non-ASCII bytes.
  if (isdigit(static_cast<unsigned char>(s.at(0)))) return true;

  return false;
}
444
445 :(code)
446 string to_string(const line& inst) {
447 ostringstream out;
448 for (int i = 0; i < SIZE(inst.words); ++i) {
449 if (i > 0) out << ' ';
450 out << inst.words.at(i).original;
451 }
452 return out.str();
453 }