1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 :(before "End Help Texts")
// Store the overview help page under the key "instructions" in Help.
// The text lists every operand type and points the reader at the 'mod' page.
16 put(Help, "instructions",
17 "Each x86 instruction consists of an instruction or opcode and some number\n"
18 "of operands.\n"
19 "Each operand has a type. An instruction won't have more than one operand of\n"
20 "any type.\n"
21 "Each instruction has some set of allowed operand types. It'll reject others.\n"
22 "The complete list of operand types: mod, subop, r32 (register), rm32\n"
23 "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
24 "imm32.\n"
25 "Each of these has its own help page. Try reading 'subx help mod' next.\n"
26 );
27 :(before "End Help Contents")
// Print the topic name on cerr; presumably this is one entry in the list of
// available help topics -- confirm against the code around "End Help Contents".
28 cerr << " instructions\n";
29
30 :(scenario pack_immediate_constants)
31 == 0x1
32
33
34
35 bb 0x2a/imm32
36 +transform: packing instruction 'bb 0x2a/imm32'
37 +transform: instruction after packing: 'bb 2a 00 00 00'
38 +run: copy imm32 0x0000002a to EBX
39
40
41
42 :(before "End Globals")
// Set of metadata tags that are treated as operand types. The lookups in
// contains_any_operand_metadata() and has_metadata() consult this set.
43 set<string> Instruction_operands;
44 :(before "End One-time Setup")
// Fill the set with the twelve operand type names.
45 Instruction_operands.insert("subop");
46 Instruction_operands.insert("mod");
47 Instruction_operands.insert("rm32");
48 Instruction_operands.insert("base");
49 Instruction_operands.insert("index");
50 Instruction_operands.insert("scale");
51 Instruction_operands.insert("r32");
52 Instruction_operands.insert("disp8");
53 Instruction_operands.insert("disp16");
54 Instruction_operands.insert("disp32");
55 Instruction_operands.insert("imm8");
56 Instruction_operands.insert("imm32");
57
58 :(before "End Help Texts")
// Hook the per-operand-type help pages in alongside the other help texts.
59 init_operand_type_help();
60 :(code)
// Store one help page in Help for each operand type: mod, subop, r32, rm32,
// base, index, scale, disp8, disp16, disp32, imm8 and imm32.
// Each page is keyed by the operand type's name.
61 void init_operand_type_help() {
// Fields that are combined into the ModR/M byte (see add_modrm_byte).
62 put(Help, "mod",
63 "2-bit operand controlling the _addressing mode_ of many instructions,\n"
64 "to determine how to compute the _effective address_ to look up memory at\n"
65 "based on the 'rm32' operand and potentially others.\n"
66 "\n"
67 "If mod = 3, just operate on the contents of the register specified by rm32\n"
68 " (direct mode).\n"
69 "If mod = 2, effective address is usually* rm32 + disp32\n"
70 " (indirect mode with displacement).\n"
71 "If mod = 1, effective address is usually* rm32 + disp8\n"
72 " (indirect mode with displacement).\n"
73 "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
74 "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
75 " Using it as an address gets more involved. For more details,\n"
76 " try reading the help pages for 'base', 'index' and 'scale'.)\n"
77 "\n"
78 "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
79 "\"32-bit addressing forms with the ModR/M byte\".\n"
80 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
81 );
82 put(Help, "subop",
83 "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
84 "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
85 );
86 put(Help, "r32",
87 "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
88 );
89 put(Help, "rm32",
90 "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
91 "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
92 "\"32-bit addressing forms with the ModR/M byte\".\n"
93 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
94 );
// Fields that are combined into the SIB byte (see add_sib_byte).
95 put(Help, "base",
96 "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
97 "This address may be further modified by 'index' and 'scale' operands.\n"
98 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
99 "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
100 "\"32-bit addressing forms with the SIB byte\".\n"
101 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
102 );
103 put(Help, "index",
104 "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
105 " effective address = base + index*scale + displacement (disp8 or disp32)\n"
106 "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
107 "\"32-bit addressing forms with the SIB byte\".\n"
108 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
109 );
110 put(Help, "scale",
111 "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
112 " effective address = base + index * scale + displacement (disp8 or disp32)\n"
113 "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
114 "\"32-bit addressing forms with the SIB byte\".\n"
115 " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
116 );
// Displacement operands (emitted by add_disp_bytes).
117 put(Help, "disp8",
118 "8-bit value to be added in many instructions.\n"
119 );
120 put(Help, "disp16",
121 "16-bit value to be added in many instructions.\n"
122 );
123 put(Help, "disp32",
124 "32-bit value to be added in many instructions.\n"
125 );
// Immediate operands (emitted by add_imm_bytes).
126 put(Help, "imm8",
127 "8-bit value for many instructions.\n"
128 );
129 put(Help, "imm32",
130 "32-bit value for many instructions.\n"
131 );
132 }
133
134
135
136 :(before "End Transforms")
137
// Append pack_operands to the Transform list.
// NOTE(review): presumably this selects the pack_operands(program&) overload
// and Transform entries run in insertion order -- confirm where Transform is declared.
138 Transform.push_back(pack_operands);
139
140
141 :(code)
142 void pack_operands(program& p) {
143 if (p.segments.empty()) return;
144 segment& code = p.segments.at(0);
145
146 trace(99, "transform") << "-- pack operands" << end();
147 for (int i = 0; i < SIZE(code.lines); ++i) {
148 line& inst = code.lines.at(i);
149 if (all_hex_bytes(inst)) continue;
150 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
151 pack_operands(inst);
152 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
153 }
154 }
155
156 void pack_operands(line& inst) {
157 line new_inst;
158 add_opcodes(inst, new_inst);
159 add_modrm_byte(inst, new_inst);
160 add_sib_byte(inst, new_inst);
161 add_disp_bytes(inst, new_inst);
162 add_imm_bytes(inst, new_inst);
163 inst.words.swap(new_inst.words);
164 }
165
166 void add_opcodes(const line& in, line& out) {
167 out.words.push_back(in.words.at(0));
168 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
169 out.words.push_back(in.words.at(1));
170 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
171 out.words.push_back(in.words.at(2));
172 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
173 out.words.push_back(in.words.at(2));
174 }
175
176 void add_modrm_byte(const line& in, line& out) {
177 uint8_t mod=0, reg_subop=0, rm32=0;
178 bool emit = false;
179 for (int i = 0; i < SIZE(in.words); ++i) {
180 const word& curr = in.words.at(i);
181 if (has_metadata(curr, "mod")) {
182 mod = hex_byte(curr.data);
183 emit = true;
184 }
185 else if (has_metadata(curr, "rm32")) {
186 rm32 = hex_byte(curr.data);
187 emit = true;
188 }
189 else if (has_metadata(curr, "r32")) {
190 reg_subop = hex_byte(curr.data);
191 emit = true;
192 }
193 else if (has_metadata(curr, "subop")) {
194 reg_subop = hex_byte(curr.data);
195 emit = true;
196 }
197 }
198 if (emit)
199 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
200 }
201
202 void add_sib_byte(const line& in, line& out) {
203 uint8_t scale=0, index=0, base=0;
204 bool emit = false;
205 for (int i = 0; i < SIZE(in.words); ++i) {
206 const word& curr = in.words.at(i);
207 if (has_metadata(curr, "scale")) {
208 scale = hex_byte(curr.data);
209 emit = true;
210 }
211 else if (has_metadata(curr, "index")) {
212 index = hex_byte(curr.data);
213 emit = true;
214 }
215 else if (has_metadata(curr, "base")) {
216 base = hex_byte(curr.data);
217 emit = true;
218 }
219 }
220 if (emit)
221 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
222 }
223
224 void add_disp_bytes(const line& in, line& out) {
225 for (int i = 0; i < SIZE(in.words); ++i) {
226 const word& curr = in.words.at(i);
227 if (has_metadata(curr, "disp8"))
228 emit_hex_bytes(out, curr, 1);
229 if (has_metadata(curr, "disp16"))
230 emit_hex_bytes(out, curr, 2);
231 else if (has_metadata(curr, "disp32"))
232 emit_hex_bytes(out, curr, 4);
233 }
234 }
235
236 void add_imm_bytes(const line& in, line& out) {
237 for (int i = 0; i < SIZE(in.words); ++i) {
238 const word& curr = in.words.at(i);
239 if (has_metadata(curr, "imm8"))
240 emit_hex_bytes(out, curr, 1);
241 else if (has_metadata(curr, "imm32"))
242 emit_hex_bytes(out, curr, 4);
243 }
244 }
245
246 void emit_hex_bytes(line& out, const word& w, int num) {
247 assert(num <= 4);
248 if (num == 1 || !is_hex_int(w.data)) {
249 out.words.push_back(w);
250 if (is_hex_int(w.data))
251 out.words.back().data = hex_byte_to_string(parse_int(w.data));
252 return;
253 }
254 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
255 }
256
257 void emit_hex_bytes(line& out, uint32_t val, int num) {
258 assert(num <= 4);
259 for (int i = 0; i < num; ++i) {
260 out.words.push_back(hex_byte_text(val & 0xff));
261 val = val >> 8;
262 }
263 }
264
265 word hex_byte_text(uint8_t val) {
266 word result;
267 result.data = hex_byte_to_string(val);
268 result.original = result.data+"/auto";
269 return result;
270 }
271
272 string hex_byte_to_string(uint8_t val) {
273 ostringstream out;
274 out << HEXBYTE << NUM(val);
275 return out.str();
276 }
277
278 string to_string(const vector<word>& in) {
279 ostringstream out;
280 for (int i = 0; i < SIZE(in); ++i) {
281 if (i > 0) out << ' ';
282 out << in.at(i).data;
283 }
284 return out.str();
285 }
286
287 :(before "End Unit Tests")
288 void test_preserve_metadata_when_emitting_single_byte() {
289 word in;
290 in.data = "f0";
291 in.original = "f0/foo";
292 line out;
293 emit_hex_bytes(out, in, 1);
294 CHECK_EQ(out.words.at(0).data, "f0");
295 CHECK_EQ(out.words.at(0).original, "f0/foo");
296 }
297
298 :(scenario pack_disp8)
299 == 0x1
300 74 2/disp8
301 +transform: packing instruction '74 2/disp8'
302 +transform: instruction after packing: '74 02'
303
304 :(scenarios transform)
305 :(scenario pack_disp8_negative)
306 == 0x1
307
308 74 -1/disp8
309 +transform: packing instruction '74 -1/disp8'
310 +transform: instruction after packing: '74 ff'
311 :(scenarios run)
312
313
314 :(code)
315 void transform(const string& text_bytes) {
316 program p;
317 istringstream in(text_bytes);
318 parse(in, p);
319 if (trace_contains_errors()) return;
320 transform(p);
321 }
322
323 :(scenario pack_modrm_imm32)
324 == 0x1
325
326
327
328 81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32
329 +transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
330 +transform: instruction after packing: '81 c3 01 00 00 00'
331
332 :(scenario pack_imm32_large)
333 == 0x1
334 b9 0x080490a7/imm32
335 +transform: packing instruction 'b9 0x080490a7/imm32'
336 +transform: instruction after packing: 'b9 a7 90 04 08'
337
338 :(scenario pack_immediate_constants_hex)
339 == 0x1
340
341
342
343 bb 0x2a/imm32
344 +transform: packing instruction 'bb 0x2a/imm32'
345 +transform: instruction after packing: 'bb 2a 00 00 00'
346 +run: copy imm32 0x0000002a to EBX
347
348 :(scenarios transform)
349 :(scenario pack_silently_ignores_non_hex)
350 == 0x1
351
352
353
354 bb foo/imm32
355 +transform: packing instruction 'bb foo/imm32'
356
357 +transform: instruction after packing: 'bb foo'
358 $error: 0
359 :(scenarios run)
360
361
362
363 :(code)
364 bool all_hex_bytes(const line& inst) {
365 for (int i = 0; i < SIZE(inst.words); ++i)
366 if (!is_hex_byte(inst.words.at(i)))
367 return false;
368 return true;
369 }
370
371 bool is_hex_byte(const word& curr) {
372 if (contains_any_operand_metadata(curr))
373 return false;
374 if (SIZE(curr.data) != 2)
375 return false;
376 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
377 return false;
378 return true;
379 }
380
381 bool contains_any_operand_metadata(const word& word) {
382 for (int i = 0; i < SIZE(word.metadata); ++i)
383 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
384 return true;
385 return false;
386 }
387
388 bool has_metadata(const line& inst, const string& m) {
389 bool result = false;
390 for (int i = 0; i < SIZE(inst.words); ++i) {
391 if (!has_metadata(inst.words.at(i), m)) continue;
392 if (result) {
393 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
394 return false;
395 }
396 result = true;
397 }
398 return result;
399 }
400
401 bool has_metadata(const word& w, const string& m) {
402 bool result = false;
403 bool metadata_found = false;
404 for (int i = 0; i < SIZE(w.metadata); ++i) {
405 const string& curr = w.metadata.at(i);
406 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;
407 if (metadata_found) {
408 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
409 return false;
410 }
411 metadata_found = true;
412 result = (curr == m);
413 }
414 return result;
415 }
416
417 word metadata(const line& inst, const string& m) {
418 for (int i = 0; i < SIZE(inst.words); ++i)
419 if (has_metadata(inst.words.at(i), m))
420 return inst.words.at(i);
421 assert(false);
422 }
423
// True when 's' is a hex integer literal: an optional sign ('-' or '+'),
// an optional "0x" prefix, then one or more hex digits (either case).
// Empty strings and strings with no digits (such as "-" or "0x") are not
// hex integers.
bool is_hex_int(const string& s) {
  if (s.empty()) return false;
  size_t pos = 0;
  if (s.at(0) == '-' || s.at(0) == '+') pos++;
  // Compare exactly two characters starting at 'pos'. The second argument is
  // a length, not an end offset, so the prefix is also found after a sign.
  if (s.compare(pos, 2, "0x") == 0) pos += 2;
  if (pos >= s.size()) return false;  // sign and/or prefix with no digits
  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
}
431
432 int32_t parse_int(const string& s) {
433 if (s.empty()) return 0;
434 istringstream in(s);
435 in >> std::hex;
436 if (s.at(0) == '-') {
437 int32_t result = 0;
438 in >> result;
439 if (!in || !in.eof()) {
440 raise << "not a number: " << s << '\n' << end();
441 return 0;
442 }
443 return result;
444 }
445 uint32_t uresult = 0;
446 in >> uresult;
447 if (!in || !in.eof()) {
448 raise << "not a number: " << s << '\n' << end();
449 return 0;
450 }
451 return static_cast<int32_t>(uresult);
452 }
453 :(before "End Unit Tests")
454 void test_parse_int() {
455 CHECK_EQ(0, parse_int("0"));
456 CHECK_EQ(0, parse_int("0x0"));
457 CHECK_EQ(0, parse_int("0x0"));
458 CHECK_EQ(16, parse_int("10"));
459 CHECK_EQ(-1, parse_int("-1"));
460 CHECK_EQ(-1, parse_int("0xffffffff"));
461 }
462
463 :(code)
464 string to_string(const line& inst) {
465 ostringstream out;
466 for (int i = 0; i < SIZE(inst.words); ++i) {
467 if (i > 0) out << ' ';
468 out << inst.words.at(i).original;
469 }
470 return out.str();
471 }