1
2
3
4
5
6
7
8
9
10
11
12
13
14
//: Overview help page registered under the key "instructions". It lists the
//: operand types understood by this layer; the second fragment prints the
//: topic name as part of the help contents listing.
:(before "End Help Texts")
put(Help, "instructions",
  "Each x86 instruction consists of an instruction or opcode and some number\n"
  "of operands.\n"
  "Each operand has a type. An instruction won't have more than one operand of\n"
  "any type.\n"
  "Each instruction has some set of allowed operand types. It'll reject others.\n"
  "The complete list of operand types: mod, subop, r32 (register), rm32\n"
  "(register or memory), scale, index, base, disp8, disp16, disp32, imm8,\n"
  "imm32.\n"
  "Each of these has its own help page. Try reading 'subx help mod' next.\n"
);
:(before "End Help Contents")
cerr << " instructions\n";
29
//: End-to-end check: an imm32 operand is packed into 4 bytes, least
//: significant byte first, and the packed instruction is then run.
:(scenario pack_immediate_constants)
== 0x1



bb 0x2a/imm32
+transform: packing instruction 'bb 0x2a/imm32'
+transform: instruction after packing: 'bb 2a 00 00 00'
+run: copy imm32 0x0000002a to EBX
39
40
41
//: The set of metadata tags that count as operand types. A word carrying any
//: of these tags is never treated as a bare hex byte, and may carry at most
//: one of them (see has_metadata).
:(before "End Globals")
set<string> Instruction_operands;
:(before "End One-time Setup")
Instruction_operands.insert("subop");
Instruction_operands.insert("mod");
Instruction_operands.insert("rm32");
Instruction_operands.insert("base");
Instruction_operands.insert("index");
Instruction_operands.insert("scale");
Instruction_operands.insert("r32");
Instruction_operands.insert("disp8");
Instruction_operands.insert("disp16");
Instruction_operands.insert("disp32");
Instruction_operands.insert("imm8");
Instruction_operands.insert("imm32");
57
//: One help page per operand type. The pages are registered by a helper
//: function that is called where the other help texts are set up.
:(before "End Help Texts")
init_operand_type_help();
:(code)
// Registers a help page in 'Help' for each operand type: mod, subop, r32,
// rm32, base, index, scale, disp8, disp16, disp32, imm8 and imm32.
void init_operand_type_help() {
  // operands that are packed into the ModR/M byte
  put(Help, "mod",
    "2-bit operand controlling the _addressing mode_ of many instructions,\n"
    "to determine how to compute the _effective address_ to look up memory at\n"
    "based on the 'rm32' operand and potentially others.\n"
    "\n"
    "If mod = 3, just operate on the contents of the register specified by rm32\n"
    " (direct mode).\n"
    "If mod = 2, effective address is usually* rm32 + disp32\n"
    " (indirect mode with displacement).\n"
    "If mod = 1, effective address is usually* rm32 + disp8\n"
    " (indirect mode with displacement).\n"
    "If mod = 0, effective address is usually* rm32 (indirect mode).\n"
    "(* - The exception is when rm32 is '4'. Register 4 is the stack pointer (ESP).\n"
    " Using it as an address gets more involved. For more details,\n"
    " try reading the help pages for 'base', 'index' and 'scale'.)\n"
    "\n"
    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
    "\"32-bit addressing forms with the ModR/M byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "subop",
    "Additional 3-bit operand for determining the instruction when the opcode is 81, 8f or ff.\n"
    "Can't coexist with operand of type 'r32' in a single instruction, because the two use the same bits.\n"
  );
  put(Help, "r32",
    "3-bit operand specifying a register operand used directly, without any further addressing modes.\n"
  );
  put(Help, "rm32",
    "3-bit operand specifying a register operand whose precise interpretation interacts with 'mod'.\n"
    "For complete details consult the IA-32 software developer's manual, table 2-2,\n"
    "\"32-bit addressing forms with the ModR/M byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  // operands that are packed into the SIB byte
  put(Help, "base",
    "Additional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) specifying the register containing an address to look up.\n"
    "This address may be further modified by 'index' and 'scale' operands.\n"
    " effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "index",
    "Optional 3-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be added to the 'base' operand to compute the 'effective address' at which to look up memory.\n"
    " effective address = base + index*scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  put(Help, "scale",
    "Optional 2-bit operand (when 'rm32' is 4 unless 'mod' is 3) that can be multiplied to the 'index' operand before adding the result to the 'base' operand to compute the _effective address_ to operate on.\n"
    " effective address = base + index * scale + displacement (disp8 or disp32)\n"
    "For complete details consult the IA-32 software developer's manual, table 2-3,\n"
    "\"32-bit addressing forms with the SIB byte\".\n"
    " https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf\n"
  );
  // displacement operands (emitted as 1, 2 or 4 bytes respectively)
  put(Help, "disp8",
    "8-bit value to be added in many instructions.\n"
  );
  put(Help, "disp16",
    "16-bit value to be added in many instructions.\n"
  );
  put(Help, "disp32",
    "32-bit value to be added in many instructions.\n"
  );
  // immediate operands (emitted as 1 or 4 bytes respectively)
  put(Help, "imm8",
    "8-bit value for many instructions.\n"
  );
  put(Help, "imm32",
    "32-bit value for many instructions.\n"
  );
}
133
134
135
//: Add the operand-packing pass (the pack_operands overload that takes a
//: whole program) to the list of transforms.
:(after "Begin Transforms")

Transform.push_back(pack_operands);
139
140
141 :(code)
142 void pack_operands(program& p) {
143 if (p.segments.empty()) return;
144 segment& code = p.segments.at(0);
145
146 trace(99, "transform") << "-- pack operands" << end();
147 for (int i = 0; i < SIZE(code.lines); ++i) {
148 line& inst = code.lines.at(i);
149 if (all_hex_bytes(inst)) continue;
150 trace(99, "transform") << "packing instruction '" << to_string(inst) << "'" << end();
151 pack_operands(inst);
152 trace(99, "transform") << "instruction after packing: '" << to_string(inst.words) << "'" << end();
153 }
154 }
155
156 void pack_operands(line& inst) {
157 line new_inst;
158 add_opcodes(inst, new_inst);
159 add_modrm_byte(inst, new_inst);
160 add_sib_byte(inst, new_inst);
161 add_disp_bytes(inst, new_inst);
162 add_imm_bytes(inst, new_inst);
163 inst.words.swap(new_inst.words);
164 }
165
166 void add_opcodes(const line& in, line& out) {
167 out.words.push_back(in.words.at(0));
168 if (in.words.at(0).data == "0f" || in.words.at(0).data == "f2" || in.words.at(0).data == "f3")
169 out.words.push_back(in.words.at(1));
170 if (in.words.at(0).data == "f3" && in.words.at(1).data == "0f")
171 out.words.push_back(in.words.at(2));
172 if (in.words.at(0).data == "f2" && in.words.at(1).data == "0f")
173 out.words.push_back(in.words.at(2));
174 }
175
176 void add_modrm_byte(const line& in, line& out) {
177 uint8_t mod=0, reg_subop=0, rm32=0;
178 bool emit = false;
179 for (int i = 0; i < SIZE(in.words); ++i) {
180 const word& curr = in.words.at(i);
181 if (has_metadata(curr, "mod")) {
182 mod = hex_byte(curr.data);
183 emit = true;
184 }
185 else if (has_metadata(curr, "rm32")) {
186 rm32 = hex_byte(curr.data);
187 emit = true;
188 }
189 else if (has_metadata(curr, "r32")) {
190 reg_subop = hex_byte(curr.data);
191 emit = true;
192 }
193 else if (has_metadata(curr, "subop")) {
194 reg_subop = hex_byte(curr.data);
195 emit = true;
196 }
197 }
198 if (emit)
199 out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
200 }
201
202 void add_sib_byte(const line& in, line& out) {
203 uint8_t scale=0, index=0, base=0;
204 bool emit = false;
205 for (int i = 0; i < SIZE(in.words); ++i) {
206 const word& curr = in.words.at(i);
207 if (has_metadata(curr, "scale")) {
208 scale = hex_byte(curr.data);
209 emit = true;
210 }
211 else if (has_metadata(curr, "index")) {
212 index = hex_byte(curr.data);
213 emit = true;
214 }
215 else if (has_metadata(curr, "base")) {
216 base = hex_byte(curr.data);
217 emit = true;
218 }
219 }
220 if (emit)
221 out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
222 }
223
224 void add_disp_bytes(const line& in, line& out) {
225 for (int i = 0; i < SIZE(in.words); ++i) {
226 const word& curr = in.words.at(i);
227 if (has_metadata(curr, "disp8"))
228 emit_hex_bytes(out, curr, 1);
229 if (has_metadata(curr, "disp16"))
230 emit_hex_bytes(out, curr, 2);
231 else if (has_metadata(curr, "disp32"))
232 emit_hex_bytes(out, curr, 4);
233 }
234 }
235
236 void add_imm_bytes(const line& in, line& out) {
237 for (int i = 0; i < SIZE(in.words); ++i) {
238 const word& curr = in.words.at(i);
239 if (has_metadata(curr, "imm8"))
240 emit_hex_bytes(out, curr, 1);
241 else if (has_metadata(curr, "imm32"))
242 emit_hex_bytes(out, curr, 4);
243 }
244 }
245
246 void emit_hex_bytes(line& out, const word& w, int num) {
247 assert(num <= 4);
248 if (num == 1 || !is_hex_int(w.data)) {
249 out.words.push_back(w);
250 if (is_hex_int(w.data))
251 out.words.back().data = hex_byte_to_string(parse_int(w.data));
252 return;
253 }
254 emit_hex_bytes(out, static_cast<uint32_t>(parse_int(w.data)), num);
255 }
256
257 void emit_hex_bytes(line& out, uint32_t val, int num) {
258 assert(num <= 4);
259 for (int i = 0; i < num; ++i) {
260 out.words.push_back(hex_byte_text(val & 0xff));
261 val = val >> 8;
262 }
263 }
264
265 word hex_byte_text(uint8_t val) {
266 word result;
267 result.data = hex_byte_to_string(val);
268 result.original = result.data+"/auto";
269 return result;
270 }
271
272 string hex_byte_to_string(uint8_t val) {
273 ostringstream out;
274
275 out << HEXBYTE << NUM(val);
276 return out.str();
277 }
278
279 string to_string(const vector<word>& in) {
280 ostringstream out;
281 for (int i = 0; i < SIZE(in); ++i) {
282 if (i > 0) out << ' ';
283 out << in.at(i).data;
284 }
285 return out.str();
286 }
287
288 :(before "End Unit Tests")
289 void test_preserve_metadata_when_emitting_single_byte() {
290 word in;
291 in.data = "f0";
292 in.original = "f0/foo";
293 line out;
294 emit_hex_bytes(out, in, 1);
295 CHECK_EQ(out.words.at(0).data, "f0");
296 CHECK_EQ(out.words.at(0).original, "f0/foo");
297 }
298
//: A disp8 operand is packed into a single byte, zero-padded to two digits.
:(scenario pack_disp8)
== 0x1
74 2/disp8
+transform: packing instruction '74 2/disp8'
+transform: instruction after packing: '74 02'
304
//: A negative disp8 operand is packed as a single byte (-1 becomes ff).
//: This scenario is bracketed by ':(scenarios transform)' and
//: ':(scenarios run)', presumably so that it only transforms the program
//: rather than running it -- confirm against the scenario driver.
:(scenarios transform)
:(scenario pack_disp8_negative)
== 0x1

74 -1/disp8
+transform: packing instruction '74 -1/disp8'
+transform: instruction after packing: '74 ff'
:(scenarios run)
313
314
315 :(code)
316 void transform(const string& text_bytes) {
317 program p;
318 istringstream in(text_bytes);
319 parse(in, p);
320 if (trace_contains_errors()) return;
321 transform(p);
322 }
323
//: subop, mod and rm32 operands are combined into one ModR/M byte
//: (0/subop, 3/mod, 3/rm32 => c3), followed by the 4 bytes of the imm32
//: operand.
:(scenario pack_modrm_imm32)
== 0x1



81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32
+transform: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
+transform: instruction after packing: '81 c3 01 00 00 00'
332
//: An imm32 operand that uses all 4 bytes is emitted least significant byte
//: first.
:(scenario pack_imm32_large)
== 0x1
b9 0x080490a7/imm32
+transform: packing instruction 'b9 0x080490a7/imm32'
+transform: instruction after packing: 'b9 a7 90 04 08'
338
//: An operand written with a '0x' prefix is accepted and packed like any
//: other hex number.
:(scenario pack_immediate_constants_hex)
== 0x1



bb 0x2a/imm32
+transform: packing instruction 'bb 0x2a/imm32'
+transform: instruction after packing: 'bb 2a 00 00 00'
+run: copy imm32 0x0000002a to EBX
348
//: An operand that isn't a hex number is passed through unchanged by this
//: pass, and no error is expected ('$error: 0').
:(scenarios transform)
:(scenario pack_silently_ignores_non_hex)
== 0x1



bb foo/imm32
+transform: packing instruction 'bb foo/imm32'

+transform: instruction after packing: 'bb foo'
$error: 0
:(scenarios run)
361
362
363
364 :(code)
365 bool all_hex_bytes(const line& inst) {
366 for (int i = 0; i < SIZE(inst.words); ++i)
367 if (!is_hex_byte(inst.words.at(i)))
368 return false;
369 return true;
370 }
371
372 bool is_hex_byte(const word& curr) {
373 if (contains_any_operand_metadata(curr))
374 return false;
375 if (SIZE(curr.data) != 2)
376 return false;
377 if (curr.data.find_first_not_of("0123456789abcdefABCDEF") != string::npos)
378 return false;
379 return true;
380 }
381
382 bool contains_any_operand_metadata(const word& word) {
383 for (int i = 0; i < SIZE(word.metadata); ++i)
384 if (Instruction_operands.find(word.metadata.at(i)) != Instruction_operands.end())
385 return true;
386 return false;
387 }
388
389 bool has_metadata(const line& inst, const string& m) {
390 bool result = false;
391 for (int i = 0; i < SIZE(inst.words); ++i) {
392 if (!has_metadata(inst.words.at(i), m)) continue;
393 if (result) {
394 raise << "'" << to_string(inst) << "' has conflicting " << m << " operands\n" << end();
395 return false;
396 }
397 result = true;
398 }
399 return result;
400 }
401
402 bool has_metadata(const word& w, const string& m) {
403 bool result = false;
404 bool metadata_found = false;
405 for (int i = 0; i < SIZE(w.metadata); ++i) {
406 const string& curr = w.metadata.at(i);
407 if (Instruction_operands.find(curr) == Instruction_operands.end()) continue;
408 if (metadata_found) {
409 raise << "'" << w.original << "' has conflicting operand types; it should have only one\n" << end();
410 return false;
411 }
412 metadata_found = true;
413 result = (curr == m);
414 }
415 return result;
416 }
417
418 word metadata(const line& inst, const string& m) {
419 for (int i = 0; i < SIZE(inst.words); ++i)
420 if (has_metadata(inst.words.at(i), m))
421 return inst.words.at(i);
422 assert(false);
423 }
424
// True if 's' is an optionally signed hex number with an optional '0x'
// prefix: [+-][0x]<one or more hex digits>.
// No base-10 form is recognized; unprefixed digits are also read as hex.
bool is_hex_int(const string& s) {
  if (s.empty()) return false;
  size_t pos = 0;
  if (s.at(0) == '-' || s.at(0) == '+') pos++;
  // Compare exactly 2 characters starting at pos. The second argument is a
  // length, not an end index; treating it as an end index made signed
  // prefixed numbers such as "-0x1" fail to match.
  if (s.compare(pos, 2, "0x") == 0) pos += 2;
  // A bare sign or bare prefix ("-", "0x") has no digits and isn't a number.
  if (pos >= s.size()) return false;
  return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
}
432
433 :(code)
434 string to_string(const line& inst) {
435 ostringstream out;
436 for (int i = 0; i < SIZE(inst.words); ++i) {
437 if (i > 0) out << ' ';
438 out << inst.words.at(i).original;
439 }
440 return out.str();
441 }