about summary refs log tree commit diff stats
path: root/subx/024pack_instructions.cc
blob: a897f1dafe0a762afa40c4d8d26c1c65438e2fb8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
//: Operands can refer to bitfields smaller than a byte. This layer packs
//: operands into their containing bytes in the right order.

:(scenario pack_immediate_constants)
== 0x1
# instruction                     effective address                                           operand     displacement    immediate
# op          subop               mod             rm32          base      index     scale     r32
# 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    3 bits      0/1/2/4 bytes   0/1/2/4 bytes
  bb                                                                                                                      0x2a/imm32        # copy 42 to EBX
+translate: packing instruction 'bb 0x2a/imm32'
+translate: instruction after packing: 'bb 2a 00 00 00'
+run: copy imm32 0x0000002a to EBX

:(scenario pack_disp8)
== 0x1
74 2/disp8  # jump 2 bytes away if ZF is set
+translate: packing instruction '74 2/disp8'
+translate: instruction after packing: '74 02'

:(scenarios transform)
:(scenario pack_disp8_negative)
== 0x1
# running this will cause an infinite loop
74 -1/disp8  # jump 1 byte before if ZF is set
+translate: packing instruction '74 -1/disp8'
+translate: instruction after packing: '74 ff'
:(scenarios run)

:(scenario pack_modrm_imm32)
== 0x1
# instruction                     effective address                                           operand     displacement    immediate
# op          subop               mod             rm32          base      index     scale     r32
# 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    3 bits      0/1/2/4 bytes   0/1/2/4 bytes
  81          0/add/subop         3/mod/direct    3/ebx/rm32                                                              1/imm32           # add 1 to EBX
+translate: packing instruction '81 0/add/subop 3/mod/direct 3/ebx/rm32 1/imm32'
+translate: instruction after packing: '81 c3 01 00 00 00'

:(scenario pack_imm32_large)
== 0x1
b9 0x080490a7/imm32  # copy to ECX
+translate: packing instruction 'b9 0x080490a7/imm32'
+translate: instruction after packing: 'b9 a7 90 04 08'

:(before "End One-time Setup")
// Append pack_instructions to Transform.
Transform.push_back(pack_instructions);

:(code)
// Pack every instruction in the first segment of 'p' in place.
// Does nothing when the program has no segments. Lines for which
// all_raw_hex_bytes() holds are skipped. Each packed line is traced both
// before (with metadata) and after (bare words) the rewrite.
void pack_instructions(program& p) {
  if (p.segments.empty()) return;
  segment& first_segment = p.segments.at(0);
  for (int line_index = 0;  line_index < SIZE(first_segment.lines);  ++line_index) {
    line& curr_line = first_segment.lines.at(line_index);
    if (!all_raw_hex_bytes(curr_line)) {
      // to_string(line) includes metadata
      trace(99, "translate") << "packing instruction '" << to_string(curr_line) << "'" << end();
      pack_instruction(curr_line);
      // to_string(vector<word>) prints just the data of each word
      trace(99, "translate") << "instruction after packing: '" << to_string(curr_line.words) << "'" << end();
    }
  }
}

// Replace the words of 'inst' with their packed equivalents.
// The packed line is assembled in a scratch line in a fixed order --
// opcodes, ModR/M byte, SIB byte, displacement, immediate -- and then
// swapped into 'inst'.
void pack_instruction(line& inst) {
  line packed;
  add_opcodes(inst, packed);
  add_modrm_byte(inst, packed);
  add_sib_byte(inst, packed);
  add_disp_bytes(inst, packed);
  add_imm_bytes(inst, packed);
  // only the words are exchanged; nothing else in 'inst' is touched here
  inst.words.swap(packed.words);
}

// Copy the opcode words at the start of 'in' to the end of 'out'.
// The first word is always copied. A second word is copied when the first
// is "0f" or "f3", and a third when the first two are "f3" "0f".
void add_opcodes(const line& in, line& out) {
  const bool starts_with_0f = (in.words.at(0).data == "0f");
  const bool starts_with_f3 = (in.words.at(0).data == "f3");
  out.words.push_back(in.words.at(0));
  if (!starts_with_0f && !starts_with_f3) return;
  out.words.push_back(in.words.at(1));
  if (starts_with_f3 && in.words.at(1).data == "0f")
    out.words.push_back(in.words.at(2));
}

void add_modrm_byte(const line& in, line& out) {
  uint8_t mod=0, reg_subop=0, rm32=0;
  bool emit = false;
  for (int i = 0;  i < SIZE(in.words);  ++i) {
    const word& curr = in.words.at(i);
    if (has_metadata(curr, "mod")) {
      mod = hex_byte(curr.data);
      emit = true;
    }
    else if (has_metadata(curr, "rm32")) {
      rm32 = hex_byte(curr.data);
      emit = true;
    }
    else if (has_metadata(curr, "r32")) {
      reg_subop = hex_byte(curr.data);
      emit = true;
    }
    else if (has_metadata(curr, "subop")) {
      reg_subop = hex_byte(curr.data);
      emit = true;
    }
  }
  if (emit)
    out.words.push_back(hex_byte_text((mod << 6) | (reg_subop << 3) | rm32));
}

void add_sib_byte(const line& in, line& out) {
  uint8_t scale=0, index=0, base=0;
  bool emit = false;
  for (int i = 0;  i < SIZE(in.words);  ++i) {
    const word& curr = in.words.at(i);
    if (has_metadata(curr, "scale")) {
      scale = hex_byte(curr.data);
      emit = true;
    }
    else if (has_metadata(curr, "index")) {
      index = hex_byte(curr.data);
      emit = true;
    }
    else if (has_metadata(curr, "base")) {
      base = hex_byte(curr.data);
      emit = true;
    }
  }
  if (emit)
    out.words.push_back(hex_byte_text((scale << 6) | (index << 3) | base));
}

void add_disp_bytes(const line& in, line& out) {
  for (int i = 0;  i < SIZE(in.words);  ++i) {
    const word& curr = in.words.at(i);
    if (has_metadata(curr, "disp8"))
      emit_hex_bytes(out, curr, 1);
    else if (has_metadata(curr, "disp32"))
      emit_hex_bytes(out, curr, 4);
  }
}

void add_imm_bytes(const line& in, line& out) {
  for (int i = 0;  i < SIZE(in.words);  ++i) {
    const word& curr = in.words.at(i);
    if (has_metadata(curr, "imm8"))
      emit_hex_bytes(out, curr, 1);
    else if (has_metadata(curr, "imm32"))
      emit_hex_bytes(out, curr, 4);
  }
}

// Append 'num' bytes of the integer in w.data to 'out', least significant
// byte first (so 0x2a as 4 bytes becomes '2a 00 00 00').
// The value is parsed with parse_int() and reinterpreted as unsigned
// 32-bit, so -1 as 1 byte becomes 'ff'. 'num' may not exceed 4.
void emit_hex_bytes(line& out, const word& w, int num) {
  assert(num <= 4);
  uint32_t remaining = static_cast<uint32_t>(parse_int(w.data));
  for (int bytes_left = num;  bytes_left > 0;  --bytes_left) {
    out.words.push_back(hex_byte_text(remaining & 0xff));
    remaining >>= 8;  // expose the next-higher byte
  }
}

// Build a word whose data is 'val' formatted via HEXBYTE and NUM.
// Only the data of the returned word is set.
word hex_byte_text(uint8_t val) {
  ostringstream formatted;
  formatted << HEXBYTE << NUM(val);
  word packed_byte;
  packed_byte.data = formatted.str();
  return packed_byte;
}

// Join the data of each word in 'in' with single spaces, ignoring metadata.
// Returns the empty string for an empty vector.
string to_string(const vector<word>& in) {
  ostringstream joined;
  const char* separator = "";  // nothing before the first word
  for (int k = 0;  k < SIZE(in);  ++k) {
    joined << separator << in.at(k).data;
    separator = " ";
  }
  return joined.str();
}

// helper
void transform(const string& text_bytes) {
  program p;
  istringstream in(text_bytes);
  parse(in, p);
  if (trace_contains_errors()) return;
  transform(p);
}

:(scenario pack_immediate_constants_hex)
== 0x1
# instruction                     effective address                                           operand     displacement    immediate
# op          subop               mod             rm32          base      index     scale     r32
# 1-3 bytes   3 bits              2 bits          3 bits        3 bits    3 bits    2 bits    3 bits      0/1/2/4 bytes   0/1/2/4 bytes
  bb                                                                                                                      0x2a/imm32        # copy 42 to EBX
+translate: packing instruction 'bb 0x2a/imm32'
+translate: instruction after packing: 'bb 2a 00 00 00'
+run: copy imm32 0x0000002a to EBX