about summary refs log tree commit diff stats
path: root/subx/022transform_immediate.cc
blob: b1825fdaf23cf3f5171db6288d80d12f50330f98 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
//: Having to manually translate numbers into hex and enter them in
//: little-endian order is tedious and error-prone. Let's automate the
//: translation.
//:
//: We'll convert any immediate operands from decimal to hex and emit the
//: appropriate number of bytes. If they occur in a non-code segment we'll
//: raise an error.

:(scenario translate_immediate_constants)
== 0x1
# opcode        ModR/M                    SIB                   displacement    immediate
# instruction   mod, reg, Reg/Mem bits    scale, index, base
# 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
  bb                                                                            42/imm32
+translate: converting '42/imm32' to '2a 00 00 00'
+run: copy imm32 0x0000002a to EBX

#: we don't have a testable instruction using 8-bit immediates yet, so can't run this instruction
:(scenarios transform)
:(scenario translate_imm8)
== 0x1
  cd 128/imm8
+translate: converting '128/imm8' to '80'
:(scenarios run)

:(before "End One-time Setup")
// Append transform_immediate to the Transform list.
// NOTE(review): presumably this makes the program-level overload run as one
// of the transform passes -- confirm where Transform is consumed.
Transform.push_back(transform_immediate);

:(code)
// Translate immediates in the first segment of the program, and complain
// about any immediates found in the segments after it.
// Does nothing for a program without segments.
void transform_immediate(program& p) {
  if (SIZE(p.segments) == 0) return;
  // only the very first segment gets translated
  segment& first = p.segments.at(0);
  transform_immediate(first);
  // everywhere else, immediates are flagged as errors
  int s = 1;
  while (s < SIZE(p.segments)) {
    flag_immediate(p.segments.at(s));
    ++s;
  }
}

// Walk every word of every line in the segment, translating each word that
// carries immediate metadata.
void transform_immediate(segment& seg) {
  for (int row = 0;  row < SIZE(seg.lines);  ++row) {
    vector<word>& words = seg.lines.at(row).words;
    // the size is re-read on every iteration because translating a word may
    // change the number of words in the line
    for (int col = 0;  col < SIZE(words);  ++col) {
      if (!contains_immediate_metadata(words.at(col))) continue;
      transform_immediate(words, col);
    }
  }
}

void transform_immediate(vector<word>& line, int index) {
  assert(index < SIZE(line));
  if (contains_imm8_metadata(line.at(index)))
    transform_imm8(line.at(index));
  else
    transform_imm32(line, index);
}

// Rewrite the decimal number in w.data as hex, provided it fits in a single
// byte. Leaves w untouched if the trace contains errors after parsing, or if
// the value is larger than 0xff (which raises an error).
void transform_imm8(word& w) {
  const uint32_t parsed = parse_decimal(w.data);
  if (trace_contains_errors()) return;
  if (parsed <= 0xff) {
    w.data = serialize_hex(parsed);
    trace("translate") << "converting '" << w.original << "' to '" << w.data << "'" << end();
  }
  else {
    raise << "invalid /imm8 word " << w.data << '\n' << end();
  }
}

void transform_imm32(vector<word>& line, int index) {
  vector<word>::iterator find(vector<word>&, int);
  vector<word>::iterator x = find(line, index);
  uint32_t val = parse_decimal(x->data);
  if (trace_contains_errors()) return;
  string orig = x->original;
  x = line.erase(x);
  emit_octets(line, x, val, orig);
}

// Return an iterator to l[index].
// If index is too large, raise an error and return l.end() instead.
vector<word>::iterator find(vector<word>& l, int index) {
  if (index < SIZE(l)) {
    vector<word>::iterator result = l.begin();
    // non-positive indices leave the iterator at the start
    if (index > 0) result += index;
    return result;
  }
  raise << "find: index too large: " << index << " vs " << SIZE(l) << '\n' << end();
  return l.end();
}

void emit_octets(vector<word>& line, vector<word>::iterator pos, uint32_t val, const string& orig) {
  vector<word> new_data;
  for (int i = 0;  i < /*num bytes*/4;  ++i) {
    word tmp;
    tmp.data = serialize_hex(val & 0xff);  // little-endian
    new_data.push_back(tmp);
    val = val >> 8;
  }
  trace("translate") << "converting '" << orig << "' to '" << to_string(new_data) << "'" << end();
  line.insert(pos, new_data.begin(), new_data.end());
}

string to_string(const vector<word>& in) {
  ostringstream out;
  for (int i = 0;  i < SIZE(in);  ++i) {
    if (i > 0) out << ' ';
    out << HEXBYTE << in.at(i).data;
  }
  return out.str();
}

// Parse s as an unsigned decimal number.
// The whole string must be consumed by the number: on unparseable input, or
// if anything is left over after the digits (say "12abc" or "0x2a"), raise an
// error and return 0.
uint32_t parse_decimal(const string& s) {
  istringstream in(s);
  uint32_t result = 0;
  in >> result;
  // !in catches input that doesn't start with a number; !in.eof() catches
  // trailing junk that the extraction stopped in front of
  if (!in || !in.eof()) {
    raise << "not a number: " << s << '\n' << end();
    return 0;
  }
  return result;
}

// Render val in hexadecimal, without any padding or prefix.
string serialize_hex(const int val) {
  ostringstream out;
  std::hex(out);  // switch the stream to base 16 before writing
  out << val;
  return out.str();
}

void flag_immediate(const segment& s) {
  for (int i = 0;  i < SIZE(s.lines);  ++i)
    for (int j = 0;  j < SIZE(s.lines.at(i).words);  ++j)
      if (contains_immediate_metadata(s.lines.at(i).words.at(j)))
        raise << "/imm8 and /imm32 only permitted in code segments, and we currently only allow the very first segment to be code.\n" << end();
}

// Does any metadata entry of the word mark it as an immediate (imm8 or imm32)?
bool contains_immediate_metadata(const word& curr) {
  const int n = SIZE(curr.metadata);
  for (int k = 0;  k < n;  ++k) {
    const string& tag = curr.metadata.at(k);
    if (tag == "imm8") return true;
    if (tag == "imm32") return true;
  }
  return false;
}

// Does any metadata entry of the word mark it as an 8-bit immediate?
bool contains_imm8_metadata(const word& curr) {
  // scan order doesn't matter for a yes/no answer, so count down
  int k = SIZE(curr.metadata);
  while (k-- > 0) {
    if (curr.metadata.at(k) == "imm8") return true;
  }
  return false;
}

// helper
void transform(const string& text_bytes) {
  program p;
  istringstream in(text_bytes);
  parse(in, p);
  if (trace_contains_errors()) return;
  transform(p);
}