1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
|
//: The bedrock level 1 of abstraction is now done, and we're going to start
//: building levels above it that make programming in x86 machine code a
//: little more ergonomic.
//:
//: All levels will be "pass through by default". Whatever they don't
//: understand they will silently pass through to lower levels.
//:
//: Since raw hex bytes of machine code are always possible to inject, SubX is
//: not a language, and we aren't building a compiler. This is something
//: deliberately leakier. Levels are more for improving auditing, checks and
//: error messages rather than for hiding low-level details.
//: Translator workflow: read 'source' file. Run a series of transforms on it,
//: each passing through what it doesn't understand. The final program should
//: be just machine code, suitable to write to an ELF binary.
//:
//: Higher levels usually transform code on the basis of metadata.
:(before "End Main")
// 'subx translate': parse every input file named on the commandline into a
// single program, run the transforms over it, and write the result out as an
// ELF binary to the file named after '-o'.
// Finishes main with status 0 on success. On failure it either returns 1 or
// (for usage errors) calls exit(1).
if (is_equal(argv[1], "translate")) {
// NOTE(review): macro defined elsewhere; presumably turns on tracing for the
// rest of this scope, which the trace_contains_errors() checks below rely on
// -- confirm.
START_TRACING_UNTIL_END_OF_SCOPE;
reset();
// Begin subx translate
program p;
string output_filename;
// Every argument is an input file, except for '-o' and the argument right
// after it.
for (int i = /*skip 'subx translate'*/2; i < argc; ++i) {
if (is_equal(argv[i], "-o")) {
// '-o' consumes the next argument as the output filename
++i;
if (i >= argc) {
print_translate_usage();
cerr << "'-o' must be followed by a filename to write results to\n";
exit(1);
}
// if '-o' is provided multiple times, the last one wins
output_filename = argv[i];
}
else {
trace(2, "parse") << argv[i] << end();
ifstream fin(argv[i]);
if (!fin) {
cerr << "could not open " << argv[i] << '\n';
return 1;
}
// all input files are parsed into the same program
parse(fin, p);
// give up at the first file that fails to parse
if (trace_contains_errors()) return 1;
}
}
// This check also fires if files were provided but parsing them yielded no
// segments.
if (p.segments.empty()) {
print_translate_usage();
cerr << "nothing to do; must provide at least one file to read\n";
exit(1);
}
if (output_filename.empty()) {
print_translate_usage();
cerr << "must provide a filename to write to using '-o'\n";
exit(1);
}
trace(2, "transform") << "begin" << end();
transform(p);
// nothing has been written to disk yet, so there's nothing to clean up here
if (trace_contains_errors()) return 1;
trace(2, "translate") << "begin" << end();
save_elf(p, output_filename);
if (trace_contains_errors()) {
// errors while saving: delete the output file rather than leave it behind
unlink(output_filename.c_str());
return 1;
}
// End subx translate
return 0;
}
:(code)
// Print a one-line synopsis of the 'subx translate' commandline to standard
// error. Callers follow it up with a more specific complaint.
void print_translate_usage() {
  static const char synopsis[] = "Usage: subx translate file1 file2 ... -o output\n";
  cerr << synopsis;
}
// write out a program to a bare-bones ELF file
void save_elf(const program& p, const string& filename) {
ofstream out(filename.c_str(), ios::binary);
write_elf_header(out, p);
for (size_t i = 0; i < p.segments.size(); ++i)
write_segment(p.segments.at(i), out);
out.close();
}
// Write the 32-bit ELF header for program 'p' to 'out', followed by one
// program header per segment of 'p'.
//
// - The entry point defaults to the start address of the first segment.
// - The file offset recorded for the first segment lies right past the
//   program headers, and the offset for each later segment lies right past
//   the previous segment. So segment contents must be written out immediately
//   after the headers, in order.
// - Raises an error and returns early, leaving the headers incomplete, if the
//   file offset and start address of some segment aren't congruent modulo the
//   page size.
//
// Requires 'p' to have at least one segment.
void write_elf_header(ostream& out, const program& p) {
char c = '\0';
// O(X): write out the value X as a single byte
#define O(X) c = (X); out.write(&c, sizeof(c))
// host is required to be little-endian
// emit(X): write out the in-memory bytes of variable X. X must be an lvalue
// whose type has exactly the width of the field being written.
#define emit(X) out.write(reinterpret_cast<const char*>(&X), sizeof(X))
//// ehdr
// e_ident
O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
O(0x1); // 32-bit format
O(0x1); // little-endian
// ELF version 1, followed by a zero byte
O(0x1); O(0x0);
// pad e_ident out to 16 bytes
for (size_t i = 0; i < 8; ++i) { O(0x0); }
// e_type
// (2 == executable file in the ELF spec)
O(0x02); O(0x00);
// e_machine
// (3 == Intel 80386 in the ELF spec)
O(0x03); O(0x00);
// e_version
O(0x01); O(0x00); O(0x00); O(0x00);
// e_entry
uint32_t e_entry = p.segments.at(0).start; // convention
// Override e_entry
emit(e_entry);
// e_phoff -- immediately after ELF header
uint32_t e_phoff = 0x34;
emit(e_phoff);
// e_shoff; unused
uint32_t dummy32 = 0;
emit(dummy32);
// e_flags; unused
emit(dummy32);
// e_ehsize
// (the fields above and below add up to 0x34 bytes)
uint16_t e_ehsize = 0x34;
emit(e_ehsize);
// e_phentsize
// (each phdr below consists of 8 fields of 4 bytes each)
uint16_t e_phentsize = 0x20;
emit(e_phentsize);
// e_phnum
uint16_t e_phnum = SIZE(p.segments);
emit(e_phnum);
// e_shentsize
// (no section headers, so the remaining section-related fields are all 0)
uint16_t dummy16 = 0x0;
emit(dummy16);
// e_shnum
emit(dummy16);
// e_shstrndx
emit(dummy16);
// file offset of the contents of the first segment: right past all headers
uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
for (int i = 0; i < SIZE(p.segments); ++i) {
//// phdr
// p_type
// (1 == loadable segment in the ELF spec)
uint32_t p_type = 0x1;
emit(p_type);
// p_offset
emit(p_offset);
// p_vaddr
uint32_t p_start = p.segments.at(i).start;
emit(p_start);
// p_paddr
emit(p_start);
// p_filesz
// (each word of a segment occupies one byte in the file)
uint32_t size = num_words(p.segments.at(i));
// NOTE(review): SEGMENT_ALIGNMENT is defined elsewhere; presumably this keeps
// a segment from running into the address range of the next one -- confirm.
assert(p_offset + size < SEGMENT_ALIGNMENT);
emit(size);
// p_memsz
// (same as p_filesz; segments take up exactly as much space in memory as in
// the file)
emit(size);
// p_flags
uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6; // convention: only first segment is code
emit(p_flags);
// p_align
// "As the system creates or augments a process image, it logically copies
// a file's segment to a virtual memory segment. When—and if— the system
// physically reads the file depends on the program's execution behavior,
// system load, and so on. A process does not require a physical page
// unless it references the logical page during execution, and processes
// commonly leave many pages unreferenced. Therefore delaying physical
// reads frequently obviates them, improving system performance. To obtain
// this efficiency in practice, executable and shared object files must
// have segment images whose file offsets and virtual addresses are
// congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
uint32_t p_align = 0x1000; // default page size on linux
emit(p_align);
// Enforce the congruence requirement quoted above. By this point the phdr for
// the offending segment has already been written out, so on error we leave
// behind a partial file.
if (p_offset % p_align != p_start % p_align) {
raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
return;
}
// prepare for next segment
p_offset += size;
}
// the helper macros are local to this function
#undef O
#undef emit
}
void write_segment(const segment& s, ostream& out) {
for (int i = 0; i < SIZE(s.lines); ++i) {
const vector<word>& w = s.lines.at(i).words;
for (int j = 0; j < SIZE(w); ++j) {
uint8_t x = hex_byte(w.at(j).data); // we're done with metadata by this point
out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
}
}
}
// Count the words in segment 's', summed over all its lines.
uint32_t num_words(const segment& s) {
  uint32_t total = 0;
  const int num_lines = SIZE(s.lines);
  int line_index = 0;
  while (line_index < num_lines) {
    total += SIZE(s.lines.at(line_index).words);
    ++line_index;
  }
  return total;
}
:(before "End Includes")
using std::ios;
|