https://github.com/akkartik/mu/blob/master/subx/012elf.cc
1
2
3
4
5 :(before "End Main")
6 assert(argc > 1);
7 if (is_equal(argv[1], "run")) {
8 START_TRACING_UNTIL_END_OF_SCOPE;
9 trace(2, "run") << "=== Starting to run" << end();
10 assert(argc > 2);
11 reset();
12 cerr << std::hex;
13 load_elf(argv[2], argc, argv);
14 while (EIP < End_of_program)
15 run_one_instruction();
16 raise << "executed past end of the world: " << EIP << " vs " << End_of_program << '\n' << end();
17 return 1;
18 }
19
20 :(code)
21 void load_elf(const string& filename, int argc, char* argv[]) {
22 int fd = open(filename.c_str(), O_RDONLY);
23 if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die();
24 off_t size = lseek(fd, 0, SEEK_END);
25 lseek(fd, 0, SEEK_SET);
26 uint8_t* elf_contents = static_cast<uint8_t*>(malloc(size));
27 if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die();
28 ssize_t read_size = read(fd, elf_contents, size);
29 if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die();
30 load_elf_contents(elf_contents, size, argc, argv);
31 free(elf_contents);
32 }
33
34 void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[]) {
35 uint8_t magic[5] = {0};
36 memcpy(magic, elf_contents, 4);
37 if (memcmp(magic, "\177ELF", 4) != 0)
38 raise << "Invalid ELF file; starts with \"" << magic << '"' << die();
39 if (elf_contents[4] != 1)
40 raise << "Only 32-bit ELF files (4-byte words; virtual addresses up to 4GB) supported.\n" << die();
41 if (elf_contents[5] != 1)
42 raise << "Only little-endian ELF files supported.\n" << die();
43
44 uint32_t e_machine_type = u32_in(&elf_contents[16]);
45 if (e_machine_type != 0x00030002)
46 raise << "ELF type/machine 0x" << HEXWORD << e_machine_type << " isn't i386 executable\n" << die();
47
48 uint32_t e_entry = u32_in(&elf_contents[24]);
49 uint32_t e_phoff = u32_in(&elf_contents[28]);
50
51
52 uint32_t e_ehsize = u16_in(&elf_contents[40]);
53 if (e_ehsize < 52) raise << "Invalid binary; ELF header too small\n" << die();
54 uint32_t e_phentsize = u16_in(&elf_contents[42]);
55 uint32_t e_phnum = u16_in(&elf_contents[44]);
56 trace(90, "load") << e_phnum << " entries in the program header, each " << e_phentsize << " bytes long" << end();
57
58
59
60
61 set<uint32_t> overlap;
62 for (size_t i = 0; i < e_phnum; ++i)
63 load_segment_from_program_header(elf_contents, i, size, e_phoff + i*e_phentsize, e_ehsize, overlap);
64
65
66 assert(overlap.find(STACK_SEGMENT) == overlap.end());
67 Mem.push_back(vma(STACK_SEGMENT));
68 assert(overlap.find(AFTER_STACK) == overlap.end());
69
70 Reg[ESP].u = AFTER_STACK;
71 Reg[EBP].u = 0;
72 EIP = e_entry;
73
74
75
76
77 Mem.push_back(vma(ARGV_DATA_SEGMENT));
78 uint32_t argv_data = ARGV_DATA_SEGMENT;
79 for (int i = argc-1; i >= 2; --i) {
80 push(argv_data);
81 for (size_t j = 0; j <= strlen(argv[i]); ++j) {
82 assert(overlap.find(argv_data) == overlap.end());
83 write_mem_u8(argv_data, argv[i][j]);
84 argv_data += sizeof(char);
85 assert(argv_data < ARGV_DATA_SEGMENT + SEGMENT_ALIGNMENT);
86 }
87 }
88 push(argc-2);
89 }
90
91 void push(uint32_t val) {
92 Reg[ESP].u -= 4;
93 if (Reg[ESP].u < STACK_SEGMENT) {
94 raise << "The stack overflowed its segment. "
95 << "Maybe SPACE_FOR_SEGMENT should be larger? "
96 << "Or you need to carve out an exception for the stack segment "
97 << "to be larger.\n" << end();
98 exit(1);
99 }
100 trace(Callstack_depth+1, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end();
101 trace(Callstack_depth+1, "run") << "pushing value 0x" << HEXWORD << val << end();
102 write_mem_u32(Reg[ESP].u, val);
103 }
104
105 void load_segment_from_program_header(uint8_t* elf_contents, int segment_index, size_t size, uint32_t offset, uint32_t e_ehsize, set<uint32_t>& overlap) {
106 uint32_t p_type = u32_in(&elf_contents[offset]);
107 trace(90, "load") << "program header at offset " << offset << ": type " << p_type << end();
108 if (p_type != 1) {
109 trace(90, "load") << "ignoring segment at offset " << offset << " of non PT_LOAD type " << p_type << " (see http://refspecs.linuxbase.org/elf/elf.pdf)" << end();
110 return;
111 }
112 uint32_t p_offset = u32_in(&elf_contents[offset + 4]);
113 uint32_t p_vaddr = u32_in(&elf_contents[offset + 8]);
114 if (e_ehsize > p_vaddr) raise << "Invalid binary; program header overlaps ELF header\n" << die();
115
116 uint32_t p_filesz = u32_in(&elf_contents[offset + 16]);
117 uint32_t p_memsz = u32_in(&elf_contents[offset + 20]);
118 if (p_filesz != p_memsz)
119 raise << "Can't yet handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die();
120
121 if (p_offset + p_filesz > size)
122 raise << "Invalid binary; segment at offset " << offset << " is too large: wants to end at " << p_offset+p_filesz << " but the file ends at " << size << '\n' << die();
123 if (p_memsz >= SEGMENT_ALIGNMENT) {
124 raise << "Code segment too small for SubX; for now please manually increase SEGMENT_ALIGNMENT.\n" << end();
125 return;
126 }
127 trace(90, "load") << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ')' << end();
128 if (size > p_memsz) size = p_memsz;
129 Mem.push_back(vma(p_vaddr));
130 for (size_t i = 0; i < p_filesz; ++i) {
131 assert(overlap.find(p_vaddr+i) == overlap.end());
132 write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]);
133 overlap.insert(p_vaddr+i);
134 }
135 if (segment_index == 0 && End_of_program < p_vaddr+p_memsz)
136 End_of_program = p_vaddr+p_memsz;
137 }
138
139 :(before "End Includes")
140
141
142
143
144
145
146
147
// Fixed landmarks in the emulated 32-bit address space.
// NOTE(review): START_HEAP/END_HEAP are not referenced in this file; presumably
// they bound the heap used elsewhere -- confirm.
const uint32_t START_HEAP        = 0x0b000000;
const uint32_t END_HEAP          = 0xbd000000;
// The stack occupies [STACK_SEGMENT, AFTER_STACK); ESP starts at AFTER_STACK.
const uint32_t STACK_SEGMENT     = 0xbd000000;
const uint32_t AFTER_STACK       = 0xbe000000;
// Command-line argument strings for the emulated program are stored here.
const uint32_t ARGV_DATA_SEGMENT = 0xbf000000;
153
154
155 :(before "End Dump Info for Instruction")
156
157 :(code)
158 void dump_stack() {
159 ostringstream out;
160 trace(Callstack_depth+1, "run") << "stack:" << end();
161 for (uint32_t a = AFTER_STACK-4; a > Reg[ESP].u; a -= 4)
162 trace(Callstack_depth+2, "run") << " 0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end();
163 trace(Callstack_depth+2, "run") << " 0x" << HEXWORD << Reg[ESP].u << " => 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << " <=== ESP" << end();
164 for (uint32_t a = Reg[ESP].u-4; a > Reg[ESP].u-40; a -= 4)
165 trace(Callstack_depth+2, "run") << " 0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end();
166 }
167
// Decode the 4 bytes at `p` as a little-endian unsigned 32-bit integer.
// p: must point at 4 or more readable bytes.
inline uint32_t u32_in(uint8_t* p) {
  // Widen each byte to uint32_t before shifting. A bare uint8_t promotes to
  // (signed) int, and shifting a byte >= 0x80 left by 24 would shift into the
  // sign bit.
  return  static_cast<uint32_t>(p[0])
       | (static_cast<uint32_t>(p[1]) << 8)
       | (static_cast<uint32_t>(p[2]) << 16)
       | (static_cast<uint32_t>(p[3]) << 24);
}
171
// Decode the 2 bytes at `p` as a little-endian unsigned 16-bit integer.
// p: must point at 2 or more readable bytes.
inline uint16_t u16_in(uint8_t* p) {
  const int low_byte = p[0];
  const int high_byte = p[1];
  return (high_byte << 8) | low_byte;
}
175
176 :(before "End Types")
// Empty tag type: streaming a perr() into an ostream appends a description of
// the current errno, if any.
struct perr {
};
178 :(code)
179 ostream& operator<<(ostream& os, perr ) {
180 if (errno)
181 os << ": " << strerror(errno);
182 return os;
183 }
184
185 :(before "End Types")
// Empty tag type: streaming a die() into an ostream terminates the process
// with exit status 1.
struct die {
};
187 :(code)
188 ostream& operator<<(ostream& , die ) {
189 if (Trace_stream) Trace_stream->newline();
190 exit(1);
191 }
192
193 :(before "End Includes")
194 #include <sys/types.h>
195 #include <sys/stat.h>
196 #include <fcntl.h>
197 #include <stdarg.h>
198 #include <errno.h>
199 #include <unistd.h>