//: Source: https://github.com/akkartik/mu/blob/main/012elf.cc
//: Loading SubX programs from ELF binaries.
//: This will allow us to run them natively on a Linux kernel.
//: Based on https://github.com/kragen/stoneknifeforth/blob/702d2ebe1b/386.c
  4 
  5 :(before "End Main")
  6 assert(argc > 1);
  7 if (is_equal(argv[1], "run")) {
  8   // Outside of tests, traces must be explicitly requested.
  9   if (Trace_file.is_open()) Trace_stream = new trace_stream;
 10   trace(2, "run") << "=== Starting to run" << end();
 11   if (argc <= 2) {
 12     raise << "Not enough arguments provided.\n" << die();
 13   }
 14   reset();
 15   cerr << std::hex;
 16   load_elf(argv[2], argc, argv);
 17   while (EIP < End_of_program)  // weak final-gasp termination check
 18     run_one_instruction();
 19   raise << "executed past end of the world: " << EIP << " vs " << End_of_program << '\n' << end();
 20   return 1;
 21 }
 22 
 23 :(code)
 24 void load_elf(const string& filename, int argc, char* argv[]) {
 25   int fd = open(filename.c_str(), O_RDONLY);
 26   if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die();
 27   off_t size = lseek(fd, 0, SEEK_END);
 28   lseek(fd, 0, SEEK_SET);
 29   uint8_t* elf_contents = static_cast<uint8_t*>(malloc(size));
 30   if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die();
 31   ssize_t read_size = read(fd, elf_contents, size);
 32   if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die();
 33   load_elf_contents(elf_contents, size, argc, argv);
 34   free(elf_contents);
 35 }
 36 
 37 void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[]) {
 38   uint8_t magic[5] = {0};
 39   memcpy(magic, elf_contents, 4);
 40   if (memcmp(magic, "\177ELF", 4) != 0)
 41     raise << "Invalid ELF file; starts with \"" << magic << '"' << die();
 42   if 
# Copyright (C) 2009, 2010  Roman Zimbelmann <romanz@lavabit.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Workaround to allow running single test cases directly"""
# Try the direct import first; if that import fails, fall back to importing
# the same names from `test`. Only ImportError triggers the fallback, so
# KeyboardInterrupt/SystemExit are no longer swallowed.
try:
	from __init__ import init, Fake, OK, raise_ok, TODO
except ImportError:
	from test import init, Fake, OK, raise_ok, TODO
51 uint32_t e_entry = u32_in(&elf_contents[24]); 52 uint32_t e_phoff = u32_in(&elf_contents[28]); 53 // unused: e_shoff 54 // unused: e_flags 55 uint32_t e_ehsize = u16_in(&elf_contents[40]); 56 if (e_ehsize < 52) raise << "Invalid binary; ELF header too small\n" << die(); 57 uint32_t e_phentsize = u16_in(&elf_contents[42]); 58 uint32_t e_phnum = u16_in(&elf_contents[44]); 59 trace(90, "load") << e_phnum << " entries in the program header, each " << e_phentsize << " bytes long" << end(); 60 // unused: e_shentsize 61 // unused: e_shnum 62 // unused: e_shstrndx 63 64 set<uint32_t> overlap; // to detect overlapping segments 65 for (size_t i = 0; i < e_phnum; ++i) 66 load_segment_from_program_header(elf_contents, i, size, e_phoff + i*e_phentsize, e_ehsize, overlap); 67 68 // initialize code and stack 69 assert(overlap.find(STACK_SEGMENT) == overlap.end()); 70 Mem.push_back(vma(STACK_SEGMENT)); 71 assert(overlap.find(AFTER_STACK) == overlap.end()); 72 // The stack grows downward. 73 Reg[ESP].u = AFTER_STACK; 74 Reg[EBP].u = 0; 75 EIP = e_entry; 76 77 // initialize args on stack 78 // no envp for now 79 // we wastefully use a separate page of memory for argv 80 Mem.push_back(vma(ARGV_DATA_SEGMENT)); 81 uint32_t argv_data = ARGV_DATA_SEGMENT; 82 for (int i = argc-1; i >= /*skip 'subx_bin' and 'run'*/2; --i) { 83 push(argv_data); 84 for (size_t j = 0; j <= strlen(argv[i]); ++j) { 85 assert(overlap.find(argv_data) == overlap.end()); // don't bother comparing ARGV and STACK 86 write_mem_u8(argv_data, argv[i][j]); 87 argv_data += sizeof(char); 88 assert(argv_data < ARGV_DATA_SEGMENT + SEGMENT_ALIGNMENT); 89 } 90 } 91 push(argc-/*skip 'subx_bin' and 'run'*/2); 92 } 93 94 void push(uint32_t val) { 95 Reg[ESP].u -= 4; 96 if (Reg[ESP].u < STACK_SEGMENT) { 97 raise << "The stack overflowed its segment. " 98 << "Maybe SPACE_FOR_SEGMENT should be larger? 
" 99 << "Or you need to carve out an exception for the stack segment " 100 << "to be larger.\n" << die(); 101 } 102 trace(Callstack_depth+1, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); 103 trace(Callstack_depth+1, "run") << "pushing value 0x" << HEXWORD << val << end(); 104 write_mem_u32(Reg[ESP].u, val); 105 } 106 107 void load_segment_from_program_header(uint8_t* elf_contents, int segment_index, size_t size, uint32_t offset, uint32_t e_ehsize, set<uint32_t>& overlap) { 108 uint32_t p_type = u32_in(&elf_contents[offset]); 109 trace(90, "load") << "program header at offset " << offset << ": type " << p_type << end(); 110 if (p_type != 1) { 111 trace(90, "load") << "ignoring segment at offset " << offset << " of non PT_LOAD type " << p_type << " (see http://refspecs.linuxbase.org/elf/elf.pdf)" << end(); 112 return; 113 } 114 uint32_t p_offset = u32_in(&elf_contents[offset + 4]); 115 uint32_t p_vaddr = u32_in(&elf_contents[offset + 8]); 116 if (e_ehsize > p_vaddr) raise << "Invalid binary; program header overlaps ELF header\n" << die(); 117 // unused: p_paddr 118 uint32_t p_filesz = u32_in(&elf_contents[offset + 16]); 119 uint32_t p_memsz = u32_in(&elf_contents[offset + 20]); 120 if (p_filesz != p_memsz) 121 raise << "Can't yet handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die(); 122 123 if (p_offset + p_filesz > size) 124 raise << "Invalid binary; segment at offset " << offset << " is too large: wants to end at " << p_offset+p_filesz << " but the file ends at " << size << '\n' << die(); 125 if (p_memsz >= SEGMENT_ALIGNMENT) { 126 raise << "Code segment too small for SubX; for now please manually increase SEGMENT_ALIGNMENT.\n" << end(); 127 return; 128 } 129 trace(90, "load") << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ')' << end(); 130 if (size > p_memsz) size = p_memsz; 131 
Mem.push_back(vma(p_vaddr)); 132 for (size_t i = 0; i < p_filesz; ++i) { 133 assert(overlap.find(p_vaddr+i) == overlap.end()); 134 write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]); 135 overlap.insert(p_vaddr+i); 136 } 137 if (segment_index == 0 && End_of_program < p_vaddr+p_memsz) 138 End_of_program = p_vaddr+p_memsz; 139 } 140 141 :(before "End Includes") 142 // Very primitive/fixed/insecure ELF segments for now. 143 // --- inaccessible: 0x00000000 -> 0x08047fff 144 // code: 0x09000000 -> 0x09ffffff (specified in ELF binary) 145 // data: 0x0a000000 -> 0x0affffff (specified in ELF binary) 146 // --- heap gets mmap'd somewhere here --- 147 // stack: 0xbdffffff -> 0xbd000000 (downward; not in ELF binary) 148 // argv hack: 0xbf000000 -> 0xbfffffff (not in ELF binary) 149 // --- reserved for kernel: 0xc0000000 -> ... 150 const uint32_t START_HEAP = 0x0b000000; 151 const uint32_t END_HEAP = 0xbd000000; 152 const uint32_t STACK_SEGMENT = 0xbd000000; 153 const uint32_t AFTER_STACK = 0xbe000000; 154 const uint32_t ARGV_DATA_SEGMENT = 0xbf000000; 155 // When updating the above memory map, don't forget to update `mmap`'s 156 // implementation in the 'syscalls' layer. 157 :(before "End Dump Info for Instruction") 158 //? 
dump_stack(); // slow 159 :(code) 160 void dump_stack() { 161 ostringstream out; 162 trace(Callstack_depth+1, "run") << "stack:" << end(); 163 for (uint32_t a = AFTER_STACK-4; a > Reg[ESP].u; a -= 4) 164 trace(Callstack_depth+2, "run") << " 0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end(); 165 trace(Callstack_depth+2, "run") << " 0x" << HEXWORD << Reg[ESP].u << " => 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << " <=== ESP" << end(); 166 for (uint32_t a = Reg[ESP].u-4; a > Reg[ESP].u-40; a -= 4) 167 trace(Callstack_depth+2, "run") << " 0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end(); 168 } 169 170 inline uint32_t u32_in(uint8_t* p) { 171 return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; 172 } 173 174 inline uint16_t u16_in(uint8_t* p) { 175 return p[0] | p[1] << 8; 176 } 177 178 :(before "End Types") 179 struct perr {}; 180 :(code) 181 ostream& operator<<(ostream& os, perr /*unused*/) { 182 if (errno) 183 os << ": " << strerror(errno); 184 return os; 185 } 186 187 :(before "End Includes") 188 #include <sys/types.h> 189 #include <sys/stat.h> 190 #include <fcntl.h> 191 #include <stdarg.h> 192 #include <errno.h> 193 #include <unistd.h>