about summary refs log tree commit diff stats
path: root/012elf.cc
blob: c852e372108382a5ed617ce3809d43a204b681df (plain) (blame)
1
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
#!/bin/sh

./configure CFLAGS='-g3 -O0' CXXFLAGS='-g3 -O0' $@
88 189 190 191 192 193
//: Loading SubX programs from ELF binaries.
//: This will allow us to run them natively on a Linux kernel.
//: Based on https://github.com/kragen/stoneknifeforth/blob/702d2ebe1b/386.c

:(before "End Main")
assert(argc > 1);
if (is_equal(argv[1], "run")) {
  // Outside of tests, traces must be explicitly requested.
  if (Trace_file.is_open()) Trace_stream = new trace_stream;
  trace(2, "run") << "=== Starting to run" << end();
  if (argc <= 2) {
    raise << "Not enough arguments provided.\n" << die();
  }
  reset();
  cerr << std::hex;
  load_elf(argv[2], argc, argv);
  while (EIP < End_of_program)  // weak final-gasp termination check
    run_one_instruction();
  raise << "executed past end of the world: " << EIP << " vs " << End_of_program << '\n' << end();
  return 1;
}

:(code)
void load_elf(const string& filename, int argc, char* argv[]) {
  int fd = open(filename.c_str(), O_RDONLY);
  if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die();
  off_t size = lseek(fd, 0, SEEK_END);
  lseek(fd, 0, SEEK_SET);
  uint8_t* elf_contents = static_cast<uint8_t*>(malloc(size));
  if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die();
  ssize_t read_size = read(fd, elf_contents, size);
  if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die();
  load_elf_contents(elf_contents, size, argc, argv);
  free(elf_contents);
}

void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[]) {
  uint8_t magic[5] = {0};
  memcpy(magic, elf_contents, 4);
  if (memcmp(magic, "\177ELF", 4) != 0)
    raise << "Invalid ELF file; starts with \"" << magic << '"' << die();
  if (elf_contents[4] != 1)
    raise << "Only 32-bit ELF files (4-byte words; virtual addresses up to 4GB) supported.\n" << die();
  if (elf_contents[5] != 1)
    raise << "Only little-endian ELF files supported.\n" << die();
  // unused: remaining 10 bytes of e_ident
  uint32_t e_machine_type = u32_in(&elf_contents[16]);
  if (e_machine_type != 0x00030002)
    raise << "ELF type/machine 0x" << HEXWORD << e_machine_type << " isn't i386 executable\n" << die();
  // unused: e_version. We only support version 1, and later versions will be backwards compatible.
  uint32_t e_entry = u32_in(&elf_contents[24]);
  uint32_t e_phoff = u32_in(&elf_contents[28]);
  // unused: e_shoff
  // unused: e_flags
  uint32_t e_ehsize = u16_in(&elf_contents[40]);
  if (e_ehsize < 52) raise << "Invalid binary; ELF header too small\n" << die();
  uint32_t e_phentsize = u16_in(&elf_contents[42]);
  uint32_t e_phnum = u16_in(&elf_contents[44]);
  trace(90, "load") << e_phnum << " entries in the program header, each " << e_phentsize << " bytes long" << end();
  // unused: e_shentsize
  // unused: e_shnum
  // unused: e_shstrndx

  set<uint32_t> overlap;  // to detect overlapping segments
  for (size_t i = 0;  i < e_phnum;  ++i)
    load_segment_from_program_header(elf_contents, i, size, e_phoff + i*e_phentsize, e_ehsize, overlap);

  // initialize code and stack
  assert(overlap.find(STACK_SEGMENT) == overlap.end());
  Mem.push_back(vma(STACK_SEGMENT));
  assert(overlap.find(AFTER_STACK) == overlap.end());
  // The stack grows downward.
  Reg[ESP].u = AFTER_STACK;
  Reg[EBP].u = 0;
  EIP = e_entry;

  // initialize args on stack
  // no envp for now
  // we wastefully use a separate page of memory for argv
  Mem.push_back(vma(ARGV_DATA_SEGMENT));
  uint32_t argv_data = ARGV_DATA_SEGMENT;
  for (int i = argc-1;  i >= /*skip 'subx_bin' and 'run'*/2;  --i) {
    push(argv_data);
    for (size_t j = 0;  j <= strlen(argv[i]);  ++j) {
      assert(overlap.find(argv_data) == overlap.end());  // don't bother comparing ARGV and STACK
      write_mem_u8(argv_data, argv[i][j]);
      argv_data += sizeof(char);
      assert(argv_data < ARGV_DATA_SEGMENT + SEGMENT_ALIGNMENT);
    }
  }
  push(argc-/*skip 'subx_bin' and 'run'*/2);
}

void push(uint32_t val) {
  Reg[ESP].u -= 4;
  if (Reg[ESP].u < STACK_SEGMENT) {
    raise << "The stack overflowed its segment. "
          << "Maybe SPACE_FOR_SEGMENT should be larger? "
          << "Or you need to carve out an exception for the stack segment "
          << "to be larger.\n" << die();
  }
  trace(Callstack_depth+1, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end();
  trace(Callstack_depth+1, "run") << "pushing value 0x" << HEXWORD << val << end();
  write_mem_u32(Reg[ESP].u, val);
}

void load_segment_from_program_header(uint8_t* elf_contents, int segment_index, size_t size, uint32_t offset, uint32_t e_ehsize, set<uint32_t>& overlap) {
  uint32_t p_type = u32_in(&elf_contents[offset]);
  trace(90, "load") << "program header at offset " << offset << ": type " << p_type << end();
  if (p_type != 1) {
    trace(90, "load") << "ignoring segment at offset " << offset << " of non PT_LOAD type " << p_type << " (see http://refspecs.linuxbase.org/elf/elf.pdf)" << end();
    return;
  }
  uint32_t p_offset = u32_in(&elf_contents[offset + 4]);
  uint32_t p_vaddr = u32_in(&elf_contents[offset + 8]);
  if (e_ehsize > p_vaddr) raise << "Invalid binary; program header overlaps ELF header\n" << die();
  // unused: p_paddr
  uint32_t p_filesz = u32_in(&elf_contents[offset + 16]);
  uint32_t p_memsz = u32_in(&elf_contents[offset + 20]);
  if (p_filesz != p_memsz)
    raise << "Can't yet handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die();

  if (p_offset + p_filesz > size)
    raise << "Invalid binary; segment at offset " << offset << " is too large: wants to end at " << p_offset+p_filesz << " but the file ends at " << size << '\n' << die();
  if (p_memsz >= SEGMENT_ALIGNMENT) {
    raise << "Code segment too small for SubX; for now please manually increase SEGMENT_ALIGNMENT.\n" << end();
    return;
  }
  trace(90, "load") << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ')' << end();
  if (size > p_memsz) size = p_memsz;
  Mem.push_back(vma(p_vaddr));
  for (size_t i = 0;  i < p_filesz;  ++i) {
    assert(overlap.find(p_vaddr+i) == overlap.end());
    write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]);
    overlap.insert(p_vaddr+i);
  }
  if (segment_index == 0 && End_of_program < p_vaddr+p_memsz)
    End_of_program = p_vaddr+p_memsz;
}

:(before "End Includes")
// Very primitive/fixed/insecure ELF segments for now.
//   --- inaccessible:        0x00000000 -> 0x08047fff
//   code:                    0x09000000 -> 0x09ffffff (specified in ELF binary)
//   data:                    0x0a000000 -> 0x0affffff (specified in ELF binary)
//                      --- heap gets mmap'd somewhere here ---
//   stack:                   0xbdffffff -> 0xbd000000 (downward; not in ELF binary)
//   argv hack:               0xbf000000 -> 0xbfffffff (not in ELF binary)
//   --- reserved for kernel: 0xc0000000 -> ...
const uint32_t START_HEAP        = 0x0b000000;
const uint32_t END_HEAP          = 0xbd000000;
const uint32_t STACK_SEGMENT     = 0xbd000000;
const uint32_t AFTER_STACK       = 0xbe000000;
const uint32_t ARGV_DATA_SEGMENT = 0xbf000000;
// When updating the above memory map, don't forget to update `mmap`'s
// implementation in the 'syscalls' layer.
:(before "End Dump Info for Instruction")
//? dump_stack();  // slow
:(code)
void dump_stack() {
  ostringstream out;
  trace(Callstack_depth+1, "run") << "stack:" << end();
  for (uint32_t a = AFTER_STACK-4;  a > Reg[ESP].u;  a -= 4)
    trace(Callstack_depth+2, "run") << "  0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end();
  trace(Callstack_depth+2, "run") << "  0x" << HEXWORD << Reg[ESP].u << " => 0x" << HEXWORD << read_mem_u32(Reg[ESP].u) << "  <=== ESP" << end();
  for (uint32_t a = Reg[ESP].u-4;  a > Reg[ESP].u-40;  a -= 4)
    trace(Callstack_depth+2, "run") << "  0x" << HEXWORD << a << " => 0x" << HEXWORD << read_mem_u32(a) << end();
}

inline uint32_t u32_in(uint8_t* p) {
  return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
}

inline uint16_t u16_in(uint8_t* p) {
  return p[0] | p[1] << 8;
}

:(before "End Types")
struct perr {};
:(code)
ostream& operator<<(ostream& os, perr /*unused*/) {
  if (errno)
    os << ": " << strerror(errno);
  return os;
}

:(before "End Includes")
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
#include <errno.h>
#include <unistd.h>