about summary refs log blame commit diff stats
path: root/subx/020elf.cc
blob: 85bda20152561dacfba7b35c01cc585ba7aa7d67 (plain) (tree)
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
# https://adventofcode.com/2020/day/1
#
# To run (on Linux):
#   $ git clone https://github.com/akkartik/mu
#   $ cd mu
#   $ ./translate_mu apps/advent2020/1a.mu
#   $ ./a.elf < input
#   found
#   1353 667
#   902451
#
# You'll need to register to download the 'input' file for yourself.

# Entry point. Reads one decimal integer per line from stdin into a fixed-size
# array, then looks for two entries that add up to 2020. On success it prints
# "found", the two entries and their product, and returns 0 in ebx; otherwise
# it returns 1.
fn main -> _/ebx: int {
  # data structure
  var numbers-storage: (array int 0x100)  # 256 ints
  var numbers/esi: (addr array int) <- address numbers-storage
  # number of entries of 'numbers' filled in so far
  # NOTE(review): nothing visible here stops numbers-index from reaching 0x100
  # on inputs longer than 256 lines -- confirm how 'index' behaves then
  var numbers-index/ecx: int <- copy 0
  # phase 1: parse each line from stdin and add it to numbers
  {
    var line-storage: (stream byte 0x100)  # 256 bytes
    var line/edx: (addr stream byte) <- address line-storage
    {
#?       print-string 0, "== iter\n"
      # read line from stdin
      clear-stream line
      read-line-from-real-keyboard line
      # if line is empty (not even a newline), quit
      var done?/eax: boolean <- stream-empty? line
      compare done?, 0  # false
      break-if-!=
#?       print-stream-to-real-screen line
      # convert line to int and append it to numbers
      var n/eax: int <- parse-decimal-int-from-stream line
#?       print-int32-decimal 0, n
#?       print-string 0, "\n"
      var dest/ebx: (addr int) <- index numbers, numbers-index
      copy-to *dest, n
      numbers-index <- increment
#?       print-string 0, "== "
#?       print-int32-decimal 0, numbers-index
#?       print-string 0, "\n"
      loop
    }
  }
  # phase 2: for each number in the array, check if 2020-it is in the rest of
  # the array
  var i/eax: int <- copy 0
  {
    # stop once every filled entry has been tried
    compare i, numbers-index
    break-if->=
    var src/ebx: (addr int) <- index numbers, i
#?     print-int32-decimal 0, *src
#?     print-string 0, "\n"
    # target = 2020 - *src, the value that would complete the pair
    var target/ecx: int <- copy 0x7e4  # 2020
    target <- subtract *src
    {
      # only entries after index i are searched
      var found?/eax: boolean <- find-after numbers, i, target
      compare found?, 0  # false
      break-if-=
      print-string 0, "found\n"
      print-int32-decimal 0, *src
      print-string 0, " "
      print-int32-decimal 0, target
      print-string 0, "\n"
      # reuse 'target' to hold the product of the pair
      target <- multiply *src
      print-int32-decimal 0, target
      print-string 0, "\n"
      return 0  # success
    }
    i <- increment
    loop
  }
  return 1  # not found
}

# Linear search: return 1 (true) if some element of _numbers at an index
# strictly greater than 'start' equals _target, otherwise 0 (false).
# NOTE(review): the scan runs up to 'length numbers' rather than the number of
# entries the caller has filled in. If 'length' reports the array's declared
# size, unfilled slots are compared as well -- confirm this is intended.
fn find-after _numbers: (addr array int), start: int, _target: int -> _/eax: boolean {
  # copy the arguments into registers for the loop below
  var numbers/esi: (addr array int) <- copy _numbers
  var target/edi: int <- copy _target
  var len/ecx: int <- length numbers
  # begin at the element immediately after 'start'
  var i/eax: int <- copy start
  i <- increment
  {
    compare i, len
    break-if->=
    var src/edx: (addr int) <- index numbers, i
    # if *src == target, return true
    compare *src, target
    {
      break-if-!=
      return 1  # true
    }
    i <- increment
    loop
  }
  return 0  # false
}
10:23:08 -0800 4177' href='/akkartik/mu/commit/subx/020elf.cc?h=main&id=087a998e68dcc191265bef39a4dd816af5ecb3d5'>087a998e ^
414d9413 ^


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

                                                                           


                    
                               
                                   

                   
                   
                   
                          
                    
                                                                    
                          
                                                                                        
 



                                            
                                                                               


                                                              
                                                                                         
                                                   
                                                                                                                



                                        
                                                            

                                 
                                       
                                                                         
                           
                                                                                                       
                           
                                                                  


                                                      
                                                                                                       
                                                                                                   

                                               


                                                
                                                                                

                                                   
                                                                                               


                        
 
                                        
                                                                                            
 


                              


                
                                                                                                               
                                                  
                                                                               
                    
                                                                                                                                                 



                                                        
                                                                                                   
                                                                                                                                                                                                        



                                                         
                                                                                                                          

                                 
                                                                                                                                                                           

                                     
                                     
                                                                                                                                                      
                                         
                                                      



                                       










                                                      
                       
                                       

 



                                                    



                                    



                                               
            
                                  








                                                     

 
                        


                      
                   
                  


                   
// Helper for debugging and testing.
// Based on https://github.com/kragen/stoneknifeforth/blob/702d2ebe1b/386.c

:(before "End Main")
// Command-line handling for the 'run' subcommand: load the ELF binary named by
// argv[2] and execute it one instruction at a time.
// Missing arguments are reported only through assert().
assert(argc > 1);
if (is_equal(argv[1], "run")) {
  START_TRACING_UNTIL_END_OF_SCOPE;
  assert(argc > 2);
  reset();
  // from here on, integers written to cerr are formatted in hexadecimal
  cerr << std::hex;
  initialize_mem();
  // set before load_elf() writes the segments into memory
  Mem_offset = CODE_START;
  load_elf(argv[2]);
  // keep executing while EIP stays below the end of the loaded segments
  while (EIP < End_of_program)  // weak final-gasp termination check
    run_one_instruction();
  // reached only when EIP moves to or past End_of_program
  info << "executed past end of the world: " << EIP << " vs " << End_of_program << '\n';
}

:(code)
// Read the whole file at 'filename' into a temporary heap buffer, pass it to
// load_elf_contents(), then release the buffer.
// Any failure (open, lseek, malloc, short read) is reported via 'raise'
// followed by die(), which exits the process.
void load_elf(const string& filename) {
  int fd = open(filename.c_str(), O_RDONLY);
  if (fd < 0) raise << filename.c_str() << ": open" << perr() << '\n' << die();
  // seek to the end to learn the file size, then rewind for the read below
  off_t size = lseek(fd, 0, SEEK_END);
  if (size < 0) raise << filename.c_str() << ": lseek" << perr() << '\n' << die();
  lseek(fd, 0, SEEK_SET);
  uint8_t* elf_contents = static_cast<uint8_t*>(malloc(size));
  if (elf_contents == NULL) raise << "malloc(" << size << ')' << perr() << '\n' << die();
  ssize_t read_size = read(fd, elf_contents, size);
  if (size != read_size) raise << "read → " << size << " (!= " << read_size << ')' << perr() << '\n' << die();
  // the file is fully in memory now; don't leak the descriptor
  close(fd);
  load_elf_contents(elf_contents, size);
  free(elf_contents);
}

// Validate the ELF header held in elf_contents[0..size), load every program
// header entry via load_segment_from_program_header(), and then initialize
// ESP, EBP and EIP for execution.
// Only 32-bit little-endian i386 executables are accepted; anything else is
// reported via 'raise' followed by die().
void load_elf_contents(uint8_t* elf_contents, size_t size) {
  // Every field read below lies within the first 52 bytes (the size of a
  // 32-bit ELF header), so make sure the buffer really is that long.
  if (size < 52) raise << "Invalid binary; file too small to hold an ELF header\n" << die();
  uint8_t magic[5] = {0};
  memcpy(magic, elf_contents, 4);
  if (memcmp(magic, "\177ELF", 4) != 0)
    raise << "Invalid ELF file; starts with \"" << magic << '"' << die();
  if (elf_contents[4] != 1)
    raise << "Only 32-bit ELF files (4-byte words; virtual addresses up to 4GB) supported.\n" << die();
  if (elf_contents[5] != 1)
    raise << "Only little-endian ELF files supported.\n" << die();
  // unused: remaining 10 bytes of e_ident
  // e_type (16 bits) and e_machine (16 bits) are read together as one word
  uint32_t e_machine_type = u32_in(&elf_contents[16]);
  if (e_machine_type != 0x00030002)
    raise << "ELF type/machine 0x" << HEXWORD << e_machine_type << " isn't i386 executable\n" << die();
  // unused: e_version. We only support version 1, and later versions will be backwards compatible.
  uint32_t e_entry = u32_in(&elf_contents[24]);
  uint32_t e_phoff = u32_in(&elf_contents[28]);
  // unused: e_shoff
  // unused: e_flags
  uint32_t e_ehsize = u16_in(&elf_contents[40]);
  if (e_ehsize < 52) raise << "Invalid binary; ELF header too small\n" << die();
  uint32_t e_phentsize = u16_in(&elf_contents[42]);
  uint32_t e_phnum = u16_in(&elf_contents[44]);
  info << e_phnum << " entries in the program header, each " << e_phentsize << " bytes long\n";
  // unused: e_shentsize
  // unused: e_shnum
  // unused: e_shstrndx

  // program header entries are laid out back to back starting at e_phoff
  for (size_t i = 0;  i < e_phnum;  ++i)
    load_segment_from_program_header(elf_contents, size, e_phoff + i*e_phentsize, e_ehsize);

  // initialize code and stack
  Reg[ESP].u = AFTER_STACK;
  Reg[EBP].u = 0;
  EIP = e_entry;
}

// Load the segment described by the program header entry at byte 'offset' of
// elf_contents[0..size) into simulated memory.
// Entries whose type isn't PT_LOAD (1) are skipped with a message. Malformed
// or unsupported entries are reported via 'raise' followed by die().
// Also advances End_of_program to the end of the segment if that is larger.
void load_segment_from_program_header(uint8_t* elf_contents, size_t size, uint32_t offset, uint32_t e_ehsize) {
  // A 32-bit ELF program header entry is 32 bytes long; don't read one that
  // runs off the end of the file. (64-bit sum so the addition can't wrap.)
  if (static_cast<uint64_t>(offset) + 32 > size)
    raise << "Invalid binary; program header at offset " << offset << " extends past the end of the file at " << size << '\n' << die();
  uint32_t p_type = u32_in(&elf_contents[offset]);
  info << "program header at offset " << offset << ": type " << p_type << '\n';
  if (p_type != 1) {
    info << "ignoring segment at offset " << offset << " of non PT_LOAD type " << p_type << " (see http://refspecs.linuxbase.org/elf/elf.pdf)\n";
    return;
  }
  uint32_t p_offset = u32_in(&elf_contents[offset + 4]);
  uint32_t p_vaddr = u32_in(&elf_contents[offset + 8]);
  if (e_ehsize > p_vaddr) raise << "Invalid binary; program header overlaps ELF header\n" << die();
  if ((p_vaddr & 0xfffff000) != 0x08048000) raise << "Currently only supporting binaries starting in the default page 0x08048000, but code segment starts at 0x" << HEXWORD << p_vaddr << '\n' << die();
  // unused: p_paddr
  uint32_t p_filesz = u32_in(&elf_contents[offset + 16]);
  uint32_t p_memsz = u32_in(&elf_contents[offset + 20]);
  if (p_filesz != p_memsz)
    raise << "Can't handle segments where p_filesz != p_memsz (see http://refspecs.linuxbase.org/elf/elf.pdf)\n" << die();

  // Compute the end of the segment in 64 bits: in 32 bits p_offset+p_filesz
  // could wrap around and slip past this check.
  const uint64_t segment_end = static_cast<uint64_t>(p_offset) + p_filesz;
  if (segment_end > size)
    raise << "Invalid binary; segment at offset " << offset << " is too large: wants to end at " << segment_end << " but the file ends at " << size << '\n' << die();
  // NOTE(review): this compares Mem.size() against an absolute address; if Mem
  // is indexed relative to Mem_offset this grows Mem far more than needed --
  // confirm against write_mem_u8().
  if (Mem.size() < p_vaddr + p_memsz)
    Mem.resize(p_vaddr + p_memsz);
  info << "blitting file offsets (" << p_offset << ", " << (p_offset+p_filesz) << ") to addresses (" << p_vaddr << ", " << (p_vaddr+p_memsz) << ")\n";
  // copy the segment's bytes from the file image into simulated memory
  for (size_t i = 0;  i < p_filesz;  ++i)
    write_mem_u8(p_vaddr+i, elf_contents[p_offset+i]);
  if (End_of_program < p_vaddr+p_memsz)
    End_of_program = p_vaddr+p_memsz;
}

:(before "End Includes")
// Very primitive/fixed/insecure ELF segments for now.
//   code: 0x08048000 -> 0x08048fff
//   data: 0x08049000 -> 0x08049fff
//   heap: 0x0804a000 -> 0x0804afff
//   stack: 0x0804bfff -> 0x0804b000 (downward)
const int CODE_START = 0x08048000;
const int SEGMENT_SIZE = 0x1000;
const int DATA_START = 0x08049000;
const int AFTER_STACK = 0x0804c000;
:(code)
// Size simulated memory to span the addresses from CODE_START up to (but not
// including) AFTER_STACK.
void initialize_mem() {
  const int num_bytes = AFTER_STACK - CODE_START;
  Mem.resize(num_bytes);
}

// Assemble a 32-bit unsigned value from the 4 bytes at p, least significant
// byte first (little-endian).
// Each byte is widened to uint32_t before shifting: shifting the default
// int-promoted value would push a set top bit of p[3] into the sign bit.
inline uint32_t u32_in(uint8_t* p) {
  return static_cast<uint32_t>(p[0])
       | static_cast<uint32_t>(p[1]) << 8
       | static_cast<uint32_t>(p[2]) << 16
       | static_cast<uint32_t>(p[3]) << 24;
}

// Assemble a 16-bit unsigned value from the 2 bytes at p, least significant
// byte first (little-endian).
inline uint16_t u16_in(uint8_t* p) {
  const uint16_t low_byte = p[0];
  const uint16_t high_byte = p[1];
  return static_cast<uint16_t>(high_byte << 8 | low_byte);
}

:(before "End Types")
struct perr {};
:(code)
// Stream manipulator: when errno is non-zero, append ": " and the matching
// strerror() text to os; otherwise leave os untouched.
ostream& operator<<(ostream& os, unused perr) {
  if (errno == 0)
    return os;
  os << ": " << strerror(errno);
  return os;
}

:(before "End Types")
struct die {};
:(code)
// Stream manipulator that ends the program: if a trace stream is active, call
// its newline() first, then exit with status 1. Never returns.
ostream& operator<<(unused ostream& os, unused die) {
  if (Trace_stream != NULL)
    Trace_stream->newline();
  exit(1);
}

:(before "End Includes")
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
#include <errno.h>

#define info cerr
// #define info dbg