about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--subx/012elf.cc1
-rw-r--r--subx/013direct_addressing.cc14
-rw-r--r--subx/014indirect_addressing.cc13
-rw-r--r--subx/015immediate_addressing.cc5
-rw-r--r--subx/019functions.cc19
-rw-r--r--subx/040---tests.cc3
6 files changed, 33 insertions, 22 deletions
diff --git a/subx/012elf.cc b/subx/012elf.cc
index d0a3fbd2..a77c6056 100644
--- a/subx/012elf.cc
+++ b/subx/012elf.cc
@@ -90,6 +90,7 @@ void load_elf_contents(uint8_t* elf_contents, size_t size, int argc, char* argv[
 
 void push(uint32_t val) {
   Reg[ESP].u -= 4;
+  assert(Reg[ESP].u >= STACK_SEGMENT);
   trace(Callstack_depth+1, "run") << "decrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end();
   trace(Callstack_depth+1, "run") << "pushing value 0x" << HEXWORD << val << end();
   write_mem_u32(Reg[ESP].u, val);
diff --git a/subx/013direct_addressing.cc b/subx/013direct_addressing.cc
index c2dfa911..160ce6d6 100644
--- a/subx/013direct_addressing.cc
+++ b/subx/013direct_addressing.cc
@@ -971,7 +971,8 @@ put_new(Name, "57", "push EDI to stack (push)");
 
 :(code)
 void test_push_r32() {
-  Reg[ESP].u = 0x64;
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000008;
   Reg[EBX].i = 0x0000000a;
   run(
       "== 0x1\n"  // code segment
@@ -980,7 +981,7 @@ void test_push_r32() {
   );
   CHECK_TRACE_CONTENTS(
       "run: push EBX\n"
-      "run: decrementing ESP to 0x00000060\n"
+      "run: decrementing ESP to 0x7d000004\n"
       "run: pushing value 0x0000000a\n"
   );
 }
@@ -1015,9 +1016,9 @@ put_new(Name, "5f", "pop top of stack to EDI (pop)");
 
 :(code)
 void test_pop_r32() {
-  Reg[ESP].u = 0x02000000;
-  Mem.push_back(vma(0x02000000));  // manually allocate memory
-  write_mem_i32(0x02000000, 0x0000000a);  // ..before this write
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000008;
+  write_mem_i32(0x7d000008, 0x0000000a);  // ..before this write
   run(
       "== 0x1\n"  // code segment
       // op     ModR/M  SIB   displacement  immediate
@@ -1028,7 +1029,7 @@ void test_pop_r32() {
   CHECK_TRACE_CONTENTS(
       "run: pop into EBX\n"
       "run: popping value 0x0000000a\n"
-      "run: incrementing ESP to 0x02000004\n"
+      "run: incrementing ESP to 0x7d00000c\n"
   );
 }
 
@@ -1054,5 +1055,6 @@ uint32_t pop() {
   trace(Callstack_depth+1, "run") << "popping value 0x" << HEXWORD << result << end();
   Reg[ESP].u += 4;
   trace(Callstack_depth+1, "run") << "incrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end();
+  assert(Reg[ESP].u < AFTER_STACK);
   return result;
 }
diff --git a/subx/014indirect_addressing.cc b/subx/014indirect_addressing.cc
index f591f0cf..8f0d3325 100644
--- a/subx/014indirect_addressing.cc
+++ b/subx/014indirect_addressing.cc
@@ -531,7 +531,8 @@ case 4: {  // jump to r/m32
 :(code)
 void test_push_mem_at_r32() {
   Reg[EAX].i = 0x2000;
-  Reg[ESP].u = 0x14;
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000014;
   run(
       "== 0x1\n"  // code segment
       // op     ModR/M  SIB   displacement  immediate
@@ -542,7 +543,7 @@ void test_push_mem_at_r32() {
   CHECK_TRACE_CONTENTS(
       "run: push r/m32\n"
       "run: effective address is 0x00002000 (EAX)\n"
-      "run: decrementing ESP to 0x00000010\n"
+      "run: decrementing ESP to 0x7d000010\n"
       "run: pushing value 0x000000af\n"
   );
 }
@@ -563,20 +564,20 @@ put_new(Name, "8f", "pop top of stack to rm32 (pop)");
 :(code)
 void test_pop_mem_at_r32() {
   Reg[EAX].i = 0x60;
-  Reg[ESP].u = 0x2000;
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000000;
+  write_mem_i32(0x7d000000, 0x00000030);
   run(
       "== 0x1\n"  // code segment
       // op     ModR/M  SIB   displacement  immediate
       "  8f     00                                    \n"  // pop stack into *EAX
       // ModR/M in binary: 00 (indirect mode) 000 (pop r/m32) 000 (dest EAX)
-      "== 0x2000\n"  // data segment
-      "30 00 00 00\n"  // 0x00000030
   );
   CHECK_TRACE_CONTENTS(
       "run: pop into r/m32\n"
       "run: effective address is 0x00000060 (EAX)\n"
       "run: popping value 0x00000030\n"
-      "run: incrementing ESP to 0x00002004\n"
+      "run: incrementing ESP to 0x7d000004\n"
   );
 }
 
diff --git a/subx/015immediate_addressing.cc b/subx/015immediate_addressing.cc
index 18cd5334..16d886e8 100644
--- a/subx/015immediate_addressing.cc
+++ b/subx/015immediate_addressing.cc
@@ -824,7 +824,8 @@ put_new(Name, "68", "push imm32 to stack (push)");
 
 :(code)
 void test_push_imm32() {
-  Reg[ESP].u = 0x14;
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000014;
   run(
       "== 0x1\n"  // code segment
       // op     ModR/M  SIB   displacement  immediate
@@ -832,7 +833,7 @@ void test_push_imm32() {
   );
   CHECK_TRACE_CONTENTS(
       "run: push imm32 0x000000af\n"
-      "run: ESP is now 0x00000010\n"
+      "run: ESP is now 0x7d000010\n"
       "run: contents at ESP: 0x000000af\n"
   );
 }
diff --git a/subx/019functions.cc b/subx/019functions.cc
index 7f45167b..27fb4fb0 100644
--- a/subx/019functions.cc
+++ b/subx/019functions.cc
@@ -5,7 +5,8 @@ put_new(Name, "e8", "call disp32 (call)");
 
 :(code)
 void test_call_disp32() {
-  Reg[ESP].u = 0x64;
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000064;
   run(
       "== 0x1\n"  // code segment
       // op     ModR/M  SIB   displacement  immediate
@@ -14,7 +15,7 @@ void test_call_disp32() {
   );
   CHECK_TRACE_CONTENTS(
       "run: call imm32 0x000000a0\n"
-      "run: decrementing ESP to 0x00000060\n"
+      "run: decrementing ESP to 0x7d000060\n"
       "run: pushing value 0x00000006\n"
       "run: jumping to 0x000000a6\n"
   );
@@ -36,7 +37,8 @@ case 0xe8: {  // call disp32 relative to next EIP
 
 :(code)
 void test_call_r32() {
-  Reg[ESP].u = 0x64;
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000064;
   Reg[EBX].u = 0x000000a0;
   run(
       "== 0x1\n"  // code segment
@@ -47,7 +49,7 @@ void test_call_r32() {
   CHECK_TRACE_CONTENTS(
       "run: call to r/m32\n"
       "run: r/m32 is EBX\n"
-      "run: decrementing ESP to 0x00000060\n"
+      "run: decrementing ESP to 0x7d000060\n"
       "run: pushing value 0x00000003\n"
       "run: jumping to 0x000000a3\n"
   );
@@ -66,7 +68,8 @@ case 2: {  // call function pointer at r/m32
 
 :(code)
 void test_call_mem_at_r32() {
-  Reg[ESP].u = 0x64;
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000064;
   Reg[EBX].u = 0x2000;
   run(
       "== 0x1\n"  // code segment
@@ -79,7 +82,7 @@ void test_call_mem_at_r32() {
   CHECK_TRACE_CONTENTS(
       "run: call to r/m32\n"
       "run: effective address is 0x00002000 (EBX)\n"
-      "run: decrementing ESP to 0x00000060\n"
+      "run: decrementing ESP to 0x7d000060\n"
       "run: pushing value 0x00000003\n"
       "run: jumping to 0x000000a3\n"
   );
@@ -92,7 +95,9 @@ put_new(Name, "c3", "return from most recent unfinished call (ret)");
 
 :(code)
 void test_ret() {
-  Reg[ESP].u = 0x2000;
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000064;
+  write_mem_u32(Reg[ESP].u, 0x10);
   run(
       "== 0x1\n"  // code segment
       // op     ModR/M  SIB   displacement  immediate
diff --git a/subx/040---tests.cc b/subx/040---tests.cc
index d35cc711..237bb811 100644
--- a/subx/040---tests.cc
+++ b/subx/040---tests.cc
@@ -16,7 +16,8 @@ Transform.push_back(create_test_function);
 
 :(code)
 void test_run_test() {
-  Reg[ESP].u = 0x100;
+  Mem.push_back(vma(0x7d000000));  // manually allocate memory
+  Reg[ESP].u = 0x7d000100;
   run(
       "== 0x1\n"  // code segment
       "main:\n"
nslate.cc?h=main&id=d5f75ffe86abcd6c59e02f0a59c0e4d3d73d8f70'>^
1f56ac64 ^
3b798ea2 ^
b141a448 ^

3b798ea2 ^
b141a448 ^

















ef52bbf9 ^
8188bbbc ^
b141a448 ^

8188bbbc ^
b141a448 ^

8188bbbc ^
b141a448 ^



1f56ac64 ^
b141a448 ^




1f56ac64 ^
b141a448 ^








1bbbf14f ^
1f56ac64 ^
83c67014 ^
1f56ac64 ^






83c67014 ^
a49bc413 ^
1f56ac64 ^
a49bc413 ^
1f56ac64 ^
83c67014 ^
4a99a6e0 ^
1f56ac64 ^



83c67014 ^
1f56ac64 ^
21d8241b ^
1f56ac64 ^
21d8241b ^









dc9a126e ^
1f56ac64 ^
a49bc413 ^

bcf92ad2 ^


1f56ac64 ^


b141a448 ^
262f6ea8 ^
b141a448 ^

3b798ea2 ^
1f56ac64 ^





b20d9ad3 ^
b20d9ad3 ^

a49bc413 ^
1f56ac64 ^




7b686a02 ^
b141a448 ^

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206

                                                                             


                                                                              
                                                                            
 

                                     

                                                            
          
                              
            
                         
                                                                  









                                                                            
                                            

















                                                                      
   
                                            

                                        
                                            
                               
                                
                                    

             
                            


           
       



                              
                              
                                                                   

 
                                               

                                                         




                                               
                                                                



                                                 




                                                        
                           
                                                  
                                         

 
                                                       

















                                                                        
                             
                     

                                            
                          

                    
                       



                    
                           




                              
                                      








                         
                                                                                        
                                                
                                           






                          
                                  
                  
              
                  
               
                                    
                                                



               
                                                                       
                  
 
              









                                                                                               
                                                             
                  

                                                                                                                                                                                                                              


             


                               
        
           

 
                                                    





                                                                                    
   

 
                                      




                                           
 

                        
//: After that lengthy prelude to define an x86 emulator, we are now ready to
//: start translating SubX notation.

//: Translator workflow: read 'source' file. Run a series of transforms on it,
//: each passing through what it doesn't understand. The final program should
//: be just machine code, suitable to emulate, or to write to an ELF binary.

:(before "End Main")
// Subcommand 'translate': parse every input file into a single program, run
// the transforms over it, and save the result as an ELF binary.
// Returns/exits with status 1 on any usage, parse, transform or save error,
// and returns 0 on success.
if (is_equal(argv[1], "translate")) {
  // Outside of tests, traces must be explicitly requested.
  if (Trace_file.is_open()) Trace_stream = new trace_stream;
  reset();
  // Begin bootstrap translate
  program p;
  string output_filename;
  // Every argument names an input file, except that '-o' consumes the
  // argument after it as the output filename.
  for (int i = /*skip 'bootstrap translate'*/2;  i < argc;  ++i) {
    if (is_equal(argv[i], "-o")) {
      ++i;
      // '-o' was the last argument; there's no filename to consume
      if (i >= argc) {
        print_translate_usage();
        cerr << "'-o' must be followed by a filename to write results to\n";
        exit(1);
      }
      output_filename = argv[i];
    }
    else {
      trace(2, "parse") << argv[i] << end();
      ifstream fin(argv[i]);
      if (!fin) {
        cerr << "could not open " << argv[i] << '\n';
        return 1;
      }
      // all input files accumulate into the same program
      parse(fin, p);
      if (trace_contains_errors()) return 1;
    }
  }
  // no input file contributed any segments
  if (p.segments.empty()) {
    print_translate_usage();
    cerr << "nothing to do; must provide at least one file to read\n";
    exit(1);
  }
  if (output_filename.empty()) {
    print_translate_usage();
    cerr << "must provide a filename to write to using '-o'\n";
    exit(1);
  }
  trace(2, "transform") << "begin" << end();
  transform(p);
  if (trace_contains_errors()) return 1;
  trace(2, "translate") << "begin" << end();
  save_elf(p, output_filename);
  // don't leave an output file behind if saving raised errors
  if (trace_contains_errors()) {
    unlink(output_filename.c_str());
    return 1;
  }
  // End bootstrap translate
  return 0;
}

:(code)
// Run the translator's transforms over program 'p', modifying it in place.
// The body is intentionally empty at this point in the file.
// NOTE(review): presumably later layers splice their transforms in at the
// marker comment inside the body -- confirm against the other layers.
void transform(program& p) {
  // End transform(program& p)
}

// Print the expected command line for the 'translate' subcommand to stderr.
void print_translate_usage() {
  static const char usage[] = "Usage: bootstrap translate file1 file2 ... -o output\n";
  cerr << usage;
}

// write out a program to a bare-bones ELF file
void save_elf(const program& p, const string& filename) {
  ofstream out(filename.c_str(), ios::binary);
  save_elf(p, out);
  out.close();
}

// Serialize program 'p' to 'out' in ELF format: first the headers, then the
// contents of each segment in order.
// Raises an error and writes nothing if 'p' has no entry point or no 'data'
// segment.
void save_elf(const program& p, ostream& out) {
  // validation: stay consistent with the self-hosted translator
  const bool has_entry = (p.entry != 0);
  if (!has_entry) {
    raise << "no 'Entry' label found\n" << end();
    return;
  }
  const bool has_data_segment = (find(p, "data") != NULL);
  if (!has_data_segment) {
    raise << "must include a 'data' segment\n" << end();
    return;
  }
  // processing
  write_elf_header(out, p);
  const size_t num_segments = p.segments.size();
  for (size_t idx = 0;  idx != num_segments;  ++idx) {
    write_segment(p.segments.at(idx), out);
  }
}

// Write the ELF headers for program 'p' to 'out': the file header (ehdr),
// immediately followed by one program header (phdr) per segment of 'p'.
// Raises an error and stops early if some segment's start address and file
// offset disagree modulo the page size.
void write_elf_header(ostream& out, const program& p) {
  char c = '\0';
// O(X) writes out the single byte X
#define O(X)  c = (X); out.write(&c, sizeof(c))
// emit(X) writes out the variable X as raw bytes in the host's byte order
// host is required to be little-endian
#define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
  //// ehdr
  // e_ident
  O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
    O(0x1);  // 32-bit format
    O(0x1);  // little-endian
    O(0x1); O(0x0);
  for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
  // e_type
  O(0x02); O(0x00);  // 2 is ET_EXEC (executable file) in the ELF spec
  // e_machine
  O(0x03); O(0x00);  // 3 is EM_386 (Intel 80386) in the ELF spec
  // e_version
  O(0x01); O(0x00); O(0x00); O(0x00);
  // e_entry
  uint32_t e_entry = p.entry;
  // Override e_entry
  emit(e_entry);
  // e_phoff -- immediately after ELF header
  uint32_t e_phoff = 0x34;
  emit(e_phoff);
  // e_shoff; unused
  uint32_t dummy32 = 0;
  emit(dummy32);
  // e_flags; unused
  emit(dummy32);
  // e_ehsize
  uint16_t e_ehsize = 0x34;
  emit(e_ehsize);
  // e_phentsize
  uint16_t e_phentsize = 0x20;
  emit(e_phentsize);
  // e_phnum
  uint16_t e_phnum = SIZE(p.segments);
  emit(e_phnum);
  // e_shentsize
  uint16_t dummy16 = 0x0;
  emit(dummy16);
  // e_shnum
  emit(dummy16);
  // e_shstrndx
  emit(dummy16);

  // segment contents start right after all the headers, and are laid out
  // back to back in the file
  uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
  for (int i = 0;  i < SIZE(p.segments);  ++i) {
    const segment& curr = p.segments.at(i);
    //// phdr
    // p_type
    uint32_t p_type = 0x1;  // 1 is PT_LOAD (loadable segment) in the ELF spec
    emit(p_type);
    // p_offset
    emit(p_offset);
    // p_vaddr
    uint32_t p_start = curr.start;
    emit(p_start);
    // p_paddr
    emit(p_start);
    // p_filesz
    uint32_t size = num_words(curr);
    // NOTE(review): SEGMENT_ALIGNMENT is defined elsewhere; this looks like a
    // cap on how far into the file a segment may extend -- confirm
    assert(p_offset + size < SEGMENT_ALIGNMENT);
    emit(size);
    // p_memsz
    emit(size);
    // p_flags
    uint32_t p_flags = (curr.name == "code") ? /*r-x*/0x5 : /*rw-*/0x6;
    emit(p_flags);

    // p_align
    // "As the system creates or augments a process image, it logically copies
    // a file's segment to a virtual memory segment.  When—and if— the system
    // physically reads the file depends on the program's execution behavior,
    // system load, and so on.  A process does not require a physical page
    // unless it references the logical page during execution, and processes
    // commonly leave many pages unreferenced. Therefore delaying physical
    // reads frequently obviates them, improving system performance. To obtain
    // this efficiency in practice, executable and shared object files must
    // have segment images whose file offsets and virtual addresses are
    // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
    uint32_t p_align = 0x1000;  // default page size on linux
    emit(p_align);
    // the check happens only after this segment's phdr has been fully written
    if (p_offset % p_align != p_start % p_align) {
      raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
      return;
    }

    // prepare for next segment
    p_offset += size;
  }
#undef O
#undef emit
}

void write_segment(const segment& s, ostream& out) {
  for (int i = 0;  i < SIZE(s.lines);  ++i) {
    const vector<word>& w = s.lines.at(i).words;
    for (int j = 0;  j < SIZE(w);  ++j) {
      uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
      out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
    }
  }
}

// Count the words in segment 's', summed over all of its lines.
uint32_t num_words(const segment& s) {
  uint32_t total = 0;
  int line_index = 0;
  while (line_index < SIZE(s.lines)) {
    total += SIZE(s.lines.at(line_index).words);
    ++line_index;
  }
  return total;
}

:(before "End Includes")
using std::ios;