1 //: The bedrock level 1 of abstraction is now done, and we're going to start
  2 //: building levels above it that make programming in x86 machine code a
  3 //: little more ergonomic.
  4 //:
  5 //: All levels will be "pass through by default". Whatever they don't
  6 //: understand they will silently pass through to lower levels.
  7 //:
  8 //: Since raw hex bytes of machine code are always possible to inject, SubX is
  9 //: not a language, and we aren't building a compiler. This is something
 10 //: deliberately leakier. Levels are more for improving auditing, checks and
 11 //: error messages rather than for hiding low-level details.
 12 
 13 //: Translator workflow: read 'source' file. Run a series of transforms on it,
 14 //: each passing through what it doesn't understand. The final program should
 15 //: be just machine code, suitable to write to an ELF binary.
 16 //:
 17 //: Higher levels usually transform code on the basis of metadata.
 18 
 19 :(before "End Main")
 20 if (is_equal(argv[1], "translate")) {
 21   START_TRACING_UNTIL_END_OF_SCOPE;
 22   reset();
 23   program p;
 24   string output_filename;
 25   for (int i = /*skip 'subx translate'*/2;  i < argc;  ++i) {
 26     if (is_equal(argv[i], "-o")) {
 27       ++i;
 28       if (i >= argc) {
 29         print_translate_usage();
 30         cerr << "'-o' must be followed by a filename to write results to\n";
 31         exit(1);
 32       }
 33       output_filename = argv[i];
 34     }
 35     else {
 36       ifstream fin(argv[i]);
 37       if (!fin) {
 38         cerr << "could not open " << argv[i] << '\n';
 39         return 1;
 40       }
 41       parse(fin, p);
 42       if (trace_contains_errors()) return 1;
 43     }
 44   }
 45   if (p.segments.empty()) {
 46     print_translate_usage();
 47     cerr << "nothing to do; must provide at least one file to read\n";
 48     exit(1);
 49   }
 50   if (output_filename.empty()) {
 51     print_translate_usage();
 52     cerr << "must provide a filename to write to using '-o'\n";
 53     exit(1);
 54   }
 55   transform(p);
 56   if (trace_contains_errors()) return 1;
 57   save_elf(p, output_filename);
 58   if (trace_contains_errors()) {
 59     unlink(output_filename.c_str());
 60     return 1;
 61   }
 62   return 0;
 63 }
 64 
 65 :(code)
 66 void print_translate_usage() {
 67   cerr << "Usage: subx translate file1 file2 ... -o output\n";
 68 }
 69 
 70 // write out a program to a bare-bones ELF file
 71 void save_elf(const program& p, const string& filename) {
 72   ofstream out(filename.c_str(), ios::binary);
 73   write_elf_header(out, p);
 74   for (size_t i = 0;  i < p.segments.size();  ++i)
 75     write_segment(p.segments.at(i), out);
 76   out.close();
 77 }
 78 
 79 void write_elf_header(ostream& out, const program& p) {
 80   char c = '\0';
 81 #define O(X)  c = (X); out.write(&c, sizeof(c))
 82 // host is required to be little-endian
 83 #define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
 84   //// ehdr
 85   // e_ident
 86   O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
 87     O(0x1);  // 32-bit format
 88     O(0x1);  // little-endian
 89     O(0x1); O(0x0);
 90   for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
 91   // e_type
 92   O(0x02); O(0x00);
 93   // e_machine
 94   O(0x03); O(0x00);
 95   // e_version
 96   O(0x01); O(0x00); O(0x00); O(0x00);
 97   // e_entry
 98   int e_entry = p.segments.at(0).start;  // convention
 99   emit(e_entry);
100   // e_phoff -- immediately after ELF header
101   int e_phoff = 0x34;
102   emit(e_phoff);
103   // e_shoff; unused
104   int dummy32 = 0;
105   emit(dummy32);
106   // e_flags; unused
107   emit(dummy32);
108   // e_ehsize
109   uint16_t e_ehsize = 0x34;
110   emit(e_ehsize);
111   // e_phentsize
112   uint16_t e_phentsize = 0x20;
113   emit(e_phentsize);
114   // e_phnum
115   uint16_t e_phnum = SIZE(p.segments);
116   emit(e_phnum);
117   // e_shentsize
118   uint16_t dummy16 = 0x0;
119   emit(dummy16);
120   // e_shnum
121   emit(dummy16);
122   // e_shstrndx
123   emit(dummy16);
124 
125   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
126   for (int i = 0;  i < SIZE(p.segments);  ++i) {
127     //// phdr
128     // p_type
129     uint32_t p_type = 0x1;
130     emit(p_type);
131     // p_offset
132     emit(p_offset);
133     // p_vaddr
134     uint32_t p_start = p.segments.at(i).start;
135     emit(p_start);
136     // p_paddr
137     emit(p_start);
138     // p_filesz
139     uint32_t size = num_words(p.segments.at(i));
140     assert(p_offset + size < SEGMENT_SIZE);
141     emit(size);
142     // p_memsz
143     emit(size);
144     // p_flags
145     uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6;  // convention: only first segment is code
146     emit(p_flags);
147 
148     // p_align
149     // "As the system creates or augments a process image, it logically copies
150     // a file's segment to a virtual memory segment.  When—and if— the system
151     // physically reads the file depends on the program's execution behavior,
152     // system load, and so on.  A process does not require a physical page
153     // unless it references the logical page during execution, and processes
154     // commonly leave many pages unreferenced. Therefore delaying physical
155     // reads frequently obviates them, improving system performance. To obtain
156     // this efficiency in practice, executable and shared object files must
157     // have segment images whose file offsets and virtual addresses are
158     // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
159     uint32_t p_align = 0x1000;  // default page size on linux
160     emit(p_align);
161     if (p_offset % p_align != p_start % p_align) {
162       raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
163       return;
164     }
165 
166     // prepare for next segment
167     p_offset += size;
168   }
169 #undef O
170 #undef emit
171 }
172 
173 void write_segment(const segment& s, ostream& out) {
174   for (int i = 0;  i < SIZE(s.lines);  ++i) {
175     const vector<word>& w = s.lines.at(i).words;
176     for (int j = 0;  j < SIZE(w);  ++j) {
177       uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
178       out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
179     }
180   }
181 }
182 
183 uint32_t num_words(const segment& s) {
184   uint32_t sum = 0;
185   for (int i = 0;  i < SIZE(s.lines);  ++i)
186     sum += SIZE(s.lines.at(i).words);
187   return sum;
188 }
189 
190 :(before "End Includes")
191 using std::ios;