https://github.com/akkartik/mu/blob/main/linux/bootstrap/030translate.cc
  1 //: After that lengthy prelude to define an x86 emulator, we are now ready to
  2 //: start translating SubX notation.
  3 
  4 //: Translator workflow: read 'source' file. Run a series of transforms on it,
  5 //: each passing through what it doesn't understand. The final program should
  6 //: be just machine code, suitable to emulate, or to write to an ELF binary.
  7 
  8 :(before "End Main")
  9 if (is_equal(argv[1], "translate")) {
 10   // Outside of tests, traces must be explicitly requested.
 11   if (Trace_file.is_open()) Trace_stream = new trace_stream;
 12   reset();
 13   // Begin bootstrap translate
 14   program p;
 15   string output_filename;
 16   for (int i = /*skip 'bootstrap translate'*/2;  i < argc;  ++i) {
 17     if (is_equal(argv[i], "-o")) {
 18       ++i;
 19       if (i >= argc) {
 20         print_translate_usage();
 21         cerr << "'-o' must be followed by a filename to write results to\n";
 22         exit(1);
 23       }
 24       output_filename = argv[i];
 25     }
 26     else {
 27       trace(2, "parse") << argv[i] << end();
 28       ifstream fin(argv[i]);
 29       if (!fin) {
 30         cerr << "could not open " << argv[i] << '\n';
 31         return 1;
 32       }
 33       parse(fin, p);
 34       if (trace_contains_errors()) return 1;
 35     }
 36   }
 37   if (p.segments.empty()) {
 38     print_translate_usage();
 39     cerr << "nothing to do; must provide at least one file to read\n";
 40     exit(1);
 41   }
 42   if (output_filename.empty()) {
 43     print_translate_usage();
 44     cerr << "must provide a filename to write to using '-o'\n";
 45     exit(1);
 46   }
 47   trace(2, "transform") << "begin" << end();
 48   transform(p);
 49   if (trace_contains_errors()) return 1;
 50   trace(2, "translate") << "begin" << end();
 51   save_elf(p, output_filename);
 52   if (trace_contains_errors()) {
 53     unlink(output_filename.c_str());
 54     return 1;
 55   }
 56   // End bootstrap translate
 57   return 0;
 58 }
 59 
 60 :(code)
// Run the translator's transforms over 'p', which is taken by mutable
// reference. In this layer the body holds nothing but the marker comment
// below, so calling it changes nothing.
// NOTE(review): presumably other layers add their transforms ahead of that
// marker, the same way this file adds code ahead of "End Main" -- confirm
// against the other layers. Keep the marker text exactly as is.
void transform(program& p) {
  // End transform(program& p)
}
 64 
 65 void print_translate_usage() {
 66   cerr << "Usage: bootstrap translate file1 file2 ... -o output\n";
 67 }
 68 
 69 // write out a program to a bare-bones ELF file
 70 void save_elf(const program& p, const string& filename) {
 71   ofstream out(filename.c_str(), ios::binary);
 72   save_elf(p, out);
 73   out.close();
 74 }
 75 
 76 void save_elf(const program& p, ostream& out) {
 77   // validation: stay consistent with the self-hosted translator
 78   if (p.entry == 0) {
 79     raise << "no 'Entry' label found\n" << end();
 80     return;
 81   }
 82   if (find(p, "data") == NULL) {
 83     raise << "must include a 'data' segment\n" << end();
 84     return;
 85   }
 86   // processing
 87   write_elf_header(out, p);
 88   for (size_t i = 0;  i < p.segments.size();  ++i)
 89     write_segment(p.segments.at(i), out);
 90 }
 91 
 92 void write_elf_header(ostream& out, const program& p) {
 93   char c = '\0';
 94 #define O(X)  c = (X); out.write(&c, sizeof(c))
 95 // host is required to be little-endian
 96 #define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
 97   //// ehdr
 98   // e_ident
 99   O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
100     O(0x1);  // 32-bit format
101     O(0x1);  // little-endian
102     O(0x1); O(0x0);
103   for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
104   // e_type
105   O(0x02); O(0x00);
106   // e_machine
107   O(0x03); O(0x00);
108   // e_version
109   O(0x01); O(0x00); O(0x00); O(0x00);
110   // e_entry
111   uint32_t e_entry = p.entry;
112   // Override e_entry
113   emit(e_entry);
114   // e_phoff -- immediately after ELF header
115   uint32_t e_phoff = 0x34;
116   emit(e_phoff);
117   // e_shoff; unused
118   uint32_t dummy32 = 0;
119   emit(dummy32);
120   // e_flags; unused
121   emit(dummy32);
122   // e_ehsize
123   uint16_t e_ehsize = 0x34;
124   emit(e_ehsize);
125   // e_phentsize
126   uint16_t e_phentsize = 0x20;
127   emit(e_phentsize);
128   // e_phnum
129   uint16_t e_phnum = SIZE(p.segments);
130   emit(e_phnum);
131   // e_shentsize
132   uint16_t dummy16 = 0x0;
133   emit(dummy16);
134   // e_shnum
135   emit(dummy16);
136   // e_shstrndx
137   emit(dummy16);
138 
139   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
140   for (int i = 0;  i < SIZE(p.segments);  ++i) {
141     const segment& curr = p.segments.at(i);
142     //// phdr
143     // p_type
144     uint32_t p_type = 0x1;
145     emit(p_type);
146     // p_offset
147     emit(p_offset);
148     // p_vaddr
149     uint32_t p_start = curr.start;
150     emit(p_start);
151     // p_paddr
152     emit(p_start);
153     // p_filesz
154     uint32_t size = num_words(curr);
155     assert(p_offset + size < SEGMENT_ALIGNMENT);
156     emit(size);
157     // p_memsz
158     emit(size);
159     // p_flags
160     uint32_t p_flags = (curr.name == "code") ? /*r-x*/0x5 : /*rw-*/0x6;
161     emit(p_flags);
162 
163     // p_align
164     // "As the system creates or augments a process image, it logically copies
165     // a file's segment to a virtual memory segment.  When—and if— the system
166     // physically reads the file depends on the program's execution behavior,
167     // system load, and so on.  A process does not require a physical page
168     // unless it references the logical page during execution, and processes
169     // commonly leave many pages unreferenced. Therefore delaying physical
170     // reads frequently obviates them, improving system performance. To obtain
171     // this efficiency in practice, executable and shared object files must
172     // have segment images whose file offsets and virtual addresses are
173     // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
174     uint32_t p_align = 0x1000;  // default page size on linux
175     emit(p_align);
176     if (p_offset % p_align != p_start % p_align) {
177       raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
178       return;
179     }
180 
181     // prepare for next segment
182     p_offset += size;
183   }
184 #undef O
185 #undef emit
186 }
187 
188 void write_segment(const segment& s, ostream& out) {
189   for (int i = 0;  i < SIZE(s.lines);  ++i) {
190     const vector<word>& w = s.lines.at(i).words;
191     for (int j = 0;  j < SIZE(w);  ++j) {
192       uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
193       out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
194     }
195   }
196 }
197 
198 uint32_t num_words(const segment& s) {
199   uint32_t sum = 0;
200   for (int i = 0;  i < SIZE(s.lines);  ++i)
201     sum += SIZE(s.lines.at(i).words);
202   return sum;
203 }
204 
205 :(before "End Includes")
206 using std::ios;