Mu - 030---translate.cc

From 91624dbacabd0b437bbcce3fdb3dc8e67f577fa6 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Mon, 9 Dec 2019 01:32:48 -0800 Subject: 5807 --- html/030---translate.cc.html | 278 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 html/030---translate.cc.html (limited to 'html/030---translate.cc.html') diff --git a/html/030---translate.cc.html b/html/030---translate.cc.html new file mode 100644 index 00000000..c2f9e993 --- /dev/null +++ b/html/030---translate.cc.html @@ -0,0 +1,278 @@ + + + + +Mu - 030---translate.cc + + + + + + + + + + +https://github.com/akkartik/mu/blob/master/030---translate.cc +
+  1 //: The bedrock level 1 of abstraction is now done, and we're going to start
+  2 //: building levels above it that make programming in x86 machine code a
+  3 //: little more ergonomic.
+  4 //:
+  5 //: All levels will be "pass through by default". Whatever they don't
+  6 //: understand they will silently pass through to lower levels.
+  7 //:
+  8 //: Since raw hex bytes of machine code are always possible to inject, SubX is
+  9 //: not a language, and we aren't building a compiler. This is something
+ 10 //: deliberately leakier. Levels are more for improving auditing, checks and
+ 11 //: error messages rather than for hiding low-level details.
+ 12 
+ 13 //: Translator workflow: read 'source' file. Run a series of transforms on it,
+ 14 //: each passing through what it doesn't understand. The final program should
+ 15 //: be just machine code, suitable to write to an ELF binary.
+ 16 //:
+ 17 //: Higher levels usually transform code on the basis of metadata.
+ 18 
+ 19 :(before "End Main")
+ 20 if (is_equal(argv[1], "translate")) {
+ 21   // Outside of tests, traces must be explicitly requested.
+ 22   if (Trace_file.is_open()) Trace_stream = new trace_stream;
+ 23   reset();
+ 24   // Begin subx translate
+ 25   program p;
+ 26   string output_filename;
+ 27   for (int i = /*skip 'subx translate'*/2;  i < argc;  ++i) {
+ 28     if (is_equal(argv[i], "-o")) {
+ 29       ++i;
+ 30       if (i >= argc) {
+ 31         print_translate_usage();
+ 32         cerr << "'-o' must be followed by a filename to write results to\n";
+ 33         exit(1);
+ 34       }
+ 35       output_filename = argv[i];
+ 36     }
+ 37     else {
+ 38       trace(2, "parse") << argv[i] << end();
+ 39       ifstream fin(argv[i]);
+ 40       if (!fin) {
+ 41         cerr << "could not open " << argv[i] << '\n';
+ 42         return 1;
+ 43       }
+ 44       parse(fin, p);
+ 45       if (trace_contains_errors()) return 1;
+ 46     }
+ 47   }
+ 48   if (p.segments.empty()) {
+ 49     print_translate_usage();
+ 50     cerr << "nothing to do; must provide at least one file to read\n";
+ 51     exit(1);
+ 52   }
+ 53   if (output_filename.empty()) {
+ 54     print_translate_usage();
+ 55     cerr << "must provide a filename to write to using '-o'\n";
+ 56     exit(1);
+ 57   }
+ 58   trace(2, "transform") << "begin" << end();
+ 59   transform(p);
+ 60   if (trace_contains_errors()) return 1;
+ 61   trace(2, "translate") << "begin" << end();
+ 62   save_elf(p, output_filename);
+ 63   if (trace_contains_errors()) {
+ 64     unlink(output_filename.c_str());
+ 65     return 1;
+ 66   }
+ 67   // End subx translate
+ 68   return 0;
+ 69 }
+ 70 
+ 71 :(code)
// Print a one-line summary of the 'subx translate' command line to stderr.
void print_translate_usage() {
  static const char usage[] = "Usage: subx translate file1 file2 ... -o output";
  cerr << usage << '\n';
}
+ 75 
+ 76 // write out a program to a bare-bones ELF file
+ 77 void save_elf(const program& p, const string& filename) {
+ 78   ofstream out(filename.c_str(), ios::binary);
+ 79   save_elf(p, out);
+ 80   out.close();
+ 81 }
+ 82 
+ 83 void save_elf(const program& p, ostream& out) {
+ 84   // validation: stay consistent with the self-hosted translator
+ 85   if (p.entry == 0) {
+ 86     raise << "no 'Entry' label found\n" << end();
+ 87     return;
+ 88   }
+ 89   if (find(p, "data") == NULL) {
+ 90     raise << "must include a 'data' segment\n" << end();
+ 91     return;
+ 92   }
+ 93   // processing
+ 94   write_elf_header(out, p);
+ 95   for (size_t i = 0;  i < p.segments.size();  ++i)
+ 96     write_segment(p.segments.at(i), out);
+ 97 }
+ 98 
+ 99 void write_elf_header(ostream& out, const program& p) {
+100   char c = '\0';
+101 #define O(X)  c = (X); out.write(&c, sizeof(c))
+102 // host is required to be little-endian
+103 #define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
+104   //// ehdr
+105   // e_ident
+106   O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
+107     O(0x1);  // 32-bit format
+108     O(0x1);  // little-endian
+109     O(0x1); O(0x0);
+110   for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
+111   // e_type
+112   O(0x02); O(0x00);
+113   // e_machine
+114   O(0x03); O(0x00);
+115   // e_version
+116   O(0x01); O(0x00); O(0x00); O(0x00);
+117   // e_entry
+118   uint32_t e_entry = p.entry;
+119   // Override e_entry
+120   emit(e_entry);
+121   // e_phoff -- immediately after ELF header
+122   uint32_t e_phoff = 0x34;
+123   emit(e_phoff);
+124   // e_shoff; unused
+125   uint32_t dummy32 = 0;
+126   emit(dummy32);
+127   // e_flags; unused
+128   emit(dummy32);
+129   // e_ehsize
+130   uint16_t e_ehsize = 0x34;
+131   emit(e_ehsize);
+132   // e_phentsize
+133   uint16_t e_phentsize = 0x20;
+134   emit(e_phentsize);
+135   // e_phnum
+136   uint16_t e_phnum = SIZE(p.segments);
+137   emit(e_phnum);
+138   // e_shentsize
+139   uint16_t dummy16 = 0x0;
+140   emit(dummy16);
+141   // e_shnum
+142   emit(dummy16);
+143   // e_shstrndx
+144   emit(dummy16);
+145 
+146   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
+147   for (int i = 0;  i < SIZE(p.segments);  ++i) {
+148     const segment& curr = p.segments.at(i);
+149     //// phdr
+150     // p_type
+151     uint32_t p_type = 0x1;
+152     emit(p_type);
+153     // p_offset
+154     emit(p_offset);
+155     // p_vaddr
+156     uint32_t p_start = curr.start;
+157     emit(p_start);
+158     // p_paddr
+159     emit(p_start);
+160     // p_filesz
+161     uint32_t size = num_words(curr);
+162     assert(p_offset + size < SEGMENT_ALIGNMENT);
+163     emit(size);
+164     // p_memsz
+165     emit(size);
+166     // p_flags
+167     uint32_t p_flags = (curr.name == "code") ? /*r-x*/0x5 : /*rw-*/0x6;
+168     emit(p_flags);
+169 
+170     // p_align
+171     // "As the system creates or augments a process image, it logically copies
+172     // a file's segment to a virtual memory segment.  When—and if— the system
+173     // physically reads the file depends on the program's execution behavior,
+174     // system load, and so on.  A process does not require a physical page
+175     // unless it references the logical page during execution, and processes
+176     // commonly leave many pages unreferenced. Therefore delaying physical
+177     // reads frequently obviates them, improving system performance. To obtain
+178     // this efficiency in practice, executable and shared object files must
+179     // have segment images whose file offsets and virtual addresses are
+180     // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
+181     uint32_t p_align = 0x1000;  // default page size on linux
+182     emit(p_align);
+183     if (p_offset % p_align != p_start % p_align) {
+184       raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
+185       return;
+186     }
+187 
+188     // prepare for next segment
+189     p_offset += size;
+190   }
+191 #undef O
+192 #undef emit
+193 }
+194 
+195 void write_segment(const segment& s, ostream& out) {
+196   for (int i = 0;  i < SIZE(s.lines);  ++i) {
+197     const vector<word>& w = s.lines.at(i).words;
+198     for (int j = 0;  j < SIZE(w);  ++j) {
+199       uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
+200       out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
+201     }
+202   }
+203 }
+204 
+205 uint32_t num_words(const segment& s) {
+206   uint32_t sum = 0;
+207   for (int i = 0;  i < SIZE(s.lines);  ++i)
+208     sum += SIZE(s.lines.at(i).words);
+209   return sum;
+210 }
+211 
+212 :(before "End Includes")
+213 using std::ios;
+
+ + + -- cgit 1.4.1-2-gfad0