From c504ca566124d1f097e7fe8a2f9f67c1c59e9ccf Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Tue, 14 Jan 2020 01:48:06 -0800 Subject: 5893 --- html/030---translate.cc.html | 409 +++++++++++++++++++++---------------------- 1 file changed, 201 insertions(+), 208 deletions(-) (limited to 'html/030---translate.cc.html') diff --git a/html/030---translate.cc.html b/html/030---translate.cc.html index c2f9e993..0055f063 100644 --- a/html/030---translate.cc.html +++ b/html/030---translate.cc.html @@ -59,219 +59,212 @@ if ('onhashchange' in window) { https://github.com/akkartik/mu/blob/master/030---translate.cc
-  1 //: The bedrock level 1 of abstraction is now done, and we're going to start
-  2 //: building levels above it that make programming in x86 machine code a
-  3 //: little more ergonomic.
-  4 //:
-  5 //: All levels will be "pass through by default". Whatever they don't
-  6 //: understand they will silently pass through to lower levels.
-  7 //:
-  8 //: Since raw hex bytes of machine code are always possible to inject, SubX is
-  9 //: not a language, and we aren't building a compiler. This is something
- 10 //: deliberately leakier. Levels are more for improving auditing, checks and
- 11 //: error messages rather than for hiding low-level details.
- 12 
- 13 //: Translator workflow: read 'source' file. Run a series of transforms on it,
- 14 //: each passing through what it doesn't understand. The final program should
- 15 //: be just machine code, suitable to write to an ELF binary.
- 16 //:
- 17 //: Higher levels usually transform code on the basis of metadata.
- 18 
- 19 :(before "End Main")
- 20 if (is_equal(argv[1], "translate")) {
- 21   // Outside of tests, traces must be explicitly requested.
- 22   if (Trace_file.is_open()) Trace_stream = new trace_stream;
- 23   reset();
- 24   // Begin subx translate
- 25   program p;
- 26   string output_filename;
- 27   for (int i = /*skip 'subx translate'*/2;  i < argc;  ++i) {
- 28     if (is_equal(argv[i], "-o")) {
- 29       ++i;
- 30       if (i >= argc) {
- 31         print_translate_usage();
- 32         cerr << "'-o' must be followed by a filename to write results to\n";
- 33         exit(1);
- 34       }
- 35       output_filename = argv[i];
- 36     }
- 37     else {
- 38       trace(2, "parse") << argv[i] << end();
- 39       ifstream fin(argv[i]);
- 40       if (!fin) {
- 41         cerr << "could not open " << argv[i] << '\n';
- 42         return 1;
- 43       }
- 44       parse(fin, p);
- 45       if (trace_contains_errors()) return 1;
- 46     }
- 47   }
- 48   if (p.segments.empty()) {
- 49     print_translate_usage();
- 50     cerr << "nothing to do; must provide at least one file to read\n";
- 51     exit(1);
- 52   }
- 53   if (output_filename.empty()) {
- 54     print_translate_usage();
- 55     cerr << "must provide a filename to write to using '-o'\n";
- 56     exit(1);
- 57   }
- 58   trace(2, "transform") << "begin" << end();
- 59   transform(p);
- 60   if (trace_contains_errors()) return 1;
- 61   trace(2, "translate") << "begin" << end();
- 62   save_elf(p, output_filename);
- 63   if (trace_contains_errors()) {
- 64     unlink(output_filename.c_str());
- 65     return 1;
- 66   }
- 67   // End subx translate
- 68   return 0;
- 69 }
- 70 
- 71 :(code)
- 72 void print_translate_usage() {
- 73   cerr << "Usage: subx translate file1 file2 ... -o output\n";
+  1 //: After that lengthy prelude to define an x86 emulator, we are now ready to
+  2 //: start translating SubX notation.
+  3 
+  4 //: Translator workflow: read 'source' file. Run a series of transforms on it,
+  5 //: each passing through what it doesn't understand. The final program should
+  6 //: be just machine code, suitable to emulate, or to write to an ELF binary.
+  7 
+  8 :(before "End Main")
+  9 if (is_equal(argv[1], "translate")) {
+ 10   // Outside of tests, traces must be explicitly requested.
+ 11   if (Trace_file.is_open()) Trace_stream = new trace_stream;
+ 12   reset();
+ 13   // Begin bootstrap translate
+ 14   program p;
+ 15   string output_filename;
+ 16   for (int i = /*skip 'bootstrap translate'*/2;  i < argc;  ++i) {
+ 17     if (is_equal(argv[i], "-o")) {
+ 18       ++i;
+ 19       if (i >= argc) {
+ 20         print_translate_usage();
+ 21         cerr << "'-o' must be followed by a filename to write results to\n";
+ 22         exit(1);
+ 23       }
+ 24       output_filename = argv[i];
+ 25     }
+ 26     else {
+ 27       trace(2, "parse") << argv[i] << end();
+ 28       ifstream fin(argv[i]);
+ 29       if (!fin) {
+ 30         cerr << "could not open " << argv[i] << '\n';
+ 31         return 1;
+ 32       }
+ 33       parse(fin, p);
+ 34       if (trace_contains_errors()) return 1;
+ 35     }
+ 36   }
+ 37   if (p.segments.empty()) {
+ 38     print_translate_usage();
+ 39     cerr << "nothing to do; must provide at least one file to read\n";
+ 40     exit(1);
+ 41   }
+ 42   if (output_filename.empty()) {
+ 43     print_translate_usage();
+ 44     cerr << "must provide a filename to write to using '-o'\n";
+ 45     exit(1);
+ 46   }
+ 47   trace(2, "transform") << "begin" << end();
+ 48   transform(p);
+ 49   if (trace_contains_errors()) return 1;
+ 50   trace(2, "translate") << "begin" << end();
+ 51   save_elf(p, output_filename);
+ 52   if (trace_contains_errors()) {
+ 53     unlink(output_filename.c_str());
+ 54     return 1;
+ 55   }
+ 56   // End bootstrap translate
+ 57   return 0;
+ 58 }
+ 59 
+ 60 :(code)
+ 61 void transform(program& p) {
+ 62   // End transform(program& p)
+ 63 }
+ 64 
+ 65 void print_translate_usage() {
+ 66   cerr << "Usage: bootstrap translate file1 file2 ... -o output\n";
+ 67 }
+ 68 
+ 69 // write out a program to a bare-bones ELF file
+ 70 void save_elf(const program& p, const string& filename) {
+ 71   ofstream out(filename.c_str(), ios::binary);
+ 72   save_elf(p, out);
+ 73   out.close();
  74 }
  75 
- 76 // write out a program to a bare-bones ELF file
- 77 void save_elf(const program& p, const string& filename) {
- 78   ofstream out(filename.c_str(), ios::binary);
- 79   save_elf(p, out);
- 80   out.close();
- 81 }
- 82 
- 83 void save_elf(const program& p, ostream& out) {
- 84   // validation: stay consistent with the self-hosted translator
- 85   if (p.entry == 0) {
- 86     raise << "no 'Entry' label found\n" << end();
- 87     return;
- 88   }
- 89   if (find(p, "data") == NULL) {
- 90     raise << "must include a 'data' segment\n" << end();
- 91     return;
- 92   }
- 93   // processing
- 94   write_elf_header(out, p);
- 95   for (size_t i = 0;  i < p.segments.size();  ++i)
- 96     write_segment(p.segments.at(i), out);
- 97 }
- 98 
- 99 void write_elf_header(ostream& out, const program& p) {
-100   char c = '\0';
-101 #define O(X)  c = (X); out.write(&c, sizeof(c))
-102 // host is required to be little-endian
-103 #define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
-104   //// ehdr
-105   // e_ident
-106   O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
-107     O(0x1);  // 32-bit format
-108     O(0x1);  // little-endian
-109     O(0x1); O(0x0);
-110   for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
-111   // e_type
-112   O(0x02); O(0x00);
-113   // e_machine
-114   O(0x03); O(0x00);
-115   // e_version
-116   O(0x01); O(0x00); O(0x00); O(0x00);
-117   // e_entry
-118   uint32_t e_entry = p.entry;
-119   // Override e_entry
-120   emit(e_entry);
-121   // e_phoff -- immediately after ELF header
-122   uint32_t e_phoff = 0x34;
-123   emit(e_phoff);
-124   // e_shoff; unused
-125   uint32_t dummy32 = 0;
-126   emit(dummy32);
-127   // e_flags; unused
-128   emit(dummy32);
-129   // e_ehsize
-130   uint16_t e_ehsize = 0x34;
-131   emit(e_ehsize);
-132   // e_phentsize
-133   uint16_t e_phentsize = 0x20;
-134   emit(e_phentsize);
-135   // e_phnum
-136   uint16_t e_phnum = SIZE(p.segments);
-137   emit(e_phnum);
-138   // e_shentsize
-139   uint16_t dummy16 = 0x0;
-140   emit(dummy16);
-141   // e_shnum
-142   emit(dummy16);
-143   // e_shstrndx
-144   emit(dummy16);
-145 
-146   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
-147   for (int i = 0;  i < SIZE(p.segments);  ++i) {
-148     const segment& curr = p.segments.at(i);
-149     //// phdr
-150     // p_type
-151     uint32_t p_type = 0x1;
-152     emit(p_type);
-153     // p_offset
-154     emit(p_offset);
-155     // p_vaddr
-156     uint32_t p_start = curr.start;
-157     emit(p_start);
-158     // p_paddr
-159     emit(p_start);
-160     // p_filesz
-161     uint32_t size = num_words(curr);
-162     assert(p_offset + size < SEGMENT_ALIGNMENT);
-163     emit(size);
-164     // p_memsz
-165     emit(size);
-166     // p_flags
-167     uint32_t p_flags = (curr.name == "code") ? /*r-x*/0x5 : /*rw-*/0x6;
-168     emit(p_flags);
-169 
-170     // p_align
-171     // "As the system creates or augments a process image, it logically copies
-172     // a file's segment to a virtual memory segment.  When—and if— the system
-173     // physically reads the file depends on the program's execution behavior,
-174     // system load, and so on.  A process does not require a physical page
-175     // unless it references the logical page during execution, and processes
-176     // commonly leave many pages unreferenced. Therefore delaying physical
-177     // reads frequently obviates them, improving system performance. To obtain
-178     // this efficiency in practice, executable and shared object files must
-179     // have segment images whose file offsets and virtual addresses are
-180     // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
-181     uint32_t p_align = 0x1000;  // default page size on linux
-182     emit(p_align);
-183     if (p_offset % p_align != p_start % p_align) {
-184       raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
-185       return;
-186     }
+ 76 void save_elf(const program& p, ostream& out) {
+ 77   // validation: stay consistent with the self-hosted translator
+ 78   if (p.entry == 0) {
+ 79     raise << "no 'Entry' label found\n" << end();
+ 80     return;
+ 81   }
+ 82   if (find(p, "data") == NULL) {
+ 83     raise << "must include a 'data' segment\n" << end();
+ 84     return;
+ 85   }
+ 86   // processing
+ 87   write_elf_header(out, p);
+ 88   for (size_t i = 0;  i < p.segments.size();  ++i)
+ 89     write_segment(p.segments.at(i), out);
+ 90 }
+ 91 
+ 92 void write_elf_header(ostream& out, const program& p) {
+ 93   char c = '\0';
+ 94 #define O(X)  c = (X); out.write(&c, sizeof(c))
+ 95 // host is required to be little-endian
+ 96 #define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
+ 97   //// ehdr
+ 98   // e_ident
+ 99   O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
+100     O(0x1);  // 32-bit format
+101     O(0x1);  // little-endian
+102     O(0x1); O(0x0);
+103   for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
+104   // e_type
+105   O(0x02); O(0x00);
+106   // e_machine
+107   O(0x03); O(0x00);
+108   // e_version
+109   O(0x01); O(0x00); O(0x00); O(0x00);
+110   // e_entry
+111   uint32_t e_entry = p.entry;
+112   // Override e_entry
+113   emit(e_entry);
+114   // e_phoff -- immediately after ELF header
+115   uint32_t e_phoff = 0x34;
+116   emit(e_phoff);
+117   // e_shoff; unused
+118   uint32_t dummy32 = 0;
+119   emit(dummy32);
+120   // e_flags; unused
+121   emit(dummy32);
+122   // e_ehsize
+123   uint16_t e_ehsize = 0x34;
+124   emit(e_ehsize);
+125   // e_phentsize
+126   uint16_t e_phentsize = 0x20;
+127   emit(e_phentsize);
+128   // e_phnum
+129   uint16_t e_phnum = SIZE(p.segments);
+130   emit(e_phnum);
+131   // e_shentsize
+132   uint16_t dummy16 = 0x0;
+133   emit(dummy16);
+134   // e_shnum
+135   emit(dummy16);
+136   // e_shstrndx
+137   emit(dummy16);
+138 
+139   uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
+140   for (int i = 0;  i < SIZE(p.segments);  ++i) {
+141     const segment& curr = p.segments.at(i);
+142     //// phdr
+143     // p_type
+144     uint32_t p_type = 0x1;
+145     emit(p_type);
+146     // p_offset
+147     emit(p_offset);
+148     // p_vaddr
+149     uint32_t p_start = curr.start;
+150     emit(p_start);
+151     // p_paddr
+152     emit(p_start);
+153     // p_filesz
+154     uint32_t size = num_words(curr);
+155     assert(p_offset + size < SEGMENT_ALIGNMENT);
+156     emit(size);
+157     // p_memsz
+158     emit(size);
+159     // p_flags
+160     uint32_t p_flags = (curr.name == "code") ? /*r-x*/0x5 : /*rw-*/0x6;
+161     emit(p_flags);
+162 
+163     // p_align
+164     // "As the system creates or augments a process image, it logically copies
+165     // a file's segment to a virtual memory segment.  When—and if— the system
+166     // physically reads the file depends on the program's execution behavior,
+167     // system load, and so on.  A process does not require a physical page
+168     // unless it references the logical page during execution, and processes
+169     // commonly leave many pages unreferenced. Therefore delaying physical
+170     // reads frequently obviates them, improving system performance. To obtain
+171     // this efficiency in practice, executable and shared object files must
+172     // have segment images whose file offsets and virtual addresses are
+173     // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
+174     uint32_t p_align = 0x1000;  // default page size on linux
+175     emit(p_align);
+176     if (p_offset % p_align != p_start % p_align) {
+177       raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
+178       return;
+179     }
+180 
+181     // prepare for next segment
+182     p_offset += size;
+183   }
+184 #undef O
+185 #undef emit
+186 }
 187 
-188     // prepare for next segment
-189     p_offset += size;
-190   }
-191 #undef O
-192 #undef emit
-193 }
-194 
-195 void write_segment(const segment& s, ostream& out) {
-196   for (int i = 0;  i < SIZE(s.lines);  ++i) {
-197     const vector<word>& w = s.lines.at(i).words;
-198     for (int j = 0;  j < SIZE(w);  ++j) {
-199       uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
-200       out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
-201     }
-202   }
+188 void write_segment(const segment& s, ostream& out) {
+189   for (int i = 0;  i < SIZE(s.lines);  ++i) {
+190     const vector<word>& w = s.lines.at(i).words;
+191     for (int j = 0;  j < SIZE(w);  ++j) {
+192       uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
+193       out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
+194     }
+195   }
+196 }
+197 
+198 uint32_t num_words(const segment& s) {
+199   uint32_t sum = 0;
+200   for (int i = 0;  i < SIZE(s.lines);  ++i)
+201     sum += SIZE(s.lines.at(i).words);
+202   return sum;
 203 }
 204 
-205 uint32_t num_words(const segment& s) {
-206   uint32_t sum = 0;
-207   for (int i = 0;  i < SIZE(s.lines);  ++i)
-208     sum += SIZE(s.lines.at(i).words);
-209   return sum;
-210 }
-211 
-212 :(before "End Includes")
-213 using std::ios;
+205 :(before "End Includes")
+206 using std::ios;
 
-- cgit 1.4.1-2-gfad0