From 1f56ac6483f97ab18245c69c8c006be158c18a8d Mon Sep 17 00:00:00 2001
From: Kartik Agaram <vc@akkartik.com>
Date: Sun, 15 Jul 2018 22:59:02 -0700
Subject: 4350

Reorganize layers to introduce the translation workflow right at the start.

We also avoid duplicating parsing code. Programs are always parsed into
the `program` data structure.
---
 subx/021translate.cc | 127 +++++++++++++++++++++++----------------------------
 1 file changed, 58 insertions(+), 69 deletions(-)

(limited to 'subx/021translate.cc')

diff --git a/subx/021translate.cc b/subx/021translate.cc
index d33755ab..f9da5f19 100644
--- a/subx/021translate.cc
+++ b/subx/021translate.cc
@@ -7,47 +7,29 @@
 //: passing through what it doesn't understand. The final program should be
 //: just machine code, suitable to write to an ELF binary.
 
-:(before "End Types")
-typedef void (*transform_fn)(const string& input, string& output);
-:(before "End Globals")
-vector<transform_fn> Transform;
-
 :(before "End Main")
 if (is_equal(argv[1], "translate")) {
   assert(argc > 3);
-  string program;
-  slurp(argv[2], program);
-  perform_all_transforms(program);
-  dump_elf(program, argv[3]);
+  program p;
+  ifstream fin(argv[2]);
+  parse(fin, p);
+  if (trace_contains_errors()) return 1;
+  transform(p);
+  if (trace_contains_errors()) return 1;
+  dump_elf(p, argv[3]);
 }
 
 :(code)
-void perform_all_transforms(string& program) {
-  string& in = program;
-  string out;
-  for (int t = 0;  t < SIZE(Transform);  ++t, in.swap(out), out.clear())
-    (*Transform.at(t))(in, out);
-}
-
-// write out the current Memory contents from address 1 to End_of_program to a
-// bare-bones ELF file with a single section/segment and a hard-coded origin address.
-void dump_elf(const string& program, const char* filename) {
-  initialize_mem();
-  // load program into memory, filtering out comments
-  load_program(program);  // Not where 'program' should be loaded for running.
-                          // But we're not going to run it right now, so we
-                          // can load it anywhere.
-  // dump contents of memory into ELF binary
+// write out a program to a bare-bones ELF file
+void dump_elf(const program& p, const char* filename) {
   ofstream out(filename, ios::binary);
-  dump_elf_header(out);
-  for (size_t i = 1;  i < End_of_program;  ++i) {
-    char c = read_mem_u8(i);
-    out.write(&c, sizeof(c));
-  }
+  dump_elf_header(out, p);
+  for (size_t i = 0;  i < p.segments.size();  ++i)
+    dump_segment(p.segments.at(i), out);
   out.close();
 }
 
-void dump_elf_header(ostream& out) {
+void dump_elf_header(ostream& out, const program& p) {
   char c = '\0';
 #define O(X)  c = (X); out.write(&c, sizeof(c))
 // host is required to be little-endian
@@ -66,10 +48,10 @@ void dump_elf_header(ostream& out) {
   // e_version
   O(0x01); O(0x00); O(0x00); O(0x00);
   // e_entry
-  int e_entry = CODE_START + /*size of ehdr*/52 + /*size of phdr*/32;
+  int e_entry = p.segments.at(0).start;  // convention
   emit(e_entry);
   // e_phoff -- immediately after ELF header
-  int e_phoff = 52;
+  int e_phoff = 0x34;
   emit(e_phoff);
   // e_shoff; unused
   int dummy32 = 0;
@@ -77,13 +59,13 @@ void dump_elf_header(ostream& out) {
   // e_flags; unused
   emit(dummy32);
   // e_ehsize
-  uint16_t e_ehsize = 52;
+  uint16_t e_ehsize = 0x34;
   emit(e_ehsize);
   // e_phentsize
   uint16_t e_phentsize = 0x20;
   emit(e_phentsize);
   // e_phnum
-  uint16_t e_phnum = 0x1;
+  uint16_t e_phnum = SIZE(p.segments);
   emit(e_phnum);
   // e_shentsize
   uint16_t dummy16 = 0x0;
@@ -93,47 +75,54 @@ void dump_elf_header(ostream& out) {
   // e_shstrndx
   emit(dummy16);
 
-  //// phdr
-  // p_type
-  uint32_t p_type = 0x1;
-  emit(p_type);
-  // p_offset
-  uint32_t p_offset = /*size of ehdr*/52 + /*size of phdr*/32;
-  emit(p_offset);
-  // p_vaddr
-  emit(e_entry);
-  // p_paddr
-  emit(e_entry);
-  // p_filesz
-  uint32_t size = End_of_program - /*we're not using location 0*/1;
-  assert(size < SEGMENT_SIZE);
-  emit(size);
-  // p_memsz
-  emit(size);
-  // p_flags
-  uint32_t p_flags = 0x5;  // r-x
-  emit(p_flags);
-  // p_align
-  uint32_t p_align = 0x4;  // p_offset must be congruent to p_paddr/p_vaddr modulo p_align
-  emit(p_align);
+  uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
+  for (int i = 0;  i < SIZE(p.segments);  ++i) {
+    //// phdr
+    // p_type
+    uint32_t p_type = 0x1;
+    emit(p_type);
+    // p_offset
+    emit(p_offset);
+    // p_vaddr
+    emit(e_entry);
+    // p_paddr
+    emit(e_entry);
+    // p_filesz
+    uint32_t size = size_of(p.segments.at(i));
+    assert(size < SEGMENT_SIZE);
+    emit(size);
+    // p_memsz
+    emit(size);
+    // p_flags
+    uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6;  // convention: only first segment is code
+    emit(p_flags);
+    // p_align
+    uint32_t p_align = 0x4;
+    emit(p_align);
+
+    // prepare for next segment
+    p_offset += size;
+  }
 #undef O
 #undef emit
 }
 
-void slurp(const char* filename, string& out) {
-  ifstream fin(filename);
-  fin >> std::noskipws;
-  ostringstream fout;
-  char c = '\0';
-  while(has_data(fin)) {
-    fin >> c;
-    fout << c;
+void dump_segment(const segment& s, ostream& out) {
+  for (int i = 0;  i < SIZE(s.lines);  ++i) {
+    const vector<word>& w = s.lines.at(i).words;
+    for (int j = 0;  j < SIZE(w);  ++j) {
+      uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
+      out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
+    }
   }
-  fout.str().swap(out);
 }
 
-:(after "Begin run() For Scenarios")
-perform_all_transforms(text_bytes);
+uint32_t size_of(const segment& s) {
+  uint32_t sum = 0;
+  for (int i = 0;  i < SIZE(s.lines);  ++i)
+    sum += SIZE(s.lines.at(i).words);
+  return sum;
+}
 
 :(before "End Includes")
 using std::ios;
-- 
cgit 1.4.1-2-gfad0