about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--subx/011run.cc43
-rw-r--r--subx/028translate.cc40
-rw-r--r--subx/030---operands.cc31
-rw-r--r--subx/035labels.cc10
-rw-r--r--subx/apps/factorial.subx2
-rw-r--r--subx/examples/ex1.1.subx2
-rw-r--r--subx/examples/ex1.2.subx2
-rw-r--r--subx/examples/ex2.subx2
-rw-r--r--subx/examples/ex3.subx2
-rw-r--r--subx/examples/ex4.subx2
-rw-r--r--subx/examples/ex5.subx2
-rw-r--r--subx/examples/ex6.subx2
-rw-r--r--subx/examples/ex7.subx2
-rw-r--r--subx/examples/ex8.subx2
14 files changed, 83 insertions, 61 deletions
diff --git a/subx/011run.cc b/subx/011run.cc
index 9c024e27..bca04289 100644
--- a/subx/011run.cc
+++ b/subx/011run.cc
@@ -104,7 +104,11 @@ struct program {
 struct segment {
   uint32_t start;
   vector<line> lines;
-  segment() :start(0) {}
+  // End segment Fields
+  segment() {
+    start = 0;
+    // End segment Constructor
+  }
 };
 :(before "struct segment")
 struct line {
@@ -145,7 +149,10 @@ void parse(istream& fin, program& out) {
           out.segments.back().lines.swap(l);
         }
         segment s;
-        lin >> std::hex >> s.start;
+        string segment_title;
+        lin >> segment_title;
+        if (starts_with(segment_title, "0x"))
+          s.start = parse_int(segment_title);
         trace(99, "parse") << "new segment from " << HEXWORD << s.start << end();
         out.segments.push_back(s);
         // todo?
@@ -296,3 +303,35 @@ int32_t imm32() {
   result |= (next()<<24);
   return result;
 }
+
+:(code)
+int32_t parse_int(const string& s) {  // parse 's' as a hexadecimal integer
+  if (s.empty()) return 0;  // empty input yields 0 without raising an error
+  istringstream in(s);
+  in >> std::hex;  // all extraction below is done in base 16
+  if (s.at(0) == '-') {  // leading minus sign: extract into a signed variable
+    int32_t result = 0;
+    in >> result;
+    if (!in || !in.eof()) {  // extraction failed, or characters were left unread
+      raise << "not a number: " << s << '\n' << end();
+      return 0;
+    }
+    return result;
+  }
+  uint32_t uresult = 0;  // no minus sign: extract as unsigned first
+  in >> uresult;
+  if (!in || !in.eof()) {  // extraction failed, or characters were left unread
+    raise << "not a number: " << s << '\n' << end();
+    return 0;
+  }
+  return static_cast<int32_t>(uresult);  // convert to signed; the unit test expects 0xffffffff to give -1
+}
+:(before "End Unit Tests")
+void test_parse_int() {  // unit test for parse_int(): one check per input shape
+  CHECK_EQ(0, parse_int(""));  // empty input takes the early return
+  CHECK_EQ(0, parse_int("0"));
+  CHECK_EQ(0, parse_int("0x0"));
+  CHECK_EQ(16, parse_int("10"));  // hex always
+  CHECK_EQ(-1, parse_int("-1"));
+  CHECK_EQ(-1, parse_int("0xffffffff"));
+}
diff --git a/subx/028translate.cc b/subx/028translate.cc
index f3e30126..cc41e715 100644
--- a/subx/028translate.cc
+++ b/subx/028translate.cc
@@ -30,12 +30,28 @@ if (is_equal(argv[1], "translate")) {
   if (trace_contains_errors()) return 1;
   transform(p);
   if (trace_contains_errors()) return 1;
+  compute_segment_offsets(p);
   save_elf(p, argv[3]);
   if (trace_contains_errors()) unlink(argv[3]);
   return 0;
 }
 
+:(before "End segment Fields")
+uint32_t offset;
+:(before "End segment Constructor")
+offset = 0;
 :(code)
+void compute_segment_offsets(program& p) {  // fill in the 'offset' field of every segment in 'p'
+  uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
+  uint32_t cumulative_segment_size = 0;  // total size of the segments already visited
+  for (size_t i = 0;  i < p.segments.size();  ++i) {
+    segment& curr = p.segments.at(i);
+    curr.offset = p_offset + cumulative_segment_size;  // headers first, then all earlier segments
+//?     cerr << "offset " << i << ": " << curr.offset << '\n';
+    cumulative_segment_size += num_words(curr);  // advance by this segment's size as counted by num_words()
+  }
+}
+
 // write out a program to a bare-bones ELF file
 void save_elf(const program& p, const char* filename) {
   ofstream out(filename, ios::binary);
@@ -45,6 +61,12 @@ void save_elf(const program& p, const char* filename) {
   out.close();
 }
 
+uint32_t start(const program& p, const int segment_index) {  // starting address to use for segment 'segment_index' of 'p'
+  const segment& seg = p.segments.at(segment_index);
+  if (seg.start != 0) return seg.start;  // if start is already initialized, use it
+  return CODE_START + SEGMENT_SIZE*segment_index + seg.offset;  // otherwise derive it from the segment's index and its 'offset' field
+}
+
 void write_elf_header(ostream& out, const program& p) {
   char c = '\0';
 #define O(X)  c = (X); out.write(&c, sizeof(c))
@@ -64,7 +86,7 @@ void write_elf_header(ostream& out, const program& p) {
   // e_version
   O(0x01); O(0x00); O(0x00); O(0x00);
   // e_entry
-  int e_entry = p.segments.at(0).start;  // convention
+  int e_entry = start(p, /*segment*/0);  // convention
   emit(e_entry);
   // e_phoff -- immediately after ELF header
   int e_phoff = 0x34;
@@ -91,20 +113,22 @@ void write_elf_header(ostream& out, const program& p) {
   // e_shstrndx
   emit(dummy16);
 
-  uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
   for (int i = 0;  i < SIZE(p.segments);  ++i) {
+    const segment& curr = p.segments.at(i);
     //// phdr
     // p_type
     uint32_t p_type = 0x1;
     emit(p_type);
     // p_offset
+    uint32_t p_offset = curr.offset;
     emit(p_offset);
     // p_vaddr
-    emit(p.segments.at(i).start);
+    uint32_t p_start = start(p, i);
+    emit(p_start);
     // p_paddr
-    emit(p.segments.at(i).start);
+    emit(p_start);
     // p_filesz
-    uint32_t size = size_of(p.segments.at(i));
+    uint32_t size = num_words(curr);
     assert(size < SEGMENT_SIZE);
     emit(size);
     // p_memsz
@@ -126,8 +150,8 @@ void write_elf_header(ostream& out, const program& p) {
     // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
     uint32_t p_align = 0x1000;  // default page size on linux
     emit(p_align);
-    if (p_offset % p_align != p.segments.at(i).start % p_align) {
-      raise << "segment starting at 0x" << HEXWORD << p.segments.at(i).start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p.segments.at(i).start % p_align) << '\n' << end();
+    if (p_offset % p_align != p_start % p_align) {
+      raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
       return;
     }
 
@@ -148,7 +172,7 @@ void write_segment(const segment& s, ostream& out) {
   }
 }
 
-uint32_t size_of(const segment& s) {
+uint32_t num_words(const segment& s) {
   uint32_t sum = 0;
   for (int i = 0;  i < SIZE(s.lines);  ++i)
     sum += SIZE(s.lines.at(i).words);
diff --git a/subx/030---operands.cc b/subx/030---operands.cc
index 3e103b66..6fa2354d 100644
--- a/subx/030---operands.cc
+++ b/subx/030---operands.cc
@@ -430,37 +430,6 @@ bool is_hex_int(const string& s) {
   return s.find_first_not_of("0123456789abcdefABCDEF", pos) == string::npos;
 }
 
-int32_t parse_int(const string& s) {
-  if (s.empty()) return 0;
-  istringstream in(s);
-  in >> std::hex;
-  if (s.at(0) == '-') {
-    int32_t result = 0;
-    in >> result;
-    if (!in || !in.eof()) {
-      raise << "not a number: " << s << '\n' << end();
-      return 0;
-    }
-    return result;
-  }
-  uint32_t uresult = 0;
-  in >> uresult;
-  if (!in || !in.eof()) {
-    raise << "not a number: " << s << '\n' << end();
-    return 0;
-  }
-  return static_cast<int32_t>(uresult);
-}
-:(before "End Unit Tests")
-void test_parse_int() {
-  CHECK_EQ(0, parse_int("0"));
-  CHECK_EQ(0, parse_int("0x0"));
-  CHECK_EQ(0, parse_int("0x0"));
-  CHECK_EQ(16, parse_int("10"));  // hex always
-  CHECK_EQ(-1, parse_int("-1"));
-  CHECK_EQ(-1, parse_int("0xffffffff"));
-}
-
 :(code)
 string to_string(const line& inst) {
   ostringstream out;
diff --git a/subx/035labels.cc b/subx/035labels.cc
index 0d366d2e..659e4391 100644
--- a/subx/035labels.cc
+++ b/subx/035labels.cc
@@ -18,16 +18,6 @@
 //: be a single character long. 'a' is not a hex number, it's a variable.
 //: Later layers may add more conventions partitioning the space of names. But
 //: the above rules will remain inviolate.
-bool is_number(const string& s) {
-  if (s.at(0) == '-') return true;
-  if (isdigit(s.at(0))) return true;
-  return SIZE(s) == 2;
-}
-:(before "End Unit Tests")
-void test_is_number() {
-  CHECK(!is_number("a"));
-}
-:(code)
 void check_valid_name(const string& s) {
   if (s.empty()) {
     raise << "empty name!\n" << end();
diff --git a/subx/apps/factorial.subx b/subx/apps/factorial.subx
index de9953bf..c531041b 100644
--- a/subx/apps/factorial.subx
+++ b/subx/apps/factorial.subx
@@ -7,7 +7,7 @@
 #   $ echo $?
 #   120
 
-== 0x08048054  # code segment, after leaving room for ELF header
+== code
 # instruction                     effective address                                                   operand     displacement    immediate
 # op          subop               mod             rm32          base        index         scale       r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
diff --git a/subx/examples/ex1.1.subx b/subx/examples/ex1.1.subx
index 1cbe5dc1..56b54a10 100644
--- a/subx/examples/ex1.1.subx
+++ b/subx/examples/ex1.1.subx
@@ -8,7 +8,7 @@
 #   $ echo $?
 #   42
 
-== 0x08048054  # code segment, after leaving room for ELF header
+== code
 # opcode        ModR/M                    SIB                   displacement    immediate
 # instruction   mod, reg, Reg/Mem bits    scale, index, base
 # 1-3 bytes     0/1 byte                  0/1 byte              0/1/2/4 bytes   0/1/2/4 bytes
diff --git a/subx/examples/ex1.2.subx b/subx/examples/ex1.2.subx
index 2652037a..7dca4ec3 100644
--- a/subx/examples/ex1.2.subx
+++ b/subx/examples/ex1.2.subx
@@ -8,7 +8,7 @@
 #   $ echo $?
 #   42
 
-== 0x08048054  # code segment, after leaving room for ELF header
+== code
 # instruction                     effective address                                                   operand     displacement    immediate
 # op          subop               mod             rm32          base        index         scale       r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
diff --git a/subx/examples/ex2.subx b/subx/examples/ex2.subx
index 6463132b..0aad9232 100644
--- a/subx/examples/ex2.subx
+++ b/subx/examples/ex2.subx
@@ -7,7 +7,7 @@
 #   $ echo $?
 #   2
 
-== 0x08048054  # code segment, after leaving room for ELF header
+== code
 # instruction                     effective address                                                   operand     displacement    immediate
 # op          subop               mod             rm32          base        index         scale       r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
diff --git a/subx/examples/ex3.subx b/subx/examples/ex3.subx
index a4012f92..eb4d6c01 100644
--- a/subx/examples/ex3.subx
+++ b/subx/examples/ex3.subx
@@ -7,7 +7,7 @@
 #   $ echo $?
 #   55
 
-== 0x08048054  # code segment, after leaving room for ELF header
+== code
 # instruction                     effective address                                                   operand     displacement    immediate
 # op          subop               mod             rm32          base        index         scale       r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
diff --git a/subx/examples/ex4.subx b/subx/examples/ex4.subx
index cd7003d3..2f5b0e73 100644
--- a/subx/examples/ex4.subx
+++ b/subx/examples/ex4.subx
@@ -4,7 +4,7 @@
 #   $ subx translate ex4.subx ex4
 #   $ subx run ex4
 
-== 0x08048074  # code segment, after leaving room for ELF header and segment headers
+== code
 # instruction                     effective address                                                   operand     displacement    immediate
 # op          subop               mod             rm32          base        index         scale       r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
diff --git a/subx/examples/ex5.subx b/subx/examples/ex5.subx
index e4a2db81..400e17c3 100644
--- a/subx/examples/ex5.subx
+++ b/subx/examples/ex5.subx
@@ -4,7 +4,7 @@
 #   $ subx translate ex5.subx ex5
 #   $ subx run ex5
 
-== 0x08048054  # code segment, after leaving room for ELF header and segment headers
+== code
 # instruction                     effective address                                                   operand     displacement    immediate
 # op          subop               mod             rm32          base        index         scale       r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
diff --git a/subx/examples/ex6.subx b/subx/examples/ex6.subx
index 4c75c617..3d05f00a 100644
--- a/subx/examples/ex6.subx
+++ b/subx/examples/ex6.subx
@@ -5,7 +5,7 @@
 #   $ subx run ex6
 #   Hello, world!
 
-== 0x08048074  # code segment, after leaving room for ELF header and segment headers
+== code
 # instruction                     effective address                                                   operand     displacement    immediate
 # op          subop               mod             rm32          base        index         scale       r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
diff --git a/subx/examples/ex7.subx b/subx/examples/ex7.subx
index 6f8d3979..17a38e29 100644
--- a/subx/examples/ex7.subx
+++ b/subx/examples/ex7.subx
@@ -10,7 +10,7 @@
 #   $ echo $?
 #   97
 
-== 0x08048074  # code segment, after leaving room for ELF header and segment headers
+== code
 # instruction                     effective address                                                   operand     displacement    immediate
 # op          subop               mod             rm32          base        index         scale       r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
diff --git a/subx/examples/ex8.subx b/subx/examples/ex8.subx
index 9ea547f1..452b917d 100644
--- a/subx/examples/ex8.subx
+++ b/subx/examples/ex8.subx
@@ -14,7 +14,7 @@
 #   ...
 # Locals start from ESP-4 downwards.
 
-== 0x08048054  # code segment, after leaving room for ELF header and segment headers
+== code
 # instruction                     effective address                                                   operand     displacement    immediate
 # op          subop               mod             rm32          base        index         scale       r32
 # 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
s="w"> path, name: name, children: children)
# NOTE(review): the line above is the truncated tail of a definition that
# begins before this chunk; it is kept verbatim.
# NOTE(review): the layout below was reconstructed from syntax because the
# original indentation was lost -- confirm block nesting against a pristine copy.

# debugging

proc text*(sourceNode: SourceNode, depth: int): string =
  ## Renders `sourceNode` and, recursively, its children as an indented
  ## listing; `depth` is the nesting level used for the indent.
  let empty = " "
  result = &"{repeat(empty, depth)}SourceNode({sourceNode.source}:{sourceNode.line}:{sourceNode.column}):\n"
  for child in sourceNode.children:
    if child.kind == cSourceString:
      result.add(&"{repeat(empty, depth + 1)}{child.s}\n")
    else:
      result.add(child.node.text(depth + 1))

proc `$`*(sourceNode: SourceNode): string =
  ## String form of a node: its `text` listing starting at depth 0.
  text(sourceNode, 0)

# base64_VLQ

let integers = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="

proc encode*(i: int): string =
  ## Encodes `i` as a string of characters taken from `integers`.
  ## The sign is stored in the lowest bit; the value is then emitted in
  ## 5-bit groups, least significant first, with bit 32 set on every group
  ## except the last.
  result = ""
  var n = i
  if n < 0:
    n = (-n shl 1) or 1
  else:
    n = n shl 1

  var z = 0
  # `z == 0` guarantees at least one character, even for i == 0
  while z == 0 or n > 0:
    var e = n and 31
    n = n shr 5
    if n > 0:
      e = e or 32  # continuation flag: more groups follow

    result.add(integers[e])
    z += 1

type
  TokenState = enum Normal, String, Ident, Mangled

iterator tokenize*(line: string): (bool, string) =
  ## Splits `line` into tokens and yields `(isMangled, token)` pairs.
  ## `isMangled` is true when an underscore was seen while scanning an
  ## identifier. Text between double quotes is kept together as one token,
  ## quotes included.
  # result = @[]
  var state = Normal
  var token = ""
  var isMangled = false
  for z, ch in line:
    if ch.isAlphaAscii:
      if state == Normal:
        # a letter after punctuation starts a new identifier token
        state = Ident
        if token.len > 0:
          yield (isMangled, token)
        token = $ch
        isMangled = false
      else:
        token.add(ch)
    elif ch == '_':
      if state == Ident:
        state = Mangled
        isMangled = true
      token.add($ch)
    elif ch != '"' and not ch.isAlphaNumeric:
      if state in {Ident, Mangled}:
        # punctuation ends the current identifier
        state = Normal
        if token.len > 0:
          yield (isMangled, token)
        token = $ch
        isMangled = false
      else:
        token.add($ch)
    elif ch == '"':
      if state != String:
        # opening quote
        state = String
        if token.len > 0:
          yield (isMangled, token)
        token = $ch
        isMangled = false
      else:
        # closing quote: emit the whole string literal
        state = Normal
        token.add($ch)
        if token.len > 0:
          yield (isMangled, token)
          isMangled = false
          token = ""
    else:
      token.add($ch)
  if token.len > 0:
    yield (isMangled, token)

proc parse*(source: string, path: string): SourceNode =
  ## Builds a `SourceNode` tree from `source`, processed line by line.
  ## A line starting with `// line` is read as a location marker (a line
  ## number followed by a quoted path) and opens a new child node; every
  ## other non-empty line is tokenized and each token is wrapped in a node.
  let lines = source.splitLines()
  var lastLocation: SourceNode = nil
  result = newSourceNode(0, 0, path, @[])

  # we just use one single parent and add all nim lines
  # as its children, I guess in typical codegen
  # that happens recursively on ast level
  # we also don't have column info, but I doubt more one nim lines can compile to one js
  # maybe in macros?

  for i, originalLine in lines:
    let line = originalLine.strip
    if line.len == 0:
      continue

    # this shouldn't be a problem:
    # jsgen doesn't generate comments
    # and if you emit // line you probably know what you're doing
    if line.startsWith("// line"):
      if result.children.len > 0:
        result.children[^1].node.children.add(child(line & "\n"))
      let pos = line.find(" ", 8)
      let lineNumber = line[8 .. pos - 1].parseInt
      let linePath = line[pos + 2 .. ^2] # quotes

      lastLocation = newSourceNode(
        lineNumber,
        0,
        linePath,
        @[])
      result.children.add(child(lastLocation))
    else:
      var last: SourceNode
      for token in line.tokenize():
        var name = ""
        if token[0]:
          # mangled identifier: the name is the part before the first '_'
          name = token[1].split('_', 1)[0]

        if result.children.len > 0:
          result.children[^1].node.children.add(
            child(
              newSourceNode(
                result.children[^1].node.line,
                0,
                result.children[^1].node.source,
                token[1],
                name)))
          last = result.children[^1].node.children[^1].node
        else:
          result.children.add(
            child(
              newSourceNode(i + 1, 0, path, token[1], name)))
          last = result.children[^1].node
      let nl = "\n"
      if not last.isNil:
        # NOTE(review): this appends the newline to the node's `source` field -- confirm that is intended
        last.source.add(nl)

proc cmp(a: Mapping, b: Mapping): int =
  ## Orders mappings by generated position, then source, then original
  ## position, then name.
  var c = cmp(a.generated, b.generated)
  if c != 0:
    return c

  c = cmp(a.source, b.source)
  if c != 0:
    return c

  c = cmp(a.original, b.original)
  if c != 0:
    return c

  return cmp(a.name, b.name)

proc index*[T](elements: seq[T], element: T): int =
  ## Position of the first item equal to `element`, or -1 if there is none.
  for z in 0 ..< elements.len:
    if elements[z] == element:
      return z
  return -1

proc serializeMappings(map: SourceMapGenerator, mappings: seq[Mapping]): string =
  ## Serializes `mappings` into one string: `;` for each generated-line
  ## advance, `,` between mappings on the same line, and every field written
  ## with `encode` as a delta from the previously written value.
  var previous = Mapping(generated: (line: 1, column: 0), original: (line: 0, column: 0), name: "", source: "")
  var previousSourceId = 0
  var previousNameId = 0
  var next = ""
  var nameId = 0
  var sourceId = 0
  result = ""

  for z, mapping in mappings:
    next = ""

    if mapping.generated.line != previous.generated.line:
      # columns restart on every generated line
      previous.generated.column = 0

      while mapping.generated.line != previous.generated.line:
        next.add(";")
        previous.generated.line += 1
    else:
      if z > 0:
        # skip a mapping identical to the one just before it
        if cmp(mapping, mappings[z - 1]) == 0:
          continue
        next.add(",")

    next.add(encode(mapping.generated.column - previous.generated.column))
    previous.generated.column = mapping.generated.column

    if not mapping.noSource and mapping.source.len > 0:
      sourceId = map.sources.index(mapping.source)
      next.add(encode(sourceId - previousSourceId))
      previousSourceId = sourceId
      next.add(encode(mapping.original.line - 1 - previous.original.line))
      previous.original.line = mapping.original.line - 1
      next.add(encode(mapping.original.column - previous.original.column))
      previous.original.column = mapping.original.column

      if not mapping.noName and mapping.name.len > 0:
        nameId = map.names.index(mapping.name)
        next.add(encode(nameId - previousNameId))
        previousNameId = nameId

    result.add(next)

proc gen*(map: SourceMapGenerator): SourceMap =
  ## Produces the final `SourceMap` (version 3) from the generator: the
  ## mappings are sorted with `cmp` and then serialized.
  var mappings = map.mappings.sorted do (a: Mapping, b: Mapping) -> int:
    cmp(a, b)
  result = SourceMap(
    file: map.file,
    version: 3,
    sources: map.sources[0..^1],
    names: map.names[0..^1],
    mappings: map.serializeMappings(mappings))

proc addMapping*(map: SourceMapGenerator, mapping: Mapping) =
  ## Appends `mapping` to the generator, first registering its source and
  ## name in `map.sources` / `map.names` if they are not there yet.
  if not mapping.noSource and mapping.source notin map.sources:
    map.sources.add(mapping.source)

  if not mapping.noName and mapping.name.len > 0 and mapping.name notin map.names:
    map.names.add(mapping.name)

  # echo "map ", mapping.source, " ", mapping.original, " ", mapping.generated, " ", mapping.name
  map.mappings.add(mapping)

proc walk*(node: SourceNode, fn: proc(line: string, original: SourceNode)) =
  ## Calls `fn` for every non-empty string child in the tree below `node`,
  ## passing the string and the node that directly contains it.
  for child in node.children:
    if child.kind == cSourceString and child.s.len > 0:
      fn(child.s, node)
    else:
      child.node.walk(fn)

proc toSourceMap*(node: SourceNode, file: string): SourceMapGenerator =
  ## Walks the tree below `node`, tracking the generated line/column while
  ## doing so, and collects the resulting mappings in a new generator for
  ## `file`.
  var map = SourceMapGenerator(file: file, sources: @[], names: @[], mappings: @[])

  var generated = (line: 1, column: 0)
  var sourceMappingActive = false
  var lastOriginal = SourceNode(source: "", line: -1, column: 0, name: "", children: @[])

  node.walk do (line: string, original: SourceNode):
    if original.source.endsWith(".js"):
      # ignore it
      discard
    else:
      if original.line != -1:
        # only emit a mapping when the original location actually changed
        if lastOriginal.source != original.source or
           lastOriginal.line != original.line or
           lastOriginal.column != original.column or
           lastOriginal.name != original.name:
          map.addMapping(
            Mapping(
              source: original.source,
              original: (line: original.line, column: original.column),
              generated: (line: generated.line, column: generated.column),
              name: original.name))

        lastOriginal = SourceNode(
          source: original.source,
          line: original.line,
          column: original.column,
          name: original.name,
          children: lastOriginal.children)
        sourceMappingActive = true
      elif sourceMappingActive:
        # text without an original location: close the active mapping
        map.addMapping(
          Mapping(
            noSource: true,
            noName: true,
            generated: (line: generated.line, column: generated.column),
            original: (line: -1, column: -1)))
        lastOriginal.line = -1
        sourceMappingActive = false

    # NOTE(review): this loop is placed at callback level (run for every chunk) -- confirm nesting
    for z in 0 ..< line.len:
      if line[z] in Newlines:
        generated.line += 1
        generated.column = 0

        if z == line.len - 1:
          # the chunk ends with this newline
          lastOriginal.line = -1
          sourceMappingActive = false
        elif sourceMappingActive:
          map.addMapping(
            Mapping(
              source: original.source,
              original: (line: original.line, column: original.column),
              generated: (line: generated.line, column: generated.column),
              name: original.name))
      else:
        generated.column += 1

  map

proc genSourceMap*(source: string, outFile: string): (Rope, SourceMap) =
  ## Returns `source` with a `//# sourceMappingURL=` trailer appended (as a
  ## rope) together with the source map generated for it.
  let node = parse(source, outFile)
  let map = node.toSourceMap(file = outFile)
  ((&"{source}\n//# sourceMappingURL={outFile}.map").rope, map.gen)