diff options
-rw-r--r-- | apps/survey.subx | 86 | ||||
-rw-r--r-- | stats.txt | 82 | ||||
-rw-r--r-- | treeshake.cc | 49 | ||||
-rwxr-xr-x | treeshake_all | 40 | ||||
-rwxr-xr-x | treeshake_ntranslate | 40 | ||||
-rwxr-xr-x | treeshake_translate | 4 |
6 files changed, 198 insertions, 103 deletions
diff --git a/apps/survey.subx b/apps/survey.subx index 701e4abe..8563116f 100644 --- a/apps/survey.subx +++ b/apps/survey.subx @@ -3313,8 +3313,8 @@ $emit-headers:end: emit-elf-header: # out : (address buffered-file), segments : (address stream {string, segment-info}), labels : (address stream {string, label-info}) # pseudocode - # *Elf_e_entry = get(labels, "Entry")->address - # *Elf_e_phnum = segments->write / 16 # size of a row + # *$Elf_e_entry = get(labels, "Entry")->address + # *$Elf_e_phnum = segments->write / 16 # size of a row # emit-hex-array(out, Elf_header) # write-buffered(out, "\n") # @@ -3325,7 +3325,7 @@ emit-elf-header: # out : (address buffered-file), segments : (address stream {s 50/push-eax 51/push-ecx 52/push-edx # just because we need to call idiv - # *Elf_e_entry = get(labels, "Entry")->address + # *$Elf_e_entry = get(labels, "Entry")->address # . eax = labels 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 0/r32/eax 0x10/disp8 . # copy *(ebp+16) to eax # . label-info/eax = get(labels, "Entry", row-size=16, "label table") @@ -3340,9 +3340,9 @@ emit-elf-header: # out : (address buffered-file), segments : (address stream {s 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0x10/imm32 # add to esp # . eax = label-info->address 8b/copy 1/mod/*+disp8 0/rm32/eax . . . 0/r32/eax 8/disp8 . # copy *(eax+8) to eax - # . *Elf_e_entry = eax - 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax Elf_e_entry/disp32 # copy eax to *Elf_e_entry - # *Elf_e_phnum = segments->write / 0x10 + # . *$Elf_e_entry = eax + 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax $Elf_e_entry/disp32 # copy eax to *$Elf_e_entry + # *$Elf_e_phnum = segments->write / 0x10 # . eax = segments 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 0/r32/eax 0xc/disp8 . # copy *(ebp+12) to eax # . len/eax = segments->write @@ -3351,8 +3351,8 @@ emit-elf-header: # out : (address buffered-file), segments : (address stream {s b9/copy-to-ecx 0x10/imm32 31/xor 3/mod/direct 2/rm32/edx . . . 2/r32/edx . . # clear edx f7 7/subop/idiv 3/mod/direct 1/rm32/ecx . . . . . . # divide edx:eax by ecx, storing quotient in eax and remainder in edx - # . *Elf_e_phnum = eax - 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax Elf_e_phnum/disp32 # copy eax to *Elf_e_phnum + # . *$Elf_e_phnum = eax + 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax $Elf_e_phnum/disp32 # copy eax to *$Elf_e_phnum # emit-hex-array(out, Elf_header) # . . push args 68/push Elf_header/imm32 @@ -3381,15 +3381,15 @@ $emit-elf-header:end: emit-elf-program-header-entry: # out : (address buffered-file), curr-segment : (address {string, segment-info}) # pseudocode: - # *Elf_p_offset = curr-segment->file-offset - # *Elf_p_vaddr = curr-segment->address - # *Elf_p_paddr = curr-segment->address - # *Elf_p_filesz = curr-segment->size - # *Elf_p_memsz = curr-segment->size + # *$Elf_p_offset = curr-segment->file-offset + # *$Elf_p_vaddr = curr-segment->address + # *$Elf_p_paddr = curr-segment->address + # *$Elf_p_filesz = curr-segment->size + # *$Elf_p_memsz = curr-segment->size # if curr-segment->name == "code" - # *Elf_p_flags = 5 # r-x + # *$Elf_p_flags = 5 # r-x # else - # *Elf_p_flags = 6 # rw- + # *$Elf_p_flags = 6 # rw- # emit-hex-array(out, Elf_program_header_entry) # write-buffered(out, "\n") # @@ -3401,25 +3401,25 @@ emit-elf-program-header-entry: # out : (address buffered-file), curr-segment : 56/push-esi # esi = curr-segment 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 0xc/disp8 . # copy *(ebp+12) to esi - # *Elf_p_offset = curr-segment->file-offset + # *$Elf_p_offset = curr-segment->file-offset # . eax = curr-segment->file-offset 8b/copy 1/mod/*+disp8 6/rm32/esi . . . 0/r32/eax 8/disp8 . # copy *(esi+8) to eax - # . *Elf_p_offset = eax - 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax Elf_p_offset/disp32 # copy eax to *Elf_p_offset - # *Elf_p_vaddr = curr-segment->address + # . *$Elf_p_offset = eax + 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax $Elf_p_offset/disp32 # copy eax to *$Elf_p_offset + # *$Elf_p_vaddr = curr-segment->address # . eax = curr-segment->address 8b/copy 1/mod/*+disp8 6/rm32/esi . . . 0/r32/eax 4/disp8 . # copy *(esi+4) to eax - # . *Elf_p_vaddr = eax - 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax Elf_p_vaddr/disp32 # copy eax to *Elf_p_vaddr - # *Elf_p_paddr = curr-segment->address - 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax Elf_p_paddr/disp32 # copy eax to *Elf_p_paddr - # *Elf_p_filesz = curr-segment->size + # . *$Elf_p_vaddr = eax + 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax $Elf_p_vaddr/disp32 # copy eax to *$Elf_p_vaddr + # *$Elf_p_paddr = curr-segment->address + 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax $Elf_p_paddr/disp32 # copy eax to *$Elf_p_paddr + # *$Elf_p_filesz = curr-segment->size # . eax = curr-segment->size 8b/copy 1/mod/*+disp8 6/rm32/esi . . . 0/r32/eax 0xc/disp8 . # copy *(esi+12) to eax - # . *Elf_p_filesz = eax - 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax Elf_p_filesz/disp32 # copy eax to *Elf_p_filesz - # *Elf_p_memsz = curr-segment->size - 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax Elf_p_memsz/disp32 # copy eax to *Elf_p_memsz + # . *$Elf_p_filesz = eax + 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax $Elf_p_filesz/disp32 # copy eax to *$Elf_p_filesz + # *$Elf_p_memsz = curr-segment->size + 89/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax $Elf_p_memsz/disp32 # copy eax to *$Elf_p_memsz # if (!string-equal?(curr-segment->name, "code") goto next check # . eax = curr-segment->name 8b/copy 0/mod/indirect 6/rm32/esi . . . 0/r32/eax . . # copy *esi to eax @@ -3434,12 +3434,12 @@ emit-elf-program-header-entry: # out : (address buffered-file), curr-segment : # . if (eax == 0) goto next check 3d/compare-eax-and 0/imm32 74/jump-if-equal $emit-elf-program-header-entry:data/disp8 - # *Elf_p_flags = r-x - c7 0/subop/copy 0/mod/indirect 5/rm32/.disp32 . . . Elf_p_flags/disp32 5/imm32 # copy to *Elf_p_flags + # *$Elf_p_flags = r-x + c7 0/subop/copy 0/mod/indirect 5/rm32/.disp32 . . . $Elf_p_flags/disp32 5/imm32 # copy to *$Elf_p_flags eb/jump $emit-elf-program-header-entry:really-emit/disp8 $emit-elf-program-header-entry:data: - # otherwise *Elf_p_flags = rw- - c7 0/subop/copy 0/mod/indirect 5/rm32/.disp32 . . . Elf_p_flags/disp32 6/imm32 # copy to *Elf_p_flags + # otherwise *$Elf_p_flags = rw- + c7 0/subop/copy 0/mod/indirect 5/rm32/.disp32 . . . $Elf_p_flags/disp32 6/imm32 # copy to *$Elf_p_flags $emit-elf-program-header-entry:really-emit: # emit-hex-array(out, Elf_program_header_entry) # . . push args @@ -4667,7 +4667,7 @@ test-num-bytes-handles-imm32: == data # This block of bytes gets copied to the start of the output ELF file, with -# some fields filled in. +# some fields (the ones with labels capitalized) filled in. # http://www.sco.com/developers/gabi/latest/ch4.eheader.html Elf_header: # - length @@ -4683,7 +4683,7 @@ $e_machine: 03 00 $e_version: 1/imm32 -Elf_e_entry: +$Elf_e_entry: 0x09000000/imm32 # approximate default; must be updated $e_phoff: 0x34/imm32 # offset for the 'program header table' containing segment headers @@ -4695,7 +4695,7 @@ $e_ehsize: 0x34 00 $e_phentsize: 0x20 00 -Elf_e_phnum: +$Elf_e_phnum: 00 00 # number of segments; must be updated $e_shentsize: 00 00 # no sections @@ -4713,17 +4713,17 @@ Elf_program_header_entry: # - data $p_type: 1/imm32/PT_LOAD -Elf_p_offset: +$Elf_p_offset: 0/imm32 # byte offset in the file at which a segment begins; must be updated -Elf_p_vaddr: +$Elf_p_vaddr: 0/imm32 # starting address to store the segment at before running the program -Elf_p_paddr: - 0/imm32 # should have same value as Elf_p_vaddr -Elf_p_filesz: +$Elf_p_paddr: + 0/imm32 # should have same value as $Elf_p_vaddr +$Elf_p_filesz: 0/imm32 -Elf_p_memsz: - 0/imm32 # should have same value as Elf_p_filesz -Elf_p_flags: +$Elf_p_memsz: + 0/imm32 # should have same value as $Elf_p_filesz +$Elf_p_flags: 6/imm32/rw- # read/write/execute permissions for the segment; must be updated for the code segment $p_align: # we hold this constant; changing it will require adjusting the way we diff --git a/stats.txt b/stats.txt index fcf0191f..393f82fc 100644 --- a/stats.txt +++ b/stats.txt @@ -1,36 +1,50 @@ - Initial -tests/whitespace/comments -## Lines in source -standard library 9597 2316 -apps/crenshaw2-1b.subx 798 176 -apps/crenshaw2-1.subx 601 180 -apps/factorial.subx 107 28 -apps/handle.subx 361 58 -apps/hex.subx 1511 144 -apps/pack.subx 7348 1054 -apps/assort.subx 1318 284 -apps/dquotes.subx 2694 497 -apps/survey.subx 4573 998 +## Lines in source files + Initial -whitespace/comments/tests +apps/factorial.subx 120 44 +apps/crenshaw2-1.subx 561 180 +apps/crenshaw2-1b.subx 757 186 +apps/handle.subx 428 44 +apps/hex.subx 1442 149 +apps/survey.subx 4733 905 +apps/pack.subx 5881 840 +apps/dquotes.subx 1925 383 +apps/assort.subx 905 183 +apps/tests.subx 284 137 +apps/sigils.subx 4641 896 +apps/calls.subx 360 448 +apps/braces.subx 1785 121 +apps/mu.subx (incomplete) 4038 1330 -## Bytes in executable -apps/crenshaw2-1 17612 4112 -apps/crenshaw2-1b 18171 4140 -apps/factorial 16530 3488 -apps/handle 17323 3582 -apps/hex 22684 4909 -apps/pack 37316 7825 -apps/assort 22506 5342 -apps/dquotes 27186 5849 -apps/survey 42791 11258 +## Total source lines needed including libraries + Initial -whitespace/comments/tests/dead code +apps/factorial.subx 8436 1700 +apps/crenshaw2-1.subx 8644 1925 +apps/crenshaw2-1b.subx 8736 1931 +apps/handle.subx 8601 1638 +apps/hex.subx 9065 1908 +apps/survey.subx 10217 3248 +apps/pack.subx 10589 2727 +apps/dquotes.subx 9262 2468 +apps/assort.subx 8686 2425 +apps/tests.subx 8519 2214 +apps/sigils.subx 10578 3043 +apps/calls.subx 9242 2388 +apps/braces.subx 8545 2111 +apps/mu.subx (incomplete) 10804 3421 -# lines per test (including whitespace/comments) - num tests num lines lines/test -apps/crenshaw2-1.subx 2 585 292.5 -apps/crenshaw2-1b.subx 4 785 196.25 -apps/factorial.subx 1 117 117 -apps/handle.subx 4 412 103 -apps/hex.subx 12 1515 126.25 -apps/pack.subx 42 5977 142.3 -apps/assort.subx 1 839 839 -apps/dquotes.subx 26 2680 103 -apps/survey.subx 18 4570 254 -apps/subx-common.subx 32 3098 96.8125 +## executable size in KB + Initial -tests/dead code +apps/crenshaw2-1 41 4.3 +apps/crenshaw2-1b 42 5.2 +apps/factorial 42 5.2 +apps/handle 42 4.2 +apps/hex 45 5.0 +apps/survey 51 9.6 +apps/pack 54 7.6 +apps/dquotes 46 6.5 +apps/assort 42 6.4 +apps/tests 41 5.8 +apps/sigils 54 9.1 +apps/calls 47 7.1 +apps/braces 42 5.9 +apps/mu (incomplete) 62 13.0 diff --git a/treeshake.cc b/treeshake.cc index db8b5135..9bf5106e 100644 --- a/treeshake.cc +++ b/treeshake.cc @@ -27,6 +27,7 @@ using std::string; #include<iostream> using std::cin; using std::cout; +using std::cerr; #include<sstream> using std::istringstream; @@ -38,6 +39,8 @@ bool starts_with(const string& s, const string& pat) { return b == pat.end(); } +// input + void read_body(string name, string definition_line, map<string, vector<string> >& segment) { // last definition wins; this only matters for the 'Entry' label in the code segment segment[name] = vector<string>(); @@ -78,9 +81,48 @@ void read_lines(map<string, vector<string> >& code, map<string, vector<string> > } } -void treeshake(const map<string, vector<string> >& code, map<string, vector<string> >& data) { +// treeshake + +bool any_line_matches(string pat, const vector<string>& lines) { + for (int i = 0; i < SIZE(lines); ++i) + if (lines.at(i).find(pat) != string::npos) // conservative: confused by word boundaries, comments and string literals + return true; + return false; +} + +bool is_dead(string key, const map<string, vector<string> >& code, const map<string, vector<string> >& data) { + if (key == "Entry") return false; + if (key == "==") return false; + for (map<string, vector<string> >::const_iterator p = code.begin(); p != code.end(); ++p) { + if (p->first == key) continue; + if (any_line_matches(key, p->second)) return false; + } + for (map<string, vector<string> >::const_iterator p = data.begin(); p != data.end(); ++p) { + if (p->first == key) continue; + if (any_line_matches(key, p->second)) return false; + } + return true; } +void treeshake(map<string, vector<string> >& code, map<string, vector<string> >& data) { + for (map<string, vector<string> >::iterator p = code.begin(); p != code.end(); ++p) { + if (is_dead(p->first, code, data)) { +//? cerr << " erasing " << p->first << '\n'; + code.erase(p); + return; + } + } + for (map<string, vector<string> >::iterator p = data.begin(); p != data.end(); ++p) { + if (is_dead(p->first, code, data)) { +//? cerr << " erasing " << p->first << '\n'; + data.erase(p); + return; + } + } +} + +// output + void dump(const map<string, vector<string> > definitions) { // nothing special needed for segment headers, since '=' precedes all alphabet in ASCII for (map<string, vector<string> >::const_iterator p = definitions.begin(); p != definitions.end(); ++p) { @@ -90,10 +132,11 @@ void dump(const map<string, vector<string> > definitions) { } } -int main(int argc, const char* argv[]) { +int main() { map<string, vector<string> > code, data; read_lines(code, data); - while (true) { + for (int iter = 0; ; ++iter) { +//? cerr << "iter: " << iter << '\n'; int old_csize = SIZE(code), old_dsize = SIZE(data); treeshake(code, data); if (SIZE(code) == old_csize && SIZE(data) == old_dsize) break; diff --git a/treeshake_all b/treeshake_all index 2a13f50a..8ebf51d0 100755 --- a/treeshake_all +++ b/treeshake_all @@ -9,38 +9,34 @@ cd `dirname $0` export OS=${OS:-linux} -for app in factorial crenshaw2-1 crenshaw2-1b +echo "== deleting dead code" +for app in factorial crenshaw2-1 crenshaw2-1b handle hex survey pack dquotes assort tests sigils calls braces do echo $app - ./treeshake_translate init.$OS 0*.subx apps/$app.subx + ./treeshake_translate init.$OS 0*.subx apps/subx-params.subx apps/$app.subx 2> apps/$app.stderr + mv a.in apps/$app.in mv a.treeshake apps/$app.treeshake + echo "LoC `cat apps/$app.subx |wc -l` => `grep -vh '^\s*$\|^\s*#' apps/$app.subx |./treeshake |wc -l`" + echo "LoC including common libraries: `cat apps/$app.in |wc -l` => `cat apps/$app.treeshake |wc -l`" mv a.elf apps/$app.treeshake.bin - apps/$app test - echo - apps/$app.treeshake.bin test - echo + echo "binary size: `ls -lh apps/$app |column 5` => `ls -lh apps/$app.treeshake.bin |column 5`" done -# Phases of the self-hosted SubX translator. +echo "== testing treeshaken binaries" +for app in factorial crenshaw2-1 crenshaw2-1b +do + echo $app + ./treeshake_ntranslate init.$OS 0*.subx apps/$app.subx + diff apps/$app a.elf +done for app in hex survey pack assort dquotes tests sigils calls braces do echo $app - ./treeshake_translate init.$OS 0*.subx apps/subx-params.subx apps/$app.subx - mv a.treeshake apps/$app.treeshake - mv a.elf apps/$app.treeshake.bin - apps/$app test - echo - apps/$app.treeshake.bin test - echo + ./treeshake_ntranslate init.$OS 0*.subx apps/subx-params.subx apps/$app.subx + diff apps/$app a.elf done -# Mu translator echo mu.subx -./treeshake_translate init.$OS 0*.subx apps/mu.subx -mv a.treeshake apps/mu.treeshake -mv a.elf apps/mu.treeshake.bin -apps/mu test -echo -apps/mu.treeshake.bin test -echo +./treeshake_ntranslate init.$OS 0*.subx apps/mu.subx +diff apps/mu a.elf diff --git a/treeshake_ntranslate b/treeshake_ntranslate new file mode 100755 index 00000000..7a54e436 --- /dev/null +++ b/treeshake_ntranslate @@ -0,0 +1,40 @@ +#!/bin/sh +# Translate SubX by running the minified self-hosted translator natively on Linux. +# This script is a hack; see the other *translate scripts instead. + +set -e + +./build + +cat $* |apps/braces.treeshake.bin > a.braces + +cat a.braces |apps/calls.treeshake.bin > a.calls + +cat a.calls |apps/sigils.treeshake.bin > a.sigils + +cat a.sigils |apps/tests.treeshake.bin > a.tests + +cat a.tests |apps/assort.treeshake.bin > a.assort + +cat a.assort |apps/dquotes.treeshake.bin > a.dquotes + +# A little hack. We want ntranslate to always emit identical binaries to the +# C++ translator. The C++ translator assorts segments before it processes +# string literals, so we follow the same order above. +# +# However, dquotes currently emits a separate data segment for string literals. +# So we need to run assort a second time to clean up after it. +# +# Potential solutions: +# a) modify C++ translator to process string literals before assorting. +# b) clean up dquotes to assume assorted segments, and append to the +# existing data segment. +cat a.dquotes |apps/assort.treeshake.bin > a.assort2 + +cat a.assort2 |apps/pack.treeshake.bin > a.pack + +cat a.pack |apps/survey.treeshake.bin > a.survey + +cat a.survey |apps/hex.treeshake.bin > a.elf + +chmod +x a.elf diff --git a/treeshake_translate b/treeshake_translate index beb19aa5..6794b066 100755 --- a/treeshake_translate +++ b/treeshake_translate @@ -6,7 +6,9 @@ set -e ./build -grep -vh '^\s*#\|^\s*$' $* |./treeshake > a.treeshake +grep -vh '^\s*#\|^\s*$' $* > a.in + +cat a.in |./treeshake > a.treeshake cat a.treeshake |apps/braces > a.braces |