diff options
author | Kartik Agaram <vc@akkartik.com> | 2019-12-07 15:20:44 -0800 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2019-12-07 18:05:06 -0800 |
commit | c1d596f56a6f2198ea8ea1b0a90c613e919d891b (patch) | |
tree | 6e44703f23864ad78dd68e7b0a9229f54a46b281 /tools | |
parent | e9aee071f44876bcce4d741eea52198249e5b339 (diff) | |
download | mu-c1d596f56a6f2198ea8ea1b0a90c613e919d891b.tar.gz |
5796 - move treeshake to a new tools/ directory
Diffstat (limited to 'tools')
-rw-r--r-- | tools/Readme.md | 11 | ||||
-rwxr-xr-x | tools/test_treeshake_translate | 29 | ||||
-rw-r--r-- | tools/treeshake.cc | 147 | ||||
-rwxr-xr-x | tools/treeshake_all | 44 | ||||
-rwxr-xr-x | tools/treeshake_translate | 8 |
5 files changed, 239 insertions, 0 deletions
diff --git a/tools/Readme.md b/tools/Readme.md
new file mode 100644
index 00000000..3a24955b
--- /dev/null
+++ b/tools/Readme.md
@@ -0,0 +1,11 @@
+Run all these from the top-level `mu/` directory.
+
+### Miscellaneous odds and ends
+
+These are built lazily.
+
+* `treeshake_all`: rebuild SubX binaries without tests and unused functions.
+  Pretty hacky; just helps estimate the code needed to perform various tasks.
+  ```
+  tools/treeshake_all
+  ```
diff --git a/tools/test_treeshake_translate b/tools/test_treeshake_translate
new file mode 100755
index 00000000..1aa6f068
--- /dev/null
+++ b/tools/test_treeshake_translate
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Translate SubX programs using a minified translator.
+# Based on ntranslate.
+
+set -e
+
+./build
+
+cat "$@" |apps/braces.treeshake.bin > a.braces
+
+cat a.braces |apps/calls.treeshake.bin > a.calls
+
+cat a.calls |apps/sigils.treeshake.bin > a.sigils
+
+cat a.sigils |apps/tests.treeshake.bin > a.tests
+
+cat a.tests |apps/assort.treeshake.bin > a.assort
+
+cat a.assort |apps/dquotes.treeshake.bin > a.dquotes
+
+cat a.dquotes |apps/assort.treeshake.bin > a.assort2
+
+cat a.assort2 |apps/pack.treeshake.bin > a.pack
+
+cat a.pack |apps/survey.treeshake.bin > a.survey
+
+cat a.survey |apps/hex.treeshake.bin > a.elf
+
+chmod +x a.elf
diff --git a/tools/treeshake.cc b/tools/treeshake.cc
new file mode 100644
index 00000000..9bf5106e
--- /dev/null
+++ b/tools/treeshake.cc
@@ -0,0 +1,147 @@
+// Read a set of lines on stdin of the following form:
+//  definition:
+//    ...
+//    ...
+//
+// Delete all 'dead' definitions with following indented lines that aren't
+// used outside their bodies.
+//
+// This can be transitive; deleting one definition may cause other definitions
+// to become dead.
+//
+// Also assorts segments as a side-effect.
+//
+// Like linkify, treeshake is a hack.
+
+#include<assert.h>
+
+#include<map>
+using std::map;
+#include<vector>
+using std::vector;
+#define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size()))
+
+#include<string>
+using std::string;
+
+#include<iostream>
+using std::cin;
+using std::cout;
+using std::cerr;
+
+#include<sstream>
+using std::istringstream;
+
+bool starts_with(const string& s, const string& pat) {
+  string::const_iterator a=s.begin(), b=pat.begin();
+  for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b)
+    if (*a != *b) return false;
+  return b == pat.end();  // true only if all of 'pat' was matched
+}
+
+// input: each segment maps a definition name to its lines (definition line first)
+
+void read_body(const string& name, const string& definition_line, map<string, vector<string> >& segment) {
+  // last definition wins; this only matters for the 'Entry' label in the code segment
+  segment[name] = vector<string>();
+  segment[name].push_back(definition_line);
+  while (!cin.eof()) {
+    if (cin.peek() != ' ' && cin.peek() != '$') break;  // assumes: no whitespace but spaces; internal labels start with '$'
+    string line;
+    getline(cin, line);
+    segment[name].push_back(line);
+  }
+}
+
+void read_lines(const string& segment_header, map<string, vector<string> >& segment) {
+  // read definitions until the next segment header or EOF; first segment header wins
+  if (segment.empty())
+    segment["=="].push_back(segment_header);  // '==' is a special key containing the segment header
+  while (!cin.eof()) {
+    if (cin.peek() == '=' || cin.eof()) break;  // assumes: no line can start with '=' except a segment header; peek() sets eof at end of input
+    assert(cin.peek() != ' ');  // assumes: no whitespace but spaces
+    string line;
+    getline(cin, line);
+    istringstream lstream(line);
+    string name;
+    getline(lstream, name, ' ');
+    assert(!name.empty() && name[SIZE(name)-1] == ':');  // check emptiness first: indexing an empty name is out of bounds
+    name.erase(--name.end());
+    read_body(name, line, segment);
+  }
+}
+
+void read_lines(map<string, vector<string> >& code, map<string, vector<string> >& data) {
+  while (!cin.eof()) {  // one iteration per segment header
+    string line;
+    if (!getline(cin, line)) break;  // empty input or read failure: nothing (more) to parse
+    assert(starts_with(line, "== "));
+    map<string, vector<string> >& curr = (line.substr(3, 4) == "code") ? code : data;  // HACK: doesn't support segments except 'code' and 'data'
+    read_lines(line, curr);
+  }
+}
+
+// treeshake: find and delete definitions whose names appear in no other definition
+
+bool any_line_matches(const string& pat, const vector<string>& lines) {
+  for (int i = 0; i < SIZE(lines); ++i)
+    if (lines.at(i).find(pat) != string::npos)  // conservative: confused by word boundaries, comments and string literals
+      return true;
+  return false;
+}
+
+bool is_dead(const string& key, const map<string, vector<string> >& code, const map<string, vector<string> >& data) {
+  if (key == "Entry") return false;  // never delete the 'Entry' label
+  if (key == "==") return false;  // special key holding the segment header, not a definition
+  for (map<string, vector<string> >::const_iterator p = code.begin(); p != code.end(); ++p) {
+    if (p->first == key) continue;
+    if (any_line_matches(key, p->second)) return false;
+  }
+  for (map<string, vector<string> >::const_iterator p = data.begin(); p != data.end(); ++p) {
+    if (p->first == key) continue;
+    if (any_line_matches(key, p->second)) return false;
+  }
+  return true;
+}
+
+void treeshake(map<string, vector<string> >& code, map<string, vector<string> >& data) {  // deletes at most one dead definition per call; main() repeats until nothing changes
+  for (map<string, vector<string> >::iterator p = code.begin(); p != code.end(); ++p) {
+    if (is_dead(p->first, code, data)) {
+//?       cerr << " erasing " << p->first << '\n';
+      code.erase(p);
+      return;
+    }
+  }
+  for (map<string, vector<string> >::iterator p = data.begin(); p != data.end(); ++p) {
+    if (is_dead(p->first, code, data)) {
+//?       cerr << " erasing " << p->first << '\n';
+      data.erase(p);
+      return;
+    }
+  }
+}
+
+// output
+
+void dump(const map<string, vector<string> >& definitions) {  // by reference: no need to copy every line just to print it
+  // nothing special needed for segment headers, since '=' precedes all alphabet in ASCII
+  for (map<string, vector<string> >::const_iterator p = definitions.begin(); p != definitions.end(); ++p) {
+    const vector<string>& lines = p->second;
+    for (int i = 0; i < SIZE(lines); ++i)
+      cout << lines[i] << '\n';
+  }
+}
+
+int main() {
+  map<string, vector<string> > code, data;
+  read_lines(code, data);
+  for (int iter = 0; ; ++iter) {  // each pass deletes at most one definition; stop at a fixed point
+//?     cerr << "iter: " << iter << '\n';
+    int old_csize = SIZE(code), old_dsize = SIZE(data);
+    treeshake(code, data);
+    if (SIZE(code) == old_csize && SIZE(data) == old_dsize) break;
+  }
+  dump(code);
+  dump(data);
+  return 0;
+}
diff --git a/tools/treeshake_all b/tools/treeshake_all
new file mode 100755
index 00000000..614b106a
--- /dev/null
+++ b/tools/treeshake_all
@@ -0,0 +1,44 @@
+#!/bin/sh
+# Build minimal-size versions of all apps.
+# Hacky; only intended for some stats at the moment.
+
+set -e
+
+[ ! -f tools/treeshake ] && {
+  echo building tools/treeshake
+  c++ -g -O3 tools/treeshake.cc -o tools/treeshake
+}
+
+export OS=${OS:-linux}
+
+echo "== deleting dead code"
+for app in factorial crenshaw2-1 crenshaw2-1b handle hex survey pack dquotes assort tests sigils calls braces
+do
+  echo "- $app"
+  tools/treeshake_translate init.$OS 0*.subx apps/subx-params.subx apps/$app.subx
+  mv a.in apps/$app.in
+  mv a.treeshake apps/$app.treeshake
+  echo "LoC $(cat apps/$app.subx |wc -l) => $(grep -vh '^\s*$\|^\s*#' apps/$app.subx |tools/treeshake |wc -l)"
+  echo "LoC including common libraries: $(cat apps/$app.in |wc -l) => $(cat apps/$app.treeshake |wc -l)"
+  mv a.elf apps/$app.treeshake.bin
+  echo "binary size: $(ls -lh apps/$app |column 5) => $(ls -lh apps/$app.treeshake.bin |column 5)"
+done
+
+echo "== testing treeshaken binaries"
+for app in factorial crenshaw2-1 crenshaw2-1b
+do
+  echo $app
+  tools/test_treeshake_translate init.$OS 0*.subx apps/$app.subx
+  diff apps/$app a.elf
+done
+
+for app in hex survey pack assort dquotes tests sigils calls braces
+do
+  echo $app
+  tools/test_treeshake_translate init.$OS 0*.subx apps/subx-params.subx apps/$app.subx
+  diff apps/$app a.elf
+done
+
+echo mu.subx
+tools/test_treeshake_translate init.$OS 0*.subx apps/mu.subx
+diff apps/mu a.elf
diff --git a/tools/treeshake_translate b/tools/treeshake_translate
new file mode 100755
index 00000000..ba80dea0
--- /dev/null
+++ b/tools/treeshake_translate
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Translate SubX into minified ELF binaries for Linux.
+
+set -e
+
+grep -vh '^\s*#\|^\s*$' "$@" > a.in
+cat a.in |tools/treeshake > a.treeshake
+./ntranslate a.treeshake