diff options
author | Kartik Agaram <vc@akkartik.com> | 2019-12-05 23:42:34 -0800 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2019-12-05 23:45:22 -0800 |
commit | b6d62cc91c144ad15a2d8361a95be99b1003c5ae (patch) | |
tree | e8f8169a1f1c9e41bc04cbbdef69bbb2c9c2cd3c | |
parent | 58467e6cbc4fce0c11a5200b9846c7a47ec874d9 (diff) | |
download | mu-b6d62cc91c144ad15a2d8361a95be99b1003c5ae.tar.gz |
5793
Start of a new script called treeshake to emit stats for minimal line counts and binary sizes for all apps. It doesn't actually do any dead-code deletion yet. But it does build and run all apps successfully. (Except apps/mu; we'll ignore that for now. It's probably not being disciplined about identifying internal labels.)
-rwxr-xr-x | build | 5 | ||||
-rwxr-xr-x | clean | 2 | ||||
-rw-r--r-- | treeshake.cc | 104 | ||||
-rwxr-xr-x | treeshake_all | 46 | ||||
-rwxr-xr-x | treeshake_translate | 42 |
5 files changed, 198 insertions, 1 deletions
diff --git a/build b/build index 9e95bb34..9028bfa3 100755 --- a/build +++ b/build @@ -106,4 +106,9 @@ older_than subx_bin subx.cc *_list && { $CXX $CFLAGS subx.cc -o subx_bin } +older_than treeshake treeshake.cc && { + echo $CXX $CFLAGS treeshake.cc -o treeshake + $CXX $CFLAGS treeshake.cc -o treeshake +} + exit 0 diff --git a/clean b/clean index ef0eb481..aeb8f520 100755 --- a/clean +++ b/clean @@ -5,7 +5,7 @@ set -v rm -rf subx.cc subx_bin* *_list rm -rf .until test $# -gt 0 && exit 0 # convenience: 'clean top-level' to leave subsidiary tools alone -rm -rf enumerate/enumerate tangle/tangle tangle/*_list */*.dSYM +rm -rf enumerate/enumerate tangle/tangle tangle/*_list */*.dSYM treeshake rm -rf browse_trace/browse_trace_bin browse_trace/*_list rm -rf tmp mu-linux.iso outfs initrd.fat mu-soso.iso ( cd kernel.soso && make clean; ) diff --git a/treeshake.cc b/treeshake.cc new file mode 100644 index 00000000..db8b5135 --- /dev/null +++ b/treeshake.cc @@ -0,0 +1,104 @@ +// Read a set of lines on stdin of the following form: +// definition: +// ... +// ... +// +// Delete all 'dead' definitions with following indented lines that aren't +// used outside their bodies. +// +// This can be transitive; deleting one definition may cause other definitions +// to become dead. +// +// Also assorts segments as a side-effect. +// +// Like linkify, treeshake is a hack. + +#include<assert.h> + +#include<map> +using std::map; +#include<vector> +using std::vector; +#define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size())) + +#include<string> +using std::string; + +#include<iostream> +using std::cin; +using std::cout; + +#include<sstream> +using std::istringstream; + +bool starts_with(const string& s, const string& pat) { + string::const_iterator a=s.begin(), b=pat.begin(); + for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) + if (*a != *b) return false; + return b == pat.end(); +} + +void read_body(string name, string definition_line, map<string, vector<string> >& segment) { + // last definition wins; this only matters for the 'Entry' label in the code segment + segment[name] = vector<string>(); + segment[name].push_back(definition_line); + while (!cin.eof()) { + if (cin.peek() != ' ' && cin.peek() != '$') break; // assumes: no whitespace but spaces; internal labels start with '$' + string line; + getline(cin, line); + segment[name].push_back(line); + } +} + +void read_lines(string segment_header, map<string, vector<string> >& segment) { + // first segment header wins + if (segment.empty()) + segment["=="].push_back(segment_header); // '==' is a special key containing the segment header + while (!cin.eof()) { + if (cin.peek() == '=') break; // assumes: no line can start with '=' except a segment header + assert(cin.peek() != ' '); // assumes: no whitespace but spaces + string line; + getline(cin, line); + istringstream lstream(line); + string name; + getline(lstream, name, ' '); + assert(name[SIZE(name)-1] == ':'); + name.erase(--name.end()); + read_body(name, line, segment); + } +} + +void read_lines(map<string, vector<string> >& code, map<string, vector<string> >& data) { + while (!cin.eof()) { + string line; + getline(cin, line); + assert(starts_with(line, "== ")); + map<string, vector<string> >& curr = (line.substr(3, 4) == "code") ? code : data; // HACK: doesn't support segments except 'code' and 'data' + read_lines(line, curr); + } +} + +void treeshake(const map<string, vector<string> >& code, map<string, vector<string> >& data) { +} + +void dump(const map<string, vector<string> > definitions) { + // nothing special needed for segment headers, since '=' precedes all alphabet in ASCII + for (map<string, vector<string> >::const_iterator p = definitions.begin(); p != definitions.end(); ++p) { + const vector<string>& lines = p->second; + for (int i = 0; i < SIZE(lines); ++i) + cout << lines[i] << '\n'; + } +} + +int main(int argc, const char* argv[]) { + map<string, vector<string> > code, data; + read_lines(code, data); + while (true) { + int old_csize = SIZE(code), old_dsize = SIZE(data); + treeshake(code, data); + if (SIZE(code) == old_csize && SIZE(data) == old_dsize) break; + } + dump(code); + dump(data); + return 0; +} diff --git a/treeshake_all b/treeshake_all new file mode 100755 index 00000000..2a13f50a --- /dev/null +++ b/treeshake_all @@ -0,0 +1,46 @@ +#!/bin/sh +# Build minimal-size versions of all apps. +# Hacky; only intended for some stats at the moment. + +set -e +cd `dirname $0` + +./build + +export OS=${OS:-linux} + +for app in factorial crenshaw2-1 crenshaw2-1b +do + echo $app + ./treeshake_translate init.$OS 0*.subx apps/$app.subx + mv a.treeshake apps/$app.treeshake + mv a.elf apps/$app.treeshake.bin + apps/$app test + echo + apps/$app.treeshake.bin test + echo +done + +# Phases of the self-hosted SubX translator. + +for app in hex survey pack assort dquotes tests sigils calls braces +do + echo $app + ./treeshake_translate init.$OS 0*.subx apps/subx-params.subx apps/$app.subx + mv a.treeshake apps/$app.treeshake + mv a.elf apps/$app.treeshake.bin + apps/$app test + echo + apps/$app.treeshake.bin test + echo +done + +# Mu translator +echo mu.subx +./treeshake_translate init.$OS 0*.subx apps/mu.subx +mv a.treeshake apps/mu.treeshake +mv a.elf apps/mu.treeshake.bin +apps/mu test +echo +apps/mu.treeshake.bin test +echo diff --git a/treeshake_translate b/treeshake_translate new file mode 100755 index 00000000..beb19aa5 --- /dev/null +++ b/treeshake_translate @@ -0,0 +1,42 @@ +#!/bin/sh +# Translate SubX into minimal ELF binaries for Linux. +# This script is a hack; see the other *translate scripts instead. + +set -e + +./build + +grep -vh '^\s*#\|^\s*$' $* |./treeshake > a.treeshake + +cat a.treeshake |apps/braces > a.braces + +cat a.braces |apps/calls > a.calls + +cat a.calls |apps/sigils > a.sigils + +cat a.sigils |apps/tests > a.tests + +cat a.tests |apps/assort > a.assort + +cat a.assort |apps/dquotes > a.dquotes + +# A little hack. We want ntranslate to always emit identical binaries to the +# C++ translator. The C++ translator assorts segments before it processes +# string literals, so we follow the same order above. +# +# However, dquotes currently emits a separate data segment for string literals. +# So we need to run assort a second time to clean up after it. +# +# Potential solutions: +# a) modify C++ translator to process string literals before assorting. +# b) clean up dquotes to assume assorted segments, and append to the +# existing data segment. +cat a.dquotes |apps/assort > a.assort2 + +cat a.assort2 |apps/pack > a.pack + +cat a.pack |apps/survey > a.survey + +cat a.survey |apps/hex > a.elf + +chmod +x a.elf |