about summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
author Kartik Agaram <vc@akkartik.com> 2019-12-07 15:20:44 -0800
committer Kartik Agaram <vc@akkartik.com> 2019-12-07 18:05:06 -0800
commit c1d596f56a6f2198ea8ea1b0a90c613e919d891b (patch)
tree 6e44703f23864ad78dd68e7b0a9229f54a46b281 /tools
parent e9aee071f44876bcce4d741eea52198249e5b339 (diff)
download mu-c1d596f56a6f2198ea8ea1b0a90c613e919d891b.tar.gz
5796 - move treeshake to a new tools/ directory
Diffstat (limited to 'tools')
-rw-r--r-- tools/Readme.md 11
-rwxr-xr-x tools/test_treeshake_translate 29
-rw-r--r-- tools/treeshake.cc 147
-rwxr-xr-x tools/treeshake_all 44
-rwxr-xr-x tools/treeshake_translate 8
5 files changed, 239 insertions, 0 deletions
diff --git a/tools/Readme.md b/tools/Readme.md
new file mode 100644
index 00000000..3a24955b
--- /dev/null
+++ b/tools/Readme.md
@@ -0,0 +1,11 @@
+Run all these from the top-level `mu/` directory.
+
+### Miscellaneous odds and ends
+
+These are built lazily.
+
+* `treeshake_all`: rebuild SubX binaries without tests and unused functions.
+  Pretty hacky; just helps estimate the code needed to perform various tasks.
+  ```
+  tools/treeshake_all
+  ```
diff --git a/tools/test_treeshake_translate b/tools/test_treeshake_translate
new file mode 100755
index 00000000..1aa6f068
--- /dev/null
+++ b/tools/test_treeshake_translate
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Translate SubX programs using a minified translator.
+# Based on ntranslate.
+
+set -e
+
+./build
+
+cat $*          |apps/braces.treeshake.bin   > a.braces
+
+cat a.braces    |apps/calls.treeshake.bin    > a.calls
+
+cat a.calls     |apps/sigils.treeshake.bin   > a.sigils
+
+cat a.sigils    |apps/tests.treeshake.bin    > a.tests
+
+cat a.tests     |apps/assort.treeshake.bin   > a.assort
+
+cat a.assort    |apps/dquotes.treeshake.bin  > a.dquotes
+
+cat a.dquotes   |apps/assort.treeshake.bin   > a.assort2
+
+cat a.assort2   |apps/pack.treeshake.bin     > a.pack
+
+cat a.pack      |apps/survey.treeshake.bin   > a.survey
+
+cat a.survey    |apps/hex.treeshake.bin      > a.elf
+
+chmod +x a.elf
diff --git a/tools/treeshake.cc b/tools/treeshake.cc
new file mode 100644
index 00000000..9bf5106e
--- /dev/null
+++ b/tools/treeshake.cc
@@ -0,0 +1,147 @@
+// Read a set of lines on stdin of the following form:
+//  definition:
+//    ...
+//    ...
+//
+// Delete all 'dead' definitions with following indented lines that aren't
+// used outside their bodies.
+//
+// This can be transitive; deleting one definition may cause other definitions
+// to become dead.
+//
+// Also assorts segments as a side-effect.
+//
+// Like linkify, treeshake is a hack.
+
+#include<assert.h>
+
+#include<map>
+using std::map;
+#include<vector>
+using std::vector;
+#define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size()))
+
+#include<string>
+using std::string;
+
+#include<iostream>
+using std::cin;
+using std::cout;
+using std::cerr;
+
+#include<sstream>
+using std::istringstream;
+
+// Report whether 'pat' is a prefix of 's'.
+// An empty 'pat' is a prefix of every string, including the empty string.
+bool starts_with(const string& s, const string& pat) {
+  // a pattern longer than the string can never be a prefix of it
+  if (pat.size() > s.size()) return false;
+  return s.compare(0, pat.size(), pat) == 0;
+}
+
+// input
+
+// Read one definition from stdin into segment[name].
+//
+// 'definition_line' (the already-consumed line holding the label) becomes the
+// first stored line. It is followed by every subsequent input line that
+// starts with a space or with '$'. Reading stops, without consuming anything
+// further, at the first line that starts with any other character or at end
+// of input.
+void read_body(string name, string definition_line, map<string, vector<string> >& segment) {
+  // last definition wins; this only matters for the 'Entry' label in the code segment
+  vector<string>& body = segment[name];
+  body.clear();
+  body.push_back(definition_line);
+  for (;;) {
+    if (cin.eof()) break;
+    const int next = cin.peek();
+    // assumes: no whitespace but spaces; internal labels start with '$'
+    const bool continues_body = (next == ' ' || next == '$');
+    if (!continues_body) break;
+    string line;
+    getline(cin, line);
+    body.push_back(line);
+  }
+}
+
+// Read the definitions of one segment from stdin into 'segment'.
+//
+// 'segment_header' is the already-consumed '== ...' line that opened the
+// segment. It is recorded under the special key '==', but only if 'segment'
+// is still empty, so the first header seen for a segment wins.
+//
+// Each following line must be a top-level label of the form 'name:' (possibly
+// followed by a space and more text). The label and its body are handed to
+// read_body() and stored under 'name'. Reading stops, without consuming it,
+// at the next line starting with '=', or at end of input.
+void read_lines(string segment_header, map<string, vector<string> >& segment) {
+  // first segment header wins
+  if (segment.empty())
+    segment["=="].push_back(segment_header);  // '==' is a special key containing the segment header
+  while (!cin.eof()) {
+    const int next = cin.peek();
+    // Input may end right after a segment header (a segment without any
+    // definitions). Stop here rather than parsing a line that doesn't exist.
+    if (next == std::char_traits<char>::eof()) break;
+    if (next == '=') break;  // assumes: no line can start with '=' except a segment header
+    assert(next != ' ');  // assumes: no whitespace but spaces
+    string line;
+    getline(cin, line);
+    istringstream lstream(line);
+    string name;
+    getline(lstream, name, ' ');
+    // check for emptiness first: indexing the last character of an empty
+    // string (e.g. from a blank line) is undefined behavior
+    assert(!name.empty() && name[SIZE(name)-1] == ':');
+    name.erase(--name.end());  // drop the trailing ':'
+    read_body(name, line, segment);
+  }
+}
+
+// Read all of stdin, routing the definitions of each segment into 'code' or
+// 'data'.
+//
+// Input is a sequence of segments, each opened by a header line starting with
+// '== '. A header whose name (the 4 characters after '== ') is 'code' selects
+// 'code'; any other header selects 'data'.
+// Empty input is accepted and leaves both maps untouched.
+void read_lines(map<string, vector<string> >& code, map<string, vector<string> >& data) {
+  string line;
+  // Loop on the success of getline() rather than on eof(): on empty input the
+  // very first read fails and there is no header line to inspect.
+  while (getline(cin, line)) {
+    assert(starts_with(line, "== "));
+    map<string, vector<string> >& curr = (line.substr(3, 4) == "code") ? code : data;  // HACK: doesn't support segments except 'code' and 'data'
+    read_lines(line, curr);
+  }
+}
+
+// treeshake
+
+// Report whether 'pat' occurs as a substring of at least one of 'lines'.
+// Returns false when 'lines' is empty.
+bool any_line_matches(string pat, const vector<string>& lines) {
+  for (vector<string>::const_iterator line = lines.begin();  line != lines.end();  ++line) {
+    // conservative: confused by word boundaries, comments and string literals
+    if (line->find(pat) == string::npos) continue;
+    return true;
+  }
+  return false;
+}
+
+// Helper for is_dead(): report whether any definition in 'segment', other
+// than the one stored under 'key' itself, has a line mentioning 'key'.
+static bool mentioned_by_other_definitions(const string& key, const map<string, vector<string> >& segment) {
+  typedef map<string, vector<string> >::const_iterator definition_iterator;
+  for (definition_iterator p = segment.begin();  p != segment.end();  ++p) {
+    if (p->first != key && any_line_matches(key, p->second))
+      return true;
+  }
+  return false;
+}
+
+// Report whether the definition named 'key' can be deleted: nothing in 'code'
+// or 'data' mentions it, apart from a definition with that same name.
+// 'Entry' and the segment-header pseudo-definition '==' are never dead.
+bool is_dead(string key, const map<string, vector<string> >& code, const map<string, vector<string> >& data) {
+  if (key == "Entry" || key == "==") return false;
+  return !mentioned_by_other_definitions(key, code)
+      && !mentioned_by_other_definitions(key, data);
+}
+
+// Make one pass over 'code' and then 'data', deleting every definition that
+// is_dead() reports as unused at the moment it is visited.
+//
+// One pass is not necessarily enough: a deletion late in the pass can turn a
+// definition that was visited earlier into dead code. Callers should keep
+// calling until neither map shrinks any further.
+//
+// Deleting a definition can only remove references, never add them, so the
+// set of definitions left at that fixpoint doesn't depend on how many are
+// deleted per pass.
+void treeshake(map<string, vector<string> >& code, map<string, vector<string> >& data) {
+  for (map<string, vector<string> >::iterator p = code.begin();  p != code.end();  /*advanced in body*/) {
+    if (is_dead(p->first, code, data)) {
+//?       cerr << "  erasing " << p->first << '\n';
+      code.erase(p++);  // post-increment steps off the node before erase() invalidates it
+    }
+    else {
+      ++p;
+    }
+  }
+  for (map<string, vector<string> >::iterator p = data.begin();  p != data.end();  /*advanced in body*/) {
+    if (is_dead(p->first, code, data)) {
+//?       cerr << "  erasing " << p->first << '\n';
+      data.erase(p++);  // post-increment steps off the node before erase() invalidates it
+    }
+    else {
+      ++p;
+    }
+  }
+}
+
+// output
+
+// Print every line of every definition in 'definitions' to stdout, one per
+// line, visiting definitions in the map's key order.
+//
+// 'definitions' is taken by const reference; taking it by value would
+// deep-copy the whole map on every call.
+void dump(const map<string, vector<string> >& definitions) {
+  // nothing special needed for segment headers, since '=' precedes all alphabet in ASCII
+  for (map<string, vector<string> >::const_iterator p = definitions.begin();  p != definitions.end();  ++p) {
+    const vector<string>& lines = p->second;
+    for (vector<string>::const_iterator line = lines.begin();  line != lines.end();  ++line)
+      cout << *line << '\n';
+  }
+}
+
+// Read a SubX program from stdin, delete dead definitions until nothing more
+// can be deleted, then print what's left: the code segment followed by the
+// data segment.
+int main() {
+  map<string, vector<string> > code, data;
+  read_lines(code, data);
+  // Keep shaking until a pass leaves both segments the same size.
+  int iter = 0;
+  bool changed = true;
+  while (changed) {
+//?     cerr << "iter: " << iter << '\n';
+    const int code_size_before = SIZE(code);
+    const int data_size_before = SIZE(data);
+    treeshake(code, data);
+    changed = (SIZE(code) != code_size_before || SIZE(data) != data_size_before);
+    ++iter;
+  }
+  dump(code);
+  dump(data);
+  return 0;
+}
diff --git a/tools/treeshake_all b/tools/treeshake_all
new file mode 100755
index 00000000..614b106a
--- /dev/null
+++ b/tools/treeshake_all
@@ -0,0 +1,44 @@
+#!/bin/sh
+# Build minimal-size versions of all apps.
+# Hacky; only intended for some stats at the moment.
+
+set -e
+
+[ ! -f tools/treeshake ] && {
+  echo building tools/treeshake
+  c++ -g -O3 tools/treeshake.cc -o tools/treeshake
+}
+
+export OS=${OS:-linux}
+
+echo "== deleting dead code"
+for app in factorial crenshaw2-1 crenshaw2-1b handle hex survey pack dquotes assort tests sigils calls braces
+do
+  echo "- $app"
+  tools/treeshake_translate init.$OS 0*.subx apps/subx-params.subx apps/$app.subx
+  mv a.in apps/$app.in
+  mv a.treeshake apps/$app.treeshake
+  echo "LoC $(cat apps/$app.subx |wc -l) => $(grep -vh '^\s*$\|^\s*#' apps/$app.subx |tools/treeshake |wc -l)"
+  echo "LoC including common libraries: $(cat apps/$app.in |wc -l) => $(cat apps/$app.treeshake |wc -l)"
+  mv a.elf apps/$app.treeshake.bin
+  echo "binary size: $(ls -lh apps/$app |column 5) => $(ls -lh apps/$app.treeshake.bin |column 5)"
+done
+
+echo "== testing treeshaken binaries"
+for app in factorial crenshaw2-1 crenshaw2-1b
+do
+  echo $app
+  tools/test_treeshake_translate init.$OS 0*.subx apps/$app.subx
+  diff apps/$app a.elf
+done
+
+for app in hex survey pack assort dquotes tests sigils calls braces
+do
+  echo $app
+  tools/test_treeshake_translate init.$OS 0*.subx apps/subx-params.subx apps/$app.subx
+  diff apps/$app a.elf
+done
+
+echo mu.subx
+tools/test_treeshake_translate init.$OS 0*.subx apps/mu.subx
+diff apps/mu a.elf
diff --git a/tools/treeshake_translate b/tools/treeshake_translate
new file mode 100755
index 00000000..ba80dea0
--- /dev/null
+++ b/tools/treeshake_translate
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Translate SubX into minified ELF binaries for Linux.
+#
+# Usage: tools/treeshake_translate file.subx [file.subx ...]
+#
+# Leaves behind:
+#   a.in         the input files concatenated, minus blank and comment-only lines
+#   a.treeshake  a.in with dead definitions removed by tools/treeshake
+# and then hands a.treeshake to ./ntranslate.
+
+set -e
+
+# "$@" rather than $*, so each filename reaches grep as exactly one word even
+# if it contains whitespace or glob characters
+grep -vh '^\s*#\|^\s*$' "$@"  > a.in
+cat a.in  |tools/treeshake  > a.treeshake
+./ntranslate a.treeshake