about summary refs log tree commit diff stats
path: root/treeshake.cc
diff options
context:
space:
mode:
author Kartik Agaram <vc@akkartik.com> 2019-12-06 01:12:08 -0800
committer Kartik Agaram <vc@akkartik.com> 2019-12-06 01:12:08 -0800
commit 68719bebc02ca355aa96dee2497f511d24606fc8 (patch)
tree 4f279ad52a1f83e386672b81eeb1f95e4ae9d692 /treeshake.cc
parent b6d62cc91c144ad15a2d8361a95be99b1003c5ae (diff)
download mu-68719bebc02ca355aa96dee2497f511d24606fc8.tar.gz
5794
Rather surprisingly, all the treeshake tooling is done in just about 2
hours of work. From now on it'll be easier to update stats.txt. Observations:

a) Binaries are tiny compared to conventional stacks. Tens of KB.
b) ~80% of binaries are tests and unused libraries in all my apps.
c) ~75% of LoC in SubX sources are tests or comments.
Diffstat (limited to 'treeshake.cc')
-rw-r--r-- treeshake.cc 49
1 file changed, 46 insertions, 3 deletions
diff --git a/treeshake.cc b/treeshake.cc
index db8b5135..9bf5106e 100644
--- a/treeshake.cc
+++ b/treeshake.cc
@@ -27,6 +27,7 @@ using std::string;
 #include<iostream>
 using std::cin;
 using std::cout;
+using std::cerr;
 
 #include<sstream>
 using std::istringstream;
@@ -38,6 +39,8 @@ bool starts_with(const string& s, const string& pat) {
   return b == pat.end();
 }
 
+// input
+
 void read_body(string name, string definition_line, map<string, vector<string> >& segment) {
   // last definition wins; this only matters for the 'Entry' label in the code segment
   segment[name] = vector<string>();
@@ -78,9 +81,48 @@ void read_lines(map<string, vector<string> >& code, map<string, vector<string> >
   }
 }
 
-void treeshake(const map<string, vector<string> >& code, map<string, vector<string> >& data) {
+// treeshake
+
+bool any_line_matches(string pat, const vector<string>& lines) {
+  for (int i = 0;  i < SIZE(lines);  ++i)
+    if (lines.at(i).find(pat) != string::npos)  // conservative: confused by word boundaries, comments and string literals
+      return true;
+  return false;
+}
+
+bool is_dead(string key, const map<string, vector<string> >& code, const map<string, vector<string> >& data) {
+  if (key == "Entry") return false;
+  if (key == "==") return false;
+  for (map<string, vector<string> >::const_iterator p = code.begin();  p != code.end();  ++p) {
+    if (p->first == key) continue;
+    if (any_line_matches(key, p->second)) return false;
+  }
+  for (map<string, vector<string> >::const_iterator p = data.begin();  p != data.end();  ++p) {
+    if (p->first == key) continue;
+    if (any_line_matches(key, p->second)) return false;
+  }
+  return true;
 }
 
+void treeshake(map<string, vector<string> >& code, map<string, vector<string> >& data) {
+  for (map<string, vector<string> >::iterator p = code.begin();  p != code.end();  ++p) {
+    if (is_dead(p->first, code, data)) {
+//?       cerr << "  erasing " << p->first << '\n';
+      code.erase(p);
+      return;
+    }
+  }
+  for (map<string, vector<string> >::iterator p = data.begin();  p != data.end();  ++p) {
+    if (is_dead(p->first, code, data)) {
+//?       cerr << "  erasing " << p->first << '\n';
+      data.erase(p);
+      return;
+    }
+  }
+}
+
+// output
+
 void dump(const map<string, vector<string> > definitions) {
   // nothing special needed for segment headers, since '=' precedes all alphabet in ASCII
   for (map<string, vector<string> >::const_iterator p = definitions.begin();  p != definitions.end();  ++p) {
@@ -90,10 +132,11 @@ void dump(const map<string, vector<string> > definitions) {
   }
 }
 
-int main(int argc, const char* argv[]) {
+int main() {
   map<string, vector<string> > code, data;
   read_lines(code, data);
-  while (true) {
+  for (int iter = 0;  ;  ++iter) {
+//?     cerr << "iter: " << iter << '\n';
     int old_csize = SIZE(code), old_dsize = SIZE(data);
     treeshake(code, data);
     if (SIZE(code) == old_csize && SIZE(data) == old_dsize) break;