about summary refs log tree commit diff stats
path: root/tools/treeshake.cc
blob: 9bf5106eddb87fed76a41d4c88d1bc99c3ca50c2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// Read a set of lines on stdin of the following form:
//  definition:
//    ...
//    ...
//
// Delete all 'dead' definitions with following indented lines that aren't
// used outside their bodies.
//
// This can be transitive; deleting one definition may cause other definitions
// to become dead.
//
// Also assorts segments as a side-effect.
//
// Like linkify, treeshake is a hack.

#include<assert.h>

#include<map>
using std::map;
#include<vector>
using std::vector;
#define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size()))

#include<string>
using std::string;

#include<iostream>
using std::cin;
using std::cout;
using std::cerr;

#include<sstream>
using std::istringstream;

bool starts_with(const string& s, const string& pat) {
  string::const_iterator a=s.begin(), b=pat.begin();
  for (/*nada*/;  a!=s.end() && b!=pat.end();  ++a, ++b)
    if (*a != *b) return false;
  return b == pat.end();
}

// input

void read_body(string name, string definition_line, map<string, vector<string> >& segment) {
  // last definition wins; this only matters for the 'Entry' label in the code segment
  segment[name] = vector<string>();
  segment[name].push_back(definition_line);
  while (!cin.eof()) {
    if (cin.peek() != ' ' && cin.peek() != '$') break;  // assumes: no whitespace but spaces; internal labels start with '$'
    string line;
    getline(cin, line);
    segment[name].push_back(line);
  }
}

void read_lines(string segment_header, map<string, vector<string> >& segment) {
  // first segment header wins
  if (segment.empty())
    segment["=="].push_back(segment_header);  // '==' is a special key containing the segment header
  while (!cin.eof()) {
    if (cin.peek() == '=') break;  // assumes: no line can start with '=' except a segment header
    assert(cin.peek() != ' ');  // assumes: no whitespace but spaces
    string line;
    getline(cin, line);
    istringstream lstream(line);
    string name;
    getline(lstream, name, ' ');
    assert(name[SIZE(name)-1] == ':');
    name.erase(--name.end());
    read_body(name, line, segment);
  }
}

void read_lines(map<string, vector<string> >& code, map<string, vector<string> >& data) {
  while (!cin.eof()) {
    string line;
    getline(cin, line);
    assert(starts_with(line, "== "));
    map<string, vector<string> >& curr = (line.substr(3, 4) == "code") ? code : data;  // HACK: doesn't support segments except 'code' and 'data'
    read_lines(line, curr);
  }
}

// treeshake

bool any_line_matches(string pat, const vector<string>& lines) {
  for (int i = 0;  i < SIZE(lines);  ++i)
    if (lines.at(i).find(pat) != string::npos)  // conservative: confused by word boundaries, comments and string literals
      return true;
  return false;
}

bool is_dead(string key, const map<string, vector<string> >& code, const map<string, vector<string> >& data) {
  if (key == "Entry") return false;
  if (key == "==") return false;
  for (map<string, vector<string> >::const_iterator p = code.begin();  p != code.end();  ++p) {
    if (p->first == key) continue;
    if (any_line_matches(key, p->second)) return false;
  }
  for (map<string, vector<string> >::const_iterator p = data.begin();  p != data.end();  ++p) {
    if (p->first == key) continue;
    if (any_line_matches(key, p->second)) return false;
  }
  return true;
}

void treeshake(map<string, vector<string> >& code, map<string, vector<string> >& data) {
  for (map<string, vector<string> >::iterator p = code.begin();  p != code.end();  ++p) {
    if (is_dead(p->first, code, data)) {
//?       cerr << "  erasing " << p->first << '\n';
      code.erase(p);
      return;
    }
  }
  for (map<string, vector<string> >::iterator p = data.begin();  p != data.end();  ++p) {
    if (is_dead(p->first, code, data)) {
//?       cerr << "  erasing " << p->first << '\n';
      data.erase(p);
      return;
    }
  }
}

// output

void dump(const map<string, vector<string> > definitions) {
  // nothing special needed for segment headers, since '=' precedes all alphabet in ASCII
  for (map<string, vector<string> >::const_iterator p = definitions.begin();  p != definitions.end();  ++p) {
    const vector<string>& lines = p->second;
    for (int i = 0;  i < SIZE(lines);  ++i)
      cout << lines[i] << '\n';
  }
}

int main() {
  map<string, vector<string> > code, data;
  read_lines(code, data);
  for (int iter = 0;  ;  ++iter) {
//?     cerr << "iter: " << iter << '\n';
    int old_csize = SIZE(code), old_dsize = SIZE(data);
    treeshake(code, data);
    if (SIZE(code) == old_csize && SIZE(data) == old_dsize) break;
  }
  dump(code);
  dump(data);
  return 0;
}