about summary refs log tree commit diff stats
path: root/archive/1.vm/cleave/cleave.cc
blob: a3637e8144555f2ec503f9e9fe6363e13bd2dd37 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
// Read a single-file C++ program having a very specific structure, and split
// it up into multiple separate compilation units to reduce the work needed to
// rebuild after a small change. Write each compilation unit only if it has
// changed from what was on disk before.
//
// This tool is tightly coupled with the build system for this project. The
// makefile already auto-generates various things; we only do here what
// standard unix tools can't easily do.
//
// Usage:
//  cleave [input C++ file] [existing output directory]
//
// The input C++ file has the following structure:
//   [#includes]
//   [type definitions]
//   // Globals
//   [global variable definitions]
//   // End Globals
//   [function definitions]
//
// Afterwards, the output directory contains:
//   header -- everything before the '// Globals' delimiter
//   global_definitions_list -- everything between '// Globals' and '// End Globals' delimiters
//   [.cc files partitioning function definitions]
//
// Each output function definition file contains:
//   #include "header"
//   #include "global_declarations_list"
//   [function definitions]
//
// To preserve the original layer-based line numbers in error messages and the
// debugger, we'll chunk the files only at boundaries where we encounter a
// '#line ' directive (generated by the previous tangle/ stage) between
// functions.
//
// One exception: the first file emitted #includes "global_definitions_list" instead
// of "global_declarations_list"

// Tune this parameter to balance time for initial vs incremental build.
//
//   decrease value -> faster initial build
//   increase value -> faster incremental build
int Num_compilation_units = 3;

#include<assert.h>
#include<cstdlib>
#include<cstring>

#include<vector>
using std::vector;
#include<list>
using std::list;
#include<utility>
using std::pair;

#include<string>
using std::string;

#include<iostream>
using std::istream;
using std::ostream;
using std::cin;
using std::cout;
using std::cerr;

#include<sstream>
using std::istringstream;
using std::ostringstream;

#include<fstream>
using std::ifstream;
using std::ofstream;

#include <locale>
using std::isspace;  // unicode-aware

string trim(const string& s) {
  string::const_iterator first = s.begin();
  while (first != s.end() && isspace(*first))
    ++first;
  if (first == s.end()) return "";

  string::const_iterator last = --s.end();
  while (last != s.begin() && isspace(*last))
    --last;
  ++last;
  return string(first, last);
}

bool starts_with(const string& s, const string& pat) {
  string::const_iterator a=s.begin(), b=pat.begin();
  for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b)
    if (*a != *b) return false;
  return b == pat.end();
}

bool has_data(istream& in) {
  return in && !in.eof();
}

void slurp(const string& filename, vector<string>& lines) {
  lines.clear();
  ifstream in(filename.c_str());
  while (has_data(in)) {
    string curr_line;
    getline(in, curr_line);
    lines.push_back(curr_line);
  }
}

size_t slurp_some_functions(const vector<string>& in, size_t start, vector<string>& out, bool first) {
  out.clear();
  if (start >= in.size()) return start;
  out.push_back("#include \"header\"");
  if (first)
    out.push_back("#include \"global_definitions_list\"");
  else
    out.push_back("#include \"global_declarations_list\"");
  out.push_back("");
  size_t curr = start;
  while (true) {
    if (curr >= in.size()) break;
    if (out.size() >= in.size()/Num_compilation_units) break;
    while (curr < in.size()) {
      // read functions -- lines until unindented '}'
      while (curr < in.size()) {
        const string& line = in.at(curr);
//?         cerr << curr << ": adding to function: " << line << '\n';
        out.push_back(line);  ++curr;
        if (!line.empty() && line.at(0) == '}') break;
      }
      // now look for a '#line' directive before the next non-comment non-empty
      // line
      while (curr < in.size()) {
        const string& line = in.at(curr);
        if (starts_with(line, "#line ")) goto try_return;
        out.push_back(line);   ++curr;
        if (trim(line).empty()) continue;
        if (starts_with(trim(line), "//")) continue;
        break;
      }
    }
    try_return:;
  }
  return curr;
}

// compare contents of a file with a list of lines, ignoring #line directives
// on both sides
bool no_change(const vector<string>& lines, const string& output_filename) {
  vector<string> old_lines;
  slurp(output_filename, old_lines);
  size_t l=0, o=0;
  while (true) {
    while (l < lines.size() &&
            (lines.at(l).empty() || starts_with(lines.at(l), "#line "))) {
      ++l;
    }
    while (o < old_lines.size() &&
            (old_lines.at(o).empty() || starts_with(old_lines.at(o), "#line "))) {
      ++o;
    }
    if (l >= lines.size() && o >= old_lines.size()) return true;  // no change
    if (l >= lines.size() || o >= old_lines.size()) return false;  // contents changed
//?     cerr << "comparing\n";
//?     cerr << o << ": " << old_lines.at(o) << '\n';
//?     cerr << l << ": " << lines.at(l) << '\n';
    if (lines.at(l) != old_lines.at(o)) return false;  // contents changed
    ++l;  ++o;
  }
  assert(false);
}

string next_output_filename(const string& output_directory) {
  static int file_count = 0;
  ostringstream out;
  out << output_directory << "/mu_" << file_count << ".cc";
  file_count++;
  return out.str();
}

void emit_file(const vector<string>& lines, const string& output_filename) {
  if (no_change(lines, output_filename)) return;
  cerr << "  updating " << output_filename << '\n';
  ofstream out(output_filename.c_str());
  for (size_t i = 0; i < lines.size(); ++i)
    out << lines.at(i) << '\n';
}

void emit_compilation_unit(const vector<string>& lines, const string& output_directory) {
  string output_filename = next_output_filename(output_directory);
  emit_file(lines, output_filename);
}

int main(int argc, const char* argv[]) {
  if (argc != 3) {
    cerr << "usage: cleave [input .cc file] [output directory]\n";
    exit(0);
  }

  // read input
  vector<string> lines;
  slurp(argv[1], lines);

  // write header until but excluding '// Global' delimiter
  string output_directory = argv[2];
  size_t line_num = 0;
  {
    vector<string> out;
    while (line_num < lines.size()) {
      const string& line = lines.at(line_num);
      if (trim(line) == "// Globals") break;  // todo: #line directive for delimiters
      out.push_back(line);
      ++line_num;
    }
    emit_file(out, output_directory+"/header");
  }

  // write global_definitions_list (including delimiters)
  {
    vector<string> out;
    while (line_num < lines.size()) {
      const string& line = lines.at(line_num);
      out.push_back(line);
      ++line_num;
      if (trim(line) == "// End Globals") break;
    }
    emit_file(out, output_directory+"/global_definitions_list");
  }

  // segment functions
  // first one is special
  if (line_num < lines.size()) {
    vector<string> function;
    line_num = slurp_some_functions(lines, line_num, function, true);
    emit_compilation_unit(function, output_directory);
  }
  while (line_num < lines.size()) {
    vector<string> function;
    line_num = slurp_some_functions(lines, line_num, function, false);
    emit_compilation_unit(function, output_directory);
  }
}