diff options
author | Kartik Agaram <vc@akkartik.com> | 2019-12-02 23:57:01 -0800 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2019-12-02 23:57:13 -0800 |
commit | 41302404c7d2d1d4dbf3858a7bcd5665b43bda82 (patch) | |
tree | a5c8a997ac8d60a1775146617f3e985a15abafe1 /archive/2.vm/cleave/cleave.cc | |
parent | f25f2e98f9a35a275b5dc31ea80cbbb26fe88c92 (diff) | |
download | mu-41302404c7d2d1d4dbf3858a7bcd5665b43bda82.tar.gz |
5787
Diffstat (limited to 'archive/2.vm/cleave/cleave.cc')
-rw-r--r-- | archive/2.vm/cleave/cleave.cc | 243 |
1 files changed, 243 insertions, 0 deletions
// Read a single-file C++ program having a very specific structure, and split
// it up into multiple separate compilation units to reduce the work needed to
// rebuild after a small change. Write each compilation unit only if it has
// changed from what was on disk before.
//
// This tool is tightly coupled with the build system for this project. The
// makefile already auto-generates various things; we only do here what
// standard unix tools can't easily do.
//
// Usage:
//   cleave [input C++ file] [existing output directory]
//
// The input C++ file has the following structure:
//   [#includes]
//   [type definitions]
//   // Globals
//   [global variable definitions]
//   // End Globals
//   [function definitions]
//
// Afterwards, the output directory contains:
//   header -- everything before the '// Globals' delimiter
//   global_definitions_list -- everything from the '// Globals' delimiter
//                              through the '// End Globals' delimiter
//                              (both delimiters included)
//   [.cc files partitioning function definitions]
//
// Each output function definition file contains:
//   #include "header"
//   #include "global_declarations_list"
//   [function definitions]
//
// To preserve the original layer-based line numbers in error messages and the
// debugger, we'll chunk the files only at boundaries where we encounter a
// '#line ' directive (generated by the previous tangle/ stage) between
// functions.
//
// One exception: the first file emitted #includes "global_definitions_list" instead
// of "global_declarations_list"

// Tune this parameter to balance time for initial vs incremental build.
//
//   decrease value -> faster initial build
//   increase value -> faster incremental build
int Num_compilation_units = 3;

#include<assert.h>
#include<cctype>
#include<cstdlib>
#include<cstring>

#include<vector>
using std::vector;
#include<list>
using std::list;
#include<utility>
using std::pair;

#include<string>
using std::string;

#include<iostream>
using std::istream;
using std::ostream;
using std::cin;
using std::cout;
using std::cerr;

#include<sstream>
using std::istringstream;
using std::ostringstream;

#include<fstream>
using std::ifstream;
using std::ofstream;

#include <locale>
using std::isspace;  // note: one-argument calls resolve to the <cctype> version, which is byte-based

// Classify a single byte as whitespace.
// The <cctype> isspace() has undefined behavior for negative arguments other
// than EOF, so a plain char (which may be signed) has to go through unsigned
// char first.
bool is_space(char c) {
  return std::isspace(static_cast<unsigned char>(c)) != 0;
}

// Return a copy of 's' without leading and trailing whitespace.
// Returns "" if 's' is empty or all whitespace.
string trim(const string& s) {
  string::const_iterator first = s.begin();
  while (first != s.end() && is_space(*first))
    ++first;
  if (first == s.end()) return "";

  // at this point 's' has at least one non-space character, so walking
  // backwards is guaranteed to stop at or after 'first'
  string::const_iterator last = --s.end();
  while (last != s.begin() && is_space(*last))
    --last;
  ++last;
  return string(first, last);
}

// Does 's' begin with 'pat'? An empty 'pat' matches everything.
bool starts_with(const string& s, const string& pat) {
  string::const_iterator a=s.begin(), b=pat.begin();
  for (/*nada*/;  a!=s.end() && b!=pat.end();  ++a, ++b)
    if (*a != *b) return false;
  // if we ran out of 's' before running out of 'pat', it's not a match
  return b == pat.end();
}

// Is the stream in a good state and not yet flagged as at end-of-file?
bool has_data(istream& in) {
  return in && !in.eof();
}

// Replace the contents of 'lines' with the lines of file 'filename'.
// A file that can't be opened yields no lines.
// Since the stream is tested before each read rather than after, the final
// (failed) getline() contributes one extra empty string -- e.g. for a file
// ending in a newline, or for an empty file. Callers here tolerate that, and
// the chunk size computed in slurp_some_functions() depends on the line
// count, so we leave it alone.
void slurp(const string& filename, vector<string>& lines) {
  lines.clear();
  ifstream in(filename.c_str());
  while (has_data(in)) {
    string curr_line;
    getline(in, curr_line);
    lines.push_back(curr_line);
  }
}

// Copy one compilation unit's worth of lines from 'in', beginning at index
// 'start', into 'out' (which is cleared first).
//
// 'out' starts with a 3-line preamble: #include "header", then #include
// "global_definitions_list" if 'first' or "global_declarations_list"
// otherwise, then a blank line.
//
// A unit only ever ends right before a '#line ' directive or at the end of
// 'in'. It ends at the first such point where 'out' has grown to at least
// in.size()/Num_compilation_units lines.
//
// Returns the index of the first line not consumed. If start < in.size() the
// result is always greater than 'start'; if start >= in.size(), 'out' is left
// empty and 'start' is returned.
size_t slurp_some_functions(const vector<string>& in, size_t start, vector<string>& out, bool first) {
  out.clear();
  if (start >= in.size()) return start;
  out.push_back("#include \"header\"");
  if (first)
    out.push_back("#include \"global_definitions_list\"");
  else
    out.push_back("#include \"global_declarations_list\"");
  out.push_back("");
  assert(Num_compilation_units > 0);
  const size_t target_size = in.size()/Num_compilation_units;
  size_t curr = start;
  while (true) {
    if (curr >= in.size()) break;
    // Only consider stopping after consuming something. For tiny inputs the
    // preamble alone can reach the target size; stopping then would return
    // 'start' unchanged and the caller's loop would never terminate.
    if (curr > start && out.size() >= target_size) break;
    while (curr < in.size()) {
      // read functions -- lines until unindented '}'
      while (curr < in.size()) {
        const string& line = in.at(curr);
//?         cerr << curr << ": adding to function: " << line << '\n';
        out.push_back(line);  ++curr;
        if (!line.empty() && line.at(0) == '}') break;
      }
      // now look for a '#line' directive before the next non-comment non-empty
      // line
      while (curr < in.size()) {
        const string& line = in.at(curr);
        // leave the directive unconsumed; it starts the next chunk
        if (starts_with(line, "#line ")) goto try_return;
        out.push_back(line);  ++curr;
        if (trim(line).empty()) continue;
        if (starts_with(trim(line), "//")) continue;
        // some other code; go back to reading until the next unindented '}'
        break;
      }
    }
    try_return:;
  }
  return curr;
}

// compare contents of a file with a list of lines, ignoring #line directives
// on both sides
// Empty lines are skipped on both sides as well.
// Returns true only if the file exists and the remaining lines match exactly.
bool no_change(const vector<string>& lines, const string& output_filename) {
  // A file that can't be opened always counts as changed. Otherwise contents
  // consisting only of blank lines and #line directives would compare equal
  // to a missing file, and the file would never get created.
  if (!ifstream(output_filename.c_str())) return false;
  vector<string> old_lines;
  slurp(output_filename, old_lines);
  size_t l=0, o=0;
  while (true) {
    while (l < lines.size() &&
            (lines.at(l).empty() || starts_with(lines.at(l), "#line "))) {
      ++l;
    }
    while (o < old_lines.size() &&
            (old_lines.at(o).empty() || starts_with(old_lines.at(o), "#line "))) {
      ++o;
    }
    if (l >= lines.size() && o >= old_lines.size()) return true;  // no change
    if (l >= lines.size() || o >= old_lines.size()) return false;  // contents changed
//?     cerr << "comparing\n";
//?     cerr << o << ": " << old_lines.at(o) << '\n';
//?     cerr << l << ": " << lines.at(l) << '\n';
    if (lines.at(l) != old_lines.at(o)) return false;  // contents changed
    ++l;  ++o;
  }
  assert(false);
}

// Return "<output_directory>/mu_<n>.cc", where n counts up from 0 across
// calls.
string next_output_filename(const string& output_directory) {
  static int file_count = 0;
  ostringstream out;
  out << output_directory << "/mu_" << file_count << ".cc";
  file_count++;
  return out.str();
}

// Write 'lines' to 'output_filename', one per line, unless no_change() says
// the file on disk is already equivalent.
// Exits with a failure status if the file can't be opened for writing.
void emit_file(const vector<string>& lines, const string& output_filename) {
  if (no_change(lines, output_filename)) return;
  cerr << " updating " << output_filename << '\n';
  ofstream out(output_filename.c_str());
  if (!out) {
    // e.g. the output directory doesn't exist; don't pretend we wrote it
    cerr << "cleave: couldn't write to " << output_filename << '\n';
    exit(EXIT_FAILURE);
  }
  for (size_t i = 0;  i < lines.size();  ++i)
    out << lines.at(i) << '\n';
}

// Write 'lines' to the next mu_<n>.cc file in 'output_directory'.
void emit_compilation_unit(const vector<string>& lines, const string& output_directory) {
  string output_filename = next_output_filename(output_directory);
  emit_file(lines, output_filename);
}

int main(int argc, const char* argv[]) {
  if (argc != 3) {
    cerr << "usage: cleave [input .cc file] [output directory]\n";
    // non-zero so that a misconfigured build stops here
    return EXIT_FAILURE;
  }

  // read input
  if (!ifstream(argv[1])) {
    cerr << "cleave: couldn't read " << argv[1] << '\n';
    return EXIT_FAILURE;
  }
  vector<string> lines;
  slurp(argv[1], lines);

  // write header until but excluding '// Globals' delimiter
  string output_directory = argv[2];
  size_t line_num = 0;
  {
    vector<string> out;
    while (line_num < lines.size()) {
      const string& line = lines.at(line_num);
      if (trim(line) == "// Globals") break;  // todo: #line directive for delimiters
      out.push_back(line);
      ++line_num;
    }
    emit_file(out, output_directory+"/header");
  }

  // write global_definitions_list (including delimiters)
  {
    vector<string> out;
    while (line_num < lines.size()) {
      const string& line = lines.at(line_num);
      out.push_back(line);
      ++line_num;
      if (trim(line) == "// End Globals") break;
    }
    emit_file(out, output_directory+"/global_definitions_list");
  }

  // segment functions
  // first one is special
  if (line_num < lines.size()) {
    vector<string> function;
    line_num = slurp_some_functions(lines, line_num, function, true);
    emit_compilation_unit(function, output_directory);
  }
  // slurp_some_functions() always advances when there's input left, so this
  // terminates
  while (line_num < lines.size()) {
    vector<string> function;
    line_num = slurp_some_functions(lines, line_num, function, false);
    emit_compilation_unit(function, output_directory);
  }
}