From 41302404c7d2d1d4dbf3858a7bcd5665b43bda82 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Mon, 2 Dec 2019 23:57:01 -0800 Subject: 5787 --- archive/2.vm/build2 | 8 +- archive/2.vm/build3 | 8 +- archive/2.vm/build4 | 18 ++-- archive/2.vm/clean | 2 +- archive/2.vm/cleave/Readme | 1 + archive/2.vm/cleave/cleave.cc | 243 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 262 insertions(+), 18 deletions(-) create mode 100644 archive/2.vm/cleave/Readme create mode 100644 archive/2.vm/cleave/cleave.cc (limited to 'archive') diff --git a/archive/2.vm/build2 b/archive/2.vm/build2 index b07e8a63..c6d5ef76 100755 --- a/archive/2.vm/build2 +++ b/archive/2.vm/build2 @@ -115,8 +115,8 @@ older_than mu.cc $LAYERS ../../enumerate/enumerate ../../tangle/tangle && { ../../tangle/tangle $LAYERS > mu.cc } -older_than ../../cleave/cleave ../../cleave/cleave.cc && { - $CXX $CXXFLAGS ../../cleave/cleave.cc -o ../../cleave/cleave +older_than cleave/cleave cleave/cleave.cc && { + $CXX $CXXFLAGS cleave/cleave.cc -o cleave/cleave rm -rf .build } @@ -132,8 +132,8 @@ grep -h "^\s*void test_" mu.cc |sed 's/^\s*void \(.*\)() {.*/"\1",/' |update . 
mkdir -p .build/termbox update_cp termbox/termbox.h .build/termbox -older_than mu_bin mu.cc *_list ../../cleave/cleave termbox/* && { - ../../cleave/cleave mu.cc .build +older_than mu_bin mu.cc *_list cleave/cleave termbox/* && { + cleave/cleave mu.cc .build noisy_cd .build # create the list of global variable declarations from the corresponding definitions grep ';' global_definitions_list |sed 's/[=(].*/;/' |sed 's/^[^\/# ]/extern &/' |sed 's/^extern extern /extern /' |update global_declarations_list diff --git a/archive/2.vm/build3 b/archive/2.vm/build3 index 42e8ffd9..7bacfb62 100755 --- a/archive/2.vm/build3 +++ b/archive/2.vm/build3 @@ -140,8 +140,8 @@ older_than mu.cc $LAYERS ../../enumerate/enumerate ../../tangle/tangle && { ../../tangle/tangle $LAYERS > mu.cc } -older_than ../../cleave/cleave ../../cleave/cleave.cc && { - $CXX $CXXFLAGS ../../cleave/cleave.cc -o ../../cleave/cleave +older_than cleave/cleave cleave/cleave.cc && { + $CXX $CXXFLAGS cleave/cleave.cc -o cleave/cleave rm -rf .build } @@ -157,8 +157,8 @@ grep -h "^\s*void test_" mu.cc |sed 's/^\s*void \(.*\)() {.*/"\1",/' |update . 
mkdir -p .build/termbox update_cp termbox/termbox.h .build/termbox -older_than mu_bin mu.cc *_list ../../cleave/cleave termbox/* && { - ../../cleave/cleave mu.cc .build +older_than mu_bin mu.cc *_list cleave/cleave termbox/* && { + cleave/cleave mu.cc .build noisy_cd .build # create the list of global variable declarations from the corresponding definitions grep ';' global_definitions_list |sed 's/[=(].*/;/' |sed 's/^[^\/# ]/extern &/' |sed 's/^extern extern /extern /' |update global_declarations_list diff --git a/archive/2.vm/build4 b/archive/2.vm/build4 index a1483089..08752831 100755 --- a/archive/2.vm/build4 +++ b/archive/2.vm/build4 @@ -187,19 +187,19 @@ mv_if_exists mu.cc $TMP ) & TMP=`mktemp` -mv_if_exists ../../cleave/cleave $TMP +mv_if_exists cleave/cleave $TMP ( - wait_for_all ../../cleave/cleave.cc - older_than $TMP ../../cleave/cleave.cc && { + wait_for_all cleave/cleave.cc + older_than $TMP cleave/cleave.cc && { echo "building cleave" - $CXX $CXXFLAGS ../../cleave/cleave.cc -o $TMP || quit + $CXX $CXXFLAGS cleave/cleave.cc -o $TMP || quit rm -rf .build echo "done building cleave" } - mv $TMP ../../cleave/cleave + mv $TMP cleave/cleave ) & -wait_for_all mu.cc ../../cleave/cleave # cleave/cleave just for the .build cleanup +wait_for_all mu.cc cleave/cleave # cleave/cleave just for the .build cleanup mkdir -p .build # auto-generate function declarations, so I can define them in any order # functions start out unindented, have all args on the same line, and end in ') {' @@ -215,10 +215,10 @@ update_cp termbox/termbox.h .build/termbox TMP=`mktemp` mv_if_exists mu_bin $TMP ( - wait_for_all mu.cc ../../cleave/cleave termbox/*.c termbox/*.h termbox/*.inl - older_than $TMP mu.cc *_list ../../cleave/cleave termbox/* && { + wait_for_all mu.cc cleave/cleave termbox/*.c termbox/*.h termbox/*.inl + older_than $TMP mu.cc *_list cleave/cleave termbox/* && { echo "building mu_bin" - ../../cleave/cleave mu.cc .build || quit + cleave/cleave mu.cc .build || quit cd .build 
# create the list of global variable declarations from the corresponding definitions grep ';' global_definitions_list |sed 's/[=(].*/;/' |sed 's/^[^\/# ]/extern &/' |sed 's/^extern extern /extern /' |update global_declarations_list diff --git a/archive/2.vm/clean b/archive/2.vm/clean index 24a0300f..e3678430 100755 --- a/archive/2.vm/clean +++ b/archive/2.vm/clean @@ -6,4 +6,4 @@ rm -rf mu.cc core.mu mu_bin* *_list .build rm -rf termbox/*.o termbox/libtermbox.a rm -rf .until .quit test $# -gt 0 && exit 0 # convenience: 'clean top-level' to leave subsidiary tools alone -rm -rf ../../enumerate/enumerate ../../tangle/tangle ../../tangle/*_list ../../cleave/cleave ../../*/*.dSYM +rm -rf ../../enumerate/enumerate ../../tangle/tangle ../../tangle/*_list cleave/cleave cleave/cleave.dSYM ../../*/*.dSYM diff --git a/archive/2.vm/cleave/Readme b/archive/2.vm/cleave/Readme new file mode 100644 index 00000000..038b6c98 --- /dev/null +++ b/archive/2.vm/cleave/Readme @@ -0,0 +1 @@ +Tool to construct compilation units out of Mu's layers. diff --git a/archive/2.vm/cleave/cleave.cc b/archive/2.vm/cleave/cleave.cc new file mode 100644 index 00000000..a3637e81 --- /dev/null +++ b/archive/2.vm/cleave/cleave.cc @@ -0,0 +1,243 @@ +// Read a single-file C++ program having a very specific structure, and split +// it up into multiple separate compilation units to reduce the work needed to +// rebuild after a small change. Write each compilation unit only if it has +// changed from what was on disk before. +// +// This tool is tightly coupled with the build system for this project. The +// makefile already auto-generates various things; we only do here what +// standard unix tools can't easily do. 
+// +// Usage: +// cleave [input C++ file] [existing output directory] +// +// The input C++ file has the following structure: +// [#includes] +// [type definitions] +// // Globals +// [global variable definitions] +// // End Globals +// [function definitions] +// +// Afterwards, the output directory contains: +// header -- everything before the '// Globals' delimiter +// global_definitions_list -- everything between '// Globals' and '// End Globals' delimiters +// [.cc files partitioning function definitions] +// +// Each output function definition file contains: +// #include "header" +// #include "global_declarations_list" +// [function definitions] +// +// To preserve the original layer-based line numbers in error messages and the +// debugger, we'll chunk the files only at boundaries where we encounter a +// '#line ' directive (generated by the previous tangle/ stage) between +// functions. +// +// One exception: the first file emitted #includes "global_definitions_list" instead +// of "global_declarations_list" + +// Tune this parameter to balance time for initial vs incremental build. 
//
//   decrease value -> faster initial build
//   increase value -> faster incremental build
//
// A compilation unit is closed once it holds at least
// (total input lines / Num_compilation_units) lines and the next '#line'
// boundary is reached, so this is a size target rather than an exact count
// of output files.
int Num_compilation_units = 3;

#include <cassert>
#include <cstdlib>
#include <cstring>

#include <vector>
using std::vector;
#include <list>
using std::list;
#include <utility>
using std::pair;

#include <string>
using std::string;

#include <iostream>
using std::istream;
using std::ostream;
using std::cin;
using std::cout;
using std::cerr;

#include <sstream>
using std::istringstream;
using std::ostringstream;

#include <fstream>
using std::ifstream;
using std::ofstream;

#include <cctype>
using std::isspace;  // one-argument form: classifies a single byte

// Return a copy of 's' with leading and trailing whitespace removed.
// An empty or all-whitespace string yields "".
string trim(const string& s) {
  string::const_iterator first = s.begin();
  // isspace() is only defined for values representable as unsigned char (or
  // EOF), so convert before classifying.
  while (first != s.end() && isspace(static_cast<unsigned char>(*first)))
    ++first;
  if (first == s.end()) return "";

  string::const_iterator last = s.end();
  --last;
  while (last != s.begin() && isspace(static_cast<unsigned char>(*last)))
    --last;
  ++last;  // one past the final non-space character
  return string(first, last);
}

// Does 's' begin with 'pat'? An empty 'pat' matches every string.
bool starts_with(const string& s, const string& pat) {
  return pat.size() <= s.size() && s.compare(0, pat.size(), pat) == 0;
}

// True while 'in' is in a good state and has not reported end-of-file.
bool has_data(istream& in) {
  return in && !in.eof();
}

// Replace the contents of 'lines' with the lines of file 'filename'
// (line terminators removed). If the file cannot be opened, 'lines' is left
// empty.
void slurp(const string& filename, vector<string>& lines) {
  lines.clear();
  ifstream in(filename.c_str());
  string curr_line;
  // Looping on getline()'s own result avoids appending a phantom empty line
  // after the final newline of the file.
  while (getline(in, curr_line))
    lines.push_back(curr_line);
}

// Copy lines from 'in' starting at index 'curr' to the end of 'out', one
// function at a time, until a '#line ' directive is found between functions
// or the input runs out. Returns the index of the first line not copied (the
// '#line ' directive itself is not copied).
static size_t append_until_line_directive(const vector<string>& in, size_t curr, vector<string>& out) {
  while (curr < in.size()) {
    // read a function -- lines up to and including an unindented '}'
    while (curr < in.size()) {
      const string& line = in.at(curr);
//?       cerr << curr << ": adding to function: " << line << '\n';
      out.push_back(line);
      ++curr;
      if (!line.empty() && line.at(0) == '}') break;
    }
    // now look for a '#line' directive before the next non-comment non-empty
    // line
    while (curr < in.size()) {
      const string& line = in.at(curr);
      if (starts_with(line, "#line ")) return curr;
      out.push_back(line);
      ++curr;
      const string stripped = trim(line);
      if (stripped.empty() || starts_with(stripped, "//")) continue;
      break;  // real code: the next function has already started
    }
  }
  return curr;
}

// Build the contents of one compilation unit in 'out'.
//
//   in     all lines of the input program
//   start  index in 'in' of the first line to copy
//   out    receives two #include lines, a blank line, and then function
//          definitions copied from 'in'; left empty if start is past the end
//   first  if true, #include "global_definitions_list" rather than
//          "global_declarations_list"
//
// Returns the index of the first line of 'in' that was not consumed. Whenever
// start < in.size() the result is greater than 'start', so callers looping
// until the end of the input always terminate.
size_t slurp_some_functions(const vector<string>& in, size_t start, vector<string>& out, bool first) {
  out.clear();
  if (start >= in.size()) return start;
  out.push_back("#include \"header\"");
  if (first)
    out.push_back("#include \"global_definitions_list\"");
  else
    out.push_back("#include \"global_declarations_list\"");
  out.push_back("");

  // Guard against a zero or negative tuning value.
  const size_t num_units = (Num_compilation_units > 0) ? static_cast<size_t>(Num_compilation_units) : 1;
  const size_t target_size = in.size()/num_units;

  size_t curr = start;
  bool consumed_something = false;
  while (curr < in.size()) {
    // Only stop on size after at least one chunk has been taken; otherwise a
    // tiny input (target_size <= the 3 preamble lines) would never advance.
    if (consumed_something && out.size() >= target_size) break;
    curr = append_until_line_directive(in, curr, out);
    consumed_something = true;
  }
  return curr;
}

// Lines that are skipped on both sides when comparing file contents.
static bool ignored_in_comparison(const string& line) {
  return line.empty() || starts_with(line, "#line ");
}

// compare contents of a file with a list of lines, ignoring empty lines and
// #line directives on both sides
//
// Returns true if the remaining lines match exactly. A file that cannot be
// read is compared as if it were empty.
bool no_change(const vector<string>& lines, const string& output_filename) {
  vector<string> old_lines;
  slurp(output_filename, old_lines);
  size_t l=0, o=0;
  while (true) {
    while (l < lines.size() && ignored_in_comparison(lines.at(l)))
      ++l;
    while (o < old_lines.size() && ignored_in_comparison(old_lines.at(o)))
      ++o;
    const bool new_done = (l >= lines.size());
    const bool old_done = (o >= old_lines.size());
    // both exhausted together -> no change; only one exhausted -> changed
    if (new_done || old_done) return new_done && old_done;
//?     cerr << "comparing\n";
//?     cerr << o << ": " << old_lines.at(o) << '\n';
//?     cerr << l << ": " << lines.at(l) << '\n';
    if (lines.at(l) != old_lines.at(o)) return false;  // contents changed
    ++l;
    ++o;
  }
}

// Return "<output_directory>/mu_<k>.cc", where k counts up from 0 across
// successive calls.
string next_output_filename(const string& output_directory) {
  static int file_count = 0;
  ostringstream out;
  out << output_directory << "/mu_" << file_count << ".cc";
  file_count++;
  return out.str();
}

// Write 'lines' to 'output_filename', one per line, unless the file already
// exists with equivalent contents (see no_change). Leaving an unchanged file
// alone keeps its timestamp intact. Exits with status 1 if the file cannot be
// opened for writing.
void emit_file(const vector<string>& lines, const string& output_filename) {
  // A missing file must always be created, even if 'lines' holds nothing
  // that no_change() would compare.
  const bool already_exists = ifstream(output_filename.c_str()).good();
  if (already_exists && no_change(lines, output_filename)) return;
  cerr << "  updating " << output_filename << '\n';
  ofstream out(output_filename.c_str());
  if (!out) {
    cerr << "cleave: cannot write " << output_filename << '\n';
    std::exit(1);
  }
  for (size_t i = 0; i < lines.size(); ++i)
    out << lines.at(i) << '\n';
}

// Write 'lines' to the next numbered .cc file inside 'output_directory'.
void emit_compilation_unit(const vector<string>& lines, const string& output_directory) {
  string output_filename = next_output_filename(output_directory);
  emit_file(lines, output_filename);
}

// Usage: cleave [input .cc file] [output directory]
// Returns 0 on success, 1 on a usage error or an unreadable input file.
int main(int argc, const char* argv[]) {
  if (argc != 3) {
    cerr << "usage: cleave [input .cc file] [output directory]\n";
    return 1;
  }

  // read input
  if (!ifstream(argv[1])) {
    // Bail out rather than treating a missing input as an empty program and
    // overwriting existing outputs with empty files.
    cerr << "cleave: cannot read " << argv[1] << '\n';
    return 1;
  }
  vector<string> lines;
  slurp(argv[1], lines);

  // write header until but excluding '// Globals' delimiter
  string output_directory = argv[2];
  size_t line_num = 0;
  {
    vector<string> out;
    while (line_num < lines.size()) {
      const string& line = lines.at(line_num);
      if (trim(line) == "// Globals") break;  // todo: #line directive for delimiters
      out.push_back(line);
      ++line_num;
    }
    emit_file(out, output_directory+"/header");
  }

  // write global_definitions_list (including delimiters)
  {
    vector<string> out;
    while (line_num < lines.size()) {
      const string& line = lines.at(line_num);
      out.push_back(line);
      ++line_num;
      if (trim(line) == "// End Globals") break;
    }
    emit_file(out, output_directory+"/global_definitions_list");
  }

  // segment functions
  // first one is special: it holds the global definitions
  bool first = true;
  while (line_num < lines.size()) {
    vector<string> function;
    line_num = slurp_some_functions(lines, line_num, function, first);
    emit_compilation_unit(function, output_directory);
    first = false;
  }
  return 0;
}