about summary refs log blame commit diff stats
path: root/cleave/cleave.cc
blob: ee83bbf2646c3b5a7adc253b44cef8a6400a5d0b (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic 
// Read a single-file C++ program having a very specific structure, and split
// it up into multiple separate compilation units to reduce the work needed to
// rebuild after a small change. Write each compilation unit only if it has
// changed from what was on disk before.
//
// This tool is tightly coupled with the build system for this project. The
// makefile already auto-generates various things; we only do here what
// standard unix tools can't easily do.
//
// Usage:
//  cleave [input C++ file] [existing output directory]
//
// The input C++ file has the following structure:
//   [#includes]
//   [type definitions]
//   // Globals
//   [global variable definitions]
//   // End Globals
//   [function definitions]
//
// Afterwards, the output directory contains:
//   header -- everything before the '// Globals' delimiter
//   global_definitions_list -- everything between '// Globals' and '// End Globals' delimiters
//   [.cc files partitioning function definitions]
//
// Each output function definition file contains:
//   #include "header"
//   #include "global_declarations_list"
//   [function definitions]
//
// We'll chunk the files at boundaries where we encounter a '#line ' directive
// between functions.
//
// One exception: the first file emitted #includes "global_definitions_list" instead
// of "global_declarations_list"

// Tune this parameter to balance time for initial vs incremental build.
//
//   Larger numbers -> larger/fewer compilation units -> faster initial build
//   Smaller numbers -> smaller compilation units -> faster incremental build
int Compilation_unit_size = 200;

#include<assert.h>
#include<cstdlib>
#include<cstring>

#include<vector>
using std::vector;
#include<list>
using std::list;
#include<utility>
using std::pair;

#include<string>
using std::string;

#include<iostream>
using std::istream;
using std::ostream;
using std::cin;
using std::cout;
using std::cerr;

#include<sstream>
using std::istringstream;
using std::ostringstream;

#include<fstream>
using std::ifstream;
using std::ofstream;

#include <locale>
using std::isspace;  // unicode-aware

string trim(const string& s) {
  string::const_iterator first = s.begin();
  while (first != s.end() && isspace(*first))
    ++first;
  if (first == s.end()) return "";

  string::const_iterator last = --s.end();
  while (last != s.begin() && isspace(*last))
    --last;
  ++last;
  return string(first, last);
}

bool starts_with(const string& s, const string& pat) {
  return s.substr(0, pat.size()) == pat;
}

bool has_data(istream& in) {
  return in && !in.eof();
}

void slurp(const string/*copy*/ filename, vector<string>& lines) {
  lines.clear();
  ifstream in(filename.c_str());
  while (has_data(in)) {
    string curr_line;
    getline(in, curr_line);
    lines.push_back(curr_line);
  }
}

size_t slurp_some_functions(const vector<string>& in, size_t start, vector<string>& out, bool first) {
  out.clear();
  if (start >= in.size()) return start;
  out.push_back("#include \"header\"");
  if (first)
    out.push_back("#include \"global_definitions_list\"");
  else
    out.push_back("#include \"global_declarations_list\"");
  out.push_back("");
  size_t curr = start;
  for (int i = 0; i < Compilation_unit_size; ++i) {
    while (curr < in.size()) {
      // read functions -- lines until unindented '}'
      while (curr < in.size()) {
        const string& line = in.at(curr);
//?         cerr << curr << ": adding to function: " << line << '\n';
        out.push_back(line);  ++curr;
        if (!line.empty() && line.at(0) == '}') break;
      }
      // now look for a '#line' directive before the next non-comment non-empty
      // line
      while (curr < in.size()) {
        const string& line = in.at(curr);
        if (starts_with(line, "#line ")) goto try_return;
        out.push_back(line);   ++curr;
        if (trim(line).empty()) continue;
        if (starts_with(trim(line), "//")) continue;
        break;
      }
    }
    try_return:;
  }

  // Idea: Increase the number of functions to include in the next call to
  // slurp_some_functions.
  // Early functions are more likely to be larger because later layers added
  // to them.
//?   Compilation_unit_size *= 1.5;
  return curr;
}

// compare contents of a file with a list of lines, ignoring #line directives
// on both sides
bool no_change(const vector<string>& lines, const string& output_filename) {
  vector<string> old_lines;
  slurp(output_filename, old_lines);
  size_t l=0, o=0;
  while (true) {
    while (l < lines.size() &&
            (lines.at(l).empty() || starts_with(lines.at(l), "#line "))) {
      ++l;
    }
    while (o < old_lines.size() &&
            (old_lines.at(o).empty() || starts_with(old_lines.at(o), "#line "))) {
      ++o;
    }
    if (l >= lines.size() && o >= old_lines.size()) return true;  // no change
    if (l >= lines.size() || o >= old_lines.size()) return false;  // contents changed
//?     cerr << "comparing\n";
//?     cerr << o << ": " << old_lines.at(o) << '\n';
//?     cerr << l << ": " << lines.at(l) << '\n';
    if (lines.at(l) != old_lines.at(o)) return false;  // contents changed
    ++l;  ++o;
  }
  assert(false);
}

string next_output_filename(const string& output_directory) {
  static int file_count = 0;
  ostringstream out;
  out << output_directory << "/mu_" << file_count << ".cc";
  file_count++;
  return out.str();
}

void emit_file(const vector<string>& lines, const string& output_filename) {
  if (no_change(lines, output_filename)) return;
  cerr << "  updating " << output_filename << '\n';
  ofstream out(output_filename.c_str());
  for (size_t i = 0; i < lines.size(); ++i)
    out << lines.at(i) << '\n';
}

void emit_compilation_unit(const vector<string>& lines, const string& output_directory) {
  string output_filename = next_output_filename(output_directory);
  emit_file(lines, output_filename);
}

int main(int argc, const char* argv[]) {
  if (argc != 3) {
    cerr << "usage: cleave [input .cc file] [output directory]\n";
    exit(0);
  }

  vector<string> lines;

  // read input
  slurp(argv[1], lines);

  string output_directory = argv[2];

  // write header until but excluding '// Global' delimiter
  size_t line_num = 0;
  {
    vector<string> out;
    while (line_num < lines.size()) {
      const string& line = lines.at(line_num);
      if (trim(line) == "// Globals") break;  // todo: #line directive for delimiters
      out.push_back(line);
      ++line_num;
    }
    emit_file(out, output_directory+"/header");
  }

  // write global_definitions_list (including delimiters)
  {
    vector<string> out;
    while (line_num < lines.size()) {
      const string& line = lines.at(line_num);
      out.push_back(line);
      ++line_num;
      if (trim(line) == "// End Globals") break;
    }
    emit_file(out, output_directory+"/global_definitions_list");
  }

  // segment functions
  // first one is special
  if (line_num < lines.size()) {
    vector<string> function;
    line_num = slurp_some_functions(lines, line_num, function, true);
    emit_compilation_unit(function, output_directory);
  }
  while (line_num < lines.size()) {
    vector<string> function;
    line_num = slurp_some_functions(lines, line_num, function, false);
    emit_compilation_unit(function, output_directory);
  }
}