From 71e4f3812982dba2efb471283d310224e8db363e Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Wed, 3 Mar 2021 22:09:50 -0800 Subject: 7842 - new directory organization Baremetal is now the default build target and therefore has its sources at the top-level. Baremetal programs build using the phase-2 Mu toolchain that requires a Linux kernel. This phase-2 codebase which used to be at the top-level is now under the linux/ directory. Finally, the phase-2 toolchain, while self-hosting, has a way to bootstrap from a C implementation, which is now stored in linux/bootstrap. The bootstrap C implementation uses some literate programming tools that are now in linux/bootstrap/tools. So the whole thing has gotten inverted. Each directory should build one artifact and include the main sources (along with standard library). Tools used for building it are relegated to sub-directories, even though those tools are often useful in their own right, and have had lots of interesting programs written using them. A couple of things have gotten dropped in this process: - I had old ways to run on just a Linux kernel, or with a Soso kernel. No more. - I had some old tooling for running a single test at the cursor. I haven't used that lately. Maybe I'll bring it back one day. The reorg isn't done yet. Still to do: - redo documentation everywhere. All the README files, all other markdown, particularly vocabulary.md. - clean up how-to-run comments at the start of programs everywhere - rethink what to do with the html/ directory. Do we even want to keep supporting it? In spite of these shortcomings, all the scripts at the top-level, linux/ and linux/bootstrap are working. The names of the scripts also feel reasonable. This is a good milestone to take stock at. --- linux/bootstrap/tools/enumerate.cc | 26 + linux/bootstrap/tools/tangle.cc | 1077 ++++++++++++++++++++++++++++++++ linux/bootstrap/tools/tangle.readme.md | 112 ++++ 3 files changed, 1215 insertions(+) create mode 100644 linux/bootstrap/tools/enumerate.cc create mode 100644 linux/bootstrap/tools/tangle.cc create mode 100644 linux/bootstrap/tools/tangle.readme.md (limited to 'linux/bootstrap/tools') diff --git a/linux/bootstrap/tools/enumerate.cc b/linux/bootstrap/tools/enumerate.cc new file mode 100644 index 00000000..2777c407 --- /dev/null +++ b/linux/bootstrap/tools/enumerate.cc @@ -0,0 +1,26 @@ +#include +#include +#include +#include +using std::vector; +#include +using std::string; +#include +using std::cout; + +int main(int argc, const char* argv[]) { + assert(argc == 3); + assert(string(argv[1]) == "--until"); + string last_file(argv[2]); + + dirent** files; + int num_files = scandir(".", &files, NULL, alphasort); + for (int i = 0; i < num_files; ++i) { + string curr_file = files[i]->d_name; + if (!isdigit(curr_file.at(0))) continue; + if (!last_file.empty() && curr_file > last_file) break; + cout << curr_file << '\n'; + } + // don't bother freeing files + return 0; +} diff --git a/linux/bootstrap/tools/tangle.cc b/linux/bootstrap/tools/tangle.cc new file mode 100644 index 00000000..c63db5df --- /dev/null +++ b/linux/bootstrap/tools/tangle.cc @@ -0,0 +1,1077 @@ +// Reorder a file based on directives starting with ':(' (tangle directives). +// Insert #line directives to preserve line numbers in the original. +// Clear lines starting with '//:' (tangle comments). + +#include +#include +#include + +#include +using std::vector; +#include +using std::list; +#include +using std::pair; + +#include +using std::string; + +#include +using std::istream; +using std::ostream; +using std::cin; +using std::cout; +using std::cerr; + +#include +using std::istringstream; +using std::ostringstream; + +#include +using std::ifstream; + +#include +using std::isspace; // unicode-aware + +//// Core data structures + +struct Line { + string filename; + size_t line_number; + string contents; + Line() :line_number(0) {} + Line(const string& text) :line_number(0) { + contents = text; + } + Line(const string& text, const string& f, const size_t& l) { + contents = text; + filename = f; + line_number = l; + } + Line(const string& text, const Line& origin) { + contents = text; + filename = origin.filename; + line_number = origin.line_number; + } +}; + +// Emit a list of line contents, inserting directives just at discontinuities. +// Needs to be a macro because 'out' can have the side effect of creating a +// new trace in Trace_stream. +#define EMIT(lines, out) if (!lines.empty()) { \ + string last_file = lines.begin()->filename; \ + size_t last_line = lines.begin()->line_number-1; \ + out << line_directive(lines.begin()->line_number, lines.begin()->filename) << '\n'; \ + for (list::const_iterator p = lines.begin(); p != lines.end(); ++p) { \ + if (last_file != p->filename || last_line != p->line_number-1) \ + out << line_directive(p->line_number, p->filename) << '\n'; \ + out << p->contents << '\n'; \ + last_file = p->filename; \ + last_line = p->line_number; \ + } \ +} + +//// Traces and white-box tests + +bool Passed = true; + +long Num_failures = 0; + +#define CHECK(X) \ + if (!(X)) { \ + ++Num_failures; \ + cerr << "\nF " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << '\n'; \ + Passed = false; \ + return; \ + } + +#define CHECK_EQ(X, Y) \ + if ((X) != (Y)) { \ + ++Num_failures; \ + cerr << "\nF " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << " == " << #Y << '\n'; \ + cerr << " got " << (X) << '\n'; /* BEWARE: multiple eval */ \ + Passed = false; \ + return; \ + } + +bool Hide_warnings = false; + +struct trace_stream { + vector > past_lines; // [(layer label, line)] + // accumulator for current line + ostringstream* curr_stream; + string curr_layer; + trace_stream() :curr_stream(NULL) {} + ~trace_stream() { if (curr_stream) delete curr_stream; } + + ostringstream& stream(string layer) { + newline(); + curr_stream = new ostringstream; + curr_layer = layer; + return *curr_stream; + } + + // be sure to call this before messing with curr_stream or curr_layer + void newline() { + if (!curr_stream) return; + string curr_contents = curr_stream->str(); + curr_contents.erase(curr_contents.find_last_not_of("\r\n")+1); + past_lines.push_back(pair(curr_layer, curr_contents)); + delete curr_stream; + curr_stream = NULL; + } + + string readable_contents(string layer) { // missing layer = everything + newline(); + ostringstream output; + for (vector >::iterator p = past_lines.begin(); p != past_lines.end(); ++p) + if (layer.empty() || layer == p->first) + output << p->first << ": " << with_newline(p->second); + return output.str(); + } + + string with_newline(string s) { + if (s[s.size()-1] != '\n') return s+'\n'; + return s; + } +}; + +trace_stream* Trace_stream = NULL; + +// Top-level helper. IMPORTANT: can't nest. +#define trace(layer) !Trace_stream ? cerr /*print nothing*/ : Trace_stream->stream(layer) +// Warnings should go straight to cerr by default since calls to trace() have +// some unfriendly constraints (they delay printing, they can't nest) +#define raise ((!Trace_stream || !Hide_warnings) ? cerr /*do print*/ : Trace_stream->stream("warn")) << __FILE__ << ":" << __LINE__ << " " + +// raise << die exits after printing -- unless Hide_warnings is set. +struct die {}; +ostream& operator<<(ostream& os, __attribute__((unused)) die) { + if (Hide_warnings) return os; + os << "dying\n"; + exit(1); +} + +#define CLEAR_TRACE delete Trace_stream, Trace_stream = new trace_stream; + +#define DUMP(layer) cerr << Trace_stream->readable_contents(layer) + +// Trace_stream is a resource, lease_tracer uses RAII to manage it. +struct lease_tracer { + lease_tracer() { Trace_stream = new trace_stream; } + ~lease_tracer() { delete Trace_stream, Trace_stream = NULL; } +}; + +#define START_TRACING_UNTIL_END_OF_SCOPE lease_tracer leased_tracer; + +vector split(string s, string delim) { + vector result; + string::size_type begin=0, end=s.find(delim); + while (true) { + if (end == string::npos) { + result.push_back(string(s, begin, string::npos)); + break; + } + result.push_back(string(s, begin, end-begin)); + begin = end+delim.size(); + end = s.find(delim, begin); + } + return result; +} + +bool check_trace_contents(string FUNCTION, string FILE, int LINE, string layer, string expected) { // empty layer == everything + vector expected_lines = split(expected, "\n"); + size_t curr_expected_line = 0; + while (curr_expected_line < expected_lines.size() && expected_lines[curr_expected_line].empty()) + ++curr_expected_line; + if (curr_expected_line == expected_lines.size()) return true; + Trace_stream->newline(); + ostringstream output; + for (vector >::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { + if (!layer.empty() && layer != p->first) + continue; + if (p->second != expected_lines[curr_expected_line]) + continue; + ++curr_expected_line; + while (curr_expected_line < expected_lines.size() && expected_lines[curr_expected_line].empty()) + ++curr_expected_line; + if (curr_expected_line == expected_lines.size()) return true; + } + + ++Num_failures; + cerr << "\nF " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << expected_lines[curr_expected_line] << "] in trace:\n"; + DUMP(layer); + Passed = false; + return false; +} + +#define CHECK_TRACE_CONTENTS(...) check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__) + +int trace_count(string layer, string line) { + Trace_stream->newline(); + long result = 0; + for (vector >::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) { + if (layer == p->first) + if (line == "" || p->second == line) + ++result; + } + return result; +} + +#define CHECK_TRACE_WARNS() CHECK(trace_count("warn", "") > 0) +#define CHECK_TRACE_DOESNT_WARN() \ + if (trace_count("warn") > 0) { \ + ++Num_failures; \ + cerr << "\nF " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected warnings\n"; \ + DUMP("warn"); \ + Passed = false; \ + return; \ + } + +bool trace_doesnt_contain(string layer, string line) { + return trace_count(layer, line) == 0; +} + +#define CHECK_TRACE_DOESNT_CONTAIN(...) CHECK(trace_doesnt_contain(__VA_ARGS__)) + +// Tests for trace infrastructure + +void test_trace_check_compares() { + CHECK_TRACE_CONTENTS("test layer", ""); + trace("test layer") << "foo"; + CHECK_TRACE_CONTENTS("test layer", "foo"); +} + +void test_trace_check_filters_layers() { + trace("test layer 1") << "foo"; + trace("test layer 2") << "bar"; + CHECK_TRACE_CONTENTS("test layer 1", "foo"); +} + +void test_trace_check_ignores_other_lines() { + trace("test layer 1") << "foo"; + trace("test layer 1") << "bar"; + CHECK_TRACE_CONTENTS("test layer 1", "foo"); +} + +void test_trace_check_always_finds_empty_lines() { + CHECK_TRACE_CONTENTS("test layer 1", ""); +} + +void test_trace_check_treats_empty_layers_as_wildcards() { + trace("test layer 1") << "foo"; + CHECK_TRACE_CONTENTS("", "foo"); +} + +void test_trace_check_multiple_lines_at_once() { + trace("test layer 1") << "foo"; + trace("test layer 2") << "bar"; + CHECK_TRACE_CONTENTS("", "foo\n" + "bar\n"); +} + +void test_trace_check_always_finds_empty_lines2() { + CHECK_TRACE_CONTENTS("test layer 1", "\n\n\n"); +} + +void test_trace_orders_across_layers() { + trace("test layer 1") << "foo"; + trace("test layer 2") << "bar"; + trace("test layer 1") << "qux"; + CHECK_TRACE_CONTENTS("", "foo\n" + "bar\n" + "qux\n"); +} + +void test_trace_supports_count() { + trace("test layer 1") << "foo"; + trace("test layer 1") << "foo"; + CHECK_EQ(trace_count("test layer 1", "foo"), 2); +} + +//// helpers + +// can't check trace because trace methods call 'split' + +void test_split_returns_at_least_one_elem() { + vector result = split("", ","); + CHECK_EQ(result.size(), 1); + CHECK_EQ(result[0], ""); +} + +void test_split_returns_entire_input_when_no_delim() { + vector result = split("abc", ","); + CHECK_EQ(result.size(), 1); + CHECK_EQ(result[0], "abc"); +} + +void test_split_works() { + vector result = split("abc,def", ","); + CHECK_EQ(result.size(), 2); + CHECK_EQ(result[0], "abc"); + CHECK_EQ(result[1], "def"); +} + +void test_split_works2() { + vector result = split("abc,def,ghi", ","); + CHECK_EQ(result.size(), 3); + CHECK_EQ(result[0], "abc"); + CHECK_EQ(result[1], "def"); + CHECK_EQ(result[2], "ghi"); +} + +void test_split_handles_multichar_delim() { + vector result = split("abc,,def,,ghi", ",,"); + CHECK_EQ(result.size(), 3); + CHECK_EQ(result[0], "abc"); + CHECK_EQ(result[1], "def"); + CHECK_EQ(result[2], "ghi"); +} + +//// Core program + +#include "tangle.function_list" + +string line_directive(size_t line_number, string filename) { + ostringstream result; + if (filename.empty()) + result << "#line " << line_number; + else + result << "#line " << line_number << " \"" << filename << '"'; + return result.str(); +} + +string Toplevel = "run"; + +int main(int argc, const char* argv[]) { + if (flag("test", argc, argv)) + return run_tests(); + return tangle(argc, argv); +} + +bool flag(const string& flag, int argc, const char* argv[]) { + for (int i = 1; i < argc; ++i) + if (string(argv[i]) == flag) + return true; + return false; +} + +void setup() { + Hide_warnings = false; + Passed = true; +} + +void verify() { + Hide_warnings = false; + if (!Passed) + ; + else + cerr << "."; +} + +int tangle(int argc, const char* argv[]) { + list result; + for (int i = 1; i < argc; ++i) { +//? cerr << "new file " << argv[i] << '\n'; + Toplevel = "run"; + ifstream in(argv[i]); + tangle(in, argv[i], result); + } + + EMIT(result, cout); + return 0; +} + +void tangle(istream& in, const string& filename, list& out) { + string curr_line; + size_t line_number = 1; + while (!in.eof()) { + getline(in, curr_line); + if (starts_with(curr_line, ":(")) { + ++line_number; + process_next_hunk(in, trim(curr_line), filename, line_number, out); + continue; + } + if (starts_with(curr_line, "//:")) { + ++line_number; + continue; + } + out.push_back(Line(curr_line, filename, line_number)); + ++line_number; + } + + // Trace all line contents, inserting directives just at discontinuities. + if (!Trace_stream) return; + EMIT(out, Trace_stream->stream("tangle")); +} + +// just for tests +void tangle(istream& in, list& out) { + tangle(in, "", out); +} + +void process_next_hunk(istream& in, const string& directive, const string& filename, size_t& line_number, list& out) { + istringstream directive_stream(directive.substr(2)); // length of ":(" + string cmd = next_tangle_token(directive_stream); + + // first slurp all lines until next directive + list hunk; + { + string curr_line; + while (!in.eof()) { + std::streampos old = in.tellg(); + getline(in, curr_line); + if (starts_with(curr_line, ":(")) { + in.seekg(old); + break; + } + if (starts_with(curr_line, "//:")) { + // tangle comments + ++line_number; + continue; + } + hunk.push_back(Line(curr_line, filename, line_number)); + ++line_number; + } + } + + if (cmd == "code") { + out.insert(out.end(), hunk.begin(), hunk.end()); + return; + } + + if (cmd == "before" || cmd == "after" || cmd == "replace" || cmd == "replace{}" || cmd == "delete" || cmd == "delete{}") { + list::iterator target = locate_target(out, directive_stream); + if (target == out.end()) { + raise << "couldn't find target " << directive << '\n' << die(); + return; + } + + indent_all(hunk, target); + + if (cmd == "before") { + out.splice(target, hunk); + } + else if (cmd == "after") { + ++target; + out.splice(target, hunk); + } + else if (cmd == "replace" || cmd == "delete") { + out.splice(target, hunk); + out.erase(target); + } + else if (cmd == "replace{}" || cmd == "delete{}") { + if (find_trim(hunk, ":OLD_CONTENTS") == hunk.end()) { + out.splice(target, hunk); + out.erase(target, balancing_curly(target)); + } + else { + list::iterator next = balancing_curly(target); + list old_version; + old_version.splice(old_version.begin(), out, target, next); + old_version.pop_back(); old_version.pop_front(); // contents only please, not surrounding curlies + + list::iterator new_pos = find_trim(hunk, ":OLD_CONTENTS"); + indent_all(old_version, new_pos); + hunk.splice(new_pos, old_version); + hunk.erase(new_pos); + out.splice(next, hunk); + } + } + return; + } + + raise << "unknown directive " << cmd << '\n' << die(); +} + +list::iterator locate_target(list& out, istream& directive_stream) { + string pat = next_tangle_token(directive_stream); + if (pat == "") return out.end(); + + string next_token = next_tangle_token(directive_stream); + if (next_token == "") { + return find_substr(out, pat); + } + // first way to do nested pattern: pattern 'following' intermediate + else if (next_token == "following") { + string pat2 = next_tangle_token(directive_stream); + if (pat2 == "") return out.end(); + list::iterator intermediate = find_substr(out, pat2); + if (intermediate == out.end()) return out.end(); + return find_substr(out, intermediate, pat); + } + // second way to do nested pattern: intermediate 'then' pattern + else if (next_token == "then") { + list::iterator intermediate = find_substr(out, pat); + if (intermediate == out.end()) return out.end(); + string pat2 = next_tangle_token(directive_stream); + if (pat2 == "") return out.end(); + return find_substr(out, intermediate, pat2); + } + raise << "unknown keyword in directive: " << next_token << '\n'; + return out.end(); +} + +// indent all lines in l like indentation at exemplar +void indent_all(list& l, list::iterator exemplar) { + string curr_indent = indent(exemplar->contents); + for (list::iterator p = l.begin(); p != l.end(); ++p) + if (!p->contents.empty()) + p->contents.insert(p->contents.begin(), curr_indent.begin(), curr_indent.end()); +} + +string next_tangle_token(istream& in) { + in >> std::noskipws; + ostringstream out; + skip_whitespace(in); + if (in.peek() == '"') + slurp_tangle_string(in, out); + else + slurp_word(in, out); + return out.str(); +} + +void slurp_tangle_string(istream& in, ostream& out) { + in.get(); + char c; + while (in >> c) { + if (c == '\\') { + // skip backslash and save next character unconditionally + in >> c; + out << c; + continue; + } + if (c == '"') break; + out << c; + } +} + +void slurp_word(istream& in, ostream& out) { + char c; + while (in >> c) { + if (isspace(c) || c == ')') { + in.putback(c); + break; + } + out << c; + } +} + +void skip_whitespace(istream& in) { + while (isspace(in.peek())) + in.get(); +} + +list::iterator balancing_curly(list::iterator curr) { + long open_curlies = 0; + do { + for (string::iterator p = curr->contents.begin(); p != curr->contents.end(); ++p) { + if (*p == '{') ++open_curlies; + if (*p == '}') --open_curlies; + } + ++curr; + // no guard so far against unbalanced curly, including inside comments or strings + } while (open_curlies != 0); + return curr; +} + +list::iterator find_substr(list& in, const string& pat) { + for (list::iterator p = in.begin(); p != in.end(); ++p) + if (p->contents.find(pat) != string::npos) + return p; + return in.end(); +} + +list::iterator find_substr(list& in, list::iterator p, const string& pat) { + for (; p != in.end(); ++p) + if (p->contents.find(pat) != string::npos) + return p; + return in.end(); +} + +list::iterator find_trim(list& in, const string& pat) { + for (list::iterator p = in.begin(); p != in.end(); ++p) + if (trim(p->contents) == pat) + return p; + return in.end(); +} + +string escape(string s) { + s = replace_all(s, "\\", "\\\\"); + s = replace_all(s, "\"", "\\\""); + s = replace_all(s, "", "\\n"); + return s; +} + +string replace_all(string s, const string& a, const string& b) { + for (size_t pos = s.find(a); pos != string::npos; pos = s.find(a, pos+b.size())) + s = s.replace(pos, a.size(), b); + return s; +} + +// does s start with pat, after skipping whitespace? +// pat can't start with whitespace +bool starts_with(const string& s, const string& pat) { + for (size_t pos = 0; pos < s.size(); ++pos) + if (!isspace(s.at(pos))) + return s.compare(pos, pat.size(), pat) == 0; + return false; +} + +string indent(const string& s) { + for (size_t pos = 0; pos < s.size(); ++pos) + if (!isspace(s.at(pos))) + return s.substr(0, pos); + return ""; +} + +string strip_indent(const string& s, size_t n) { + if (s.empty()) return ""; + string::const_iterator curr = s.begin(); + while (curr != s.end() && n > 0 && isspace(*curr)) { + ++curr; + --n; + } + return string(curr, s.end()); +} + +string trim(const string& s) { + string::const_iterator first = s.begin(); + while (first != s.end() && isspace(*first)) + ++first; + if (first == s.end()) return ""; + + string::const_iterator last = --s.end(); + while (last != s.begin() && isspace(*last)) + --last; + ++last; + return string(first, last); +} + +const Line& front(const list& l) { + assert(!l.empty()); + return l.front(); +} + +//// Tests for tangle + +void test_tangle() { + istringstream in("a\n" + "b\n" + "c\n" + ":(before b)\n" + "d\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "d\n" + "b\n" + "c\n"); +} + +void test_tangle_with_linenumber() { + istringstream in("a\n" + "b\n" + "c\n" + ":(before b)\n" + "d\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "#line 1\n" + "a\n" + "#line 5\n" + "d\n" + "#line 2\n" + "b\n" + "c\n"); + // no other #line directives + CHECK_TRACE_DOESNT_CONTAIN("tangle", "#line 3"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "#line 4"); +} + +void test_tangle_linenumbers_with_filename() { + istringstream in("a\n" + "b\n" + "c\n" + ":(before b)\n" + "d\n"); + list dummy; + tangle(in, "foo", dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "#line 5 \"foo\"\n" + "d\n" + "b\n" + "c\n"); +} + +void test_tangle_line_numbers_with_multiple_filenames() { + istringstream in1("a\n" + "b\n" + "c"); + list dummy; + tangle(in1, "foo", dummy); + CLEAR_TRACE; + istringstream in2(":(before b)\n" + "d\n"); + tangle(in2, "bar", dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "#line 2 \"bar\"\n" + "d\n" + "#line 2 \"foo\"\n" + "b\n" + "c\n"); +} + +void test_tangle_linenumbers_with_multiple_directives() { + istringstream in1("a\n" + "b\n" + "c"); + list dummy; + tangle(in1, "foo", dummy); + CLEAR_TRACE; + istringstream in2(":(before b)\n" + "d\n" + ":(before c)\n" + "e"); + tangle(in2, "bar", dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "#line 2 \"bar\"\n" + "d\n" + "#line 2 \"foo\"\n" + "b\n" + "#line 4 \"bar\"\n" + "e\n" + "#line 3 \"foo\"\n" + "c\n"); +} + +void test_tangle_with_multiple_filenames_after() { + istringstream in1("a\n" + "b\n" + "c"); + list dummy; + tangle(in1, "foo", dummy); + CLEAR_TRACE; + istringstream in2(":(after b)\n" + "d\n"); + tangle(in2, "bar", dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "b\n" + "#line 2 \"bar\"\n" + "d\n" + "#line 3 \"foo\"\n" + "c\n"); +} + +void test_tangle_skip_tanglecomments() { + istringstream in("a\n" + "b\n" + "c\n" + "//: 1\n" + "//: 2\n" + "d\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "b\n" + "c\n" + "\n" + "\n" + "d\n"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "//: 1"); +} + +void test_tangle_with_tanglecomments_and_directive() { + istringstream in("a\n" + "//: 1\n" + "b\n" + "c\n" + ":(before b)\n" + "d\n" + ":(code)\n" + "e\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "#line 6\n" + "d\n" + "#line 3\n" + "b\n" + "c\n" + "#line 8\n" + "e\n"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "//: 1"); +} + +void test_tangle_with_tanglecomments_inside_directive() { + istringstream in("a\n" + "//: 1\n" + "b\n" + "c\n" + ":(before b)\n" + "//: abc\n" + "d\n" + ":(code)\n" + "e\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "#line 7\n" + "d\n" + "#line 3\n" + "b\n" + "c\n" + "#line 9\n" + "e\n"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "//: 1"); +} + +void test_tangle_with_multiword_directives() { + istringstream in("a b\n" + "c\n" + ":(after \"a b\")\n" + "d\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a b\n" + "d\n" + "c\n"); +} + +void test_tangle_with_quoted_multiword_directives() { + istringstream in("a \"b\"\n" + "c\n" + ":(after \"a \\\"b\\\"\")\n" + "d\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a \"b\"\n" + "d\n" + "c\n"); +} + +void test_tangle2() { + istringstream in("a\n" + "b\n" + "c\n" + ":(after b)\n" + "d\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "b\n" + "d\n" + "c\n"); +} + +void test_tangle_at_end() { + istringstream in("a\n" + "b\n" + "c\n" + ":(after c)\n" + "d\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "b\n" + "c\n" + "d\n"); +} + +void test_tangle_indents_hunks_correctly() { + istringstream in("a\n" + " b\n" + "c\n" + ":(after b)\n" + "d\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + " b\n" + " d\n" + "c\n"); +} + +void test_tangle_warns_on_missing_target() { + Hide_warnings = true; + istringstream in(":(before)\n" + "abc def\n"); + list lines; + tangle(in, lines); + CHECK_TRACE_WARNS(); +} + +void test_tangle_warns_on_unknown_target() { + Hide_warnings = true; + istringstream in(":(before \"foo\")\n" + "abc def\n"); + list lines; + tangle(in, lines); + CHECK_TRACE_WARNS(); +} + +void test_tangle_delete_range_of_lines() { + istringstream in("a\n" + "b {\n" + "c\n" + "}\n" + ":(delete{} \"b\")\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "b"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "c"); +} + +void test_tangle_replace() { + istringstream in("a\n" + "b\n" + "c\n" + ":(replace b)\n" + "d\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "d\n" + "c\n"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "b"); +} + +void test_tangle_replace_range_of_lines() { + istringstream in("a\n" + "b {\n" + "c\n" + "}\n" + ":(replace{} \"b\")\n" + "d\n" + "e\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "d\n" + "e\n"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "b {"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "c"); +} + +void test_tangle_replace_tracks_old_lines() { + istringstream in("a\n" + "b {\n" + "c\n" + "}\n" + ":(replace{} \"b\")\n" + "d\n" + ":OLD_CONTENTS\n" + "e\n"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "d\n" + "c\n" + "e\n"); + CHECK_TRACE_DOESNT_CONTAIN("tangle", "b {"); +} + +void test_tangle_nested_patterns() { + istringstream in("a\n" + "c\n" + "b\n" + "c\n" + "d\n" + ":(after \"b\" then \"c\")\n" + "e"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "c\n" + "b\n" + "c\n" + "e\n" + "d\n"); +} + +void test_tangle_nested_patterns2() { + istringstream in("a\n" + "c\n" + "b\n" + "c\n" + "d\n" + ":(after \"c\" following \"b\")\n" + "e"); + list dummy; + tangle(in, dummy); + CHECK_TRACE_CONTENTS("tangle", "a\n" + "c\n" + "b\n" + "c\n" + "e\n" + "d\n"); +} + +// todo: include line numbers in tangle errors + +void test_trim() { + CHECK_EQ(trim(""), ""); + CHECK_EQ(trim(" "), ""); + CHECK_EQ(trim(" "), ""); + CHECK_EQ(trim("a"), "a"); + CHECK_EQ(trim(" a"), "a"); + CHECK_EQ(trim(" a"), "a"); + CHECK_EQ(trim(" ab"), "ab"); + CHECK_EQ(trim("a "), "a"); + CHECK_EQ(trim("a "), "a"); + CHECK_EQ(trim("ab "), "ab"); + CHECK_EQ(trim(" a "), "a"); + CHECK_EQ(trim(" a "), "a"); + CHECK_EQ(trim(" ab "), "ab"); +} + +void test_strip_indent() { + CHECK_EQ(strip_indent("", 0), ""); + CHECK_EQ(strip_indent("", 1), ""); + CHECK_EQ(strip_indent("", 3), ""); + CHECK_EQ(strip_indent(" ", 0), " "); + CHECK_EQ(strip_indent(" a", 0), " a"); + CHECK_EQ(strip_indent(" ", 1), ""); + CHECK_EQ(strip_indent(" a", 1), "a"); + CHECK_EQ(strip_indent(" ", 2), ""); + CHECK_EQ(strip_indent(" a", 2), "a"); + CHECK_EQ(strip_indent(" ", 0), " "); + CHECK_EQ(strip_indent(" a", 0), " a"); + CHECK_EQ(strip_indent(" ", 1), " "); + CHECK_EQ(strip_indent(" a", 1), " a"); + CHECK_EQ(strip_indent(" ", 2), ""); + CHECK_EQ(strip_indent(" a", 2), "a"); + CHECK_EQ(strip_indent(" ", 3), ""); + CHECK_EQ(strip_indent(" a", 3), "a"); +} + +//// Test harness + +typedef void (*test_fn)(void); + +const test_fn Tests[] = { + #include "tangle.test_list" // auto-generated; see 'build*' scripts +}; + +// Names for each element of the 'Tests' global, respectively. +const string Test_names[] = { + #include "tangle.test_name_list" // auto-generated; see 'build*' scripts +}; + +int run_tests() { + for (unsigned long i=0; i < sizeof(Tests)/sizeof(Tests[0]); ++i) { +//? cerr << "running " << Test_names[i] << '\n'; + START_TRACING_UNTIL_END_OF_SCOPE; + setup(); + (*Tests[i])(); + verify(); + } + + cerr << '\n'; + if (Num_failures > 0) + cerr << Num_failures << " failure" + << (Num_failures > 1 ? "s" : "") + << '\n'; + return Num_failures; +} diff --git a/linux/bootstrap/tools/tangle.readme.md b/linux/bootstrap/tools/tangle.readme.md new file mode 100644 index 00000000..be61d40e --- /dev/null +++ b/linux/bootstrap/tools/tangle.readme.md @@ -0,0 +1,112 @@ +[Literate Programming](https://en.wikipedia.org/wiki/Literate_programming) +tool to convert Mu's layers into compilable form. + +Mu's tangling directives differ from Knuth's classic implementation. The +classical approach starts out with labeled subsystems that are initially +empty, and adds code to them using two major directives: + +``` + ≡ + +``` + +``` + +≡ + +``` + +_(`` can span multiple lines.)_ + +This approach is best suited for top-down exposition. + +On the other hand, Mu's tangling directives are better suited for a cleaned-up +history of a codebase. Subsystems start out with a simple skeleton of the core +of the program. Later versions then tell a story of the evolution of the +program, with each version colocating all the code related to new features. + +Read more: +* http://akkartik.name/post/wart-layers +* http://akkartik.name/post/literate-programming +* https://github.com/akkartik/mu/blob/master/000organization.cc + +## directives + +Add code to a project: + +``` +:(code) + +``` + +Insert code before a specific line: + +``` +:(before ) + +``` + +Here `` is a substring matching a single line in the codebase. (We +never use regular expressions.) Surround the substring in `"` quotes if it +spans multiple words. + +Insert code _after_ a specific line: + +``` +:(after ) + +``` + +Delete a specific previously-added line (because it's not needed in a newer +version). + +``` +:(delete ) +``` + +Delete a block of code starting with a given header and surrounded by `{` and +`}`: + +``` +:(delete{}
) +``` + +_(Caveat: doesn't directly support C's `do`..`while` loops.)_ + +Replace a specific line with new code: + +``` +:(replace ) + +``` + +This is identical to: +``` +:(before ) + +:(delete ) +``` +_(Assuming `` did not insert a new line matching the substring ``.)_ + +Replace a block of code with another: + +``` +:(replace{}
) + +``` + +Insert code before or after a substring pattern that isn't quite a unique +waypoint in the whole codebase: + +``` +:(before following ) + +:(after following ) + +``` + +``` +:(before then ) + +:(after then ) + +``` -- cgit 1.4.1-2-gfad0