mu - Soul of a tiny new machine. More thorough tests → More comprehensible and rewrite-friendly software → More resilient society.

	Commit message (Expand)	Author	Age	Files	Lines
*	5001 - drop the :(scenario) DSL	Kartik Agaram	2019-03-12	1	-70/+113
*	4987 - support `browse_trace` tool in SubX	Kartik Agaram	2019-02-25	1	-5/+5
*	4266 - space for alloc-id in heap allocations	Kartik Agaram	2018-06-24	1	-5/+16
*	4264	Kartik Agaram	2018-06-17	1	-0/+183
*	4259	Kartik Agaram	2018-06-16	1	-183/+0
*	4258 - undo 4257	Kartik Agaram	2018-06-15	1	-16/+5
*	4257 - abortive attempt at safe fat pointers	Kartik Agaram	2018-06-15	1	-5/+16
*	4247	Kartik Agaram	2018-05-25	1	-0/+19
*	3887 - clean up early exits in interpreter loop	Kartik K. Agaram	2017-05-28	1	-3/+12
*	3877	Kartik K. Agaram	2017-05-26	1	-8/+8
*	3810	Kartik K. Agaram	2017-04-04	1	-8/+8
*	3380	Kartik K. Agaram	2016-09-17	1	-5/+5
*	3120	Kartik K. Agaram	2016-07-21	1	-3/+3
*	2990	Kartik K. Agaram	2016-05-20	1	-8/+8
*	2881 - disallow recipe literals in conditional jumps	Kartik K. Agaram	2016-04-28	1	-3/+5
*	2803	Kartik K. Agaram	2016-03-21	1	-16/+16
*	2735 - define recipes using 'def'	Kartik K. Agaram	2016-03-08	1	-6/+6
*	2712	Kartik K. Agaram	2016-02-26	1	-8/+8
*	2685	Kartik K. Agaram	2016-02-19	1	-3/+3
*	2377 - stop using operator[] in map	Kartik K. Agaram	2015-11-06	1	-12/+12
*	2313	Kartik K. Agaram	2015-10-29	1	-5/+5
*	2258 - separate warnings from errors	Kartik K. Agaram	2015-10-06	1	-8/+8
*	2226 - standardize warning format	Kartik K. Agaram	2015-10-01	1	-8/+8
*	2223	Kartik K. Agaram	2015-09-30	1	-19/+31
*	2214	Kartik K. Agaram	2015-09-28	1	-0/+141

//: For convenience, some instructions will take literal arrays of characters //: (text or strings). //: //: Instead of quotes, we'll use [] to delimit strings. That'll reduce the //: need for escaping since we can support nested brackets. And we can also //: imagine that 'recipe' might one day itself be defined in Mu, doing its own //: parsing. void test_string_literal() { load( "def main [\n" " 1:address:array:character <- copy [abc def]\n" "]\n" ); CHECK_TRACE_CONTENTS( "parse: ingredient: {\"abc def\": \"literal-string\"}\n" ); } void test_string_literal_with_colons() { load( "def main [\n" " 1:address:array:character <- copy [abc:def/ghi]\n" "]\n" ); CHECK_TRACE_CONTENTS( "parse: ingredient: {\"abc:def/ghi\": \"literal-string\"}\n" ); } :(before "End Mu Types Initialization") put(Type_ordinal, "literal-string", 0); :(before "End next_word Special-cases") if (in.peek() == '[') { string result = slurp_quoted(in); skip_whitespace_and_comments_but_not_newline(in); return result; } :(code) string slurp_quoted(istream& in) { ostringstream out; assert(has_data(in)); assert(in.peek() == '['); out << static_cast<char>(in.get()); // slurp the '[' if (is_code_string(in, out)) slurp_quoted_comment_aware(in, out); else slurp_quoted_comment_oblivious(in, out); return out.str(); } // A string is a code string (ignores comments when scanning for matching // brackets) if it contains a newline at the start before any non-whitespace. bool is_code_string(istream& in, ostream& out) { while (has_data(in)) { char c = in.get(); if (!isspace(c)) { in.putback(c); return false; } out << c; if (c == '\n') { return true; } } return false; } // Read a regular string. Regular strings can only contain other regular // strings. void slurp_quoted_comment_oblivious(istream& in, ostream& out) { int brace_depth = 1; while (has_data(in)) { char c = in.get(); if (c == '\\') { slurp_one_past_backslashes(in, out); continue; } out << c; if (c == '[') ++brace_depth; if (c == ']') --brace_depth; if (brace_depth == 0) break; } if (!has_data(in) && brace_depth > 0) { raise << "unbalanced '['\n" << end(); out.clear(); } } // Read a code string. Code strings can contain either code or regular strings. void slurp_quoted_comment_aware(istream& in, ostream& out) { char c; while (in >> c) { if (c == '\\') { slurp_one_past_backslashes(in, out); continue; } if (c == '#') { out << c; while (has_data(in) && in.peek() != '\n') out << static_cast<char>(in.get()); continue; } if (c == '[') { in.putback(c); // recurse out << slurp_quoted(in); continue; } out << c; if (c == ']') return; } raise << "unbalanced '['\n" << end(); out.clear(); } :(after "Parsing reagent(string s)") if (starts_with(s, "[")) { if (*s.rbegin() != ']') return; // unbalanced bracket; handled elsewhere name = s; // delete [] delimiters name.erase(0, 1); strip_last(name); type = new type_tree("literal-string", 0); return; } //: Unlike other reagents, escape newlines in literal strings to make them //: more friendly to trace(). :(after "string to_string(const reagent& r)") if (is_literal_text(r)) return emit_literal_string(r.name); :(code) bool is_literal_text(const reagent& x) { return x.type && x.type->name == "literal-string"; } string emit_literal_string(string name) { size_t pos = 0; while (pos != string::npos) pos = replace(name, "\n", "\\n", pos); return "{\""+name+"\": \"literal-string\"}"; } size_t replace(string& str, const string& from, const string& to, size_t n) { size_t result = str.find(from, n); if (result != string::npos) str.replace(result, from.length(), to); return result; } void strip_last(string& s) { if (!s.empty()) s.erase(SIZE(s)-1); } void slurp_one_past_backslashes(istream& in, ostream& out) { // When you encounter a backslash, strip it out and pass through any // following run of backslashes. If we 'escaped' a single following // character, then the character '\' would be: // '\\' escaped once // '\\\\' escaped twice // '\\\\\\\\' escaped thrice (8 backslashes) // ..and so on. With our approach it'll be: // '\\' escaped once // '\\\' escaped twice // '\\\\' escaped thrice // This only works as long as backslashes aren't also overloaded to create // special characters. So Mu doesn't follow C's approach of overloading // backslashes both to escape quote characters and also as a notation for // unprintable characters like '\n'. while (has_data(in)) { char c = in.get(); out << c; if (c != '\\') break; } } void test_string_literal_nested() { load( "def main [\n" " 1:address:array:character <- copy [abc [def]]\n" "]\n" ); CHECK_TRACE_CONTENTS( "parse: ingredient: {\"abc [def]\": \"literal-string\"}\n" ); } void test_string_literal_escaped() { load( "def main [\n" " 1:address:array:character <- copy [abc \\[def]\n" "]\n" ); CHECK_TRACE_CONTENTS( "parse: ingredient: {\"abc [def\": \"literal-string\"}\n" ); } void test_string_literal_escaped_twice() { load( "def main [\n" " 1:address:array:character <- copy [\n" "abc \\\\[def]\n" "]\n" ); CHECK_TRACE_CONTENTS( "parse: ingredient: {\"\\nabc \\[def\": \"literal-string\"}\n" ); } void test_string_literal_and_comment() { load( "def main [\n" " 1:address:array:character <- copy [abc] # comment\n" "]\n" ); CHECK_TRACE_CONTENTS( "parse: --- defining main\n" "parse: instruction: copy\n" "parse: number of ingredients: 1\n" "parse: ingredient: {\"abc\": \"literal-string\"}\n" "parse: product: {1: (\"address\" \"array\" \"character\")}\n" ); } void test_string_literal_escapes_newlines_in_trace() { load( "def main [\n" " copy [abc\n" "def]\n" "]\n" ); CHECK_TRACE_CONTENTS( "parse: ingredient: {\"abc\\ndef\": \"literal-string\"}\n" ); } void test_string_literal_can_skip_past_comments() { load( "def main [\n" " copy [\n" " # ']' inside comment\n" " bar\n" " ]\n" "]\n" ); CHECK_TRACE_CONTENTS( "parse: ingredient: {\"\\n # ']' inside comment\\n bar\\n \": \"literal-string\"}\n" ); } void test_string_literal_empty() { load( "def main [\n" " copy []\n" "]\n" ); CHECK_TRACE_CONTENTS( "parse: ingredient: {\"\": \"literal-string\"}\n" ); } void test_multiple_unfinished_recipes() { Hide_errors = true; load( "def f1 [\n" "def f2 [\n" ); CHECK_TRACE_CONTENTS( "error: unbalanced '['\n" ); }