mu - Soul of a tiny new machine. More thorough tests → More comprehensible and rewrite-friendly software → More resilient society.

	Commit message (Expand)	Author	Age	Files	Lines
*	5149	Kartik Agaram	2019-05-10	1	-3/+8
*	5148	Kartik Agaram	2019-05-09	1	-9/+7
*	4996 - back on pack.subx	Kartik Agaram	2019-03-08	1	-3/+3
*	4987 - support `browse_trace` tool in SubX	Kartik Agaram	2019-02-25	1	-6/+7
*	4986 - spending some time improving SubX traces	Kartik Agaram	2019-02-23	1	-4/+7
*	4898	Kartik Agaram	2018-12-30	1	-1/+1
*	4883 - rudimentary memory allocator	Kartik Agaram	2018-12-28	1	-1/+1
*	4878	Kartik Agaram	2018-12-27	1	-1/+1
*	4868	Kartik Agaram	2018-12-16	1	-1/+1
*	4761	Kartik Agaram	2018-11-23	1	-4/+4
*	4723	Kartik Agaram	2018-10-24	1	-0/+9
*	4720	Kartik Agaram	2018-10-24	1	-2/+2
*	4661	Kartik Agaram	2018-10-04	1	-12/+10
*	4632	Kartik Agaram	2018-10-01	1	-3/+10
*	4614 - redo simulated RAM	Kartik Agaram	2018-09-29	1	-19/+20
*	4565	Kartik Agaram	2018-09-21	1	-2/+3
*	4537	Kartik Agaram	2018-09-07	1	-1/+0
*	4528 - commandline arguments working natively	Kartik Agaram	2018-08-31	1	-14/+5
*	4527 - reading commandline arguments	Kartik Agaram	2018-08-30	1	-4/+36
*	4519	Kartik Agaram	2018-08-13	1	-5/+5
*	4469	Kartik Agaram	2018-08-03	1	-0/+143

// Read a tabular cross-reference file generated by ctags, then read a list of
// html files generated by Vim's TOhtml command on C++ code. Link words
// in the html files to cross-references from ctags.
//
// Usage:
//   linkify [tags file] [html files]...
//
// Each html file 'x.html' is left untouched; the linkified copy is written to
// 'x.html.out'.
//
// Still plenty of holes:
//   - unnecessarily linking definition location to itself
//     - except SubX definitions, which start at start of line
//   - can't detect strings in spite of attempt to support them below, because
//     Vim's generated html turns quotes into html entities
//   - distinguishing function and variable names
//   - distinguishing Mu code in C++ files
//   - distinguishing between function overloads
//   - if there's duplicate tags we aren't smart enough to distinguish between
//     them yet, so we simply don't add any link at all
//   - but even that's not perfect, because sometimes the tags file has a
//     single definition but there's still multiple overloads (say I defined
//     'clear()' on some type, and it's already defined on STL classes)
//   - ctags misses some symbols in layered code

#include<assert.h>

#include<cctype>
#include<map>
using std::map;
#include<string>
using std::string;
#include<iostream>
using std::istream;
using std::cout;
using std::cerr;
#include<sstream>
using std::istringstream;
using std::ostringstream;
#include<fstream>
using std::ifstream;
using std::ofstream;
#include <locale>
using std::isspace;  // called below with one argument, i.e. the <cctype> overload

// Where a symbol is defined.
// An empty 'filename' means "known symbol, but don't link it"; read_tags()
// uses that state (together with line_num == -1) for ambiguous symbols.
struct syminfo {
  string filename;
  int line_num;
  syminfo() :line_num(0) {}
};

// Return true if at least one more character can be read from 'in'.
// Asserts that the stream is otherwise healthy, so callers must not pass a
// stream that is already in a failed (non-eof) state.
bool has_data(istream& in) {
  in.peek();
  if (in.eof()) return false;
  assert(in);
  return true;
}

// Return true if 's' begins with 'pat'. An empty 'pat' always matches.
bool starts_with(const string& s, const string& pat) {
  string::const_iterator a=s.begin(), b=pat.begin();
  for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b)
    if (*a != *b) return false;
  return b == pat.end();
}

// Return true if 's' ends with 'pat'. An empty 'pat' always matches.
bool ends_with(const string& s, const string& pat) {
  string::const_reverse_iterator a=s.rbegin(), b=pat.rbegin();
  for (/*nada*/; a!=s.rend() && b!=pat.rend(); ++a, ++b)
    if (*a != *b) return false;
  return b == pat.rend();
}

// Replace every '<' and '>' in 's' with the corresponding html entity, so
// that symbols read from the tags file compare equal to the way they show up
// in the generated html.
void encode_some_html_entities(string& s) {
  // Each entity is spelled as two adjacent literals so that it survives even
  // if this source is itself passed through something that decodes html
  // entities (which would silently turn this function into a no-op).
  string::size_type pos = 0;
  while ((pos = s.find_first_of("<>", pos)) != string::npos) {
    const string replacement = (s[pos] == '<') ? "&" "lt;" : "&" "gt;";
    s.replace(pos, 1, replacement);
    pos += replacement.size();  // continue after the text we just inserted
  }
}

// Load symbol definitions from the tags file 'filename' into 'info'.
// Each line is read as whitespace-separated columns:
//   symbol  <ignored column>  line-number  filename  <rest ignored>
// (the ignored second column is presumably the tag kind printed by
// `ctags -x` -- not verified here).
// A symbol seen more than once is marked ambiguous (empty filename,
// line_num -1) so that it never gets linked.
void read_tags(const string& filename, map<string, syminfo>& info) {
  ifstream in(filename.c_str());
  if (!in) {
    // has_data() would otherwise abort on its assert with no explanation
    cerr << "linkify: can't read tags file " << filename << '\n';
    return;
  }
//?   cerr << "reading " << filename << '\n';
  string dummy;
  while (has_data(in)) {
    string symbol;
    if (!(in >> symbol)) break;  // nothing but trailing whitespace left
    if (symbol == "operator") {
      // unsupported
      getline(in, dummy);  // skip
      continue;
    }
    encode_some_html_entities(symbol);
//?     cerr << symbol << '\n';
    map<string, syminfo>::iterator p = info.find(symbol);
    if (p != info.end()) {
      // duplicate definition; refuse to guess which one to link to
      p->second.line_num = -1;
      p->second.filename.clear();
    }
    else {
      syminfo& s = info[symbol];
      in >> dummy;  // skip second column
      in >> s.line_num;
      in >> s.filename;
    }
    getline(in, dummy);  // skip rest of line
  }
}

// One-argument isspace() is only defined for EOF and values representable as
// unsigned char, so never hand it a plain (possibly negative) char.
static bool is_space_char(char c) {
  return isspace(static_cast<unsigned char>(c)) != 0;
}

// Return true if 'c' terminates a symbol.
// Keep sync'd with the clauses in linkify_rest_of_line().
static bool ends_symbol(char c) {
  return is_space_char(c) || c == '<' || c == '"' || c == '\'' || c == '/'
      || c == ',' || c == ':' || c == '(' || c == ')';
}

// Copy a quoted literal from 'in' to 'out' unchanged, honoring backslash
// escapes. The opening 'quote' has already been consumed by the caller.
// Stops after the matching closing quote, or at end of input.
static void copy_quoted(istream& in, std::ostream& out, char quote) {
  out << quote;
  char c = '\0';
  while (in >> c) {
    out << c;
    if (c == '\\') {
      // escaped character; copy it only if there really is one
      if (in >> c) out << c;
    }
    else if (c == quote) {
      break;
    }
  }
}

// Copy everything remaining in 'in' to 'out' unchanged.
static void copy_remainder(istream& in, std::ostream& out) {
  char c = '\0';
  while (in >> c) out << c;
}

// Print 'symbol' to 'out', wrapped in a link to its definition when we have
// an unambiguous definition for it and linking is permitted at this position.
static void emit_symbol(std::ostream& out, const string& symbol, const map<string, syminfo>& info, bool at_start_of_line) {
  if (symbol == "equal" || symbol == "index" || symbol == "put-index" || symbol == "length") {
//?     cerr << "  blacklisted\n";
    out << symbol;
    return;
  }
  map<string, syminfo>::const_iterator p = info.find(symbol);
  if (p == info.end() || p->second.filename.empty() || at_start_of_line) {
//?     cerr << "  no info, ambiguous, or at start of line; refusing to linkify " << symbol << "\n";
    out << symbol;
    return;
  }
//?   cerr << "  link\n";
  const syminfo& s = p->second;
  out << "<a href='" << s.filename << ".html#L" << s.line_num << "'>" << symbol << "</a>";
}

// Tokenize 'rest' (a line of html minus its leading span) and copy it to
// 'out', linkifying symbols found in 'info'.
// While 'at_start_of_line' is true the first word is not linkified; html tags
// don't count as a word.
static void linkify_rest_of_line(const string& rest, std::ostream& out, const map<string, syminfo>& info, bool at_start_of_line) {
  istringstream in2(rest);
  in2 >> std::noskipws;
  while (has_data(in2)) {
    if (isspace(in2.peek())) {  // peek() can't be EOF here, so this is well-defined
//?       cerr << "space\n";
      char c = '\0';
      in2 >> c;
      out << c;
      at_start_of_line = false;
      continue;
    }
    // within a line, send straight through all characters inside '<..>'
    if (in2.peek() == '<') {
//?       cerr << "tag\n";
      char c = '\0';
      while (in2 >> c) {
        out << c;
        if (c == '>') break;
      }
      // don't include initial tag when computing 'at_start_of_line'
      continue;
    }
    char c = in2.get();
    if (c == '"' || c == '\'') {
      // send straight through all characters inside strings and character
      // literals (handling escapes)
      copy_quoted(in2, out, c);
      at_start_of_line = false;
    }
    else if (c == '#' || (c == '/' && in2.peek() == '/')) {
      // send straight through any characters after '#' or '//' (comments)
//?       cerr << "comment\n";
      out << c;
      copy_remainder(in2, out);
      at_start_of_line = false;
    }
    else if (c == '(' || c == ')' || c == ',' || c == ':') {
      // punctuation is never part of a symbol
      out << c;
      at_start_of_line = false;
    }
    else {
//?       cerr << "rest\n";
      string symbol(1, c);
      while (in2 >> c) {
        if (ends_symbol(c)) {
          in2.putback(c);
          break;
        }
        symbol += c;
      }
      emit_symbol(out, symbol, info, at_start_of_line);
      // the first word has now been emitted; later words may be linked
      at_start_of_line = false;
    }
  }
}

// Copy html file 'filename' to 'filename'.out, linkifying symbols from 'info'.
// Only lines that start with '<span ' are processed, and within them
// everything up to the end of the first span is copied verbatim (presumably
// Vim's line-number span -- not verified here).
void replace_tags_in_file(const string& filename, const map<string, syminfo>& info) {
//?   cerr << info.size() << " symbols\n";
  ifstream in(filename.c_str());
  if (!in) {
    // has_data() would otherwise abort on its assert with no explanation
    cerr << "linkify: can't read " << filename << '\n';
    return;
  }
  ofstream out((filename+".out").c_str());
  if (!out) {
    cerr << "linkify: can't write " << filename << ".out\n";
    return;
  }
  const string span_end("</span>");
  // only in .subx files, refuse to linkify the first word on a line
  const bool skip_first_word = ends_with(filename, ".subx.html");
//?   cerr << filename << ": " << skip_first_word << '\n';
  while (has_data(in)) {
    string line;
    getline(in, line);
    // send lines that don't start with '<span' straight through; likewise
    // lines whose first span never closes, since we can't tell where the
    // code starts
    string::size_type first_span_end = string::npos;
    if (starts_with(line, "<span "))
      first_span_end = line.find(span_end);
    if (first_span_end == string::npos) {
      out << line << '\n';
      continue;
    }
    const string::size_type skip_first_span = first_span_end + span_end.size();
    out << line.substr(0, skip_first_span);
    linkify_rest_of_line(line.substr(skip_first_span), out, info, skip_first_word);
    out << '\n';
  }
}

int main(int argc, const char* argv[]) {
  if (argc < 2) {
    // argv[1] would be a null pointer; don't build a string out of it
    cerr << "usage: linkify [tags file] [html files]...\n";
    return 1;
  }
  map<string, syminfo> info;
  read_tags(argv[1], info);
  for (int i = 2;  i < argc;  ++i)
    replace_tags_in_file(argv[i], info);
  return 0;
}