diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/Readme.md | 5 | ||||
-rw-r--r-- | tools/linkify.cc | 267 | ||||
-rwxr-xr-x | tools/update_html | 73 |
3 files changed, 344 insertions, 1 deletions
diff --git a/tools/Readme.md b/tools/Readme.md index 2649ef72..29a1a6ef 100644 --- a/tools/Readme.md +++ b/tools/Readme.md @@ -12,8 +12,11 @@ These are built automatically. These are built lazily. +* `linkify`: inserts hyperlinks from variables to definitions in Mu's html + sources. Hacky; just see the number of tests. Invoked by `update_html`. + * `treeshake_all`: rebuild SubX binaries without tests and unused functions. - Pretty hacky; just helps estimate the code needed to perform various tasks. + Hacky; just helps estimate the code needed to perform various tasks. ``` tools/treeshake_all ``` diff --git a/tools/linkify.cc b/tools/linkify.cc new file mode 100644 index 00000000..ece50748 --- /dev/null +++ b/tools/linkify.cc @@ -0,0 +1,267 @@ +// Read a tabular cross-reference file generated by ctags, then read a list of +// html files generated by Vim's TOhtml command on C++ code. Link words +// in the html files to cross-references from ctags. + +// Usage: +// linkify [tags file] [html files]... 
+ +// Still plenty of holes: +// - unnecessarily linking definition location to itself +// - except SubX definitions, which start at start of line +// - can't detect strings in spite of attempt to support them below, because +// Vim's generated html turns quotes into html entities +// - distinguishing function and variable names +// - distinguishing Mu code in C++ files +// - distinguishing between function overloads +// - if there's duplicate tags we aren't smart enough to distinguish between +// them yet, so we simply don't add any link at all +// - but even that's not perfect, because sometimes the tags file has a +// single definition but there's still multiple overloads (say I defined +// 'clear()' on some type, and it's already defined on STL classes) +// - ctags misses some symbols in layered code + +#include<assert.h> + +#include<map> +using std::map; + +#include<string> +using std::string; + +#include<iostream> +using std::istream; +using std::cout; +using std::cerr; + +#include<sstream> +using std::istringstream; +using std::ostringstream; + +#include<fstream> +using std::ifstream; +using std::ofstream; + +#include <locale> +using std::isspace; // unicode-aware + +struct syminfo { + string filename; + int line_num; + syminfo() :line_num(0) {} +}; + +bool has_data(istream& in) { + in.peek(); + if (in.eof()) return false; + assert(in); + return true; +} + +bool starts_with(const string& s, const string& pat) { + string::const_iterator a=s.begin(), b=pat.begin(); + for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) + if (*a != *b) return false; + return b == pat.end(); +} + +bool ends_with(const string& s, const string& pat) { + string::const_reverse_iterator a=s.rbegin(), b=pat.rbegin(); + for (/*nada*/; a!=s.rend() && b!=pat.rend(); ++a, ++b) + if (*a != *b) return false; + return b == pat.rend(); +} + +void encode_some_html_entities(string& s) { + std::string::size_type pos = 0; + while (true) { + pos = s.find_first_of("<>", pos); + if (pos == 
std::string::npos) break; + std::string replacement; + switch (s.at(pos)) { + case '<': replacement = "&lt;"; break; + case '>': replacement = "&gt;"; break; + } + s.replace(pos, 1, replacement); + pos += replacement.size(); + }; +} + +void read_tags(const string& filename, map<string, syminfo>& info) { + ifstream in(filename.c_str()); +//? cerr << "reading " << filename << '\n'; + string dummy; + while (has_data(in)) { + string symbol; in >> symbol; + if (symbol == "operator") { + // unsupported + getline(in, dummy); // skip + continue; + } + encode_some_html_entities(symbol); +//? cerr << symbol << '\n'; + if (info.find(symbol) != info.end()) { + info[symbol].line_num = -1; + info[symbol].filename.clear(); + } + else { + in >> dummy; + in >> info[symbol].line_num; + in >> info[symbol].filename; + } + getline(in, dummy); // skip rest of line +//? cerr << symbol << ": " << info[symbol].filename << ':' << info[symbol].line_num << '\n'; + } + in.close(); +} + +void replace_tags_in_file(const string& filename, const map<string, syminfo>& info) { +//? cerr << info.size() << " symbols\n"; + ifstream in(filename.c_str()); + ofstream out((filename+".out").c_str()); + while (has_data(in)) { + // send lines that don't start with '<span' straight through + string line; + getline(in, line); + if (!starts_with(line, "<span ")) { + out << line << '\n'; + } + else { + static int span_size = string("</span>").size(); + int skip_first_span = line.find("</span>") + span_size; + out << line.substr(0, skip_first_span); + istringstream in2(line.substr(skip_first_span)); + in2 >> std::noskipws; + // only in .subx files, refuse to linkify the first word on a line + bool at_start_of_line = ends_with(filename, ".subx.html"); +//? cerr << filename << ": " << at_start_of_line << '\n'; + while (has_data(in2)) { + if (isspace(in2.peek())) { +//? 
cerr << "space\n"; + char c; in2 >> c; + out << c; + at_start_of_line = false; + } + // within a line, send straight through all characters inside '<..>' + else if (in2.peek() == '<') { +//? cerr << "tag\n"; + char c = '\0'; + while (in2 >> c) { +//? cerr << "span: " << c << '\n'; + out << c; + if (c == '>') break; + } + // don't include initial tag when computing 'at_start_of_line' +//? cerr << "end tag\n"; + } + else { + // send straight through all characters inside strings (handling escapes) + char c = in2.get(); + if (c == '"') { +//? cerr << "string\n"; + out << c; + while (in2 >> c) { + out << c; + if (c == '\\') { + in2 >> c; out << c; + } + else if (c == '"') { + break; + } + } + at_start_of_line = false; + } + else if (c == '\'') { +//? cerr << "character\n"; + out << c; + while (in2 >> c) { + out << c; + if (c == '\\') { + in2 >> c; out << c; + } + else if (c == '\'') { + break; + } + } + at_start_of_line = false; + } + // send straight through any characters after '#' (comments) + else if (c == '#') { +//? cerr << "comment\n"; + out << c; + while (in2 >> c) out << c; + at_start_of_line = false; + } + // send straight through any characters after '//' (comments) + else if (c == '/' && in2.peek() == '/') { +//? cerr << "comment\n"; + out << c; + while (in2 >> c) out << c; + at_start_of_line = false; + } + // send through open parens at start of line + else if (c == '(') { + out << c; + at_start_of_line = false; + } + else if (c == ')') { + out << c; + at_start_of_line = false; + } + else { +//? 
cerr << "rest\n"; + if (c == ',' || c == ':') { + out << c; + at_start_of_line = false; + continue; + } + ostringstream out2; + out2 << c; + while (in2 >> c) { + if (isspace(c) || c == '<' || c == '"' || c == '\'' || c == '/' || c == ',' || c == ':' || c == '(' || c == ')') { // keep sync'd with other clauses above + in2.putback(c); + break; + } + out2 << c; + } + string symbol = out2.str(); + if (symbol == "equal" || symbol == "index" || symbol == "put-index" || symbol == "length") { +//? cerr << " blacklisted\n"; + out << symbol; + } + else if (info.find(symbol) == info.end()) { +//? cerr << " no info\n"; + out << symbol; + } + else { + const syminfo& s = info.find(symbol)->second; + if (s.filename.empty()) { +//? cerr << " empty info\n"; + out << symbol; + } + else { + if (at_start_of_line) { +//? cerr << " at start of line; refusing to linkify " << symbol << "\n"; + out << symbol; + } + else { +//? cerr << " link\n"; + out << "<a href='" << s.filename << ".html#L" << s.line_num << "'>" << symbol << "</a>"; + } + } + } + } // end rest + } + } // done parsing line + out << '\n'; + } + } + in.close(); out.close(); +} + +int main(int argc, const char* argv[]) { + map<string, syminfo> info; + read_tags(argv[1], info); + for (int i = 2; i < argc; ++i) + replace_tags_in_file(argv[i], info); + return 0; +} diff --git a/tools/update_html b/tools/update_html new file mode 100755 index 00000000..478d5d6c --- /dev/null +++ b/tools/update_html @@ -0,0 +1,73 @@ +#!/bin/bash +# Regenerate html files. +# If given a single argument, regenerate just that file. 
+ +set -e + +( cd tools; c++ -g linkify.cc -o linkify; ) + +# generate html/$1.html using /tmp/tags +process() { + rm -f html/$1.html + convert_html $1 + tools/linkify /tmp/tags html/$1.html + mv html/$1.html.out html/$1.html +} + +URL_BASE='https://github.com/akkartik/mu/blob/master' + +convert_html() { + vim -c "set number" -c TOhtml -c write -c qall $1 + + sed -i 's,<title>.*/mu/,<title>Mu - ,' $1.html + sed -i 's,\.html</title>,</title>,' $1.html + + sed -i "/^<body/a <a href='$URL_BASE/$1'>$URL_BASE/$1</a>" $1.html + + sed -i 's/^\* { \(.*\) }/* { font-size:12pt; \1 }/g' $1.html + sed -i 's/^body { \(.*\) }/body { font-size:12pt; \1 }/g' $1.html + + sed -i '/^body {/a a { color:inherit; }' $1.html + + # switch unicode characters around in the rendered html + # the ones we have in the source files render double-wide in html + # the ones we want in the html cause iTerm2 to slow down in alt-tabbing for some reason + # the following commands give us the best of both worlds + sed -i -e 's/┈/╌/g' -e 's/┊/╎/g' $1.html + + mv -i $1.html html/`dirname $1` +} + +ctags -x *.cc |grep -v '^. ' > /tmp/tags # don't hyperlink every 'i' to the integer register variant +for f in *.cc +do + test $# -gt 0 && test $1 != $f && continue + process $f +done + +for f in examples/*.subx +do + test $# -gt 0 && test $1 != $f && continue + ( cd examples + ctags -x `basename $f` > /tmp/tags + ) + process $f +done + +ctags -x *.subx > /tmp/tags +for f in *.subx +do + test $# -gt 0 && test $1 != $f && continue + process $f +done + +for f in apps/*.subx +do + test $# -gt 0 && test $1 != $f && continue + ( cd apps + ctags -x ../*.subx `basename $f` > /tmp/tags + ) + process $f +done + +rm /tmp/tags |