diff options
author | Kartik Agaram <vc@akkartik.com> | 2019-12-07 16:19:38 -0800 |
---|---|---|
committer | Kartik Agaram <vc@akkartik.com> | 2019-12-07 18:06:17 -0800 |
commit | 9e45cae061fd345d3270f236769bd94966a42eb2 (patch) | |
tree | 5459e3e692e039f0ce1663a60af99d6053ccbc0e /linkify | |
parent | 25636f70d0f116ef2f842e9ca25dfb781071cd2d (diff) | |
download | mu-9e45cae061fd345d3270f236769bd94966a42eb2.tar.gz |
5799 - move html-generation to `tools/` directory
Diffstat (limited to 'linkify')
-rw-r--r-- | linkify/Readme | 3 | ||||
-rwxr-xr-x | linkify/build | 4 | ||||
-rwxr-xr-x | linkify/clean | 4 | ||||
-rw-r--r-- | linkify/linkify.cc | 267 |
4 files changed, 0 insertions, 278 deletions
diff --git a/linkify/Readme b/linkify/Readme deleted file mode 100644 index c4625a9d..00000000 --- a/linkify/Readme +++ /dev/null @@ -1,3 +0,0 @@ -Tool used while rendering Mu's codebase in html. See the mu/update_html script. - -Extremely hacky; just see the number of tests. diff --git a/linkify/build b/linkify/build deleted file mode 100755 index 3c96ad00..00000000 --- a/linkify/build +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -set -e - -c++ -g linkify.cc -o linkify diff --git a/linkify/clean b/linkify/clean deleted file mode 100755 index 3feef907..00000000 --- a/linkify/clean +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -set -e - -rm -rf linkify *.dSYM diff --git a/linkify/linkify.cc b/linkify/linkify.cc deleted file mode 100644 index ece50748..00000000 --- a/linkify/linkify.cc +++ /dev/null @@ -1,267 +0,0 @@ -// Read a tabular cross-reference file generated by ctags, then read a list of -// html files generated by Vim's TOhtml command on C++ code. Link words -// in the html files to cross-references from ctags. - -// Usage: -// linkify [tags file] [html files]... 
- -// Still plenty of holes: -// - unnecessarily linking definition location to itself -// - except SubX definitions, which start at start of line -// - can't detect strings in spite of attempt to support them below, because -// Vim's generated html turns quotes into html entities -// - distinguishing function and variable names -// - distinguishing Mu code in C++ files -// - distinguishing between function overloads -// - if there's duplicate tags we aren't smart enough to distinguish between -// them yet, so we simply don't add any link at all -// - but even that's not perfect, because sometimes the tags file has a -// single definition but there's still multiple overloads (say I defined -// 'clear()' on some type, and it's already defined on STL classes) -// - ctags misses some symbols in layered code - -#include<assert.h> - -#include<map> -using std::map; - -#include<string> -using std::string; - -#include<iostream> -using std::istream; -using std::cout; -using std::cerr; - -#include<sstream> -using std::istringstream; -using std::ostringstream; - -#include<fstream> -using std::ifstream; -using std::ofstream; - -#include <locale> -using std::isspace; // unicode-aware - -struct syminfo { - string filename; - int line_num; - syminfo() :line_num(0) {} -}; - -bool has_data(istream& in) { - in.peek(); - if (in.eof()) return false; - assert(in); - return true; -} - -bool starts_with(const string& s, const string& pat) { - string::const_iterator a=s.begin(), b=pat.begin(); - for (/*nada*/; a!=s.end() && b!=pat.end(); ++a, ++b) - if (*a != *b) return false; - return b == pat.end(); -} - -bool ends_with(const string& s, const string& pat) { - string::const_reverse_iterator a=s.rbegin(), b=pat.rbegin(); - for (/*nada*/; a!=s.rend() && b!=pat.rend(); ++a, ++b) - if (*a != *b) return false; - return b == pat.rend(); -} - -void encode_some_html_entities(string& s) { - std::string::size_type pos = 0; - while (true) { - pos = s.find_first_of("<>", pos); - if (pos == 
std::string::npos) break; - std::string replacement; - switch (s.at(pos)) { - case '<': replacement = "&lt;"; break; - case '>': replacement = "&gt;"; break; - } - s.replace(pos, 1, replacement); - pos += replacement.size(); - }; -} - -void read_tags(const string& filename, map<string, syminfo>& info) { - ifstream in(filename.c_str()); -//? cerr << "reading " << filename << '\n'; - string dummy; - while (has_data(in)) { - string symbol; in >> symbol; - if (symbol == "operator") { - // unsupported - getline(in, dummy); // skip - continue; - } - encode_some_html_entities(symbol); -//? cerr << symbol << '\n'; - if (info.find(symbol) != info.end()) { - info[symbol].line_num = -1; - info[symbol].filename.clear(); - } - else { - in >> dummy; - in >> info[symbol].line_num; - in >> info[symbol].filename; - } - getline(in, dummy); // skip rest of line -//? cerr << symbol << ": " << info[symbol].filename << ':' << info[symbol].line_num << '\n'; - } - in.close(); -} - -void replace_tags_in_file(const string& filename, const map<string, syminfo>& info) { -//? cerr << info.size() << " symbols\n"; - ifstream in(filename.c_str()); - ofstream out((filename+".out").c_str()); - while (has_data(in)) { - // send lines that don't start with '<span' straight through - string line; - getline(in, line); - if (!starts_with(line, "<span ")) { - out << line << '\n'; - } - else { - static int span_size = string("</span>").size(); - int skip_first_span = line.find("</span>") + span_size; - out << line.substr(0, skip_first_span); - istringstream in2(line.substr(skip_first_span)); - in2 >> std::noskipws; - // only in .subx files, refuse to linkify the first word on a line - bool at_start_of_line = ends_with(filename, ".subx.html"); -//? cerr << filename << ": " << at_start_of_line << '\n'; - while (has_data(in2)) { - if (isspace(in2.peek())) { -//?
cerr << "space\n"; - char c; in2 >> c; - out << c; - at_start_of_line = false; - } - // within a line, send straight through all characters inside '<..>' - else if (in2.peek() == '<') { -//? cerr << "tag\n"; - char c = '\0'; - while (in2 >> c) { -//? cerr << "span: " << c << '\n'; - out << c; - if (c == '>') break; - } - // don't include initial tag when computing 'at_start_of_line' -//? cerr << "end tag\n"; - } - else { - // send straight through all characters inside strings (handling escapes) - char c = in2.get(); - if (c == '"') { -//? cerr << "string\n"; - out << c; - while (in2 >> c) { - out << c; - if (c == '\\') { - in2 >> c; out << c; - } - else if (c == '"') { - break; - } - } - at_start_of_line = false; - } - else if (c == '\'') { -//? cerr << "character\n"; - out << c; - while (in2 >> c) { - out << c; - if (c == '\\') { - in2 >> c; out << c; - } - else if (c == '\'') { - break; - } - } - at_start_of_line = false; - } - // send straight through any characters after '#' (comments) - else if (c == '#') { -//? cerr << "comment\n"; - out << c; - while (in2 >> c) out << c; - at_start_of_line = false; - } - // send straight through any characters after '//' (comments) - else if (c == '/' && in2.peek() == '/') { -//? cerr << "comment\n"; - out << c; - while (in2 >> c) out << c; - at_start_of_line = false; - } - // send through open parens at start of line - else if (c == '(') { - out << c; - at_start_of_line = false; - } - else if (c == ')') { - out << c; - at_start_of_line = false; - } - else { -//? 
cerr << "rest\n"; - if (c == ',' || c == ':') { - out << c; - at_start_of_line = false; - continue; - } - ostringstream out2; - out2 << c; - while (in2 >> c) { - if (isspace(c) || c == '<' || c == '"' || c == '\'' || c == '/' || c == ',' || c == ':' || c == '(' || c == ')') { // keep sync'd with other clauses above - in2.putback(c); - break; - } - out2 << c; - } - string symbol = out2.str(); - if (symbol == "equal" || symbol == "index" || symbol == "put-index" || symbol == "length") { -//? cerr << " blacklisted\n"; - out << symbol; - } - else if (info.find(symbol) == info.end()) { -//? cerr << " no info\n"; - out << symbol; - } - else { - const syminfo& s = info.find(symbol)->second; - if (s.filename.empty()) { -//? cerr << " empty info\n"; - out << symbol; - } - else { - if (at_start_of_line) { -//? cerr << " at start of line; refusing to linkify " << symbol << "\n"; - out << symbol; - } - else { -//? cerr << " link\n"; - out << "<a href='" << s.filename << ".html#L" << s.line_num << "'>" << symbol << "</a>"; - } - } - } - } // end rest - } - } // done parsing line - out << '\n'; - } - } - in.close(); out.close(); -} - -int main(int argc, const char* argv[]) { - map<string, syminfo> info; - read_tags(argv[1], info); - for (int i = 2; i < argc; ++i) - replace_tags_in_file(argv[i], info); - return 0; -} |