about summary refs log tree commit diff stats
path: root/linkify
diff options
context:
space:
mode:
authorKartik Agaram <vc@akkartik.com>2019-12-07 16:19:38 -0800
committerKartik Agaram <vc@akkartik.com>2019-12-07 18:06:17 -0800
commit9e45cae061fd345d3270f236769bd94966a42eb2 (patch)
tree5459e3e692e039f0ce1663a60af99d6053ccbc0e /linkify
parent25636f70d0f116ef2f842e9ca25dfb781071cd2d (diff)
downloadmu-9e45cae061fd345d3270f236769bd94966a42eb2.tar.gz
5799 - move html-generation to `tools/` directory
Diffstat (limited to 'linkify')
-rw-r--r--linkify/Readme3
-rwxr-xr-xlinkify/build4
-rwxr-xr-xlinkify/clean4
-rw-r--r--linkify/linkify.cc267
4 files changed, 0 insertions, 278 deletions
diff --git a/linkify/Readme b/linkify/Readme
deleted file mode 100644
index c4625a9d..00000000
--- a/linkify/Readme
+++ /dev/null
@@ -1,3 +0,0 @@
-Tool used while rendering Mu's codebase in html. See the mu/update_html script.
-
-Extremely hacky; just see the number of tests.
diff --git a/linkify/build b/linkify/build
deleted file mode 100755
index 3c96ad00..00000000
--- a/linkify/build
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# Compile linkify.cc into an executable named 'linkify', with debug info (-g).
-set -e  # stop at the first failing command
-
-c++ -g linkify.cc -o linkify
diff --git a/linkify/clean b/linkify/clean
deleted file mode 100755
index 3feef907..00000000
--- a/linkify/clean
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# Delete the 'linkify' executable and anything matching *.dSYM in this
-# directory (presumably debug-symbol output left behind by the -g build).
-set -e  # stop at the first failing command
-
-rm -rf linkify *.dSYM
diff --git a/linkify/linkify.cc b/linkify/linkify.cc
deleted file mode 100644
index ece50748..00000000
--- a/linkify/linkify.cc
+++ /dev/null
@@ -1,267 +0,0 @@
-// Read a tabular cross-reference file generated by ctags, then read a list of
-// html files generated by Vim's TOhtml command on C++ code. Link words
-// in the html files to cross-references from ctags.
-
-// Usage:
-//    linkify [tags file] [html files]...
-
-// Still plenty of holes:
-// - unnecessarily linking definition location to itself
-//   - except SubX definitions, which start at start of line
-// - can't detect strings in spite of attempt to support them below, because
-//   Vim's generated html turns quotes into html entities
-// - distinguishing function and variable names
-// - distinguishing Mu code in C++ files
-// - distinguishing between function overloads
-//   - if there are duplicate tags we aren't smart enough to distinguish
-//     between them yet, so we simply don't add any link at all
-//   - but even that's not perfect, because sometimes the tags file has a
-//     single definition but there are still multiple overloads (say I defined
-//     'clear()' on some type, and it's already defined on STL classes)
-// - ctags misses some symbols in layered code
-
-#include<assert.h>
-
-#include<map>
-using std::map;
-
-#include<string>
-using std::string;
-
-#include<iostream>
-using std::istream;
-using std::cout;
-using std::cerr;
-
-#include<sstream>
-using std::istringstream;
-using std::ostringstream;
-
-#include<fstream>
-using std::ifstream;
-using std::ofstream;
-
-#include <locale>
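-using std::isspace;  // NOTE: every call below passes one argument, so it resolves to the byte-wise <cctype> isspace(int), not the locale-aware <locale> template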
-
-// Where a symbol is defined, as read from the tags file.
-struct syminfo {
-  string filename;  // file holding the definition; empty when the symbol shouldn't be linked
-  int line_num;     // line of the definition within 'filename'
-
-  // A fresh entry has no filename and line 0.
-  syminfo()
-    : filename(), line_num(0) {
-  }
-};
-
-// Is there at least one more character to read from 'in'?
-// Peeks without consuming anything. Running out of input is the only
-// failure we expect; any other bad stream state trips the assertion.
-bool has_data(istream& in) {
-  in.peek();
-  const bool exhausted = in.eof();
-  if (!exhausted) assert(in);
-  return !exhausted;
-}
-
-// Does 's' begin with 'pat'? An empty 'pat' matches every string.
-bool starts_with(const string& s, const string& pat) {
-  // compare() clamps the range to what 's' actually has, so a 'pat' longer
-  // than 's' compares unequal rather than reading out of bounds
-  return s.compare(0, pat.size(), pat) == 0;
-}
-
-// Does 's' finish with 'pat'? An empty 'pat' matches every string.
-bool ends_with(const string& s, const string& pat) {
-  if (pat.size() > s.size()) return false;  // too short to contain 'pat' at all
-  const string::size_type tail_start = s.size() - pat.size();
-  return s.compare(tail_start, pat.size(), pat) == 0;
-}
-
-// Rewrite 's' in place, turning every '<' into "&lt;" and every '>' into
-// "&gt;". All other characters (including '&') are left alone.
-void encode_some_html_entities(string& s) {
-  string encoded;
-  for (string::size_type i = 0;  i < s.size();  ++i) {
-    const char c = s[i];
-    if (c == '<')
-      encoded += "&lt;";
-    else if (c == '>')
-      encoded += "&gt;";
-    else
-      encoded += c;
-  }
-  s.swap(encoded);
-}
-
-// Load the cross-reference table in 'filename' into 'info'.
-//
-// Each line is expected to hold whitespace-separated fields:
-//   symbol  <ignored field>  line_num  filename  <rest of line, ignored>
-//
-// - entries whose first field is 'operator' are unsupported and skipped
-// - '<' and '>' in the symbol are html-encoded before it is used as a key
-//   (presumably so it compares equal to the text found in the html files)
-// - a symbol that shows up more than once gets line_num -1 and an empty
-//   filename, which marks it as ambiguous
-// - blank lines are ignored; lines missing a field or with a non-numeric
-//   line number are reported on stderr and skipped
-//
-// Parsing happens one line at a time so that a short or malformed line can
-// never swallow fields belonging to the line after it.
-// If the file can't be opened we complain and leave 'info' untouched.
-void read_tags(const string& filename, map<string, syminfo>& info) {
-  ifstream in(filename.c_str());
-  if (!in) {
-    cerr << "linkify: can't read tags file '" << filename << "'\n";
-    return;
-  }
-  string line;
-  while (getline(in, line)) {
-    istringstream fields(line);
-    string symbol;
-    if (!(fields >> symbol)) continue;  // blank line
-    if (symbol == "operator") continue;  // unsupported
-    encode_some_html_entities(symbol);
-    map<string, syminfo>::iterator prior = info.find(symbol);
-    if (prior != info.end()) {
-      // duplicate definition: we can't tell which one a use refers to
-      prior->second.line_num = -1;
-      prior->second.filename.clear();
-      continue;
-    }
-    string ignored;
-    syminfo entry;
-    if (!(fields >> ignored >> entry.line_num >> entry.filename)) {
-      cerr << "linkify: skipping malformed tags line: " << line << '\n';
-      continue;
-    }
-    info[symbol] = entry;
-  }
-}
-
-// Copy the remainder of a quoted literal from 'in' to 'out' untouched.
-// The opening 'quote' has already been read from 'in' (but not yet written).
-// A backslash protects the character after it, so an escaped quote doesn't
-// end the literal. 'in' must not be skipping whitespace.
-static void copy_quoted_literal(istream& in, std::ostream& out, char quote) {
-  out << quote;
-  char c = '\0';
-  while (in >> c) {
-    out << c;
-    if (c == '\\') {
-      // only echo the escaped character if there really was one; otherwise a
-      // backslash at end of line would be written twice
-      if (in >> c) out << c;
-    }
-    else if (c == quote) {
-      break;
-    }
-  }
-}
-
-// Copy everything left in 'in' to 'out' untouched.
-static void copy_rest_of_line(istream& in, std::ostream& out) {
-  char c = '\0';
-  while (in >> c) out << c;
-}
-
-// Does 'c' end the word currently being collected?
-// Keep sync'd with the clauses in replace_tags_in_file.
-static bool ends_word(char c) {
-  // go through unsigned char: isspace() is undefined for negative values,
-  // which is what bytes of multi-byte UTF-8 characters are in a plain char
-  if (isspace(static_cast<unsigned char>(c))) return true;
-  return c == '<' || c == '"' || c == '\'' || c == '/' || c == ','
-      || c == ':' || c == '(' || c == ')';
-}
-
-// Words that are never linked even when the tags file knows about them.
-static bool never_linked(const string& word) {
-  return word == "equal" || word == "index" || word == "put-index" || word == "length";
-}
-
-// Read the html file 'filename' and write a copy to 'filename'.out in which
-// words that name a symbol in 'info' are wrapped in a link to
-// <symbol's file>.html#L<symbol's line>.
-//
-// Only lines that start with '<span ' and contain a '</span>' are scanned;
-// all other lines are copied through unchanged. On a scanned line everything
-// up to and including the first '</span>' is copied through, and in the rest
-// of the line the following are copied through without linking:
-//   - whitespace
-//   - html tags ('<' up to the next '>')
-//   - string and character literals
-//   - comments starting with '#' or '//' (to end of line)
-//   - the punctuation ( ) , :
-//   - words in a small built-in blacklist, words not in 'info', and words
-//     whose entry in 'info' has an empty filename
-//   - in files named *.subx.html, words seen before the first whitespace,
-//     literal, comment or punctuation mark on the line
-//
-// If either file can't be opened we complain on stderr and do nothing.
-void replace_tags_in_file(const string& filename, const map<string, syminfo>& info) {
-  ifstream in(filename.c_str());
-  if (!in) {
-    cerr << "linkify: can't read '" << filename << "'\n";
-    return;
-  }
-  const string out_filename = filename+".out";
-  ofstream out(out_filename.c_str());
-  if (!out) {
-    cerr << "linkify: can't write '" << out_filename << "'\n";
-    return;
-  }
-  static const string span_end("</span>");
-  // only in .subx files, refuse to linkify the first word on a line
-  const bool is_subx = ends_with(filename, ".subx.html");
-  string line;
-  while (getline(in, line)) {
-    // send lines that don't start with '<span' straight through; likewise
-    // lines whose first span is never closed, since there's no telling where
-    // the text to scan would start
-    string::size_type first_span_end = string::npos;
-    if (starts_with(line, "<span "))
-      first_span_end = line.find(span_end);
-    if (first_span_end == string::npos) {
-      out << line << '\n';
-      continue;
-    }
-    const string::size_type scan_start = first_span_end + span_end.size();
-    out << line.substr(0, scan_start);
-    istringstream in2(line.substr(scan_start));
-    in2 >> std::noskipws;
-    bool at_start_of_line = is_subx;
-    while (has_data(in2)) {
-      const int next = in2.peek();  // not EOF; has_data just checked
-      if (isspace(next)) {
-        out << static_cast<char>(in2.get());
-        at_start_of_line = false;
-        continue;
-      }
-      // within a line, send straight through all characters inside '<..>'
-      if (next == '<') {
-        char t = '\0';
-        while (in2 >> t) {
-          out << t;
-          if (t == '>') break;
-        }
-        // don't include initial tag when computing 'at_start_of_line'
-        continue;
-      }
-      char c = in2.get();
-      // send straight through all characters inside strings (handling escapes)
-      if (c == '"' || c == '\'') {
-        copy_quoted_literal(in2, out, c);
-        at_start_of_line = false;
-        continue;
-      }
-      // send straight through any characters after '#' or '//' (comments)
-      if (c == '#' || (c == '/' && in2.peek() == '/')) {
-        out << c;
-        copy_rest_of_line(in2, out);
-        at_start_of_line = false;
-        continue;
-      }
-      // punctuation is never part of a word
-      if (c == '(' || c == ')' || c == ',' || c == ':') {
-        out << c;
-        at_start_of_line = false;
-        continue;
-      }
-      // anything else starts a word; collect it up to the next delimiter
-      string symbol(1, c);
-      while (in2 >> c) {
-        if (ends_word(c)) {
-          in2.putback(c);
-          break;
-        }
-        symbol += c;
-      }
-      map<string, syminfo>::const_iterator found = info.end();
-      if (!never_linked(symbol))
-        found = info.find(symbol);
-      const bool linkable = found != info.end()
-                            && !found->second.filename.empty()
-                            && !at_start_of_line;
-      if (linkable) {
-        const syminfo& s = found->second;
-        out << "<a href='" << s.filename << ".html#L" << s.line_num << "'>" << symbol << "</a>";
-      }
-      else {
-        out << symbol;
-      }
-    }  // done parsing line
-    out << '\n';
-  }
-  in.close();  out.close();
-}
-
-// Usage: linkify [tags file] [html files]...
-// Loads the tags file, then rewrites each html file in turn (each result goes
-// to a new file; see replace_tags_in_file).
-// Returns 1 without doing anything if no tags file was named.
-int main(int argc, const char* argv[]) {
-  if (argc < 2) {
-    // argv[1] would be null here, and building a string from it is undefined
-    cerr << "usage: linkify [tags file] [html files]...\n";
-    return 1;
-  }
-  map<string, syminfo> info;
-  read_tags(argv[1], info);
-  for (int i = 2;  i < argc;  ++i)
-    replace_tags_in_file(argv[i], info);
-  return 0;
-}