From 58f19b4c41f5c4ca9fec1515fe5e24732eda19b7 Mon Sep 17 00:00:00 2001
From: "Kartik K. Agaram" <vc@akkartik.com>
Date: Tue, 3 Jan 2017 22:10:09 -0800
Subject: 3729

---
 linkify/000test.cc       |  26 ------
 linkify/001trace.cc      | 139 ----------------------------
 linkify/001trace.test.cc |  88 ------------------
 linkify/002main.cc       |  50 -----------
 linkify/003linkify.cc    | 206 ------------------------------------------
 linkify/Readme           |   2 +
 linkify/boot.cc          |  41 ---------
 linkify/build            |   8 +-
 linkify/clean            |   2 +-
 linkify/linkify.cc       | 230 +++++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 234 insertions(+), 558 deletions(-)
 delete mode 100644 linkify/000test.cc
 delete mode 100644 linkify/001trace.cc
 delete mode 100644 linkify/001trace.test.cc
 delete mode 100644 linkify/002main.cc
 delete mode 100644 linkify/003linkify.cc
 delete mode 100644 linkify/boot.cc
 create mode 100644 linkify/linkify.cc

(limited to 'linkify')

diff --git a/linkify/000test.cc b/linkify/000test.cc
deleted file mode 100644
index 2754b254..00000000
--- a/linkify/000test.cc
+++ /dev/null
@@ -1,26 +0,0 @@
-typedef void (*test_fn)(void);
-
-const test_fn Tests[] = {
-  #include "test_list"  // auto-generated; see makefile
-};
-
-bool Passed = true;
-
-long Num_failures = 0;
-
-#define CHECK(X) \
-  if (!(X)) { \
-    ++Num_failures; \
-    cerr << "\nF " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << '\n'; \
-    Passed = false; \
-    return; \
-  }
-
-#define CHECK_EQ(X, Y) \
-  if ((X) != (Y)) { \
-    ++Num_failures; \
-    cerr << "\nF " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): " << #X << " == " << #Y << '\n'; \
-    cerr << "  got " << (X) << '\n';  /* BEWARE: multiple eval */ \
-    Passed = false; \
-    return; \
-  }
diff --git a/linkify/001trace.cc b/linkify/001trace.cc
deleted file mode 100644
index e78c0010..00000000
--- a/linkify/001trace.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-bool Hide_warnings = false;
-
-struct trace_stream {
-  vector<pair<string, string> > past_lines;  // [(layer label, line)]
-  // accumulator for current line
-  ostringstream* curr_stream;
-  string curr_layer;
-  trace_stream() :curr_stream(NULL) {}
-  ~trace_stream() { if (curr_stream) delete curr_stream; }
-
-  ostringstream& stream(string layer) {
-    newline();
-    curr_stream = new ostringstream;
-    curr_layer = layer;
-    return *curr_stream;
-  }
-
-  // be sure to call this before messing with curr_stream or curr_layer
-  void newline() {
-    if (!curr_stream) return;
-    string curr_contents = curr_stream->str();
-    curr_contents.erase(curr_contents.find_last_not_of("\r\n")+1);
-    past_lines.push_back(pair<string, string>(curr_layer, curr_contents));
-    delete curr_stream;
-    curr_stream = NULL;
-  }
-
-  string readable_contents(string layer) {  // missing layer = everything
-    newline();
-    ostringstream output;
-    for (vector<pair<string, string> >::iterator p = past_lines.begin(); p != past_lines.end(); ++p)
-      if (layer.empty() || layer == p->first)
-        output << p->first << ": " << with_newline(p->second);
-    return output.str();
-  }
-
-  string with_newline(string s) {
-    if (s[s.size()-1] != '\n') return s+'\n';
-    return s;
-  }
-};
-
-trace_stream* Trace_stream = NULL;
-
-// Top-level helper. IMPORTANT: can't nest.
-#define trace(layer)  !Trace_stream ? cerr /*print nothing*/ : Trace_stream->stream(layer)
-// Warnings should go straight to cerr by default since calls to trace() have
-// some unfriendly constraints (they delay printing, they can't nest)
-#define raise  ((!Trace_stream || !Hide_warnings) ? cerr /*do print*/ : Trace_stream->stream("warn")) << __FILE__ << ":" << __LINE__ << " "
-
-// raise << die exits after printing -- unless Hide_warnings is set.
-struct die {};
-ostream& operator<<(ostream& os, __attribute__((unused)) die) {
-  if (Hide_warnings) return os;
-  os << "dying\n";
-  exit(1);
-}
-
-#define CLEAR_TRACE  delete Trace_stream, Trace_stream = new trace_stream;
-
-#define DUMP(layer)  cerr << Trace_stream->readable_contents(layer)
-
-// Trace_stream is a resource, lease_tracer uses RAII to manage it.
-struct lease_tracer {
-  lease_tracer() { Trace_stream = new trace_stream; }
-  ~lease_tracer() { delete Trace_stream, Trace_stream = NULL; }
-};
-
-#define START_TRACING_UNTIL_END_OF_SCOPE  lease_tracer leased_tracer;
-
-bool check_trace_contents(string FUNCTION, string FILE, int LINE, string layer, string expected) {  // empty layer == everything
-  vector<string> expected_lines = split(expected, "");
-  size_t curr_expected_line = 0;
-  while (curr_expected_line < expected_lines.size() && expected_lines[curr_expected_line].empty())
-    ++curr_expected_line;
-  if (curr_expected_line == expected_lines.size()) return true;
-  Trace_stream->newline();
-  ostringstream output;
-  for (vector<pair<string, string> >::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) {
-    if (!layer.empty() && layer != p->first)
-      continue;
-    if (p->second != expected_lines[curr_expected_line])
-      continue;
-    ++curr_expected_line;
-    while (curr_expected_line < expected_lines.size() && expected_lines[curr_expected_line].empty())
-      ++curr_expected_line;
-    if (curr_expected_line == expected_lines.size()) return true;
-  }
-
-  ++Num_failures;
-  cerr << "\nF " << FUNCTION << "(" << FILE << ":" << LINE << "): missing [" << expected_lines[curr_expected_line] << "] in trace:\n";
-  DUMP(layer);
-  Passed = false;
-  return false;
-}
-
-#define CHECK_TRACE_CONTENTS(...)  check_trace_contents(__FUNCTION__, __FILE__, __LINE__, __VA_ARGS__)
-
-int trace_count(string layer, string line) {
-  Trace_stream->newline();
-  long result = 0;
-  for (vector<pair<string, string> >::iterator p = Trace_stream->past_lines.begin(); p != Trace_stream->past_lines.end(); ++p) {
-    if (layer == p->first)
-      if (line == "" || p->second == line)
-        ++result;
-  }
-  return result;
-}
-
-#define CHECK_TRACE_WARNS()  CHECK(trace_count("warn", "") > 0)
-#define CHECK_TRACE_DOESNT_WARN() \
-  if (trace_count("warn") > 0) { \
-    ++Num_failures; \
-    cerr << "\nF " << __FUNCTION__ << "(" << __FILE__ << ":" << __LINE__ << "): unexpected warnings\n"; \
-    DUMP("warn"); \
-    Passed = false; \
-    return; \
-  }
-
-bool trace_doesnt_contain(string layer, string line) {
-  return trace_count(layer, line) == 0;
-}
-
-#define CHECK_TRACE_DOESNT_CONTAIN(...)  CHECK(trace_doesnt_contain(__VA_ARGS__))
-
-vector<string> split(string s, string delim) {
-  vector<string> result;
-  string::size_type begin=0, end=s.find(delim);
-  while (true) {
-    if (end == string::npos) {
-      result.push_back(string(s, begin, string::npos));
-      break;
-    }
-    result.push_back(string(s, begin, end-begin));
-    begin = end+delim.size();
-    end = s.find(delim, begin);
-  }
-  return result;
-}
diff --git a/linkify/001trace.test.cc b/linkify/001trace.test.cc
deleted file mode 100644
index 5f0a696e..00000000
--- a/linkify/001trace.test.cc
+++ /dev/null
@@ -1,88 +0,0 @@
-void test_trace_check_compares() {
-  CHECK_TRACE_CONTENTS("test layer", "");
-  trace("test layer") << "foo";
-  CHECK_TRACE_CONTENTS("test layer", "foo");
-}
-
-void test_trace_check_filters_layers() {
-  trace("test layer 1") << "foo";
-  trace("test layer 2") << "bar";
-  CHECK_TRACE_CONTENTS("test layer 1", "foo");
-}
-
-void test_trace_check_ignores_other_lines() {
-  trace("test layer 1") << "foo";
-  trace("test layer 1") << "bar";
-  CHECK_TRACE_CONTENTS("test layer 1", "foo");
-}
-
-void test_trace_check_always_finds_empty_lines() {
-  CHECK_TRACE_CONTENTS("test layer 1", "");
-}
-
-void test_trace_check_treats_empty_layers_as_wildcards() {
-  trace("test layer 1") << "foo";
-  CHECK_TRACE_CONTENTS("", "foo");
-}
-
-void test_trace_check_multiple_lines_at_once() {
-  trace("test layer 1") << "foo";
-  trace("test layer 2") << "bar";
-  CHECK_TRACE_CONTENTS("", "foobar");
-}
-
-void test_trace_check_always_finds_empty_lines2() {
-  CHECK_TRACE_CONTENTS("test layer 1", "");
-}
-
-void test_trace_orders_across_layers() {
-  trace("test layer 1") << "foo";
-  trace("test layer 2") << "bar";
-  trace("test layer 1") << "qux";
-  CHECK_TRACE_CONTENTS("", "foobarqux");
-}
-
-void test_trace_supports_count() {
-  trace("test layer 1") << "foo";
-  trace("test layer 1") << "foo";
-  CHECK_EQ(trace_count("test layer 1", "foo"), 2);
-}
-
-//// helpers
-
-// can't check trace because trace methods call 'split'
-
-void test_split_returns_at_least_one_elem() {
-  vector<string> result = split("", ",");
-  CHECK_EQ(result.size(), 1);
-  CHECK_EQ(result[0], "");
-}
-
-void test_split_returns_entire_input_when_no_delim() {
-  vector<string> result = split("abc", ",");
-  CHECK_EQ(result.size(), 1);
-  CHECK_EQ(result[0], "abc");
-}
-
-void test_split_works() {
-  vector<string> result = split("abc,def", ",");
-  CHECK_EQ(result.size(), 2);
-  CHECK_EQ(result[0], "abc");
-  CHECK_EQ(result[1], "def");
-}
-
-void test_split_works2() {
-  vector<string> result = split("abc,def,ghi", ",");
-  CHECK_EQ(result.size(), 3);
-  CHECK_EQ(result[0], "abc");
-  CHECK_EQ(result[1], "def");
-  CHECK_EQ(result[2], "ghi");
-}
-
-void test_split_handles_multichar_delim() {
-  vector<string> result = split("abc,,def,,ghi", ",,");
-  CHECK_EQ(result.size(), 3);
-  CHECK_EQ(result[0], "abc");
-  CHECK_EQ(result[1], "def");
-  CHECK_EQ(result[2], "ghi");
-}
diff --git a/linkify/002main.cc b/linkify/002main.cc
deleted file mode 100644
index a4f5b273..00000000
--- a/linkify/002main.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-int main(int argc, const char* argv[]) {
-  if (flag("test", argc, argv))
-    return run_tests();
-  return linkify(argc, argv);
-}
-
-bool flag(const string& flag, int argc, const char* argv[]) {
-  for (int i = 1; i < argc; ++i)
-    if (string(argv[i]) == flag)
-      return true;
-  return false;
-}
-
-string flag_value(const string& flag, int argc, const char* argv[]) {
-  for (int i = 1; i < argc-1; ++i)
-    if (string(argv[i]) == flag)
-      return argv[i+1];
-  return "";
-}
-
-//// test harness
-
-int run_tests() {
-  for (unsigned long i=0; i < sizeof(Tests)/sizeof(Tests[0]); ++i) {
-    START_TRACING_UNTIL_END_OF_SCOPE;
-    setup();
-    (*Tests[i])();
-    verify();
-  }
-
-  cerr << '\n';
-  if (Num_failures > 0)
-    cerr << Num_failures << " failure"
-         << (Num_failures > 1 ? "s" : "")
-         << '\n';
-  return Num_failures;
-}
-
-void verify() {
-  Hide_warnings = false;
-  if (!Passed)
-    ;
-  else
-    cerr << ".";
-}
-
-void setup() {
-  Hide_warnings = false;
-  Passed = true;
-}
diff --git a/linkify/003linkify.cc b/linkify/003linkify.cc
deleted file mode 100644
index 3bb77401..00000000
--- a/linkify/003linkify.cc
+++ /dev/null
@@ -1,206 +0,0 @@
-// Read a tabular cross-reference file generated by ctags, then read a list of
-// html files generated by Vim's TOhtml command on C++ code. Link words
-// in the html files to cross-references from ctags.
-
-// Usage:
-//    linkify [tags file] [html files]...
-
-// Still plenty of holes:
-// - unnecessarily linking definition location to itself
-// - can't detect strings in spite of attempt to support them below, because
-//   Vim's generated html turns quotes into html entities
-// - distinguishing function and variable names
-// - distinguishing Mu code in C++ files
-// - distinguishing between function overloads
-//   - if there's duplicate tags we aren't smart enough to distinguish between
-//     them yet, so we simply don't add any link at all
-//   - but even that's not perfect, because sometimes the tags file has a
-//     single definition but there's still multiple overloads (say I defined
-//     'clear()' on some type, and it's already defined on STL classes)
-// - ctags misses some symbols in layered code
-
-struct syminfo {
-  string filename;
-  int line_num;
-  syminfo() :line_num(0) {}
-};
-
-bool has_data(istream& in) {
-  in.peek();
-  if (in.eof()) return false;
-  assert(in);
-  return true;
-}
-
-bool starts_with(const string& s, const string& pat) {
-  string::const_iterator a=s.begin(), b=pat.begin();
-  for (/*nada*/;  a!=s.end() && b!=pat.end();  ++a, ++b)
-    if (*a != *b) return false;
-  return b == pat.end();
-}
-
-void encode_some_html_entities(string& s) {
-  std::string::size_type pos = 0;
-  while (true) {
-    pos = s.find_first_of("<>", pos);
-    if (pos == std::string::npos) break;
-    std::string replacement;
-    switch (s.at(pos)) {
-      case '<': replacement = "&lt;"; break;
-      case '>': replacement = "&gt;"; break;
-    }
-    s.replace(pos, 1, replacement);
-    pos += replacement.size();
-  };
-}
-
-void read_tags(const string& filename, map<string, syminfo>& info) {
-  ifstream in(filename);
-//?   cerr << "reading " << filename << '\n';
-  string dummy;
-  while (has_data(in)) {
-    string symbol;  in >> symbol;
-    encode_some_html_entities(symbol);
-//?     cerr << symbol << '\n';
-    if (info.find(symbol) != info.end()) {
-      info[symbol].line_num = -1;
-      info[symbol].filename.clear();
-    }
-    else {
-      in >> dummy;
-      in >> info[symbol].line_num;
-      in >> info[symbol].filename;
-    }
-    getline(in, dummy);  // skip rest of line
-//?     cerr << symbol << ": " << info[symbol].filename << ':' << info[symbol].line_num << '\n';
-  }
-  in.close();
-}
-
-void replace_tags_in_file(const string& filename, const map<string, syminfo>& info) {
-//?   cerr << info.size() << " symbols\n";
-  ifstream in(filename);
-  ofstream out(filename+".out");
-  while (has_data(in)) {
-    // send lines that don't start with '<span' straight through
-    string line;
-    getline(in, line);
-    if (!starts_with(line, "<span ")) {
-      out << line << '\n';
-    }
-    else {
-      static int span_size = string("</span>").size();
-      int skip_first_span = line.find("</span>") + span_size;
-      out << line.substr(0, skip_first_span);
-      istringstream in2(line.substr(skip_first_span));
-      in2 >> std::noskipws;
-      while (has_data(in2)) {
-        if (isspace(in2.peek())) {
-//?           cerr << "space\n";
-          char c;  in2 >> c;
-          out << c;
-        }
-        // within a line, send straight through all characters inside '<..>'
-        else if (in2.peek() == '<') {
-//?           cerr << "tag\n";
-          char c = '\0';
-          while (in2 >> c) {
-//?             cerr << "span: " << c << '\n';
-            out << c;
-            if (c == '>') break;
-          }
-//?           cerr << "end tag\n";
-        }
-        else {
-          // send straight through all characters inside strings (handling escapes)
-          char c = in2.get();
-          if (c == '"') {
-//?             cerr << "string\n";
-            out << c;
-            while (in2 >> c) {
-              out << c;
-              if (c == '\\') {
-                in2 >> c;  out << c;
-              }
-              else if (c == '"') {
-                break;
-              }
-            }
-          }
-          else if (c == '\'') {
-//?             cerr << "character\n";
-            out << c;
-            while (in2 >> c) {
-              out << c;
-              if (c == '\\') {
-                in2 >> c;  out << c;
-              }
-              else if (c == '\'') {
-                break;
-              }
-            }
-          }
-          // send straight through any characters after '//' (comments)
-          else if (c == '#') {
-//?             cerr << "comment\n";
-            out << c;
-            while (in2 >> c) out << c;
-          }
-          // send straight through any characters after '//' (comments)
-          else if (c == '/' && in2.peek() == '/') {
-//?             cerr << "comment\n";
-            out << c;
-            while (in2 >> c) out << c;
-          }
-          else {
-//?             cerr << "rest\n";
-            if (c == ',' || c == ':') {
-              out << c;
-              continue;
-            }
-            ostringstream out2;
-            out2 << c;
-            while (in2 >> c) {
-              if (isspace(c) || c == '<' || c == '"' || c == '\'' || c == '/' || c == ',' || c == ':') {  // keep sync'd with other clauses above
-                in2.putback(c);
-                break;
-              }
-              out2 << c;
-            }
-            string symbol = out2.str();
-            if (symbol == "equal" || symbol == "index" || symbol == "put-index" || symbol == "length") {
-//?               cerr << "  blacklisted\n";
-              out << symbol;
-            }
-            else if (info.find(symbol) == info.end()) {
-//?               cerr << "  no info\n";
-              out << symbol;
-            }
-            else {
-              const syminfo& s = info.find(symbol)->second;
-              if (s.filename.empty()) {
-//?                 cerr << "  empty info\n";
-                out << symbol;
-              }
-              else {
-//?                 cerr << "  link\n";
-                out << "<a href='" << s.filename << ".html#L" << s.line_num << "'>" << symbol << "</a>";
-              }
-            }
-          }  // end rest
-        }
-      }  // done parsing line
-      out << '\n';
-    }
-  }
-  in.close();  out.close();
-}
-
-int linkify(int argc, const char* argv[]) {
-  map<string, syminfo> info;
-  read_tags(argv[1], info);
-  for (int i = 2;  i < argc;  ++i) {
-    replace_tags_in_file(argv[i], info);
-  }
-  return 0;
-}
diff --git a/linkify/Readme b/linkify/Readme
index c88928b1..c4625a9d 100644
--- a/linkify/Readme
+++ b/linkify/Readme
@@ -1 +1,3 @@
 Tool used while rendering Mu's codebase in html. See the mu/update_html script.
+
+Extremely hacky; just see the number of tests.
diff --git a/linkify/boot.cc b/linkify/boot.cc
deleted file mode 100644
index d3b59a7c..00000000
--- a/linkify/boot.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-#include<assert.h>
-#include<cstdlib>
-#include<cstring>
-
-#include<vector>
-using std::vector;
-#include<list>
-using std::list;
-#include<map>
-using std::map;
-#include<utility>
-using std::pair;
-
-#include<string>
-using std::string;
-
-#include<iostream>
-using std::istream;
-using std::ostream;
-using std::cin;
-using std::cout;
-using std::cerr;
-
-#include<sstream>
-using std::istringstream;
-using std::ostringstream;
-
-#include<fstream>
-using std::ifstream;
-using std::ofstream;
-
-#include <locale>
-using std::isspace;  // unicode-aware
-
-#include "type_list"
-
-#include "function_list"
-
-#include "file_list"
-
-#include "test_file_list"
diff --git a/linkify/build b/linkify/build
index da6c3ff4..54c3f459 100755
--- a/linkify/build
+++ b/linkify/build
@@ -1,9 +1,3 @@
 #!/bin/sh
 
-grep -h "^struct .* {" [0-9]*.cc  |sed 's/\(struct *[^ ]*\).*/\1;/'  > type_list
-grep -h "^typedef " [0-9]*.cc  >> type_list
-grep -h "^[^ #].*) {" [0-9]*.cc  |sed 's/ {.*/;/'  > function_list
-ls [0-9]*.cc  |grep -v "\.test\.cc$"  |sed 's/.*/#include "&"/'  > file_list
-ls [0-9]*.test.cc  |sed 's/.*/#include "&"/'  > test_file_list
-grep -h "^[[:space:]]*void test_" [0-9]*.cc  |sed 's/^\s*void \(.*\)() {$/\1,/'  > test_list
-c++ -g -O3 boot.cc -o linkify
+c++ -g linkify.cc -o linkify
diff --git a/linkify/clean b/linkify/clean
index 87321f72..7838b78a 100755
--- a/linkify/clean
+++ b/linkify/clean
@@ -1,2 +1,2 @@
 #!/bin/sh
-rm -rf linkify *.dSYM *_list
+rm -rf linkify *.dSYM
diff --git a/linkify/linkify.cc b/linkify/linkify.cc
new file mode 100644
index 00000000..7ed2bc38
--- /dev/null
+++ b/linkify/linkify.cc
@@ -0,0 +1,230 @@
+// Read a tabular cross-reference file generated by ctags, then read a list of
+// html files generated by Vim's TOhtml command on C++ code. Link words
+// in the html files to cross-references from ctags.
+
+// Usage:
+//    linkify [tags file] [html files]...
+
+// Still plenty of holes:
+// - unnecessarily linking definition location to itself
+// - can't detect strings in spite of attempt to support them below, because
+//   Vim's generated html turns quotes into html entities
+// - distinguishing function and variable names
+// - distinguishing Mu code in C++ files
+// - distinguishing between function overloads
+//   - if there are duplicate tags we aren't smart enough to distinguish between
+//     them yet, so we simply don't add any link at all
+//   - but even that's not perfect, because sometimes the tags file has a
+//     single definition but there are still multiple overloads (say I defined
+//     'clear()' on some type, and it's already defined on STL classes)
+// - ctags misses some symbols in layered code
+
+#include<assert.h>
+
+#include<map>
+using std::map;
+
+#include<string>
+using std::string;
+
+#include<iostream>
+using std::istream;
+using std::cout;
+using std::cerr;
+
+#include<sstream>
+using std::istringstream;
+using std::ostringstream;
+
+#include<fstream>
+using std::ifstream;
+using std::ofstream;
+
+#include <locale>
+using std::isspace;  // unicode-aware
+
+struct syminfo {  // what the tags file recorded for one symbol
+  string filename;  // left empty when the symbol is ambiguous
+  int line_num;  // 0 until read; -1 when the symbol is ambiguous
+  syminfo() :filename(), line_num(0) {}
+};
+
+// Report whether 'in' still has unread characters; a stream that is in a
+// failed state without having reached end-of-file trips the assert.
+bool has_data(istream& in) {
+  (void)in.peek();  // look ahead so that an exhausted stream reports eof()
+  if (!in.eof()) { assert(in);  return true; }
+  return false;
+}
+
+// True iff the leading characters of 's' are exactly 'pat'. An empty 'pat'
+// matches every string; a 'pat' longer than 's' never matches.
+bool starts_with(const string& s, const string& pat) {
+  if (pat.size() > s.size()) return false;
+  return s.compare(0, pat.size(), pat) == 0;
+}
+
+// Rewrite 's' in place, turning every '<' into "&lt;" and every '>' into
+// "&gt;". No other characters are touched (in particular '&' is left alone).
+void encode_some_html_entities(string& s) {
+  string encoded;
+  for (string::const_iterator p = s.begin();  p != s.end();  ++p) {
+    if (*p == '<')
+      encoded += "&lt;";
+    else if (*p == '>')
+      encoded += "&gt;";
+    else
+      encoded += *p;
+  }
+  s.swap(encoded);
+}
+
+// Load ctags output from 'filename' into 'info' (symbol, ignored field, line
+// number, file name). A repeated symbol is blanked out: line -1, no filename.
+void read_tags(const string& filename, map<string, syminfo>& info) {
+  ifstream in(filename);
+  string rest_of_line;
+  while (has_data(in)) {
+    string symbol;  in >> symbol;
+    encode_some_html_entities(symbol);
+    map<string, syminfo>::iterator known = info.find(symbol);
+    if (known == info.end()) {
+      string ignored;
+      syminfo& fresh = info[symbol];
+      in >> ignored >> fresh.line_num >> fresh.filename;
+    }
+    else {
+      known->second.line_num = -1;
+      known->second.filename.clear();
+    }
+    getline(in, rest_of_line);  // skip rest of line
+  }
+  in.close();
+}
+
+// Copy the html file 'filename' to 'filename'.out, wrapping each word that has
+// an unambiguous entry in 'info' in a link to "<file>.html#L<line>". Only
+// lines starting with "<span " are scanned, and only after their first
+// "</span>"; markup, quoted text, and comments are copied through unchanged.
+void replace_tags_in_file(const string& filename, const map<string, syminfo>& info) {
+//?   cerr << info.size() << " symbols\n";
+  ifstream in(filename);
+  ofstream out(filename+".out");
+  while (has_data(in)) {
+    // send lines that don't start with '<span' straight through
+    string line;
+    getline(in, line);
+    if (!starts_with(line, "<span ")) {
+      out << line << '\n';
+    }
+    else {
+      static int span_size = string("</span>").size();
+      int skip_first_span = line.find("</span>") + span_size;
+      out << line.substr(0, skip_first_span);
+      istringstream in2(line.substr(skip_first_span));
+      in2 >> std::noskipws;
+      while (has_data(in2)) {
+        if (isspace(in2.peek())) {
+//?           cerr << "space\n";
+          char c;  in2 >> c;
+          out << c;
+        }
+        // within a line, send straight through all characters inside '<..>'
+        else if (in2.peek() == '<') {
+//?           cerr << "tag\n";
+          char c = '\0';
+          while (in2 >> c) {
+//?             cerr << "span: " << c << '\n';
+            out << c;
+            if (c == '>') break;
+          }
+//?           cerr << "end tag\n";
+        }
+        else {
+          // send straight through all characters inside strings (handling escapes)
+          char c = in2.get();
+          if (c == '"') {
+//?             cerr << "string\n";
+            out << c;
+            while (in2 >> c) {
+              out << c;
+              if (c == '\\') {
+                // emit the escaped character only if the line didn't end at the backslash
+                if (in2 >> c) out << c;
+              }
+              else if (c == '"') {
+                break;
+              }
+            }
+          }
+          else if (c == '\'') {
+//?             cerr << "character\n";
+            out << c;
+            while (in2 >> c) {
+              out << c;
+              if (c == '\\') {
+                if (in2 >> c) out << c;
+              }
+              else if (c == '\'') {
+                break;
+              }
+            }
+          }
+          // send straight through any characters after '#' or '//' (comments)
+          else if (c == '#' || (c == '/' && in2.peek() == '/')) {
+//?             cerr << "comment\n";
+            out << c;
+            while (in2 >> c) out << c;
+          }
+          else {
+//?             cerr << "rest\n";
+            if (c == ',' || c == ':') {
+              out << c;
+              continue;
+            }
+            ostringstream out2;
+            out2 << c;
+            while (in2 >> c) {
+              // isspace() is undefined for negative arguments, which a signed char holding a non-ASCII byte would be
+              if (isspace(static_cast<unsigned char>(c)) || c == '<' || c == '"' || c == '\'' || c == '/' || c == ',' || c == ':') {  // keep sync'd with other clauses above
+                in2.putback(c);
+                break;
+              }
+              out2 << c;
+            }
+            string symbol = out2.str();
+            if (symbol == "equal" || symbol == "index" || symbol == "put-index" || symbol == "length") {
+//?               cerr << "  blacklisted\n";
+              out << symbol;
+            }
+            else if (info.find(symbol) == info.end()) {
+//?               cerr << "  no info\n";
+              out << symbol;
+            }
+            else {
+              const syminfo& s = info.find(symbol)->second;
+              if (s.filename.empty()) {
+//?                 cerr << "  empty info\n";
+                out << symbol;
+              }
+              else {
+//?                 cerr << "  link\n";
+                out << "<a href='" << s.filename << ".html#L" << s.line_num << "'>" << symbol << "</a>";
+              }
+            }
+          }  // end rest
+        }
+      }  // done parsing line
+      out << '\n';
+    }
+  }
+  in.close();  out.close();
+}
+
+// Usage: linkify [tags file] [html files]...
+int main(int argc, const char* argv[]) {
+  if (argc < 2) { cerr << "usage: linkify [tags file] [html files]...\n";  return 1; }
+  map<string, syminfo> info;
+  read_tags(argv[1], info);  // argv[1] is only valid once argc >= 2
+  for (int i = 2;  i < argc;  ++i) replace_tags_in_file(argv[i], info);  // writes argv[i].out
+  return 0;
+}
-- 
cgit 1.4.1-2-gfad0