1 files changed, 57 insertions, 143 deletions
diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
index 597ee1553..57bc00fcb 100644
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -39,7 +39,8 @@
 ##   No backreferences are generated since finding all references of a footnote
 ##   can be done by simply searching for ``[footnoteName]``.
 
-import strutils, os, hashes, strtabs, rstast, rst, highlite, tables, sequtils,
+import strutils, os, hashes, strtabs, rstast, rst, rstidx,
+  highlite, tables, sequtils,
   algorithm, parseutils, std/strbasics
 
 
@@ -59,7 +60,7 @@ type
     outLatex            # output is Latex
 
   MetaEnum* = enum
-    metaNone, metaTitle, metaSubtitle, metaAuthor, metaVersion
+    metaNone, metaTitleRaw, metaTitle, metaSubtitle, metaAuthor, metaVersion
 
   EscapeMode* = enum  # in Latex text inside options [] and URLs is
                       # escaped slightly differently than in normal text
@@ -321,31 +322,8 @@ proc renderAux(d: PDoc, n: PRstNode, html, tex: string, result: var string) =
 
 # ---------------- index handling --------------------------------------------
 
-proc quoteIndexColumn(text: string): string =
-  ## Returns a safe version of `text` for serialization to the ``.idx`` file.
-  ##
-  ## The returned version can be put without worries in a line based tab
-  ## separated column text file. The following character sequence replacements
-  ## will be performed for that goal:
-  ##
-  ## * ``"\\"`` => ``"\\\\"``
-  ## * ``"\n"`` => ``"\\n"``
-  ## * ``"\t"`` => ``"\\t"``
-  result = newStringOfCap(text.len + 3)
-  for c in text:
-    case c
-    of '\\': result.add "\\"
-    of '\L': result.add "\\n"
-    of '\C': discard
-    of '\t': result.add "\\t"
-    else: result.add c
-
-proc unquoteIndexColumn(text: string): string =
-  ## Returns the unquoted version generated by ``quoteIndexColumn``.
-  result = text.multiReplace(("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\"))
-
-proc setIndexTerm*(d: var RstGenerator, htmlFile, id, term: string,
-                   linkTitle, linkDesc = "") =
+proc setIndexTerm*(d: var RstGenerator; k: IndexEntryKind, htmlFile, id, term: string,
+                   linkTitle, linkDesc = "", line = 0) =
   ## Adds a `term` to the index using the specified hyperlink identifier.
   ##
   ## A new entry will be added to the index using the format
@@ -368,21 +346,8 @@ proc setIndexTerm*(d: var RstGenerator, htmlFile, id, term: string,
   ## <#writeIndexFile,RstGenerator,string>`_. The purpose of the index is
   ## documented in the `docgen tools guide
   ## <docgen.html#related-options-index-switch>`_.
-  var
-    entry = term
-    isTitle = false
-  entry.add('\t')
-  entry.add(htmlFile)
-  if id.len > 0:
-    entry.add('#')
-    entry.add(id)
-  else:
-    isTitle = true
-  if linkTitle.len > 0 or linkDesc.len > 0:
-    entry.add('\t' & linkTitle.quoteIndexColumn)
-    entry.add('\t' & linkDesc.quoteIndexColumn)
-  entry.add("\n")
-
+  let (entry, isTitle) = formatIndexEntry(k, htmlFile, id, term,
+                                          linkTitle, linkDesc, line)
   if isTitle: d.theIndex.insert(entry)
   else: d.theIndex.add(entry)
 
@@ -395,6 +360,15 @@ proc hash(n: PRstNode): int =
       result = result !& hash(n.sons[i])
     result = !$result
 
+proc htmlFileRelPath(d: PDoc): string =
+  if d.outDir.len == 0:
+    # /foo/bar/zoo.nim -> zoo.html
+    changeFileExt(extractFilename(d.filename), HtmlExt)
+  else: # d is initialized in docgen.nim
+    # outDir   = /foo              -\
+    # destFile = /foo/bar/zoo.html -|-> bar/zoo.html
+    d.destFile.relativePath(d.outDir, '/')
+
 proc renderIndexTerm*(d: PDoc, n: PRstNode, result: var string) =
   ## Renders the string decorated within \`foobar\`\:idx\: markers.
   ##
@@ -411,17 +385,12 @@ proc renderIndexTerm*(d: PDoc, n: PRstNode, result: var string) =
 
   var term = ""
   renderAux(d, n, term)
-  setIndexTerm(d, changeFileExt(extractFilename(d.filename), HtmlExt), id, term, d.currentSection)
+  setIndexTerm(d, ieIdxRole,
+  htmlFileRelPath(d), id, term, d.currentSection)
   dispA(d.target, result, "<span id=\"$1\">$2</span>", "\\nimindexterm{$1}{$2}",
         [id, term])
 
 type
-  IndexEntry* = object
-    keyword*: string
-    link*: string
-    linkTitle*: string ## contains a prettier text for the href
-    linkDesc*: string ## the title attribute of the final href
-
   IndexedDocs* = Table[IndexEntry, seq[IndexEntry]] ## \
     ## Contains the index sequences for doc types.
     ##
@@ -432,21 +401,6 @@ type
     ## The value indexed by this IndexEntry is a sequence with the real index
     ## entries found in the ``.idx`` file.
 
-proc cmp(a, b: IndexEntry): int =
-  ## Sorts two ``IndexEntry`` first by `keyword` field, then by `link`.
-  result = cmpIgnoreStyle(a.keyword, b.keyword)
-  if result == 0:
-    result = cmpIgnoreStyle(a.link, b.link)
-
-proc hash(x: IndexEntry): Hash =
-  ## Returns the hash for the combined fields of the type.
-  ##
-  ## The hash is computed as the chained hash of the individual string hashes.
-  result = x.keyword.hash !& x.link.hash
-  result = result !& x.linkTitle.hash
-  result = result !& x.linkDesc.hash
-  result = !$result
-
 when defined(gcDestructors):
   template `<-`(a, b: var IndexEntry) = a = move(b)
 else:
@@ -455,6 +409,7 @@ else:
     shallowCopy a.link, b.link
     shallowCopy a.linkTitle, b.linkTitle
     shallowCopy a.linkDesc, b.linkDesc
+    shallowCopy a.module, b.module
 
 proc sortIndex(a: var openArray[IndexEntry]) =
   # we use shellsort here; fast and simple
@@ -494,16 +449,20 @@ proc generateSymbolIndex(symbols: seq[IndexEntry]): string =
   result = "<dl>"
   var i = 0
   while i < symbols.len:
-    let keyword = symbols[i].keyword
+    let keyword = esc(outHtml, symbols[i].keyword)
     let cleanedKeyword = keyword.escapeLink
     result.addf("<dt><a name=\"$2\" href=\"#$2\"><span>$1:</span></a></dt><dd><ul class=\"simple\">\n",
                 [keyword, cleanedKeyword])
     var j = i
-    while j < symbols.len and keyword == symbols[j].keyword:
+    while j < symbols.len and symbols[i].keyword == symbols[j].keyword:
       let
         url = symbols[j].link.escapeLink
-        text = if symbols[j].linkTitle.len > 0: symbols[j].linkTitle else: url
-        desc = if symbols[j].linkDesc.len > 0: symbols[j].linkDesc else: ""
+        module = symbols[j].module
+        text =
+          if symbols[j].linkTitle.len > 0:
+            esc(outHtml, module & ": " & symbols[j].linkTitle)
+          else: url
+        desc = symbols[j].linkDesc
       if desc.len > 0:
         result.addf("""<li><a class="reference external"
           title="$3" data-doc-search-tag="$2" href="$1">$2</a></li>
@@ -517,13 +476,6 @@ proc generateSymbolIndex(symbols: seq[IndexEntry]): string =
     i = j
   result.add("</dl>")
 
-proc isDocumentationTitle(hyperlink: string): bool =
-  ## Returns true if the hyperlink is actually a documentation title.
-  ##
-  ## Documentation titles lack the hash. See `mergeIndexes()
-  ## <#mergeIndexes,string>`_ for a more detailed explanation.
-  result = hyperlink.find('#') < 0
-
 proc stripTocLevel(s: string): tuple[level: int, text: string] =
   ## Returns the *level* of the toc along with the text without it.
   for c in 0 ..< s.len:
@@ -557,17 +509,15 @@ proc generateDocumentationToc(entries: seq[IndexEntry]): string =
     level = 1
   levels.newSeq(entries.len)
   for entry in entries:
-    let (rawLevel, rawText) = stripTocLevel(entry.linkTitle or entry.keyword)
+    let (rawLevel, rawText) = stripTocLevel(entry.linkTitle)
     if rawLevel < 1:
       # This is a normal symbol, push it *inside* one level from the last one.
       levels[L].level = level + 1
-      # Also, ignore the linkTitle and use directly the keyword.
-      levels[L].text = entry.keyword
     else:
       # The level did change, update the level indicator.
       level = rawLevel
       levels[L].level = rawLevel
-      levels[L].text = rawText
+    levels[L].text = rawText
     inc L
 
   # Now generate hierarchical lists based on the precalculated levels.
@@ -598,7 +548,7 @@ proc generateDocumentationIndex(docs: IndexedDocs): string =
   for title in titles:
     let tocList = generateDocumentationToc(docs.getOrDefault(title))
     result.add("<ul><li><a href=\"" &
-      title.link & "\">" & title.keyword & "</a>\n" & tocList & "</li></ul>\n")
+      title.link & "\">" & title.linkTitle & "</a>\n" & tocList & "</li></ul>\n")
 
 proc generateDocumentationJumps(docs: IndexedDocs): string =
   ## Returns a plain list of hyperlinks to documentation TOCs in HTML.
@@ -610,7 +560,7 @@ proc generateDocumentationJumps(docs: IndexedDocs): string =
 
   var chunks: seq[string] = @[]
   for title in titles:
-    chunks.add("<a href=\"" & title.link & "\">" & title.keyword & "</a>")
+    chunks.add("<a href=\"" & title.link & "\">" & title.linkTitle & "</a>")
 
   result.add(chunks.join(", ") & ".<br/>")
 
@@ -639,39 +589,11 @@ proc readIndexDir*(dir: string):
   # Scan index files and build the list of symbols.
   for path in walkDirRec(dir):
     if path.endsWith(IndexExt):
-      var
-        fileEntries: seq[IndexEntry]
-        title: IndexEntry
-        f = 0
-      newSeq(fileEntries, 500)
-      setLen(fileEntries, 0)
-      for line in lines(path):
-        let s = line.find('\t')
-        if s < 0: continue
-        setLen(fileEntries, f+1)
-        fileEntries[f].keyword = line.substr(0, s-1)
-        fileEntries[f].link = line.substr(s+1)
-        # See if we detect a title, a link without a `#foobar` trailing part.
-        if title.keyword.len == 0 and fileEntries[f].link.isDocumentationTitle:
-          title.keyword = fileEntries[f].keyword
-          title.link = fileEntries[f].link
-
-        if fileEntries[f].link.find('\t') > 0:
-          let extraCols = fileEntries[f].link.split('\t')
-          fileEntries[f].link = extraCols[0]
-          assert extraCols.len == 3
-          fileEntries[f].linkTitle = extraCols[1].unquoteIndexColumn
-          fileEntries[f].linkDesc = extraCols[2].unquoteIndexColumn
-        else:
-          fileEntries[f].linkTitle = ""
-          fileEntries[f].linkDesc = ""
-        inc f
+      var (fileEntries, title) = parseIdxFile(path)
       # Depending on type add this to the list of symbols or table of APIs.
-      if title.keyword.len == 0:
-        for i in 0 ..< f:
-          # Don't add to symbols TOC entries (they start with a whitespace).
-          let toc = fileEntries[i].linkTitle
-          if toc.len > 0 and toc[0] == ' ':
+      if title.kind == ieNimTitle:
+        for i in 0 ..< fileEntries.len:
+          if fileEntries[i].kind != ieNim:
             continue
           # Ok, non TOC entry, add it.
           setLen(result.symbols, L + 1)
@@ -687,7 +609,7 @@ proc readIndexDir*(dir: string):
             result.modules.add(x.changeFileExt(""))
       else:
         # Generate the symbolic anchor for index quickjumps.
-        title.linkTitle = "doc_toc_" & $result.docs.len
+        title.aux = "doc_toc_" & $result.docs.len
         result.docs[title] = fileEntries
 
 proc mergeIndexes*(dir: string): string =
@@ -747,24 +669,6 @@ proc mergeIndexes*(dir: string): string =
 
 # ----------------------------------------------------------------------------
 
-proc stripTocHtml(s: string): string =
-  ## Ugly quick hack to remove HTML tags from TOC titles.
-  ##
-  ## A TocEntry.header field already contains rendered HTML tags. Instead of
-  ## implementing a proper version of renderRstToOut() which recursively
-  ## renders an rst tree to plain text, we simply remove text found between
-  ## angled brackets. Given the limited possibilities of rst inside TOC titles
-  ## this should be enough.
-  result = s
-  var first = result.find('<')
-  while first >= 0:
-    let last = result.find('>', first)
-    if last < 0:
-      # Abort, since we didn't found a closing angled bracket.
-      return
-    result.delete(first..last)
-    first = result.find('<', first)
-
 proc renderHeadline(d: PDoc, n: PRstNode, result: var string) =
   var tmp = ""
   for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], tmp)
@@ -785,19 +689,12 @@ proc renderHeadline(d: PDoc, n: PRstNode, result: var string) =
 
   # Generate index entry using spaces to indicate TOC level for the output HTML.
   assert n.level >= 0
-  let
-    htmlFileRelPath = if d.outDir.len == 0:
-                        # /foo/bar/zoo.nim -> zoo.html
-                        changeFileExt(extractFilename(d.filename), HtmlExt)
-                      else: # d is initialized in docgen.nim
-                        # outDir   = /foo              -\
-                        # destFile = /foo/bar/zoo.html -|-> bar/zoo.html
-                        d.destFile.relativePath(d.outDir, '/')
-  setIndexTerm(d, htmlFileRelPath, n.anchor, tmp.stripTocHtml,
-    spaces(max(0, n.level)) & tmp)
+  setIndexTerm(d, ieHeading, htmlFile = d.htmlFileRelPath, id = n.anchor,
+               term = n.addNodes, linkTitle = spaces(max(0, n.level)) & tmp)
 
 proc renderOverline(d: PDoc, n: PRstNode, result: var string) =
   if n.level == 0 and d.meta[metaTitle].len == 0:
+    d.meta[metaTitleRaw] = n.addNodes
     for i in countup(0, len(n)-1):
       renderRstToOut(d, n.sons[i], d.meta[metaTitle])
     d.currentSection = d.meta[metaTitle]
@@ -813,6 +710,8 @@ proc renderOverline(d: PDoc, n: PRstNode, result: var string) =
     dispA(d.target, result, "<h$1$2><center>$3</center></h$1>",
                    "\\rstov$4[$5]{$3}$2\n", [$n.level,
                    n.anchor.idS, tmp, $chr(n.level - 1 + ord('A')), tocName])
+    setIndexTerm(d, ieHeading, htmlFile = d.htmlFileRelPath, id = n.anchor,
+                 term = n.addNodes, linkTitle = spaces(max(0, n.level)) & tmp)
 
 proc renderTocEntry(d: PDoc, n: PRstNode, result: var string) =
   var header = ""
@@ -1197,6 +1096,18 @@ proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string,
       "\\hyperlink{$2}{$1} (p.~\\pageref{$2})",
       [textStr, linkStr, nimDocStr, tooltipStr])
 
+proc traverseForIndex*(d: PDoc, n: PRstNode) =
+  ## A version of [renderRstToOut] that only fills entries for ``.idx`` files.
+  var discarded: string
+  if n == nil: return
+  case n.kind
+  of rnIdx: renderIndexTerm(d, n, discarded)
+  of rnHeadline, rnMarkdownHeadline: renderHeadline(d, n, discarded)
+  of rnOverline: renderOverline(d, n, discarded)
+  else:
+    for i in 0 ..< len(n):
+      traverseForIndex(d, n.sons[i])
+
 proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
   if n == nil: return
   case n.kind
@@ -1451,6 +1362,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
   of rnTitle:
     d.meta[metaTitle] = ""
     renderRstToOut(d, n.sons[0], d.meta[metaTitle])
+    d.meta[metaTitleRaw] = n.sons[0].addNodes
 
 # -----------------------------------------------------------------------------
 
@@ -1616,11 +1528,13 @@ proc rstToHtml*(s: string, options: RstParseOptions,
   proc myFindFile(filename: string): string =
     # we don't find any files in online mode:
     result = ""
+  proc myFindRefFile(filename: string): (string, string) =
+    result = ("", "")
 
   const filen = "input"
   let (rst, filenames, t) = rstParse(s, filen,
                                      line=LineRstInit, column=ColRstInit,
-                                     options, myFindFile, msgHandler)
+                                     options, myFindFile, myFindRefFile, msgHandler)
   var d: RstGenerator
   initRstGenerator(d, outHtml, config, filen, myFindFile, msgHandler,
                    filenames, hasToc = t)