docgen: implement cross-document links (#20990)

* docgen: implement cross-document links Fully implements https://github.com/nim-lang/RFCs/issues/125 Follow-up of: https://github.com/nim-lang/Nim/pull/18642 (for internal links) and https://github.com/nim-lang/Nim/issues/20127. Overview -------- Explicit import-like directive is required, called `.. importdoc::`. (the syntax is % RST, Markdown will use it for a while). Then one can reference any symbols/headings/anchors, as if they were in the local file (but they will be prefixed with a module name or markup document in link text). It's possible to reference anything from anywhere (any direction in `.nim`/`.md`/`.rst` files). See `doc/docgen.md` for full description. Working is based on `.idx` files, hence one needs to generate all `.idx` beforehand. A dedicated option `--index:only` is introduced (and a separate stage for `--index:only` is added to `kochdocs.nim`). Performance note ---------------- Full run for `./koch docs` now takes 185% of the time before this PR. (After: 315 s, before: 170 s on my PC). All the time seems to be spent on `--index:only` run, which takes almost as much (85%) of normal doc run -- it seems that most time is spent on file parsing, turning off HTML generation phase has not helped much. (One could avoid it by specifying list of files that can be referenced and pre-processing only them. But it can become error-prone and I assume that these linke will be **everywhere** in the repository anyway, especially considering https://github.com/nim-lang/RFCs/issues/478. So every `.nim`/`.md` file is processed for `.idx` first). But that's all without significant part of repository converted to cross-module auto links. To estimate impact I checked the time for `doc`ing a few files (after all indexes have been generated), and everywhere difference was **negligible**. E.g. for `lib/std/private/osfiles.nim` that `importdoc`s large `os.idx` and hence should have been a case with relatively large performance impact, but: * After: 0.59 s. * Before: 0.59 s. So Nim compiler works so slow that doc part basically does not matter :-) Testing ------- 1) added `extlinks` test to `nimdoc/` 2) checked that `theindex.html` is still correct 2) fixed broken auto-links for modules that were derived from `os.nim` by adding appropriate ``importdoc`` Implementation note ------------------- Parsing and formating of `.idx` entries is moved into a dedicated `rstidx.nim` module from `rstgen.nim`. `.idx` file format changed: * fields are not escaped in most cases because we need original strings for referencing, not HTML ones (the exception is linkTitle for titles and headings). Escaping happens later -- on the stage of `rstgen` buildIndex, etc. * all lines have fixed number of columns 6 * added discriminator tag as a first column, it always allows distinguish Nim/markup entries, titles/headings, etc. `rstgen` does not rely any more (in most cases) on ad-hoc logic to determine what type each entry is. * there is now always a title entry added at the first line. * add a line number as 6th column * linkTitle (4th) column has a different format: before it was like `module: funcName()`, now it's `proc funcName()`. (This format is also propagated to `theindex.html` and search results, I kept it that way since I like it more though it's discussible.) This column is what used for Nim symbols resolution. * also changed details on column format for headings and titles: "keyword" is original, "linkTitle" is HTML one * fix paths on Windows + more clear code * Update compiler/docgen.nim Co-authored-by: Andreas Rumpf <rumpf_a@web.de> * Handle .md and .nim paths uniformly in findRefFile * handle titles better + more comments * don't allow markup overwrite index title for .nim files Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
author: Andrey Makarov <ph.makarov@gmail.com> 2023-01-04 23:19:01 +0300
committer: GitHub <noreply@github.com> 2023-01-04 15:19:01 -0500
commit: 2620da9bf9994d09da3d5b31b29c95855fb79a41 (patch)
tree: 0a83bee41e6f6bb6e28eba1dda06321d9b1e00c6 /lib/packages/docutils
parent: b2328b44baaced7a89e897332a66dac4a975efe8 (diff)
download: Nim-2620da9bf9994d09da3d5b31b29c95855fb79a41.tar.gz
4 files changed, 452 insertions, 176 deletions
diff --git a/lib/packages/docutils/dochelpers.nim b/lib/packages/docutils/dochelpers.nim
index c7f7f73f5..b7cdd16d2 100644
--- a/lib/packages/docutils/dochelpers.nim
+++ b/lib/packages/docutils/dochelpers.nim
@@ -13,7 +13,7 @@
 ## `type LangSymbol`_ in ``rst.nim``, while `match(generated, docLink)`_
 ## matches it with `generated`, produced from `PNode` by ``docgen.rst``.
 
-import rstast
+import rstast, strutils
 
 when defined(nimPreviewSlimSystem):
   import std/[assertions, syncio]
@@ -35,6 +35,12 @@ type
                                ## name-type seq, e.g. for proc
     outType*: string           ## result type, e.g. for proc
 
+proc `$`*(s: LangSymbol): string =  # for debug
+  ("(symkind=$1, symTypeKind=$2, name=$3, generics=$4, isGroup=$5, " &
+   "parametersProvided=$6, parameters=$7, outType=$8)") % [
+      s.symKind, s.symTypeKind , s.name, s.generics, $s.isGroup,
+      $s.parametersProvided, $s.parameters, s.outType]
+
 func nimIdentBackticksNormalize*(s: string): string =
   ## Normalizes the string `s` as a Nim identifier.
   ##
@@ -71,6 +77,12 @@ func nimIdentBackticksNormalize*(s: string): string =
     else: discard  # just omit '`' or ' '
   if j != s.len: setLen(result, j)
 
+proc langSymbolGroup*(kind: string, name: string): LangSymbol =
+  if kind notin ["proc", "func", "macro", "method", "iterator",
+                 "template", "converter"]:
+    raise newException(ValueError, "unknown symbol kind $1" % [kind])
+  result = LangSymbol(symKind: kind, name: name, isGroup: true)
+
 proc toLangSymbol*(linkText: PRstNode): LangSymbol =
   ## Parses `linkText` into a more structured form using a state machine.
   ##
@@ -82,11 +94,14 @@ proc toLangSymbol*(linkText: PRstNode): LangSymbol =
   ##
   ## This proc should be kept in sync with the `renderTypes` proc from
   ## ``compiler/typesrenderer.nim``.
-  assert linkText.kind in {rnRstRef, rnInner}
+  template fail(msg: string) =
+    raise newException(ValueError, msg)
+  if linkText.kind notin {rnRstRef, rnInner}:
+    fail("toLangSymbol: wrong input kind " & $linkText.kind)
 
   const NimDefs = ["proc", "func", "macro", "method", "iterator",
                    "template", "converter", "const", "type", "var",
-                   "enum", "object", "tuple"]
+                   "enum", "object", "tuple", "module"]
   template resolveSymKind(x: string) =
     if x in ["enum", "object", "tuple"]:
       result.symKind = "type"
@@ -109,11 +124,11 @@ proc toLangSymbol*(linkText: PRstNode): LangSymbol =
   template flushIdent() =
     if curIdent != "":
       case state
-      of inBeginning:  doAssert false, "incorrect state inBeginning"
+      of inBeginning:  fail("incorrect state inBeginning")
       of afterSymKind:  resolveSymKind curIdent
-      of beforeSymbolName:  doAssert false, "incorrect state beforeSymbolName"
+      of beforeSymbolName:  fail("incorrect state beforeSymbolName")
       of atSymbolName: result.name = curIdent.nimIdentBackticksNormalize
-      of afterSymbolName: doAssert false, "incorrect state afterSymbolName"
+      of afterSymbolName: fail("incorrect state afterSymbolName")
       of genericsPar: result.generics = curIdent
       of parameterName: result.parameters.add (curIdent, "")
       of parameterType:
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index e9f76a26f..b3b1fb9c0 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -22,7 +22,7 @@
 
 import
   os, strutils, rstast, dochelpers, std/enumutils, algorithm, lists, sequtils,
-  std/private/miscdollars, tables, strscans
+  std/private/miscdollars, tables, strscans, rstidx
 from highlite import SourceLanguage, getSourceLanguage
 
 when defined(nimPreviewSlimSystem):
@@ -40,7 +40,7 @@ type
     roNimFile                 ## set for Nim files where default interpreted
                               ## text role should be :nim:
     roSandboxDisabled         ## this option enables certain options
-                              ## (e.g. raw, include)
+                              ## (e.g. raw, include, importdoc)
                               ## which are disabled by default as they can
                               ## enable users to read arbitrary data and
                               ## perform XSS if the parser is used in a web
@@ -73,11 +73,17 @@ type
     mwUnsupportedLanguage = "language '$1' not supported",
     mwUnsupportedField = "field '$1' not supported",
     mwRstStyle = "RST style: $1",
+    mwUnusedImportdoc = "importdoc for '$1' is not used",
     meSandboxedDirective = "disabled directive: '$1'",
 
   MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind,
                        arg: string) {.closure, gcsafe.} ## what to do in case of an error
   FindFileHandler* = proc (filename: string): string {.closure, gcsafe.}
+  FindRefFileHandler* =
+    proc (targetRelPath: string):
+         tuple[targetPath: string, linkRelPath: string] {.closure, gcsafe.}
+    ## returns where .html or .idx file should be found by its relative path;
+    ## `linkRelPath` is a prefix to be added before a link anchor from such file
 
 proc rstnodeToRefname*(n: PRstNode): string
 proc addNodes*(n: PRstNode): string
@@ -333,7 +339,8 @@ type
     arInternalRst,  ## For automatically generated RST anchors (from
                     ## headings, footnotes, inline internal targets):
                     ## case-insensitive, 1-space-significant (by RST spec)
-    arNim   ## For anchors generated by ``docgen.rst``: Nim-style case
+    arExternalRst,  ## For external .nim doc comments or .rst/.md
+    arNim   ## For anchors generated by ``docgen.nim``: Nim-style case
             ## sensitivity, etc. (see `proc normalizeNimName`_ for details)
     arHyperlink,  ## For links with manually set anchors in
                   ## form `text <pagename.html#anchor>`_
@@ -349,11 +356,15 @@ type
     of arInternalRst:
       anchorType: RstAnchorKind
       target: PRstNode
+    of arExternalRst:
+      anchorTypeExt: RstAnchorKind
+      refnameExt: string
     of arNim:
       tooltip: string       # displayed tooltip for Nim-generated anchors
       langSym: LangSymbol
       refname: string     # A reference name that will be inserted directly
                           # into HTML/Latex.
+      external: bool
   AnchorSubstTable = Table[string, seq[AnchorSubst]]
                          # use `seq` to account for duplicate anchors
   FootnoteType = enum
@@ -372,6 +383,12 @@ type
   RstFileTable* = object
     filenameToIdx*: Table[string, FileIndex]
     idxToFilename*: seq[string]
+  ImportdocInfo = object
+    used: bool             # was this import used?
+    fromInfo: TLineInfo    # place of `.. importdoc::` directive
+    idxPath: string        # full path to ``.idx`` file
+    linkRelPath: string    # prefix before target anchor
+    title: string          # document title obtained from ``.idx``
   RstSharedState = object
     options*: RstParseOptions   # parsing options
     hLevels: LevelMap           # hierarchy of heading styles
@@ -393,12 +410,17 @@ type
     footnotes: seq[FootnoteSubst] # correspondence b/w footnote label,
                                   # number, order of occurrence
     msgHandler: MsgHandler      # How to handle errors.
-    findFile: FindFileHandler   # How to find files.
+    findFile: FindFileHandler   # How to find files for include.
+    findRefFile: FindRefFileHandler
+                                # How to find files imported by importdoc.
     filenames*: RstFileTable    # map file name <-> FileIndex (for storing
                                 # file names for warnings after 1st stage)
     currFileIdx*: FileIndex     # current index in `filenames`
     tocPart*: seq[PRstNode]     # all the headings of a document
     hasToc*: bool
+    idxImports*: Table[string, ImportdocInfo]
+                                # map `importdoc`ed filename -> it's info
+    nimFileImported*: bool      # Was any ``.nim`` module `importdoc`ed ?
 
   PRstSharedState* = ref RstSharedState
   ManualAnchor = object
@@ -452,6 +474,9 @@ proc defaultFindFile*(filename: string): string =
   if fileExists(filename): result = filename
   else: result = ""
 
+proc defaultFindRefFile*(filename: string): (string, string) =
+  (filename, "")
+
 proc defaultRole(options: RstParseOptions): string =
   if roNimFile in options: "nim" else: "literal"
 
@@ -492,12 +517,16 @@ proc getFilename(filenames: RstFileTable, fid: FileIndex): string =
         $fid.int, $(filenames.len - 1)])
   result = filenames.idxToFilename[fid.int]
 
+proc getFilename(s: PRstSharedState, subst: AnchorSubst): string =
+  getFilename(s.filenames, subst.info.fileIndex)
+
 proc currFilename(s: PRstSharedState): string =
   getFilename(s.filenames, s.currFileIdx)
 
 proc newRstSharedState*(options: RstParseOptions,
                         filename: string,
                         findFile: FindFileHandler,
+                        findRefFile: FindRefFileHandler,
                         msgHandler: MsgHandler,
                         hasToc: bool): PRstSharedState =
   let r = defaultRole(options)
@@ -507,6 +536,9 @@ proc newRstSharedState*(options: RstParseOptions,
       options: options,
       msgHandler: if not isNil(msgHandler): msgHandler else: defaultMsgHandler,
       findFile: if not isNil(findFile): findFile else: defaultFindFile,
+      findRefFile:
+        if not isNil(findRefFile): findRefFile
+        else: defaultFindRefFile,
       hasToc: hasToc
   )
   setCurrFilename(result, filename)
@@ -525,6 +557,14 @@ proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) =
 proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string) =
   s.msgHandler(s.currFilename, LineRstInit, ColRstInit, msgKind, arg)
 
+proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string;
+                line, col: int) =
+  s.msgHandler(s.currFilename, line, col, msgKind, arg)
+
+proc rstMessage(s: PRstSharedState, filename: string, msgKind: MsgKind,
+                arg: string) =
+  s.msgHandler(filename, LineRstInit, ColRstInit, msgKind, arg)
+
 proc rstMessage*(filenames: RstFileTable, f: MsgHandler,
                  info: TLineInfo, msgKind: MsgKind, arg: string) =
   ## Print warnings using `info`, i.e. in 2nd-pass warnings for
@@ -756,6 +796,14 @@ proc internalRefPriority(k: RstAnchorKind): int =
   of footnoteAnchor: result = 4
   of headlineAnchor: result = 3
 
+proc `$`(subst: AnchorSubst): string =  # for debug
+  let s =
+    case subst.kind
+    of arInternalRst: "type=" & $subst.anchorType
+    of arExternalRst: "type=" & $subst.anchorTypeExt
+    of arNim: "langsym=" & $subst.langSym
+  result = "(kind=$1, priority=$2, $3)" % [$subst.kind, $subst.priority, s]
+
 proc addAnchorRst(p: var RstParser, name: string, target: PRstNode,
                   anchorType: RstAnchorKind) =
   ## Associates node `target` (which has field `anchor`) with an
@@ -771,31 +819,49 @@ proc addAnchorRst(p: var RstParser, name: string, target: PRstNode,
                     info: prevLineInfo(p), anchorType: anchorType))
   p.curAnchors.setLen 0
 
-proc addAnchorNim*(s: var PRstSharedState, refn: string, tooltip: string,
+proc addAnchorExtRst(s: var PRstSharedState, key: string, refn: string,
+                  anchorType: RstAnchorKind, info: TLineInfo) =
+  let name = key.toLowerAscii
+  let prio = internalRefPriority(anchorType)
+  s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add(
+      AnchorSubst(kind: arExternalRst, refnameExt: refn, priority: prio,
+                  info: info,
+                  anchorTypeExt: anchorType))
+
+proc addAnchorNim*(s: var PRstSharedState, external: bool, refn: string, tooltip: string,
                    langSym: LangSymbol, priority: int,
                    info: TLineInfo) =
   ## Adds an anchor `refn`, which follows
   ## the rule `arNim` (i.e. a symbol in ``*.nim`` file)
   s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add(
-      AnchorSubst(kind: arNim, refname: refn, langSym: langSym,
+      AnchorSubst(kind: arNim, external: external, refname: refn, langSym: langSym,
                   tooltip: tooltip, priority: priority,
                   info: info))
 
 proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode,
                        info: TLineInfo):
                       seq[AnchorSubst] =
-  let langSym = toLangSymbol(signature)
+  var langSym: LangSymbol
+  try:
+    langSym = toLangSymbol(signature)
+  except ValueError:  # parsing failed, not a Nim symbol
+    return
   let substitutions = s.anchors.getOrDefault(langSym.name,
                                              newSeq[AnchorSubst]())
   if substitutions.len == 0:
     return
-  # map symKind (like "proc") -> found symbols/groups:
-  var found: Table[string, seq[AnchorSubst]]
-  for s in substitutions:
-    if s.kind == arNim:
-      if match(s.langSym, langSym):
-        found.mgetOrPut(s.langSym.symKind, newSeq[AnchorSubst]()).add s
-  for symKind, sList in found:
+  # logic to select only groups instead of concrete symbols
+  # with overloads, note that the same symbol can be defined
+  # in multiple modules and `importdoc`ed:
+  type GroupKey = tuple[symKind: string, origModule: string]
+  # map (symKind, file) (like "proc", "os.nim") -> found symbols/groups:
+  var found: Table[GroupKey, seq[AnchorSubst]]
+  for subst in substitutions:
+    if subst.kind == arNim:
+      if match(subst.langSym, langSym):
+        let key: GroupKey = (subst.langSym.symKind, getFilename(s, subst))
+        found.mgetOrPut(key, newSeq[AnchorSubst]()).add subst
+  for key, sList in found:
     if sList.len == 1:
       result.add sList[0]
     else:  # > 1, there are overloads, potential ambiguity in this `symKind`
@@ -812,14 +878,16 @@ proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode,
             result.add s
             foundGroup = true
             break
-        doAssert foundGroup, "docgen has not generated the group"
+        doAssert(foundGroup,
+                 "docgen has not generated the group for $1 (file $2)" % [
+                 langSym.name, getFilename(s, sList[0]) ])
 
 proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo):
                       seq[AnchorSubst] =
   let name = linkText.toLowerAscii
   let substitutions = s.anchors.getOrDefault(name, newSeq[AnchorSubst]())
   for s in substitutions:
-    if s.kind == arInternalRst:
+    if s.kind in {arInternalRst, arExternalRst}:
       result.add s
 
 proc addFootnoteNumManual(p: var RstParser, num: int) =
@@ -3251,6 +3319,15 @@ proc dirRaw(p: var RstParser): PRstNode =
   else:
     dirRawAux(p, result, rnRaw, parseSectionWrapper)
 
+proc dirImportdoc(p: var RstParser): PRstNode =
+  result = parseDirective(p, rnDirective, {}, parseLiteralBlock)
+  assert result.sons[2].kind == rnLiteralBlock
+  assert result.sons[2].sons[0].kind == rnLeaf
+  let filenames: seq[string] = split(result.sons[2].sons[0].text, seps = {','})
+  proc rmSpaces(s: string): string = s.split.join("")
+  for origFilename in filenames:
+    p.s.idxImports[origFilename.rmSpaces] = ImportdocInfo(fromInfo: lineInfo(p))
+
 proc selectDir(p: var RstParser, d: string): PRstNode =
   result = nil
   let tok = p.tok[p.idx-2] # report on directive in ".. directive::"
@@ -3271,6 +3348,7 @@ proc selectDir(p: var RstParser, d: string): PRstNode =
   of "hint": result = dirAdmonition(p, d)
   of "image": result = dirImage(p)
   of "important": result = dirAdmonition(p, d)
+  of "importdoc": result = dirImportdoc(p)
   of "include": result = dirInclude(p)
   of "index": result = dirIndex(p)
   of "note": result = dirAdmonition(p, d)
@@ -3401,11 +3479,90 @@ proc rstParsePass1*(fragment: string,
   getTokens(fragment, p.tok)
   result = parseDoc(p)
 
-proc preparePass2*(s: PRstSharedState, mainNode: PRstNode) =
+proc extractLinkEnd(x: string): string =
+  ## From links like `path/to/file.html#/%` extract `file.html#/%`.
+  let i = find(x, '#')
+  let last =
+    if i >= 0: i
+    else: x.len - 1
+  let j = rfind(x, '/', start=0, last=last)
+  if j >= 0:
+    result = x[j+1 .. ^1]
+  else:
+    result = x
+
+proc loadIdxFile(s: var PRstSharedState, origFilename: string) =
+  doAssert roSandboxDisabled in s.options
+  var info: TLineInfo
+  info.fileIndex = addFilename(s, origFilename)
+  var (dir, basename, ext) = origFilename.splitFile
+  if ext notin [".md", ".rst", ".nim", ""]:
+    rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo,
+               meCannotOpenFile, origFilename & ": unknown extension")
+  let idxFilename = dir / basename & ".idx"
+  let (idxPath, linkRelPath) = s.findRefFile(idxFilename)
+  s.idxImports[origFilename].linkRelPath = linkRelPath
+  var
+    fileEntries: seq[IndexEntry]
+    title: IndexEntry
+  try:
+    (fileEntries, title) = parseIdxFile(idxPath)
+  except IOError:
+    rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo,
+               meCannotOpenFile, idxPath)
+  except ValueError as e:
+    s.msgHandler(idxPath, LineRstInit, ColRstInit, meInvalidField, e.msg)
+
+  var isMarkup = false  # for sanity check to avoid mixing .md <-> .nim
+  for entry in fileEntries:
+    # Though target .idx already has inside it the path to HTML relative
+    # project's root, we won't rely on it and use `linkRelPath` instead.
+    let refn = extractLinkEnd(entry.link)
+    # select either markup (rst/md) or Nim cases:
+    if entry.kind in {ieMarkupTitle, ieNimTitle}:
+      s.idxImports[origFilename].title = entry.keyword
+    case entry.kind
+    of ieIdxRole, ieHeading, ieMarkupTitle:
+      if ext == ".nim" and entry.kind == ieMarkupTitle:
+        rstMessage(s, idxPath, meInvalidField,
+                   $ieMarkupTitle & " in supposedly .nim-derived file")
+      if entry.kind == ieMarkupTitle:
+        isMarkup = true
+      info.line = entry.line.uint16
+      addAnchorExtRst(s, key = entry.keyword, refn = refn,
+                      anchorType = headlineAnchor, info=info)
+    of ieNim, ieNimGroup, ieNimTitle:
+      if ext in [".md", ".rst"] or isMarkup:
+        rstMessage(s, idxPath, meInvalidField,
+                   $entry.kind & " in supposedly markup-derived file")
+      s.nimFileImported = true
+      var langSym: LangSymbol
+      if entry.kind in {ieNim, ieNimTitle}:
+        var q: RstParser
+        initParser(q, s)
+        info.line = entry.line.uint16
+        setLen(q.tok, 0)
+        q.idx = 0
+        getTokens(entry.linkTitle, q.tok)
+        var sons = newSeq[PRstNode](q.tok.len)
+        for i in 0 ..< q.tok.len: sons[i] = newLeaf(q.tok[i].symbol)
+        let linkTitle = newRstNode(rnInner, sons)
+        langSym = linkTitle.toLangSymbol
+      else:  # entry.kind == ieNimGroup
+        langSym = langSymbolGroup(kind=entry.linkTitle, name=entry.keyword)
+      addAnchorNim(s, external = true, refn = refn, tooltip = entry.linkDesc,
+                   langSym = langSym, priority = -4, # lowest
+                   info=info)
+  doAssert s.idxImports[origFilename].title != ""
+
+proc preparePass2*(s: var PRstSharedState, mainNode: PRstNode, importdoc = true) =
   ## Records titles in node `mainNode` and orders footnotes.
   countTitles(s, mainNode)
   fixHeadlines(s)
   orderFootnotes(s)
+  if importdoc:
+    for origFilename in s.idxImports.keys:
+      loadIdxFile(s, origFilename)
 
 proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode =
     # Associate this link alias with its target and change node kind to
@@ -3423,6 +3580,9 @@ proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode =
       tooltip: string
       target: PRstNode
       info: TLineInfo
+      externFilename: string
+        # when external anchor: origin filename where anchor was defined
+      isTitle: bool
     proc cmp(x, y: LinkDef): int =
       result = cmp(x.priority, y.priority)
       if result == 0:
@@ -3435,26 +3595,67 @@ proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode =
                              target: y.value, info: y.info,
                              tooltip: "(" & $y.kind & ")")
     let substRst = findMainAnchorRst(s, alias.addNodes, n.info)
+    template getExternFilename(subst: AnchorSubst): string =
+      if subst.kind == arExternalRst or
+          (subst.kind == arNim and subst.external):
+        getFilename(s, subst)
+      else: ""
     for subst in substRst:
-      foundLinks.add LinkDef(ar: arInternalRst, priority: subst.priority,
-                             target: newLeaf(subst.target.anchor),
+      var refname, fullRefname: string
+      if subst.kind == arInternalRst:
+        refname = subst.target.anchor
+        fullRefname = refname
+      else:  # arExternalRst
+        refname = subst.refnameExt
+        fullRefname = s.idxImports[getFilename(s, subst)].linkRelPath &
+                        "/" & refname
+      let anchorType =
+        if subst.kind == arInternalRst: subst.anchorType
+        else: subst.anchorTypeExt  # arExternalRst
+      foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority,
+                             target: newLeaf(fullRefname),
                              info: subst.info,
-                             tooltip: "(" & $subst.anchorType & ")")
+                             externFilename: getExternFilename(subst),
+                             isTitle: isDocumentationTitle(refname),
+                             tooltip: "(" & $anchorType & ")")
     # find anchors automatically generated from Nim symbols
-    if roNimFile in s.options:
+    if roNimFile in s.options or s.nimFileImported:
       let substNim = findMainAnchorNim(s, signature=alias, n.info)
       for subst in substNim:
-        foundLinks.add LinkDef(ar: arNim, priority: subst.priority,
-                               target: newLeaf(subst.refname),
+        let fullRefname =
+          if subst.external:
+            s.idxImports[getFilename(s, subst)].linkRelPath &
+                "/" & subst.refname
+          else: subst.refname
+        foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority,
+                               target: newLeaf(fullRefname),
+                               externFilename: getExternFilename(subst),
+                               isTitle: isDocumentationTitle(subst.refname),
                                info: subst.info, tooltip: subst.tooltip)
     foundLinks.sort(cmp = cmp, order = Descending)
     let aliasStr = addNodes(alias)
     if foundLinks.len >= 1:
-      let kind = if foundLinks[0].ar == arHyperlink: rnHyperlink
-                 elif foundLinks[0].ar == arNim: rnNimdocRef
+      if foundLinks[0].externFilename != "":
+        s.idxImports[foundLinks[0].externFilename].used = true
+      let kind = if foundLinks[0].ar in {arHyperlink, arExternalRst}: rnHyperlink
+                 elif foundLinks[0].ar == arNim:
+                   if foundLinks[0].externFilename == "": rnNimdocRef
+                   else: rnHyperlink
                  else: rnInternalRef
       result = newRstNode(kind)
-      result.sons = @[newRstNode(rnInner, desc.sons), foundLinks[0].target]
+      let documentName =  # filename without ext for `.nim`, title for `.md`
+        if foundLinks[0].ar == arNim:
+          changeFileExt(foundLinks[0].externFilename.extractFilename, "")
+        elif foundLinks[0].externFilename != "":
+          s.idxImports[foundLinks[0].externFilename].title
+        else: foundLinks[0].externFilename.extractFilename
+      let linkText =
+        if foundLinks[0].externFilename != "":
+          if foundLinks[0].isTitle: newLeaf(addNodes(desc))
+          else: newLeaf(documentName & ": " & addNodes(desc))
+        else:
+          newRstNode(rnInner, desc.sons)
+      result.sons = @[linkText, foundLinks[0].target]
       if kind == rnNimdocRef: result.tooltip = foundLinks[0].tooltip
       if foundLinks.len > 1:  # report ambiguous link
         var targets = newSeq[string]()
@@ -3568,20 +3769,28 @@ proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode =
           inc i
       result.sons = newSons
 
+proc completePass2*(s: PRstSharedState) =
+  for (filename, importdocInfo) in s.idxImports.pairs:
+    if not importdocInfo.used:
+      rstMessage(s.filenames, s.msgHandler, importdocInfo.fromInfo,
+                 mwUnusedImportdoc, filename)
+
 proc rstParse*(text, filename: string,
                line, column: int,
                options: RstParseOptions,
                findFile: FindFileHandler = nil,
+               findRefFile: FindRefFileHandler = nil,
                msgHandler: MsgHandler = nil):
               tuple[node: PRstNode, filenames: RstFileTable, hasToc: bool] =
   ## Parses the whole `text`. The result is ready for `rstgen.renderRstToOut`,
   ## note that 2nd tuple element should be fed to `initRstGenerator`
   ## argument `filenames` (it is being filled here at least with `filename`
   ## and possibly with other files from RST ``.. include::`` statement).
-  var sharedState = newRstSharedState(options, filename, findFile,
+  var sharedState = newRstSharedState(options, filename, findFile, findRefFile,
                                       msgHandler, hasToc=false)
   let unresolved = rstParsePass1(text, line, column, sharedState)
   preparePass2(sharedState, unresolved)
   result.node = resolveSubs(sharedState, unresolved)
+  completePass2(sharedState)
   result.filenames = sharedState.filenames
   result.hasToc = sharedState.hasToc
diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
index 597ee1553..57bc00fcb 100644
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -39,7 +39,8 @@
 ##   No backreferences are generated since finding all references of a footnote
 ##   can be done by simply searching for ``[footnoteName]``.
 
-import strutils, os, hashes, strtabs, rstast, rst, highlite, tables, sequtils,
+import strutils, os, hashes, strtabs, rstast, rst, rstidx,
+  highlite, tables, sequtils,
   algorithm, parseutils, std/strbasics
 
 
@@ -59,7 +60,7 @@ type
     outLatex            # output is Latex
 
   MetaEnum* = enum
-    metaNone, metaTitle, metaSubtitle, metaAuthor, metaVersion
+    metaNone, metaTitleRaw, metaTitle, metaSubtitle, metaAuthor, metaVersion
 
   EscapeMode* = enum  # in Latex text inside options [] and URLs is
                       # escaped slightly differently than in normal text
@@ -321,31 +322,8 @@ proc renderAux(d: PDoc, n: PRstNode, html, tex: string, result: var string) =
 
 # ---------------- index handling --------------------------------------------
 
-proc quoteIndexColumn(text: string): string =
-  ## Returns a safe version of `text` for serialization to the ``.idx`` file.
-  ##
-  ## The returned version can be put without worries in a line based tab
-  ## separated column text file. The following character sequence replacements
-  ## will be performed for that goal:
-  ##
-  ## * ``"\\"`` => ``"\\\\"``
-  ## * ``"\n"`` => ``"\\n"``
-  ## * ``"\t"`` => ``"\\t"``
-  result = newStringOfCap(text.len + 3)
-  for c in text:
-    case c
-    of '\\': result.add "\\"
-    of '\L': result.add "\\n"
-    of '\C': discard
-    of '\t': result.add "\\t"
-    else: result.add c
-
-proc unquoteIndexColumn(text: string): string =
-  ## Returns the unquoted version generated by ``quoteIndexColumn``.
-  result = text.multiReplace(("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\"))
-
-proc setIndexTerm*(d: var RstGenerator, htmlFile, id, term: string,
-                   linkTitle, linkDesc = "") =
+proc setIndexTerm*(d: var RstGenerator; k: IndexEntryKind, htmlFile, id, term: string,
+                   linkTitle, linkDesc = "", line = 0) =
   ## Adds a `term` to the index using the specified hyperlink identifier.
   ##
   ## A new entry will be added to the index using the format
@@ -368,21 +346,8 @@ proc setIndexTerm*(d: var RstGenerator, htmlFile, id, term: string,
   ## <#writeIndexFile,RstGenerator,string>`_. The purpose of the index is
   ## documented in the `docgen tools guide
   ## <docgen.html#related-options-index-switch>`_.
-  var
-    entry = term
-    isTitle = false
-  entry.add('\t')
-  entry.add(htmlFile)
-  if id.len > 0:
-    entry.add('#')
-    entry.add(id)
-  else:
-    isTitle = true
-  if linkTitle.len > 0 or linkDesc.len > 0:
-    entry.add('\t' & linkTitle.quoteIndexColumn)
-    entry.add('\t' & linkDesc.quoteIndexColumn)
-  entry.add("\n")
-
+  let (entry, isTitle) = formatIndexEntry(k, htmlFile, id, term,
+                                          linkTitle, linkDesc, line)
   if isTitle: d.theIndex.insert(entry)
   else: d.theIndex.add(entry)
 
@@ -395,6 +360,15 @@ proc hash(n: PRstNode): int =
       result = result !& hash(n.sons[i])
     result = !$result
 
+proc htmlFileRelPath(d: PDoc): string =
+  if d.outDir.len == 0:
+    # /foo/bar/zoo.nim -> zoo.html
+    changeFileExt(extractFilename(d.filename), HtmlExt)
+  else: # d is initialized in docgen.nim
+    # outDir   = /foo              -\
+    # destFile = /foo/bar/zoo.html -|-> bar/zoo.html
+    d.destFile.relativePath(d.outDir, '/')
+
 proc renderIndexTerm*(d: PDoc, n: PRstNode, result: var string) =
   ## Renders the string decorated within \`foobar\`\:idx\: markers.
   ##
@@ -411,17 +385,12 @@ proc renderIndexTerm*(d: PDoc, n: PRstNode, result: var string) =
 
   var term = ""
   renderAux(d, n, term)
-  setIndexTerm(d, changeFileExt(extractFilename(d.filename), HtmlExt), id, term, d.currentSection)
+  setIndexTerm(d, ieIdxRole,
+  htmlFileRelPath(d), id, term, d.currentSection)
   dispA(d.target, result, "<span id=\"$1\">$2</span>", "\\nimindexterm{$1}{$2}",
         [id, term])
 
 type
-  IndexEntry* = object
-    keyword*: string
-    link*: string
-    linkTitle*: string ## contains a prettier text for the href
-    linkDesc*: string ## the title attribute of the final href
-
   IndexedDocs* = Table[IndexEntry, seq[IndexEntry]] ## \
     ## Contains the index sequences for doc types.
     ##
@@ -432,21 +401,6 @@ type
     ## The value indexed by this IndexEntry is a sequence with the real index
     ## entries found in the ``.idx`` file.
 
-proc cmp(a, b: IndexEntry): int =
-  ## Sorts two ``IndexEntry`` first by `keyword` field, then by `link`.
-  result = cmpIgnoreStyle(a.keyword, b.keyword)
-  if result == 0:
-    result = cmpIgnoreStyle(a.link, b.link)
-
-proc hash(x: IndexEntry): Hash =
-  ## Returns the hash for the combined fields of the type.
-  ##
-  ## The hash is computed as the chained hash of the individual string hashes.
-  result = x.keyword.hash !& x.link.hash
-  result = result !& x.linkTitle.hash
-  result = result !& x.linkDesc.hash
-  result = !$result
-
 when defined(gcDestructors):
   template `<-`(a, b: var IndexEntry) = a = move(b)
 else:
@@ -455,6 +409,7 @@ else:
     shallowCopy a.link, b.link
     shallowCopy a.linkTitle, b.linkTitle
     shallowCopy a.linkDesc, b.linkDesc
+    shallowCopy a.module, b.module
 
 proc sortIndex(a: var openArray[IndexEntry]) =
   # we use shellsort here; fast and simple
@@ -494,16 +449,20 @@ proc generateSymbolIndex(symbols: seq[IndexEntry]): string =
   result = "<dl>"
   var i = 0
   while i < symbols.len:
-    let keyword = symbols[i].keyword
+    let keyword = esc(outHtml, symbols[i].keyword)
     let cleanedKeyword = keyword.escapeLink
     result.addf("<dt><a name=\"$2\" href=\"#$2\"><span>$1:</span></a></dt><dd><ul class=\"simple\">\n",
                 [keyword, cleanedKeyword])
     var j = i
-    while j < symbols.len and keyword == symbols[j].keyword:
+    while j < symbols.len and symbols[i].keyword == symbols[j].keyword:
       let
         url = symbols[j].link.escapeLink
-        text = if symbols[j].linkTitle.len > 0: symbols[j].linkTitle else: url
-        desc = if symbols[j].linkDesc.len > 0: symbols[j].linkDesc else: ""
+        module = symbols[j].module
+        text =
+          if symbols[j].linkTitle.len > 0:
+            esc(outHtml, module & ": " & symbols[j].linkTitle)
+          else: url
+        desc = symbols[j].linkDesc
       if desc.len > 0:
         result.addf("""<li><a class="reference external"
           title="$3" data-doc-search-tag="$2" href="$1">$2</a></li>
@@ -517,13 +476,6 @@ proc generateSymbolIndex(symbols: seq[IndexEntry]): string =
     i = j
   result.add("</dl>")
 
-proc isDocumentationTitle(hyperlink: string): bool =
-  ## Returns true if the hyperlink is actually a documentation title.
-  ##
-  ## Documentation titles lack the hash. See `mergeIndexes()
-  ## <#mergeIndexes,string>`_ for a more detailed explanation.
-  result = hyperlink.find('#') < 0
-
 proc stripTocLevel(s: string): tuple[level: int, text: string] =
   ## Returns the *level* of the toc along with the text without it.
   for c in 0 ..< s.len:
@@ -557,17 +509,15 @@ proc generateDocumentationToc(entries: seq[IndexEntry]): string =
     level = 1
   levels.newSeq(entries.len)
   for entry in entries:
-    let (rawLevel, rawText) = stripTocLevel(entry.linkTitle or entry.keyword)
+    let (rawLevel, rawText) = stripTocLevel(entry.linkTitle)
     if rawLevel < 1:
       # This is a normal symbol, push it *inside* one level from the last one.
       levels[L].level = level + 1
-      # Also, ignore the linkTitle and use directly the keyword.
-      levels[L].text = entry.keyword
     else:
       # The level did change, update the level indicator.
       level = rawLevel
       levels[L].level = rawLevel
-      levels[L].text = rawText
+    levels[L].text = rawText
     inc L
 
   # Now generate hierarchical lists based on the precalculated levels.
@@ -598,7 +548,7 @@ proc generateDocumentationIndex(docs: IndexedDocs): string =
   for title in titles:
     let tocList = generateDocumentationToc(docs.getOrDefault(title))
     result.add("<ul><li><a href=\"" &
-      title.link & "\">" & title.keyword & "</a>\n" & tocList & "</li></ul>\n")
+      title.link & "\">" & title.linkTitle & "</a>\n" & tocList & "</li></ul>\n")
 
 proc generateDocumentationJumps(docs: IndexedDocs): string =
   ## Returns a plain list of hyperlinks to documentation TOCs in HTML.
@@ -610,7 +560,7 @@ proc generateDocumentationJumps(docs: IndexedDocs): string =
 
   var chunks: seq[string] = @[]
   for title in titles:
-    chunks.add("<a href=\"" & title.link & "\">" & title.keyword & "</a>")
+    chunks.add("<a href=\"" & title.link & "\">" & title.linkTitle & "</a>")
 
   result.add(chunks.join(", ") & ".<br/>")
 
@@ -639,39 +589,11 @@ proc readIndexDir*(dir: string):
   # Scan index files and build the list of symbols.
   for path in walkDirRec(dir):
     if path.endsWith(IndexExt):
-      var
-        fileEntries: seq[IndexEntry]
-        title: IndexEntry
-        f = 0
-      newSeq(fileEntries, 500)
-      setLen(fileEntries, 0)
-      for line in lines(path):
-        let s = line.find('\t')
-        if s < 0: continue
-        setLen(fileEntries, f+1)
-        fileEntries[f].keyword = line.substr(0, s-1)
-        fileEntries[f].link = line.substr(s+1)
-        # See if we detect a title, a link without a `#foobar` trailing part.
-        if title.keyword.len == 0 and fileEntries[f].link.isDocumentationTitle:
-          title.keyword = fileEntries[f].keyword
-          title.link = fileEntries[f].link
-
-        if fileEntries[f].link.find('\t') > 0:
-          let extraCols = fileEntries[f].link.split('\t')
-          fileEntries[f].link = extraCols[0]
-          assert extraCols.len == 3
-          fileEntries[f].linkTitle = extraCols[1].unquoteIndexColumn
-          fileEntries[f].linkDesc = extraCols[2].unquoteIndexColumn
-        else:
-          fileEntries[f].linkTitle = ""
-          fileEntries[f].linkDesc = ""
-        inc f
+      var (fileEntries, title) = parseIdxFile(path)
       # Depending on type add this to the list of symbols or table of APIs.
-      if title.keyword.len == 0:
-        for i in 0 ..< f:
-          # Don't add to symbols TOC entries (they start with a whitespace).
-          let toc = fileEntries[i].linkTitle
-          if toc.len > 0 and toc[0] == ' ':
+      if title.kind == ieNimTitle:
+        for i in 0 ..< fileEntries.len:
+          if fileEntries[i].kind != ieNim:
             continue
           # Ok, non TOC entry, add it.
           setLen(result.symbols, L + 1)
@@ -687,7 +609,7 @@ proc readIndexDir*(dir: string):
             result.modules.add(x.changeFileExt(""))
       else:
         # Generate the symbolic anchor for index quickjumps.
-        title.linkTitle = "doc_toc_" & $result.docs.len
+        title.aux = "doc_toc_" & $result.docs.len
         result.docs[title] = fileEntries
 
 proc mergeIndexes*(dir: string): string =
@@ -747,24 +669,6 @@ proc mergeIndexes*(dir: string): string =
 
 # ----------------------------------------------------------------------------
 
-proc stripTocHtml(s: string): string =
-  ## Ugly quick hack to remove HTML tags from TOC titles.
-  ##
-  ## A TocEntry.header field already contains rendered HTML tags. Instead of
-  ## implementing a proper version of renderRstToOut() which recursively
-  ## renders an rst tree to plain text, we simply remove text found between
-  ## angled brackets. Given the limited possibilities of rst inside TOC titles
-  ## this should be enough.
-  result = s
-  var first = result.find('<')
-  while first >= 0:
-    let last = result.find('>', first)
-    if last < 0:
-      # Abort, since we didn't found a closing angled bracket.
-      return
-    result.delete(first..last)
-    first = result.find('<', first)
-
 proc renderHeadline(d: PDoc, n: PRstNode, result: var string) =
   var tmp = ""
   for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], tmp)
@@ -785,19 +689,12 @@ proc renderHeadline(d: PDoc, n: PRstNode, result: var string) =
 
   # Generate index entry using spaces to indicate TOC level for the output HTML.
   assert n.level >= 0
-  let
-    htmlFileRelPath = if d.outDir.len == 0:
-                        # /foo/bar/zoo.nim -> zoo.html
-                        changeFileExt(extractFilename(d.filename), HtmlExt)
-                      else: # d is initialized in docgen.nim
-                        # outDir   = /foo              -\
-                        # destFile = /foo/bar/zoo.html -|-> bar/zoo.html
-                        d.destFile.relativePath(d.outDir, '/')
-  setIndexTerm(d, htmlFileRelPath, n.anchor, tmp.stripTocHtml,
-    spaces(max(0, n.level)) & tmp)
+  setIndexTerm(d, ieHeading, htmlFile = d.htmlFileRelPath, id = n.anchor,
+               term = n.addNodes, linkTitle = spaces(max(0, n.level)) & tmp)
 
 proc renderOverline(d: PDoc, n: PRstNode, result: var string) =
   if n.level == 0 and d.meta[metaTitle].len == 0:
+    d.meta[metaTitleRaw] = n.addNodes
     for i in countup(0, len(n)-1):
       renderRstToOut(d, n.sons[i], d.meta[metaTitle])
     d.currentSection = d.meta[metaTitle]
@@ -813,6 +710,8 @@ proc renderOverline(d: PDoc, n: PRstNode, result: var string) =
     dispA(d.target, result, "<h$1$2><center>$3</center></h$1>",
                    "\\rstov$4[$5]{$3}$2\n", [$n.level,
                    n.anchor.idS, tmp, $chr(n.level - 1 + ord('A')), tocName])
+    setIndexTerm(d, ieHeading, htmlFile = d.htmlFileRelPath, id = n.anchor,
+                 term = n.addNodes, linkTitle = spaces(max(0, n.level)) & tmp)
 
 proc renderTocEntry(d: PDoc, n: PRstNode, result: var string) =
   var header = ""
@@ -1197,6 +1096,18 @@ proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string,
       "\\hyperlink{$2}{$1} (p.~\\pageref{$2})",
       [textStr, linkStr, nimDocStr, tooltipStr])
 
+proc traverseForIndex*(d: PDoc, n: PRstNode) =
+  ## A version of [renderRstToOut] that only fills entries for ``.idx`` files.
+  var discarded: string
+  if n == nil: return
+  case n.kind
+  of rnIdx: renderIndexTerm(d, n, discarded)
+  of rnHeadline, rnMarkdownHeadline: renderHeadline(d, n, discarded)
+  of rnOverline: renderOverline(d, n, discarded)
+  else:
+    for i in 0 ..< len(n):
+      traverseForIndex(d, n.sons[i])
+
 proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
   if n == nil: return
   case n.kind
@@ -1451,6 +1362,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
   of rnTitle:
     d.meta[metaTitle] = ""
     renderRstToOut(d, n.sons[0], d.meta[metaTitle])
+    d.meta[metaTitleRaw] = n.sons[0].addNodes
 
 # -----------------------------------------------------------------------------
 
@@ -1616,11 +1528,13 @@ proc rstToHtml*(s: string, options: RstParseOptions,
   proc myFindFile(filename: string): string =
     # we don't find any files in online mode:
     result = ""
+  proc myFindRefFile(filename: string): (string, string) =
+    result = ("", "")
 
   const filen = "input"
   let (rst, filenames, t) = rstParse(s, filen,
                                      line=LineRstInit, column=ColRstInit,
-                                     options, myFindFile, msgHandler)
+                                     options, myFindFile, myFindRefFile, msgHandler)
   var d: RstGenerator
   initRstGenerator(d, outHtml, config, filen, myFindFile, msgHandler,
                    filenames, hasToc = t)
diff --git a/lib/packages/docutils/rstidx.nim b/lib/packages/docutils/rstidx.nim
new file mode 100644
index 000000000..c109636d7
--- /dev/null
+++ b/lib/packages/docutils/rstidx.nim
@@ -0,0 +1,138 @@
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2022 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+
+## Nim `idx`:idx: file format related definitions.
+
+import strutils, std/syncio, hashes
+from os import splitFile
+
+type
+  IndexEntryKind* = enum ## discriminator tag
+    ieMarkupTitle = "markupTitle"
+                           ## RST/Markdown title, text in `keyword` +
+                           ## HTML text in `linkTitle`
+    ieNimTitle = "nimTitle"
+                           ## Nim title
+    ieHeading = "heading"  ## RST/Markdown markup heading, escaped
+    ieIdxRole = "idx"      ## RST :idx: definition, escaped
+    ieNim = "nim"          ## Nim symbol, unescaped
+    ieNimGroup = "nimgrp"  ## Nim overload group, unescaped
+  IndexEntry* = object
+    kind*: IndexEntryKind  ## 0.
+    keyword*: string       ## 1.
+    link*: string          ## 2.
+    linkTitle*: string     ## 3. contains a prettier text for the href
+    linkDesc*: string      ## 4. the title attribute of the final href
+    line*: int             ## 5.
+    module*: string        ## origin file, NOT a field in ``.idx`` file
+    aux*: string           ## auxuliary field, NOT a field in ``.idx`` file
+
+proc isDocumentationTitle*(hyperlink: string): bool =
+  ## Returns true if the hyperlink is actually a documentation title.
+  ##
+  ## Documentation titles lack the hash. See `mergeIndexes()
+  ## <#mergeIndexes,string>`_ for a more detailed explanation.
+  result = hyperlink.find('#') < 0
+
+proc `$`*(e: IndexEntry): string =
+  """("$1", "$2", "$3", "$4", $5)""" % [
+      e.keyword, e.link, e.linkTitle, e.linkDesc, $e.line]
+
+proc quoteIndexColumn(text: string): string =
+  ## Returns a safe version of `text` for serialization to the ``.idx`` file.
+  ##
+  ## The returned version can be put without worries in a line based tab
+  ## separated column text file. The following character sequence replacements
+  ## will be performed for that goal:
+  ##
+  ## * ``"\\"`` => ``"\\\\"``
+  ## * ``"\n"`` => ``"\\n"``
+  ## * ``"\t"`` => ``"\\t"``
+  result = newStringOfCap(text.len + 3)
+  for c in text:
+    case c
+    of '\\': result.add "\\"
+    of '\L': result.add "\\n"
+    of '\C': discard
+    of '\t': result.add "\\t"
+    else: result.add c
+
+proc unquoteIndexColumn*(text: string): string =
+  ## Returns the unquoted version generated by ``quoteIndexColumn``.
+  result = text.multiReplace(("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\"))
+
+proc formatIndexEntry*(kind: IndexEntryKind; htmlFile, id, term, linkTitle,
+                       linkDesc: string, line: int):
+                      tuple[entry: string, isTitle: bool] =
+  result.entry = $kind
+  result.entry.add('\t')
+  result.entry.add term
+  result.entry.add('\t')
+  result.entry.add(htmlFile)
+  if id.len > 0:
+    result.entry.add('#')
+    result.entry.add(id)
+    result.isTitle = false
+  else:
+    result.isTitle = true
+  result.entry.add('\t' & linkTitle.quoteIndexColumn)
+  result.entry.add('\t' & linkDesc.quoteIndexColumn)
+  result.entry.add('\t' & $line)
+  result.entry.add("\n")
+
+proc parseIndexEntryKind(s: string): IndexEntryKind =
+  result = case s:
+    of "nim": ieNim
+    of "nimgrp": ieNimGroup
+    of "heading": ieHeading
+    of "idx": ieIdxRole
+    of "nimTitle": ieNimTitle
+    of "markupTitle": ieMarkupTitle
+    else: raise newException(ValueError, "unknown index entry value $1" % [s])
+
+proc parseIdxFile*(path: string):
+    tuple[fileEntries: seq[IndexEntry], title: IndexEntry] =
+  var
+    f = 0
+  newSeq(result.fileEntries, 500)
+  setLen(result.fileEntries, 0)
+  let (_, base, _) = path.splitFile
+  for line in lines(path):
+    let s = line.find('\t')
+    if s < 0: continue
+    setLen(result.fileEntries, f+1)
+    let cols = line.split('\t')
+    result.fileEntries[f].kind = parseIndexEntryKind(cols[0])
+    result.fileEntries[f].keyword = cols[1]
+    result.fileEntries[f].link = cols[2]
+    if result.title.keyword.len == 0:
+      result.fileEntries[f].module = base
+    else:
+      result.fileEntries[f].module = result.title.keyword
+
+    result.fileEntries[f].linkTitle = cols[3].unquoteIndexColumn
+    result.fileEntries[f].linkDesc = cols[4].unquoteIndexColumn
+    result.fileEntries[f].line = parseInt(cols[5])
+
+    if result.fileEntries[f].kind in {ieNimTitle, ieMarkupTitle}:
+      result.title = result.fileEntries[f]
+    inc f
+
+proc cmp*(a, b: IndexEntry): int =
+  ## Sorts two ``IndexEntry`` first by `keyword` field, then by `link`.
+  result = cmpIgnoreStyle(a.keyword, b.keyword)
+  if result == 0:
+    result = cmpIgnoreStyle(a.link, b.link)
+
+proc hash*(x: IndexEntry): Hash =
+  ## Returns the hash for the combined fields of the type.
+  ##
+  ## The hash is computed as the chained hash of the individual string hashes.
+  result = x.keyword.hash !& x.link.hash
+  result = result !& x.linkTitle.hash
+  result = result !& x.linkDesc.hash
+  result = !$result
author	Andrey Makarov <ph.makarov@gmail.com>	2023-01-04 23:19:01 +0300
committer	GitHub <noreply@github.com>	2023-01-04 15:19:01 -0500
commit	2620da9bf9994d09da3d5b31b29c95855fb79a41 (patch)
tree	0a83bee41e6f6bb6e28eba1dda06321d9b1e00c6 /lib/packages/docutils
parent	b2328b44baaced7a89e897332a66dac4a975efe8 (diff)
download	Nim-2620da9bf9994d09da3d5b31b29c95855fb79a41.tar.gz