diff options
author | Andrey Makarov <ph.makarov@gmail.com> | 2023-01-04 23:19:01 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-04 15:19:01 -0500 |
commit | 2620da9bf9994d09da3d5b31b29c95855fb79a41 (patch) | |
tree | 0a83bee41e6f6bb6e28eba1dda06321d9b1e00c6 /lib/packages/docutils | |
parent | b2328b44baaced7a89e897332a66dac4a975efe8 (diff) | |
download | Nim-2620da9bf9994d09da3d5b31b29c95855fb79a41.tar.gz |
docgen: implement cross-document links (#20990)
* docgen: implement cross-document links Fully implements https://github.com/nim-lang/RFCs/issues/125 Follow-up of: https://github.com/nim-lang/Nim/pull/18642 (for internal links) and https://github.com/nim-lang/Nim/issues/20127. Overview -------- Explicit import-like directive is required, called `.. importdoc::`. (the syntax is % RST, Markdown will use it for a while). Then one can reference any symbols/headings/anchors, as if they were in the local file (but they will be prefixed with a module name or markup document in link text). It's possible to reference anything from anywhere (any direction in `.nim`/`.md`/`.rst` files). See `doc/docgen.md` for full description. Working is based on `.idx` files, hence one needs to generate all `.idx` beforehand. A dedicated option `--index:only` is introduced (and a separate stage for `--index:only` is added to `kochdocs.nim`). Performance note ---------------- Full run for `./koch docs` now takes 185% of the time before this PR. (After: 315 s, before: 170 s on my PC). All the time seems to be spent on `--index:only` run, which takes almost as much (85%) of normal doc run -- it seems that most time is spent on file parsing, turning off HTML generation phase has not helped much. (One could avoid it by specifying list of files that can be referenced and pre-processing only them. But it can become error-prone and I assume that these linke will be **everywhere** in the repository anyway, especially considering https://github.com/nim-lang/RFCs/issues/478. So every `.nim`/`.md` file is processed for `.idx` first). But that's all without significant part of repository converted to cross-module auto links. To estimate impact I checked the time for `doc`ing a few files (after all indexes have been generated), and everywhere difference was **negligible**. E.g. for `lib/std/private/osfiles.nim` that `importdoc`s large `os.idx` and hence should have been a case with relatively large performance impact, but: * After: 0.59 s. * Before: 0.59 s. So Nim compiler works so slow that doc part basically does not matter :-) Testing ------- 1) added `extlinks` test to `nimdoc/` 2) checked that `theindex.html` is still correct 2) fixed broken auto-links for modules that were derived from `os.nim` by adding appropriate ``importdoc`` Implementation note ------------------- Parsing and formating of `.idx` entries is moved into a dedicated `rstidx.nim` module from `rstgen.nim`. `.idx` file format changed: * fields are not escaped in most cases because we need original strings for referencing, not HTML ones (the exception is linkTitle for titles and headings). Escaping happens later -- on the stage of `rstgen` buildIndex, etc. * all lines have fixed number of columns 6 * added discriminator tag as a first column, it always allows distinguish Nim/markup entries, titles/headings, etc. `rstgen` does not rely any more (in most cases) on ad-hoc logic to determine what type each entry is. * there is now always a title entry added at the first line. * add a line number as 6th column * linkTitle (4th) column has a different format: before it was like `module: funcName()`, now it's `proc funcName()`. (This format is also propagated to `theindex.html` and search results, I kept it that way since I like it more though it's discussible.) This column is what used for Nim symbols resolution. * also changed details on column format for headings and titles: "keyword" is original, "linkTitle" is HTML one * fix paths on Windows + more clear code * Update compiler/docgen.nim Co-authored-by: Andreas Rumpf <rumpf_a@web.de> * Handle .md and .nim paths uniformly in findRefFile * handle titles better + more comments * don't allow markup overwrite index title for .nim files Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
Diffstat (limited to 'lib/packages/docutils')
-rw-r--r-- | lib/packages/docutils/dochelpers.nim | 27 | ||||
-rw-r--r-- | lib/packages/docutils/rst.nim | 263 | ||||
-rw-r--r-- | lib/packages/docutils/rstgen.nim | 200 | ||||
-rw-r--r-- | lib/packages/docutils/rstidx.nim | 138 |
4 files changed, 452 insertions, 176 deletions
diff --git a/lib/packages/docutils/dochelpers.nim b/lib/packages/docutils/dochelpers.nim index c7f7f73f5..b7cdd16d2 100644 --- a/lib/packages/docutils/dochelpers.nim +++ b/lib/packages/docutils/dochelpers.nim @@ -13,7 +13,7 @@ ## `type LangSymbol`_ in ``rst.nim``, while `match(generated, docLink)`_ ## matches it with `generated`, produced from `PNode` by ``docgen.rst``. -import rstast +import rstast, strutils when defined(nimPreviewSlimSystem): import std/[assertions, syncio] @@ -35,6 +35,12 @@ type ## name-type seq, e.g. for proc outType*: string ## result type, e.g. for proc +proc `$`*(s: LangSymbol): string = # for debug + ("(symkind=$1, symTypeKind=$2, name=$3, generics=$4, isGroup=$5, " & + "parametersProvided=$6, parameters=$7, outType=$8)") % [ + s.symKind, s.symTypeKind , s.name, s.generics, $s.isGroup, + $s.parametersProvided, $s.parameters, s.outType] + func nimIdentBackticksNormalize*(s: string): string = ## Normalizes the string `s` as a Nim identifier. ## @@ -71,6 +77,12 @@ func nimIdentBackticksNormalize*(s: string): string = else: discard # just omit '`' or ' ' if j != s.len: setLen(result, j) +proc langSymbolGroup*(kind: string, name: string): LangSymbol = + if kind notin ["proc", "func", "macro", "method", "iterator", + "template", "converter"]: + raise newException(ValueError, "unknown symbol kind $1" % [kind]) + result = LangSymbol(symKind: kind, name: name, isGroup: true) + proc toLangSymbol*(linkText: PRstNode): LangSymbol = ## Parses `linkText` into a more structured form using a state machine. ## @@ -82,11 +94,14 @@ proc toLangSymbol*(linkText: PRstNode): LangSymbol = ## ## This proc should be kept in sync with the `renderTypes` proc from ## ``compiler/typesrenderer.nim``. - assert linkText.kind in {rnRstRef, rnInner} + template fail(msg: string) = + raise newException(ValueError, msg) + if linkText.kind notin {rnRstRef, rnInner}: + fail("toLangSymbol: wrong input kind " & $linkText.kind) const NimDefs = ["proc", "func", "macro", "method", "iterator", "template", "converter", "const", "type", "var", - "enum", "object", "tuple"] + "enum", "object", "tuple", "module"] template resolveSymKind(x: string) = if x in ["enum", "object", "tuple"]: result.symKind = "type" @@ -109,11 +124,11 @@ proc toLangSymbol*(linkText: PRstNode): LangSymbol = template flushIdent() = if curIdent != "": case state - of inBeginning: doAssert false, "incorrect state inBeginning" + of inBeginning: fail("incorrect state inBeginning") of afterSymKind: resolveSymKind curIdent - of beforeSymbolName: doAssert false, "incorrect state beforeSymbolName" + of beforeSymbolName: fail("incorrect state beforeSymbolName") of atSymbolName: result.name = curIdent.nimIdentBackticksNormalize - of afterSymbolName: doAssert false, "incorrect state afterSymbolName" + of afterSymbolName: fail("incorrect state afterSymbolName") of genericsPar: result.generics = curIdent of parameterName: result.parameters.add (curIdent, "") of parameterType: diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index e9f76a26f..b3b1fb9c0 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -22,7 +22,7 @@ import os, strutils, rstast, dochelpers, std/enumutils, algorithm, lists, sequtils, - std/private/miscdollars, tables, strscans + std/private/miscdollars, tables, strscans, rstidx from highlite import SourceLanguage, getSourceLanguage when defined(nimPreviewSlimSystem): @@ -40,7 +40,7 @@ type roNimFile ## set for Nim files where default interpreted ## text role should be :nim: roSandboxDisabled ## this option enables certain options - ## (e.g. raw, include) + ## (e.g. raw, include, importdoc) ## which are disabled by default as they can ## enable users to read arbitrary data and ## perform XSS if the parser is used in a web @@ -73,11 +73,17 @@ type mwUnsupportedLanguage = "language '$1' not supported", mwUnsupportedField = "field '$1' not supported", mwRstStyle = "RST style: $1", + mwUnusedImportdoc = "importdoc for '$1' is not used", meSandboxedDirective = "disabled directive: '$1'", MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind, arg: string) {.closure, gcsafe.} ## what to do in case of an error FindFileHandler* = proc (filename: string): string {.closure, gcsafe.} + FindRefFileHandler* = + proc (targetRelPath: string): + tuple[targetPath: string, linkRelPath: string] {.closure, gcsafe.} + ## returns where .html or .idx file should be found by its relative path; + ## `linkRelPath` is a prefix to be added before a link anchor from such file proc rstnodeToRefname*(n: PRstNode): string proc addNodes*(n: PRstNode): string @@ -333,7 +339,8 @@ type arInternalRst, ## For automatically generated RST anchors (from ## headings, footnotes, inline internal targets): ## case-insensitive, 1-space-significant (by RST spec) - arNim ## For anchors generated by ``docgen.rst``: Nim-style case + arExternalRst, ## For external .nim doc comments or .rst/.md + arNim ## For anchors generated by ``docgen.nim``: Nim-style case ## sensitivity, etc. (see `proc normalizeNimName`_ for details) arHyperlink, ## For links with manually set anchors in ## form `text <pagename.html#anchor>`_ @@ -349,11 +356,15 @@ type of arInternalRst: anchorType: RstAnchorKind target: PRstNode + of arExternalRst: + anchorTypeExt: RstAnchorKind + refnameExt: string of arNim: tooltip: string # displayed tooltip for Nim-generated anchors langSym: LangSymbol refname: string # A reference name that will be inserted directly # into HTML/Latex. + external: bool AnchorSubstTable = Table[string, seq[AnchorSubst]] # use `seq` to account for duplicate anchors FootnoteType = enum @@ -372,6 +383,12 @@ type RstFileTable* = object filenameToIdx*: Table[string, FileIndex] idxToFilename*: seq[string] + ImportdocInfo = object + used: bool # was this import used? + fromInfo: TLineInfo # place of `.. importdoc::` directive + idxPath: string # full path to ``.idx`` file + linkRelPath: string # prefix before target anchor + title: string # document title obtained from ``.idx`` RstSharedState = object options*: RstParseOptions # parsing options hLevels: LevelMap # hierarchy of heading styles @@ -393,12 +410,17 @@ type footnotes: seq[FootnoteSubst] # correspondence b/w footnote label, # number, order of occurrence msgHandler: MsgHandler # How to handle errors. - findFile: FindFileHandler # How to find files. + findFile: FindFileHandler # How to find files for include. + findRefFile: FindRefFileHandler + # How to find files imported by importdoc. filenames*: RstFileTable # map file name <-> FileIndex (for storing # file names for warnings after 1st stage) currFileIdx*: FileIndex # current index in `filenames` tocPart*: seq[PRstNode] # all the headings of a document hasToc*: bool + idxImports*: Table[string, ImportdocInfo] + # map `importdoc`ed filename -> it's info + nimFileImported*: bool # Was any ``.nim`` module `importdoc`ed ? PRstSharedState* = ref RstSharedState ManualAnchor = object @@ -452,6 +474,9 @@ proc defaultFindFile*(filename: string): string = if fileExists(filename): result = filename else: result = "" +proc defaultFindRefFile*(filename: string): (string, string) = + (filename, "") + proc defaultRole(options: RstParseOptions): string = if roNimFile in options: "nim" else: "literal" @@ -492,12 +517,16 @@ proc getFilename(filenames: RstFileTable, fid: FileIndex): string = $fid.int, $(filenames.len - 1)]) result = filenames.idxToFilename[fid.int] +proc getFilename(s: PRstSharedState, subst: AnchorSubst): string = + getFilename(s.filenames, subst.info.fileIndex) + proc currFilename(s: PRstSharedState): string = getFilename(s.filenames, s.currFileIdx) proc newRstSharedState*(options: RstParseOptions, filename: string, findFile: FindFileHandler, + findRefFile: FindRefFileHandler, msgHandler: MsgHandler, hasToc: bool): PRstSharedState = let r = defaultRole(options) @@ -507,6 +536,9 @@ proc newRstSharedState*(options: RstParseOptions, options: options, msgHandler: if not isNil(msgHandler): msgHandler else: defaultMsgHandler, findFile: if not isNil(findFile): findFile else: defaultFindFile, + findRefFile: + if not isNil(findRefFile): findRefFile + else: defaultFindRefFile, hasToc: hasToc ) setCurrFilename(result, filename) @@ -525,6 +557,14 @@ proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) = proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string) = s.msgHandler(s.currFilename, LineRstInit, ColRstInit, msgKind, arg) +proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string; + line, col: int) = + s.msgHandler(s.currFilename, line, col, msgKind, arg) + +proc rstMessage(s: PRstSharedState, filename: string, msgKind: MsgKind, + arg: string) = + s.msgHandler(filename, LineRstInit, ColRstInit, msgKind, arg) + proc rstMessage*(filenames: RstFileTable, f: MsgHandler, info: TLineInfo, msgKind: MsgKind, arg: string) = ## Print warnings using `info`, i.e. in 2nd-pass warnings for @@ -756,6 +796,14 @@ proc internalRefPriority(k: RstAnchorKind): int = of footnoteAnchor: result = 4 of headlineAnchor: result = 3 +proc `$`(subst: AnchorSubst): string = # for debug + let s = + case subst.kind + of arInternalRst: "type=" & $subst.anchorType + of arExternalRst: "type=" & $subst.anchorTypeExt + of arNim: "langsym=" & $subst.langSym + result = "(kind=$1, priority=$2, $3)" % [$subst.kind, $subst.priority, s] + proc addAnchorRst(p: var RstParser, name: string, target: PRstNode, anchorType: RstAnchorKind) = ## Associates node `target` (which has field `anchor`) with an @@ -771,31 +819,49 @@ proc addAnchorRst(p: var RstParser, name: string, target: PRstNode, info: prevLineInfo(p), anchorType: anchorType)) p.curAnchors.setLen 0 -proc addAnchorNim*(s: var PRstSharedState, refn: string, tooltip: string, +proc addAnchorExtRst(s: var PRstSharedState, key: string, refn: string, + anchorType: RstAnchorKind, info: TLineInfo) = + let name = key.toLowerAscii + let prio = internalRefPriority(anchorType) + s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arExternalRst, refnameExt: refn, priority: prio, + info: info, + anchorTypeExt: anchorType)) + +proc addAnchorNim*(s: var PRstSharedState, external: bool, refn: string, tooltip: string, langSym: LangSymbol, priority: int, info: TLineInfo) = ## Adds an anchor `refn`, which follows ## the rule `arNim` (i.e. a symbol in ``*.nim`` file) s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add( - AnchorSubst(kind: arNim, refname: refn, langSym: langSym, + AnchorSubst(kind: arNim, external: external, refname: refn, langSym: langSym, tooltip: tooltip, priority: priority, info: info)) proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode, info: TLineInfo): seq[AnchorSubst] = - let langSym = toLangSymbol(signature) + var langSym: LangSymbol + try: + langSym = toLangSymbol(signature) + except ValueError: # parsing failed, not a Nim symbol + return let substitutions = s.anchors.getOrDefault(langSym.name, newSeq[AnchorSubst]()) if substitutions.len == 0: return - # map symKind (like "proc") -> found symbols/groups: - var found: Table[string, seq[AnchorSubst]] - for s in substitutions: - if s.kind == arNim: - if match(s.langSym, langSym): - found.mgetOrPut(s.langSym.symKind, newSeq[AnchorSubst]()).add s - for symKind, sList in found: + # logic to select only groups instead of concrete symbols + # with overloads, note that the same symbol can be defined + # in multiple modules and `importdoc`ed: + type GroupKey = tuple[symKind: string, origModule: string] + # map (symKind, file) (like "proc", "os.nim") -> found symbols/groups: + var found: Table[GroupKey, seq[AnchorSubst]] + for subst in substitutions: + if subst.kind == arNim: + if match(subst.langSym, langSym): + let key: GroupKey = (subst.langSym.symKind, getFilename(s, subst)) + found.mgetOrPut(key, newSeq[AnchorSubst]()).add subst + for key, sList in found: if sList.len == 1: result.add sList[0] else: # > 1, there are overloads, potential ambiguity in this `symKind` @@ -812,14 +878,16 @@ proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode, result.add s foundGroup = true break - doAssert foundGroup, "docgen has not generated the group" + doAssert(foundGroup, + "docgen has not generated the group for $1 (file $2)" % [ + langSym.name, getFilename(s, sList[0]) ]) proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo): seq[AnchorSubst] = let name = linkText.toLowerAscii let substitutions = s.anchors.getOrDefault(name, newSeq[AnchorSubst]()) for s in substitutions: - if s.kind == arInternalRst: + if s.kind in {arInternalRst, arExternalRst}: result.add s proc addFootnoteNumManual(p: var RstParser, num: int) = @@ -3251,6 +3319,15 @@ proc dirRaw(p: var RstParser): PRstNode = else: dirRawAux(p, result, rnRaw, parseSectionWrapper) +proc dirImportdoc(p: var RstParser): PRstNode = + result = parseDirective(p, rnDirective, {}, parseLiteralBlock) + assert result.sons[2].kind == rnLiteralBlock + assert result.sons[2].sons[0].kind == rnLeaf + let filenames: seq[string] = split(result.sons[2].sons[0].text, seps = {','}) + proc rmSpaces(s: string): string = s.split.join("") + for origFilename in filenames: + p.s.idxImports[origFilename.rmSpaces] = ImportdocInfo(fromInfo: lineInfo(p)) + proc selectDir(p: var RstParser, d: string): PRstNode = result = nil let tok = p.tok[p.idx-2] # report on directive in ".. directive::" @@ -3271,6 +3348,7 @@ proc selectDir(p: var RstParser, d: string): PRstNode = of "hint": result = dirAdmonition(p, d) of "image": result = dirImage(p) of "important": result = dirAdmonition(p, d) + of "importdoc": result = dirImportdoc(p) of "include": result = dirInclude(p) of "index": result = dirIndex(p) of "note": result = dirAdmonition(p, d) @@ -3401,11 +3479,90 @@ proc rstParsePass1*(fragment: string, getTokens(fragment, p.tok) result = parseDoc(p) -proc preparePass2*(s: PRstSharedState, mainNode: PRstNode) = +proc extractLinkEnd(x: string): string = + ## From links like `path/to/file.html#/%` extract `file.html#/%`. + let i = find(x, '#') + let last = + if i >= 0: i + else: x.len - 1 + let j = rfind(x, '/', start=0, last=last) + if j >= 0: + result = x[j+1 .. ^1] + else: + result = x + +proc loadIdxFile(s: var PRstSharedState, origFilename: string) = + doAssert roSandboxDisabled in s.options + var info: TLineInfo + info.fileIndex = addFilename(s, origFilename) + var (dir, basename, ext) = origFilename.splitFile + if ext notin [".md", ".rst", ".nim", ""]: + rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo, + meCannotOpenFile, origFilename & ": unknown extension") + let idxFilename = dir / basename & ".idx" + let (idxPath, linkRelPath) = s.findRefFile(idxFilename) + s.idxImports[origFilename].linkRelPath = linkRelPath + var + fileEntries: seq[IndexEntry] + title: IndexEntry + try: + (fileEntries, title) = parseIdxFile(idxPath) + except IOError: + rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo, + meCannotOpenFile, idxPath) + except ValueError as e: + s.msgHandler(idxPath, LineRstInit, ColRstInit, meInvalidField, e.msg) + + var isMarkup = false # for sanity check to avoid mixing .md <-> .nim + for entry in fileEntries: + # Though target .idx already has inside it the path to HTML relative + # project's root, we won't rely on it and use `linkRelPath` instead. + let refn = extractLinkEnd(entry.link) + # select either markup (rst/md) or Nim cases: + if entry.kind in {ieMarkupTitle, ieNimTitle}: + s.idxImports[origFilename].title = entry.keyword + case entry.kind + of ieIdxRole, ieHeading, ieMarkupTitle: + if ext == ".nim" and entry.kind == ieMarkupTitle: + rstMessage(s, idxPath, meInvalidField, + $ieMarkupTitle & " in supposedly .nim-derived file") + if entry.kind == ieMarkupTitle: + isMarkup = true + info.line = entry.line.uint16 + addAnchorExtRst(s, key = entry.keyword, refn = refn, + anchorType = headlineAnchor, info=info) + of ieNim, ieNimGroup, ieNimTitle: + if ext in [".md", ".rst"] or isMarkup: + rstMessage(s, idxPath, meInvalidField, + $entry.kind & " in supposedly markup-derived file") + s.nimFileImported = true + var langSym: LangSymbol + if entry.kind in {ieNim, ieNimTitle}: + var q: RstParser + initParser(q, s) + info.line = entry.line.uint16 + setLen(q.tok, 0) + q.idx = 0 + getTokens(entry.linkTitle, q.tok) + var sons = newSeq[PRstNode](q.tok.len) + for i in 0 ..< q.tok.len: sons[i] = newLeaf(q.tok[i].symbol) + let linkTitle = newRstNode(rnInner, sons) + langSym = linkTitle.toLangSymbol + else: # entry.kind == ieNimGroup + langSym = langSymbolGroup(kind=entry.linkTitle, name=entry.keyword) + addAnchorNim(s, external = true, refn = refn, tooltip = entry.linkDesc, + langSym = langSym, priority = -4, # lowest + info=info) + doAssert s.idxImports[origFilename].title != "" + +proc preparePass2*(s: var PRstSharedState, mainNode: PRstNode, importdoc = true) = ## Records titles in node `mainNode` and orders footnotes. countTitles(s, mainNode) fixHeadlines(s) orderFootnotes(s) + if importdoc: + for origFilename in s.idxImports.keys: + loadIdxFile(s, origFilename) proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode = # Associate this link alias with its target and change node kind to @@ -3423,6 +3580,9 @@ proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode = tooltip: string target: PRstNode info: TLineInfo + externFilename: string + # when external anchor: origin filename where anchor was defined + isTitle: bool proc cmp(x, y: LinkDef): int = result = cmp(x.priority, y.priority) if result == 0: @@ -3435,26 +3595,67 @@ proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode = target: y.value, info: y.info, tooltip: "(" & $y.kind & ")") let substRst = findMainAnchorRst(s, alias.addNodes, n.info) + template getExternFilename(subst: AnchorSubst): string = + if subst.kind == arExternalRst or + (subst.kind == arNim and subst.external): + getFilename(s, subst) + else: "" for subst in substRst: - foundLinks.add LinkDef(ar: arInternalRst, priority: subst.priority, - target: newLeaf(subst.target.anchor), + var refname, fullRefname: string + if subst.kind == arInternalRst: + refname = subst.target.anchor + fullRefname = refname + else: # arExternalRst + refname = subst.refnameExt + fullRefname = s.idxImports[getFilename(s, subst)].linkRelPath & + "/" & refname + let anchorType = + if subst.kind == arInternalRst: subst.anchorType + else: subst.anchorTypeExt # arExternalRst + foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority, + target: newLeaf(fullRefname), info: subst.info, - tooltip: "(" & $subst.anchorType & ")") + externFilename: getExternFilename(subst), + isTitle: isDocumentationTitle(refname), + tooltip: "(" & $anchorType & ")") # find anchors automatically generated from Nim symbols - if roNimFile in s.options: + if roNimFile in s.options or s.nimFileImported: let substNim = findMainAnchorNim(s, signature=alias, n.info) for subst in substNim: - foundLinks.add LinkDef(ar: arNim, priority: subst.priority, - target: newLeaf(subst.refname), + let fullRefname = + if subst.external: + s.idxImports[getFilename(s, subst)].linkRelPath & + "/" & subst.refname + else: subst.refname + foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority, + target: newLeaf(fullRefname), + externFilename: getExternFilename(subst), + isTitle: isDocumentationTitle(subst.refname), info: subst.info, tooltip: subst.tooltip) foundLinks.sort(cmp = cmp, order = Descending) let aliasStr = addNodes(alias) if foundLinks.len >= 1: - let kind = if foundLinks[0].ar == arHyperlink: rnHyperlink - elif foundLinks[0].ar == arNim: rnNimdocRef + if foundLinks[0].externFilename != "": + s.idxImports[foundLinks[0].externFilename].used = true + let kind = if foundLinks[0].ar in {arHyperlink, arExternalRst}: rnHyperlink + elif foundLinks[0].ar == arNim: + if foundLinks[0].externFilename == "": rnNimdocRef + else: rnHyperlink else: rnInternalRef result = newRstNode(kind) - result.sons = @[newRstNode(rnInner, desc.sons), foundLinks[0].target] + let documentName = # filename without ext for `.nim`, title for `.md` + if foundLinks[0].ar == arNim: + changeFileExt(foundLinks[0].externFilename.extractFilename, "") + elif foundLinks[0].externFilename != "": + s.idxImports[foundLinks[0].externFilename].title + else: foundLinks[0].externFilename.extractFilename + let linkText = + if foundLinks[0].externFilename != "": + if foundLinks[0].isTitle: newLeaf(addNodes(desc)) + else: newLeaf(documentName & ": " & addNodes(desc)) + else: + newRstNode(rnInner, desc.sons) + result.sons = @[linkText, foundLinks[0].target] if kind == rnNimdocRef: result.tooltip = foundLinks[0].tooltip if foundLinks.len > 1: # report ambiguous link var targets = newSeq[string]() @@ -3568,20 +3769,28 @@ proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode = inc i result.sons = newSons +proc completePass2*(s: PRstSharedState) = + for (filename, importdocInfo) in s.idxImports.pairs: + if not importdocInfo.used: + rstMessage(s.filenames, s.msgHandler, importdocInfo.fromInfo, + mwUnusedImportdoc, filename) + proc rstParse*(text, filename: string, line, column: int, options: RstParseOptions, findFile: FindFileHandler = nil, + findRefFile: FindRefFileHandler = nil, msgHandler: MsgHandler = nil): tuple[node: PRstNode, filenames: RstFileTable, hasToc: bool] = ## Parses the whole `text`. The result is ready for `rstgen.renderRstToOut`, ## note that 2nd tuple element should be fed to `initRstGenerator` ## argument `filenames` (it is being filled here at least with `filename` ## and possibly with other files from RST ``.. include::`` statement). - var sharedState = newRstSharedState(options, filename, findFile, + var sharedState = newRstSharedState(options, filename, findFile, findRefFile, msgHandler, hasToc=false) let unresolved = rstParsePass1(text, line, column, sharedState) preparePass2(sharedState, unresolved) result.node = resolveSubs(sharedState, unresolved) + completePass2(sharedState) result.filenames = sharedState.filenames result.hasToc = sharedState.hasToc diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim index 597ee1553..57bc00fcb 100644 --- a/lib/packages/docutils/rstgen.nim +++ b/lib/packages/docutils/rstgen.nim @@ -39,7 +39,8 @@ ## No backreferences are generated since finding all references of a footnote ## can be done by simply searching for ``[footnoteName]``. -import strutils, os, hashes, strtabs, rstast, rst, highlite, tables, sequtils, +import strutils, os, hashes, strtabs, rstast, rst, rstidx, + highlite, tables, sequtils, algorithm, parseutils, std/strbasics @@ -59,7 +60,7 @@ type outLatex # output is Latex MetaEnum* = enum - metaNone, metaTitle, metaSubtitle, metaAuthor, metaVersion + metaNone, metaTitleRaw, metaTitle, metaSubtitle, metaAuthor, metaVersion EscapeMode* = enum # in Latex text inside options [] and URLs is # escaped slightly differently than in normal text @@ -321,31 +322,8 @@ proc renderAux(d: PDoc, n: PRstNode, html, tex: string, result: var string) = # ---------------- index handling -------------------------------------------- -proc quoteIndexColumn(text: string): string = - ## Returns a safe version of `text` for serialization to the ``.idx`` file. - ## - ## The returned version can be put without worries in a line based tab - ## separated column text file. The following character sequence replacements - ## will be performed for that goal: - ## - ## * ``"\\"`` => ``"\\\\"`` - ## * ``"\n"`` => ``"\\n"`` - ## * ``"\t"`` => ``"\\t"`` - result = newStringOfCap(text.len + 3) - for c in text: - case c - of '\\': result.add "\\" - of '\L': result.add "\\n" - of '\C': discard - of '\t': result.add "\\t" - else: result.add c - -proc unquoteIndexColumn(text: string): string = - ## Returns the unquoted version generated by ``quoteIndexColumn``. - result = text.multiReplace(("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\")) - -proc setIndexTerm*(d: var RstGenerator, htmlFile, id, term: string, - linkTitle, linkDesc = "") = +proc setIndexTerm*(d: var RstGenerator; k: IndexEntryKind, htmlFile, id, term: string, + linkTitle, linkDesc = "", line = 0) = ## Adds a `term` to the index using the specified hyperlink identifier. ## ## A new entry will be added to the index using the format @@ -368,21 +346,8 @@ proc setIndexTerm*(d: var RstGenerator, htmlFile, id, term: string, ## <#writeIndexFile,RstGenerator,string>`_. The purpose of the index is ## documented in the `docgen tools guide ## <docgen.html#related-options-index-switch>`_. - var - entry = term - isTitle = false - entry.add('\t') - entry.add(htmlFile) - if id.len > 0: - entry.add('#') - entry.add(id) - else: - isTitle = true - if linkTitle.len > 0 or linkDesc.len > 0: - entry.add('\t' & linkTitle.quoteIndexColumn) - entry.add('\t' & linkDesc.quoteIndexColumn) - entry.add("\n") - + let (entry, isTitle) = formatIndexEntry(k, htmlFile, id, term, + linkTitle, linkDesc, line) if isTitle: d.theIndex.insert(entry) else: d.theIndex.add(entry) @@ -395,6 +360,15 @@ proc hash(n: PRstNode): int = result = result !& hash(n.sons[i]) result = !$result +proc htmlFileRelPath(d: PDoc): string = + if d.outDir.len == 0: + # /foo/bar/zoo.nim -> zoo.html + changeFileExt(extractFilename(d.filename), HtmlExt) + else: # d is initialized in docgen.nim + # outDir = /foo -\ + # destFile = /foo/bar/zoo.html -|-> bar/zoo.html + d.destFile.relativePath(d.outDir, '/') + proc renderIndexTerm*(d: PDoc, n: PRstNode, result: var string) = ## Renders the string decorated within \`foobar\`\:idx\: markers. ## @@ -411,17 +385,12 @@ proc renderIndexTerm*(d: PDoc, n: PRstNode, result: var string) = var term = "" renderAux(d, n, term) - setIndexTerm(d, changeFileExt(extractFilename(d.filename), HtmlExt), id, term, d.currentSection) + setIndexTerm(d, ieIdxRole, + htmlFileRelPath(d), id, term, d.currentSection) dispA(d.target, result, "<span id=\"$1\">$2</span>", "\\nimindexterm{$1}{$2}", [id, term]) type - IndexEntry* = object - keyword*: string - link*: string - linkTitle*: string ## contains a prettier text for the href - linkDesc*: string ## the title attribute of the final href - IndexedDocs* = Table[IndexEntry, seq[IndexEntry]] ## \ ## Contains the index sequences for doc types. ## @@ -432,21 +401,6 @@ type ## The value indexed by this IndexEntry is a sequence with the real index ## entries found in the ``.idx`` file. -proc cmp(a, b: IndexEntry): int = - ## Sorts two ``IndexEntry`` first by `keyword` field, then by `link`. - result = cmpIgnoreStyle(a.keyword, b.keyword) - if result == 0: - result = cmpIgnoreStyle(a.link, b.link) - -proc hash(x: IndexEntry): Hash = - ## Returns the hash for the combined fields of the type. - ## - ## The hash is computed as the chained hash of the individual string hashes. - result = x.keyword.hash !& x.link.hash - result = result !& x.linkTitle.hash - result = result !& x.linkDesc.hash - result = !$result - when defined(gcDestructors): template `<-`(a, b: var IndexEntry) = a = move(b) else: @@ -455,6 +409,7 @@ else: shallowCopy a.link, b.link shallowCopy a.linkTitle, b.linkTitle shallowCopy a.linkDesc, b.linkDesc + shallowCopy a.module, b.module proc sortIndex(a: var openArray[IndexEntry]) = # we use shellsort here; fast and simple @@ -494,16 +449,20 @@ proc generateSymbolIndex(symbols: seq[IndexEntry]): string = result = "<dl>" var i = 0 while i < symbols.len: - let keyword = symbols[i].keyword + let keyword = esc(outHtml, symbols[i].keyword) let cleanedKeyword = keyword.escapeLink result.addf("<dt><a name=\"$2\" href=\"#$2\"><span>$1:</span></a></dt><dd><ul class=\"simple\">\n", [keyword, cleanedKeyword]) var j = i - while j < symbols.len and keyword == symbols[j].keyword: + while j < symbols.len and symbols[i].keyword == symbols[j].keyword: let url = symbols[j].link.escapeLink - text = if symbols[j].linkTitle.len > 0: symbols[j].linkTitle else: url - desc = if symbols[j].linkDesc.len > 0: symbols[j].linkDesc else: "" + module = symbols[j].module + text = + if symbols[j].linkTitle.len > 0: + esc(outHtml, module & ": " & symbols[j].linkTitle) + else: url + desc = symbols[j].linkDesc if desc.len > 0: result.addf("""<li><a class="reference external" title="$3" data-doc-search-tag="$2" href="$1">$2</a></li> @@ -517,13 +476,6 @@ proc generateSymbolIndex(symbols: seq[IndexEntry]): string = i = j result.add("</dl>") -proc isDocumentationTitle(hyperlink: string): bool = - ## Returns true if the hyperlink is actually a documentation title. - ## - ## Documentation titles lack the hash. See `mergeIndexes() - ## <#mergeIndexes,string>`_ for a more detailed explanation. - result = hyperlink.find('#') < 0 - proc stripTocLevel(s: string): tuple[level: int, text: string] = ## Returns the *level* of the toc along with the text without it. for c in 0 ..< s.len: @@ -557,17 +509,15 @@ proc generateDocumentationToc(entries: seq[IndexEntry]): string = level = 1 levels.newSeq(entries.len) for entry in entries: - let (rawLevel, rawText) = stripTocLevel(entry.linkTitle or entry.keyword) + let (rawLevel, rawText) = stripTocLevel(entry.linkTitle) if rawLevel < 1: # This is a normal symbol, push it *inside* one level from the last one. levels[L].level = level + 1 - # Also, ignore the linkTitle and use directly the keyword. - levels[L].text = entry.keyword else: # The level did change, update the level indicator. level = rawLevel levels[L].level = rawLevel - levels[L].text = rawText + levels[L].text = rawText inc L # Now generate hierarchical lists based on the precalculated levels. @@ -598,7 +548,7 @@ proc generateDocumentationIndex(docs: IndexedDocs): string = for title in titles: let tocList = generateDocumentationToc(docs.getOrDefault(title)) result.add("<ul><li><a href=\"" & - title.link & "\">" & title.keyword & "</a>\n" & tocList & "</li></ul>\n") + title.link & "\">" & title.linkTitle & "</a>\n" & tocList & "</li></ul>\n") proc generateDocumentationJumps(docs: IndexedDocs): string = ## Returns a plain list of hyperlinks to documentation TOCs in HTML. @@ -610,7 +560,7 @@ proc generateDocumentationJumps(docs: IndexedDocs): string = var chunks: seq[string] = @[] for title in titles: - chunks.add("<a href=\"" & title.link & "\">" & title.keyword & "</a>") + chunks.add("<a href=\"" & title.link & "\">" & title.linkTitle & "</a>") result.add(chunks.join(", ") & ".<br/>") @@ -639,39 +589,11 @@ proc readIndexDir*(dir: string): # Scan index files and build the list of symbols. for path in walkDirRec(dir): if path.endsWith(IndexExt): - var - fileEntries: seq[IndexEntry] - title: IndexEntry - f = 0 - newSeq(fileEntries, 500) - setLen(fileEntries, 0) - for line in lines(path): - let s = line.find('\t') - if s < 0: continue - setLen(fileEntries, f+1) - fileEntries[f].keyword = line.substr(0, s-1) - fileEntries[f].link = line.substr(s+1) - # See if we detect a title, a link without a `#foobar` trailing part. - if title.keyword.len == 0 and fileEntries[f].link.isDocumentationTitle: - title.keyword = fileEntries[f].keyword - title.link = fileEntries[f].link - - if fileEntries[f].link.find('\t') > 0: - let extraCols = fileEntries[f].link.split('\t') - fileEntries[f].link = extraCols[0] - assert extraCols.len == 3 - fileEntries[f].linkTitle = extraCols[1].unquoteIndexColumn - fileEntries[f].linkDesc = extraCols[2].unquoteIndexColumn - else: - fileEntries[f].linkTitle = "" - fileEntries[f].linkDesc = "" - inc f + var (fileEntries, title) = parseIdxFile(path) # Depending on type add this to the list of symbols or table of APIs. - if title.keyword.len == 0: - for i in 0 ..< f: - # Don't add to symbols TOC entries (they start with a whitespace). - let toc = fileEntries[i].linkTitle - if toc.len > 0 and toc[0] == ' ': + if title.kind == ieNimTitle: + for i in 0 ..< fileEntries.len: + if fileEntries[i].kind != ieNim: continue # Ok, non TOC entry, add it. setLen(result.symbols, L + 1) @@ -687,7 +609,7 @@ proc readIndexDir*(dir: string): result.modules.add(x.changeFileExt("")) else: # Generate the symbolic anchor for index quickjumps. - title.linkTitle = "doc_toc_" & $result.docs.len + title.aux = "doc_toc_" & $result.docs.len result.docs[title] = fileEntries proc mergeIndexes*(dir: string): string = @@ -747,24 +669,6 @@ proc mergeIndexes*(dir: string): string = # ---------------------------------------------------------------------------- -proc stripTocHtml(s: string): string = - ## Ugly quick hack to remove HTML tags from TOC titles. - ## - ## A TocEntry.header field already contains rendered HTML tags. Instead of - ## implementing a proper version of renderRstToOut() which recursively - ## renders an rst tree to plain text, we simply remove text found between - ## angled brackets. Given the limited possibilities of rst inside TOC titles - ## this should be enough. - result = s - var first = result.find('<') - while first >= 0: - let last = result.find('>', first) - if last < 0: - # Abort, since we didn't found a closing angled bracket. - return - result.delete(first..last) - first = result.find('<', first) - proc renderHeadline(d: PDoc, n: PRstNode, result: var string) = var tmp = "" for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], tmp) @@ -785,19 +689,12 @@ proc renderHeadline(d: PDoc, n: PRstNode, result: var string) = # Generate index entry using spaces to indicate TOC level for the output HTML. assert n.level >= 0 - let - htmlFileRelPath = if d.outDir.len == 0: - # /foo/bar/zoo.nim -> zoo.html - changeFileExt(extractFilename(d.filename), HtmlExt) - else: # d is initialized in docgen.nim - # outDir = /foo -\ - # destFile = /foo/bar/zoo.html -|-> bar/zoo.html - d.destFile.relativePath(d.outDir, '/') - setIndexTerm(d, htmlFileRelPath, n.anchor, tmp.stripTocHtml, - spaces(max(0, n.level)) & tmp) + setIndexTerm(d, ieHeading, htmlFile = d.htmlFileRelPath, id = n.anchor, + term = n.addNodes, linkTitle = spaces(max(0, n.level)) & tmp) proc renderOverline(d: PDoc, n: PRstNode, result: var string) = if n.level == 0 and d.meta[metaTitle].len == 0: + d.meta[metaTitleRaw] = n.addNodes for i in countup(0, len(n)-1): renderRstToOut(d, n.sons[i], d.meta[metaTitle]) d.currentSection = d.meta[metaTitle] @@ -813,6 +710,8 @@ proc renderOverline(d: PDoc, n: PRstNode, result: var string) = dispA(d.target, result, "<h$1$2><center>$3</center></h$1>", "\\rstov$4[$5]{$3}$2\n", [$n.level, n.anchor.idS, tmp, $chr(n.level - 1 + ord('A')), tocName]) + setIndexTerm(d, ieHeading, htmlFile = d.htmlFileRelPath, id = n.anchor, + term = n.addNodes, linkTitle = spaces(max(0, n.level)) & tmp) proc renderTocEntry(d: PDoc, n: PRstNode, result: var string) = var header = "" @@ -1197,6 +1096,18 @@ proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string, "\\hyperlink{$2}{$1} (p.~\\pageref{$2})", [textStr, linkStr, nimDocStr, tooltipStr]) +proc traverseForIndex*(d: PDoc, n: PRstNode) = + ## A version of [renderRstToOut] that only fills entries for ``.idx`` files. + var discarded: string + if n == nil: return + case n.kind + of rnIdx: renderIndexTerm(d, n, discarded) + of rnHeadline, rnMarkdownHeadline: renderHeadline(d, n, discarded) + of rnOverline: renderOverline(d, n, discarded) + else: + for i in 0 ..< len(n): + traverseForIndex(d, n.sons[i]) + proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = if n == nil: return case n.kind @@ -1451,6 +1362,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = of rnTitle: d.meta[metaTitle] = "" renderRstToOut(d, n.sons[0], d.meta[metaTitle]) + d.meta[metaTitleRaw] = n.sons[0].addNodes # ----------------------------------------------------------------------------- @@ -1616,11 +1528,13 @@ proc rstToHtml*(s: string, options: RstParseOptions, proc myFindFile(filename: string): string = # we don't find any files in online mode: result = "" + proc myFindRefFile(filename: string): (string, string) = + result = ("", "") const filen = "input" let (rst, filenames, t) = rstParse(s, filen, line=LineRstInit, column=ColRstInit, - options, myFindFile, msgHandler) + options, myFindFile, myFindRefFile, msgHandler) var d: RstGenerator initRstGenerator(d, outHtml, config, filen, myFindFile, msgHandler, filenames, hasToc = t) diff --git a/lib/packages/docutils/rstidx.nim b/lib/packages/docutils/rstidx.nim new file mode 100644 index 000000000..c109636d7 --- /dev/null +++ b/lib/packages/docutils/rstidx.nim @@ -0,0 +1,138 @@ +# +# Nim's Runtime Library +# (c) Copyright 2022 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. + +## Nim `idx`:idx: file format related definitions. + +import strutils, std/syncio, hashes +from os import splitFile + +type + IndexEntryKind* = enum ## discriminator tag + ieMarkupTitle = "markupTitle" + ## RST/Markdown title, text in `keyword` + + ## HTML text in `linkTitle` + ieNimTitle = "nimTitle" + ## Nim title + ieHeading = "heading" ## RST/Markdown markup heading, escaped + ieIdxRole = "idx" ## RST :idx: definition, escaped + ieNim = "nim" ## Nim symbol, unescaped + ieNimGroup = "nimgrp" ## Nim overload group, unescaped + IndexEntry* = object + kind*: IndexEntryKind ## 0. + keyword*: string ## 1. + link*: string ## 2. + linkTitle*: string ## 3. contains a prettier text for the href + linkDesc*: string ## 4. the title attribute of the final href + line*: int ## 5. + module*: string ## origin file, NOT a field in ``.idx`` file + aux*: string ## auxuliary field, NOT a field in ``.idx`` file + +proc isDocumentationTitle*(hyperlink: string): bool = + ## Returns true if the hyperlink is actually a documentation title. + ## + ## Documentation titles lack the hash. See `mergeIndexes() + ## <#mergeIndexes,string>`_ for a more detailed explanation. + result = hyperlink.find('#') < 0 + +proc `$`*(e: IndexEntry): string = + """("$1", "$2", "$3", "$4", $5)""" % [ + e.keyword, e.link, e.linkTitle, e.linkDesc, $e.line] + +proc quoteIndexColumn(text: string): string = + ## Returns a safe version of `text` for serialization to the ``.idx`` file. + ## + ## The returned version can be put without worries in a line based tab + ## separated column text file. The following character sequence replacements + ## will be performed for that goal: + ## + ## * ``"\\"`` => ``"\\\\"`` + ## * ``"\n"`` => ``"\\n"`` + ## * ``"\t"`` => ``"\\t"`` + result = newStringOfCap(text.len + 3) + for c in text: + case c + of '\\': result.add "\\" + of '\L': result.add "\\n" + of '\C': discard + of '\t': result.add "\\t" + else: result.add c + +proc unquoteIndexColumn*(text: string): string = + ## Returns the unquoted version generated by ``quoteIndexColumn``. + result = text.multiReplace(("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\")) + +proc formatIndexEntry*(kind: IndexEntryKind; htmlFile, id, term, linkTitle, + linkDesc: string, line: int): + tuple[entry: string, isTitle: bool] = + result.entry = $kind + result.entry.add('\t') + result.entry.add term + result.entry.add('\t') + result.entry.add(htmlFile) + if id.len > 0: + result.entry.add('#') + result.entry.add(id) + result.isTitle = false + else: + result.isTitle = true + result.entry.add('\t' & linkTitle.quoteIndexColumn) + result.entry.add('\t' & linkDesc.quoteIndexColumn) + result.entry.add('\t' & $line) + result.entry.add("\n") + +proc parseIndexEntryKind(s: string): IndexEntryKind = + result = case s: + of "nim": ieNim + of "nimgrp": ieNimGroup + of "heading": ieHeading + of "idx": ieIdxRole + of "nimTitle": ieNimTitle + of "markupTitle": ieMarkupTitle + else: raise newException(ValueError, "unknown index entry value $1" % [s]) + +proc parseIdxFile*(path: string): + tuple[fileEntries: seq[IndexEntry], title: IndexEntry] = + var + f = 0 + newSeq(result.fileEntries, 500) + setLen(result.fileEntries, 0) + let (_, base, _) = path.splitFile + for line in lines(path): + let s = line.find('\t') + if s < 0: continue + setLen(result.fileEntries, f+1) + let cols = line.split('\t') + result.fileEntries[f].kind = parseIndexEntryKind(cols[0]) + result.fileEntries[f].keyword = cols[1] + result.fileEntries[f].link = cols[2] + if result.title.keyword.len == 0: + result.fileEntries[f].module = base + else: + result.fileEntries[f].module = result.title.keyword + + result.fileEntries[f].linkTitle = cols[3].unquoteIndexColumn + result.fileEntries[f].linkDesc = cols[4].unquoteIndexColumn + result.fileEntries[f].line = parseInt(cols[5]) + + if result.fileEntries[f].kind in {ieNimTitle, ieMarkupTitle}: + result.title = result.fileEntries[f] + inc f + +proc cmp*(a, b: IndexEntry): int = + ## Sorts two ``IndexEntry`` first by `keyword` field, then by `link`. + result = cmpIgnoreStyle(a.keyword, b.keyword) + if result == 0: + result = cmpIgnoreStyle(a.link, b.link) + +proc hash*(x: IndexEntry): Hash = + ## Returns the hash for the combined fields of the type. + ## + ## The hash is computed as the chained hash of the individual string hashes. + result = x.keyword.hash !& x.link.hash + result = result !& x.linkTitle.hash + result = result !& x.linkDesc.hash + result = !$result |