docgen: implement doc link resolution in current module (#18642)

author: Andrey Makarov <ph.makarov@gmail.com> 2021-10-28 20:20:52 +0300
committer: GitHub <noreply@github.com> 2021-10-28 19:20:52 +0200
commit: 7ba2659f733b97db63b7552415ad048e34d4a11a (patch)
tree: 54eb85f0eabc927c6d15c2d69c45aefd09efa39d /lib
parent: c80e2c173686bd12904e5487752dc0ce20cb8bcb (diff)
download: Nim-7ba2659f733b97db63b7552415ad048e34d4a11a.tar.gz
5 files changed, 545 insertions, 78 deletions
diff --git a/lib/packages/docutils/dochelpers.nim b/lib/packages/docutils/dochelpers.nim
new file mode 100644
index 000000000..c488c4d99
--- /dev/null
+++ b/lib/packages/docutils/dochelpers.nim
@@ -0,0 +1,267 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2021 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Integration helpers between ``docgen.nim`` and ``rst.nim``.
+##
+## Function `toLangSymbol(linkText)`_ produces a signature `docLink` of
+## `type LangSymbol`_ in ``rst.nim``, while `match(generated, docLink)`_
+## matches it with `generated`, produced from `PNode` by ``docgen.nim``.
+
+import rstast
+
+type
+  LangSymbol* = object       ## symbol signature in Nim
+    symKind*: string           ## "proc", "const", etc
+    name*: string              ## plain symbol name without any parameters
+    generics*: string          ## generic parameters (without brackets)
+    isGroup*: bool             ## is LangSymbol a group with overloads?
+    # the following fields are valid iff `isGroup` == false
+    # (always false when parsed by `toLangSymbol` because link like foo_
+    # can point to just a single symbol foo, e.g. proc).
+    parametersProvided*: bool  ## to disambiguate `proc f`_ and `proc f()`_
+    parameters*: seq[tuple[name: string, `type`: string]]
+                               ## name-type seq, e.g. for proc
+    outType*: string           ## result type, e.g. for proc
+
+func nimIdentBackticksNormalize*(s: string): string =
+  ## Normalizes the string `s` as a Nim identifier.
+  ##
+  ## Unlike `nimIdentNormalize`, this also removes spaces and backticks.
+  ##
+  ## .. Warning:: No checking (e.g. that identifiers cannot start with
+  ##    digits or '_', or that number of backticks is even) is performed.
+  runnableExamples:
+    doAssert nimIdentBackticksNormalize("Foo_bar") == "Foobar"
+    doAssert nimIdentBackticksNormalize("FoO BAr") == "Foobar"
+    doAssert nimIdentBackticksNormalize("`Foo BAR`") == "Foobar"
+    doAssert nimIdentBackticksNormalize("` Foo BAR `") == "Foobar"
+    # not a valid identifier:
+    doAssert nimIdentBackticksNormalize("`_x_y`") == "_xy"
+  result = newString(s.len)  # upper bound on the length, trimmed at the end
+  var firstChar = true
+  var j = 0
+  for i in 0..len(s) - 1:
+    if s[i] in {'A'..'Z'}:
+      if not firstChar:  # to lowercase (the first kept char keeps its case)
+        result[j] = chr(ord(s[i]) + (ord('a') - ord('A')))
+      else:
+        result[j] = s[i]
+        firstChar = false
+      inc j
+    elif s[i] notin {'_', ' ', '`'}:
+      result[j] = s[i]
+      inc j
+      firstChar = false
+    elif s[i] == '_' and firstChar:
+      result[j] = '_'
+      inc j
+      firstChar = false
+    else: discard  # omit '`', ' ' and any '_' after the first kept character
+  if j != s.len: setLen(result, j)
+
+proc toLangSymbol*(linkText: PRstNode): LangSymbol =
+  ## Parses `linkText` into a more structured form using a state machine.
+  ##
+  ## This proc is designed to allow link syntax with operators even
+  ## without escaped backticks inside::
+  ##
+  ##   `proc *`_
+  ##   `proc []`_
+  ##
+  ## This proc should be kept in sync with the `renderTypes` proc from
+  ## ``compiler/typesrenderer.nim``.
+  assert linkText.kind in {rnRef, rnInner}
+
+  const NimDefs = ["proc", "func", "macro", "method", "iterator",
+                   "template", "converter", "const", "type", "var"]
+  type
+    State = enum
+      inBeginning
+      afterSymKind
+      beforeSymbolName  # auxiliary state to catch situations like `proc []`_ after space
+      atSymbolName
+      afterSymbolName
+      genericsPar
+      parameterName
+      parameterType
+      outType
+  var state = inBeginning
+  var curIdent = ""
+  template flushIdent() =  # stores `curIdent` into the field chosen by `state`
+    if curIdent != "":
+      case state
+      of inBeginning:  doAssert false, "incorrect state inBeginning"
+      of afterSymKind:  result.symKind = curIdent
+      of beforeSymbolName:  doAssert false, "incorrect state beforeSymbolName"
+      of atSymbolName: result.name = curIdent.nimIdentBackticksNormalize
+      of afterSymbolName: doAssert false, "incorrect state afterSymbolName"
+      of genericsPar: result.generics = curIdent
+      of parameterName: result.parameters.add (curIdent, "")
+      of parameterType:  # every parameter still lacking a type gets this one
+        for a in countdown(result.parameters.len - 1, 0):
+          if result.parameters[a].`type` == "":
+            result.parameters[a].`type` = curIdent
+      of outType: result.outType = curIdent
+      curIdent = ""
+  var parens = 0
+  let L = linkText.sons.len
+  template s(i: int): string = linkText.sons[i].text
+  var i = 0
+  template nextState =
+    case s(i)
+    of " ":
+      if state == afterSymKind:
+        flushIdent
+        state = beforeSymbolName
+    of "`":
+      curIdent.add "`"
+      inc i
+      while i < L:  # add contents between ` ` as a whole
+        curIdent.add s(i)
+        if s(i) == "`":
+          break
+        inc i
+      curIdent = curIdent.nimIdentBackticksNormalize
+      if state in {inBeginning, afterSymKind, beforeSymbolName}:
+        state = atSymbolName
+        flushIdent
+        state = afterSymbolName
+    of "[":
+      if state notin {inBeginning, afterSymKind, beforeSymbolName}:
+        inc parens
+      if state in {inBeginning, afterSymKind, beforeSymbolName}:
+        state = atSymbolName
+        curIdent.add s(i)
+      elif state in {atSymbolName, afterSymbolName} and parens == 1:
+        flushIdent
+        state = genericsPar
+        curIdent.add s(i)
+      else: curIdent.add s(i)
+    of "]":
+      if state notin {inBeginning, afterSymKind, beforeSymbolName, atSymbolName}:
+        dec parens
+      if state == genericsPar and parens == 0:
+        curIdent.add s(i)
+        flushIdent
+      else: curIdent.add s(i)
+    of "(":
+      inc parens
+      if state in {inBeginning, afterSymKind, beforeSymbolName}:
+        result.parametersProvided = true
+        state = atSymbolName
+        flushIdent
+        state = parameterName
+      elif state in {atSymbolName, afterSymbolName, genericsPar} and parens == 1:
+        result.parametersProvided = true
+        flushIdent
+        state = parameterName
+      else: curIdent.add s(i)
+    of ")":
+      dec parens
+      if state in {parameterName, parameterType} and parens == 0:
+        flushIdent
+        state = outType
+      else: curIdent.add s(i)
+    of "{":  # skip pragmas: ignore every token up to the closing `}`
+      while i < L:
+        if s(i) == "}":
+          break
+        inc i
+    of ",", ";":
+      if state in {parameterName, parameterType} and parens == 1:
+        flushIdent
+        state = parameterName
+      else: curIdent.add s(i)
+    of "*":  # skip export symbol
+      if state == atSymbolName:
+        flushIdent
+        state = afterSymbolName
+      elif state == afterSymbolName:
+        discard
+      else: curIdent.add "*"
+    of ":":
+      if state == outType: discard  # ':' in the result-type part is dropped
+      elif state == parameterName:
+        flushIdent
+        state = parameterType
+      else: curIdent.add ":"
+    else:
+      let isPostfixSymKind = i > 0 and i == L - 1 and
+          result.symKind == "" and s(i) in NimDefs
+      if isPostfixSymKind:  # for links like `foo proc`_
+        result.symKind = s(i)
+      else:
+        case state
+        of inBeginning:
+          if s(i) in NimDefs:
+            state = afterSymKind
+          else:
+            state = atSymbolName
+          curIdent.add s(i)
+        of afterSymKind, beforeSymbolName:
+          state = atSymbolName
+          curIdent.add s(i)
+        of parameterType:
+          case s(i)
+          of "ref": curIdent.add "ref."
+          of "ptr": curIdent.add "ptr."
+          of "var": discard  # `var` is dropped from parameter types
+          else: curIdent.add s(i).nimIdentBackticksNormalize
+        of atSymbolName:
+          curIdent.add s(i)
+        else:
+          curIdent.add s(i).nimIdentBackticksNormalize
+  while i < L:
+    nextState
+    inc i
+  if state == afterSymKind:  # treat `type`_ as link to symbol `type`
+    state = atSymbolName
+  flushIdent
+  result.isGroup = false
+
+proc match*(generated: LangSymbol, docLink: LangSymbol): bool =
+  ## Returns true if `generated` can be a target for `docLink`.
+  ## If `generated` is an overload group then only `symKind` and `name` are
+  ## compared, and `docLink` must not provide parameters (no `()`).
+  result = true
+  if docLink.symKind != "":
+    if generated.symKind == "proc":  # a `func` link may also target a proc
+      result = docLink.symKind in ["proc", "func"]
+    else:
+      result = generated.symKind == docLink.symKind
+    if not result: return
+  result = generated.name == docLink.name
+  if not result: return
+  if generated.isGroup:
+    # if `()` were added then it's not a reference to the whole group:
+    return not docLink.parametersProvided
+  if docLink.generics != "":
+    result = generated.generics == docLink.generics
+    if not result: return
+  if docLink.outType != "":
+    result = generated.outType == docLink.outType
+    if not result: return
+  if docLink.parametersProvided:
+    result = generated.parameters.len == docLink.parameters.len
+    if not result: return
+    var onlyType = false
+    for i in 0 ..< generated.parameters.len:
+      let g = generated.parameters[i]
+      let d = docLink.parameters[i]
+      if i == 0:
+        if g.`type` == d.name:
+          onlyType = true  # only types, not names, are provided in `docLink`
+      if onlyType:
+        result = g.`type` == d.name
+      else:
+        if d.`type` != "":
+          result = g.`type` == d.`type`
+          if not result: return
+        result = g.name == d.name
+      if not result: return
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index 29234f28b..2e908f4e5 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -114,7 +114,7 @@
 ## .. _`extra features`:
 ##
 ## Optional additional features, turned on by ``options: RstParseOption`` in
-## `rstParse proc <#rstParse,string,string,int,int,bool,RstParseOptions,FindFileHandler,MsgHandler>`_:
+## `proc rstParse`_:
 ##
 ## * emoji / smiley symbols
 ## * Markdown tables
@@ -196,7 +196,7 @@
 ## .. _Sphinx roles: https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html
 
 import
-  os, strutils, rstast, std/enumutils, algorithm, lists, sequtils,
+  os, strutils, rstast, dochelpers, std/enumutils, algorithm, lists, sequtils,
   std/private/miscdollars, tables
 from highlite import SourceLanguage, getSourceLanguage
 
@@ -231,6 +231,7 @@ type
     meFootnoteMismatch = "mismatch in number of footnotes and their refs: $1",
     mwRedefinitionOfLabel = "redefinition of label '$1'",
     mwUnknownSubstitution = "unknown substitution '$1'",
+    mwAmbiguousLink = "ambiguous doc link $1",
     mwBrokenLink = "broken link '$1'",
     mwUnsupportedLanguage = "language '$1' not supported",
     mwUnsupportedField = "field '$1' not supported",
@@ -473,12 +474,42 @@ type
     hasPeers: bool       # has headings on the same level of hierarchy?
   LevelMap = seq[LevelInfo]   # Saves for each possible title adornment
                               # style its level in the current document.
+  SubstitutionKind = enum
+    rstSubstitution = "substitution",
+    hyperlinkAlias = "hyperlink alias",
+    implicitHyperlinkAlias = "implicitly-generated hyperlink alias"
   Substitution = object
+    kind*: SubstitutionKind
     key*: string
     value*: PRstNode
-  AnchorSubst = tuple
-    mainAnchor: string
-    aliases: seq[string]
+    info*: TLineInfo   # place where the substitution was defined
+  AnchorRule = enum
+    arInternalRst,  ## For automatically generated RST anchors (from
+                    ## headings, footnotes, inline internal targets):
+                    ## case-insensitive, 1-space-significant (by RST spec)
+    arNim   ## For anchors generated by ``docgen.rst``: Nim-style case
+            ## sensitivity, etc. (see `proc normalizeNimName`_ for details)
+    arHyperlink,  ## For links with manually set anchors in
+                  ## form `text <pagename.html#anchor>`_
+  RstAnchorKind = enum
+    manualDirectiveAnchor = "manual directive anchor",
+    manualInlineAnchor = "manual inline anchor",
+    footnoteAnchor = "footnote anchor",
+    headlineAnchor = "implicitly-generated headline anchor"
+  AnchorSubst = object
+    mainAnchor: ref string  # A reference name that will be inserted directly
+                            # into HTML/Latex. It's declared as `ref` because
+                            # it can be shared between aliases.
+    info: TLineInfo         # where the anchor was defined
+    priority: int
+    case kind: range[arInternalRst .. arNim]
+    of arInternalRst:
+      anchorType: RstAnchorKind
+    of arNim:
+      tooltip: string       # displayed tooltip for Nim-generated anchors
+      langSym: LangSymbol
+  AnchorSubstTable = Table[string, seq[AnchorSubst]]
+                         # use `seq` to account for duplicate anchors
   FootnoteType = enum
     fnManualNumber,     # manually numbered footnote like [3]
     fnAutoNumber,       # auto-numbered footnote [#]
@@ -505,7 +536,8 @@ type
     currRoleKind: RstNodeKind   # ... and its node kind
     subs: seq[Substitution]     # substitutions
     refs*: seq[Substitution]    # references
-    anchors*: seq[AnchorSubst]  # internal target substitutions
+    anchors*: AnchorSubstTable
+                                # internal target substitutions
     lineFootnoteNum: seq[TLineInfo]     # footnote line, auto numbers .. [#]
     lineFootnoteNumRef: seq[TLineInfo]  # footnote line, their reference [#]_
     currFootnoteNumRef: int             # ... their counter for `resolveSubs`
@@ -518,7 +550,7 @@ type
     findFile: FindFileHandler   # How to find files.
     filenames*: RstFileTable    # map file name <-> FileIndex (for storing
                                 # file names for warnings after 1st stage)
-    currFileIdx: FileIndex      # current index in `filesnames`
+    currFileIdx*: FileIndex     # current index in `filenames`
     hasToc*: bool
 
   PRstSharedState* = ref RstSharedState
@@ -532,6 +564,7 @@ type
                                 ## in case of error/warning reporting to
                                 ## (relative) line/column of the token.
     curAnchor*: string          # variable to track latest anchor in s.anchors
+    curAnchorName*: string      # corresponding name in human-readable format
 
   EParseError* = object of ValueError
 
@@ -590,13 +623,16 @@ proc whichRoleAux(sym: string): RstNodeKind =
 
 proc len(filenames: RstFileTable): int = filenames.idxToFilename.len
 
-proc setCurrFilename(s: PRstSharedState, file1: string) =
+proc addFilename*(s: PRstSharedState, file1: string): FileIndex =
+  ## Returns index of filename, adding it if it has not been used before
   let nextIdx = s.filenames.len.FileIndex
-  let v = getOrDefault(s.filenames.filenameToIdx, file1, default = nextIdx)
-  if v == nextIdx:
-    s.filenames.filenameToIdx[file1] = v
+  result = getOrDefault(s.filenames.filenameToIdx, file1, default = nextIdx)
+  if result == nextIdx:
+    s.filenames.filenameToIdx[file1] = result
     s.filenames.idxToFilename.add file1
-  s.currFileIdx = v
+
+proc setCurrFilename*(s: PRstSharedState, file1: string) =
+  s.currFileIdx = addFilename(s, file1)
 
 proc getFilename(filenames: RstFileTable, fid: FileIndex): string =
   doAssert(0 <= fid.int and fid.int < filenames.len,
@@ -730,6 +766,8 @@ proc initParser(p: var RstParser, sharedState: PRstSharedState) =
   p.s = sharedState
 
 proc addNodesAux(n: PRstNode, result: var string) =
+  if n == nil:
+    return
   if n.kind == rnLeaf:
     result.add(n.text)
   else:
@@ -738,6 +776,11 @@ proc addNodesAux(n: PRstNode, result: var string) =
 proc addNodes(n: PRstNode): string =
   n.addNodesAux(result)
 
+proc linkName(n: PRstNode): string =
+  ## Returns a normalized reference name, see:
+  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
+  n.addNodes.toLowerAscii
+
 proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) =
   template special(s) =
     if b:
@@ -804,15 +847,26 @@ proc findSub(s: PRstSharedState, n: PRstNode): int =
       return i
   result = -1
 
+proc lineInfo(p: RstParser, iTok: int): TLineInfo =
+  result.col = int16(p.col + p.tok[iTok].col)
+  result.line = uint16(p.line + p.tok[iTok].line)
+  result.fileIndex = p.s.currFileIdx
+
+proc lineInfo(p: RstParser): TLineInfo = lineInfo(p, p.idx)
+# TODO: we need this simplification because we don't preserve exact starting
+# token of currently parsed element:
+proc prevLineInfo(p: RstParser): TLineInfo = lineInfo(p, p.idx-1)
+
 proc setSub(p: var RstParser, key: string, value: PRstNode) =
   var length = p.s.subs.len
   for i in 0 ..< length:
     if key == p.s.subs[i].key:
       p.s.subs[i].value = value
       return
-  p.s.subs.add(Substitution(key: key, value: value))
+  p.s.subs.add(Substitution(key: key, value: value, info: prevLineInfo(p)))
 
-proc setRef(p: var RstParser, key: string, value: PRstNode) =
+proc setRef(p: var RstParser, key: string, value: PRstNode,
+            refType: SubstitutionKind) =
   var length = p.s.refs.len
   for i in 0 ..< length:
     if key == p.s.refs[i].key:
@@ -820,37 +874,111 @@ proc setRef(p: var RstParser, key: string, value: PRstNode) =
         rstMessage(p, mwRedefinitionOfLabel, key)
       p.s.refs[i].value = value
       return
-  p.s.refs.add(Substitution(key: key, value: value))
+  p.s.refs.add(Substitution(kind: refType, key: key, value: value,
+                            info: prevLineInfo(p)))
 
-proc findRef(s: PRstSharedState, key: string): PRstNode =
+proc findRef(s: PRstSharedState, key: string): seq[Substitution] =
   for i in countup(0, high(s.refs)):
     if key == s.refs[i].key:
-      return s.refs[i].value
-
-proc addAnchor(p: var RstParser, refn: string, reset: bool) =
-  ## add anchor `refn` to anchor aliases and update last anchor ``curAnchor``
-  if p.curAnchor == "":
-    p.s.anchors.add (refn, @[refn])
+      result.add s.refs[i]
+
+# Ambiguity in links: we don't follow procedure of removing implicit targets
+# defined in https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#implicit-hyperlink-targets
+# Instead we just give explicit links a higher priority than implicit ones
+# and report ambiguities as warnings. Hopefully it is easy to remove
+# ambiguities manually. Nim auto-generated links from ``docgen.nim``
+# have lowest priority: 1 (for procs) and below for other symbol types.
+
+proc refPriority(k: SubstitutionKind): int =  # larger value = preferred link
+  case k
+  of rstSubstitution: 8
+  of hyperlinkAlias: 7
+  of implicitHyperlinkAlias: 2
+
+proc internalRefPriority(k: RstAnchorKind): int =  # larger value = preferred
+  case k
+  of manualDirectiveAnchor: 6
+  of manualInlineAnchor: 5
+  of footnoteAnchor: 4
+  of headlineAnchor: 3
+
+proc addAnchorRst(p: var RstParser, name: string, refn: string, reset: bool,
+                  anchorType: RstAnchorKind) =
+  ## Registers anchor `refn` under the alias `name` in `p.s.anchors` and
+  ## then updates (or clears, if `reset`) `curAnchor` / `curAnchorName`.
+  ##
+  ## While a previous anchor is still pending (`curAnchorName` is set) the
+  ## new entry shares that anchor's `mainAnchor` ref, so overwriting it
+  ## renames the target for every alias at once.
+  let sharedRef =
+    if p.curAnchorName == "":
+      new string  # no pending anchor: start a fresh shared string
+    else:
+      # reuse the ref of the pending anchor; the write below overrides
+      # the previous mainAnchor in all aliases
+      p.s.anchors[p.curAnchorName][0].mainAnchor
+  sharedRef[] = refn
+  let entry = AnchorSubst(kind: arInternalRst, mainAnchor: sharedRef,
+                          priority: internalRefPriority(anchorType),
+                          info: prevLineInfo(p), anchorType: anchorType)
+  p.s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add(entry)
+  if reset:
+    p.curAnchor = ""
+    p.curAnchorName = ""
+  else:
+    p.curAnchor = refn
+    p.curAnchorName = name
+
+proc addAnchorNim*(s: var PRstSharedState, refn: string, tooltip: string,
+                   langSym: LangSymbol, priority: int,
+                   info: TLineInfo) =
+  ## Adds an anchor `refn` (`mainAnchor`), which follows
+  ## the rule `arNim` (i.e. a symbol in ``*.nim`` file)
+  var anchRef = new string
+  anchRef[] = refn
+  s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add(
+      AnchorSubst(kind: arNim, mainAnchor: anchRef, langSym: langSym,
+                  tooltip: tooltip, priority: priority,
+                  info: info))
+
+proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode,
+                       info: TLineInfo): seq[AnchorSubst] =
+  ## Returns the `arNim` anchors whose symbol matches the doc link
+  ## `signature`. For overloads either the concrete (non-group) anchors or
+  ## the overload group is selected, see below. `info` is currently unused.
+  let langSym = toLangSymbol(signature)
+  let substitutions = s.anchors.getOrDefault(langSym.name,
+                                             newSeq[AnchorSubst]())
+  if substitutions.len == 0: return
+  # map symKind (like "proc") -> found symbols/groups:
+  var found: Table[string, seq[AnchorSubst]]
+  for subst in substitutions:
+    if subst.kind == arNim and match(subst.langSym, langSym):
+      found.mgetOrPut(subst.langSym.symKind, newSeq[AnchorSubst]()).add subst
+  for symKind, sList in found:
+    if sList.len == 1:
+      result.add sList[0]
+    else:  # > 1, there are overloads, potential ambiguity in this `symKind`
+      if langSym.parametersProvided:
+        # there are non-group signatures, select only them
+        for subst in sList:
+          if not subst.langSym.isGroup:
+            result.add subst
+      else:  # when there are many overloads a link like foo_ points to all
+             # of them, so selecting the group
+        var foundGroup = false  # must start false or the assert is a no-op
+        for subst in sList:
+          if subst.langSym.isGroup:
+            result.add subst
+            foundGroup = true
+            break
+        doAssert foundGroup, "docgen has not generated the group"
 
-proc findMainAnchor(s: PRstSharedState, refn: string): string =
-  for subst in s.anchors:
-    if subst.mainAnchor == refn:  # no need to rename
-      result = subst.mainAnchor
-      break
-    var toLeave = false
-    for anchor in subst.aliases:
-      if anchor == refn:  # this anchor will be named as mainAnchor
-        result = subst.mainAnchor
-        toLeave = true
-    if toLeave:
-      break
+proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo):
+                      seq[AnchorSubst] =
+  ## Returns all `arInternalRst` anchors registered under the lowercased
+  ## `linkText`, in their stored order.
+  for cand in s.anchors.getOrDefault(linkText.toLowerAscii):
+    if cand.kind == arInternalRst:
+      result.add cand
 
 proc addFootnoteNumManual(p: var RstParser, num: int) =
   ## add manually-numbered footnote
@@ -860,13 +988,6 @@ proc addFootnoteNumManual(p: var RstParser, num: int) =
       return
   p.s.footnotes.add((fnManualNumber, num, -1, -1, $num))
 
-proc lineInfo(p: RstParser, iTok: int): TLineInfo =
-  result.col = int16(p.col + p.tok[iTok].col)
-  result.line = uint16(p.line + p.tok[iTok].line)
-  result.fileIndex = p.s.currFileIdx
-
-proc lineInfo(p: RstParser): TLineInfo = lineInfo(p, p.idx)
-
 proc addFootnoteNumAuto(p: var RstParser, label: string) =
   ## add auto-numbered footnote.
   ## Empty label [#] means it'll be resolved by the occurrence.
@@ -989,6 +1110,7 @@ proc newRstNodeA(p: var RstParser, kind: RstNodeKind): PRstNode =
   if p.curAnchor != "":
     result.anchor = p.curAnchor
     p.curAnchor = ""
+    p.curAnchorName = ""
 
 template newLeaf(s: string): PRstNode = newRstLeaf(s)
 
@@ -1255,7 +1377,7 @@ proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
       else:
         newKind = rnHyperlink
         newSons = @[a, b]
-        setRef(p, rstnodeToRefname(a), b)
+        setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias)
       result = newRstNode(newKind, newSons)
     else:  # some link that will be resolved in `resolveSubs`
       newKind = rnRef
@@ -1562,7 +1684,8 @@ proc parseInline(p: var RstParser, father: PRstNode) =
       inc p.idx
       parseUntil(p, n, "`", false)
       let refn = rstnodeToRefname(n)
-      p.s.anchors.add (refn, @[refn])
+      addAnchorRst(p, name = linkName(n), refn = refn, reset = true,
+                   anchorType=manualInlineAnchor)
       father.add(n)
     elif roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
       inc p.idx
@@ -2084,7 +2207,8 @@ proc parseHeadline(p: var RstParser): PRstNode =
     result.level = getLevel(p, c, hasOverline=false)
     checkHeadingHierarchy(p, result.level)
     p.s.hCurLevel = result.level
-  addAnchor(p, rstnodeToRefname(result), reset=true)
+  addAnchorRst(p, linkName(result), rstnodeToRefname(result), reset=true,
+               anchorType=headlineAnchor)
 
 proc parseOverline(p: var RstParser): PRstNode =
   var c = currentTok(p).symbol[0]
@@ -2106,7 +2230,8 @@ proc parseOverline(p: var RstParser): PRstNode =
   if currentTok(p).kind == tkAdornment:
     inc p.idx
     if currentTok(p).kind == tkIndent: inc p.idx
-  addAnchor(p, rstnodeToRefname(result), reset=true)
+  addAnchorRst(p, linkName(result), rstnodeToRefname(result), reset=true,
+               anchorType=headlineAnchor)
 
 type
   IntSeq = seq[int]
@@ -2837,7 +2962,7 @@ proc parseFootnote(p: var RstParser): PRstNode =
     anchor.add $p.s.lineFootnoteSym.len
   of fnCitation:
     anchor.add rstnodeToRefname(label)
-  addAnchor(p, anchor, reset=true)
+  addAnchorRst(p, anchor, anchor, reset=true, anchorType=footnoteAnchor)
   result.anchor = anchor
   if currentTok(p).kind == tkWhite: inc p.idx
   discard parseBlockContent(p, result, parseSectionWrapper)
@@ -2858,13 +2983,23 @@ proc parseDotDot(p: var RstParser): PRstNode =
   elif match(p, p.idx, " _"):
     # hyperlink target:
     inc p.idx, 2
-    var a = getReferenceName(p, ":")
+    var ending = ":"
+    if currentTok(p).symbol == "`":
+      inc p.idx
+      ending = "`"
+    var a = getReferenceName(p, ending)
+    if ending == "`":
+      if currentTok(p).symbol == ":":
+        inc p.idx
+      else:
+        rstMessage(p, meExpected, ":")
     if currentTok(p).kind == tkWhite: inc p.idx
     var b = untilEol(p)
     if len(b) == 0:  # set internal anchor
-      addAnchor(p, rstnodeToRefname(a), reset=false)
+      addAnchorRst(p, linkName(a), rstnodeToRefname(a), reset=false,
+                   anchorType=manualDirectiveAnchor)
     else:  # external hyperlink
-      setRef(p, rstnodeToRefname(a), b)
+      setRef(p, rstnodeToRefname(a), b, refType=hyperlinkAlias)
   elif match(p, p.idx, " |"):
     # substitution definitions:
     inc p.idx, 2
@@ -2892,7 +3027,7 @@ proc rstParsePass1*(fragment: string,
                     sharedState: PRstSharedState): PRstNode =
   ## Parses an RST `fragment`.
   ## The result should be further processed by
-  ## `preparePass2` and `resolveSubs` (which is pass 2).
+  ## preparePass2_ and resolveSubs_ (which is pass 2).
   var p: RstParser
   initParser(p, sharedState)
   p.line = line
@@ -2905,6 +3040,65 @@ proc preparePass2*(s: PRstSharedState, mainNode: PRstNode) =
   countTitles(s, mainNode)
   orderFootnotes(s)
 
+proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode =
+    # Associate this link alias with its target and change node kind to
+    # rnHyperlink, rnNimdocRef or rnInternalRef appropriately.
+    type LinkDef = object
+      ar: AnchorRule
+      priority: int
+      tooltip: string
+      target: PRstNode
+      info: TLineInfo
+    proc cmp(x, y: LinkDef): int =
+      result = cmp(x.priority, y.priority)
+      if result == 0:
+        result = cmp(x.target, y.target)
+    var foundLinks: seq[LinkDef]
+    let text = newRstNode(rnInner, n.sons)
+    let refn = rstnodeToRefname(n)
+    var hyperlinks = findRef(s, refn)
+    for y in hyperlinks:
+      foundLinks.add LinkDef(ar: arHyperlink, priority: refPriority(y.kind),
+                             target: y.value, info: y.info,
+                             tooltip: "(" & $y.kind & ")")
+    let substRst = findMainAnchorRst(s, text.addNodes, n.info)
+    for subst in substRst:
+      foundLinks.add LinkDef(ar: arInternalRst, priority: subst.priority,
+                             target: newLeaf(subst.mainAnchor[]),
+                             info: subst.info,
+                             tooltip: "(" & $subst.anchorType & ")")
+    if roNimFile in s.options:
+      let substNim = findMainAnchorNim(s, signature=text, n.info)
+      for subst in substNim:
+        foundLinks.add LinkDef(ar: arNim, priority: subst.priority,
+                               target: newLeaf(subst.mainAnchor[]),
+                               info: subst.info, tooltip: subst.tooltip)
+    foundLinks.sort(cmp = cmp, order = Descending)  # highest priority first
+    let linkText = addNodes(n)
+    if foundLinks.len >= 1:
+      let kind = if foundLinks[0].ar == arHyperlink: rnHyperlink
+                 elif foundLinks[0].ar == arNim: rnNimdocRef
+                 else: rnInternalRef
+      result = newRstNode(kind)
+      result.sons = @[text, foundLinks[0].target]
+      if kind == rnNimdocRef: result.tooltip = foundLinks[0].tooltip
+      if foundLinks.len > 1:  # report ambiguous link, listing every candidate
+        var targets = newSeq[string]()
+        for l in foundLinks:
+          var t = "    "
+          if s.filenames.len > 1:
+            t.add getFilename(s.filenames, l.info.fileIndex)
+          let n = l.info.line
+          let c = l.info.col + ColRstOffset
+          t.add "($1, $2): $3" % [$n, $c, l.tooltip]
+          targets.add t
+        rstMessage(s.filenames, s.msgHandler, n.info, mwAmbiguousLink,
+                   "`$1`\n  clash:\n$2" % [
+                     linkText, targets.join("\n")])
+    else:  # nothing found: return `n` unchanged and report a broken link
+      result = n
+      rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, linkText)
+
 proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode =
   ## Makes pass 2 of RST parsing.
   ## Resolves substitutions and anchor aliases, groups footnotes.
@@ -2933,21 +3127,7 @@ proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode =
     elif s.hTitleCnt == 0:
       n.level += 1
   of rnRef:
-    let refn = rstnodeToRefname(n)
-    var y = findRef(s, refn)
-    if y != nil:
-      result = newRstNode(rnHyperlink)
-      let text = newRstNode(rnInner, n.sons)
-      result.sons = @[text, y]
-    else:
-      let anchor = findMainAnchor(s, refn)
-      if anchor != "":
-        result = newRstNode(rnInternalRef)
-        let text = newRstNode(rnInner, n.sons)
-        result.sons = @[text,             # visible text of reference
-                        newLeaf(anchor)]  # link itself
-      else:
-        rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, refn)
+    result = resolveLink(s, n)
   of rnFootnote:
     var (fnType, num) = getFootnoteType(n.sons[0])
     case fnType
@@ -2993,9 +3173,10 @@ proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode =
     of fnCitation:
       result.add n.sons[0]
       refn.add rstnodeToRefname(n)
-    let anch = findMainAnchor(s, refn)
-    if anch != "":
-      result.add newLeaf(anch)     # add link
+    # TODO: correctly report ambiguities
+    let anchorInfo = findMainAnchorRst(s, refn, n.info)
+    if anchorInfo.len != 0:
+      result.add newLeaf(anchorInfo[0].mainAnchor[])  # add link
     else:
       rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, refn)
       result.add newLeaf(refn)  # add link
diff --git a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim
index e1ed7c099..bc7b9a650 100644
--- a/lib/packages/docutils/rstast.nim
+++ b/lib/packages/docutils/rstast.nim
@@ -43,6 +43,7 @@ type
     rnFootnoteGroup,          # footnote group - exists for a purely stylistic
                               # reason: to display a few footnotes as 1 block
     rnStandaloneHyperlink, rnHyperlink, rnRef, rnInternalRef, rnFootnoteRef,
+    rnNimdocRef,              # reference to automatically generated Nim symbol
     rnDirective,              # a general directive
     rnDirArg,                 # a directive argument (for some directives).
                               # here are directives that are not rnDirective:
@@ -104,6 +105,8 @@ type
         rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef:
       info*: TLineInfo        ## To have line/column info for warnings at
                               ## nodes that are post-processed after parsing
+    of rnNimdocRef:
+      tooltip*: string
     else:
       discard
     anchor*: string           ## anchor, internal link target
diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
index 8eac37307..a556aa2e3 100644
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -60,8 +60,8 @@ type
   MetaEnum* = enum
     metaNone, metaTitle, metaSubtitle, metaAuthor, metaVersion
 
-  EscapeMode = enum  # in Latex text inside options [] and URLs is
-                     # escaped slightly differently than in normal text
+  EscapeMode* = enum  # in Latex text inside options [] and URLs is
+                      # escaped slightly differently than in normal text
     emText, emOption, emUrl  # emText is currently used for code also
 
   RstGenerator* = object of RootObj
@@ -201,7 +201,9 @@ proc addTexChar(dest: var string, c: char, escMode: EscapeMode) =
   ## All escapes that need to work in text and code blocks (`emText` mode)
   ## should start from \ (to be compatible with fancyvrb/fvextra).
   case c
-  of '_', '$', '&', '#', '%': add(dest, "\\" & c)
+  of '_', '&', '#', '%': add(dest, "\\" & c)
+  # \label and \pageref reject \$ for some reason but accept a plain $:
+  of '$': (if escMode == emUrl: add(dest, c) else: add(dest, "\\" & c))
   # \~ and \^ have a special meaning unless they are followed by {}
   of '~', '^': add(dest, "\\" & c & "{}")
   # Latex loves to substitute ` to opening quote, even in texttt mode!
@@ -1180,7 +1182,8 @@ proc renderAdmonition(d: PDoc, n: PRstNode, result: var string) =
         "$1\n\\end{rstadmonition}\n",
       result)
 
-proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string, external: bool) =
+proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string,
+                     external: bool, nimdoc = false, tooltip="") =
   var linkStr = ""
   block:
     let mode = d.escMode
@@ -1189,14 +1192,19 @@ proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string, external
     d.escMode = mode
   var textStr = ""
   renderRstToOut(d, text, textStr)
+  let nimDocStr = if nimdoc: " nimdoc" else: ""
+  var tooltipStr = ""
+  if tooltip != "":
+    tooltipStr = """ title="$1"""" % [ esc(d.target, tooltip) ]
   if external:
     dispA(d.target, result,
-      "<a class=\"reference external\" href=\"$2\">$1</a>",
-      "\\href{$2}{$1}", [textStr, linkStr])
+      "<a class=\"reference external$3\"$4 href=\"$2\">$1</a>",
+      "\\href{$2}{$1}", [textStr, linkStr, nimDocStr, tooltipStr])
   else:
     dispA(d.target, result,
-      "<a class=\"reference internal\" href=\"#$2\">$1</a>",
-      "\\hyperlink{$2}{$1} (p.~\\pageref{$2})", [textStr, linkStr])
+      "<a class=\"reference internal$3\"$4 href=\"#$2\">$1</a>",
+      "\\hyperlink{$2}{$1} (p.~\\pageref{$2})",
+      [textStr, linkStr, nimDocStr, tooltipStr])
 
 proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
   if n == nil: return
@@ -1329,6 +1337,9 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
     renderHyperlink(d, text=n.sons[0], link=n.sons[0], result, external=true)
   of rnInternalRef:
     renderHyperlink(d, text=n.sons[0], link=n.sons[1], result, external=false)
+  of rnNimdocRef:
+    renderHyperlink(d, text=n.sons[0], link=n.sons[1], result, external=false,
+                    nimdoc=true, tooltip=n.tooltip)
   of rnHyperlink:
     renderHyperlink(d, text=n.sons[0], link=n.sons[1], result, external=true)
   of rnFootnoteRef:
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 873949ca6..2234a8625 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -273,6 +273,11 @@ func nimIdentNormalize*(s: string): string =
   ##
   ## That means to convert to lower case and remove any '_' on all characters
   ## except first one.
+  ##
+  ## .. Warning:: Backticks (`) are not handled: they remain *as is* and
+  ##    spaces are preserved. See `nimIdentBackticksNormalize 
+  ##    <dochelpers.html#nimIdentBackticksNormalize,string>`_ for
+  ##    an alternative approach.
   runnableExamples:
     doAssert nimIdentNormalize("Foo_bar") == "Foobar"
   result = newString(s.len)
author	Andrey Makarov <ph.makarov@gmail.com>	2021-10-28 20:20:52 +0300
committer	GitHub <noreply@github.com>	2021-10-28 19:20:52 +0200
commit	7ba2659f733b97db63b7552415ad048e34d4a11a (patch)
tree	54eb85f0eabc927c6d15c2d69c45aefd09efa39d /lib
parent	c80e2c173686bd12904e5487752dc0ce20cb8bcb (diff)
download	Nim-7ba2659f733b97db63b7552415ad048e34d4a11a.tar.gz