diff options
author | Andrey Makarov <ph.makarov@gmail.com> | 2023-09-03 08:09:36 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-03 16:09:36 +0200 |
commit | c5495f40d5d881e6bd155c9e6c9c6e5e49b749a7 (patch) | |
tree | 961e73fccfcc15efc6bc1367352c85253910b523 | |
parent | 480e98c479035a8a19ff543bace3616d202e1ea2 (diff) | |
download | Nim-c5495f40d5d881e6bd155c9e6c9c6e5e49b749a7.tar.gz |
docgen: add Pandoc footnotes (fixes #21080) (#22591)
This implements Pandoc Markdown-style footnotes that are compatible with Pandoc referencing syntax: a reference such as `Ref. [^ftn].` in the text is paired with a definition block such as `[^ftn]: Block.` See https://pandoc.org/MANUAL.html#footnotes for more examples.
-rw-r--r-- | doc/contributing.md | 4 | ||||
-rw-r--r-- | doc/docgen.md | 4 | ||||
-rw-r--r-- | doc/markdown_rst.md | 20 | ||||
-rw-r--r-- | lib/packages/docutils/rst.nim | 221 | ||||
-rw-r--r-- | tests/stdlib/trst.nim | 67 | ||||
-rw-r--r-- | tests/stdlib/trstgen.nim | 6 |
6 files changed, 221 insertions, 101 deletions
diff --git a/doc/contributing.md b/doc/contributing.md index 47e1fa3dd..420c1438e 100644 --- a/doc/contributing.md +++ b/doc/contributing.md @@ -336,7 +336,7 @@ To avoid accidental highlighting follow this rule in ``*.nim`` files: .. Note:: ``*.rst`` files have ``:literal:`` as their default role. So for them the rule above is only applicable if the ``:nim:`` role - is set up manually as the default \[*]: + is set up manually as the default [^1]: .. role:: nim(code) :language: nim @@ -345,7 +345,7 @@ To avoid accidental highlighting follow this rule in ``*.nim`` files: The first 2 lines are for other RST implementations, including Github one. - \[*] this is fulfilled when ``doc/rstcommon.rst`` is included. + [^1]: this is fulfilled when ``doc/rstcommon.rst`` is included. Best practices ============== diff --git a/doc/docgen.md b/doc/docgen.md index a05da8198..21058e88d 100644 --- a/doc/docgen.md +++ b/doc/docgen.md @@ -354,9 +354,9 @@ This pertains to any exported symbol like `proc`, `const`, `iterator`, etc. Link text is either one word or a group of words enclosed by delimiters (brackets ``[...]`` for Markdown or backticks `\`...\`_` for RST). Link text will be displayed *as is* while *link target* will be set to -the anchor \[*] of Nim symbol that corresponds to link text. +the anchor [^1] of Nim symbol that corresponds to link text. -\[*] anchors' format is described in [HTML anchor generation] section below. +[^1] anchors' format is described in [HTML anchor generation] section below. If you have a constant: diff --git a/doc/markdown_rst.md b/doc/markdown_rst.md index 9a266be9a..b7f091649 100644 --- a/doc/markdown_rst.md +++ b/doc/markdown_rst.md @@ -32,12 +32,12 @@ The `md2tex`:option: command is invoked identically to `md2html`:option:, but outputs a ``.tex`` file instead of ``.html``. These tools embedded into Nim compiler; the compiler can output -the result to HTML \[#html] or Latex \[#latex]. +the result to HTML [^html] or Latex [^latex]. 
-\[#html] commands `nim doc`:cmd: for ``*.nim`` files and +[^html]: commands `nim doc`:cmd: for ``*.nim`` files and `nim rst2html`:cmd: for ``*.rst`` files -\[#latex] commands `nim doc2tex`:cmd: for ``*.nim`` and +[^latex]: commands `nim doc2tex`:cmd: for ``*.nim`` and `nim rst2tex`:cmd: for ``*.rst``. Full list of supported commands: @@ -127,7 +127,9 @@ Markdown-specific features ``` * Markdown links ``[...](...)`` * Pandoc syntax for automatic links ``[...]``, see [Referencing] for description -+ Markdown literal blocks indented by 4 or more spaces +* Pandoc syntax for footnotes, including ``[^10]`` (manually numbered) + and ``[^someName]`` (auto-numbered with a label) +* Markdown literal blocks indented by 4 or more spaces * Markdown headlines * Markdown block quotes * Markdown syntax for definition lists @@ -139,8 +141,8 @@ Additional Nim-specific features * referencing to definitions in external files, see [Markup external referencing] section -* directives: ``code-block`` \[cmp:Sphinx], ``title``, - ``index`` \[cmp:Sphinx] +* directives: ``code-block`` [^Sphinx], ``title``, + ``index`` [^Sphinx] * predefined roles - ``:nim:`` (default), ``:c:`` (C programming language), ``:python:``, ``:yaml:``, ``:java:``, ``:cpp:`` (C++), ``:csharp`` (C#). 
@@ -154,9 +156,9 @@ Additional Nim-specific features - ``:cmd:`` for commands and common shells syntax - ``:console:`` the same for interactive sessions (commands should be prepended by ``$``) - - ``:program:`` for executable names \[cmp:Sphinx] + - ``:program:`` for executable names [^Sphinx] (one can just use ``:cmd:`` on single word) - - ``:option:`` for command line options \[cmp:Sphinx] + - ``:option:`` for command line options [^Sphinx] - ``:tok:``, a role for highlighting of programming language tokens * ***triple emphasis*** (bold and italic) using \*\*\* * ``:idx:`` role for \`interpreted text\` to include the link to this @@ -171,7 +173,7 @@ Additional Nim-specific features and `doc`:option: will be left in the final document. * emoji / smiley symbols -\[cmp:Sphinx] similar but different from the directives of +[^Sphinx]: similar but different from the directives of Python [Sphinx directives] and [Sphinx roles] extensions .. Note:: By default Nim has ``roSupportMarkdown`` and diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index 2894010ef..a9bc4db91 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -442,6 +442,7 @@ type ## because RST can have >1 alias per 1 anchor EParseError* = object of ValueError + SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.} const LineRstInit* = 1 ## Initial line number for standalone RST text @@ -597,8 +598,10 @@ proc rstMessage(p: RstParser, msgKind: MsgKind) = # # TODO: we need to apply this strategy to all markup elements eventually. 
-func isPureRst(p: RstParser): bool = - roSupportMarkdown notin p.s.options +func isPureRst(p: RstParser): bool = roSupportMarkdown notin p.s.options +func isRst(p: RstParser): bool = roPreferMarkdown notin p.s.options +func isMd(p: RstParser): bool = roPreferMarkdown in p.s.options +func isMd(s: PRstSharedState): bool = roPreferMarkdown in s.options proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string) = let realMsgKind = if isPureRst(p): errorType else: mwRstStyle @@ -1692,7 +1695,7 @@ proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool = else: result = false -proc getFootnoteType(label: PRstNode): (FootnoteType, int) = +proc getRstFootnoteType(label: PRstNode): (FootnoteType, int) = if label.sons.len >= 1 and label.sons[0].kind == rnLeaf and label.sons[0].text == "#": if label.sons.len == 1: @@ -1710,7 +1713,18 @@ proc getFootnoteType(label: PRstNode): (FootnoteType, int) = else: result = (fnCitation, -1) -proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = +proc getMdFootnoteType(label: PRstNode): (FootnoteType, int) = + try: + result = (fnManualNumber, parseInt(label.sons[0].text)) + except ValueError: + result = (fnAutoNumberLabel, -1) + +proc getFootnoteType(s: PRstSharedState, label: PRstNode): (FootnoteType, int) = + ## Returns footnote/citation type and manual number (if present). + if isMd(s): getMdFootnoteType(label) + else: getRstFootnoteType(label) + +proc parseRstFootnoteName(p: var RstParser, reference: bool): PRstNode = ## parse footnote/citation label. Precondition: start at `[`. ## Label text should be valid ref. name symbol, otherwise nil is returned. var i = p.idx + 1 @@ -1740,6 +1754,41 @@ proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = inc i p.idx = i +proc isMdFootnoteName(p: RstParser, reference: bool): bool = + ## Pandoc Markdown footnote extension. 
+ let j = p.idx + result = p.tok[j].symbol == "[" and p.tok[j+1].symbol == "^" and + p.tok[j+2].kind == tkWord + +proc parseMdFootnoteName(p: var RstParser, reference: bool): PRstNode = + if isMdFootnoteName(p, reference): + result = newRstNode(rnInner) + var j = p.idx + 2 + while p.tok[j].kind in {tkWord, tkOther} or + validRefnamePunct(p.tok[j].symbol): + result.add newLeaf(p.tok[j].symbol) + inc j + if j == p.idx + 2: + return nil + if p.tok[j].symbol == "]": + if reference: + p.idx = j + 1 # skip ] + else: + if p.tok[j+1].symbol == ":": + p.idx = j + 2 # skip ]: + else: + result = nil + else: + result = nil + else: + result = nil + +proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = + if isMd(p): parseMdFootnoteName(p, reference) + else: + if isInlineMarkupStart(p, "["): parseRstFootnoteName(p, reference) + else: nil + proc isMarkdownCodeBlock(p: RstParser, idx: int): bool = let tok = p.tok[idx] template allowedSymbol: bool = @@ -1806,16 +1855,12 @@ proc parseInline(p: var RstParser, father: PRstNode) = var n = newRstNode(rnSubstitutionReferences, info=lineInfo(p, p.idx+1)) parseUntil(p, n, "|", false) father.add(n) - elif roSupportMarkdown in p.s.options and - currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and - parseMarkdownLink(p, father): - discard "parseMarkdownLink already processed it" - elif isInlineMarkupStart(p, "[") and nextTok(p).symbol != "[" and + elif currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and (n = parseFootnoteName(p, reference=true); n != nil): var nn = newRstNode(rnFootnoteRef) nn.info = lineInfo(p, saveIdx+1) nn.add n - let (fnType, _) = getFootnoteType(n) + let (fnType, _) = getFootnoteType(p.s, n) case fnType of fnAutoSymbol: p.s.lineFootnoteSymRef.add lineInfo(p) @@ -1823,6 +1868,10 @@ proc parseInline(p: var RstParser, father: PRstNode) = p.s.lineFootnoteNumRef.add lineInfo(p) else: discard father.add(nn) + elif roSupportMarkdown in p.s.options and + currentTok(p).symbol == "[" and 
nextTok(p).symbol != "[" and + parseMarkdownLink(p, father): + discard "parseMarkdownLink already processed it" else: if roSupportSmilies in p.s.options: let n = parseSmiley(p) @@ -1960,8 +2009,26 @@ proc getMdBlockIndent(p: RstParser): int = else: result = nextIndent # allow parsing next lines [case.3] -template isRst(p: RstParser): bool = roPreferMarkdown notin p.s.options -template isMd(p: RstParser): bool = roPreferMarkdown in p.s.options +proc indFollows(p: RstParser): bool = + result = currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) + +proc parseBlockContent(p: var RstParser, father: var PRstNode, + contentParser: SectionParser): bool {.gcsafe.} = + ## parse the final content part of explicit markup blocks (directives, + ## footnotes, etc). Returns true if succeeded. + if currentTok(p).kind != tkIndent or indFollows(p): + let blockIndent = getWrappableIndent(p) + pushInd(p, blockIndent) + let content = contentParser(p) + popInd(p) + father.add content + result = true + +proc parseSectionWrapper(p: var RstParser): PRstNode = + result = newRstNode(rnInner) + parseSection(p, result) + while result.kind == rnInner and result.len == 1: + result = result.sons[0] proc parseField(p: var RstParser): PRstNode = ## Returns a parsed rnField node. @@ -2298,6 +2365,8 @@ proc whichSection(p: RstParser): RstNodeKind = elif roSupportMarkdown in p.s.options and predNL(p) and match(p, p.idx, "| w") and findPipe(p, p.idx+3): result = rnMarkdownTable + elif isMd(p) and isMdFootnoteName(p, reference=false): + result = rnFootnote elif currentTok(p).symbol == "|" and isLineBlock(p): result = rnLineBlock elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p): @@ -2866,7 +2935,7 @@ proc parseOptionList(p: var RstParser): PRstNode = break proc parseMdDefinitionList(p: var RstParser): PRstNode = - ## Parses (Pandoc/kramdown/PHPextra) Mardkown definition lists. + ## Parses (Pandoc/kramdown/PHPextra) Markdown definition lists. 
result = newRstNodeA(p, rnMdDefList) let termCol = currentTok(p).col while true: @@ -3022,6 +3091,57 @@ proc parseEnumList(p: var RstParser): PRstNode = else: break +proc prefix(ftnType: FootnoteType): string = + case ftnType + of fnManualNumber: result = "footnote-" + of fnAutoNumber: result = "footnoteauto-" + of fnAutoNumberLabel: result = "footnote-" + of fnAutoSymbol: result = "footnotesym-" + of fnCitation: result = "citation-" + +proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} = + ## Parses footnotes and citations, always returns 2 sons: + ## + ## 1) footnote label, always containing rnInner with 1 or more sons + ## 2) footnote body, which may be nil + var label: PRstNode + if isRst(p): + inc p.idx # skip space after `..` + label = parseFootnoteName(p, reference=false) + if label == nil: + if isRst(p): + dec p.idx + return nil + result = newRstNode(rnFootnote) + result.add label + let (fnType, i) = getFootnoteType(p.s, label) + var name = "" + var anchor = fnType.prefix + case fnType + of fnManualNumber: + addFootnoteNumManual(p, i) + anchor.add $i + of fnAutoNumber, fnAutoNumberLabel: + name = rstnodeToRefname(label) + addFootnoteNumAuto(p, name) + if fnType == fnAutoNumberLabel: + anchor.add name + else: # fnAutoNumber + result.order = p.s.lineFootnoteNum.len + anchor.add $result.order + of fnAutoSymbol: + addFootnoteSymAuto(p) + result.order = p.s.lineFootnoteSym.len + anchor.add $p.s.lineFootnoteSym.len + of fnCitation: + anchor.add rstnodeToRefname(label) + addAnchorRst(p, anchor, target = result, anchorType = footnoteAnchor) + result.anchor = anchor + if currentTok(p).kind == tkWhite: inc p.idx + discard parseBlockContent(p, result, parseSectionWrapper) + if result.len < 2: + result.add nil + proc sonKind(father: PRstNode, i: int): RstNodeKind = result = rnLeaf if i < father.len: result = father.sons[i].kind @@ -3064,6 +3184,7 @@ proc parseSection(p: var RstParser, result: PRstNode) = of rnLineBlock: a = parseLineBlock(p) of 
rnMarkdownBlockQuote: a = parseMarkdownBlockQuote(p) of rnDirective: a = parseDotDot(p) + of rnFootnote: a = parseFootnote(p) of rnEnumList: a = parseEnumList(p) of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)") of rnParagraph: discard @@ -3089,12 +3210,6 @@ proc parseSection(p: var RstParser, result: PRstNode) = result.sons[0] = newRstNode(rnInner, result.sons[0].sons, anchor=result.sons[0].anchor) -proc parseSectionWrapper(p: var RstParser): PRstNode = - result = newRstNode(rnInner) - parseSection(p, result) - while result.kind == rnInner and result.len == 1: - result = result.sons[0] - proc parseDoc(p: var RstParser): PRstNode = result = parseSectionWrapper(p) if currentTok(p).kind != tkEof: @@ -3104,7 +3219,6 @@ type DirFlag = enum hasArg, hasOptions, argIsFile, argIsWord DirFlags = set[DirFlag] - SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.} proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode = ## Parses arguments and options for a directive block. @@ -3147,21 +3261,6 @@ proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode popInd(p) result.add(options) -proc indFollows(p: RstParser): bool = - result = currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) - -proc parseBlockContent(p: var RstParser, father: var PRstNode, - contentParser: SectionParser): bool {.gcsafe.} = - ## parse the final content part of explicit markup blocks (directives, - ## footnotes, etc). Returns true if succeeded. - if currentTok(p).kind != tkIndent or indFollows(p): - let blockIndent = getWrappableIndent(p) - pushInd(p, blockIndent) - let content = contentParser(p) - popInd(p) - father.add content - result = true - proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags, contentParser: SectionParser): PRstNode = ## A helper proc that does main work for specific directive procs. 
@@ -3398,54 +3497,6 @@ proc selectDir(p: var RstParser, d: string): PRstNode = else: rstMessage(p, meInvalidDirective, d, tok.line, tok.col) -proc prefix(ftnType: FootnoteType): string = - case ftnType - of fnManualNumber: result = "footnote-" - of fnAutoNumber: result = "footnoteauto-" - of fnAutoNumberLabel: result = "footnote-" - of fnAutoSymbol: result = "footnotesym-" - of fnCitation: result = "citation-" - -proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} = - ## Parses footnotes and citations, always returns 2 sons: - ## - ## 1) footnote label, always containing rnInner with 1 or more sons - ## 2) footnote body, which may be nil - inc p.idx - let label = parseFootnoteName(p, reference=false) - if label == nil: - dec p.idx - return nil - result = newRstNode(rnFootnote) - result.add label - let (fnType, i) = getFootnoteType(label) - var name = "" - var anchor = fnType.prefix - case fnType - of fnManualNumber: - addFootnoteNumManual(p, i) - anchor.add $i - of fnAutoNumber, fnAutoNumberLabel: - name = rstnodeToRefname(label) - addFootnoteNumAuto(p, name) - if fnType == fnAutoNumberLabel: - anchor.add name - else: # fnAutoNumber - result.order = p.s.lineFootnoteNum.len - anchor.add $result.order - of fnAutoSymbol: - addFootnoteSymAuto(p) - result.order = p.s.lineFootnoteSym.len - anchor.add $p.s.lineFootnoteSym.len - of fnCitation: - anchor.add rstnodeToRefname(label) - addAnchorRst(p, anchor, target = result, anchorType = footnoteAnchor) - result.anchor = anchor - if currentTok(p).kind == tkWhite: inc p.idx - discard parseBlockContent(p, result, parseSectionWrapper) - if result.len < 2: - result.add nil - proc parseDotDot(p: var RstParser): PRstNode = # parse "explicit markup blocks" result = nil @@ -3729,7 +3780,7 @@ proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode = of rnRstRef, rnPandocRef: result = resolveLink(s, n) of rnFootnote: - var (fnType, num) = getFootnoteType(n.sons[0]) + var (fnType, num) = getFootnoteType(s, n.sons[0]) case 
fnType of fnManualNumber, fnCitation: discard "no need to alter fixed text" @@ -3747,7 +3798,7 @@ proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode = n.sons[0].sons[0].text = sym n.sons[1] = resolveSubs(s, n.sons[1]) of rnFootnoteRef: - var (fnType, num) = getFootnoteType(n.sons[0]) + var (fnType, num) = getFootnoteType(s, n.sons[0]) template addLabel(number: int | string) = var nn = newRstNode(rnInner) nn.add newLeaf($number) diff --git a/tests/stdlib/trst.nim b/tests/stdlib/trst.nim index 329adc101..e39eae9c1 100644 --- a/tests/stdlib/trst.nim +++ b/tests/stdlib/trst.nim @@ -531,6 +531,73 @@ suite "RST parsing": ```' """ + test "Markdown footnotes": + # Testing also 1) correct order of manually-numbered and automatically- + # numbered footnotes; 2) no spaces between references (html & 3 below): + + check(dedent""" + Paragraph [^1] [^html-hyphen][^3] and [^latex] + + [^1]: footnote1 + + [^html-hyphen]: footnote2 + continuation2 + + [^latex]: footnote4 + + [^3]: footnote3 + continuation3 + """.toAst == + dedent""" + rnInner + rnInner + rnLeaf 'Paragraph' + rnLeaf ' ' + rnFootnoteRef + rnInner + rnLeaf '1' + rnLeaf 'footnote-1' + rnLeaf ' ' + rnFootnoteRef + rnInner + rnLeaf '2' + rnLeaf 'footnote-htmlminushyphen' + rnFootnoteRef + rnInner + rnLeaf '3' + rnLeaf 'footnote-3' + rnLeaf ' ' + rnLeaf 'and' + rnLeaf ' ' + rnFootnoteRef + rnInner + rnLeaf '4' + rnLeaf 'footnote-latex' + rnFootnoteGroup + rnFootnote anchor='footnote-1' + rnInner + rnLeaf '1' + rnLeaf 'footnote1' + rnFootnote anchor='footnote-htmlminushyphen' + rnInner + rnLeaf '2' + rnInner + rnLeaf 'footnote2' + rnLeaf ' ' + rnLeaf 'continuation2' + rnFootnote anchor='footnote-latex' + rnInner + rnLeaf '4' + rnLeaf 'footnote4' + rnFootnote anchor='footnote-3' + rnInner + rnLeaf '3' + rnInner + rnLeaf 'footnote3' + rnLeaf ' ' + rnLeaf 'continuation3' + """) + test "Markdown code blocks with more > 3 backticks": check(dedent""" ```` diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim 
index 8c68f68c9..934403665 100644 --- a/tests/stdlib/trstgen.nim +++ b/tests/stdlib/trstgen.nim @@ -1128,7 +1128,7 @@ Test1 Paragraph2 ref `internal anchor`_. """ - let output9 = input9.toHtml + let output9 = input9.toHtml(preferRst) # _`internal anchor` got erased: check "href=\"#internal-anchor\"" notin output9 check "href=\"#citation-another\"" in output9 @@ -1156,7 +1156,7 @@ Test1 doAssert "<a href=\"#citation-third\">[Third]</a>" in output10 let input11 = ".. [note]\n" # should not crash - let output11 = input11.toHtml + let output11 = input11.toHtml(preferRst) doAssert "<a href=\"#citation-note\">[note]</a>" in output11 # check that references to auto-numbered footnotes work @@ -1443,7 +1443,7 @@ Test1 Ref. target103_. """ - let output2 = input2.toHtml + let output2 = input2.toHtml(preferRst) # "target101" should be erased and changed to "section-xyz": doAssert "href=\"#target300\"" notin output2 doAssert "id=\"target300\"" notin output2 |