Diffstat (limited to 'lib/packages/docutils')
-rw-r--r-- | lib/packages/docutils/dochelpers.nim      |  298
-rw-r--r-- | lib/packages/docutils/docutils.nimble     |    4
-rw-r--r-- | lib/packages/docutils/docutils.nimble.old |    7
-rw-r--r-- | lib/packages/docutils/highlite.nim        |  207
-rw-r--r-- | lib/packages/docutils/rst.nim             | 2764
-rw-r--r-- | lib/packages/docutils/rstast.nim          |  127
-rw-r--r-- | lib/packages/docutils/rstgen.nim          |  705
-rw-r--r-- | lib/packages/docutils/rstidx.nim          |  141
8 files changed, 3087 insertions, 1166 deletions
diff --git a/lib/packages/docutils/dochelpers.nim b/lib/packages/docutils/dochelpers.nim new file mode 100644 index 000000000..0a41d85b5 --- /dev/null +++ b/lib/packages/docutils/dochelpers.nim @@ -0,0 +1,298 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2021 Nim contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Integration helpers between ``docgen.nim`` and ``rst.nim``. +## +## Function `toLangSymbol(linkText)`_ produces a signature `docLink` of +## `type LangSymbol`_ in ``rst.nim``, while `match(generated, docLink)`_ +## matches it with `generated`, produced from `PNode` by ``docgen.rst``. + +import rstast +import std/strutils + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] + + +type + LangSymbol* = object ## symbol signature in Nim + symKind*: string ## "proc", "const", "type", etc + symTypeKind*: string ## ""|enum|object|tuple - + ## valid only when `symKind == "type"` + name*: string ## plain symbol name without any parameters + generics*: string ## generic parameters (without brackets) + isGroup*: bool ## is LangSymbol a group with overloads? + # the following fields are valid iff `isGroup` == false + # (always false when parsed by `toLangSymbol` because link like foo_ + # can point to just a single symbol foo, e.g. proc). + parametersProvided*: bool ## to disambiguate `proc f`_ and `proc f()`_ + parameters*: seq[tuple[name: string, `type`: string]] + ## name-type seq, e.g. for proc + outType*: string ## result type, e.g. for proc + +proc `$`*(s: LangSymbol): string = # for debug + ("(symkind=$1, symTypeKind=$2, name=$3, generics=$4, isGroup=$5, " & + "parametersProvided=$6, parameters=$7, outType=$8)") % [ + s.symKind, s.symTypeKind , s.name, s.generics, $s.isGroup, + $s.parametersProvided, $s.parameters, s.outType] + +func nimIdentBackticksNormalize*(s: string): string = + ## Normalizes the string `s` as a Nim identifier. + ## + ## Unlike `nimIdentNormalize` removes spaces and backticks. + ## + ## .. Warning:: No checking (e.g. that identifiers cannot start from + ## digits or '_', or that number of backticks is even) is performed. + runnableExamples: + doAssert nimIdentBackticksNormalize("Foo_bar") == "Foobar" + doAssert nimIdentBackticksNormalize("FoO BAr") == "Foobar" + doAssert nimIdentBackticksNormalize("`Foo BAR`") == "Foobar" + doAssert nimIdentBackticksNormalize("` Foo BAR `") == "Foobar" + # not a valid identifier: + doAssert nimIdentBackticksNormalize("`_x_y`") == "_xy" + result = newString(s.len) + var firstChar = true + var j = 0 + for i in 0..len(s) - 1: + if s[i] in {'A'..'Z'}: + if not firstChar: # to lowercase + result[j] = chr(ord(s[i]) + (ord('a') - ord('A'))) + else: + result[j] = s[i] + firstChar = false + inc j + elif s[i] notin {'_', ' ', '`'}: + result[j] = s[i] + inc j + firstChar = false + elif s[i] == '_' and firstChar: + result[j] = '_' + inc j + firstChar = false + else: discard # just omit '`' or ' ' + if j != s.len: setLen(result, j) + +proc langSymbolGroup*(kind: string, name: string): LangSymbol = + if kind notin ["proc", "func", "macro", "method", "iterator", + "template", "converter"]: + raise newException(ValueError, "unknown symbol kind $1" % [kind]) + result = LangSymbol(symKind: kind, name: name, isGroup: true) + +proc toLangSymbol*(linkText: PRstNode): LangSymbol = + ## Parses `linkText` into a more structured form using a state machine. 
+ ## + ## This proc is designed to allow link syntax with operators even + ## without escaped backticks inside: + ## + ## `proc *`_ + ## `proc []`_ + ## + ## This proc should be kept in sync with the `renderTypes` proc from + ## ``compiler/typesrenderer.nim``. + template fail(msg: string) = + raise newException(ValueError, msg) + if linkText.kind notin {rnRstRef, rnInner}: + fail("toLangSymbol: wrong input kind " & $linkText.kind) + + const NimDefs = ["proc", "func", "macro", "method", "iterator", + "template", "converter", "const", "type", "var", + "enum", "object", "tuple", "module"] + template resolveSymKind(x: string) = + if x in ["enum", "object", "tuple"]: + result.symKind = "type" + result.symTypeKind = x + else: + result.symKind = x + type + State = enum + inBeginning + afterSymKind + beforeSymbolName # auxiliary state to catch situations like `proc []`_ after space + atSymbolName + afterSymbolName + genericsPar + parameterName + parameterType + outType + var state = inBeginning + var curIdent = "" + template flushIdent() = + if curIdent != "": + case state + of inBeginning: fail("incorrect state inBeginning") + of afterSymKind: resolveSymKind curIdent + of beforeSymbolName: fail("incorrect state beforeSymbolName") + of atSymbolName: result.name = curIdent.nimIdentBackticksNormalize + of afterSymbolName: fail("incorrect state afterSymbolName") + of genericsPar: result.generics = curIdent + of parameterName: result.parameters.add (curIdent, "") + of parameterType: + for a in countdown(result.parameters.len - 1, 0): + if result.parameters[a].`type` == "": + result.parameters[a].`type` = curIdent + of outType: result.outType = curIdent + curIdent = "" + var parens = 0 + let L = linkText.sons.len + template s(i: int): string = linkText.sons[i].text + var i = 0 + template nextState = + case s(i) + of " ": + if state == afterSymKind: + flushIdent + state = beforeSymbolName + of "`": + curIdent.add "`" + inc i + while i < L: # add contents between ` ` as a whole + curIdent.add s(i) + if s(i) == "`": + break + inc i + curIdent = curIdent.nimIdentBackticksNormalize + if state in {inBeginning, afterSymKind, beforeSymbolName}: + state = atSymbolName + flushIdent + state = afterSymbolName + of "[": + if state notin {inBeginning, afterSymKind, beforeSymbolName}: + inc parens + if state in {inBeginning, afterSymKind, beforeSymbolName}: + state = atSymbolName + curIdent.add s(i) + elif state in {atSymbolName, afterSymbolName} and parens == 1: + flushIdent + state = genericsPar + curIdent.add s(i) + else: curIdent.add s(i) + of "]": + if state notin {inBeginning, afterSymKind, beforeSymbolName, atSymbolName}: + dec parens + if state == genericsPar and parens == 0: + curIdent.add s(i) + flushIdent + else: curIdent.add s(i) + of "(": + inc parens + if state in {inBeginning, afterSymKind, beforeSymbolName}: + result.parametersProvided = true + state = atSymbolName + flushIdent + state = parameterName + elif state in {atSymbolName, afterSymbolName, genericsPar} and parens == 1: + result.parametersProvided = true + flushIdent + state = parameterName + else: curIdent.add s(i) + of ")": + dec parens + if state in {parameterName, parameterType} and parens == 0: + flushIdent + state = outType + else: curIdent.add s(i) + of "{": # remove pragmas + while i < L: + if s(i) == "}": + break + inc i + of ",", ";": + if state in {parameterName, parameterType} and parens == 1: + flushIdent + state = parameterName + else: curIdent.add s(i) + of "*": # skip export symbol + if state == atSymbolName: + flushIdent + 
state = afterSymbolName + elif state == afterSymbolName: + discard + else: curIdent.add "*" + of ":": + if state == outType: discard + elif state == parameterName: + flushIdent + state = parameterType + else: curIdent.add ":" + else: + let isPostfixSymKind = i > 0 and i == L - 1 and + result.symKind == "" and s(i) in NimDefs + if isPostfixSymKind: # for links like `foo proc`_ + resolveSymKind s(i) + else: + case state + of inBeginning: + if s(i) in NimDefs: + state = afterSymKind + else: + state = atSymbolName + curIdent.add s(i) + of afterSymKind, beforeSymbolName: + state = atSymbolName + curIdent.add s(i) + of parameterType: + case s(i) + of "ref": curIdent.add "ref." + of "ptr": curIdent.add "ptr." + of "var": discard + else: curIdent.add s(i).nimIdentBackticksNormalize + of atSymbolName: + curIdent.add s(i) + else: + curIdent.add s(i).nimIdentBackticksNormalize + while i < L: + nextState + inc i + if state == afterSymKind: # treat `type`_ as link to symbol `type` + state = atSymbolName + flushIdent + result.isGroup = false + +proc match*(generated: LangSymbol, docLink: LangSymbol): bool = + ## Returns true if `generated` can be a target for `docLink`. + ## If `generated` is an overload group then only `symKind` and `name` + ## are compared for success. + result = true + if docLink.symKind != "": + if generated.symKind == "proc": + result = docLink.symKind in ["proc", "func"] + else: + result = generated.symKind == docLink.symKind + if result and docLink.symKind == "type" and docLink.symTypeKind != "": + result = generated.symTypeKind == docLink.symTypeKind + if not result: return + result = generated.name == docLink.name + if not result: return + if generated.isGroup: + # if `()` were added then it's not a reference to the whole group: + return not docLink.parametersProvided + if docLink.generics != "": + result = generated.generics == docLink.generics + if not result: return + if docLink.outType != "": + result = generated.outType == docLink.outType + if not result: return + if docLink.parametersProvided: + result = generated.parameters.len == docLink.parameters.len + if not result: return + var onlyType = false + for i in 0 ..< generated.parameters.len: + let g = generated.parameters[i] + let d = docLink.parameters[i] + if i == 0: + if g.`type` == d.name: + onlyType = true # only types, not names, are provided in `docLink` + if onlyType: + result = g.`type` == d.name + else: + if d.`type` != "": + result = g.`type` == d.`type` + if not result: return + result = g.name == d.name + if not result: return diff --git a/lib/packages/docutils/docutils.nimble b/lib/packages/docutils/docutils.nimble deleted file mode 100644 index f1683c515..000000000 --- a/lib/packages/docutils/docutils.nimble +++ /dev/null @@ -1,4 +0,0 @@ -version = "0.10.0" -author = "Andreas Rumpf" -description = "Nim's reStructuredText processor." -license = "MIT" diff --git a/lib/packages/docutils/docutils.nimble.old b/lib/packages/docutils/docutils.nimble.old new file mode 100644 index 000000000..f97c3bdde --- /dev/null +++ b/lib/packages/docutils/docutils.nimble.old @@ -0,0 +1,7 @@ +# xxx disabled this as this isn't really a nimble package and it affects logic +# used to compute canonical imports, refs https://github.com/nim-lang/Nim/pull/16999#issuecomment-805442914 + +version = "0.10.0" +author = "Andreas Rumpf" +description = "Nim's reStructuredText processor." 
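To make the new `dochelpers.nim` API easier to follow, here is a minimal editorial sketch (not part of the patch) of how the matching helpers fit together. It assumes the module resolves as `packages/docutils/dochelpers`, i.e. the standard-library layout shown in this diff; the symbol names are illustrative only.

```nim
# Editorial sketch: exercising the doc-link matching helpers from dochelpers.nim.
import packages/docutils/dochelpers

# Backtick-aware identifier normalization used when parsing doc links:
doAssert nimIdentBackticksNormalize("`Foo BAR`") == "Foobar"

# An overload group as docgen would register it for `proc foo`:
let generated = langSymbolGroup("proc", "foo")
# A link written as `proc foo`_ matches the whole group ...
doAssert match(generated, LangSymbol(symKind: "proc", name: "foo"))
# ... while `proc foo()`_ asks for a concrete overload and does not:
doAssert not match(generated,
                   LangSymbol(symKind: "proc", name: "foo",
                              parametersProvided: true))
```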
+license = "MIT" diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim index 8c91e0a8e..f8376f46c 100644 --- a/lib/packages/docutils/highlite.nim +++ b/lib/packages/docutils/highlite.nim @@ -11,11 +11,9 @@ ## Currently only few languages are supported, other languages may be added. ## The interface supports one language nested in another. ## -## **Note:** Import `packages/docutils/highlite` to use this module -## ## You can use this to build your own syntax highlighting, check this example: ## -## .. code::nim +## ```Nim ## let code = """for x in $int.high: echo x.ord mod 2 == 0""" ## var toknizr: GeneralTokenizer ## initGeneralTokenizer(toknizr, code) @@ -33,21 +31,43 @@ ## else: ## echo toknizr.kind # All the kinds of tokens can be processed here. ## echo substr(code, toknizr.start, toknizr.length + toknizr.start - 1) +## ``` ## ## The proc `getSourceLanguage` can get the language `enum` from a string: -## -## .. code::nim +## ```Nim ## for l in ["C", "c++", "jAvA", "Nim", "c#"]: echo getSourceLanguage(l) +## ``` +## +## There is also a `Cmd` pseudo-language supported, which is a simple generic +## shell/cmdline tokenizer (UNIX shell/Powershell/Windows Command): +## no escaping, no programming language constructs besides variable definition +## at the beginning of line. It supports these operators: +## ```Cmd +## & && | || ( ) '' "" ; # for comments +## ``` ## +## Instead of escaping always use quotes like here +## `nimgrep --ext:'nim|nims' file.name`:cmd: shows how to input ``|``. +## Any argument that contains ``.`` or ``/`` or ``\`` will be treated +## as a file or directory. +## +## In addition to `Cmd` there is also `Console` language for +## displaying interactive sessions. +## Lines with a command should start with ``$``, other lines are considered +## as program output. 
import - strutils -from algorithm import binarySearch + std/strutils +from std/algorithm import binarySearch + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] + type SourceLanguage* = enum langNone, langNim, langCpp, langCsharp, langC, langJava, - langYaml, langPython + langYaml, langPython, langCmd, langConsole TokenClass* = enum gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber, gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit, @@ -55,7 +75,7 @@ type gtOperator, gtPunctuation, gtComment, gtLongComment, gtRegularExpression, gtTagStart, gtTagEnd, gtKey, gtValue, gtRawData, gtAssembler, gtPreprocessor, gtDirective, gtCommand, gtRule, gtHyperlink, gtLabel, - gtReference, gtOther + gtReference, gtPrompt, gtProgramOutput, gtProgram, gtOption, gtOther GeneralTokenizer* = object of RootObj kind*: TokenClass start*, length*: int @@ -66,14 +86,20 @@ type const sourceLanguageToStr*: array[SourceLanguage, string] = ["none", - "Nim", "C++", "C#", "C", "Java", "Yaml", "Python"] + "Nim", "C++", "C#", "C", "Java", "Yaml", "Python", "Cmd", "Console"] + sourceLanguageToAlpha*: array[SourceLanguage, string] = ["none", + "Nim", "cpp", "csharp", "C", "Java", "Yaml", "Python", "Cmd", "Console"] + ## list of languages spelled with alpabetic characters tokenClassToStr*: array[TokenClass, string] = ["Eof", "None", "Whitespace", "DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber", "Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit", "EscapeSequence", "Operator", "Punctuation", "Comment", "LongComment", "RegularExpression", "TagStart", "TagEnd", "Key", "Value", "RawData", "Assembler", "Preprocessor", "Directive", "Command", "Rule", "Hyperlink", - "Label", "Reference", "Other"] + "Label", "Reference", "Prompt", "ProgramOutput", + # start from lower-case if there is a corresponding RST role (see rst.nim) + "program", "option", + "Other"] # The following list comes from doc/keywords.txt, make sure it is # synchronized with this array by running the module itself as a test case. @@ -90,9 +116,11 @@ const "xor", "yield"] proc getSourceLanguage*(name: string): SourceLanguage = - for i in countup(succ(low(SourceLanguage)), high(SourceLanguage)): + for i in succ(low(SourceLanguage)) .. 
high(SourceLanguage): if cmpIgnoreStyle(name, sourceLanguageToStr[i]) == 0: return i + if cmpIgnoreStyle(name, sourceLanguageToAlpha[i]) == 0: + return i result = langNone proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: cstring) = @@ -102,9 +130,7 @@ proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: cstring) = g.length = 0 g.state = low(TokenClass) g.lang = low(SourceLanguage) - var pos = 0 # skip initial whitespace: - while g.buf[pos] in {' ', '\t'..'\r'}: inc(pos) - g.pos = pos + g.pos = 0 proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: string) = initGeneralTokenizer(g, cstring(buf)) @@ -165,9 +191,6 @@ const proc isKeyword(x: openArray[string], y: string): int = binarySearch(x, y) -proc isKeywordIgnoreCase(x: openArray[string], y: string): int = - binarySearch(x, y, cmpIgnoreCase) - proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) = const hexChars = {'0'..'9', 'A'..'F', 'a'..'f', '_'} @@ -177,31 +200,33 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) = var pos = g.pos g.start = g.pos if g.state == gtStringLit: - g.kind = gtStringLit - while true: + if g.buf[pos] == '\\': + g.kind = gtEscapeSequence + inc(pos) case g.buf[pos] - of '\\': - g.kind = gtEscapeSequence + of 'x', 'X': inc(pos) + if g.buf[pos] in hexChars: inc(pos) + if g.buf[pos] in hexChars: inc(pos) + of '0'..'9': + while g.buf[pos] in {'0'..'9'}: inc(pos) + of '\0': + g.state = gtNone + else: inc(pos) + else: + g.kind = gtStringLit + while true: case g.buf[pos] - of 'x', 'X': + of '\\': + break + of '\0', '\r', '\n': + g.state = gtNone + break + of '\"': inc(pos) - if g.buf[pos] in hexChars: inc(pos) - if g.buf[pos] in hexChars: inc(pos) - of '0'..'9': - while g.buf[pos] in {'0'..'9'}: inc(pos) - of '\0': g.state = gtNone + break else: inc(pos) - break - of '\0', '\r', '\n': - g.state = gtNone - break - of '\"': - inc(pos) - g.state = gtNone - break - else: inc(pos) else: case g.buf[pos] of ' ', '\t'..'\r': @@ -299,17 +324,18 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) = pos = nimNumber(g, pos) of '\'': inc(pos) - g.kind = gtCharLit - while true: - case g.buf[pos] - of '\0', '\r', '\n': - break - of '\'': - inc(pos) - break - of '\\': - inc(pos, 2) - else: inc(pos) + if g.kind != gtPunctuation: + g.kind = gtCharLit + while true: + case g.buf[pos] + of '\0', '\r', '\n': + break + of '\'': + inc(pos) + break + of '\\': + inc(pos, 2) + else: inc(pos) of '\"': inc(pos) if (g.buf[pos] == '\"') and (g.buf[pos + 1] == '\"'): @@ -473,6 +499,9 @@ proc clikeNextToken(g: var GeneralTokenizer, keywords: openArray[string], of '\0': break else: inc(pos) + else: + g.kind = gtOperator + while g.buf[pos] in OpChars: inc(pos) of '#': inc(pos) if hasPreprocessor in flags: @@ -900,6 +929,74 @@ proc pythonNextToken(g: var GeneralTokenizer) = "with", "yield"] nimNextToken(g, keywords) +proc cmdNextToken(g: var GeneralTokenizer, dollarPrompt = false) = + var pos = g.pos + g.start = g.pos + if g.state == low(TokenClass): + g.state = if dollarPrompt: gtPrompt else: gtProgram + case g.buf[pos] + of ' ', '\t'..'\r': + g.kind = gtWhitespace + while g.buf[pos] in {' ', '\t'..'\r'}: + if g.buf[pos] == '\n': + g.state = if dollarPrompt: gtPrompt else: gtProgram + inc(pos) + of '\'', '"': + g.kind = gtOption + let q = g.buf[pos] + inc(pos) + while g.buf[pos] notin {q, '\0'}: + inc(pos) + if g.buf[pos] == q: inc(pos) + of '#': + g.kind = gtComment + while g.buf[pos] notin {'\n', '\0'}: + inc(pos) + of '&', '|': + g.kind = gtOperator + 
inc(pos) + if g.buf[pos] == g.buf[pos-1]: inc(pos) + g.state = gtProgram + of '(': + g.kind = gtOperator + g.state = gtProgram + inc(pos) + of ')': + g.kind = gtOperator + inc(pos) + of ';': + g.state = gtProgram + g.kind = gtOperator + inc(pos) + of '\0': g.kind = gtEof + elif dollarPrompt and g.state == gtPrompt: + if g.buf[pos] == '$' and g.buf[pos+1] in {' ', '\t'}: + g.kind = gtPrompt + inc pos, 2 + g.state = gtProgram + else: + g.kind = gtProgramOutput + while g.buf[pos] notin {'\n', '\0'}: + inc(pos) + else: + if g.state == gtProgram: + g.kind = gtProgram + g.state = gtOption + else: + g.kind = gtOption + while g.buf[pos] notin {' ', '\t'..'\r', '&', '|', '(', ')', '\'', '"', '\0'}: + if g.buf[pos] == ';' and g.buf[pos+1] == ' ': + # (check space because ';' can be used inside arguments in Win bat) + break + if g.kind == gtOption and g.buf[pos] in {'/', '\\', '.'}: + g.kind = gtIdentifier # for file/dir name + elif g.kind == gtProgram and g.buf[pos] == '=': + g.kind = gtIdentifier # for env variable setting at beginning of line + g.state = gtProgram + inc(pos) + g.length = pos - g.pos + g.pos = pos + proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) = g.lang = lang case lang @@ -911,6 +1008,20 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) = of langJava: javaNextToken(g) of langYaml: yamlNextToken(g) of langPython: pythonNextToken(g) + of langCmd: cmdNextToken(g) + of langConsole: cmdNextToken(g, dollarPrompt=true) + +proc tokenize*(text: string, lang: SourceLanguage): seq[(string, TokenClass)] = + var g: GeneralTokenizer + initGeneralTokenizer(g, text) + var prevPos = 0 + while true: + getNextToken(g, lang) + if g.kind == gtEof: + break + var s = text[prevPos ..< g.pos] + result.add (s, g.kind) + prevPos = g.pos when isMainModule: var keywords: seq[string] diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index b5eef7610..706c50689 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -7,156 +7,46 @@ # distribution, for details about the copyright. # -## ================================== -## rst -## ================================== +## This module implements a `reStructuredText`:idx: (RST) and +## `Markdown`:idx: parser. +## User's manual on supported markup syntax and command line usage can be +## found in [Nim-flavored Markdown and reStructuredText](markdown_rst.html). ## -## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## Nim-flavored reStructuredText -## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +## * See also [Nim DocGen Tools Guide](docgen.html) for handling of +## ``.nim`` files. +## * See also [packages/docutils/rstgen module](rstgen.html) to know how to +## generate HTML or Latex strings (for embedding them into custom documents). ## -## This module implements a `reStructuredText`:idx: (RST) parser. -## A large subset is implemented with some limitations_ and -## `Nim-specific features`_. -## A few `extra features`_ of the `Markdown`:idx: syntax are -## also supported. -## -## Nim can output the result to HTML [#html]_ or Latex [#latex]_. -## -## .. [#html] commands ``nim doc`` for ``*.nim`` files and -## ``nim rst2html`` for ``*.rst`` files -## -## .. [#latex] command ``nim rst2tex`` for ``*.rst``. -## -## If you are new to RST please consider reading the following: -## -## 1) a short `quick introduction`_ -## 2) an `RST reference`_: a comprehensive cheatsheet for RST -## 3) a more formal 50-page `RST specification`_. 
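The `highlite.nim` additions above (alphabetic language spellings, the `Cmd`/`Console` pseudo-languages and the new `tokenize` helper) can be illustrated with a short editorial sketch, again assuming the module is importable as `packages/docutils/highlite`:

```nim
# Editorial sketch, not part of the patch.
import packages/docutils/highlite

# Languages can now also be looked up by their alphabetic spelling:
doAssert getSourceLanguage("c++") == langCpp
doAssert getSourceLanguage("cpp") == langCpp

# `tokenize` wraps GeneralTokenizer and returns (text, class) pairs;
# with `langConsole` the leading "$ " is classified as a prompt:
for (text, kind) in tokenize("$ nim c foo.nim", langConsole):
  echo kind, ": ", text
# first token is the gtPrompt "$ ", followed by program/option/whitespace tokens
```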
-## -## Features -## -------- -## -## Supported standard RST features: -## -## * body elements -## + sections -## + transitions -## + paragraphs -## + bullet lists using \+, \*, \- -## + enumerated lists using arabic numerals or alphabet -## characters: 1. ... 2. ... *or* a. ... b. ... *or* A. ... B. ... -## + footnotes (including manually numbered, auto-numbered, auto-numbered -## with label, and auto-symbol footnotes) and citations -## + definition lists -## + field lists -## + option lists -## + indented literal blocks -## + simple tables -## + directives (see official documentation in `RST directives list`_): -## - ``image``, ``figure`` for including images and videos -## - ``code`` -## - ``contents`` (table of contents), ``container``, ``raw`` -## - ``include`` -## - admonitions: "attention", "caution", "danger", "error", "hint", -## "important", "note", "tip", "warning", "admonition" -## - substitution definitions: `replace` and `image` -## + comments -## * inline markup -## + *emphasis*, **strong emphasis**, -## ``inline literals``, hyperlink references (including embedded URI), -## substitution references, standalone hyperlinks, -## internal links (inline and outline) -## + \`interpreted text\` with roles ``:literal:``, ``:strong:``, -## ``emphasis``, ``:sub:``/``:subscript:``, ``:sup:``/``:supscript:`` -## (see `RST roles list`_ for description). -## + inline internal targets -## -## .. _`Nim-specific features`: -## -## Additional Nim-specific features: -## -## * directives: ``code-block`` [cmp:Sphinx]_, ``title``, -## ``index`` [cmp:Sphinx]_ -## -## * ***triple emphasis*** (bold and italic) using \*\*\* -## * ``:idx:`` role for \`interpreted text\` to include the link to this -## text into an index (example: `Nim index`_). -## -## .. [cmp:Sphinx] similar but different from the directives of -## Python `Sphinx directives`_ extensions -## -## .. _`extra features`: -## -## Optional additional features, turned on by ``options: RstParseOption`` in -## `rstParse proc <#rstParse,string,string,int,int,bool,RstParseOptions,FindFileHandler,MsgHandler>`_: -## -## * emoji / smiley symbols -## * Markdown tables -## * Markdown code blocks -## * Markdown links -## * Markdown headlines -## * using ``1`` as auto-enumerator in enumerated lists like RST ``#`` -## (auto-enumerator ``1`` can not be used with ``#`` in the same list) -## -## .. Note:: By default Nim has ``roSupportMarkdown`` and -## ``roSupportRawDirective`` turned **on**. -## -## .. warning:: Using Nim-specific features can cause other RST implementations -## to fail on your document. -## -## Limitations -## ----------- -## -## * no Unicode support in character width calculations -## * body elements -## - no roman numerals in enumerated lists -## - no quoted literal blocks -## - no doctest blocks -## - no grid tables -## - some directives are missing (check official `RST directives list`_): -## ``parsed-literal``, ``sidebar``, ``topic``, ``math``, ``rubric``, -## ``epigraph``, ``highlights``, ``pull-quote``, ``compound``, -## ``table``, ``csv-table``, ``list-table``, ``section-numbering``, -## ``header``, ``footer``, ``meta``, ``class`` -## - no ``role`` directives and no custom interpreted text roles -## - some standard roles are not supported (check `RST roles list`_) -## * inline markup -## - no simple-inline-markup -## - no embedded aliases -## -## Usage -## ----- -## -## See `Nim DocGen Tools Guide <docgen.html>`_ for the details about -## ``nim doc``, ``nim rst2html`` and ``nim rst2tex`` commands. 
-## -## See `packages/docutils/rstgen module <rstgen.html>`_ to know how to -## generate HTML or Latex strings to embed them into your documents. -## -## .. Tip:: Import ``packages/docutils/rst`` to use this module -## programmatically. -## -## .. _quick introduction: https://docutils.sourceforge.io/docs/user/rst/quickstart.html -## .. _RST reference: https://docutils.sourceforge.io/docs/user/rst/quickref.html -## .. _RST specification: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html -## .. _RST directives list: https://docutils.sourceforge.io/docs/ref/rst/directives.html -## .. _RST roles list: https://docutils.sourceforge.io/docs/ref/rst/roles.html -## .. _Nim index: https://nim-lang.org/docs/theindex.html -## .. _Sphinx directives: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html +## Choice between Markdown and RST as well as optional additional features are +## turned on by passing ``options:`` [RstParseOptions] to [proc rstParse]. import - os, strutils, rstast, std/enumutils, algorithm, lists, sequtils, - std/private/miscdollars + std/[os, strutils, enumutils, algorithm, lists, sequtils, + tables, strscans] +import dochelpers, rstidx, rstast +import std/private/miscdollars +from highlite import SourceLanguage, getSourceLanguage + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] + type RstParseOption* = enum ## options for the RST parser - roSkipPounds, ## skip ``#`` at line beginning (documentation - ## embedded in Nim comments) roSupportSmilies, ## make the RST parser support smilies like ``:)`` roSupportRawDirective, ## support the ``raw`` directive (don't support ## it for sandboxing) - roSupportMarkdown ## support additional features of Markdown + roSupportMarkdown, ## support additional features of Markdown + roPreferMarkdown, ## parse as Markdown (keeping RST as "extension" + ## to Markdown) -- implies `roSupportMarkdown` + roNimFile ## set for Nim files where default interpreted + ## text role should be :nim: + roSandboxDisabled ## this option enables certain options + ## (e.g. raw, include, importdoc) + ## which are disabled by default as they can + ## enable users to read arbitrary data and + ## perform XSS if the parser is used in a web + ## app. 
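Since the reworked `RstParseOption` enum above is the main knob of the parser, a short editorial sketch of plausible option sets follows. The concrete sets used by `nim doc`/`nim rst2html` are not shown in this diff, so treat these values as illustrative only.

```nim
# Editorial sketch: illustrative option sets only.
import packages/docutils/rst

# Nim-flavored Markdown for a trusted local .nim file
# (sandbox off, so raw/include/importdoc are available):
let mdOpts: RstParseOptions = {roSupportMarkdown, roPreferMarkdown,
                               roNimFile, roSandboxDisabled}
# Plain RST with Markdown extras, keeping the sandbox restrictions:
let rstOpts: RstParseOptions = {roSupportMarkdown}

doAssert roPreferMarkdown in mdOpts and roPreferMarkdown notin rstOpts
```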
RstParseOptions* = set[RstParseOption] @@ -165,28 +55,41 @@ type mcWarning = "Warning", mcError = "Error" + # keep the order in sync with compiler/docgen.nim and compiler/lineinfos.nim: MsgKind* = enum ## the possible messages meCannotOpenFile = "cannot open '$1'", meExpected = "'$1' expected", + meMissingClosing = "$1", meGridTableNotImplemented = "grid table is not implemented", meMarkdownIllformedTable = "illformed delimiter row of a Markdown table", + meIllformedTable = "Illformed table: $1", meNewSectionExpected = "new section expected $1", meGeneralParseError = "general parse error", meInvalidDirective = "invalid directive: '$1'", + meInvalidField = "invalid field: $1", meFootnoteMismatch = "mismatch in number of footnotes and their refs: $1", mwRedefinitionOfLabel = "redefinition of label '$1'", mwUnknownSubstitution = "unknown substitution '$1'", + mwAmbiguousLink = "ambiguous doc link $1", + mwBrokenLink = "broken link '$1'", mwUnsupportedLanguage = "language '$1' not supported", mwUnsupportedField = "field '$1' not supported", - mwRstStyle = "RST style: $1" + mwRstStyle = "RST style: $1", + mwUnusedImportdoc = "importdoc for '$1' is not used", + meSandboxedDirective = "disabled directive: '$1'", MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind, arg: string) {.closure, gcsafe.} ## what to do in case of an error FindFileHandler* = proc (filename: string): string {.closure, gcsafe.} + FindRefFileHandler* = + proc (targetRelPath: string): + tuple[targetPath: string, linkRelPath: string] {.closure, gcsafe.} + ## returns where .html or .idx file should be found by its relative path; + ## `linkRelPath` is a prefix to be added before a link anchor from such file proc rstnodeToRefname*(n: PRstNode): string proc addNodes*(n: PRstNode): string -proc getFieldValue*(n: PRstNode, fieldname: string): string +proc getFieldValue*(n: PRstNode, fieldname: string): string {.gcsafe.} proc getArgument*(n: PRstNode): string # ----------------------------- scanner part -------------------------------- @@ -233,6 +136,10 @@ const ":geek:": "icon_e_geek", ":ugeek:": "icon_e_ugeek" } + SandboxDirAllowlist = [ + "image", "code", "code-block", "admonition", "attention", "caution", + "container", "contents", "danger", "default-role", "error", "figure", + "hint", "important", "index", "note", "role", "tip", "title", "warning"] type TokType = enum @@ -253,8 +160,8 @@ type buf*: cstring bufpos*: int line*, col*, baseIndent*: int - skipPounds*: bool adornmentLine*: bool + escapeNext*: bool proc getThing(L: var Lexer, tok: var Token, s: set[char]) = tok.kind = tkWord @@ -292,10 +199,18 @@ proc getPunctAdornment(L: var Lexer, tok: var Token) = tok.col = L.col var pos = L.bufpos let c = L.buf[pos] - while true: + if not L.escapeNext and (c != '\\' or L.adornmentLine): + while true: + tok.symbol.add(L.buf[pos]) + inc pos + if L.buf[pos] != c: break + elif L.escapeNext: tok.symbol.add(L.buf[pos]) inc pos - if L.buf[pos] != c: break + else: # not L.escapeNext and c == '\\' and not L.adornmentLine + tok.symbol.add '\\' + inc pos + L.escapeNext = true inc L.col, pos - L.bufpos L.bufpos = pos if tok.symbol == "\\": tok.kind = tkPunct @@ -317,9 +232,6 @@ proc getIndentAux(L: var Lexer, start: int): int = else: inc pos elif L.buf[pos] == '\n': inc pos - if L.skipPounds: - if L.buf[pos] == '#': inc pos - if L.buf[pos] == '#': inc pos while true: case L.buf[pos] of ' ', '\v', '\f': @@ -384,30 +296,19 @@ proc rawGetTok(L: var Lexer, tok: var Token) = inc L.col tok.col = max(tok.col - L.baseIndent, 0) -proc 
getTokens(buffer: string, skipPounds: bool, tokens: var TokenSeq): int = +proc getTokens(buffer: string, tokens: var TokenSeq) = var L: Lexer var length = tokens.len L.buf = cstring(buffer) L.line = 0 # skip UTF-8 BOM if L.buf[0] == '\xEF' and L.buf[1] == '\xBB' and L.buf[2] == '\xBF': inc L.bufpos, 3 - L.skipPounds = skipPounds - if skipPounds: - if L.buf[L.bufpos] == '#': - inc L.bufpos - inc result - if L.buf[L.bufpos] == '#': - inc L.bufpos - inc result - L.baseIndent = 0 - while L.buf[L.bufpos] == ' ': - inc L.bufpos - inc L.baseIndent - inc result while true: inc length setLen(tokens, length) + let toEscape = L.escapeNext rawGetTok(L, tokens[length - 1]) + if toEscape: L.escapeNext = false if tokens[length - 1].kind == tkEof: break if tokens[0].kind == tkWhite: # BUGFIX @@ -421,14 +322,54 @@ type line: int # the last line of this style occurrence # (for error message) hasPeers: bool # has headings on the same level of hierarchy? + LiteralBlockKind = enum # RST-style literal blocks after `::` + lbNone, + lbIndentedLiteralBlock, + lbQuotedLiteralBlock LevelMap = seq[LevelInfo] # Saves for each possible title adornment # style its level in the current document. + SubstitutionKind = enum + rstSubstitution = "substitution", + hyperlinkAlias = "hyperlink alias", + implicitHyperlinkAlias = "implicitly-generated hyperlink alias" Substitution = object + kind*: SubstitutionKind key*: string value*: PRstNode - AnchorSubst = tuple - mainAnchor: string - aliases: seq[string] + info*: TLineInfo # place where the substitution was defined + AnchorRule = enum + arInternalRst, ## For automatically generated RST anchors (from + ## headings, footnotes, inline internal targets): + ## case-insensitive, 1-space-significant (by RST spec) + arExternalRst, ## For external .nim doc comments or .rst/.md + arNim ## For anchors generated by ``docgen.nim``: Nim-style case + ## sensitivity, etc. (see `proc normalizeNimName`_ for details) + arHyperlink, ## For links with manually set anchors in + ## form `text <pagename.html#anchor>`_ + RstAnchorKind = enum + manualDirectiveAnchor = "manual directive anchor", + manualInlineAnchor = "manual inline anchor", + footnoteAnchor = "footnote anchor", + headlineAnchor = "implicitly-generated headline anchor" + AnchorSubst = object + info: TLineInfo # the file where the anchor was defined + priority: int + case kind: range[arInternalRst .. arNim] + of arInternalRst: + anchorType: RstAnchorKind + target: PRstNode + of arExternalRst: + anchorTypeExt: RstAnchorKind + refnameExt: string + of arNim: + module: FileIndex # anchor's module (generally not the same as file) + tooltip: string # displayed tooltip for Nim-generated anchors + langSym: LangSymbol + refname: string # A reference name that will be inserted directly + # into HTML/Latex. 
+ external: bool + AnchorSubstTable = Table[string, seq[AnchorSubst]] + # use `seq` to account for duplicate anchors FootnoteType = enum fnManualNumber, # manually numbered footnote like [3] fnAutoNumber, # auto-numbered footnote [#] @@ -439,43 +380,71 @@ type kind: FootnoteType # discriminator number: int # valid for fnManualNumber (always) and fnAutoNumber, # fnAutoNumberLabel after resolveSubs is called - autoNumIdx: int # order of occurence: fnAutoNumber, fnAutoNumberLabel - autoSymIdx: int # order of occurence: fnAutoSymbol + autoNumIdx: int # order of occurrence: fnAutoNumber, fnAutoNumberLabel + autoSymIdx: int # order of occurrence: fnAutoSymbol label: string # valid for fnAutoNumberLabel - - SharedState = object - options: RstParseOptions # parsing options + RstFileTable* = object + filenameToIdx*: Table[string, FileIndex] + idxToFilename*: seq[string] + ImportdocInfo = object + used: bool # was this import used? + fromInfo: TLineInfo # place of `.. importdoc::` directive + idxPath: string # full path to ``.idx`` file + linkRelPath: string # prefix before target anchor + title: string # document title obtained from ``.idx`` + RstSharedState = object + options*: RstParseOptions # parsing options hLevels: LevelMap # hierarchy of heading styles hTitleCnt: int # =0 if no title, =1 if only main title, # =2 if both title and subtitle are present hCurLevel: int # current section level + currRole: string # current interpreted text role + currRoleKind: RstNodeKind # ... and its node kind subs: seq[Substitution] # substitutions - refs: seq[Substitution] # references - anchors: seq[AnchorSubst] # internal target substitutions - lineFootnoteNum: seq[int] # footnote line, auto numbers .. [#] - lineFootnoteNumRef: seq[int] # footnote line, their reference [#]_ - lineFootnoteSym: seq[int] # footnote line, auto symbols .. [*] - lineFootnoteSymRef: seq[int] # footnote line, their reference [*]_ + refs*: seq[Substitution] # references + anchors*: AnchorSubstTable + # internal target substitutions + lineFootnoteNum: seq[TLineInfo] # footnote line, auto numbers .. [#] + lineFootnoteNumRef: seq[TLineInfo] # footnote line, their reference [#]_ + currFootnoteNumRef: int # ... their counter for `resolveSubs` + lineFootnoteSym: seq[TLineInfo] # footnote line, auto symbols .. [*] + lineFootnoteSymRef: seq[TLineInfo] # footnote line, their reference [*]_ + currFootnoteSymRef: int # ... their counter for `resolveSubs` footnotes: seq[FootnoteSubst] # correspondence b/w footnote label, # number, order of occurrence msgHandler: MsgHandler # How to handle errors. - findFile: FindFileHandler # How to find files. - - PSharedState = ref SharedState + findFile: FindFileHandler # How to find files for include. + findRefFile: FindRefFileHandler + # How to find files imported by importdoc. + filenames*: RstFileTable # map file name <-> FileIndex (for storing + # file names for warnings after 1st stage) + currFileIdx*: FileIndex # current index in `filenames` + tocPart*: seq[PRstNode] # all the headings of a document + hasToc*: bool + idxImports*: Table[string, ImportdocInfo] + # map `importdoc`ed filename -> it's info + nimFileImported*: bool # Was any ``.nim`` module `importdoc`ed ? 
+ + PRstSharedState* = ref RstSharedState + ManualAnchor = object + alias: string # a (short) name that can substitute the `anchor` + anchor: string # anchor = id = refname + info: TLineInfo RstParser = object of RootObj idx*: int tok*: TokenSeq - s*: PSharedState + s*: PRstSharedState indentStack*: seq[int] - filename*: string line*, col*: int ## initial line/column of whole text or ## documenation fragment that will be added ## in case of error/warning reporting to ## (relative) line/column of the token. - hasToc*: bool - curAnchor*: string # variable to track latest anchor in s.anchors + curAnchors*: seq[ManualAnchor] + ## seq to accumulate aliases for anchors: + ## because RST can have >1 alias per 1 anchor EParseError* = object of ValueError + SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.} const LineRstInit* = 1 ## Initial line number for standalone RST text @@ -509,36 +478,141 @@ proc defaultFindFile*(filename: string): string = if fileExists(filename): result = filename else: result = "" -proc newSharedState(options: RstParseOptions, - findFile: FindFileHandler, - msgHandler: MsgHandler): PSharedState = - new(result) - result.subs = @[] - result.refs = @[] - result.options = options - result.msgHandler = if not isNil(msgHandler): msgHandler else: defaultMsgHandler - result.findFile = if not isNil(findFile): findFile else: defaultFindFile +proc defaultFindRefFile*(filename: string): (string, string) = + (filename, "") + +proc defaultRole(options: RstParseOptions): string = + if roNimFile in options: "nim" else: "literal" + +proc whichRoleAux(sym: string): RstNodeKind = + let r = sym.toLowerAscii + case r + of "idx": result = rnIdx + of "literal": result = rnInlineLiteral + of "strong": result = rnStrongEmphasis + of "emphasis": result = rnEmphasis + of "sub", "subscript": result = rnSub + of "sup", "superscript": result = rnSup + # literal and code are the same in our implementation + of "code": result = rnInlineLiteral + of "program", "option", "tok": result = rnCodeFragment + # c++ currently can be spelled only as cpp, c# only as csharp + elif getSourceLanguage(r) != langNone: + result = rnInlineCode + else: # unknown role + result = rnUnknownRole + +proc len(filenames: RstFileTable): int = filenames.idxToFilename.len + +proc addFilename*(s: PRstSharedState, file1: string): FileIndex = + ## Returns index of filename, adding it if it has not been used before + let nextIdx = s.filenames.len.FileIndex + result = getOrDefault(s.filenames.filenameToIdx, file1, default = nextIdx) + if result == nextIdx: + s.filenames.filenameToIdx[file1] = result + s.filenames.idxToFilename.add file1 + +proc setCurrFilename*(s: PRstSharedState, file1: string) = + s.currFileIdx = addFilename(s, file1) + +proc getFilename(filenames: RstFileTable, fid: FileIndex): string = + doAssert(0 <= fid.int and fid.int < filenames.len, + "incorrect FileIndex $1 (range 0..$2)" % [ + $fid.int, $(filenames.len - 1)]) + result = filenames.idxToFilename[fid.int] + +proc getFilename(s: PRstSharedState, subst: AnchorSubst): string = + getFilename(s.filenames, subst.info.fileIndex) + +proc getModule(s: PRstSharedState, subst: AnchorSubst): string = + result = getFilename(s.filenames, subst.module) + +proc currFilename(s: PRstSharedState): string = + getFilename(s.filenames, s.currFileIdx) + +proc newRstSharedState*(options: RstParseOptions, + filename: string, + findFile: FindFileHandler, + findRefFile: FindRefFileHandler, + msgHandler: MsgHandler, + hasToc: bool): PRstSharedState = + let r = 
defaultRole(options) + result = PRstSharedState( + currRole: r, + currRoleKind: whichRoleAux(r), + options: options, + msgHandler: if not isNil(msgHandler): msgHandler else: defaultMsgHandler, + findFile: if not isNil(findFile): findFile else: defaultFindFile, + findRefFile: + if not isNil(findRefFile): findRefFile + else: defaultFindRefFile, + hasToc: hasToc + ) + setCurrFilename(result, filename) proc curLine(p: RstParser): int = p.line + currentTok(p).line proc findRelativeFile(p: RstParser; filename: string): string = - result = p.filename.splitFile.dir / filename + result = p.s.currFilename.splitFile.dir / filename if not fileExists(result): result = p.s.findFile(filename) proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) = - p.s.msgHandler(p.filename, curLine(p), + p.s.msgHandler(p.s.currFilename, curLine(p), p.col + currentTok(p).col, msgKind, arg) +proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string) = + s.msgHandler(s.currFilename, LineRstInit, ColRstInit, msgKind, arg) + +proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string; + line, col: int) = + s.msgHandler(s.currFilename, line, col, msgKind, arg) + +proc rstMessage(s: PRstSharedState, filename: string, msgKind: MsgKind, + arg: string) = + s.msgHandler(filename, LineRstInit, ColRstInit, msgKind, arg) + +proc rstMessage*(filenames: RstFileTable, f: MsgHandler, + info: TLineInfo, msgKind: MsgKind, arg: string) = + ## Print warnings using `info`, i.e. in 2nd-pass warnings for + ## footnotes/substitutions/references or from ``rstgen.nim``. + let file = getFilename(filenames, info.fileIndex) + f(file, info.line.int, info.col.int, msgKind, arg) + proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string, line, col: int) = - p.s.msgHandler(p.filename, p.line + line, + p.s.msgHandler(p.s.currFilename, p.line + line, p.col + col, msgKind, arg) proc rstMessage(p: RstParser, msgKind: MsgKind) = - p.s.msgHandler(p.filename, curLine(p), + p.s.msgHandler(p.s.currFilename, curLine(p), p.col + currentTok(p).col, msgKind, currentTok(p).symbol) +# Functions `isPureRst` & `stopOrWarn` address differences between +# Markdown and RST: +# * Markdown always tries to continue working. If it is really impossible +# to parse a markup element, its proc just returns `nil` and parsing +# continues for it as for normal text paragraph. +# The downside is that real mistakes/typos are often silently ignored. +# The same applies to legacy `RstMarkdown` mode for nimforum. +# * RST really signals errors. The downside is that it's more intrusive - +# the user must escape special syntax with \ explicitly. +# +# TODO: we need to apply this strategy to all markup elements eventually. 
+ +func isPureRst(p: RstParser): bool = roSupportMarkdown notin p.s.options +func isRst(p: RstParser): bool = roPreferMarkdown notin p.s.options +func isMd(p: RstParser): bool = roPreferMarkdown in p.s.options +func isMd(s: PRstSharedState): bool = roPreferMarkdown in s.options + +proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string) = + let realMsgKind = if isPureRst(p): errorType else: mwRstStyle + rstMessage(p, realMsgKind, arg) + +proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string, line, col: int) = + let realMsgKind = if isPureRst(p): errorType else: mwRstStyle + rstMessage(p, realMsgKind, arg, line, col) + proc currInd(p: RstParser): int = result = p.indentStack[high(p.indentStack)] @@ -548,17 +622,78 @@ proc pushInd(p: var RstParser, ind: int) = proc popInd(p: var RstParser) = if p.indentStack.len > 1: setLen(p.indentStack, p.indentStack.len - 1) -proc initParser(p: var RstParser, sharedState: PSharedState) = +# Working with indentation in rst.nim +# ----------------------------------- +# +# Every line break has an associated tkIndent. +# The tokenizer writes back the first column of next non-blank line +# in all preceeding tkIndent tokens to the `ival` field of tkIndent. +# +# RST document is separated into body elements (B.E.), every of which +# has a dedicated handler proc (or block of logic when B.E. is a block quote) +# that should follow the next rule: +# Every B.E. handler proc should finish at tkIndent (newline) +# after its B.E. finishes. +# Then its callers (which is `parseSection` or another B.E. handler) +# check for tkIndent ival (without necessity to advance `p.idx`) +# and decide themselves whether they continue processing or also stop. +# +# An example:: +# +# L RST text fragment indentation +# +--------------------+ +# 1 | | <- (empty line at the start of file) no tokens +# 2 |First paragraph. | <- tkIndent has ival=0, and next tkWord has col=0 +# 3 | | <- tkIndent has ival=0 +# 4 |* bullet item and | <- tkIndent has ival=0, and next tkPunct has col=0 +# 5 | its continuation | <- tkIndent has ival=2, and next tkWord has col=2 +# 6 | | <- tkIndent has ival=4 +# 7 | Block quote | <- tkIndent has ival=4, and next tkWord has col=4 +# 8 | | <- tkIndent has ival=0 +# 9 | | <- tkIndent has ival=0 +# 10|Final paragraph | <- tkIndent has ival=0, and tkWord has col=0 +# +--------------------+ +# C:01234 +# +# Here parser starts with initial `indentStack=[0]` and then calls the +# 1st `parseSection`: +# +# - `parseSection` calls `parseParagraph` and "First paragraph" is parsed +# - bullet list handler is started at reaching ``*`` (L4 C0), it +# starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`, +# then calls `parseSection` (2nd call, nested) which parses +# paragraph "bullet list and its continuation" and then starts +# a block quote logic (L7 C4). +# The block quote logic calls calls `pushInd(p, ind=4)` and +# calls `parseSection` again, so a (simplified) sequence of calls now is:: +# +# parseSection -> parseBulletList -> +# parseSection (+block quote logic) -> parseSection +# +# 3rd `parseSection` finishes, block quote logic calls `popInd(p)`, +# it returns to bullet item logic, which sees that next tkIndent has +# ival=0 and stops there since the required indentation for a bullet item +# is 2 and 0<2; the bullet item logic calls `popInd(p)`. +# Then bullet list handler checks that next tkWord (L10 C0) has the +# right indentation but does not have ``*`` so stops at tkIndent (L10). 
+# - 1st `parseSection` invocation calls `parseParagraph` and the +# "Final paragraph" is parsed. +# +# If a B.E. handler has advanced `p.idx` past tkIndent to check +# whether it should continue its processing or not, and decided not to, +# then this B.E. handler should step back (e.g. do `dec p.idx`). + +proc initParser(p: var RstParser, sharedState: PRstSharedState) = p.indentStack = @[0] p.tok = @[] p.idx = 0 - p.filename = "" - p.hasToc = false p.col = ColRstInit p.line = LineRstInit p.s = sharedState proc addNodesAux(n: PRstNode, result: var string) = + if n == nil: + return if n.kind == rnLeaf: result.add(n.text) else: @@ -567,6 +702,11 @@ proc addNodesAux(n: PRstNode, result: var string) = proc addNodes(n: PRstNode): string = n.addNodesAux(result) +proc linkName(n: PRstNode): string = + ## Returns a normalized reference name, see: + ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names + n.addNodes.toLowerAscii + proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) = template special(s) = if b: @@ -622,26 +762,37 @@ proc rstnodeToRefname(n: PRstNode): string = var b = false rstnodeToRefnameAux(n, result, b) -proc findSub(p: var RstParser, n: PRstNode): int = +proc findSub(s: PRstSharedState, n: PRstNode): int = var key = addNodes(n) # the spec says: if no exact match, try one without case distinction: - for i in countup(0, high(p.s.subs)): - if key == p.s.subs[i].key: + for i in countup(0, high(s.subs)): + if key == s.subs[i].key: return i - for i in countup(0, high(p.s.subs)): - if cmpIgnoreStyle(key, p.s.subs[i].key) == 0: + for i in countup(0, high(s.subs)): + if cmpIgnoreStyle(key, s.subs[i].key) == 0: return i result = -1 +proc lineInfo(p: RstParser, iTok: int): TLineInfo = + result.col = int16(p.col + p.tok[iTok].col) + result.line = uint16(p.line + p.tok[iTok].line) + result.fileIndex = p.s.currFileIdx + +proc lineInfo(p: RstParser): TLineInfo = lineInfo(p, p.idx) +# TODO: we need this simplification because we don't preserve exact starting +# token of currently parsed element: +proc prevLineInfo(p: RstParser): TLineInfo = lineInfo(p, p.idx-1) + proc setSub(p: var RstParser, key: string, value: PRstNode) = var length = p.s.subs.len for i in 0 ..< length: if key == p.s.subs[i].key: p.s.subs[i].value = value return - p.s.subs.add(Substitution(key: key, value: value)) + p.s.subs.add(Substitution(key: key, value: value, info: prevLineInfo(p))) -proc setRef(p: var RstParser, key: string, value: PRstNode) = +proc setRef(p: var RstParser, key: string, value: PRstNode, + refType: SubstitutionKind) = var length = p.s.refs.len for i in 0 ..< length: if key == p.s.refs[i].key: @@ -649,37 +800,127 @@ proc setRef(p: var RstParser, key: string, value: PRstNode) = rstMessage(p, mwRedefinitionOfLabel, key) p.s.refs[i].value = value return - p.s.refs.add(Substitution(key: key, value: value)) - -proc findRef(p: var RstParser, key: string): PRstNode = - for i in countup(0, high(p.s.refs)): - if key == p.s.refs[i].key: - return p.s.refs[i].value - -proc addAnchor(p: var RstParser, refn: string, reset: bool) = - ## add anchor `refn` to anchor aliases and update last anchor ``curAnchor`` - if p.curAnchor == "": - p.s.anchors.add (refn, @[refn]) - else: - p.s.anchors[^1].mainAnchor = refn - p.s.anchors[^1].aliases.add refn - if reset: - p.curAnchor = "" - else: - p.curAnchor = refn - -proc findMainAnchor(p: RstParser, refn: string): string = - for subst in p.s.anchors: - if subst.mainAnchor == refn: # no need to rename - result = subst.mainAnchor - 
break - var toLeave = false - for anchor in subst.aliases: - if anchor == refn: # this anchor will be named as mainAnchor - result = subst.mainAnchor - toLeave = true - if toLeave: - break + p.s.refs.add(Substitution(kind: refType, key: key, value: value, + info: prevLineInfo(p))) + +proc findRef(s: PRstSharedState, key: string): seq[Substitution] = + for i in countup(0, high(s.refs)): + if key == s.refs[i].key: + result.add s.refs[i] + +# Ambiguity in links: we don't follow procedure of removing implicit targets +# defined in https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#implicit-hyperlink-targets +# Instead we just give explicit links a higher priority than to implicit ones +# and report ambiguities as warnings. Hopefully it is easy to remove +# ambiguities manually. Nim auto-generated links from ``docgen.nim`` +# have lowest priority: 1 (for procs) and below for other symbol types. + +proc refPriority(k: SubstitutionKind): int = + case k + of rstSubstitution: result = 8 + of hyperlinkAlias: result = 7 + of implicitHyperlinkAlias: result = 2 + +proc internalRefPriority(k: RstAnchorKind): int = + case k + of manualDirectiveAnchor: result = 6 + of manualInlineAnchor: result = 5 + of footnoteAnchor: result = 4 + of headlineAnchor: result = 3 + +proc `$`(subst: AnchorSubst): string = # for debug + let s = + case subst.kind + of arInternalRst: "type=" & $subst.anchorType + of arExternalRst: "type=" & $subst.anchorTypeExt + of arNim: "langsym=" & $subst.langSym + result = "(kind=$1, priority=$2, $3)" % [$subst.kind, $subst.priority, s] + +proc addAnchorRst(p: var RstParser, name: string, target: PRstNode, + anchorType: RstAnchorKind) = + ## Associates node `target` (which has field `anchor`) with an + ## alias `name` and updates the corresponding aliases in `p.curAnchors`. + let prio = internalRefPriority(anchorType) + for a in p.curAnchors: + p.s.anchors.mgetOrPut(a.alias, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arInternalRst, target: target, priority: prio, + info: a.info, anchorType: manualDirectiveAnchor)) + if name != "": + p.s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arInternalRst, target: target, priority: prio, + info: prevLineInfo(p), anchorType: anchorType)) + p.curAnchors.setLen 0 + +proc addAnchorExtRst(s: var PRstSharedState, key: string, refn: string, + anchorType: RstAnchorKind, info: TLineInfo) = + let name = key.toLowerAscii + let prio = internalRefPriority(anchorType) + s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arExternalRst, refnameExt: refn, priority: prio, + info: info, + anchorTypeExt: anchorType)) + +proc addAnchorNim*(s: var PRstSharedState, external: bool, refn: string, tooltip: string, + langSym: LangSymbol, priority: int, + info: TLineInfo, module: FileIndex) = + ## Adds an anchor `refn`, which follows + ## the rule `arNim` (i.e. 
a symbol in ``*.nim`` file) + s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arNim, external: external, refname: refn, langSym: langSym, + tooltip: tooltip, priority: priority, + info: info)) + +proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode, + info: TLineInfo): + seq[AnchorSubst] = + var langSym: LangSymbol + try: + langSym = toLangSymbol(signature) + except ValueError: # parsing failed, not a Nim symbol + return + let substitutions = s.anchors.getOrDefault(langSym.name, + newSeq[AnchorSubst]()) + if substitutions.len == 0: + return + # logic to select only groups instead of concrete symbols + # with overloads, note that the same symbol can be defined + # in multiple modules and `importdoc`ed: + type GroupKey = tuple[symKind: string, origModule: string] + # map (symKind, file) (like "proc", "os.nim") -> found symbols/groups: + var found: Table[GroupKey, seq[AnchorSubst]] + for subst in substitutions: + if subst.kind == arNim: + if match(subst.langSym, langSym): + let key: GroupKey = (subst.langSym.symKind, getModule(s, subst)) + found.mgetOrPut(key, newSeq[AnchorSubst]()).add subst + for key, sList in found: + if sList.len == 1: + result.add sList[0] + else: # > 1, there are overloads, potential ambiguity in this `symKind` + if langSym.parametersProvided: + # there are non-group signatures, select only them + for s in sList: + if not s.langSym.isGroup: + result.add s + else: # when there are many overloads a link like foo_ points to all + # of them, so selecting the group + var foundGroup = false + for s in sList: + if s.langSym.isGroup: + result.add s + foundGroup = true + break + doAssert(foundGroup, + "docgen has not generated the group for $1 (file $2)" % [ + langSym.name, getModule(s, sList[0]) ]) + +proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo): + seq[AnchorSubst] = + let name = linkText.toLowerAscii + let substitutions = s.anchors.getOrDefault(name, newSeq[AnchorSubst]()) + for s in substitutions: + if s.kind in {arInternalRst, arExternalRst}: + result.add s proc addFootnoteNumManual(p: var RstParser, num: int) = ## add manually-numbered footnote @@ -693,7 +934,7 @@ proc addFootnoteNumAuto(p: var RstParser, label: string) = ## add auto-numbered footnote. ## Empty label [#] means it'll be resolved by the occurrence. if label == "": # simple auto-numbered [#] - p.s.lineFootnoteNum.add curLine(p) + p.s.lineFootnoteNum.add lineInfo(p) p.s.footnotes.add((fnAutoNumber, -1, p.s.lineFootnoteNum.len, -1, label)) else: # auto-numbered with label [#label] for fnote in p.s.footnotes: @@ -703,31 +944,39 @@ proc addFootnoteNumAuto(p: var RstParser, label: string) = p.s.footnotes.add((fnAutoNumberLabel, -1, -1, -1, label)) proc addFootnoteSymAuto(p: var RstParser) = - p.s.lineFootnoteSym.add curLine(p) + p.s.lineFootnoteSym.add lineInfo(p) p.s.footnotes.add((fnAutoSymbol, -1, -1, p.s.lineFootnoteSym.len, "")) -proc orderFootnotes(p: var RstParser) = +proc orderFootnotes(s: PRstSharedState) = ## numerate auto-numbered footnotes taking into account that all ## manually numbered ones always have preference. - ## Save the result back to p.s.footnotes. + ## Save the result back to `s.footnotes`. 
# Report an error if found any mismatch in number of automatic footnotes - proc listFootnotes(lines: seq[int]): string = + proc listFootnotes(locations: seq[TLineInfo]): string = + var lines: seq[string] + for info in locations: + if s.filenames.len > 1: + let file = getFilename(s.filenames, info.fileIndex) + lines.add file & ":" + else: # no need to add file name here if there is only 1 + lines.add "" + lines[^1].add $info.line result.add $lines.len & " (lines " & join(lines, ", ") & ")" - if p.s.lineFootnoteNum.len != p.s.lineFootnoteNumRef.len: - rstMessage(p, meFootnoteMismatch, - "$1 != $2" % [listFootnotes(p.s.lineFootnoteNum), - listFootnotes(p.s.lineFootnoteNumRef)] & + if s.lineFootnoteNum.len != s.lineFootnoteNumRef.len: + rstMessage(s, meFootnoteMismatch, + "$1 != $2" % [listFootnotes(s.lineFootnoteNum), + listFootnotes(s.lineFootnoteNumRef)] & " for auto-numbered footnotes") - if p.s.lineFootnoteSym.len != p.s.lineFootnoteSymRef.len: - rstMessage(p, meFootnoteMismatch, - "$1 != $2" % [listFootnotes(p.s.lineFootnoteSym), - listFootnotes(p.s.lineFootnoteSymRef)] & + if s.lineFootnoteSym.len != s.lineFootnoteSymRef.len: + rstMessage(s, meFootnoteMismatch, + "$1 != $2" % [listFootnotes(s.lineFootnoteSym), + listFootnotes(s.lineFootnoteSymRef)] & " for auto-symbol footnotes") var result: seq[FootnoteSubst] var manuallyN, autoN, autoSymbol: seq[FootnoteSubst] - for fs in p.s.footnotes: + for fs in s.footnotes: if fs.kind == fnManualNumber: manuallyN.add fs elif fs.kind in {fnAutoNumber, fnAutoNumberLabel}: autoN.add fs else: autoSymbol.add fs @@ -774,41 +1023,73 @@ proc orderFootnotes(p: var RstParser) = let label = footnoteAutoSymbols[symbolNum].repeat(nSymbols) result.add((fs.kind, -1, -1, fs.autoSymIdx, label)) - p.s.footnotes = result + s.footnotes = result -proc getFootnoteNum(p: var RstParser, label: string): int = +proc getFootnoteNum(s: PRstSharedState, label: string): int = ## get number from label. Must be called after `orderFootnotes`. result = -1 - for fnote in p.s.footnotes: + for fnote in s.footnotes: if fnote.label == label: return fnote.number -proc getFootnoteNum(p: var RstParser, order: int): int = +proc getFootnoteNum(s: PRstSharedState, order: int): int = ## get number from occurrence. Must be called after `orderFootnotes`. result = -1 - for fnote in p.s.footnotes: + for fnote in s.footnotes: if fnote.autoNumIdx == order: return fnote.number -proc getAutoSymbol(p: var RstParser, order: int): string = +proc getAutoSymbol(s: PRstSharedState, order: int): string = ## get symbol from occurrence of auto-symbol footnote. result = "???" 
- for fnote in p.s.footnotes: + for fnote in s.footnotes: if fnote.autoSymIdx == order: return fnote.label proc newRstNodeA(p: var RstParser, kind: RstNodeKind): PRstNode = ## create node and consume the current anchor result = newRstNode(kind) - if p.curAnchor != "": - result.anchor = p.curAnchor - p.curAnchor = "" + if p.curAnchors.len > 0: + result.anchor = p.curAnchors[0].anchor + addAnchorRst(p, "", result, manualDirectiveAnchor) template newLeaf(s: string): PRstNode = newRstLeaf(s) proc newLeaf(p: var RstParser): PRstNode = result = newLeaf(currentTok(p).symbol) +proc validRefnamePunct(x: string): bool = + ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names + x.len == 1 and x[0] in {'-', '_', '.', ':', '+'} + +func getRefnameIdx(p: RstParser, startIdx: int): int = + ## Gets last token index of a refname ("word" in RST terminology): + ## + ## reference names are single words consisting of alphanumerics plus + ## isolated (no two adjacent) internal hyphens, underscores, periods, + ## colons and plus signs; no whitespace or other characters are allowed. + ## + ## Refnames are used for: + ## - reference names + ## - role names + ## - directive names + ## - footnote labels + ## + # TODO: use this func in all other relevant places + var j = startIdx + if p.tok[j].kind == tkWord: + inc j + while p.tok[j].kind == tkPunct and validRefnamePunct(p.tok[j].symbol) and + p.tok[j+1].kind == tkWord: + inc j, 2 + result = j - 1 + +func getRefname(p: RstParser, startIdx: int): (string, int) = + let lastIdx = getRefnameIdx(p, startIdx) + result[1] = lastIdx + for j in startIdx..lastIdx: + result[0].add p.tok[j].symbol + proc getReferenceName(p: var RstParser, endStr: string): PRstNode = var res = newRstNode(rnInner) while true: @@ -837,54 +1118,82 @@ proc expect(p: var RstParser, tok: string) = if currentTok(p).symbol == tok: inc p.idx else: rstMessage(p, meExpected, tok) -proc isInlineMarkupEnd(p: RstParser, markup: string): bool = +proc inlineMarkdownEnd(p: RstParser): bool = + result = prevTok(p).kind notin {tkIndent, tkWhite} + ## (For a special case of ` we don't allow spaces surrounding it + ## unlike original Markdown because this behavior confusing/useless) + +proc inlineRstEnd(p: RstParser): bool = # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules - result = currentTok(p).symbol == markup - if not result: return # Rule 2: result = prevTok(p).kind notin {tkIndent, tkWhite} if not result: return # Rule 7: result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or - markup in ["``", "`"] and nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof} or nextTok(p).symbol[0] in {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'} - if not result: return - # Rule 4: - if p.idx > 0: - if markup != "``" and prevTok(p).symbol == "\\": - result = false -proc isInlineMarkupStart(p: RstParser, markup: string): bool = - # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules - var d: char - if markup != "_`": +proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = + if exact: result = currentTok(p).symbol == markup - else: # _` is a 2 token case - result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`" + else: + result = currentTok(p).symbol.endsWith markup + if (not result) and markup == "``": + # check that escaping may have splitted `` to 2 tokens ` and ` + result = currentTok(p).symbol == "`" and 
prevTok(p).symbol == "`" if not result: return - # Rule 6: + # surroundings check + if markup in ["_", "__"]: + result = inlineRstEnd(p) + else: + if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p) + else: result = inlineRstEnd(p) + +proc rstRuleSurround(p: RstParser): bool = + result = true + # Rules 4 & 5: + if p.idx > 0: + var d: char + var c = prevTok(p).symbol[0] + case c + of '\'', '\"': d = c + of '(': d = ')' + of '[': d = ']' + of '{': d = '}' + of '<': d = '>' + else: d = '\0' + if d != '\0': result = nextTok(p).symbol[0] != d + +proc inlineMarkdownStart(p: RstParser): bool = + result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} + if not result: return + # this rst rule is really nice, let us use it in Markdown mode too. + result = rstRuleSurround(p) + +proc inlineRstStart(p: RstParser): bool = + ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules + # Rule 6 result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or - (markup in ["``", "`"] and prevTok(p).kind in {tkIndent, tkWhite, tkWord}) or prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'} if not result: return # Rule 1: result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} if not result: return - # Rules 4 & 5: - if p.idx > 0: - if prevTok(p).symbol == "\\": - result = false - else: - var c = prevTok(p).symbol[0] - case c - of '\'', '\"': d = c - of '(': d = ')' - of '[': d = ']' - of '{': d = '}' - of '<': d = '>' - else: d = '\0' - if d != '\0': result = nextTok(p).symbol[0] != d + result = rstRuleSurround(p) + +proc isInlineMarkupStart(p: RstParser, markup: string): bool = + if markup != "_`": + result = currentTok(p).symbol == markup + else: # _` is a 2 token case + result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`" + if not result: return + # surroundings check + if markup in ["_", "__", "[", "|"]: + # Note: we require space/punctuation even before [markdown link](...) 
+ result = inlineRstStart(p) + else: + if roPreferMarkdown in p.s.options: result = inlineMarkdownStart(p) + else: result = inlineRstStart(p) proc match(p: RstParser, start: int, expr: string): bool = # regular expressions are: @@ -905,7 +1214,10 @@ proc match(p: RstParser, start: int, expr: string): bool = var last = expr.len - 1 while i <= last: case expr[i] - of 'w': result = p.tok[j].kind == tkWord + of 'w': + let lastIdx = getRefnameIdx(p, j) + result = lastIdx >= j + if result: j = lastIdx of ' ': result = p.tok[j].kind == tkWhite of 'i': result = p.tok[j].kind == tkIndent of 'I': result = p.tok[j].kind in {tkIndent, tkEof} @@ -939,7 +1251,20 @@ proc match(p: RstParser, start: int, expr: string): bool = inc i result = true -proc fixupEmbeddedRef(n, a, b: PRstNode) = +proc safeProtocol*(linkStr: var string): string = + # Returns link's protocol and, if it's not safe, clears `linkStr` + result = "" + if scanf(linkStr, "$w:", result): + # if it has a protocol at all, ensure that it's not 'javascript:' or worse: + if cmpIgnoreCase(result, "http") == 0 or + cmpIgnoreCase(result, "https") == 0 or + cmpIgnoreCase(result, "ftp") == 0: + discard "it's fine" + else: + linkStr = "" + +proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool = + # Returns `true` if the link belongs to an allowed protocol var sep = - 1 for i in countdown(n.len - 2, 0): if n.sons[i].text == "<": @@ -947,54 +1272,89 @@ proc fixupEmbeddedRef(n, a, b: PRstNode) = break var incr = if sep > 0 and n.sons[sep - 1].text[0] == ' ': 2 else: 1 for i in countup(0, sep - incr): a.add(n.sons[i]) - for i in countup(sep + 1, n.len - 2): b.add(n.sons[i]) + var linkStr = "" + for i in countup(sep + 1, n.len - 2): linkStr.add(n.sons[i].addNodes) + if linkStr != "": + let protocol = safeProtocol(linkStr) + result = linkStr != "" + if not result: + rstMessage(p, mwBrokenLink, protocol, + p.tok[p.idx-3].line, p.tok[p.idx-3].col) + b.add newLeaf(linkStr) + +proc whichRole(p: RstParser, sym: string): RstNodeKind = + result = whichRoleAux(sym) + if result == rnUnknownRole: + rstMessage(p, mwUnsupportedLanguage, sym) + +proc toInlineCode(n: PRstNode, language: string): PRstNode = + ## Creates rnInlineCode and attaches `n` contents as code (in 3rd son). + result = newRstNode(rnInlineCode, info=n.info) + let args = newRstNode(rnDirArg) + var lang = language + if language == "cpp": lang = "c++" + elif language == "csharp": lang = "c#" + args.add newLeaf(lang) + result.add args + result.add PRstNode(nil) + var lb = newRstNode(rnLiteralBlock) + var s: string + for i in n.sons: + assert i.kind == rnLeaf + s.add i.text + lb.add newLeaf(s) + result.add lb + +proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode = + let newN = newRstNode(rnInner, n.sons) + let newSons = @[newN, newLeaf(roleName)] + result = newRstNode(kind, newSons) proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode = + ## Finalizes node `n` that was tentatively determined as interpreted text. 
var newKind = n.kind var newSons = n.sons - if isInlineMarkupEnd(p, "_") or isInlineMarkupEnd(p, "__"): + + proc finalizeInterpreted(node: PRstNode, newKind: RstNodeKind, + newSons: seq[PRstNode], roleName: string): + PRstNode {.nimcall.} = + # fixes interpreted text (`x` or `y`:role:) to proper internal AST format + if newKind in {rnUnknownRole, rnCodeFragment}: + result = node.toOtherRole(newKind, roleName) + elif newKind == rnInlineCode: + result = node.toInlineCode(language=roleName) + else: + result = newRstNode(newKind, newSons) + + if isInlineMarkupEnd(p, "_", exact=true) or + isInlineMarkupEnd(p, "__", exact=true): inc p.idx if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">": var a = newRstNode(rnInner) var b = newRstNode(rnInner) - fixupEmbeddedRef(n, a, b) - if a.len == 0: - newKind = rnStandaloneHyperlink - newSons = @[b] - else: - newKind = rnHyperlink - newSons = @[a, b] - setRef(p, rstnodeToRefname(a), b) - elif n.kind == rnInterpretedText: - newKind = rnRef - else: - newKind = rnRef - newSons = @[n] - result = newRstNode(newKind, newSons) + if fixupEmbeddedRef(p, n, a, b): + if a.len == 0: # e.g. `<a_named_relative_link>`_ + newKind = rnStandaloneHyperlink + newSons = @[b] + else: # e.g. `link title <http://site>`_ + newKind = rnHyperlink + newSons = @[a, b] + setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias) + else: # include as plain text, not a link + newKind = rnInner + newSons = n.sons + result = newRstNode(newKind, newSons) + else: # some link that will be resolved in `resolveSubs` + newKind = rnRstRef + result = newRstNode(newKind, sons=newSons, info=n.info) elif match(p, p.idx, ":w:"): # a role: - if nextTok(p).symbol == "idx": - newKind = rnIdx - elif nextTok(p).symbol == "literal": - newKind = rnInlineLiteral - elif nextTok(p).symbol == "strong": - newKind = rnStrongEmphasis - elif nextTok(p).symbol == "emphasis": - newKind = rnEmphasis - elif nextTok(p).symbol == "sub" or - nextTok(p).symbol == "subscript": - newKind = rnSub - elif nextTok(p).symbol == "sup" or - nextTok(p).symbol == "supscript": - newKind = rnSup - else: - newKind = rnGeneralRole - let newN = newRstNode(rnInner, n.sons) - newSons = @[newN, newLeaf(nextTok(p).symbol)] - inc p.idx, 3 - result = newRstNode(newKind, newSons) - else: # no change - result = n + let (roleName, lastIdx) = getRefname(p, p.idx+1) + newKind = whichRole(p, roleName) + result = n.finalizeInterpreted(newKind, newSons, roleName) + p.idx = lastIdx + 2 + else: + result = n.finalizeInterpreted(p.s.currRoleKind, newSons, p.s.currRole) proc matchVerbatim(p: RstParser, start: int, expr: string): int = result = start @@ -1015,49 +1375,78 @@ proc parseSmiley(p: var RstParser): PRstNode = result.text = val return -proc validRefnamePunct(x: string): bool = - ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names - x.len == 1 and x[0] in {'-', '_', '.', ':', '+'} - proc isUrl(p: RstParser, i: int): bool = result = p.tok[i+1].symbol == ":" and p.tok[i+2].symbol == "//" and p.tok[i+3].kind == tkWord and p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"] +proc checkParen(token: Token, parensStack: var seq[char]): bool {.inline.} = + ## Returns `true` iff `token` is a closing parenthesis for some + ## previous opening parenthesis saved in `parensStack`. 
+ ## This is according Markdown balanced parentheses rule + ## (https://spec.commonmark.org/0.29/#link-destination) + ## to allow links like + ## https://en.wikipedia.org/wiki/APL_(programming_language), + ## we use it for RST also. + result = false + if token.kind == tkPunct: + let c = token.symbol[0] + if c in {'(', '[', '{'}: # push + parensStack.add c + elif c in {')', ']', '}'}: # try pop + # a case like ([) inside a link is allowed and [ is also `pop`ed: + for i in countdown(parensStack.len - 1, 0): + if (parensStack[i] == '(' and c == ')' or + parensStack[i] == '[' and c == ']' or + parensStack[i] == '{' and c == '}'): + parensStack.setLen i + result = true + break + +proc parseUrl(p: var RstParser): PRstNode = + ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks + result = newRstNode(rnStandaloneHyperlink) + var lastIdx = p.idx + var closedParenIdx = p.idx - 1 # for balanced parens rule + var parensStack: seq[char] + while p.tok[lastIdx].kind in {tkWord, tkPunct, tkOther}: + let isClosing = checkParen(p.tok[lastIdx], parensStack) + if isClosing: + closedParenIdx = lastIdx + inc lastIdx + dec lastIdx + # standalone URL can not end with punctuation in RST + while lastIdx > closedParenIdx and p.tok[lastIdx].kind == tkPunct and + p.tok[lastIdx].symbol != "/": + dec lastIdx + var s = "" + for i in p.idx .. lastIdx: s.add p.tok[i].symbol + result.add s + p.idx = lastIdx + 1 + proc parseWordOrRef(p: var RstParser, father: PRstNode) = ## Parses a normal word or may be a reference or URL. if nextTok(p).kind != tkPunct: # <- main path, a normal word father.add newLeaf(p) inc p.idx elif isUrl(p, p.idx): # URL http://something - var n = newRstNode(rnStandaloneHyperlink) - while true: - case currentTok(p).kind - of tkWord, tkAdornment, tkOther: discard - of tkPunct: - if nextTok(p).kind notin {tkWord, tkAdornment, tkOther, tkPunct}: - break - else: break - n.add(newLeaf(p)) - inc p.idx - father.add(n) + father.add parseUrl(p) else: # check for reference (probably, long one like some.ref.with.dots_ ) var saveIdx = p.idx - var isRef = false + var reference: PRstNode = nil inc p.idx while currentTok(p).kind in {tkWord, tkPunct}: if currentTok(p).kind == tkPunct: - if isInlineMarkupEnd(p, "_"): - isRef = true + if isInlineMarkupEnd(p, "_", exact=true): + reference = newRstNode(rnRstRef, info=lineInfo(p, saveIdx)) break if not validRefnamePunct(currentTok(p).symbol): break inc p.idx - if isRef: - let r = newRstNode(rnRef) - for i in saveIdx..p.idx-1: r.add newLeaf(p.tok[i].symbol) - father.add r + if reference != nil: + for i in saveIdx..p.idx-1: reference.add newLeaf(p.tok[i].symbol) + father.add reference inc p.idx # skip final _ else: # 1 normal word father.add newLeaf(p.tok[saveIdx].symbol) @@ -1065,10 +1454,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) = proc parseBackslash(p: var RstParser, father: PRstNode) = assert(currentTok(p).kind == tkPunct) - if currentTok(p).symbol == "\\\\": - father.add newLeaf("\\") - inc p.idx - elif currentTok(p).symbol == "\\": + if currentTok(p).symbol == "\\": # XXX: Unicode? inc p.idx if currentTok(p).kind != tkWhite: father.add(newLeaf(p)) @@ -1086,14 +1472,39 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string, while true: case currentTok(p).kind of tkPunct: - if isInlineMarkupEnd(p, postfix): + if isInlineMarkupEnd(p, postfix, exact=false): + let l = currentTok(p).symbol.len + if l > postfix.len: + # handle cases like *emphasis with stars****. (It's valid RST!) 
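# A standalone sketch (hypothetical helper, not the parser's API) of the
# standalone-URL treatment implemented by `checkParen`/`parseUrl` above:
# trailing punctuation is dropped, but a ')' that closes a '(' inside the URL
# is kept, e.g. in https://en.wikipedia.org/wiki/APL_(programming_language).
proc trimUrlEnd(url: string): string =
  var depth = 0
  var lastBalancedClose = -1
  for i, c in url:
    if c == '(': inc depth
    elif c == ')' and depth > 0:
      dec depth
      lastBalancedClose = i
  var last = url.high
  while last > lastBalancedClose and
      url[last] in {'.', ',', ';', ':', '!', '?', ')', ']', '}'}:
    dec last
  result = url[0 .. last]

doAssert trimUrlEnd("https://en.wikipedia.org/wiki/APL_(programming_language),") ==
    "https://en.wikipedia.org/wiki/APL_(programming_language)"
doAssert trimUrlEnd("http://example.org/foo.") == "http://example.org/foo"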
+ father.add newLeaf(currentTok(p).symbol[0 ..< l - postfix.len]) + elif postfix == "``" and currentTok(p).symbol == "`" and + prevTok(p).symbol == "`": + # handle cases like ``literal\`` - delete ` already added after \ + father.sons.setLen(father.sons.len - 1) inc p.idx break - elif interpretBackslash: - parseBackslash(p, father) else: - father.add(newLeaf(p)) - inc p.idx + if postfix == "`": + if currentTok(p).symbol == "\\": + if nextTok(p).symbol == "\\": + father.add newLeaf("\\") + father.add newLeaf("\\") + inc p.idx, 2 + elif nextTok(p).symbol == "`": # escape ` + father.add newLeaf("`") + inc p.idx, 2 + else: + father.add newLeaf("\\") + inc p.idx + else: + father.add(newLeaf(p)) + inc p.idx + else: + if interpretBackslash: + parseBackslash(p, father) + else: + father.add(newLeaf(p)) + inc p.idx of tkAdornment, tkWord, tkOther: father.add(newLeaf(p)) inc p.idx @@ -1108,62 +1519,188 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string, inc p.idx else: rstMessage(p, meExpected, postfix, line, col) +proc parseMarkdownCodeblockFields(p: var RstParser): PRstNode = + ## Parses additional (after language string) code block parameters + ## in a format *suggested* in the `CommonMark Spec`_ with handling of `"`. + if currentTok(p).kind == tkIndent: + result = nil + else: + result = newRstNode(rnFieldList) + while currentTok(p).kind notin {tkIndent, tkEof}: + if currentTok(p).kind == tkWhite: + inc p.idx + else: + let field = newRstNode(rnField) + var fieldName = "" + while currentTok(p).kind notin {tkWhite, tkIndent, tkEof} and + currentTok(p).symbol != "=": + fieldName.add currentTok(p).symbol + inc p.idx + field.add(newRstNode(rnFieldName, @[newLeaf(fieldName)])) + if currentTok(p).kind == tkWhite: inc p.idx + let fieldBody = newRstNode(rnFieldBody) + if currentTok(p).symbol == "=": + inc p.idx + if currentTok(p).kind == tkWhite: inc p.idx + var fieldValue = "" + if currentTok(p).symbol == "\"": + while true: + fieldValue.add currentTok(p).symbol + inc p.idx + if currentTok(p).kind == tkEof: + rstMessage(p, meExpected, "\"") + elif currentTok(p).symbol == "\"": + fieldValue.add "\"" + inc p.idx + break + else: + while currentTok(p).kind notin {tkWhite, tkIndent, tkEof}: + fieldValue.add currentTok(p).symbol + inc p.idx + fieldBody.add newLeaf(fieldValue) + field.add(fieldBody) + result.add(field) + +proc mayLoadFile(p: RstParser, result: var PRstNode) = + var filename = strip(getFieldValue(result, "file"), + chars = Whitespace + {'"'}) + if filename != "": + if roSandboxDisabled notin p.s.options: + let tok = p.tok[p.idx-2] + rstMessage(p, meSandboxedDirective, "file", tok.line, tok.col) + var path = p.findRelativeFile(filename) + if path == "": rstMessage(p, meCannotOpenFile, filename) + var n = newRstNode(rnLiteralBlock) + n.add newLeaf(readFile(path)) + result.sons[2] = n + +proc defaultCodeLangNim(p: RstParser, result: var PRstNode) = + # Create a field block if the input block didn't have any. + if result.sons[1].isNil: result.sons[1] = newRstNode(rnFieldList) + assert result.sons[1].kind == rnFieldList + # Hook the extra field and specify the Nim language as value. 
+ var extraNode = newRstNode(rnField, info=lineInfo(p)) + extraNode.add(newRstNode(rnFieldName)) + extraNode.add(newRstNode(rnFieldBody)) + extraNode.sons[0].add newLeaf("default-language") + extraNode.sons[1].add newLeaf("Nim") + result.sons[1].add(extraNode) + proc parseMarkdownCodeblock(p: var RstParser): PRstNode = + result = newRstNodeA(p, rnCodeBlock) + result.sons.setLen(3) + let line = curLine(p) + let baseCol = currentTok(p).col + let baseSym = currentTok(p).symbol # usually just ``` + inc p.idx + result.info = lineInfo(p) var args = newRstNode(rnDirArg) if currentTok(p).kind == tkWord: args.add(newLeaf(p)) inc p.idx + result.sons[1] = parseMarkdownCodeblockFields(p) + mayLoadFile(p, result) else: args = nil var n = newLeaf("") + var isFirstLine = true while true: - case currentTok(p).kind - of tkEof: - rstMessage(p, meExpected, "```") + if currentTok(p).kind == tkEof: + rstMessage(p, meMissingClosing, + "$1 (started at line $2)" % [baseSym, $line]) break - of tkPunct, tkAdornment: - if currentTok(p).symbol == "```": - inc p.idx - break - else: - n.text.add(currentTok(p).symbol) - inc p.idx + elif nextTok(p).kind in {tkPunct, tkAdornment} and + nextTok(p).symbol[0] == baseSym[0] and + nextTok(p).symbol.len >= baseSym.len: + inc p.idx, 2 + break + elif currentTok(p).kind == tkIndent: + if not isFirstLine: + n.text.add "\n" + if currentTok(p).ival > baseCol: + n.text.add " ".repeat(currentTok(p).ival - baseCol) + elif currentTok(p).ival < baseCol: + rstMessage(p, mwRstStyle, + "unexpected de-indentation in Markdown code block") + inc p.idx else: n.text.add(currentTok(p).symbol) inc p.idx - var lb = newRstNode(rnLiteralBlock) - lb.add(n) - result = newRstNodeA(p, rnCodeBlock) - result.add(args) - result.add(PRstNode(nil)) - result.add(lb) + isFirstLine = false + result.sons[0] = args + if result.sons[2] == nil: + var lb = newRstNode(rnLiteralBlock) + lb.add(n) + result.sons[2] = lb + if result.sons[0].isNil and roNimFile in p.s.options: + defaultCodeLangNim(p, result) proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool = - result = true - var desc, link = "" + # Parses Markdown link. If it's Pandoc auto-link then its second + # son (target) will be in tokenized format (rnInner with leafs). 
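import std/strutils

# A rough standalone classifier (hypothetical, not the parser itself) for the
# three Markdown link forms handled by `parseMarkdownLink`: inline
# "[desc](url)", reference "[desc][target]" and Pandoc-style implicit
# reference "[desc]".
type MdLinkKind = enum mlNone, mlInline, mlReference, mlImplicitRef

proc classifyMdLink(s: string): MdLinkKind =
  let close = s.find(']')
  if s.len < 2 or s[0] != '[' or close < 0: result = mlNone
  elif close + 1 < s.len and s[close + 1] == '(': result = mlInline
  elif close + 1 < s.len and s[close + 1] == '[': result = mlReference
  else: result = mlImplicitRef

doAssert classifyMdLink("[Nim](https://nim-lang.org)") == mlInline
doAssert classifyMdLink("[description][target]") == mlReference
doAssert classifyMdLink("[implicit section reference]") == mlImplicitRef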
+ var desc = newRstNode(rnInner) var i = p.idx + var parensStack: seq[char] template parse(endToken, dest) = + parensStack.setLen 0 inc i # skip begin token while true: - if p.tok[i].kind in {tkEof, tkIndent}: return false - if p.tok[i].symbol == endToken: break - dest.add p.tok[i].symbol + if p.tok[i].kind == tkEof: return false + if p.tok[i].kind == tkIndent and p.tok[i+1].kind == tkIndent: + return false + let isClosing = checkParen(p.tok[i], parensStack) + if p.tok[i].symbol == endToken and not isClosing: + break + let symbol = if p.tok[i].kind == tkIndent: " " else: p.tok[i].symbol + when dest is string: dest.add symbol + else: dest.add newLeaf(symbol) inc i inc i # skip end token parse("]", desc) - if p.tok[i].symbol != "(": return false - parse(")", link) - let child = newRstNode(rnHyperlink) - child.add desc - child.add link - # only commit if we detected no syntax error: - father.add child - p.idx = i - result = true + if p.tok[i].symbol == "(": + var link = "" + let linkIdx = i + 1 + parse(")", link) + # only commit if we detected no syntax error: + let protocol = safeProtocol(link) + if link == "": + result = false + rstMessage(p, mwBrokenLink, protocol, + p.tok[linkIdx].line, p.tok[linkIdx].col) + else: + let child = newRstNode(rnHyperlink) + child.add newLeaf(desc.addNodes) + child.add link + father.add child + p.idx = i + result = true + elif roPreferMarkdown in p.s.options: + # Use Pandoc's implicit_header_references extension + var n = newRstNode(rnPandocRef) + if p.tok[i].symbol == "[": + var link = newRstNode(rnInner) + let targetIdx = i + 1 + parse("]", link) + n.add desc + if link.len != 0: # [description][target] + n.add link + n.info = lineInfo(p, targetIdx) + else: # [description=target][] + n.add desc + n.info = lineInfo(p, p.idx + 1) + else: # [description=target] + n.add desc + n.add desc # target is the same as description + n.info = lineInfo(p, p.idx + 1) + father.add n + p.idx = i + result = true + else: + result = false -proc getFootnoteType(label: PRstNode): (FootnoteType, int) = +proc getRstFootnoteType(label: PRstNode): (FootnoteType, int) = if label.sons.len >= 1 and label.sons[0].kind == rnLeaf and label.sons[0].text == "#": if label.sons.len == 1: @@ -1176,12 +1713,23 @@ proc getFootnoteType(label: PRstNode): (FootnoteType, int) = elif label.len == 1 and label.sons[0].kind == rnLeaf: try: result = (fnManualNumber, parseInt(label.sons[0].text)) - except: + except ValueError: result = (fnCitation, -1) else: result = (fnCitation, -1) -proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = +proc getMdFootnoteType(label: PRstNode): (FootnoteType, int) = + try: + result = (fnManualNumber, parseInt(label.sons[0].text)) + except ValueError: + result = (fnAutoNumberLabel, -1) + +proc getFootnoteType(s: PRstSharedState, label: PRstNode): (FootnoteType, int) = + ## Returns footnote/citation type and manual number (if present). + if isMd(s): getMdFootnoteType(label) + else: getRstFootnoteType(label) + +proc parseRstFootnoteName(p: var RstParser, reference: bool): PRstNode = ## parse footnote/citation label. Precondition: start at `[`. ## Label text should be valid ref. name symbol, otherwise nil is returned. var i = p.idx + 1 @@ -1211,8 +1759,57 @@ proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = inc i p.idx = i +proc isMdFootnoteName(p: RstParser, reference: bool): bool = + ## Pandoc Markdown footnote extension. 
+ let j = p.idx + result = p.tok[j].symbol == "[" and p.tok[j+1].symbol == "^" and + p.tok[j+2].kind == tkWord + +proc parseMdFootnoteName(p: var RstParser, reference: bool): PRstNode = + if isMdFootnoteName(p, reference): + result = newRstNode(rnInner) + var j = p.idx + 2 + while p.tok[j].kind in {tkWord, tkOther} or + validRefnamePunct(p.tok[j].symbol): + result.add newLeaf(p.tok[j].symbol) + inc j + if j == p.idx + 2: + return nil + if p.tok[j].symbol == "]": + if reference: + p.idx = j + 1 # skip ] + else: + if p.tok[j+1].symbol == ":": + p.idx = j + 2 # skip ]: + else: + result = nil + else: + result = nil + else: + result = nil + +proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = + if isMd(p): parseMdFootnoteName(p, reference) + else: + if isInlineMarkupStart(p, "["): parseRstFootnoteName(p, reference) + else: nil + +proc isMarkdownCodeBlock(p: RstParser, idx: int): bool = + let tok = p.tok[idx] + template allowedSymbol: bool = + (tok.symbol[0] == '`' or + roPreferMarkdown in p.s.options and tok.symbol[0] == '~') + result = (roSupportMarkdown in p.s.options and + tok.kind in {tkPunct, tkAdornment} and + allowedSymbol and + tok.symbol.len >= 3) + +proc isMarkdownCodeBlock(p: RstParser): bool = + isMarkdownCodeBlock(p, p.idx) + proc parseInline(p: var RstParser, father: PRstNode) = var n: PRstNode # to be used in `if` condition + let saveIdx = p.idx case currentTok(p).kind of tkPunct: if isInlineMarkupStart(p, "***"): @@ -1231,43 +1828,55 @@ proc parseInline(p: var RstParser, father: PRstNode) = var n = newRstNode(rnInlineTarget) inc p.idx parseUntil(p, n, "`", false) - let refn = rstnodeToRefname(n) - p.s.anchors.add (refn, @[refn]) + n.anchor = rstnodeToRefname(n) + addAnchorRst(p, name = linkName(n), target = n, + anchorType=manualInlineAnchor) father.add(n) - elif roSupportMarkdown in p.s.options and currentTok(p).symbol == "```": - inc p.idx + elif isMarkdownCodeBlock(p): father.add(parseMarkdownCodeblock(p)) elif isInlineMarkupStart(p, "``"): var n = newRstNode(rnInlineLiteral) parseUntil(p, n, "``", false) father.add(n) + elif match(p, p.idx, ":w:") and + (var lastIdx = getRefnameIdx(p, p.idx + 1); + p.tok[lastIdx+2].symbol == "`"): + let (roleName, _) = getRefname(p, p.idx+1) + let k = whichRole(p, roleName) + var n = newRstNode(k) + p.idx = lastIdx + 2 + if k == rnInlineCode: + n = n.toInlineCode(language=roleName) + parseUntil(p, n, "`", false) # bug #17260 + if k in {rnUnknownRole, rnCodeFragment}: + n = n.toOtherRole(k, roleName) + father.add(n) elif isInlineMarkupStart(p, "`"): - var n = newRstNode(rnInterpretedText) - parseUntil(p, n, "`", true) + var n = newRstNode(rnInterpretedText, info=lineInfo(p, p.idx+1)) + parseUntil(p, n, "`", false) # bug #17260 n = parsePostfix(p, n) father.add(n) elif isInlineMarkupStart(p, "|"): - var n = newRstNode(rnSubstitutionReferences) + var n = newRstNode(rnSubstitutionReferences, info=lineInfo(p, p.idx+1)) parseUntil(p, n, "|", false) father.add(n) - elif roSupportMarkdown in p.s.options and - currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and - parseMarkdownLink(p, father): - discard "parseMarkdownLink already processed it" - elif isInlineMarkupStart(p, "[") and nextTok(p).symbol != "[" and + elif currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and (n = parseFootnoteName(p, reference=true); n != nil): var nn = newRstNode(rnFootnoteRef) + nn.info = lineInfo(p, saveIdx+1) nn.add n - let (fnType, _) = getFootnoteType(n) + let (fnType, _) = getFootnoteType(p.s, n) case fnType of fnAutoSymbol: - 
p.s.lineFootnoteSymRef.add curLine(p) - nn.order = p.s.lineFootnoteSymRef.len + p.s.lineFootnoteSymRef.add lineInfo(p) of fnAutoNumber: - p.s.lineFootnoteNumRef.add curLine(p) - nn.order = p.s.lineFootnoteNumRef.len + p.s.lineFootnoteNumRef.add lineInfo(p) else: discard father.add(nn) + elif roSupportMarkdown in p.s.options and + currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and + parseMarkdownLink(p, father): + discard "parseMarkdownLink already processed it" else: if roSupportSmilies in p.s.options: let n = parseSmiley(p) @@ -1283,8 +1892,7 @@ proc parseInline(p: var RstParser, father: PRstNode) = return parseWordOrRef(p, father) of tkAdornment, tkOther, tkWhite: - if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```": - inc p.idx + if isMarkdownCodeBlock(p): father.add(parseMarkdownCodeblock(p)) return if roSupportSmilies in p.s.options: @@ -1297,44 +1905,39 @@ proc parseInline(p: var RstParser, father: PRstNode) = else: discard proc getDirective(p: var RstParser): string = - if currentTok(p).kind == tkWhite and nextTok(p).kind == tkWord: - var j = p.idx - inc p.idx - result = currentTok(p).symbol - inc p.idx - while currentTok(p).kind in {tkWord, tkPunct, tkAdornment, tkOther}: - if currentTok(p).symbol == "::": break - result.add(currentTok(p).symbol) - inc p.idx - if currentTok(p).kind == tkWhite: inc p.idx - if currentTok(p).symbol == "::": - inc p.idx - if currentTok(p).kind == tkWhite: inc p.idx - else: - p.idx = j # set back - result = "" # error + result = "" + if currentTok(p).kind == tkWhite: + let (name, lastIdx) = getRefname(p, p.idx + 1) + let afterIdx = lastIdx + 1 + if name.len > 0: + if p.tok[afterIdx].symbol == "::": + result = name + p.idx = afterIdx + 1 + if currentTok(p).kind == tkWhite: + inc p.idx + elif currentTok(p).kind != tkIndent: + rstMessage(p, mwRstStyle, + "whitespace or newline expected after directive " & name) + result = result.toLowerAscii() + elif p.tok[afterIdx].symbol == ":": + rstMessage(p, mwRstStyle, + "double colon :: may be missing at end of '" & name & "'", + p.tok[afterIdx].line, p.tok[afterIdx].col) + elif p.tok[afterIdx].kind == tkPunct and p.tok[afterIdx].symbol[0] == ':': + rstMessage(p, mwRstStyle, + "too many colons for a directive (should be ::)", + p.tok[afterIdx].line, p.tok[afterIdx].col) + +proc parseComment(p: var RstParser, col: int): PRstNode = + if currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent: + inc p.idx # empty comment else: - result = "" - result = result.toLowerAscii() - -proc parseComment(p: var RstParser): PRstNode = - case currentTok(p).kind - of tkIndent, tkEof: - if currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent: - inc p.idx # empty comment - else: - var indent = currentTok(p).ival - while true: - case currentTok(p).kind - of tkEof: - break - of tkIndent: - if currentTok(p).ival < indent: break - else: - discard + while currentTok(p).kind != tkEof: + if currentTok(p).kind == tkIndent and currentTok(p).ival > col or + currentTok(p).kind != tkIndent and currentTok(p).col > col: inc p.idx - else: - while currentTok(p).kind notin {tkIndent, tkEof}: inc p.idx + else: + break result = nil proc parseLine(p: var RstParser, father: PRstNode) = @@ -1350,22 +1953,103 @@ proc parseUntilNewline(p: var RstParser, father: PRstNode) = of tkEof, tkIndent: break proc parseSection(p: var RstParser, result: PRstNode) {.gcsafe.} + +proc tokenAfterNewline(p: RstParser, start: int): int = + result = start + while true: + case p.tok[result].kind + of tkEof: + break + of tkIndent: + inc 
result + break + else: inc result + +proc tokenAfterNewline(p: RstParser): int {.inline.} = + result = tokenAfterNewline(p, p.idx) + +proc getWrappableIndent(p: RstParser): int = + ## Gets baseline indentation for bodies of field lists and directives. + ## Handles situations like this (with possible de-indent in [case.3]):: + ## + ## :field: definition [case.1] + ## + ## currInd currentTok(p).col + ## | | + ## v v + ## + ## .. Note:: defItem: [case.2] + ## definition + ## + ## ^ + ## | + ## nextIndent + ## + ## .. Note:: - point1 [case.3] + ## - point 2 + ## + ## ^ + ## | + ## nextIndent + if currentTok(p).kind == tkIndent: + result = currentTok(p).ival + else: + var nextIndent = p.tok[tokenAfterNewline(p)-1].ival + if nextIndent <= currInd(p): # parse only this line [case.1] + result = currentTok(p).col + elif nextIndent >= currentTok(p).col: # may be a definition list [case.2] + result = currentTok(p).col + else: + result = nextIndent # allow parsing next lines [case.3] + +proc getMdBlockIndent(p: RstParser): int = + ## Markdown version of `getWrappableIndent`. + if currentTok(p).kind == tkIndent: + result = currentTok(p).ival + else: + var nextIndent = p.tok[tokenAfterNewline(p)-1].ival + # TODO: Markdown-compliant definition should allow nextIndent == currInd(p): + if nextIndent <= currInd(p): # parse only this line + result = currentTok(p).col + else: + result = nextIndent # allow parsing next lines [case.3] + +proc indFollows(p: RstParser): bool = + result = currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) + +proc parseBlockContent(p: var RstParser, father: var PRstNode, + contentParser: SectionParser): bool {.gcsafe.} = + ## parse the final content part of explicit markup blocks (directives, + ## footnotes, etc). Returns true if succeeded. + if currentTok(p).kind != tkIndent or indFollows(p): + let blockIndent = getWrappableIndent(p) + pushInd(p, blockIndent) + let content = contentParser(p) + popInd(p) + father.add content + result = true + +proc parseSectionWrapper(p: var RstParser): PRstNode = + result = newRstNode(rnInner) + parseSection(p, result) + while result.kind == rnInner and result.len == 1: + result = result.sons[0] + proc parseField(p: var RstParser): PRstNode = ## Returns a parsed rnField node. ## ## rnField nodes have two children nodes, a rnFieldName and a rnFieldBody. 
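# A simplified standalone model (hypothetical) of how `getWrappableIndent`
# above chooses the base indentation for a field/directive body when the
# current token is not an indent; it mirrors the three documented cases.
proc wrappableIndent(currInd, currentCol, nextIndent: int): int =
  if nextIndent <= currInd:       # [case.1] only the rest of this line
    result = currentCol
  elif nextIndent >= currentCol:  # [case.2] e.g. a definition item follows
    result = currentCol
  else:                           # [case.3] de-indented continuation lines
    result = nextIndent

doAssert wrappableIndent(0, 10, 0) == 10   # [case.1]
doAssert wrappableIndent(0, 10, 3) == 3    # [case.3]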
- result = newRstNode(rnField) + result = newRstNode(rnField, info=lineInfo(p)) var col = currentTok(p).col var fieldname = newRstNode(rnFieldName) parseUntil(p, fieldname, ":", false) var fieldbody = newRstNode(rnFieldBody) - if currentTok(p).kind != tkIndent: parseLine(p, fieldbody) - if currentTok(p).kind == tkIndent: - var indent = currentTok(p).ival - if indent > col: - pushInd(p, indent) - parseSection(p, fieldbody) - popInd(p) + if currentTok(p).kind == tkWhite: inc p.idx + let indent = getWrappableIndent(p) + if indent > col: + pushInd(p, indent) + parseSection(p, fieldbody) + popInd(p) result.add(fieldname) result.add(fieldbody) @@ -1424,7 +2108,7 @@ proc parseLiteralBlock(p: var RstParser): PRstNode = var n = newLeaf("") if currentTok(p).kind == tkIndent: var indent = currentTok(p).ival - inc p.idx + while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines while true: case currentTok(p).kind of tkEof: @@ -1445,6 +2129,44 @@ proc parseLiteralBlock(p: var RstParser): PRstNode = inc p.idx result.add(n) +proc parseQuotedLiteralBlock(p: var RstParser): PRstNode = + result = newRstNodeA(p, rnLiteralBlock) + var n = newLeaf("") + if currentTok(p).kind == tkIndent: + var indent = currInd(p) + while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines + var quoteSym = currentTok(p).symbol[0] + while true: + case currentTok(p).kind + of tkEof: + break + of tkIndent: + if currentTok(p).ival < indent: + break + elif currentTok(p).ival == indent: + if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteSym: + n.text.add("\n") + inc p.idx + elif nextTok(p).kind == tkIndent: + break + else: + rstMessage(p, mwRstStyle, "no newline after quoted literal block") + break + else: + rstMessage(p, mwRstStyle, + "unexpected indentation in quoted literal block") + break + else: + n.text.add(currentTok(p).symbol) + inc p.idx + result.add(n) + +proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode = + if kind == lbIndentedLiteralBlock: + result = parseLiteralBlock(p) + else: + result = parseQuotedLiteralBlock(p) + proc getLevel(p: var RstParser, c: char, hasOverline: bool): int = ## Returns (preliminary) heading level corresponding to `c` and ## `hasOverline`. If level does not exist, add it first. @@ -1457,38 +2179,27 @@ proc getLevel(p: var RstParser, c: char, hasOverline: bool): int = line: curLine(p), hasPeers: false) result = p.s.hLevels.len - 1 -proc countTitles(p: var RstParser, n: PRstNode) = - ## Fill `p.s.hTitleCnt` +proc countTitles(s: PRstSharedState, n: PRstNode) = + ## Fill `s.hTitleCnt` + if n == nil: return for node in n.sons: if node != nil: if node.kind notin {rnOverline, rnSubstitutionDef, rnDefaultRole}: break if node.kind == rnOverline: - if p.s.hLevels[p.s.hTitleCnt].hasPeers: + if s.hLevels[s.hTitleCnt].hasPeers: break - inc p.s.hTitleCnt - if p.s.hTitleCnt >= 2: + inc s.hTitleCnt + if s.hTitleCnt >= 2: break -proc tokenAfterNewline(p: RstParser, start: int): int = - result = start - while true: - case p.tok[result].kind - of tkEof: - break - of tkIndent: - inc result - break - else: inc result - -proc tokenAfterNewline(p: RstParser): int {.inline.} = - result = tokenAfterNewline(p, p.idx) - proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool = ## check that underline/overline length is enough for the heading. ## No support for Unicode. 
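import std/sequtils

# A simplified standalone check (hypothetical; the real `isAdornmentHeadline`
# below also handles overlines and reports detailed errors): a section
# underline must repeat one adornment character and be at least as long as
# the heading text.
proc validUnderline(heading, underline: string): bool =
  result = heading.len > 0 and underline.len >= heading.len and
    underline.allIt(it == underline[0])

doAssert validUnderline("Introduction", "============")
doAssert not validUnderline("Introduction", "=====")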
if p.tok[adornmentIdx].symbol in ["::", "..", "|"]: return false + if isMarkdownCodeBlock(p, adornmentIdx): + return false var headlineLen = 0 var failure = "" if p.idx < adornmentIdx: # check for underline @@ -1508,17 +2219,20 @@ proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool = while p.tok[i].kind notin {tkEof, tkIndent}: headlineLen += p.tok[i].symbol.len inc i - result = p.tok[adornmentIdx].symbol.len >= headlineLen and - headlineLen != 0 - if result: - result = result and p.tok[i].kind == tkIndent and - p.tok[i+1].kind == tkAdornment and - p.tok[i+1].symbol == p.tok[adornmentIdx].symbol - if not result: - failure = "(underline '" & p.tok[i+1].symbol & "' does not match " & - "overline '" & p.tok[adornmentIdx].symbol & "')" - else: - failure = "(overline '" & p.tok[adornmentIdx].symbol & "' is too short)" + if p.tok[i].kind == tkIndent and + p.tok[i+1].kind == tkAdornment and + p.tok[i+1].symbol[0] == p.tok[adornmentIdx].symbol[0]: + result = p.tok[adornmentIdx].symbol.len >= headlineLen and + headlineLen != 0 + if result: + result = p.tok[i+1].symbol == p.tok[adornmentIdx].symbol + if not result: + failure = "(underline '" & p.tok[i+1].symbol & "' does not match " & + "overline '" & p.tok[adornmentIdx].symbol & "')" + else: + failure = "(overline '" & p.tok[adornmentIdx].symbol & "' is too short)" + else: # it's not overline/underline section, not reporting error + return false if not result: rstMessage(p, meNewSectionExpected, failure) @@ -1528,6 +2242,33 @@ proc isLineBlock(p: RstParser): bool = p.tok[j].col > currentTok(p).col or p.tok[j].symbol == "\n" +proc isMarkdownBlockQuote(p: RstParser): bool = + result = currentTok(p).symbol[0] == '>' + +proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind = + ## Checks that the following tokens are either Indented Literal Block or + ## Quoted Literal Block (which is not quite the same as Markdown quote block). 
+ ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks + if currentTok(p).symbol == "::" and nextTok(p).kind == tkIndent: + if currInd(p) > nextTok(p).ival: + result = lbNone + if currInd(p) < nextTok(p).ival: + result = lbIndentedLiteralBlock + elif currInd(p) == nextTok(p).ival: + var i = p.idx + 1 + while p.tok[i].kind == tkIndent: inc i + const validQuotingCharacters = { + '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', + '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', + '_', '`', '{', '|', '}', '~'} + if p.tok[i].kind in {tkPunct, tkAdornment} and + p.tok[i].symbol[0] in validQuotingCharacters: + result = lbQuotedLiteralBlock + else: + result = lbNone + else: + result = lbNone + proc predNL(p: RstParser): bool = result = true if p.idx > 0: @@ -1540,6 +2281,39 @@ proc isDefList(p: RstParser): bool = p.tok[j].kind in {tkWord, tkOther, tkPunct} and p.tok[j - 2].symbol != "::" +proc `$`(t: Token): string = # for debugging only + result = "(" & $t.kind & " line=" & $t.line & " col=" & $t.col + if t.kind == tkIndent: result = result & " ival=" & $t.ival & ")" + else: result = result & " symbol=" & t.symbol & ")" + +proc skipNewlines(p: RstParser, j: int): int = + result = j + while p.tok[result].kind != tkEof and p.tok[result].kind == tkIndent: + inc result # skip blank lines + +proc skipNewlines(p: var RstParser) = + p.idx = skipNewlines(p, p.idx) + +const maxMdRelInd = 3 ## In Markdown: maximum indentation that does not yet + ## make the indented block a code + +proc isMdRelInd(outerInd, nestedInd: int): bool = + result = outerInd <= nestedInd and nestedInd <= outerInd + maxMdRelInd + +proc isMdDefBody(p: RstParser, j: int, termCol: int): bool = + let defCol = p.tok[j].col + result = p.tok[j].symbol == ":" and + isMdRelInd(termCol, defCol) and + p.tok[j+1].kind == tkWhite and + p.tok[j+2].kind in {tkWord, tkOther, tkPunct} + +proc isMdDefListItem(p: RstParser, idx: int): bool = + var j = tokenAfterNewline(p, idx) + j = skipNewlines(p, j) + let termCol = p.tok[j].col + result = isMdRelInd(currInd(p), termCol) and + isMdDefBody(p, j, termCol) + proc isOptionList(p: RstParser): bool = result = match(p, p.idx, "-w") or match(p, p.idx, "--w") or match(p, p.idx, "/w") or match(p, p.idx, "//w") @@ -1566,11 +2340,12 @@ proc findPipe(p: RstParser, start: int): bool = proc whichSection(p: RstParser): RstNodeKind = if currentTok(p).kind in {tkAdornment, tkPunct}: # for punctuation sequences that can be both tkAdornment and tkPunct - if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```": + if isMarkdownCodeBlock(p): return rnCodeBlock - elif currentTok(p).symbol == "::": + elif isRst(p) and currentTok(p).symbol == "::": return rnLiteralBlock - elif currentTok(p).symbol == ".." and predNL(p): + elif currentTok(p).symbol == ".." 
and + nextTok(p).kind in {tkWhite, tkIndent}: return rnDirective case currentTok(p).kind of tkAdornment: @@ -1582,34 +2357,42 @@ proc whichSection(p: RstParser): RstNodeKind = elif match(p, p.idx + 1, " a"): result = rnTable elif currentTok(p).symbol == "|" and isLineBlock(p): result = rnLineBlock - elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx): + elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p): + result = rnMarkdownBlockQuote + elif (match(p, p.idx + 1, "i") and not match(p, p.idx + 2, "I")) and + isAdornmentHeadline(p, p.idx): result = rnOverline else: - result = rnLeaf + result = rnParagraph of tkPunct: if isMarkdownHeadline(p): result = rnMarkdownHeadline elif roSupportMarkdown in p.s.options and predNL(p) and match(p, p.idx, "| w") and findPipe(p, p.idx+3): result = rnMarkdownTable + elif isMd(p) and isMdFootnoteName(p, reference=false): + result = rnFootnote elif currentTok(p).symbol == "|" and isLineBlock(p): result = rnLineBlock + elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p): + result = rnMarkdownBlockQuote elif match(p, tokenAfterNewline(p), "aI") and isAdornmentHeadline(p, tokenAfterNewline(p)): result = rnHeadline - elif predNL(p) and - currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite: + elif currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite: result = rnBulletList - elif match(p, p.idx, ":w:") and predNL(p): + elif match(p, p.idx, ":w:E"): # (currentTok(p).symbol == ":") result = rnFieldList elif match(p, p.idx, "(e) ") or match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList - elif isDefList(p): - result = rnDefList elif isOptionList(p): result = rnOptionList + elif isRst(p) and isDefList(p): + result = rnDefList + elif isMd(p) and isMdDefListItem(p, p.idx): + result = rnMdDefList else: result = rnParagraph of tkWord, tkOther, tkWhite: @@ -1618,7 +2401,9 @@ proc whichSection(p: RstParser): RstNodeKind = if isAdornmentHeadline(p, tokIdx): result = rnHeadline else: result = rnParagraph elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList - elif isDefList(p): result = rnDefList + elif isRst(p) and isDefList(p): result = rnDefList + elif isMd(p) and isMdDefListItem(p, p.idx): + result = rnMdDefList else: result = rnParagraph else: result = rnLeaf @@ -1648,14 +2433,112 @@ proc parseLineBlock(p: var RstParser): PRstNode = else: break +proc parseDoc(p: var RstParser): PRstNode {.gcsafe.} + +proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] = + result = ("", 0, 0) + var i = idx + result.sym &= p.tok[i].symbol + result.depth += p.tok[i].symbol.len + inc result.tokens + inc i + while p.tok[i].kind == tkWhite and i+1 < p.tok.len and + p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol[0] == '>': + result.sym &= p.tok[i].symbol + result.sym &= p.tok[i+1].symbol + result.depth += p.tok[i+1].symbol.len + inc result.tokens, 2 + inc i, 2 + +proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int): + PRstNode = + ## We define *segment* as a group of lines that starts with exactly the + ## same quote symbol. If the following lines don't contain any `>` (*lazy* + ## continuation) they considered as continuation of the current segment. 
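import std/strutils

# A small standalone sketch (hypothetical, not the parser's API) of the
# quote-marker handling done by `getQuoteSymbol` above: count the leading '>'
# markers (possibly separated by single spaces) to get the quotation depth and
# return the remaining text of the line.
proc splitQuoteMarkers(line: string): tuple[depth: int, rest: string] =
  var i = 0
  while i < line.len:
    if line[i] == '>':
      inc result.depth
      inc i
    elif line[i] == ' ' and result.depth > 0 and
        i + 1 < line.len and line[i + 1] == '>':
      inc i                     # a space between two '>' markers
    else:
      break
  result.rest = line[i .. ^1].strip(trailing = false)

let seg = splitQuoteMarkers("> > quoted text")
doAssert seg.depth == 2 and seg.rest == "quoted text"
doAssert splitQuoteMarkers("lazy continuation line").depth == 0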
+ var q: RstParser # to delete `>` at a start of line and then parse normally + initParser(q, p.s) + q.col = p.col + q.line = p.line + var minCol = int.high # minimum colum num in the segment + while true: # move tokens of segment from `p` to `q` skipping `curSym` + case currentTok(p).kind + of tkEof: + break + of tkIndent: + if nextTok(p).kind in {tkIndent, tkEof}: + break + else: + if nextTok(p).symbol[0] == '>': + var (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1) + if quoteSym == curSym: # the segment continues + var iTok = tokenAfterNewline(p, p.idx+1) + if p.tok[iTok].kind notin {tkEof, tkIndent} and + p.tok[iTok].symbol[0] != '>': + rstMessage(p, mwRstStyle, + "two or more quoted lines are followed by unquoted line " & + $(curLine(p) + 1)) + break + q.tok.add currentTok(p) + var ival = currentTok(p).ival + quoteSym.len + inc p.idx, (1 + quoteTokens) # skip newline and > > > + if currentTok(p).kind == tkWhite: + ival += currentTok(p).symbol.len + inc p.idx + # fix up previous `tkIndent`s to ival (as if >>> were not there) + var j = q.tok.len - 1 + while j >= 0 and q.tok[j].kind == tkIndent: + q.tok[j].ival = ival + dec j + else: # next segment started + break + elif currentTok(p).ival < col: + break + else: # the segment continues, a case like: + # > beginning + # continuation + q.tok.add currentTok(p) + inc p.idx + else: + if currentTok(p).col < minCol: minCol = currentTok(p).col + q.tok.add currentTok(p) + inc p.idx + q.indentStack = @[minCol] + # if initial indentation `minCol` is > 0 then final newlines + # should be omitted so that parseDoc could advance to the end of tokens: + var j = q.tok.len - 1 + while q.tok[j].kind == tkIndent: dec j + q.tok.setLen (j+1) + q.tok.add Token(kind: tkEof, line: currentTok(p).line) + result = parseDoc(q) + +proc parseMarkdownBlockQuote(p: var RstParser): PRstNode = + var (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx) + let col = currentTok(p).col + result = newRstNodeA(p, rnMarkdownBlockQuote) + inc p.idx, quoteTokens # skip first > + while true: + var item = newRstNode(rnMarkdownBlockQuoteItem) + item.quotationDepth = quotationDepth + if currentTok(p).kind == tkWhite: inc p.idx + item.add parseMarkdownQuoteSegment(p, curSym, col) + result.add(item) + if currentTok(p).kind == tkIndent and currentTok(p).ival == col and + nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>': + (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx + 1) + inc p.idx, (1 + quoteTokens) # skip newline and > > > + else: + break + proc parseParagraph(p: var RstParser, result: PRstNode) = while true: case currentTok(p).kind of tkIndent: if nextTok(p).kind == tkIndent: inc p.idx - break - elif currentTok(p).ival == currInd(p): + break # blank line breaks paragraph for both Md & Rst + elif currentTok(p).ival == currInd(p) or ( + isMd(p) and currentTok(p).ival > currInd(p)): + # (Md allows adding additional indentation inside paragraphs) inc p.idx case whichSection(p) of rnParagraph, rnLeaf, rnHeadline, rnMarkdownHeadline, @@ -1663,16 +2546,20 @@ proc parseParagraph(p: var RstParser, result: PRstNode) = result.add newLeaf(" ") of rnLineBlock: result.addIfNotNil(parseLineBlock(p)) - else: break + of rnMarkdownBlockQuote: + result.addIfNotNil(parseMarkdownBlockQuote(p)) + else: + dec p.idx # allow subsequent block to be parsed as another section + break else: break of tkPunct: - if currentTok(p).symbol == "::" and - nextTok(p).kind == tkIndent and - currInd(p) < nextTok(p).ival: + if isRst(p) and ( + let literalBlockKind = 
whichRstLiteralBlock(p); + literalBlockKind != lbNone): result.add newLeaf(":") inc p.idx # skip '::' - result.add(parseLiteralBlock(p)) + result.add(parseRstLiteralBlock(p, literalBlockKind)) break else: parseInline(p, result) @@ -1712,7 +2599,8 @@ proc parseHeadline(p: var RstParser): PRstNode = result.level = getLevel(p, c, hasOverline=false) checkHeadingHierarchy(p, result.level) p.s.hCurLevel = result.level - addAnchor(p, rstnodeToRefname(result), reset=true) + addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor) + p.s.tocPart.add result proc parseOverline(p: var RstParser): PRstNode = var c = currentTok(p).symbol[0] @@ -1734,87 +2622,208 @@ proc parseOverline(p: var RstParser): PRstNode = if currentTok(p).kind == tkAdornment: inc p.idx if currentTok(p).kind == tkIndent: inc p.idx - addAnchor(p, rstnodeToRefname(result), reset=true) + addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor) + p.s.tocPart.add result + +proc fixHeadlines(s: PRstSharedState) = + # Fix up section levels depending on presence of a title and subtitle: + for n in s.tocPart: + if n.kind in {rnHeadline, rnOverline}: + if s.hTitleCnt == 2: + if n.level == 1: # it's the subtitle + n.level = 0 + elif n.level >= 2: # normal sections, start numbering from 1 + n.level -= 1 + elif s.hTitleCnt == 0: + n.level += 1 + # Set headline anchors: + for iHeading in 0 .. s.tocPart.high: + let n: PRstNode = s.tocPart[iHeading] + if n.level >= 1: + n.anchor = rstnodeToRefname(n) + # Fix anchors for uniqueness if `.. contents::` is present + if s.hasToc: + # Find the last higher level section for unique reference name + var sectionPrefix = "" + for i in countdown(iHeading - 1, 0): + if s.tocPart[i].level >= 1 and s.tocPart[i].level < n.level: + sectionPrefix = rstnodeToRefname(s.tocPart[i]) & "-" + break + if sectionPrefix != "": + n.anchor = sectionPrefix & n.anchor + s.tocPart.setLen 0 type - IntSeq = seq[int] - ColumnLimits = tuple + ColSpec = object + start, stop: int + RstCols = seq[ColSpec] + ColumnLimits = tuple # for Markdown first, last: int ColSeq = seq[ColumnLimits] +proc tokStart(p: RstParser, idx: int): int = + result = p.tok[idx].col + +proc tokStart(p: RstParser): int = + result = tokStart(p, p.idx) + +proc tokEnd(p: RstParser, idx: int): int = + result = p.tok[idx].col + p.tok[idx].symbol.len - 1 + proc tokEnd(p: RstParser): int = - result = currentTok(p).col + currentTok(p).symbol.len - 1 + result = tokEnd(p, p.idx) -proc getColumns(p: var RstParser, cols: var IntSeq) = +proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int = + # Fills table column specification (or separator) `cols` and returns + # the next parser index after it. 
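# A standalone sketch (hypothetical helper, not `getColumns` itself) of what
# the column-specification line of an RST simple table encodes: every run of
# '=' yields one column together with its start and stop positions.
proc columnsOf(sepLine: string): seq[tuple[start, stop: int]] =
  var i = 0
  while i < sepLine.len:
    if sepLine[i] == '=':
      let start = i
      while i < sepLine.len and sepLine[i] == '=': inc i
      result.add (start: start, stop: i - 1)
    else:
      inc i

doAssert columnsOf("=====  ========") == @[(start: 0, stop: 4), (start: 7, stop: 14)]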
var L = 0 + result = startIdx while true: inc L setLen(cols, L) - cols[L - 1] = tokEnd(p) - assert(currentTok(p).kind == tkAdornment) - inc p.idx - if currentTok(p).kind != tkWhite: break - inc p.idx - if currentTok(p).kind != tkAdornment: break - if currentTok(p).kind == tkIndent: inc p.idx - # last column has no limit: - cols[L - 1] = 32000 + cols[L - 1].start = tokStart(p, result) + cols[L - 1].stop = tokEnd(p, result) + assert(p.tok[result].kind == tkAdornment) + inc result + if p.tok[result].kind != tkWhite: break + inc result + if p.tok[result].kind != tkAdornment: break + if p.tok[result].kind == tkIndent: inc result -proc parseDoc(p: var RstParser): PRstNode {.gcsafe.} +proc checkColumns(p: RstParser, cols: RstCols) = + var i = p.idx + if p.tok[i].symbol[0] != '=': + stopOrWarn(p, meIllformedTable, + "only tables with `=` columns specification are allowed") + for col in 0 ..< cols.len: + if tokEnd(p, i) != cols[col].stop: + stopOrWarn(p, meIllformedTable, + "end of table column #$1 should end at position $2" % [ + $(col+1), $(cols[col].stop+ColRstOffset)], + p.tok[i].line, tokEnd(p, i)) + inc i + if col == cols.len - 1: + if p.tok[i].kind == tkWhite: + inc i + if p.tok[i].kind notin {tkIndent, tkEof}: + stopOrWarn(p, meIllformedTable, "extraneous column specification") + elif p.tok[i].kind == tkWhite: + inc i + else: + stopOrWarn(p, meIllformedTable, + "no enough table columns", p.tok[i].line, p.tok[i].col) + +proc getSpans(p: RstParser, nextLine: int, + cols: RstCols, unitedCols: RstCols): seq[int] = + ## Calculates how many columns a joined cell occupies. + if unitedCols.len > 0: + result = newSeq[int](unitedCols.len) + var + iCell = 0 + jCell = 0 + uCell = 0 + while jCell < cols.len: + if cols[jCell].stop < unitedCols[uCell].stop: + inc jCell + elif cols[jCell].stop == unitedCols[uCell].stop: + result[uCell] = jCell - iCell + 1 + iCell = jCell + 1 + jCell = jCell + 1 + inc uCell + else: + rstMessage(p, meIllformedTable, + "spanning underline does not match main table columns", + p.tok[nextLine].line, p.tok[nextLine].col) + +proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode = + ## Parses 1 row in RST simple table. 
+ # Consider that columns may be spanning (united by using underline like ----): + let nextLine = tokenAfterNewline(p) + var unitedCols: RstCols + var afterSpan: int + if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-': + afterSpan = getColumns(p, unitedCols, nextLine) + if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar: + # legacy rst.nim compat.: allow punctuation like `----` in main boundaries + afterSpan = nextLine + unitedCols.setLen 0 + else: + afterSpan = nextLine + template colEnd(i): int = + if i == cols.len - 1: high(int) # last column has no limit + elif unitedCols.len > 0: unitedCols[i].stop else: cols[i].stop + template colStart(i): int = + if unitedCols.len > 0: unitedCols[i].start else: cols[i].start + var row = newSeq[string](if unitedCols.len > 0: unitedCols.len else: cols.len) + var spans: seq[int] = getSpans(p, nextLine, cols, unitedCols) + + let line = currentTok(p).line + # Iterate over the lines a single cell may span: + while true: + var nCell = 0 + # distribute tokens between cells in the current line: + while currentTok(p).kind notin {tkIndent, tkEof}: + if tokEnd(p) <= colEnd(nCell): + if tokStart(p) < colStart(nCell): + if currentTok(p).kind != tkWhite: + stopOrWarn(p, meIllformedTable, + "this word crosses table column from the left") + row[nCell].add(currentTok(p).symbol) + else: + row[nCell].add(currentTok(p).symbol) + inc p.idx + else: + if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite: + stopOrWarn(p, meIllformedTable, + "this word crosses table column from the right") + row[nCell].add(currentTok(p).symbol) + inc p.idx + inc nCell + if currentTok(p).kind == tkIndent: inc p.idx + if tokEnd(p) <= colEnd(0): break + # Continued current cells because the 1st column is empty. 
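import std/strutils

# A simplified standalone sketch (hypothetical) of how one physical line of a
# simple table is split into cells by the column ranges taken from the
# specification line; the surrounding `parseSimpleTableRow` additionally
# handles spans, multi-line cells and error reporting.
proc splitRow(line: string;
              cols: seq[tuple[start, stop: int]]): seq[string] =
  for i, c in cols:
    let lo = min(c.start, line.len)
    let hi = if i == cols.len - 1: line.high        # last column: no limit
             else: min(c.stop, line.high)
    result.add line[lo .. hi].strip

doAssert splitRow("false  compile time",
                  @[(start: 0, stop: 4), (start: 7, stop: 14)]) ==
    @["false", "compile time"]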
+ if currentTok(p).kind in {tkEof, tkAdornment}: + break + for nCell in countup(1, high(row)): row[nCell].add('\n') + result = newRstNode(rnTableRow) + var q: RstParser + for uCell in 0 ..< row.len: + initParser(q, p.s) + q.col = colStart(uCell) + q.line = line - 1 + getTokens(row[uCell], q.tok) + let cell = newRstNode(rnTableDataCell) + cell.span = if spans.len == 0: 0 else: spans[uCell] + cell.add(parseDoc(q)) + result.add(cell) + if afterSpan > p.idx: + p.idx = afterSpan proc parseSimpleTable(p: var RstParser): PRstNode = - var - cols: IntSeq - row: seq[string] - i, last, line: int - c: char - q: RstParser - a, b: PRstNode + var cols: RstCols result = newRstNodeA(p, rnTable) - cols = @[] - row = @[] - a = nil - c = currentTok(p).symbol[0] + let startIdx = getColumns(p, cols, p.idx) + let colChar = currentTok(p).symbol[0] + checkColumns(p, cols) + p.idx = startIdx + result.colCount = cols.len while true: if currentTok(p).kind == tkAdornment: - last = tokenAfterNewline(p) - if p.tok[last].kind in {tkEof, tkIndent}: + checkColumns(p, cols) + p.idx = tokenAfterNewline(p) + if currentTok(p).kind in {tkEof, tkIndent}: # skip last adornment line: - p.idx = last break - getColumns(p, cols) - setLen(row, cols.len) - if a != nil: - for j in 0 ..< a.len: # fix rnTableDataCell -> rnTableHeaderCell - a.sons[j] = newRstNode(rnTableHeaderCell, a.sons[j].sons) + if result.sons.len > 0: result.sons[^1].endsHeader = true + # fix rnTableDataCell -> rnTableHeaderCell for previous table rows: + for nRow in 0 ..< result.sons.len: + for nCell in 0 ..< result.sons[nRow].len: + template cell: PRstNode = result.sons[nRow].sons[nCell] + cell = PRstNode(kind: rnTableHeaderCell, sons: cell.sons, + span: cell.span, anchor: cell.anchor) if currentTok(p).kind == tkEof: break - for j in countup(0, high(row)): row[j] = "" - # the following while loop iterates over the lines a single cell may span: - line = currentTok(p).line - while true: - i = 0 - while currentTok(p).kind notin {tkIndent, tkEof}: - if tokEnd(p) <= cols[i]: - row[i].add(currentTok(p).symbol) - inc p.idx - else: - if currentTok(p).kind == tkWhite: inc p.idx - inc i - if currentTok(p).kind == tkIndent: inc p.idx - if tokEnd(p) <= cols[0]: break - if currentTok(p).kind in {tkEof, tkAdornment}: break - for j in countup(1, high(row)): row[j].add('\n') - a = newRstNode(rnTableRow) - for j in countup(0, high(row)): - initParser(q, p.s) - q.col = cols[j] - q.line = line - 1 - q.filename = p.filename - q.col += getTokens(row[j], false, q.tok) - b = newRstNode(rnTableDataCell) - b.add(parseDoc(q)) - a.add(b) - result.add(a) + let tabRow = parseSimpleTableRow(p, cols, colChar) + result.add tabRow proc readTableRow(p: var RstParser): ColSeq = if currentTok(p).symbol == "|": inc p.idx @@ -1847,29 +2856,28 @@ proc isValidDelimiterRow(p: var RstParser, colNum: int): bool = proc parseMarkdownTable(p: var RstParser): PRstNode = var row: ColSeq - colNum: int a, b: PRstNode q: RstParser result = newRstNodeA(p, rnMarkdownTable) proc parseRow(p: var RstParser, cellKind: RstNodeKind, result: PRstNode) = row = readTableRow(p) - if colNum == 0: colNum = row.len # table header - elif row.len < colNum: row.setLen(colNum) + if result.colCount == 0: result.colCount = row.len # table header + elif row.len < result.colCount: row.setLen(result.colCount) a = newRstNode(rnTableRow) - for j in 0 ..< colNum: + for j in 0 ..< result.colCount: b = newRstNode(cellKind) initParser(q, p.s) q.col = p.col q.line = currentTok(p).line - 1 - q.filename = p.filename - q.col += 
getTokens(getColContents(p, row[j]), false, q.tok) + getTokens(getColContents(p, row[j]), q.tok) b.add(parseDoc(q)) a.add(b) result.add(a) parseRow(p, rnTableHeaderCell, result) - if not isValidDelimiterRow(p, colNum): rstMessage(p, meMarkdownIllformedTable) + if not isValidDelimiterRow(p, result.colCount): + rstMessage(p, meMarkdownIllformedTable) while predNL(p) and currentTok(p).symbol == "|": parseRow(p, rnTableDataCell, result) @@ -1901,8 +2909,10 @@ proc parseBulletList(p: var RstParser): PRstNode = proc parseOptionList(p: var RstParser): PRstNode = result = newRstNodeA(p, rnOptionList) + let col = currentTok(p).col + var order = 1 while true: - if isOptionList(p): + if currentTok(p).col == col and isOptionList(p): var a = newRstNode(rnOptionGroup) var b = newRstNode(rnDescription) var c = newRstNode(rnOptionListItem) @@ -1920,11 +2930,43 @@ proc parseOptionList(p: var RstParser): PRstNode = popInd(p) else: parseLine(p, b) - if currentTok(p).kind == tkIndent: inc p.idx + while currentTok(p).kind == tkIndent: inc p.idx c.add(a) c.add(b) + c.order = order; inc order result.add(c) else: + if currentTok(p).kind != tkEof: dec p.idx # back to tkIndent + break + +proc parseMdDefinitionList(p: var RstParser): PRstNode = + ## Parses (Pandoc/kramdown/PHPextra) Markdown definition lists. + result = newRstNodeA(p, rnMdDefList) + let termCol = currentTok(p).col + while true: + var item = newRstNode(rnDefItem) + var term = newRstNode(rnDefName) + parseLine(p, term) + skipNewlines(p) + inc p.idx, 2 # skip ":" and space + item.add(term) + while true: + var def = newRstNode(rnDefBody) + let indent = getMdBlockIndent(p) + pushInd(p, indent) + parseSection(p, def) + popInd(p) + item.add(def) + let j = skipNewlines(p, p.idx) + if isMdDefBody(p, j, termCol): # parse next definition body + p.idx = j + 2 # skip ":" and space + else: + break + result.add(item) + let j = skipNewlines(p, p.idx) + if p.tok[j].col == termCol and isMdDefListItem(p, j): + p.idx = j # parse next item + else: break proc parseDefinitionList(p: var RstParser): PRstNode = @@ -1935,6 +2977,8 @@ proc parseDefinitionList(p: var RstParser): PRstNode = var col = currentTok(p).col result = newRstNodeA(p, rnDefList) while true: + if isOptionList(p): + break # option list has priority over def.list j = p.idx var a = newRstNode(rnDefName) parseLine(p, a) @@ -1991,11 +3035,10 @@ proc parseEnumList(p: var RstParser): PRstNode = let n = p.line + p.tok[j].line let msg = "\n" & """ not enough indentation on line $2 - (should be at column $3 if it's a continuation of enum. list), + (should be at column $3 if it's a continuation of enum. list), or no blank line after line $1 (if it should be the next paragraph), or no escaping \ at the beginning of line $1 - (if lines $1..$2 are a normal paragraph, not enum. list)""". - unindent(8) + (if lines $1..$2 are a normal paragraph, not enum. 
list)""".dedent let c = p.col + requiredIndent + ColRstOffset rstMessage(p, mwRstStyle, msg % [$(n-1), $n, $c], p.tok[j].line, p.tok[j].col) @@ -2032,13 +3075,13 @@ proc parseEnumList(p: var RstParser): PRstNode = let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol # check that it's in sequence: enumerator == next(prevEnum) if "n" in wildcards[w]: # arabic numeral - let prevEnumI = try: parseInt(prevEnum) except: 1 + let prevEnumI = try: parseInt(prevEnum) except ValueError: 1 if enumerator in autoEnums: if prevAE != "" and enumerator != prevAE: break prevAE = enumerator curEnum = prevEnumI + 1 - else: curEnum = (try: parseInt(enumerator) except: 1) + else: curEnum = (try: parseInt(enumerator) except ValueError: 1) if curEnum - prevEnumI != 1: break prevEnum = enumerator @@ -2053,6 +3096,57 @@ proc parseEnumList(p: var RstParser): PRstNode = else: break +proc prefix(ftnType: FootnoteType): string = + case ftnType + of fnManualNumber: result = "footnote-" + of fnAutoNumber: result = "footnoteauto-" + of fnAutoNumberLabel: result = "footnote-" + of fnAutoSymbol: result = "footnotesym-" + of fnCitation: result = "citation-" + +proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} = + ## Parses footnotes and citations, always returns 2 sons: + ## + ## 1) footnote label, always containing rnInner with 1 or more sons + ## 2) footnote body, which may be nil + var label: PRstNode + if isRst(p): + inc p.idx # skip space after `..` + label = parseFootnoteName(p, reference=false) + if label == nil: + if isRst(p): + dec p.idx + return nil + result = newRstNode(rnFootnote) + result.add label + let (fnType, i) = getFootnoteType(p.s, label) + var name = "" + var anchor = fnType.prefix + case fnType + of fnManualNumber: + addFootnoteNumManual(p, i) + anchor.add $i + of fnAutoNumber, fnAutoNumberLabel: + name = rstnodeToRefname(label) + addFootnoteNumAuto(p, name) + if fnType == fnAutoNumberLabel: + anchor.add name + else: # fnAutoNumber + result.order = p.s.lineFootnoteNum.len + anchor.add $result.order + of fnAutoSymbol: + addFootnoteSymAuto(p) + result.order = p.s.lineFootnoteSym.len + anchor.add $p.s.lineFootnoteSym.len + of fnCitation: + anchor.add rstnodeToRefname(label) + addAnchorRst(p, anchor, target = result, anchorType = footnoteAnchor) + result.anchor = anchor + if currentTok(p).kind == tkWhite: inc p.idx + discard parseBlockContent(p, result, parseSectionWrapper) + if result.len < 2: + result.add nil + proc sonKind(father: PRstNode, i: int): RstNodeKind = result = rnLeaf if i < father.len: result = father.sons[i].kind @@ -2066,11 +3160,19 @@ proc parseSection(p: var RstParser, result: PRstNode) = if currInd(p) == currentTok(p).ival: inc p.idx elif currentTok(p).ival > currInd(p): - pushInd(p, currentTok(p).ival) - var a = newRstNodeA(p, rnBlockQuote) - parseSection(p, a) - result.add(a) - popInd(p) + if roPreferMarkdown in p.s.options: # Markdown => normal paragraphs + if currentTok(p).ival - currInd(p) >= 4: + result.add parseLiteralBlock(p) + else: + pushInd(p, currentTok(p).ival) + parseSection(p, result) + popInd(p) + else: # RST mode => block quotes + pushInd(p, currentTok(p).ival) + var a = newRstNodeA(p, rnBlockQuote) + parseSection(p, a) + result.add(a) + popInd(p) else: while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent: inc p.idx # skip blank lines @@ -2085,11 +3187,14 @@ proc parseSection(p: var RstParser, result: PRstNode) = a = parseLiteralBlock(p) of rnBulletList: a = parseBulletList(p) of rnLineBlock: a = parseLineBlock(p) + of rnMarkdownBlockQuote: a = 
parseMarkdownBlockQuote(p) of rnDirective: a = parseDotDot(p) + of rnFootnote: a = parseFootnote(p) of rnEnumList: a = parseEnumList(p) of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)") of rnParagraph: discard of rnDefList: a = parseDefinitionList(p) + of rnMdDefList: a = parseMdDefinitionList(p) of rnFieldList: if p.idx > 0: dec p.idx a = parseFields(p) @@ -2110,15 +3215,6 @@ proc parseSection(p: var RstParser, result: PRstNode) = result.sons[0] = newRstNode(rnInner, result.sons[0].sons, anchor=result.sons[0].anchor) -proc parseSectionWrapper(p: var RstParser): PRstNode = - result = newRstNode(rnInner) - parseSection(p, result) - while result.kind == rnInner and result.len == 1: - result = result.sons[0] - -proc `$`(t: Token): string = - result = $t.kind & ' ' & t.symbol - proc parseDoc(p: var RstParser): PRstNode = result = parseSectionWrapper(p) if currentTok(p).kind != tkEof: @@ -2128,7 +3224,6 @@ type DirFlag = enum hasArg, hasOptions, argIsFile, argIsWord DirFlags = set[DirFlag] - SectionParser = proc (p: var RstParser): PRstNode {.nimcall.} proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode = ## Parses arguments and options for a directive block. @@ -2141,6 +3236,7 @@ proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode ## Both rnDirArg and rnFieldList children nodes might be nil, so you need to ## check them before accessing. result = newRstNodeA(p, k) + if k == rnCodeBlock: result.info = lineInfo(p) var args: PRstNode = nil var options: PRstNode = nil if hasArg in flags: @@ -2163,28 +3259,13 @@ proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode parseLine(p, args) result.add(args) if hasOptions in flags: - if currentTok(p).kind == tkIndent and currentTok(p).ival >= 3 and + if currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) and nextTok(p).symbol == ":": + pushInd(p, currentTok(p).ival) options = parseFields(p) + popInd(p) result.add(options) -proc indFollows(p: RstParser): bool = - result = currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) - -proc parseBlockContent(p: var RstParser, father: var PRstNode, - contentParser: SectionParser): bool = - ## parse the final content part of explicit markup blocks (directives, - ## footnotes, etc). Returns true if succeeded. - if currentTok(p).kind != tkIndent or indFollows(p): - var nextIndent = p.tok[tokenAfterNewline(p)-1].ival - if nextIndent <= currInd(p): # parse only this line - nextIndent = currentTok(p).col - pushInd(p, nextIndent) - var content = contentParser(p) - popInd(p) - father.add content - result = true - proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags, contentParser: SectionParser): PRstNode = ## A helper proc that does main work for specific directive procs. @@ -2265,15 +3346,16 @@ proc dirInclude(p: var RstParser): PRstNode = var q: RstParser initParser(q, p.s) - q.filename = path - q.col += getTokens( - inputString[startPosition..endPosition].strip(), - false, + let saveFileIdx = p.s.currFileIdx + setCurrFilename(p.s, path) + getTokens( + inputString[startPosition..endPosition], q.tok) # workaround a GCC bug; more like the interior pointer bug? #if find(q.tok[high(q.tok)].symbol, "\0\x01\x02") > 0: # InternalError("Too many binary zeros in include file") result = parseDoc(q) + p.s.currFileIdx = saveFileIdx proc dirCodeBlock(p: var RstParser, nimExtension = false): PRstNode = ## Parses a code block. 
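# A small illustration of the Markdown indentation rule added to `parseSection`
# earlier in this patch (assumption: `roPreferMarkdown` is among the parser
# options). Text indented by at least 4 extra spaces becomes a literal block;
# a smaller extra indent is parsed as a normal paragraph rather than an RST
# block quote:
#
#   some paragraph
#
#       echo "4+ extra spaces => rnLiteralBlock"
#
#   some paragraph
#     2 extra spaces => still ordinary paragraph text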
@@ -2291,28 +3373,14 @@ proc dirCodeBlock(p: var RstParser, nimExtension = false): PRstNode = ## ## As an extension this proc will process the ``file`` extension field and if ## present will replace the code block with the contents of the referenced - ## file. + ## file. This behaviour is disabled in sandboxed mode and can be re-enabled + ## with the `roSandboxDisabled` flag. result = parseDirective(p, rnCodeBlock, {hasArg, hasOptions}, parseLiteralBlock) - var filename = strip(getFieldValue(result, "file")) - if filename != "": - var path = p.findRelativeFile(filename) - if path == "": rstMessage(p, meCannotOpenFile, filename) - var n = newRstNode(rnLiteralBlock) - n.add newLeaf(readFile(path)) - result.sons[2] = n + mayLoadFile(p, result) # Extend the field block if we are using our custom Nim extension. if nimExtension: - # Create a field block if the input block didn't have any. - if result.sons[1].isNil: result.sons[1] = newRstNode(rnFieldList) - assert result.sons[1].kind == rnFieldList - # Hook the extra field and specify the Nim language as value. - var extraNode = newRstNode(rnField) - extraNode.add(newRstNode(rnFieldName)) - extraNode.add(newRstNode(rnFieldBody)) - extraNode.sons[0].add newLeaf("default-language") - extraNode.sons[1].add newLeaf("Nim") - result.sons[1].add(extraNode) + defaultCodeLangNim(p, result) proc dirContainer(p: var RstParser): PRstNode = result = parseDirective(p, rnContainer, {hasArg}, parseSectionWrapper) @@ -2330,6 +3398,7 @@ proc dirTitle(p: var RstParser): PRstNode = proc dirContents(p: var RstParser): PRstNode = result = parseDirective(p, rnContents, {hasArg}, nil) + p.s.hasToc = true proc dirIndex(p: var RstParser): PRstNode = result = parseDirective(p, rnIndex, {}, parseSectionWrapper) @@ -2340,6 +3409,18 @@ proc dirAdmonition(p: var RstParser, d: string): PRstNode = proc dirDefaultRole(p: var RstParser): PRstNode = result = parseDirective(p, rnDefaultRole, {hasArg}, nil) + if result.sons[0].len == 0: p.s.currRole = defaultRole(p.s.options) + else: + assert result.sons[0].sons[0].kind == rnLeaf + p.s.currRole = result.sons[0].sons[0].text + p.s.currRoleKind = whichRole(p, p.s.currRole) + +proc dirRole(p: var RstParser): PRstNode = + result = parseDirective(p, rnDirective, {hasArg, hasOptions}, nil) + # just check that language is supported, TODO: real role association + let lang = getFieldValue(result, "language").strip + if lang != "" and getSourceLanguage(lang) == langNone: + rstMessage(p, mwUnsupportedLanguage, lang) proc dirRawAux(p: var RstParser, result: var PRstNode, kind: RstNodeKind, contentParser: SectionParser) = @@ -2376,19 +3457,36 @@ proc dirRaw(p: var RstParser): PRstNode = else: dirRawAux(p, result, rnRaw, parseSectionWrapper) +proc dirImportdoc(p: var RstParser): PRstNode = + result = parseDirective(p, rnDirective, {}, parseLiteralBlock) + assert result.sons[2].kind == rnLiteralBlock + assert result.sons[2].sons[0].kind == rnLeaf + let filenames: seq[string] = split(result.sons[2].sons[0].text, seps = {','}) + proc rmSpaces(s: string): string = s.split.join("") + for origFilename in filenames: + p.s.idxImports[origFilename.rmSpaces] = ImportdocInfo(fromInfo: lineInfo(p)) + proc selectDir(p: var RstParser, d: string): PRstNode = result = nil + let tok = p.tok[p.idx-2] # report on directive in ".. 
directive::" + if roSandboxDisabled notin p.s.options: + if d notin SandboxDirAllowlist: + rstMessage(p, meSandboxedDirective, d, tok.line, tok.col) + case d of "admonition", "attention", "caution": result = dirAdmonition(p, d) of "code": result = dirCodeBlock(p) of "code-block": result = dirCodeBlock(p, nimExtension = true) of "container": result = dirContainer(p) of "contents": result = dirContents(p) - of "danger", "error": result = dirAdmonition(p, d) + of "danger": result = dirAdmonition(p, d) + of "default-role": result = dirDefaultRole(p) + of "error": result = dirAdmonition(p, d) of "figure": result = dirFigure(p) of "hint": result = dirAdmonition(p, d) of "image": result = dirImage(p) of "important": result = dirAdmonition(p, d) + of "importdoc": result = dirImportdoc(p) of "include": result = dirInclude(p) of "index": result = dirIndex(p) of "note": result = dirAdmonition(p, d) @@ -2397,62 +3495,13 @@ proc selectDir(p: var RstParser, d: string): PRstNode = result = dirRaw(p) else: rstMessage(p, meInvalidDirective, d) + of "role": result = dirRole(p) of "tip": result = dirAdmonition(p, d) of "title": result = dirTitle(p) of "warning": result = dirAdmonition(p, d) - of "default-role": result = dirDefaultRole(p) else: - let tok = p.tok[p.idx-2] # report on directive in ".. directive::" rstMessage(p, meInvalidDirective, d, tok.line, tok.col) -proc prefix(ftnType: FootnoteType): string = - case ftnType - of fnManualNumber: result = "footnote-" - of fnAutoNumber: result = "footnoteauto-" - of fnAutoNumberLabel: result = "footnote-" - of fnAutoSymbol: result = "footnotesym-" - of fnCitation: result = "citation-" - -proc parseFootnote(p: var RstParser): PRstNode = - ## Parses footnotes and citations, always returns 2 sons: - ## - ## 1) footnote label, always containing rnInner with 1 or more sons - ## 2) footnote body, which may be nil - inc p.idx - let label = parseFootnoteName(p, reference=false) - if label == nil: - dec p.idx - return nil - result = newRstNode(rnFootnote) - result.add label - let (fnType, i) = getFootnoteType(label) - var name = "" - var anchor = fnType.prefix - case fnType - of fnManualNumber: - addFootnoteNumManual(p, i) - anchor.add $i - of fnAutoNumber, fnAutoNumberLabel: - name = rstnodeToRefname(label) - addFootnoteNumAuto(p, name) - if fnType == fnAutoNumberLabel: - anchor.add name - else: # fnAutoNumber - result.order = p.s.lineFootnoteNum.len - anchor.add $result.order - of fnAutoSymbol: - addFootnoteSymAuto(p) - result.order = p.s.lineFootnoteSym.len - anchor.add $p.s.lineFootnoteSym.len - of fnCitation: - anchor.add rstnodeToRefname(label) - addAnchor(p, anchor, reset=true) - result.anchor = anchor - if currentTok(p).kind == tkWhite: inc p.idx - discard parseBlockContent(p, result, parseSectionWrapper) - if result.len < 2: - result.add nil - proc parseDotDot(p: var RstParser): PRstNode = # parse "explicit markup blocks" result = nil @@ -2467,13 +3516,24 @@ proc parseDotDot(p: var RstParser): PRstNode = elif match(p, p.idx, " _"): # hyperlink target: inc p.idx, 2 - var a = getReferenceName(p, ":") + var ending = ":" + if currentTok(p).symbol == "`": + inc p.idx + ending = "`" + var a = getReferenceName(p, ending) + if ending == "`": + if currentTok(p).symbol == ":": + inc p.idx + else: + rstMessage(p, meExpected, ":") if currentTok(p).kind == tkWhite: inc p.idx var b = untilEol(p) if len(b) == 0: # set internal anchor - addAnchor(p, rstnodeToRefname(a), reset=false) + p.curAnchors.add ManualAnchor( + alias: linkName(a), anchor: rstnodeToRefname(a), info: 
prevLineInfo(p) + ) else: # external hyperlink - setRef(p, rstnodeToRefname(a), b) + setRef(p, rstnodeToRefname(a), b, refType=hyperlinkAlias) elif match(p, p.idx, " |"): # substitution definitions: inc p.idx, 2 @@ -2494,9 +3554,218 @@ proc parseDotDot(p: var RstParser): PRstNode = (n = parseFootnote(p); n != nil): result = n else: - result = parseComment(p) + result = parseComment(p, col) + +proc rstParsePass1*(fragment: string, + line, column: int, + sharedState: PRstSharedState): PRstNode = + ## Parses an RST `fragment`. + ## The result should be further processed by + ## preparePass2_ and resolveSubs_ (which is pass 2). + var p: RstParser + initParser(p, sharedState) + p.line = line + p.col = column + getTokens(fragment, p.tok) + result = parseDoc(p) + +proc extractLinkEnd(x: string): string = + ## From links like `path/to/file.html#/%` extract `file.html#/%`. + let i = find(x, '#') + let last = + if i >= 0: i + else: x.len - 1 + let j = rfind(x, '/', start=0, last=last) + if j >= 0: + result = x[j+1 .. ^1] + else: + result = x + +proc loadIdxFile(s: var PRstSharedState, origFilename: string) = + doAssert roSandboxDisabled in s.options + var info: TLineInfo + info.fileIndex = addFilename(s, origFilename) + var (dir, basename, ext) = origFilename.splitFile + if ext notin [".md", ".rst", ".nim", ""]: + rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo, + meCannotOpenFile, origFilename & ": unknown extension") + let idxFilename = dir / basename & ".idx" + let (idxPath, linkRelPath) = s.findRefFile(idxFilename) + s.idxImports[origFilename].linkRelPath = linkRelPath + var + fileEntries: seq[IndexEntry] + title: IndexEntry + try: + (fileEntries, title) = parseIdxFile(idxPath) + except IOError: + rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo, + meCannotOpenFile, idxPath) + except ValueError as e: + s.msgHandler(idxPath, LineRstInit, ColRstInit, meInvalidField, e.msg) + + var isMarkup = false # for sanity check to avoid mixing .md <-> .nim + for entry in fileEntries: + # Though target .idx already has inside it the path to HTML relative + # project's root, we won't rely on it and use `linkRelPath` instead. 
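# A few sanity checks (not in the patch) for `extractLinkEnd` defined above,
# matching its find/rfind logic:
doAssert extractLinkEnd("path/to/file.html#/%") == "file.html#/%"
doAssert extractLinkEnd("sub/dir/page.html") == "page.html"
doAssert extractLinkEnd("page.html#anchor") == "page.html#anchor"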
+ let refn = extractLinkEnd(entry.link) + # select either markup (rst/md) or Nim cases: + if entry.kind in {ieMarkupTitle, ieNimTitle}: + s.idxImports[origFilename].title = entry.keyword + case entry.kind + of ieIdxRole, ieHeading, ieMarkupTitle: + if ext == ".nim" and entry.kind == ieMarkupTitle: + rstMessage(s, idxPath, meInvalidField, + $ieMarkupTitle & " in supposedly .nim-derived file") + if entry.kind == ieMarkupTitle: + isMarkup = true + info.line = entry.line.uint16 + addAnchorExtRst(s, key = entry.keyword, refn = refn, + anchorType = headlineAnchor, info=info) + of ieNim, ieNimGroup, ieNimTitle: + if ext in [".md", ".rst"] or isMarkup: + rstMessage(s, idxPath, meInvalidField, + $entry.kind & " in supposedly markup-derived file") + s.nimFileImported = true + var langSym: LangSymbol + if entry.kind in {ieNim, ieNimTitle}: + var q: RstParser + initParser(q, s) + info.line = entry.line.uint16 + setLen(q.tok, 0) + q.idx = 0 + getTokens(entry.linkTitle, q.tok) + var sons = newSeq[PRstNode](q.tok.len) + for i in 0 ..< q.tok.len: sons[i] = newLeaf(q.tok[i].symbol) + let linkTitle = newRstNode(rnInner, sons) + langSym = linkTitle.toLangSymbol + else: # entry.kind == ieNimGroup + langSym = langSymbolGroup(kind=entry.linkTitle, name=entry.keyword) + addAnchorNim(s, external = true, refn = refn, tooltip = entry.linkDesc, + langSym = langSym, priority = -4, # lowest + info = info, module = info.fileIndex) + doAssert s.idxImports[origFilename].title != "" + +proc preparePass2*(s: var PRstSharedState, mainNode: PRstNode, importdoc = true) = + ## Records titles in node `mainNode` and orders footnotes. + countTitles(s, mainNode) + fixHeadlines(s) + orderFootnotes(s) + if importdoc: + for origFilename in s.idxImports.keys: + loadIdxFile(s, origFilename) + +proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode = + # Associate this link alias with its target and change node kind to + # rnHyperlink or rnInternalRef appropriately. 
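# Illustration (hypothetical file and section names) of how the pieces above
# cooperate for cross-document links, assuming the referenced documents'
# `.idx` files already exist and are found via `findRefFile`:
#
#   .. importdoc:: manual.md, lib.md
#
#   See `Lexical Analysis`_ in the manual, or the [Generics] section
#   (Pandoc-style reference).
#
# `dirImportdoc` records the file names, `preparePass2`/`loadIdxFile` read the
# corresponding `.idx` files, and `resolveLink` (whose body follows) picks the
# best matching anchor, warning about broken or ambiguous links.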
+ var desc, alias: PRstNode + if n.kind == rnPandocRef: # link like [desc][alias] + desc = n.sons[0] + alias = n.sons[1] + else: # n.kind == rnRstRef, link like `desc=alias`_ + desc = n + alias = n + type LinkDef = object + ar: AnchorRule + priority: int + tooltip: string + target: PRstNode + info: TLineInfo + externFilename: string + # when external anchor: origin filename where anchor was defined + isTitle: bool + proc cmp(x, y: LinkDef): int = + result = cmp(x.priority, y.priority) + if result == 0: + result = cmp(x.target, y.target) + var foundLinks: seq[LinkDef] + let refn = rstnodeToRefname(alias) + var hyperlinks = findRef(s, refn) + for y in hyperlinks: + foundLinks.add LinkDef(ar: arHyperlink, priority: refPriority(y.kind), + target: y.value, info: y.info, + tooltip: "(" & $y.kind & ")") + let substRst = findMainAnchorRst(s, alias.addNodes, n.info) + template getExternFilename(subst: AnchorSubst): string = + if subst.kind == arExternalRst or + (subst.kind == arNim and subst.external): + getFilename(s, subst) + else: "" + for subst in substRst: + var refname, fullRefname: string + if subst.kind == arInternalRst: + refname = subst.target.anchor + fullRefname = refname + else: # arExternalRst + refname = subst.refnameExt + fullRefname = s.idxImports[getFilename(s, subst)].linkRelPath & + "/" & refname + let anchorType = + if subst.kind == arInternalRst: subst.anchorType + else: subst.anchorTypeExt # arExternalRst + foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority, + target: newLeaf(fullRefname), + info: subst.info, + externFilename: getExternFilename(subst), + isTitle: isDocumentationTitle(refname), + tooltip: "(" & $anchorType & ")") + # find anchors automatically generated from Nim symbols + if roNimFile in s.options or s.nimFileImported: + let substNim = findMainAnchorNim(s, signature=alias, n.info) + for subst in substNim: + let fullRefname = + if subst.external: + s.idxImports[getFilename(s, subst)].linkRelPath & + "/" & subst.refname + else: subst.refname + foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority, + target: newLeaf(fullRefname), + externFilename: getExternFilename(subst), + isTitle: isDocumentationTitle(subst.refname), + info: subst.info, tooltip: subst.tooltip) + foundLinks.sort(cmp = cmp, order = Descending) + let aliasStr = addNodes(alias) + if foundLinks.len >= 1: + if foundLinks[0].externFilename != "": + s.idxImports[foundLinks[0].externFilename].used = true + let kind = if foundLinks[0].ar in {arHyperlink, arExternalRst}: rnHyperlink + elif foundLinks[0].ar == arNim: + if foundLinks[0].externFilename == "": rnNimdocRef + else: rnHyperlink + else: rnInternalRef + result = newRstNode(kind) + let documentName = # filename without ext for `.nim`, title for `.md` + if foundLinks[0].ar == arNim: + changeFileExt(foundLinks[0].externFilename.extractFilename, "") + elif foundLinks[0].externFilename != "": + s.idxImports[foundLinks[0].externFilename].title + else: foundLinks[0].externFilename.extractFilename + let linkText = + if foundLinks[0].externFilename != "": + if foundLinks[0].isTitle: newLeaf(addNodes(desc)) + else: newLeaf(documentName & ": " & addNodes(desc)) + else: + newRstNode(rnInner, desc.sons) + result.sons = @[linkText, foundLinks[0].target] + if kind == rnNimdocRef: result.tooltip = foundLinks[0].tooltip + if foundLinks.len > 1: # report ambiguous link + var targets = newSeq[string]() + for l in foundLinks: + var t = " " + if s.filenames.len > 1: + t.add getFilename(s.filenames, l.info.fileIndex) + let n = l.info.line + let 
c = l.info.col + ColRstOffset + t.add "($1, $2): $3" % [$n, $c, l.tooltip] + targets.add t + rstMessage(s.filenames, s.msgHandler, n.info, mwAmbiguousLink, + "`$1`\n clash:\n$2" % [ + aliasStr, targets.join("\n")]) + else: # nothing found + result = n + rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, aliasStr) -proc resolveSubs(p: var RstParser, n: PRstNode): PRstNode = +proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode = + ## Makes pass 2 of RST parsing. ## Resolves substitutions and anchor aliases, groups footnotes. ## Takes input node `n` and returns the same node with recursive ## substitutions in `n.sons` to `result`. @@ -2504,94 +3773,75 @@ proc resolveSubs(p: var RstParser, n: PRstNode): PRstNode = if n == nil: return case n.kind of rnSubstitutionReferences: - var x = findSub(p, n) + var x = findSub(s, n) if x >= 0: - result = p.s.subs[x].value + result = s.subs[x].value else: var key = addNodes(n) var e = getEnv(key) if e != "": result = newLeaf(e) - else: rstMessage(p, mwUnknownSubstitution, key) - of rnHeadline, rnOverline: - # fix up section levels depending on presence of a title and subtitle - if p.s.hTitleCnt == 2: - if n.level == 1: # it's the subtitle - n.level = 0 - elif n.level >= 2: # normal sections - n.level -= 1 - elif p.s.hTitleCnt == 0: - n.level += 1 - of rnRef: - let refn = rstnodeToRefname(n) - var y = findRef(p, refn) - if y != nil: - result = newRstNode(rnHyperlink) - let text = newRstNode(rnInner, n.sons) - result.sons = @[text, y] - else: - let s = findMainAnchor(p, refn) - if s != "": - result = newRstNode(rnInternalRef) - let text = newRstNode(rnInner, n.sons) - result.sons = @[text, # visible text of reference - newLeaf(s)] # link itself + else: rstMessage(s.filenames, s.msgHandler, n.info, + mwUnknownSubstitution, key) + of rnRstRef, rnPandocRef: + result = resolveLink(s, n) of rnFootnote: - var (fnType, num) = getFootnoteType(n.sons[0]) + var (fnType, num) = getFootnoteType(s, n.sons[0]) case fnType of fnManualNumber, fnCitation: discard "no need to alter fixed text" of fnAutoNumberLabel, fnAutoNumber: if fnType == fnAutoNumberLabel: let labelR = rstnodeToRefname(n.sons[0]) - num = getFootnoteNum(p, labelR) + num = getFootnoteNum(s, labelR) else: - num = getFootnoteNum(p, n.order) + num = getFootnoteNum(s, n.order) var nn = newRstNode(rnInner) nn.add newLeaf($num) result.sons[0] = nn of fnAutoSymbol: - let sym = getAutoSymbol(p, n.order) + let sym = getAutoSymbol(s, n.order) n.sons[0].sons[0].text = sym - n.sons[1] = resolveSubs(p, n.sons[1]) + n.sons[1] = resolveSubs(s, n.sons[1]) of rnFootnoteRef: - var (fnType, num) = getFootnoteType(n.sons[0]) + var (fnType, num) = getFootnoteType(s, n.sons[0]) template addLabel(number: int | string) = var nn = newRstNode(rnInner) nn.add newLeaf($number) result.add(nn) var refn = fnType.prefix # create new rnFootnoteRef, add final label, and finalize target refn: - result = newRstNode(rnFootnoteRef) + result = newRstNode(rnFootnoteRef, info = n.info) case fnType of fnManualNumber: addLabel num refn.add $num of fnAutoNumber: - addLabel getFootnoteNum(p, n.order) - refn.add $n.order + inc s.currFootnoteNumRef + addLabel getFootnoteNum(s, s.currFootnoteNumRef) + refn.add $s.currFootnoteNumRef of fnAutoNumberLabel: - addLabel getFootnoteNum(p, rstnodeToRefname(n)) + addLabel getFootnoteNum(s, rstnodeToRefname(n)) refn.add rstnodeToRefname(n) of fnAutoSymbol: - addLabel getAutoSymbol(p, n.order) - refn.add $n.order + inc s.currFootnoteSymRef + addLabel getAutoSymbol(s, s.currFootnoteSymRef) + 
refn.add $s.currFootnoteSymRef of fnCitation: result.add n.sons[0] refn.add rstnodeToRefname(n) - let s = findMainAnchor(p, refn) - if s != "": - result.add newLeaf(s) # add link + # TODO: correctly report ambiguities + let anchorInfo = findMainAnchorRst(s, refn, n.info) + if anchorInfo.len != 0: + result.add newLeaf(anchorInfo[0].target.anchor) # add link else: - rstMessage(p, mwUnknownSubstitution, refn) + rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, refn) result.add newLeaf(refn) # add link of rnLeaf: discard - of rnContents: - p.hasToc = true else: var regroup = false for i in 0 ..< n.len: - n.sons[i] = resolveSubs(p, n.sons[i]) + n.sons[i] = resolveSubs(s, n.sons[i]) if n.sons[i] != nil and n.sons[i].kind == rnFootnote: regroup = true if regroup: # group footnotes together into rnFootnoteGroup @@ -2609,18 +3859,28 @@ proc resolveSubs(p: var RstParser, n: PRstNode): PRstNode = inc i result.sons = newSons +proc completePass2*(s: PRstSharedState) = + for (filename, importdocInfo) in s.idxImports.pairs: + if not importdocInfo.used: + rstMessage(s.filenames, s.msgHandler, importdocInfo.fromInfo, + mwUnusedImportdoc, filename) + proc rstParse*(text, filename: string, - line, column: int, hasToc: var bool, + line, column: int, options: RstParseOptions, findFile: FindFileHandler = nil, - msgHandler: MsgHandler = nil): PRstNode = - var p: RstParser - initParser(p, newSharedState(options, findFile, msgHandler)) - p.filename = filename - p.line = line - p.col = column + getTokens(text, roSkipPounds in options, p.tok) - let unresolved = parseDoc(p) - countTitles(p, unresolved) - orderFootnotes(p) - result = resolveSubs(p, unresolved) - hasToc = p.hasToc + findRefFile: FindRefFileHandler = nil, + msgHandler: MsgHandler = nil): + tuple[node: PRstNode, filenames: RstFileTable, hasToc: bool] = + ## Parses the whole `text`. The result is ready for `rstgen.renderRstToOut`, + ## note that 2nd tuple element should be fed to `initRstGenerator` + ## argument `filenames` (it is being filled here at least with `filename` + ## and possibly with other files from RST ``.. include::`` statement). + var sharedState = newRstSharedState(options, filename, findFile, findRefFile, + msgHandler, hasToc=false) + let unresolved = rstParsePass1(text, line, column, sharedState) + preparePass2(sharedState, unresolved) + result.node = resolveSubs(sharedState, unresolved) + completePass2(sharedState) + result.filenames = sharedState.filenames + result.hasToc = sharedState.hasToc diff --git a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim index c68df7daa..2bbb0d0b8 100644 --- a/lib/packages/docutils/rstast.nim +++ b/lib/packages/docutils/rstast.nim @@ -8,10 +8,12 @@ # ## This module implements an AST for the `reStructuredText`:idx: parser. -## -## **Note:** Import ``packages/docutils/rstast`` to use this module -import strutils, json +import std/[strutils, json] + +when defined(nimPreviewSlimSystem): + import std/assertions + type RstNodeKind* = enum ## the possible node kinds of an PRstNode @@ -25,7 +27,7 @@ type rnBulletItem, # a bullet item rnEnumList, # an enumerated list rnEnumItem, # an enumerated item - rnDefList, # a definition list + rnDefList, rnMdDefList, # a definition list (RST/Markdown) rnDefItem, # an item of a definition list consisting of ... rnDefName, # ... a name part ... rnDefBody, # ... and a body part ... @@ -34,7 +36,10 @@ type rnFieldName, # consisting of a field name ... rnFieldBody, # ... 
and a field body rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString, - rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock, + rnOptionArgument, rnDescription, rnLiteralBlock, + rnMarkdownBlockQuote, # a quote starting from punctuation like >>> + rnMarkdownBlockQuoteItem, # a quotation block, quote lines starting with + # the same number of chars rnLineBlock, # the | thingie rnLineBlockItem, # a son of rnLineBlock - one line inside it. # When `RstNode` lineIndent="\n" the line's empty @@ -44,7 +49,11 @@ type rnCitation, # similar to footnote, so use rnFootnote instead rnFootnoteGroup, # footnote group - exists for a purely stylistic # reason: to display a few footnotes as 1 block - rnStandaloneHyperlink, rnHyperlink, rnRef, rnInternalRef, rnFootnoteRef, + rnStandaloneHyperlink, rnHyperlink, + rnRstRef, # RST reference like `section name`_ + rnPandocRef, # Pandoc Markdown reference like [section name] + rnInternalRef, rnFootnoteRef, + rnNimdocRef, # reference to automatically generated Nim symbol rnDirective, # a general directive rnDirArg, # a directive argument (for some directives). # here are directives that are not rnDirective: @@ -57,12 +66,17 @@ type # * `file#id <file#id>`_ # * `file#id <file#id>'_ rnSubstitutionDef, # a definition of a substitution - rnGeneralRole, # Inline markup: + # Inline markup: + rnInlineCode, # interpreted text with code in a known language + rnCodeFragment, # inline code for highlighting with the specified + # class (which cannot be inferred from context) + rnUnknownRole, # interpreted text with an unknown role rnSub, rnSup, rnIdx, rnEmphasis, # "*" rnStrongEmphasis, # "**" rnTripleEmphasis, # "***" - rnInterpretedText, # "`" + rnInterpretedText, # "`" an auxiliary role for parsing that will + # be converted into other kinds like rnInlineCode rnInlineLiteral, # "``" rnInlineTarget, # "_`target`" rnSubstitutionReferences, # "|" @@ -71,6 +85,11 @@ type rnLeaf # a leaf; the node's text field contains the # leaf val + FileIndex* = distinct int32 + TLineInfo* = object + line*: uint16 + col*: int16 + fileIndex*: FileIndex PRstNode* = ref RstNode ## an RST node RstNodeSeq* = seq[PRstNode] @@ -89,21 +108,42 @@ type level*: int ## level of headings starting from 1 (main ## chapter) to larger ones (minor sub-sections) ## level=0 means it's document title or subtitle - of rnFootnote, rnCitation, rnFootnoteRef: + of rnFootnote, rnCitation, rnOptionListItem: order*: int ## footnote order (for auto-symbol footnotes and ## auto-numbered ones without a label) + of rnMarkdownBlockQuoteItem: + quotationDepth*: int ## number of characters in line prefix + of rnRstRef, rnPandocRef, rnSubstitutionReferences, + rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef: + info*: TLineInfo ## To have line/column info for warnings at + ## nodes that are post-processed after parsing + of rnNimdocRef: + tooltip*: string + of rnTable, rnGridTable, rnMarkdownTable: + colCount*: int ## Number of (not-united) cells in the table + of rnTableRow: + endsHeader*: bool ## Is last row in the header of table? 
+ of rnTableHeaderCell, rnTableDataCell: + span*: int ## Number of table columns that the cell occupies else: discard anchor*: string ## anchor, internal link target ## (aka HTML id tag, aka Latex label/hypertarget) sons*: RstNodeSeq ## the node's sons +proc `==`*(a, b: FileIndex): bool {.borrow.} + proc len*(n: PRstNode): int = result = len(n.sons) proc newRstNode*(kind: RstNodeKind, sons: seq[PRstNode] = @[], anchor = ""): PRstNode = + result = PRstNode(kind: kind, sons: sons, anchor: anchor) + +proc newRstNode*(kind: RstNodeKind, info: TLineInfo, + sons: seq[PRstNode] = @[]): PRstNode = result = PRstNode(kind: kind, sons: sons) + result.info = info proc newRstNode*(kind: RstNodeKind, s: string): PRstNode {.deprecated.} = assert kind in {rnLeaf, rnSmiley} @@ -244,7 +284,7 @@ proc renderRstToRst(d: var RenderContext, n: PRstNode, result: var string) = inc(d.indent, 2) renderRstSons(d, n, result) dec(d.indent, 2) - of rnRef: + of rnRstRef: result.add("`") renderRstSons(d, n, result) result.add("`_") @@ -254,7 +294,7 @@ proc renderRstToRst(d: var RenderContext, n: PRstNode, result: var string) = result.add(" <") renderRstToRst(d, n.sons[1], result) result.add(">`_") - of rnGeneralRole: + of rnUnknownRole: result.add('`') renderRstToRst(d, n.sons[0],result) result.add("`:") @@ -337,19 +377,35 @@ proc renderRstToJsonNode(node: PRstNode): JsonNode = proc renderRstToJson*(node: PRstNode): string = ## Writes the given RST node as JSON that is in the form - ## :: - ## { - ## "kind":string node.kind, - ## "text":optional string node.text, - ## "level":optional int node.level, - ## "sons":optional node array - ## } + ## + ## { + ## "kind":string node.kind, + ## "text":optional string node.text, + ## "level":optional int node.level, + ## "sons":optional node array + ## } renderRstToJsonNode(node).pretty -proc renderRstToStr*(node: PRstNode, indent=0): string = - ## Writes the parsed RST `node` into a compact string +proc renderRstToText*(node: PRstNode): string = + ## minimal text representation of markup node + const code = {rnCodeFragment, rnInterpretedText, rnInlineLiteral, rnInlineCode} + if node == nil: + return "" + case node.kind + of rnLeaf, rnSmiley: + result.add node.text + else: + if node.kind in code: result.add "`" + for i in 0 ..< node.sons.len: + if node.kind in {rnInlineCode, rnCodeBlock} and i == 0: + continue # omit language specifier + result.add renderRstToText(node.sons[i]) + if node.kind in code: result.add "`" + +proc treeRepr*(node: PRstNode, indent=0): string = + ## Writes the parsed RST `node` into an AST tree with compact string ## representation in the format (one line per every sub-node): - ## ``indent - kind - text - level - order - anchor (if non-zero)`` + ## ``indent - kind - [text|level|order|adType] - anchor (if non-zero)`` ## (suitable for debugging of RST parsing). 
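# A minimal usage sketch (assumption: nodes are built by hand rather than
# parsed from a document):
#
#   let em = newRstNode(rnEmphasis, @[PRstNode(kind: rnLeaf, text: "hello")])
#   echo treeRepr(em)
#
# which prints one line per node, indented by two spaces per level:
#
#   rnEmphasis
#     rnLeaf 'hello'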
if node == nil: result.add " ".repeat(indent) & "[nil]\n" @@ -357,21 +413,32 @@ proc renderRstToStr*(node: PRstNode, indent=0): string = result.add " ".repeat(indent) & $node.kind case node.kind of rnLeaf, rnSmiley: - result.add (if node.text == "": "" else: "\t'" & node.text & "'") + result.add (if node.text == "": "" else: " '" & node.text & "'") of rnEnumList: - result.add "\tlabelFmt=" & node.labelFmt + result.add " labelFmt=" & node.labelFmt of rnLineBlockItem: var txt: string - if node.lineIndent == "\n": txt = "\t(blank line)" - else: txt = "\tlineIndent=" & $node.lineIndent.len + if node.lineIndent == "\n": txt = " (blank line)" + else: txt = " lineIndent=" & $node.lineIndent.len result.add txt + of rnAdmonition: + result.add " adType=" & node.adType of rnHeadline, rnOverline, rnMarkdownHeadline: - result.add "\tlevel=" & $node.level - of rnFootnote, rnCitation, rnFootnoteRef: - result.add (if node.order == 0: "" else: "\torder=" & $node.order) + result.add " level=" & $node.level + of rnFootnote, rnCitation, rnOptionListItem: + result.add (if node.order == 0: "" else: " order=" & $node.order) + of rnMarkdownBlockQuoteItem: + result.add " quotationDepth=" & $node.quotationDepth + of rnTable, rnGridTable, rnMarkdownTable: + result.add " colCount=" & $node.colCount + of rnTableHeaderCell, rnTableDataCell: + if node.span > 0: + result.add " span=" & $node.span + of rnTableRow: + if node.endsHeader: result.add " endsHeader" else: discard - result.add (if node.anchor == "": "" else: "\tanchor='" & node.anchor & "'") + result.add (if node.anchor == "": "" else: " anchor='" & node.anchor & "'") result.add "\n" for son in node.sons: - result.add renderRstToStr(son, indent=indent+2) + result.add treeRepr(son, indent=indent+2) diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim index c52a0fdcc..7fc0ac03a 100644 --- a/lib/packages/docutils/rstgen.nim +++ b/lib/packages/docutils/rstgen.nim @@ -37,12 +37,16 @@ ## ## * The same goes for footnotes/citations links: they point to themselves. ## No backreferences are generated since finding all references of a footnote -## can be done by simply searching for [footnoteName]. -## -## .. Tip: Import ``packages/docutils/rstgen`` to use this module +## can be done by simply searching for ``[footnoteName]``. + +import std/[strutils, os, hashes, strtabs, tables, sequtils, + algorithm, parseutils, strbasics] + +import rstast, rst, rstidx, highlite + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio, formatfloat] -import strutils, os, hashes, strtabs, rstast, rst, highlite, tables, sequtils, - algorithm, parseutils import ../../std/private/since @@ -55,26 +59,26 @@ type outHtml, # output is HTML outLatex # output is Latex - TocEntry = object - n*: PRstNode - refname*, header*: string - MetaEnum* = enum - metaNone, metaTitle, metaSubtitle, metaAuthor, metaVersion + metaNone, metaTitleRaw, metaTitle, metaSubtitle, metaAuthor, metaVersion + + EscapeMode* = enum # in Latex text inside options [] and URLs is + # escaped slightly differently than in normal text + emText, emOption, emUrl # emText is currently used for code also RstGenerator* = object of RootObj target*: OutputTarget config*: StringTableRef splitAfter*: int # split too long entries in the TOC listingCounter*: int - tocPart*: seq[TocEntry] + tocPart*: seq[PRstNode] # headings for Table of Contents hasToc*: bool theIndex: string # Contents of the index file to be dumped at the end. 
- options*: RstParseOptions findFile*: FindFileHandler msgHandler*: MsgHandler outDir*: string ## output directory, initialized by docgen.nim destFile*: string ## output (HTML) file, initialized by docgen.nim + filenames*: RstFileTable filename*: string ## source Nim or Rst file meta*: array[MetaEnum, string] currentSection: string ## \ @@ -85,7 +89,9 @@ type ## for hyperlinks. See renderIndexTerm proc for details. id*: int ## A counter useful for generating IDs. onTestSnippet*: proc (d: var RstGenerator; filename, cmd: string; status: int; - content: string) + content: string) {.gcsafe.} + escMode*: EscapeMode + curQuotationDepth: int PDoc = var RstGenerator ## Alias to type less. @@ -109,9 +115,10 @@ proc init(p: var CodeBlockParams) = proc initRstGenerator*(g: var RstGenerator, target: OutputTarget, config: StringTableRef, filename: string, - options: RstParseOptions, findFile: FindFileHandler = nil, - msgHandler: MsgHandler = nil) = + msgHandler: MsgHandler = nil, + filenames = default(RstFileTable), + hasToc = false) = ## Initializes a ``RstGenerator``. ## ## You need to call this before using a ``RstGenerator`` with any other @@ -147,22 +154,25 @@ proc initRstGenerator*(g: var RstGenerator, target: OutputTarget, ## ## Example: ## - ## .. code-block:: nim - ## + ## ```nim ## import packages/docutils/rstgen ## ## var gen: RstGenerator ## gen.initRstGenerator(outHtml, defaultConfig(), "filename", {}) + ## ``` g.config = config g.target = target g.tocPart = @[] + g.hasToc = hasToc g.filename = filename + g.filenames = filenames g.splitAfter = 20 g.theIndex = "" - g.options = options g.findFile = findFile g.currentSection = "" g.id = 0 + g.escMode = emText + g.curQuotationDepth = 0 let fileParts = filename.splitFile if fileParts.ext == ".nim": g.currentSection = "Module " & fileParts.name @@ -181,7 +191,9 @@ proc writeIndexFile*(g: var RstGenerator, outfile: string) = ## If the index is empty the file won't be created. if g.theIndex.len > 0: writeFile(outfile, g.theIndex) -proc addXmlChar(dest: var string, c: char) = +proc addHtmlChar(dest: var string, c: char) = + # Escapes HTML characters. Note that single quote ' is not escaped as + # ' -- unlike XML (for standards pre HTML5 it was even forbidden). case c of '&': add(dest, "&") of '<': add(dest, "<") @@ -189,35 +201,36 @@ proc addXmlChar(dest: var string, c: char) = of '\"': add(dest, """) else: add(dest, c) -proc addRtfChar(dest: var string, c: char) = +proc addTexChar(dest: var string, c: char, escMode: EscapeMode) = + ## Escapes 10 special Latex characters and sometimes ` and [, ]. + ## TODO: @ is always a normal symbol (besides the header), am I wrong? + ## All escapes that need to work in text and code blocks (`emText` mode) + ## should start from \ (to be compatible with fancyvrb/fvextra). case c - of '{': add(dest, "\\{") - of '}': add(dest, "\\}") - of '\\': add(dest, "\\\\") + of '_', '&', '#', '%': add(dest, "\\" & c) + # commands \label and \pageref don't accept \$ by some reason but OK with $: + of '$': (if escMode == emUrl: add(dest, c) else: add(dest, "\\" & c)) + # \~ and \^ have a special meaning unless they are followed by {} + of '~', '^': add(dest, "\\" & c & "{}") + # Latex loves to substitute ` to opening quote, even in texttt mode! 
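# Since both ends of the pipeline change in this patch (rstParse now returns a
# tuple, initRstGenerator takes filenames/hasToc instead of options), here is a
# minimal end-to-end sketch of how they are combined (assumption: "input.rst"
# and the empty option set are placeholders, handlers stay at their defaults):
import packages/docutils/[rst, rstast, rstgen]

let (node, filenames, hasToc) = rstParse(readFile("input.rst"), "input.rst",
                                         line = 1, column = 1, options = {})
var gen: RstGenerator
gen.initRstGenerator(outHtml, defaultConfig(), "input.rst",
                     filenames = filenames, hasToc = hasToc)
var html = ""
renderRstToOut(gen, node, html)
echo html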
+ of '`': add(dest, "\\textasciigrave{}") + # add {} to avoid gobbling up space by \textbackslash + of '\\': add(dest, "\\textbackslash{}") + # Using { and } in URL in Latex: https://tex.stackexchange.com/a/469175 + of '{': + add(dest, if escMode == emUrl: "\\%7B" else: "\\{") + of '}': + add(dest, if escMode == emUrl: "\\%7D" else: "\\}") + of ']': + # escape ] inside an optional argument in e.g. \section[static[T]]{.. + add(dest, if escMode == emOption: "\\text{]}" else: "]") else: add(dest, c) -proc addTexChar(dest: var string, c: char) = - case c - of '_': add(dest, "\\_") - of '{': add(dest, "\\symbol{123}") - of '}': add(dest, "\\symbol{125}") - of '[': add(dest, "\\symbol{91}") - of ']': add(dest, "\\symbol{93}") - of '\\': add(dest, "\\symbol{92}") - of '$': add(dest, "\\$") - of '&': add(dest, "\\&") - of '#': add(dest, "\\#") - of '%': add(dest, "\\%") - of '~': add(dest, "\\symbol{126}") - of '@': add(dest, "\\symbol{64}") - of '^': add(dest, "\\symbol{94}") - of '`': add(dest, "\\symbol{96}") - else: add(dest, c) - -proc escChar*(target: OutputTarget, dest: var string, c: char) {.inline.} = +proc escChar*(target: OutputTarget, dest: var string, + c: char, escMode: EscapeMode) {.inline.} = case target - of outHtml: addXmlChar(dest, c) - of outLatex: addTexChar(dest, c) + of outHtml: addHtmlChar(dest, c) + of outLatex: addTexChar(dest, c, escMode) proc addSplitter(target: OutputTarget; dest: var string) {.inline.} = case target @@ -236,7 +249,7 @@ proc nextSplitPoint*(s: string, start: int): int = inc(result) dec(result) # last valid index -proc esc*(target: OutputTarget, s: string, splitAfter = -1): string = +proc esc*(target: OutputTarget, s: string, splitAfter = -1, escMode = emText): string = ## Escapes the HTML. result = "" if splitAfter >= 0: @@ -247,11 +260,11 @@ proc esc*(target: OutputTarget, s: string, splitAfter = -1): string = #if (splitter != " ") or (partLen + k - j + 1 > splitAfter): partLen = 0 addSplitter(target, result) - for i in countup(j, k): escChar(target, result, s[i]) + for i in countup(j, k): escChar(target, result, s[i], escMode) inc(partLen, k - j + 1) j = k + 1 else: - for i in countup(0, len(s) - 1): escChar(target, result, s[i]) + for i in countup(0, len(s) - 1): escChar(target, result, s[i], escMode) proc disp(target: OutputTarget, xml, tex: string): string = @@ -271,19 +284,18 @@ proc dispA(target: OutputTarget, dest: var string, proc `or`(x, y: string): string {.inline.} = result = if x.len == 0: y else: x -proc renderRstToOut*(d: var RstGenerator, n: PRstNode, result: var string) +proc renderRstToOut*(d: var RstGenerator, n: PRstNode, result: var string) {.gcsafe.} ## Writes into ``result`` the rst ast ``n`` using the ``d`` configuration. ## ## Before using this proc you need to initialise a ``RstGenerator`` with ## ``initRstGenerator`` and parse a rst file with ``rstParse`` from the ## `packages/docutils/rst module <rst.html>`_. Example: - ## - ## .. code-block:: nim - ## + ## ```nim ## # ...configure gen and rst vars... ## var generatedHtml = "" ## renderRstToOut(gen, rst, generatedHtml) ## echo generatedHtml + ## ``` proc renderAux(d: PDoc, n: PRstNode, result: var string) = for i in countup(0, len(n)-1): renderRstToOut(d, n.sons[i], result) @@ -310,31 +322,8 @@ proc renderAux(d: PDoc, n: PRstNode, html, tex: string, result: var string) = # ---------------- index handling -------------------------------------------- -proc quoteIndexColumn(text: string): string = - ## Returns a safe version of `text` for serialization to the ``.idx`` file. 
- ## - ## The returned version can be put without worries in a line based tab - ## separated column text file. The following character sequence replacements - ## will be performed for that goal: - ## - ## * ``"\\"`` => ``"\\\\"`` - ## * ``"\n"`` => ``"\\n"`` - ## * ``"\t"`` => ``"\\t"`` - result = newStringOfCap(text.len + 3) - for c in text: - case c - of '\\': result.add "\\" - of '\L': result.add "\\n" - of '\C': discard - of '\t': result.add "\\t" - else: result.add c - -proc unquoteIndexColumn(text: string): string = - ## Returns the unquoted version generated by ``quoteIndexColumn``. - result = text.multiReplace(("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\")) - -proc setIndexTerm*(d: var RstGenerator, htmlFile, id, term: string, - linkTitle, linkDesc = "") = +proc setIndexTerm*(d: var RstGenerator; k: IndexEntryKind, htmlFile, id, term: string, + linkTitle, linkDesc = "", line = 0) = ## Adds a `term` to the index using the specified hyperlink identifier. ## ## A new entry will be added to the index using the format @@ -357,21 +346,8 @@ proc setIndexTerm*(d: var RstGenerator, htmlFile, id, term: string, ## <#writeIndexFile,RstGenerator,string>`_. The purpose of the index is ## documented in the `docgen tools guide ## <docgen.html#related-options-index-switch>`_. - var - entry = term - isTitle = false - entry.add('\t') - entry.add(htmlFile) - if id.len > 0: - entry.add('#') - entry.add(id) - else: - isTitle = true - if linkTitle.len > 0 or linkDesc.len > 0: - entry.add('\t' & linkTitle.quoteIndexColumn) - entry.add('\t' & linkDesc.quoteIndexColumn) - entry.add("\n") - + let (entry, isTitle) = formatIndexEntry(k, htmlFile, id, term, + linkTitle, linkDesc, line) if isTitle: d.theIndex.insert(entry) else: d.theIndex.add(entry) @@ -384,6 +360,15 @@ proc hash(n: PRstNode): int = result = result !& hash(n.sons[i]) result = !$result +proc htmlFileRelPath(d: PDoc): string = + if d.outDir.len == 0: + # /foo/bar/zoo.nim -> zoo.html + changeFileExt(extractFilename(d.filename), HtmlExt) + else: # d is initialized in docgen.nim + # outDir = /foo -\ + # destFile = /foo/bar/zoo.html -|-> bar/zoo.html + d.destFile.relativePath(d.outDir, '/') + proc renderIndexTerm*(d: PDoc, n: PRstNode, result: var string) = ## Renders the string decorated within \`foobar\`\:idx\: markers. ## @@ -400,18 +385,13 @@ proc renderIndexTerm*(d: PDoc, n: PRstNode, result: var string) = var term = "" renderAux(d, n, term) - setIndexTerm(d, changeFileExt(extractFilename(d.filename), HtmlExt), id, term, d.currentSection) - dispA(d.target, result, "<span id=\"$1\">$2</span>", "$2\\label{$1}", + setIndexTerm(d, ieIdxRole, + htmlFileRelPath(d), id, term, d.currentSection) + dispA(d.target, result, "<span id=\"$1\">$2</span>", "\\nimindexterm{$1}{$2}", [id, term]) type - IndexEntry = object - keyword: string - link: string - linkTitle: string ## contains a prettier text for the href - linkDesc: string ## the title attribute of the final href - - IndexedDocs = Table[IndexEntry, seq[IndexEntry]] ## \ + IndexedDocs* = Table[IndexEntry, seq[IndexEntry]] ## \ ## Contains the index sequences for doc types. ## ## The key is a *fake* IndexEntry which will contain the title of the @@ -421,21 +401,6 @@ type ## The value indexed by this IndexEntry is a sequence with the real index ## entries found in the ``.idx`` file. -proc cmp(a, b: IndexEntry): int = - ## Sorts two ``IndexEntry`` first by `keyword` field, then by `link`. 
- result = cmpIgnoreStyle(a.keyword, b.keyword) - if result == 0: - result = cmpIgnoreStyle(a.link, b.link) - -proc hash(x: IndexEntry): Hash = - ## Returns the hash for the combined fields of the type. - ## - ## The hash is computed as the chained hash of the individual string hashes. - result = x.keyword.hash !& x.link.hash - result = result !& x.linkTitle.hash - result = result !& x.linkDesc.hash - result = !$result - when defined(gcDestructors): template `<-`(a, b: var IndexEntry) = a = move(b) else: @@ -444,6 +409,7 @@ else: shallowCopy a.link, b.link shallowCopy a.linkTitle, b.linkTitle shallowCopy a.linkDesc, b.linkDesc + shallowCopy a.module, b.module proc sortIndex(a: var openArray[IndexEntry]) = # we use shellsort here; fast and simple @@ -483,16 +449,20 @@ proc generateSymbolIndex(symbols: seq[IndexEntry]): string = result = "<dl>" var i = 0 while i < symbols.len: - let keyword = symbols[i].keyword + let keyword = esc(outHtml, symbols[i].keyword) let cleanedKeyword = keyword.escapeLink result.addf("<dt><a name=\"$2\" href=\"#$2\"><span>$1:</span></a></dt><dd><ul class=\"simple\">\n", [keyword, cleanedKeyword]) var j = i - while j < symbols.len and keyword == symbols[j].keyword: + while j < symbols.len and symbols[i].keyword == symbols[j].keyword: let url = symbols[j].link.escapeLink - text = if symbols[j].linkTitle.len > 0: symbols[j].linkTitle else: url - desc = if symbols[j].linkDesc.len > 0: symbols[j].linkDesc else: "" + module = symbols[j].module + text = + if symbols[j].linkTitle.len > 0: + esc(outHtml, module & ": " & symbols[j].linkTitle) + else: url + desc = symbols[j].linkDesc if desc.len > 0: result.addf("""<li><a class="reference external" title="$3" data-doc-search-tag="$2" href="$1">$2</a></li> @@ -506,13 +476,6 @@ proc generateSymbolIndex(symbols: seq[IndexEntry]): string = i = j result.add("</dl>") -proc isDocumentationTitle(hyperlink: string): bool = - ## Returns true if the hyperlink is actually a documentation title. - ## - ## Documentation titles lack the hash. See `mergeIndexes() - ## <#mergeIndexes,string>`_ for a more detailed explanation. - result = hyperlink.find('#') < 0 - proc stripTocLevel(s: string): tuple[level: int, text: string] = ## Returns the *level* of the toc along with the text without it. for c in 0 ..< s.len: @@ -546,17 +509,15 @@ proc generateDocumentationToc(entries: seq[IndexEntry]): string = level = 1 levels.newSeq(entries.len) for entry in entries: - let (rawLevel, rawText) = stripTocLevel(entry.linkTitle or entry.keyword) + let (rawLevel, rawText) = stripTocLevel(entry.linkTitle) if rawLevel < 1: # This is a normal symbol, push it *inside* one level from the last one. levels[L].level = level + 1 - # Also, ignore the linkTitle and use directly the keyword. - levels[L].text = entry.keyword else: # The level did change, update the level indicator. level = rawLevel levels[L].level = rawLevel - levels[L].text = rawText + levels[L].text = rawText inc L # Now generate hierarchical lists based on the precalculated levels. @@ -587,7 +548,7 @@ proc generateDocumentationIndex(docs: IndexedDocs): string = for title in titles: let tocList = generateDocumentationToc(docs.getOrDefault(title)) result.add("<ul><li><a href=\"" & - title.link & "\">" & title.keyword & "</a>\n" & tocList & "</li></ul>\n") + title.link & "\">" & title.linkTitle & "</a>\n" & tocList & "</li></ul>\n") proc generateDocumentationJumps(docs: IndexedDocs): string = ## Returns a plain list of hyperlinks to documentation TOCs in HTML. 
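# Sketch (hypothetical values) of adding an extra index entry through the new
# IndexEntryKind-aware `setIndexTerm` shown above; `gen` is an initialized
# RstGenerator as in the earlier end-to-end sketch:
#
#   gen.setIndexTerm(ieIdxRole, htmlFile = "manual.html", id = "slicing",
#                    term = "slicing", linkTitle = "Nim manual")
#
# Title entries go through the same call and are inserted at the front of the
# in-memory index (the `isTitle` branch above), so writeIndexFile emits them
# first.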
@@ -599,7 +560,7 @@ proc generateDocumentationJumps(docs: IndexedDocs): string = var chunks: seq[string] = @[] for title in titles: - chunks.add("<a href=\"" & title.link & "\">" & title.keyword & "</a>") + chunks.add("<a href=\"" & title.link & "\">" & title.linkTitle & "</a>") result.add(chunks.join(", ") & ".<br/>") @@ -613,7 +574,7 @@ proc generateModuleJumps(modules: seq[string]): string = result.add(chunks.join(", ") & ".<br/>") -proc readIndexDir(dir: string): +proc readIndexDir*(dir: string): tuple[modules: seq[string], symbols: seq[IndexEntry], docs: IndexedDocs] = ## Walks `dir` reading ``.idx`` files converting them in IndexEntry items. ## @@ -628,39 +589,12 @@ proc readIndexDir(dir: string): # Scan index files and build the list of symbols. for path in walkDirRec(dir): if path.endsWith(IndexExt): - var - fileEntries: seq[IndexEntry] - title: IndexEntry - f = 0 - newSeq(fileEntries, 500) - setLen(fileEntries, 0) - for line in lines(path): - let s = line.find('\t') - if s < 0: continue - setLen(fileEntries, f+1) - fileEntries[f].keyword = line.substr(0, s-1) - fileEntries[f].link = line.substr(s+1) - # See if we detect a title, a link without a `#foobar` trailing part. - if title.keyword.len == 0 and fileEntries[f].link.isDocumentationTitle: - title.keyword = fileEntries[f].keyword - title.link = fileEntries[f].link - - if fileEntries[f].link.find('\t') > 0: - let extraCols = fileEntries[f].link.split('\t') - fileEntries[f].link = extraCols[0] - assert extraCols.len == 3 - fileEntries[f].linkTitle = extraCols[1].unquoteIndexColumn - fileEntries[f].linkDesc = extraCols[2].unquoteIndexColumn - else: - fileEntries[f].linkTitle = "" - fileEntries[f].linkDesc = "" - inc f + var (fileEntries, title) = parseIdxFile(path) # Depending on type add this to the list of symbols or table of APIs. - if title.keyword.len == 0: - for i in 0 ..< f: - # Don't add to symbols TOC entries (they start with a whitespace). - let toc = fileEntries[i].linkTitle - if toc.len > 0 and toc[0] == ' ': + + if title.kind == ieNimTitle: + for i in 0 ..< fileEntries.len: + if fileEntries[i].kind != ieNim: continue # Ok, non TOC entry, add it. setLen(result.symbols, L + 1) @@ -670,16 +604,22 @@ proc readIndexDir(dir: string): var x = fileEntries[0].link let i = find(x, '#') if i > 0: - x = x.substr(0, i-1) + x.setLen(i) if i != 0: # don't add entries starting with '#' result.modules.add(x.changeFileExt("")) else: # Generate the symbolic anchor for index quickjumps. - title.linkTitle = "doc_toc_" & $result.docs.len + title.aux = "doc_toc_" & $result.docs.len result.docs[title] = fileEntries - sort(result.modules, system.cmp) + for i in 0 ..< fileEntries.len: + if fileEntries[i].kind != ieIdxRole: + continue + + setLen(result.symbols, L + 1) + result.symbols[L] = fileEntries[i] + inc L proc mergeIndexes*(dir: string): string = ## Merges all index files in `dir` and returns the generated index as HTML. @@ -710,6 +650,7 @@ proc mergeIndexes*(dir: string): string = ## Returns the merged and sorted indices into a single HTML block which can ## be further embedded into nimdoc templates. var (modules, symbols, docs) = readIndexDir(dir) + sort(modules, system.cmp) result = "" # Generate a quick jump list of documents. @@ -737,67 +678,32 @@ proc mergeIndexes*(dir: string): string = # ---------------------------------------------------------------------------- -proc stripTocHtml(s: string): string = - ## Ugly quick hack to remove HTML tags from TOC titles. - ## - ## A TocEntry.header field already contains rendered HTML tags. 
Instead of - ## implementing a proper version of renderRstToOut() which recursively - ## renders an rst tree to plain text, we simply remove text found between - ## angled brackets. Given the limited possibilities of rst inside TOC titles - ## this should be enough. - result = s - var first = result.find('<') - while first >= 0: - let last = result.find('>', first) - if last < 0: - # Abort, since we didn't found a closing angled bracket. - return - result.delete(first, last) - first = result.find('<', first) - proc renderHeadline(d: PDoc, n: PRstNode, result: var string) = var tmp = "" for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], tmp) d.currentSection = tmp - # Find the last higher level section for unique reference name - var sectionPrefix = "" - for i in countdown(d.tocPart.high, 0): - let n2 = d.tocPart[i].n - if n2.level < n.level: - sectionPrefix = rstnodeToRefname(n2) & "-" - break - var refname = sectionPrefix & rstnodeToRefname(n) + var tocName = esc(d.target, renderRstToText(n), escMode = emOption) + # for Latex: simple text without commands that may break TOC/hyperref if d.hasToc: - var length = len(d.tocPart) - setLen(d.tocPart, length + 1) - d.tocPart[length].refname = refname - d.tocPart[length].n = n - d.tocPart[length].header = tmp - + d.tocPart.add n dispA(d.target, result, "\n<h$1><a class=\"toc-backref\"" & - "$2 href=\"#$5\">$3</a></h$1>", "\\rsth$4{$3}$2\n", - [$n.level, refname.idS, tmp, $chr(n.level - 1 + ord('A')), refname]) + "$2 href=\"#$5\">$3</a></h$1>", "\\rsth$4[$6]{$3}$2\n", + [$n.level, n.anchor.idS, tmp, + $chr(n.level - 1 + ord('A')), n.anchor, tocName]) else: dispA(d.target, result, "\n<h$1$2>$3</h$1>", - "\\rsth$4{$3}$2\n", [ - $n.level, refname.idS, tmp, - $chr(n.level - 1 + ord('A'))]) + "\\rsth$4[$5]{$3}$2\n", [ + $n.level, n.anchor.idS, tmp, + $chr(n.level - 1 + ord('A')), tocName]) # Generate index entry using spaces to indicate TOC level for the output HTML. 
assert n.level >= 0 - let - htmlFileRelPath = if d.outDir.len == 0: - # /foo/bar/zoo.nim -> zoo.html - changeFileExt(extractFilename(d.filename), HtmlExt) - else: # d is initialized in docgen.nim - # outDir = /foo -\ - # destFile = /foo/bar/zoo.html -|-> bar/zoo.html - d.destFile.relativePath(d.outDir, '/') - setIndexTerm(d, htmlFileRelPath, refname, tmp.stripTocHtml, - spaces(max(0, n.level)) & tmp) + setIndexTerm(d, ieHeading, htmlFile = d.htmlFileRelPath, id = n.anchor, + term = n.addNodes, linkTitle = spaces(max(0, n.level)) & tmp) proc renderOverline(d: PDoc, n: PRstNode, result: var string) = if n.level == 0 and d.meta[metaTitle].len == 0: + d.meta[metaTitleRaw] = n.addNodes for i in countup(0, len(n)-1): renderRstToOut(d, n.sons[i], d.meta[metaTitle]) d.currentSection = d.meta[metaTitle] @@ -809,21 +715,25 @@ proc renderOverline(d: PDoc, n: PRstNode, result: var string) = var tmp = "" for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], tmp) d.currentSection = tmp + var tocName = esc(d.target, renderRstToText(n), escMode=emOption) dispA(d.target, result, "<h$1$2><center>$3</center></h$1>", - "\\rstov$4{$3}$2\n", [$n.level, - rstnodeToRefname(n).idS, tmp, $chr(n.level - 1 + ord('A'))]) - - -proc renderTocEntry(d: PDoc, e: TocEntry, result: var string) = + "\\rstov$4[$5]{$3}$2\n", [$n.level, + n.anchor.idS, tmp, $chr(n.level - 1 + ord('A')), tocName]) + setIndexTerm(d, ieHeading, htmlFile = d.htmlFileRelPath, id = n.anchor, + term = n.addNodes, linkTitle = spaces(max(0, n.level)) & tmp) + +proc renderTocEntry(d: PDoc, n: PRstNode, result: var string) = + var header = "" + for i in countup(0, len(n) - 1): renderRstToOut(d, n.sons[i], header) dispA(d.target, result, "<li><a class=\"reference\" id=\"$1_toc\" href=\"#$1\">$2</a></li>\n", - "\\item\\label{$1_toc} $2\\ref{$1}\n", [e.refname, e.header]) + "\\item\\label{$1_toc} $2\\ref{$1}\n", [n.anchor, header]) proc renderTocEntries*(d: var RstGenerator, j: var int, lvl: int, result: var string) = var tmp = "" while j <= high(d.tocPart): - var a = abs(d.tocPart[j].n.level) + var a = abs(d.tocPart[j].level) if a == lvl: renderTocEntry(d, d.tocPart[j], tmp) inc(j) @@ -877,7 +787,9 @@ proc renderImage(d: PDoc, n: PRstNode, result: var string) = htmlOut = "<img$3 src=\"$1\"$2/>" # support for `:target:` links for images: - var target = esc(d.target, getFieldValue(n, "target").strip()) + var target = esc(d.target, getFieldValue(n, "target").strip(), escMode=emUrl) + discard safeProtocol(target) + if target.len > 0: # `htmlOut` needs to be of the following format for link to work for images: # <a class="reference external" href="target"><img src=\"$1\"$2/></a> @@ -898,6 +810,25 @@ proc renderSmiley(d: PDoc, n: PRstNode, result: var string) = "\\includegraphics{$1}", [d.config.getOrDefault"doc.smiley_format" % n.text]) +proc getField1Int(d: PDoc, n: PRstNode, fieldName: string): int = + template err(msg: string) = + rstMessage(d.filenames, d.msgHandler, n.info, meInvalidField, msg) + let value = n.getFieldValue + var number: int + let nChars = parseInt(value, number) + if nChars == 0: + if value.len == 0: + # use a good default value: + result = 1 + else: + err("field $1 requires an integer, but '$2' was given" % + [fieldName, value]) + elif nChars < value.len: + err("extra arguments were given to $1: '$2'" % + [fieldName, value[nChars..^1]]) + else: + result = number + proc parseCodeBlockField(d: PDoc, n: PRstNode, params: var CodeBlockParams) = ## Parses useful fields which can appear before a code block. 
## @@ -907,9 +838,7 @@ proc parseCodeBlockField(d: PDoc, n: PRstNode, params: var CodeBlockParams) = of "number-lines": params.numberLines = true # See if the field has a parameter specifying a different line than 1. - var number: int - if parseInt(n.getFieldValue, number) > 0: - params.startLine = number + params.startLine = getField1Int(d, n, "number-lines") of "file", "filename": # The ``file`` option is a Nim extension to the official spec, it acts # like it would for other directives like ``raw`` or ``cvs-table``. This @@ -927,14 +856,13 @@ proc parseCodeBlockField(d: PDoc, n: PRstNode, params: var CodeBlockParams) = # consider whether `$docCmd` should be appended here too params.testCmd = unescape(params.testCmd) of "status", "exitcode": - var status: int - if parseInt(n.getFieldValue, status) > 0: - params.status = status + params.status = getField1Int(d, n, n.getArgument) of "default-language": params.langStr = n.getFieldValue.strip params.lang = params.langStr.getSourceLanguage else: - d.msgHandler(d.filename, 1, 0, mwUnsupportedField, n.getArgument) + rstMessage(d.filenames, d.msgHandler, n.info, mwUnsupportedField, + n.getArgument) proc parseCodeBlockParams(d: PDoc, n: PRstNode): CodeBlockParams = ## Iterates over all code block fields and returns processed params. @@ -944,8 +872,7 @@ proc parseCodeBlockParams(d: PDoc, n: PRstNode): CodeBlockParams = result.init if n.isNil: return - assert n.kind == rnCodeBlock - assert(not n.sons[2].isNil) + assert n.kind in {rnCodeBlock, rnInlineCode} # Parse the field list for rendering parameters if there are any. if not n.sons[1].isNil: @@ -982,15 +909,34 @@ proc buildLinesHtmlTable(d: PDoc; params: CodeBlockParams, code: string, result.beginTable.add($line & "\n") line.inc codeLines.dec - result.beginTable.add("</pre$3></td><td>" & ( + result.beginTable.add("</pre></td><td>" & ( d.config.getOrDefault"doc.listing_start" % [id, sourceLanguageToStr[params.lang], idStr])) result.endTable = (d.config.getOrDefault"doc.listing_end" % id) & "</td></tr></tbody></table>" & ( d.config.getOrDefault"doc.listing_button" % id) -proc renderCodeBlock(d: PDoc, n: PRstNode, result: var string) = - ## Renders a code block, appending it to `result`. +proc renderCodeLang*(result: var string, lang: SourceLanguage, code: string, + target: OutputTarget) = + var g: GeneralTokenizer + initGeneralTokenizer(g, code) + while true: + getNextToken(g, lang) + case g.kind + of gtEof: break + of gtNone, gtWhitespace: + add(result, substr(code, g.start, g.length + g.start - 1)) + else: + dispA(target, result, "<span class=\"$2\">$1</span>", "\\span$2{$1}", [ + esc(target, substr(code, g.start, g.length+g.start-1)), + tokenClassToStr[g.kind]]) + deinitGeneralTokenizer(g) + +proc renderNimCode*(result: var string, code: string, target: OutputTarget) = + renderCodeLang(result, langNim, code, target) + +proc renderCode(d: PDoc, n: PRstNode, result: var string) {.gcsafe.} = + ## Renders a code (code block or inline code), appending it to `result`. ## ## If the code block uses the ``number-lines`` option, a table will be ## generated with two columns, the first being a list of numbers and the @@ -999,38 +945,40 @@ proc renderCodeBlock(d: PDoc, n: PRstNode, result: var string) = ## may also come from the parser through the internal ``default-language`` ## option to differentiate between a plain code block and Nim's code block ## extension. 
- assert n.kind == rnCodeBlock - if n.sons[2] == nil: return + assert n.kind in {rnCodeBlock, rnInlineCode} var params = d.parseCodeBlockParams(n) + if n.sons[2] == nil: return var m = n.sons[2].sons[0] assert m.kind == rnLeaf if params.testCmd.len > 0 and d.onTestSnippet != nil: d.onTestSnippet(d, params.filename, params.testCmd, params.status, m.text) - let (blockStart, blockEnd) = buildLinesHtmlTable(d, params, m.text, + var blockStart, blockEnd: string + case d.target + of outHtml: + if n.kind == rnCodeBlock: + (blockStart, blockEnd) = buildLinesHtmlTable(d, params, m.text, n.anchor.idS) - dispA(d.target, result, blockStart, - "\\begin{rstpre}\n" & n.anchor.idS & "\n", []) + else: # rnInlineCode + blockStart = "<tt class=\"docutils literal\"><span class=\"pre\">" + blockEnd = "</span></tt>" + of outLatex: + if n.kind == rnCodeBlock: + blockStart = "\n\n" & n.anchor.idS & "\\begin{rstpre}\n" + blockEnd = "\n\\end{rstpre}\n\n" + else: # rnInlineCode + blockStart = "\\rstcode{" + blockEnd = "}" + dispA(d.target, result, blockStart, blockStart, []) if params.lang == langNone: - if len(params.langStr) > 0: - d.msgHandler(d.filename, 1, 0, mwUnsupportedLanguage, params.langStr) - for letter in m.text: escChar(d.target, result, letter) + if len(params.langStr) > 0 and params.langStr.toLowerAscii != "none": + rstMessage(d.filenames, d.msgHandler, n.info, mwUnsupportedLanguage, + params.langStr) + for letter in m.text: escChar(d.target, result, letter, emText) else: - var g: GeneralTokenizer - initGeneralTokenizer(g, m.text) - while true: - getNextToken(g, params.lang) - case g.kind - of gtEof: break - of gtNone, gtWhitespace: - add(result, substr(m.text, g.start, g.length + g.start - 1)) - else: - dispA(d.target, result, "<span class=\"$2\">$1</span>", "\\span$2{$1}", [ - esc(d.target, substr(m.text, g.start, g.length+g.start-1)), - tokenClassToStr[g.kind]]) - deinitGeneralTokenizer(g) - dispA(d.target, result, blockEnd, "\n\\end{rstpre}\n") + renderCodeLang(result, params.lang, m.text, d.target) + dispA(d.target, result, blockEnd, blockEnd) proc renderContainer(d: PDoc, n: PRstNode, result: var string) = var tmp = "" @@ -1041,11 +989,6 @@ proc renderContainer(d: PDoc, n: PRstNode, result: var string) = else: dispA(d.target, result, "<div class=\"$1\">$2</div>", "$2", [arg, tmp]) -proc texColumns(n: PRstNode): string = - result = "" - let nColumns = if n.sons.len > 0: len(n.sons[0]) else: 1 - for i in countup(1, nColumns): add(result, "|X") - proc renderField(d: PDoc, n: PRstNode, result: var string) = var b = false if d.target == outLatex: @@ -1121,7 +1064,7 @@ proc renderAdmonition(d: PDoc, n: PRstNode, result: var string) = case n.adType of "hint", "note", "tip": htmlCls = "admonition-info"; texSz = "\\normalsize"; texColor = "green" - of "attention", "admonition", "important", "warning": + of "attention", "admonition", "important", "warning", "caution": htmlCls = "admonition-warning"; texSz = "\\large"; texColor = "orange" of "danger", "error": htmlCls = "admonition-error"; texSz = "\\Large"; texColor = "red" @@ -1131,30 +1074,68 @@ proc renderAdmonition(d: PDoc, n: PRstNode, result: var string) = renderAux(d, n, htmlHead & "<span$2 class=\"" & htmlCls & "-text\"><b>" & txt & ":</b></span>\n" & "$1</div>\n", - "\n\n\\begin{mdframed}[linecolor=" & texColor & "]$2\n" & + "\n\n\\begin{rstadmonition}[borderline west={0.2em}{0pt}{" & + texColor & "}]$2\n" & "{" & texSz & "\\color{" & texColor & "}{\\textbf{" & txt & ":}}} " & - "$1\n\\end{mdframed}\n", + "$1\n\\end{rstadmonition}\n", result) 
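
Editor's note: a small usage sketch for the newly exported `renderCodeLang`/`renderNimCode` helpers above. The import path is an assumption, and the exact CSS class names in the comment are only indicative of what the highlighter emits.

```nim
import packages/docutils/rstgen   # import path assumed

var html = ""
# Highlight a Nim snippet into HTML <span> markup using the default Nim lexer.
renderNimCode(html, "proc f() = discard", outHtml)
# html now holds the highlighted markup; keywords such as `proc` and `discard`
# come out wrapped roughly as <span class="Keyword">...</span>, identifiers
# and whitespace are passed through or wrapped with their own classes.
echo html
```
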
+proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string, + external: bool, nimdoc = false, tooltip="") = + var linkStr = "" + block: + let mode = d.escMode + d.escMode = emUrl + renderRstToOut(d, link, linkStr) + d.escMode = mode + discard safeProtocol(linkStr) + var textStr = "" + renderRstToOut(d, text, textStr) + let nimDocStr = if nimdoc: " nimdoc" else: "" + var tooltipStr = "" + if tooltip != "": + tooltipStr = """ title="$1"""" % [ esc(d.target, tooltip) ] + if external: + dispA(d.target, result, + "<a class=\"reference external$3\"$4 href=\"$2\">$1</a>", + "\\href{$2}{$1}", [textStr, linkStr, nimDocStr, tooltipStr]) + else: + dispA(d.target, result, + "<a class=\"reference internal$3\"$4 href=\"#$2\">$1</a>", + "\\hyperlink{$2}{$1} (p.~\\pageref{$2})", + [textStr, linkStr, nimDocStr, tooltipStr]) + +proc traverseForIndex*(d: PDoc, n: PRstNode) = + ## A version of [renderRstToOut] that only fills entries for ``.idx`` files. + var discarded: string + if n == nil: return + case n.kind + of rnIdx: renderIndexTerm(d, n, discarded) + of rnHeadline, rnMarkdownHeadline: renderHeadline(d, n, discarded) + of rnOverline: renderOverline(d, n, discarded) + else: + for i in 0 ..< len(n): + traverseForIndex(d, n.sons[i]) + proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = if n == nil: return case n.kind of rnInner: renderAux(d, n, result) of rnHeadline, rnMarkdownHeadline: renderHeadline(d, n, result) of rnOverline: renderOverline(d, n, result) - of rnTransition: renderAux(d, n, "<hr$2 />\n", "\\hrule$2\n", result) - of rnParagraph: renderAux(d, n, "<p$2>$1</p>\n", "$2\n$1\n\n", result) + of rnTransition: renderAux(d, n, "<hr$2 />\n", "\n\n\\vspace{0.6em}\\hrule$2\n", result) + of rnParagraph: renderAux(d, n, "<p$2>$1</p>\n", "\n\n$2\n$1\n\n", result) of rnBulletList: renderAux(d, n, "<ul$2 class=\"simple\">$1</ul>\n", "\\begin{itemize}\n$2\n$1\\end{itemize}\n", result) of rnBulletItem, rnEnumItem: renderAux(d, n, "<li$2>$1</li>\n", "\\item $2$1\n", result) of rnEnumList: renderEnumList(d, n, result) - of rnDefList: + of rnDefList, rnMdDefList: renderAux(d, n, "<dl$2 class=\"docutils\">$1</dl>\n", "\\begin{description}\n$2\n$1\\end{description}\n", result) of rnDefItem: renderAux(d, n, result) - of rnDefName: renderAux(d, n, "<dt$2>$1</dt>\n", "$2\\item[$1] ", result) + of rnDefName: renderAux(d, n, "<dt$2>$1</dt>\n", "$2\\item[$1]\\ ", result) of rnDefBody: renderAux(d, n, "<dd$2>$1</dd>\n", "$2\n$1\n", result) of rnFieldList: var tmp = "" @@ -1178,21 +1159,51 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = of rnIndex: renderRstToOut(d, n.sons[2], result) of rnOptionList: - renderAux(d, n, "<table$2 frame=\"void\">$1</table>", - "\\begin{description}\n$2\n$1\\end{description}\n", result) + renderAux(d, n, "<div$2 class=\"option-list\">$1</div>", + "\\begin{rstoptlist}$2\n$1\\end{rstoptlist}", result) of rnOptionListItem: - renderAux(d, n, "<tr>$1</tr>\n", "$1", result) + var addclass = if n.order mod 2 == 1: " odd" else: "" + renderAux(d, n, + "<div class=\"option-list-item" & addclass & "\">$1</div>\n", + "$1", result) of rnOptionGroup: - renderAux(d, n, "<th align=\"left\">$1</th>", "\\item[$1]", result) + renderAux(d, n, + "<div class=\"option-list-label\"><tt><span class=\"option\">" & + "$1</span></tt></div>", + "\\item[\\rstcodeitem{\\spanoption{$1}}]", result) of rnDescription: - renderAux(d, n, "<td align=\"left\">$1</td>\n", " $1\n", result) + renderAux(d, n, "<div class=\"option-list-description\">$1</div>", + " $1\n", result) of 
rnOption, rnOptionString, rnOptionArgument: - doAssert false, "renderRstToOut" + raiseAssert "renderRstToOut" of rnLiteralBlock: renderAux(d, n, "<pre$2>$1</pre>\n", - "\\begin{rstpre}\n$2\n$1\n\\end{rstpre}\n", result) - of rnQuotedLiteralBlock: - doAssert false, "renderRstToOut" + "\n\n$2\\begin{rstpre}\n$1\n\\end{rstpre}\n\n", result) + of rnMarkdownBlockQuote: + d.curQuotationDepth = 1 + var tmp = "" + renderAux(d, n, "$1", "$1", tmp) + let itemEnding = + if d.target == outHtml: "</blockquote>" else: "\\end{rstquote}" + tmp.add itemEnding.repeat(d.curQuotationDepth - 1) + dispA(d.target, result, + "<blockquote$2 class=\"markdown-quote\">$1</blockquote>\n", + "\n\\begin{rstquote}\n$2\n$1\\end{rstquote}\n", [tmp, n.anchor.idS]) + of rnMarkdownBlockQuoteItem: + let addQuotationDepth = n.quotationDepth - d.curQuotationDepth + var itemPrefix: string # start or ending (quotation grey bar on the left) + if addQuotationDepth >= 0: + let s = + if d.target == outHtml: "<blockquote class=\"markdown-quote\">" + else: "\\begin{rstquote}" + itemPrefix = s.repeat(addQuotationDepth) + else: + let s = + if d.target == outHtml: "</blockquote>" + else: "\\end{rstquote}" + itemPrefix = s.repeat(-addQuotationDepth) + renderAux(d, n, itemPrefix & "<p>$1</p>", itemPrefix & "\n$1", result) + d.curQuotationDepth = n.quotationDepth of rnLineBlock: if n.sons.len == 1 and n.sons[0].lineIndent == "\n": # whole line block is one empty line, no need to add extra spacing @@ -1217,25 +1228,50 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = of rnTable, rnGridTable, rnMarkdownTable: renderAux(d, n, "<table$2 border=\"1\" class=\"docutils\">$1</table>", - "\\begin{table}\n$2\n\\begin{rsttab}{" & - texColumns(n) & "|}\n\\hline\n$1\\end{rsttab}\\end{table}", result) + "\n$2\n\\begin{rsttab}{" & + "L".repeat(n.colCount) & "}\n\\toprule\n$1" & + "\\addlinespace[0.1em]\\bottomrule\n\\end{rsttab}", result) of rnTableRow: if len(n) >= 1: - if d.target == outLatex: - #var tmp = "" - renderRstToOut(d, n.sons[0], result) - for i in countup(1, len(n) - 1): - result.add(" & ") - renderRstToOut(d, n.sons[i], result) - result.add("\\\\\n\\hline\n") - else: + case d.target + of outHtml: result.add("<tr>") renderAux(d, n, result) result.add("</tr>\n") - of rnTableDataCell: - renderAux(d, n, "<td>$1</td>", "$1", result) - of rnTableHeaderCell: - renderAux(d, n, "<th>$1</th>", "\\textbf{$1}", result) + of outLatex: + if n.sons[0].kind == rnTableHeaderCell: + result.add "\\rowcolor{gray!15} " + var spanLines: seq[(int, int)] + var nCell = 0 + for uCell in 0 .. 
n.len - 1: + renderRstToOut(d, n.sons[uCell], result) + if n.sons[uCell].span > 0: + spanLines.add (nCell + 1, nCell + n.sons[uCell].span) + nCell += n.sons[uCell].span + else: + nCell += 1 + if uCell != n.len - 1: + result.add(" & ") + result.add("\\\\") + if n.endsHeader: result.add("\\midrule\n") + for (start, stop) in spanLines: + result.add("\\cmidrule(lr){$1-$2}" % [$start, $stop]) + result.add("\n") + of rnTableHeaderCell, rnTableDataCell: + case d.target + of outHtml: + let tag = if n.kind == rnTableHeaderCell: "th" else: "td" + var spanSpec: string + if n.span <= 1: spanSpec = "" + else: + spanSpec = " colspan=\"" & $n.span & "\" style=\"text-align: center\"" + renderAux(d, n, "<$1$2>$$1</$1>" % [tag, spanSpec], "", result) + of outLatex: + let text = if n.kind == rnTableHeaderCell: "\\textbf{$1}" else: "$1" + var latexStr: string + if n.span <= 1: latexStr = text + else: latexStr = "\\multicolumn{" & $n.span & "}{c}{" & text & "}" + renderAux(d, n, "", latexStr, result) of rnFootnoteGroup: renderAux(d, n, "<hr class=\"footnote\">" & @@ -1254,22 +1290,19 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = "</div>   $1\n</div>\n", "\\item[\\textsuperscript{[$3]}]$2 $1\n", [body, n.anchor.idS, mark, n.anchor]) - of rnRef: - var tmp = "" - renderAux(d, n, tmp) - dispA(d.target, result, - "<a class=\"reference external\" href=\"#$2\">$1</a>", - "$1\\ref{$2}", [tmp, rstnodeToRefname(n)]) + of rnPandocRef: + renderHyperlink(d, text=n.sons[0], link=n.sons[1], result, external=false) + of rnRstRef: + renderHyperlink(d, text=n.sons[0], link=n.sons[0], result, external=false) of rnStandaloneHyperlink: - renderAux(d, n, - "<a class=\"reference external\" href=\"$1\">$1</a>", - "\\href{$1}{$1}", result) + renderHyperlink(d, text=n.sons[0], link=n.sons[0], result, external=true) of rnInternalRef: - var tmp = "" - renderAux(d, n.sons[0], tmp) - dispA(d.target, result, - "<a class=\"reference internal\" href=\"#$2\">$1</a>", - "\\hyperlink{$2}{$1} (p.~\\pageref{$2})", [tmp, n.sons[1].text]) + renderHyperlink(d, text=n.sons[0], link=n.sons[1], result, external=false) + of rnNimdocRef: + renderHyperlink(d, text=n.sons[0], link=n.sons[1], result, external=false, + nimdoc=true, tooltip=n.tooltip) + of rnHyperlink: + renderHyperlink(d, text=n.sons[0], link=n.sons[1], result, external=true) of rnFootnoteRef: var tmp = "[" renderAux(d, n.sons[0], tmp) @@ -1279,14 +1312,6 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = "$1</a></strong></sup>", "\\textsuperscript{\\hyperlink{$2}{\\textbf{$1}}}", [tmp, n.sons[1].text]) - of rnHyperlink: - var tmp0 = "" - var tmp1 = "" - renderRstToOut(d, n.sons[0], tmp0) - renderRstToOut(d, n.sons[1], tmp1) - dispA(d.target, result, - "<a class=\"reference external\" href=\"$2\">$1</a>", - "\\href{$2}{$1}", [tmp0, tmp1]) of rnDirArg, rnRaw: renderAux(d, n, result) of rnRawHtml: if d.target != outLatex and not lastSon(n).isNil: @@ -1296,19 +1321,28 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = result.add addNodes(lastSon(n)) of rnImage, rnFigure: renderImage(d, n, result) - of rnCodeBlock: renderCodeBlock(d, n, result) + of rnCodeBlock, rnInlineCode: renderCode(d, n, result) of rnContainer: renderContainer(d, n, result) of rnSubstitutionReferences, rnSubstitutionDef: renderAux(d, n, "|$1|", "|$1|", result) of rnDirective: renderAux(d, n, "", "", result) - of rnGeneralRole: + of rnUnknownRole, rnCodeFragment: var tmp0 = "" var tmp1 = "" renderRstToOut(d, n.sons[0], tmp0) renderRstToOut(d, n.sons[1], tmp1) - 
dispA(d.target, result, "<span class=\"$2\">$1</span>", "\\span$2{$1}", - [tmp0, tmp1]) + var class = tmp1 + # don't allow missing role break latex compilation: + if d.target == outLatex and n.kind == rnUnknownRole: class = "Other" + if n.kind == rnCodeFragment: + dispA(d.target, result, + "<tt class=\"docutils literal\"><span class=\"pre $2\">" & + "$1</span></tt>", + "\\rstcode{\\span$2{$1}}", [tmp0, class]) + else: # rnUnknownRole, not necessarily code/monospace font + dispA(d.target, result, "<span class=\"$2\">$1</span>", "\\span$2{$1}", + [tmp0, class]) of rnSub: renderAux(d, n, "<sub>$1</sub>", "\\rstsub{$1}", result) of rnSup: renderAux(d, n, "<sup>$1</sup>", "\\rstsup{$1}", result) of rnEmphasis: renderAux(d, n, "<em>$1</em>", "\\emph{$1}", result) @@ -1322,7 +1356,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = of rnInlineLiteral, rnInterpretedText: renderAux(d, n, "<tt class=\"docutils literal\"><span class=\"pre\">$1</span></tt>", - "\\texttt{$1}", result) + "\\rstcode{$1}", result) of rnInlineTarget: var tmp = "" renderAux(d, n, tmp) @@ -1331,12 +1365,13 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) = "\\label{$2}\\hypertarget{$2}{$1}", [tmp, rstnodeToRefname(n)]) of rnSmiley: renderSmiley(d, n, result) - of rnLeaf: result.add(esc(d.target, n.text)) + of rnLeaf: result.add(esc(d.target, n.text, escMode=d.escMode)) of rnContents: d.hasToc = true of rnDefaultRole: discard of rnTitle: d.meta[metaTitle] = "" renderRstToOut(d, n.sons[0], d.meta[metaTitle]) + d.meta[metaTitleRaw] = n.sons[0].addNodes # ----------------------------------------------------------------------------- @@ -1477,7 +1512,7 @@ $content proc rstToHtml*(s: string, options: RstParseOptions, config: StringTableRef, - msgHandler: MsgHandler = rst.defaultMsgHandler): string = + msgHandler: MsgHandler = rst.defaultMsgHandler): string {.gcsafe.} = ## Converts an input rst string into embeddable HTML. ## ## This convenience proc parses any input string using rst markup (it doesn't @@ -1487,12 +1522,13 @@ proc rstToHtml*(s: string, options: RstParseOptions, ## work. For an explanation of the ``config`` parameter see the ## ``initRstGenerator`` proc. Example: ## - ## .. code-block:: nim + ## ```nim ## import packages/docutils/rstgen, strtabs ## ## echo rstToHtml("*Hello* **world**!", {}, ## newStringTable(modeStyleInsensitive)) ## # --> <em>Hello</em> <strong>world</strong>! 
+ ## ``` ## ## If you need to allow the rst ``include`` directive or tweak the generated ## output you have to create your own ``RstGenerator`` with @@ -1501,25 +1537,30 @@ proc rstToHtml*(s: string, options: RstParseOptions, proc myFindFile(filename: string): string = # we don't find any files in online mode: result = "" + proc myFindRefFile(filename: string): (string, string) = + result = ("", "") const filen = "input" + let (rst, filenames, t) = rstParse(s, filen, + line=LineRstInit, column=ColRstInit, + options, myFindFile, myFindRefFile, msgHandler) var d: RstGenerator - initRstGenerator(d, outHtml, config, filen, options, myFindFile, msgHandler) - var dummyHasToc = false - var rst = rstParse(s, filen, line=LineRstInit, column=ColRstInit, - dummyHasToc, options, myFindFile, msgHandler) + initRstGenerator(d, outHtml, config, filen, myFindFile, msgHandler, + filenames, hasToc = t) result = "" renderRstToOut(d, rst, result) + strbasics.strip(result) proc rstToLatex*(rstSource: string; options: RstParseOptions): string {.inline, since: (1, 3).} = ## Convenience proc for `renderRstToOut` and `initRstGenerator`. runnableExamples: doAssert rstToLatex("*Hello* **world**", {}) == """\emph{Hello} \textbf{world}""" if rstSource.len == 0: return - var option: bool + let (rst, filenames, t) = rstParse(rstSource, "", + line=LineRstInit, column=ColRstInit, + options) var rstGenera: RstGenerator - rstGenera.initRstGenerator(outLatex, defaultConfig(), "input", options) - rstGenera.renderRstToOut( - rstParse(rstSource, "", line=LineRstInit, column=ColRstInit, - option, options), - result) + rstGenera.initRstGenerator(outLatex, defaultConfig(), "input", + filenames=filenames, hasToc = t) + rstGenera.renderRstToOut(rst, result) + strbasics.strip(result) diff --git a/lib/packages/docutils/rstidx.nim b/lib/packages/docutils/rstidx.nim new file mode 100644 index 000000000..1472d28fd --- /dev/null +++ b/lib/packages/docutils/rstidx.nim @@ -0,0 +1,141 @@ +# +# Nim's Runtime Library +# (c) Copyright 2022 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. + +## Nim `idx`:idx: file format related definitions. + +import std/[strutils, syncio, hashes] +from std/os import splitFile + +type + IndexEntryKind* = enum ## discriminator tag + ieMarkupTitle = "markupTitle" + ## RST/Markdown title, text in `keyword` + + ## HTML text in `linkTitle` + ieNimTitle = "nimTitle" + ## Nim title + ieHeading = "heading" ## RST/Markdown markup heading, escaped + ieIdxRole = "idx" ## RST :idx: definition, escaped + ieNim = "nim" ## Nim symbol, unescaped + ieNimGroup = "nimgrp" ## Nim overload group, unescaped + IndexEntry* = object + kind*: IndexEntryKind ## 0. + keyword*: string ## 1. + link*: string ## 2. + linkTitle*: string ## 3. contains a prettier text for the href + linkDesc*: string ## 4. the title attribute of the final href + line*: int ## 5. + module*: string ## origin file, NOT a field in ``.idx`` file + aux*: string ## auxuliary field, NOT a field in ``.idx`` file + +proc isDocumentationTitle*(hyperlink: string): bool = + ## Returns true if the hyperlink is actually a documentation title. + ## + ## Documentation titles lack the hash. See `mergeIndexes() + ## <#mergeIndexes,string>`_ for a more detailed explanation. 
+ result = hyperlink.find('#') < 0 + +proc `$`*(e: IndexEntry): string = + """("$1", "$2", "$3", "$4", $5)""" % [ + e.keyword, e.link, e.linkTitle, e.linkDesc, $e.line] + +proc quoteIndexColumn(text: string): string = + ## Returns a safe version of `text` for serialization to the ``.idx`` file. + ## + ## The returned version can be put without worries in a line based tab + ## separated column text file. The following character sequence replacements + ## will be performed for that goal: + ## + ## * ``"\\"`` => ``"\\\\"`` + ## * ``"\n"`` => ``"\\n"`` + ## * ``"\t"`` => ``"\\t"`` + result = newStringOfCap(text.len + 3) + for c in text: + case c + of '\\': result.add "\\" + of '\L': result.add "\\n" + of '\C': discard + of '\t': result.add "\\t" + else: result.add c + +proc unquoteIndexColumn*(text: string): string = + ## Returns the unquoted version generated by ``quoteIndexColumn``. + result = text.multiReplace(("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\")) + +proc formatIndexEntry*(kind: IndexEntryKind; htmlFile, id, term, linkTitle, + linkDesc: string, line: int): + tuple[entry: string, isTitle: bool] = + result.entry = $kind + result.entry.add('\t') + result.entry.add term + result.entry.add('\t') + result.entry.add(htmlFile) + if id.len > 0: + result.entry.add('#') + result.entry.add(id) + result.isTitle = false + else: + result.isTitle = true + result.entry.add('\t' & linkTitle.quoteIndexColumn) + result.entry.add('\t' & linkDesc.quoteIndexColumn) + result.entry.add('\t' & $line) + result.entry.add("\n") + +proc parseIndexEntryKind(s: string): IndexEntryKind = + result = case s: + of "nim": ieNim + of "nimgrp": ieNimGroup + of "heading": ieHeading + of "idx": ieIdxRole + of "nimTitle": ieNimTitle + of "markupTitle": ieMarkupTitle + else: raise newException(ValueError, "unknown index entry value $1" % [s]) + +proc parseIdxFile*(path: string): + tuple[fileEntries: seq[IndexEntry], title: IndexEntry] = + var + f = 0 + newSeq(result.fileEntries, 500) + setLen(result.fileEntries, 0) + let (_, base, _) = path.splitFile + for line in lines(path): + let s = line.find('\t') + if s < 0: continue + setLen(result.fileEntries, f+1) + let cols = line.split('\t') + result.fileEntries[f].kind = parseIndexEntryKind(cols[0]) + result.fileEntries[f].keyword = cols[1] + result.fileEntries[f].link = cols[2] + if result.fileEntries[f].kind == ieIdxRole: + result.fileEntries[f].module = base + else: + if result.title.keyword.len == 0: + result.fileEntries[f].module = base + else: + result.fileEntries[f].module = result.title.keyword + + result.fileEntries[f].linkTitle = cols[3].unquoteIndexColumn + result.fileEntries[f].linkDesc = cols[4].unquoteIndexColumn + result.fileEntries[f].line = parseInt(cols[5]) + + if result.fileEntries[f].kind in {ieNimTitle, ieMarkupTitle}: + result.title = result.fileEntries[f] + inc f + +proc cmp*(a, b: IndexEntry): int = + ## Sorts two ``IndexEntry`` first by `keyword` field, then by `link`. + result = cmpIgnoreStyle(a.keyword, b.keyword) + if result == 0: + result = cmpIgnoreStyle(a.link, b.link) + +proc hash*(x: IndexEntry): Hash = + ## Returns the hash for the combined fields of the type. + ## + ## The hash is computed as the chained hash of the individual string hashes. + result = x.keyword.hash !& x.link.hash + result = result !& x.linkTitle.hash + result = result !& x.linkDesc.hash + result = !$result |
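
Editor's note: to make the new ``.idx`` format above concrete, here is a round-trip sketch using the exported helpers from ``rstidx.nim``. The import path and the temporary file name are assumptions; the column layout follows `formatIndexEntry` and `parseIdxFile` as shown in this patch.

```nim
import std/[os, syncio]
import packages/docutils/rstidx   # import path assumed

# Build one index line for an :idx: entry pointing into manual.html.
let (entry, isTitle) = formatIndexEntry(ieIdxRole, htmlFile = "manual.html",
                                        id = "foo-1", term = "foo",
                                        linkTitle = "Manual", linkDesc = "",
                                        line = 42)
doAssert not isTitle          # an id was given, so this is not a title entry
# entry is one tab-separated line:
#   idx<TAB>foo<TAB>manual.html#foo-1<TAB>Manual<TAB><TAB>42
writeFile("sample.idx", entry)

# Read it back; the parser reconstructs the same fields.
let (entries, _) = parseIdxFile("sample.idx")
doAssert entries.len == 1 and entries[0].kind == ieIdxRole
doAssert entries[0].keyword == "foo"
doAssert entries[0].link == "manual.html#foo-1" and entries[0].line == 42
removeFile("sample.idx")
```
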