diff options
Diffstat (limited to 'lib/packages/docutils/rst.nim')
-rw-r--r-- | lib/packages/docutils/rst.nim | 3891 |
1 files changed, 2976 insertions, 915 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index 05b58c56b..706c50689 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -7,23 +7,46 @@ # distribution, for details about the copyright. # -## This module implements a `reStructuredText`:idx: parser. A large -## subset is implemented. Some features of the `markdown`:idx: wiki syntax are -## also supported. +## This module implements a `reStructuredText`:idx: (RST) and +## `Markdown`:idx: parser. +## User's manual on supported markup syntax and command line usage can be +## found in [Nim-flavored Markdown and reStructuredText](markdown_rst.html). ## -## **Note:** Import ``packages/docutils/rst`` to use this module +## * See also [Nim DocGen Tools Guide](docgen.html) for handling of +## ``.nim`` files. +## * See also [packages/docutils/rstgen module](rstgen.html) to know how to +## generate HTML or Latex strings (for embedding them into custom documents). +## +## Choice between Markdown and RST as well as optional additional features are +## turned on by passing ``options:`` [RstParseOptions] to [proc rstParse]. import - os, strutils, rstast + std/[os, strutils, enumutils, algorithm, lists, sequtils, + tables, strscans] +import dochelpers, rstidx, rstast +import std/private/miscdollars +from highlite import SourceLanguage, getSourceLanguage + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] + type RstParseOption* = enum ## options for the RST parser - roSkipPounds, ## skip ``#`` at line beginning (documentation - ## embedded in Nim comments) roSupportSmilies, ## make the RST parser support smilies like ``:)`` roSupportRawDirective, ## support the ``raw`` directive (don't support ## it for sandboxing) - roSupportMarkdown ## support additional features of markdown + roSupportMarkdown, ## support additional features of Markdown + roPreferMarkdown, ## parse as Markdown (keeping RST as "extension" + ## to Markdown) -- implies `roSupportMarkdown` + roNimFile ## set for Nim files where default interpreted + ## text role should be :nim: + roSandboxDisabled ## this option enables certain options + ## (e.g. raw, include, importdoc) + ## which are disabled by default as they can + ## enable users to read arbitrary data and + ## perform XSS if the parser is used in a web + ## app. RstParseOptions* = set[RstParseOption] @@ -32,39 +55,41 @@ type mcWarning = "Warning", mcError = "Error" + # keep the order in sync with compiler/docgen.nim and compiler/lineinfos.nim: MsgKind* = enum ## the possible messages - meCannotOpenFile, - meExpected, - meGridTableNotImplemented, - meNewSectionExpected, - meGeneralParseError, - meInvalidDirective, - mwRedefinitionOfLabel, - mwUnknownSubstitution, - mwUnsupportedLanguage, - mwUnsupportedField + meCannotOpenFile = "cannot open '$1'", + meExpected = "'$1' expected", + meMissingClosing = "$1", + meGridTableNotImplemented = "grid table is not implemented", + meMarkdownIllformedTable = "illformed delimiter row of a Markdown table", + meIllformedTable = "Illformed table: $1", + meNewSectionExpected = "new section expected $1", + meGeneralParseError = "general parse error", + meInvalidDirective = "invalid directive: '$1'", + meInvalidField = "invalid field: $1", + meFootnoteMismatch = "mismatch in number of footnotes and their refs: $1", + mwRedefinitionOfLabel = "redefinition of label '$1'", + mwUnknownSubstitution = "unknown substitution '$1'", + mwAmbiguousLink = "ambiguous doc link $1", + mwBrokenLink = "broken link '$1'", + mwUnsupportedLanguage = "language '$1' not supported", + mwUnsupportedField = "field '$1' not supported", + mwRstStyle = "RST style: $1", + mwUnusedImportdoc = "importdoc for '$1' is not used", + meSandboxedDirective = "disabled directive: '$1'", MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind, arg: string) {.closure, gcsafe.} ## what to do in case of an error FindFileHandler* = proc (filename: string): string {.closure, gcsafe.} - -const - messages: array[MsgKind, string] = [ - meCannotOpenFile: "cannot open '$1'", - meExpected: "'$1' expected", - meGridTableNotImplemented: "grid table is not implemented", - meNewSectionExpected: "new section expected", - meGeneralParseError: "general parse error", - meInvalidDirective: "invalid directive: '$1'", - mwRedefinitionOfLabel: "redefinition of label '$1'", - mwUnknownSubstitution: "unknown substitution '$1'", - mwUnsupportedLanguage: "language '$1' not supported", - mwUnsupportedField: "field '$1' not supported" - ] + FindRefFileHandler* = + proc (targetRelPath: string): + tuple[targetPath: string, linkRelPath: string] {.closure, gcsafe.} + ## returns where .html or .idx file should be found by its relative path; + ## `linkRelPath` is a prefix to be added before a link anchor from such file proc rstnodeToRefname*(n: PRstNode): string proc addNodes*(n: PRstNode): string -proc getFieldValue*(n: PRstNode, fieldname: string): string +proc getFieldValue*(n: PRstNode, fieldname: string): string {.gcsafe.} proc getArgument*(n: PRstNode): string # ----------------------------- scanner part -------------------------------- @@ -111,10 +136,19 @@ const ":geek:": "icon_e_geek", ":ugeek:": "icon_e_ugeek" } + SandboxDirAllowlist = [ + "image", "code", "code-block", "admonition", "attention", "caution", + "container", "contents", "danger", "default-role", "error", "figure", + "hint", "important", "index", "note", "role", "tip", "title", "warning"] type TokType = enum - tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther + tkEof, tkIndent, + tkWhite, tkWord, + tkAdornment, # used for chapter adornment, transitions and + # horizontal table borders + tkPunct, # one or many punctuation characters + tkOther Token = object # a RST token kind*: TokType # the type of the token ival*: int # the indentation or parsed integer value @@ -126,7 +160,8 @@ type buf*: cstring bufpos*: int line*, col*, baseIndent*: int - skipPounds*: bool + adornmentLine*: bool + escapeNext*: bool proc getThing(L: var Lexer, tok: var Token, s: set[char]) = tok.kind = tkWord @@ -134,57 +169,82 @@ proc getThing(L: var Lexer, tok: var Token, s: set[char]) = tok.col = L.col var pos = L.bufpos while true: - add(tok.symbol, L.buf[pos]) - inc(pos) + tok.symbol.add(L.buf[pos]) + inc pos if L.buf[pos] notin s: break - inc(L.col, pos - L.bufpos) + inc L.col, pos - L.bufpos L.bufpos = pos -proc getAdornment(L: var Lexer, tok: var Token) = - tok.kind = tkAdornment +proc isCurrentLineAdornment(L: var Lexer): bool = + var pos = L.bufpos + let c = L.buf[pos] + while true: + inc pos + if L.buf[pos] in {'\c', '\l', '\0'}: + break + if c == '+': # grid table + if L.buf[pos] notin {'-', '=', '+'}: + return false + else: # section adornment or table horizontal border + if L.buf[pos] notin {c, ' ', '\t', '\v', '\f'}: + return false + result = true + +proc getPunctAdornment(L: var Lexer, tok: var Token) = + if L.adornmentLine: + tok.kind = tkAdornment + else: + tok.kind = tkPunct tok.line = L.line tok.col = L.col var pos = L.bufpos - var c = L.buf[pos] - while true: - add(tok.symbol, L.buf[pos]) - inc(pos) - if L.buf[pos] != c: break - inc(L.col, pos - L.bufpos) + let c = L.buf[pos] + if not L.escapeNext and (c != '\\' or L.adornmentLine): + while true: + tok.symbol.add(L.buf[pos]) + inc pos + if L.buf[pos] != c: break + elif L.escapeNext: + tok.symbol.add(L.buf[pos]) + inc pos + else: # not L.escapeNext and c == '\\' and not L.adornmentLine + tok.symbol.add '\\' + inc pos + L.escapeNext = true + inc L.col, pos - L.bufpos L.bufpos = pos + if tok.symbol == "\\": tok.kind = tkPunct + # nim extension: standalone \ can not be adornment proc getBracket(L: var Lexer, tok: var Token) = tok.kind = tkPunct tok.line = L.line tok.col = L.col - add(tok.symbol, L.buf[L.bufpos]) + tok.symbol.add(L.buf[L.bufpos]) inc L.col inc L.bufpos proc getIndentAux(L: var Lexer, start: int): int = var pos = start # skip the newline (but include it in the token!) - if L.buf[pos] == '\x0D': - if L.buf[pos + 1] == '\x0A': inc(pos, 2) - else: inc(pos) - elif L.buf[pos] == '\x0A': - inc(pos) - if L.skipPounds: - if L.buf[pos] == '#': inc(pos) - if L.buf[pos] == '#': inc(pos) + if L.buf[pos] == '\r': + if L.buf[pos + 1] == '\n': inc pos, 2 + else: inc pos + elif L.buf[pos] == '\n': + inc pos while true: case L.buf[pos] - of ' ', '\x0B', '\x0C': - inc(pos) - inc(result) - of '\x09': - inc(pos) + of ' ', '\v', '\f': + inc pos + inc result + of '\t': + inc pos result = result - (result mod 8) + 8 else: break # EndOfFile also leaves the loop if L.buf[pos] == '\0': result = 0 - elif (L.buf[pos] == '\x0A') or (L.buf[pos] == '\x0D'): + elif L.buf[pos] == '\n' or L.buf[pos] == '\r': # look at the next line for proper indentation: result = getIndentAux(L, pos) L.bufpos = pos # no need to set back buf @@ -202,22 +262,26 @@ proc getIndent(L: var Lexer, tok: var Token) = proc rawGetTok(L: var Lexer, tok: var Token) = tok.symbol = "" tok.ival = 0 + if L.col == 0: + L.adornmentLine = false var c = L.buf[L.bufpos] case c of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9': getThing(L, tok, SymChars) - of ' ', '\x09', '\x0B', '\x0C': - getThing(L, tok, {' ', '\x09'}) + of ' ', '\t', '\v', '\f': + getThing(L, tok, {' ', '\t'}) tok.kind = tkWhite - if L.buf[L.bufpos] in {'\x0D', '\x0A'}: + if L.buf[L.bufpos] in {'\r', '\n'}: rawGetTok(L, tok) # ignore spaces before \n - of '\x0D', '\x0A': + of '\r', '\n': getIndent(L, tok) + L.adornmentLine = false of '!', '\"', '#', '$', '%', '&', '\'', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '\\', '^', '_', '`', '|', '~': - getAdornment(L, tok) - if len(tok.symbol) <= 3: tok.kind = tkPunct + if L.col == 0: + L.adornmentLine = L.isCurrentLineAdornment() + getPunctAdornment(L, tok) of '(', ')', '[', ']', '{', '}': getBracket(L, tok) else: @@ -227,78 +291,174 @@ proc rawGetTok(L: var Lexer, tok: var Token) = tok.kind = tkEof else: tok.kind = tkOther - add(tok.symbol, c) - inc(L.bufpos) - inc(L.col) + tok.symbol.add(c) + inc L.bufpos + inc L.col tok.col = max(tok.col - L.baseIndent, 0) -proc getTokens(buffer: string, skipPounds: bool, tokens: var TokenSeq): int = +proc getTokens(buffer: string, tokens: var TokenSeq) = var L: Lexer - var length = len(tokens) + var length = tokens.len L.buf = cstring(buffer) L.line = 0 # skip UTF-8 BOM - if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'): - inc(L.bufpos, 3) - L.skipPounds = skipPounds - if skipPounds: - if L.buf[L.bufpos] == '#': - inc(L.bufpos) - inc(result) - if L.buf[L.bufpos] == '#': - inc(L.bufpos) - inc(result) - L.baseIndent = 0 - while L.buf[L.bufpos] == ' ': - inc(L.bufpos) - inc(L.baseIndent) - inc(result) + if L.buf[0] == '\xEF' and L.buf[1] == '\xBB' and L.buf[2] == '\xBF': + inc L.bufpos, 3 while true: - inc(length) + inc length setLen(tokens, length) + let toEscape = L.escapeNext rawGetTok(L, tokens[length - 1]) + if toEscape: L.escapeNext = false if tokens[length - 1].kind == tkEof: break if tokens[0].kind == tkWhite: # BUGFIX - tokens[0].ival = len(tokens[0].symbol) + tokens[0].ival = tokens[0].symbol.len tokens[0].kind = tkIndent type - LevelMap = array[char, int] + LevelInfo = object + symbol: char # adornment character + hasOverline: bool # has also overline (besides underline)? + line: int # the last line of this style occurrence + # (for error message) + hasPeers: bool # has headings on the same level of hierarchy? + LiteralBlockKind = enum # RST-style literal blocks after `::` + lbNone, + lbIndentedLiteralBlock, + lbQuotedLiteralBlock + LevelMap = seq[LevelInfo] # Saves for each possible title adornment + # style its level in the current document. + SubstitutionKind = enum + rstSubstitution = "substitution", + hyperlinkAlias = "hyperlink alias", + implicitHyperlinkAlias = "implicitly-generated hyperlink alias" Substitution = object + kind*: SubstitutionKind key*: string value*: PRstNode - - SharedState = object - options: RstParseOptions # parsing options - uLevel, oLevel: int # counters for the section levels + info*: TLineInfo # place where the substitution was defined + AnchorRule = enum + arInternalRst, ## For automatically generated RST anchors (from + ## headings, footnotes, inline internal targets): + ## case-insensitive, 1-space-significant (by RST spec) + arExternalRst, ## For external .nim doc comments or .rst/.md + arNim ## For anchors generated by ``docgen.nim``: Nim-style case + ## sensitivity, etc. (see `proc normalizeNimName`_ for details) + arHyperlink, ## For links with manually set anchors in + ## form `text <pagename.html#anchor>`_ + RstAnchorKind = enum + manualDirectiveAnchor = "manual directive anchor", + manualInlineAnchor = "manual inline anchor", + footnoteAnchor = "footnote anchor", + headlineAnchor = "implicitly-generated headline anchor" + AnchorSubst = object + info: TLineInfo # the file where the anchor was defined + priority: int + case kind: range[arInternalRst .. arNim] + of arInternalRst: + anchorType: RstAnchorKind + target: PRstNode + of arExternalRst: + anchorTypeExt: RstAnchorKind + refnameExt: string + of arNim: + module: FileIndex # anchor's module (generally not the same as file) + tooltip: string # displayed tooltip for Nim-generated anchors + langSym: LangSymbol + refname: string # A reference name that will be inserted directly + # into HTML/Latex. + external: bool + AnchorSubstTable = Table[string, seq[AnchorSubst]] + # use `seq` to account for duplicate anchors + FootnoteType = enum + fnManualNumber, # manually numbered footnote like [3] + fnAutoNumber, # auto-numbered footnote [#] + fnAutoNumberLabel, # auto-numbered with label [#label] + fnAutoSymbol, # auto-symbol footnote [*] + fnCitation # simple text label like [citation2021] + FootnoteSubst = tuple + kind: FootnoteType # discriminator + number: int # valid for fnManualNumber (always) and fnAutoNumber, + # fnAutoNumberLabel after resolveSubs is called + autoNumIdx: int # order of occurrence: fnAutoNumber, fnAutoNumberLabel + autoSymIdx: int # order of occurrence: fnAutoSymbol + label: string # valid for fnAutoNumberLabel + RstFileTable* = object + filenameToIdx*: Table[string, FileIndex] + idxToFilename*: seq[string] + ImportdocInfo = object + used: bool # was this import used? + fromInfo: TLineInfo # place of `.. importdoc::` directive + idxPath: string # full path to ``.idx`` file + linkRelPath: string # prefix before target anchor + title: string # document title obtained from ``.idx`` + RstSharedState = object + options*: RstParseOptions # parsing options + hLevels: LevelMap # hierarchy of heading styles + hTitleCnt: int # =0 if no title, =1 if only main title, + # =2 if both title and subtitle are present + hCurLevel: int # current section level + currRole: string # current interpreted text role + currRoleKind: RstNodeKind # ... and its node kind subs: seq[Substitution] # substitutions - refs: seq[Substitution] # references - underlineToLevel: LevelMap # Saves for each possible title adornment - # character its level in the - # current document. - # This is for single underline adornments. - overlineToLevel: LevelMap # Saves for each possible title adornment - # character its level in the current - # document. - # This is for over-underline adornments. + refs*: seq[Substitution] # references + anchors*: AnchorSubstTable + # internal target substitutions + lineFootnoteNum: seq[TLineInfo] # footnote line, auto numbers .. [#] + lineFootnoteNumRef: seq[TLineInfo] # footnote line, their reference [#]_ + currFootnoteNumRef: int # ... their counter for `resolveSubs` + lineFootnoteSym: seq[TLineInfo] # footnote line, auto symbols .. [*] + lineFootnoteSymRef: seq[TLineInfo] # footnote line, their reference [*]_ + currFootnoteSymRef: int # ... their counter for `resolveSubs` + footnotes: seq[FootnoteSubst] # correspondence b/w footnote label, + # number, order of occurrence msgHandler: MsgHandler # How to handle errors. - findFile: FindFileHandler # How to find files. - - PSharedState = ref SharedState + findFile: FindFileHandler # How to find files for include. + findRefFile: FindRefFileHandler + # How to find files imported by importdoc. + filenames*: RstFileTable # map file name <-> FileIndex (for storing + # file names for warnings after 1st stage) + currFileIdx*: FileIndex # current index in `filenames` + tocPart*: seq[PRstNode] # all the headings of a document + hasToc*: bool + idxImports*: Table[string, ImportdocInfo] + # map `importdoc`ed filename -> it's info + nimFileImported*: bool # Was any ``.nim`` module `importdoc`ed ? + + PRstSharedState* = ref RstSharedState + ManualAnchor = object + alias: string # a (short) name that can substitute the `anchor` + anchor: string # anchor = id = refname + info: TLineInfo RstParser = object of RootObj idx*: int tok*: TokenSeq - s*: PSharedState + s*: PRstSharedState indentStack*: seq[int] - filename*: string - line*, col*: int - hasToc*: bool + line*, col*: int ## initial line/column of whole text or + ## documenation fragment that will be added + ## in case of error/warning reporting to + ## (relative) line/column of the token. + curAnchors*: seq[ManualAnchor] + ## seq to accumulate aliases for anchors: + ## because RST can have >1 alias per 1 anchor EParseError* = object of ValueError + SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.} + +const + LineRstInit* = 1 ## Initial line number for standalone RST text + ColRstInit* = 0 ## Initial column number for standalone RST text + ## (Nim global reporting adds ColOffset=1) + ColRstOffset* = 1 ## 1: a replica of ColOffset for internal use + +template currentTok(p: RstParser): Token = p.tok[p.idx] +template prevTok(p: RstParser): Token = p.tok[p.idx - 1] +template nextTok(p: RstParser): Token = p.tok[p.idx + 1] proc whichMsgClass*(k: MsgKind): MsgClass = ## returns which message class `k` belongs to. - case ($k)[1] + case k.symbolName[1] of 'e', 'E': result = mcError of 'w', 'W': result = mcWarning of 'h', 'H': result = mcHint @@ -307,8 +467,10 @@ proc whichMsgClass*(k: MsgKind): MsgClass = proc defaultMsgHandler*(filename: string, line, col: int, msgkind: MsgKind, arg: string) = let mc = msgkind.whichMsgClass - let a = messages[msgkind] % arg - let message = "$1($2, $3) $4: $5" % [filename, $line, $col, $mc, a] + let a = $msgkind % arg + var message: string + toLocation(message, filename, line, col + ColRstOffset) + message.add " $1: $2" % [$mc, a] if mc == mcError: raise newException(EParseError, message) else: writeLine(stdout, message) @@ -316,90 +478,262 @@ proc defaultFindFile*(filename: string): string = if fileExists(filename): result = filename else: result = "" -proc newSharedState(options: RstParseOptions, - findFile: FindFileHandler, - msgHandler: MsgHandler): PSharedState = - new(result) - result.subs = @[] - result.refs = @[] - result.options = options - result.msgHandler = if not isNil(msgHandler): msgHandler else: defaultMsgHandler - result.findFile = if not isNil(findFile): findFile else: defaultFindFile +proc defaultFindRefFile*(filename: string): (string, string) = + (filename, "") + +proc defaultRole(options: RstParseOptions): string = + if roNimFile in options: "nim" else: "literal" + +proc whichRoleAux(sym: string): RstNodeKind = + let r = sym.toLowerAscii + case r + of "idx": result = rnIdx + of "literal": result = rnInlineLiteral + of "strong": result = rnStrongEmphasis + of "emphasis": result = rnEmphasis + of "sub", "subscript": result = rnSub + of "sup", "superscript": result = rnSup + # literal and code are the same in our implementation + of "code": result = rnInlineLiteral + of "program", "option", "tok": result = rnCodeFragment + # c++ currently can be spelled only as cpp, c# only as csharp + elif getSourceLanguage(r) != langNone: + result = rnInlineCode + else: # unknown role + result = rnUnknownRole + +proc len(filenames: RstFileTable): int = filenames.idxToFilename.len + +proc addFilename*(s: PRstSharedState, file1: string): FileIndex = + ## Returns index of filename, adding it if it has not been used before + let nextIdx = s.filenames.len.FileIndex + result = getOrDefault(s.filenames.filenameToIdx, file1, default = nextIdx) + if result == nextIdx: + s.filenames.filenameToIdx[file1] = result + s.filenames.idxToFilename.add file1 + +proc setCurrFilename*(s: PRstSharedState, file1: string) = + s.currFileIdx = addFilename(s, file1) + +proc getFilename(filenames: RstFileTable, fid: FileIndex): string = + doAssert(0 <= fid.int and fid.int < filenames.len, + "incorrect FileIndex $1 (range 0..$2)" % [ + $fid.int, $(filenames.len - 1)]) + result = filenames.idxToFilename[fid.int] + +proc getFilename(s: PRstSharedState, subst: AnchorSubst): string = + getFilename(s.filenames, subst.info.fileIndex) + +proc getModule(s: PRstSharedState, subst: AnchorSubst): string = + result = getFilename(s.filenames, subst.module) + +proc currFilename(s: PRstSharedState): string = + getFilename(s.filenames, s.currFileIdx) + +proc newRstSharedState*(options: RstParseOptions, + filename: string, + findFile: FindFileHandler, + findRefFile: FindRefFileHandler, + msgHandler: MsgHandler, + hasToc: bool): PRstSharedState = + let r = defaultRole(options) + result = PRstSharedState( + currRole: r, + currRoleKind: whichRoleAux(r), + options: options, + msgHandler: if not isNil(msgHandler): msgHandler else: defaultMsgHandler, + findFile: if not isNil(findFile): findFile else: defaultFindFile, + findRefFile: + if not isNil(findRefFile): findRefFile + else: defaultFindRefFile, + hasToc: hasToc + ) + setCurrFilename(result, filename) + +proc curLine(p: RstParser): int = p.line + currentTok(p).line proc findRelativeFile(p: RstParser; filename: string): string = - result = p.filename.splitFile.dir / filename + result = p.s.currFilename.splitFile.dir / filename if not fileExists(result): result = p.s.findFile(filename) proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) = - p.s.msgHandler(p.filename, p.line + p.tok[p.idx].line, - p.col + p.tok[p.idx].col, msgKind, arg) + p.s.msgHandler(p.s.currFilename, curLine(p), + p.col + currentTok(p).col, msgKind, arg) + +proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string) = + s.msgHandler(s.currFilename, LineRstInit, ColRstInit, msgKind, arg) + +proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string; + line, col: int) = + s.msgHandler(s.currFilename, line, col, msgKind, arg) + +proc rstMessage(s: PRstSharedState, filename: string, msgKind: MsgKind, + arg: string) = + s.msgHandler(filename, LineRstInit, ColRstInit, msgKind, arg) + +proc rstMessage*(filenames: RstFileTable, f: MsgHandler, + info: TLineInfo, msgKind: MsgKind, arg: string) = + ## Print warnings using `info`, i.e. in 2nd-pass warnings for + ## footnotes/substitutions/references or from ``rstgen.nim``. + let file = getFilename(filenames, info.fileIndex) + f(file, info.line.int, info.col.int, msgKind, arg) proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string, line, col: int) = - p.s.msgHandler(p.filename, p.line + line, + p.s.msgHandler(p.s.currFilename, p.line + line, p.col + col, msgKind, arg) proc rstMessage(p: RstParser, msgKind: MsgKind) = - p.s.msgHandler(p.filename, p.line + p.tok[p.idx].line, - p.col + p.tok[p.idx].col, msgKind, - p.tok[p.idx].symbol) + p.s.msgHandler(p.s.currFilename, curLine(p), + p.col + currentTok(p).col, msgKind, + currentTok(p).symbol) + +# Functions `isPureRst` & `stopOrWarn` address differences between +# Markdown and RST: +# * Markdown always tries to continue working. If it is really impossible +# to parse a markup element, its proc just returns `nil` and parsing +# continues for it as for normal text paragraph. +# The downside is that real mistakes/typos are often silently ignored. +# The same applies to legacy `RstMarkdown` mode for nimforum. +# * RST really signals errors. The downside is that it's more intrusive - +# the user must escape special syntax with \ explicitly. +# +# TODO: we need to apply this strategy to all markup elements eventually. + +func isPureRst(p: RstParser): bool = roSupportMarkdown notin p.s.options +func isRst(p: RstParser): bool = roPreferMarkdown notin p.s.options +func isMd(p: RstParser): bool = roPreferMarkdown in p.s.options +func isMd(s: PRstSharedState): bool = roPreferMarkdown in s.options + +proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string) = + let realMsgKind = if isPureRst(p): errorType else: mwRstStyle + rstMessage(p, realMsgKind, arg) + +proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string, line, col: int) = + let realMsgKind = if isPureRst(p): errorType else: mwRstStyle + rstMessage(p, realMsgKind, arg, line, col) proc currInd(p: RstParser): int = result = p.indentStack[high(p.indentStack)] proc pushInd(p: var RstParser, ind: int) = - add(p.indentStack, ind) + p.indentStack.add(ind) proc popInd(p: var RstParser) = - if len(p.indentStack) > 1: setLen(p.indentStack, len(p.indentStack) - 1) + if p.indentStack.len > 1: setLen(p.indentStack, p.indentStack.len - 1) + +# Working with indentation in rst.nim +# ----------------------------------- +# +# Every line break has an associated tkIndent. +# The tokenizer writes back the first column of next non-blank line +# in all preceeding tkIndent tokens to the `ival` field of tkIndent. +# +# RST document is separated into body elements (B.E.), every of which +# has a dedicated handler proc (or block of logic when B.E. is a block quote) +# that should follow the next rule: +# Every B.E. handler proc should finish at tkIndent (newline) +# after its B.E. finishes. +# Then its callers (which is `parseSection` or another B.E. handler) +# check for tkIndent ival (without necessity to advance `p.idx`) +# and decide themselves whether they continue processing or also stop. +# +# An example:: +# +# L RST text fragment indentation +# +--------------------+ +# 1 | | <- (empty line at the start of file) no tokens +# 2 |First paragraph. | <- tkIndent has ival=0, and next tkWord has col=0 +# 3 | | <- tkIndent has ival=0 +# 4 |* bullet item and | <- tkIndent has ival=0, and next tkPunct has col=0 +# 5 | its continuation | <- tkIndent has ival=2, and next tkWord has col=2 +# 6 | | <- tkIndent has ival=4 +# 7 | Block quote | <- tkIndent has ival=4, and next tkWord has col=4 +# 8 | | <- tkIndent has ival=0 +# 9 | | <- tkIndent has ival=0 +# 10|Final paragraph | <- tkIndent has ival=0, and tkWord has col=0 +# +--------------------+ +# C:01234 +# +# Here parser starts with initial `indentStack=[0]` and then calls the +# 1st `parseSection`: +# +# - `parseSection` calls `parseParagraph` and "First paragraph" is parsed +# - bullet list handler is started at reaching ``*`` (L4 C0), it +# starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`, +# then calls `parseSection` (2nd call, nested) which parses +# paragraph "bullet list and its continuation" and then starts +# a block quote logic (L7 C4). +# The block quote logic calls calls `pushInd(p, ind=4)` and +# calls `parseSection` again, so a (simplified) sequence of calls now is:: +# +# parseSection -> parseBulletList -> +# parseSection (+block quote logic) -> parseSection +# +# 3rd `parseSection` finishes, block quote logic calls `popInd(p)`, +# it returns to bullet item logic, which sees that next tkIndent has +# ival=0 and stops there since the required indentation for a bullet item +# is 2 and 0<2; the bullet item logic calls `popInd(p)`. +# Then bullet list handler checks that next tkWord (L10 C0) has the +# right indentation but does not have ``*`` so stops at tkIndent (L10). +# - 1st `parseSection` invocation calls `parseParagraph` and the +# "Final paragraph" is parsed. +# +# If a B.E. handler has advanced `p.idx` past tkIndent to check +# whether it should continue its processing or not, and decided not to, +# then this B.E. handler should step back (e.g. do `dec p.idx`). -proc initParser(p: var RstParser, sharedState: PSharedState) = +proc initParser(p: var RstParser, sharedState: PRstSharedState) = p.indentStack = @[0] p.tok = @[] p.idx = 0 - p.filename = "" - p.hasToc = false - p.col = 0 - p.line = 1 + p.col = ColRstInit + p.line = LineRstInit p.s = sharedState proc addNodesAux(n: PRstNode, result: var string) = + if n == nil: + return if n.kind == rnLeaf: - add(result, n.text) + result.add(n.text) else: - for i in countup(0, len(n) - 1): addNodesAux(n.sons[i], result) + for i in 0 ..< n.len: addNodesAux(n.sons[i], result) proc addNodes(n: PRstNode): string = - result = "" - addNodesAux(n, result) + n.addNodesAux(result) + +proc linkName(n: PRstNode): string = + ## Returns a normalized reference name, see: + ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names + n.addNodes.toLowerAscii proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) = template special(s) = if b: - add(r, '-') + r.add('-') b = false - add(r, s) + r.add(s) if n == nil: return if n.kind == rnLeaf: - for i in countup(0, len(n.text) - 1): + for i in 0 ..< n.text.len: case n.text[i] of '0'..'9': if b: - add(r, '-') + r.add('-') b = false - if len(r) == 0: add(r, 'Z') - add(r, n.text[i]) + if r.len == 0: r.add('Z') + r.add(n.text[i]) of 'a'..'z', '\128'..'\255': if b: - add(r, '-') + r.add('-') b = false - add(r, n.text[i]) + r.add(n.text[i]) of 'A'..'Z': if b: - add(r, '-') + r.add('-') b = false - add(r, chr(ord(n.text[i]) - ord('A') + ord('a'))) + r.add(chr(ord(n.text[i]) - ord('A') + ord('a'))) of '$': special "dollar" of '%': special "percent" of '&': special "amp" @@ -420,128 +754,446 @@ proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) = of '@': special "at" of '|': special "bar" else: - if len(r) > 0: b = true + if r.len > 0: b = true else: - for i in countup(0, len(n) - 1): rstnodeToRefnameAux(n.sons[i], r, b) + for i in 0 ..< n.len: rstnodeToRefnameAux(n.sons[i], r, b) proc rstnodeToRefname(n: PRstNode): string = - result = "" var b = false rstnodeToRefnameAux(n, result, b) -proc findSub(p: var RstParser, n: PRstNode): int = +proc findSub(s: PRstSharedState, n: PRstNode): int = var key = addNodes(n) # the spec says: if no exact match, try one without case distinction: - for i in countup(0, high(p.s.subs)): - if key == p.s.subs[i].key: + for i in countup(0, high(s.subs)): + if key == s.subs[i].key: return i - for i in countup(0, high(p.s.subs)): - if cmpIgnoreStyle(key, p.s.subs[i].key) == 0: + for i in countup(0, high(s.subs)): + if cmpIgnoreStyle(key, s.subs[i].key) == 0: return i result = -1 +proc lineInfo(p: RstParser, iTok: int): TLineInfo = + result.col = int16(p.col + p.tok[iTok].col) + result.line = uint16(p.line + p.tok[iTok].line) + result.fileIndex = p.s.currFileIdx + +proc lineInfo(p: RstParser): TLineInfo = lineInfo(p, p.idx) +# TODO: we need this simplification because we don't preserve exact starting +# token of currently parsed element: +proc prevLineInfo(p: RstParser): TLineInfo = lineInfo(p, p.idx-1) + proc setSub(p: var RstParser, key: string, value: PRstNode) = - var length = len(p.s.subs) - for i in countup(0, length - 1): + var length = p.s.subs.len + for i in 0 ..< length: if key == p.s.subs[i].key: p.s.subs[i].value = value return - setLen(p.s.subs, length + 1) - p.s.subs[length].key = key - p.s.subs[length].value = value + p.s.subs.add(Substitution(key: key, value: value, info: prevLineInfo(p))) -proc setRef(p: var RstParser, key: string, value: PRstNode) = - var length = len(p.s.refs) - for i in countup(0, length - 1): +proc setRef(p: var RstParser, key: string, value: PRstNode, + refType: SubstitutionKind) = + var length = p.s.refs.len + for i in 0 ..< length: if key == p.s.refs[i].key: if p.s.refs[i].value.addNodes != value.addNodes: rstMessage(p, mwRedefinitionOfLabel, key) - p.s.refs[i].value = value return - setLen(p.s.refs, length + 1) - p.s.refs[length].key = key - p.s.refs[length].value = value + p.s.refs.add(Substitution(kind: refType, key: key, value: value, + info: prevLineInfo(p))) + +proc findRef(s: PRstSharedState, key: string): seq[Substitution] = + for i in countup(0, high(s.refs)): + if key == s.refs[i].key: + result.add s.refs[i] + +# Ambiguity in links: we don't follow procedure of removing implicit targets +# defined in https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#implicit-hyperlink-targets +# Instead we just give explicit links a higher priority than to implicit ones +# and report ambiguities as warnings. Hopefully it is easy to remove +# ambiguities manually. Nim auto-generated links from ``docgen.nim`` +# have lowest priority: 1 (for procs) and below for other symbol types. + +proc refPriority(k: SubstitutionKind): int = + case k + of rstSubstitution: result = 8 + of hyperlinkAlias: result = 7 + of implicitHyperlinkAlias: result = 2 + +proc internalRefPriority(k: RstAnchorKind): int = + case k + of manualDirectiveAnchor: result = 6 + of manualInlineAnchor: result = 5 + of footnoteAnchor: result = 4 + of headlineAnchor: result = 3 + +proc `$`(subst: AnchorSubst): string = # for debug + let s = + case subst.kind + of arInternalRst: "type=" & $subst.anchorType + of arExternalRst: "type=" & $subst.anchorTypeExt + of arNim: "langsym=" & $subst.langSym + result = "(kind=$1, priority=$2, $3)" % [$subst.kind, $subst.priority, s] + +proc addAnchorRst(p: var RstParser, name: string, target: PRstNode, + anchorType: RstAnchorKind) = + ## Associates node `target` (which has field `anchor`) with an + ## alias `name` and updates the corresponding aliases in `p.curAnchors`. + let prio = internalRefPriority(anchorType) + for a in p.curAnchors: + p.s.anchors.mgetOrPut(a.alias, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arInternalRst, target: target, priority: prio, + info: a.info, anchorType: manualDirectiveAnchor)) + if name != "": + p.s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arInternalRst, target: target, priority: prio, + info: prevLineInfo(p), anchorType: anchorType)) + p.curAnchors.setLen 0 + +proc addAnchorExtRst(s: var PRstSharedState, key: string, refn: string, + anchorType: RstAnchorKind, info: TLineInfo) = + let name = key.toLowerAscii + let prio = internalRefPriority(anchorType) + s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arExternalRst, refnameExt: refn, priority: prio, + info: info, + anchorTypeExt: anchorType)) + +proc addAnchorNim*(s: var PRstSharedState, external: bool, refn: string, tooltip: string, + langSym: LangSymbol, priority: int, + info: TLineInfo, module: FileIndex) = + ## Adds an anchor `refn`, which follows + ## the rule `arNim` (i.e. a symbol in ``*.nim`` file) + s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arNim, external: external, refname: refn, langSym: langSym, + tooltip: tooltip, priority: priority, + info: info)) + +proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode, + info: TLineInfo): + seq[AnchorSubst] = + var langSym: LangSymbol + try: + langSym = toLangSymbol(signature) + except ValueError: # parsing failed, not a Nim symbol + return + let substitutions = s.anchors.getOrDefault(langSym.name, + newSeq[AnchorSubst]()) + if substitutions.len == 0: + return + # logic to select only groups instead of concrete symbols + # with overloads, note that the same symbol can be defined + # in multiple modules and `importdoc`ed: + type GroupKey = tuple[symKind: string, origModule: string] + # map (symKind, file) (like "proc", "os.nim") -> found symbols/groups: + var found: Table[GroupKey, seq[AnchorSubst]] + for subst in substitutions: + if subst.kind == arNim: + if match(subst.langSym, langSym): + let key: GroupKey = (subst.langSym.symKind, getModule(s, subst)) + found.mgetOrPut(key, newSeq[AnchorSubst]()).add subst + for key, sList in found: + if sList.len == 1: + result.add sList[0] + else: # > 1, there are overloads, potential ambiguity in this `symKind` + if langSym.parametersProvided: + # there are non-group signatures, select only them + for s in sList: + if not s.langSym.isGroup: + result.add s + else: # when there are many overloads a link like foo_ points to all + # of them, so selecting the group + var foundGroup = false + for s in sList: + if s.langSym.isGroup: + result.add s + foundGroup = true + break + doAssert(foundGroup, + "docgen has not generated the group for $1 (file $2)" % [ + langSym.name, getModule(s, sList[0]) ]) + +proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo): + seq[AnchorSubst] = + let name = linkText.toLowerAscii + let substitutions = s.anchors.getOrDefault(name, newSeq[AnchorSubst]()) + for s in substitutions: + if s.kind in {arInternalRst, arExternalRst}: + result.add s + +proc addFootnoteNumManual(p: var RstParser, num: int) = + ## add manually-numbered footnote + for fnote in p.s.footnotes: + if fnote.number == num: + rstMessage(p, mwRedefinitionOfLabel, $num) + return + p.s.footnotes.add((fnManualNumber, num, -1, -1, $num)) + +proc addFootnoteNumAuto(p: var RstParser, label: string) = + ## add auto-numbered footnote. + ## Empty label [#] means it'll be resolved by the occurrence. + if label == "": # simple auto-numbered [#] + p.s.lineFootnoteNum.add lineInfo(p) + p.s.footnotes.add((fnAutoNumber, -1, p.s.lineFootnoteNum.len, -1, label)) + else: # auto-numbered with label [#label] + for fnote in p.s.footnotes: + if fnote.label == label: + rstMessage(p, mwRedefinitionOfLabel, label) + return + p.s.footnotes.add((fnAutoNumberLabel, -1, -1, -1, label)) + +proc addFootnoteSymAuto(p: var RstParser) = + p.s.lineFootnoteSym.add lineInfo(p) + p.s.footnotes.add((fnAutoSymbol, -1, -1, p.s.lineFootnoteSym.len, "")) + +proc orderFootnotes(s: PRstSharedState) = + ## numerate auto-numbered footnotes taking into account that all + ## manually numbered ones always have preference. + ## Save the result back to `s.footnotes`. + + # Report an error if found any mismatch in number of automatic footnotes + proc listFootnotes(locations: seq[TLineInfo]): string = + var lines: seq[string] + for info in locations: + if s.filenames.len > 1: + let file = getFilename(s.filenames, info.fileIndex) + lines.add file & ":" + else: # no need to add file name here if there is only 1 + lines.add "" + lines[^1].add $info.line + result.add $lines.len & " (lines " & join(lines, ", ") & ")" + if s.lineFootnoteNum.len != s.lineFootnoteNumRef.len: + rstMessage(s, meFootnoteMismatch, + "$1 != $2" % [listFootnotes(s.lineFootnoteNum), + listFootnotes(s.lineFootnoteNumRef)] & + " for auto-numbered footnotes") + if s.lineFootnoteSym.len != s.lineFootnoteSymRef.len: + rstMessage(s, meFootnoteMismatch, + "$1 != $2" % [listFootnotes(s.lineFootnoteSym), + listFootnotes(s.lineFootnoteSymRef)] & + " for auto-symbol footnotes") + + var result: seq[FootnoteSubst] + var manuallyN, autoN, autoSymbol: seq[FootnoteSubst] + for fs in s.footnotes: + if fs.kind == fnManualNumber: manuallyN.add fs + elif fs.kind in {fnAutoNumber, fnAutoNumberLabel}: autoN.add fs + else: autoSymbol.add fs + + if autoN.len == 0: + result = manuallyN + else: + # fill gaps between manually numbered footnotes in ascending order + manuallyN.sort() # sort by number - its first field + var lst = initSinglyLinkedList[FootnoteSubst]() + for elem in manuallyN: lst.append(elem) + var firstAuto = 0 + if lst.head == nil or lst.head.value.number != 1: + # no manual footnote [1], start numeration from 1 for auto-numbered + lst.prepend (autoN[0].kind, 1, autoN[0].autoNumIdx, -1, autoN[0].label) + firstAuto = 1 + var curNode = lst.head + var nextNode: SinglyLinkedNode[FootnoteSubst] + # go simultaneously through `autoN` and `lst` looking for gaps + for (kind, x, autoNumIdx, y, label) in autoN[firstAuto .. ^1]: + while (nextNode = curNode.next; nextNode != nil): + if nextNode.value.number - curNode.value.number > 1: + # gap found, insert new node `n` between curNode and nextNode: + var n = newSinglyLinkedNode((kind, curNode.value.number + 1, + autoNumIdx, -1, label)) + curNode.next = n + n.next = nextNode + curNode = n + break + else: + curNode = nextNode + if nextNode == nil: # no gap found, just append + lst.append (kind, curNode.value.number + 1, autoNumIdx, -1, label) + curNode = lst.tail + result = lst.toSeq + + # we use ASCII symbols instead of those recommended in RST specification: + const footnoteAutoSymbols = ["*", "^", "+", "=", "~", "$", "@", "%", "&"] + for fs in autoSymbol: + # assignment order: *, **, ***, ^, ^^, ^^^, ... &&&, ****, *****, ... + let i = fs.autoSymIdx - 1 + let symbolNum = (i div 3) mod footnoteAutoSymbols.len + let nSymbols = (1 + i mod 3) + 3 * (i div (3 * footnoteAutoSymbols.len)) + let label = footnoteAutoSymbols[symbolNum].repeat(nSymbols) + result.add((fs.kind, -1, -1, fs.autoSymIdx, label)) + + s.footnotes = result + +proc getFootnoteNum(s: PRstSharedState, label: string): int = + ## get number from label. Must be called after `orderFootnotes`. + result = -1 + for fnote in s.footnotes: + if fnote.label == label: + return fnote.number -proc findRef(p: var RstParser, key: string): PRstNode = - for i in countup(0, high(p.s.refs)): - if key == p.s.refs[i].key: - return p.s.refs[i].value +proc getFootnoteNum(s: PRstSharedState, order: int): int = + ## get number from occurrence. Must be called after `orderFootnotes`. + result = -1 + for fnote in s.footnotes: + if fnote.autoNumIdx == order: + return fnote.number + +proc getAutoSymbol(s: PRstSharedState, order: int): string = + ## get symbol from occurrence of auto-symbol footnote. + result = "???" + for fnote in s.footnotes: + if fnote.autoSymIdx == order: + return fnote.label + +proc newRstNodeA(p: var RstParser, kind: RstNodeKind): PRstNode = + ## create node and consume the current anchor + result = newRstNode(kind) + if p.curAnchors.len > 0: + result.anchor = p.curAnchors[0].anchor + addAnchorRst(p, "", result, manualDirectiveAnchor) + +template newLeaf(s: string): PRstNode = newRstLeaf(s) proc newLeaf(p: var RstParser): PRstNode = - result = newRstNode(rnLeaf, p.tok[p.idx].symbol) + result = newLeaf(currentTok(p).symbol) + +proc validRefnamePunct(x: string): bool = + ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names + x.len == 1 and x[0] in {'-', '_', '.', ':', '+'} + +func getRefnameIdx(p: RstParser, startIdx: int): int = + ## Gets last token index of a refname ("word" in RST terminology): + ## + ## reference names are single words consisting of alphanumerics plus + ## isolated (no two adjacent) internal hyphens, underscores, periods, + ## colons and plus signs; no whitespace or other characters are allowed. + ## + ## Refnames are used for: + ## - reference names + ## - role names + ## - directive names + ## - footnote labels + ## + # TODO: use this func in all other relevant places + var j = startIdx + if p.tok[j].kind == tkWord: + inc j + while p.tok[j].kind == tkPunct and validRefnamePunct(p.tok[j].symbol) and + p.tok[j+1].kind == tkWord: + inc j, 2 + result = j - 1 + +func getRefname(p: RstParser, startIdx: int): (string, int) = + let lastIdx = getRefnameIdx(p, startIdx) + result[1] = lastIdx + for j in startIdx..lastIdx: + result[0].add p.tok[j].symbol proc getReferenceName(p: var RstParser, endStr: string): PRstNode = var res = newRstNode(rnInner) while true: - case p.tok[p.idx].kind + case currentTok(p).kind of tkWord, tkOther, tkWhite: - add(res, newLeaf(p)) + res.add(newLeaf(p)) of tkPunct: - if p.tok[p.idx].symbol == endStr: - inc(p.idx) + if currentTok(p).symbol == endStr: + inc p.idx break else: - add(res, newLeaf(p)) + res.add(newLeaf(p)) else: rstMessage(p, meExpected, endStr) break - inc(p.idx) + inc p.idx result = res proc untilEol(p: var RstParser): PRstNode = result = newRstNode(rnInner) - while not (p.tok[p.idx].kind in {tkIndent, tkEof}): - add(result, newLeaf(p)) - inc(p.idx) + while currentTok(p).kind notin {tkIndent, tkEof}: + result.add(newLeaf(p)) + inc p.idx proc expect(p: var RstParser, tok: string) = - if p.tok[p.idx].symbol == tok: inc(p.idx) + if currentTok(p).symbol == tok: inc p.idx else: rstMessage(p, meExpected, tok) -proc isInlineMarkupEnd(p: RstParser, markup: string): bool = - result = p.tok[p.idx].symbol == markup - if not result: - return # Rule 3: - result = not (p.tok[p.idx - 1].kind in {tkIndent, tkWhite}) - if not result: - return # Rule 4: - result = (p.tok[p.idx + 1].kind in {tkIndent, tkWhite, tkEof}) or - (p.tok[p.idx + 1].symbol[0] in - {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', - '?', '_'}) - if not result: - return # Rule 7: +proc inlineMarkdownEnd(p: RstParser): bool = + result = prevTok(p).kind notin {tkIndent, tkWhite} + ## (For a special case of ` we don't allow spaces surrounding it + ## unlike original Markdown because this behavior confusing/useless) + +proc inlineRstEnd(p: RstParser): bool = + # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules + # Rule 2: + result = prevTok(p).kind notin {tkIndent, tkWhite} + if not result: return + # Rule 7: + result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or + nextTok(p).symbol[0] in + {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'} + +proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = + if exact: + result = currentTok(p).symbol == markup + else: + result = currentTok(p).symbol.endsWith markup + if (not result) and markup == "``": + # check that escaping may have splitted `` to 2 tokens ` and ` + result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`" + if not result: return + # surroundings check + if markup in ["_", "__"]: + result = inlineRstEnd(p) + else: + if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p) + else: result = inlineRstEnd(p) + +proc rstRuleSurround(p: RstParser): bool = + result = true + # Rules 4 & 5: if p.idx > 0: - if (markup != "``") and (p.tok[p.idx - 1].symbol == "\\"): - result = false + var d: char + var c = prevTok(p).symbol[0] + case c + of '\'', '\"': d = c + of '(': d = ')' + of '[': d = ']' + of '{': d = '}' + of '<': d = '>' + else: d = '\0' + if d != '\0': result = nextTok(p).symbol[0] != d + +proc inlineMarkdownStart(p: RstParser): bool = + result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} + if not result: return + # this rst rule is really nice, let us use it in Markdown mode too. + result = rstRuleSurround(p) + +proc inlineRstStart(p: RstParser): bool = + ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules + # Rule 6 + result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or + prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'} + if not result: return + # Rule 1: + result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} + if not result: return + result = rstRuleSurround(p) proc isInlineMarkupStart(p: RstParser, markup: string): bool = - var d: char - result = p.tok[p.idx].symbol == markup - if not result: - return # Rule 1: - result = (p.idx == 0) or (p.tok[p.idx - 1].kind in {tkIndent, tkWhite}) or - (p.tok[p.idx - 1].symbol[0] in - {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}) - if not result: - return # Rule 2: - result = not (p.tok[p.idx + 1].kind in {tkIndent, tkWhite, tkEof}) - if not result: - return # Rule 5 & 7: - if p.idx > 0: - if p.tok[p.idx - 1].symbol == "\\": - result = false - else: - var c = p.tok[p.idx - 1].symbol[0] - case c - of '\'', '\"': d = c - of '(': d = ')' - of '[': d = ']' - of '{': d = '}' - of '<': d = '>' - else: d = '\0' - if d != '\0': result = p.tok[p.idx + 1].symbol[0] != d + if markup != "_`": + result = currentTok(p).symbol == markup + else: # _` is a 2 token case + result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`" + if not result: return + # surroundings check + if markup in ["_", "__", "[", "|"]: + # Note: we require space/punctuation even before [markdown link](...) + result = inlineRstStart(p) + else: + if roPreferMarkdown in p.s.options: result = inlineMarkdownStart(p) + else: result = inlineRstStart(p) proc match(p: RstParser, start: int, expr: string): bool = # regular expressions are: @@ -550,97 +1202,159 @@ proc match(p: RstParser, start: int, expr: string): bool = # ' ' tkWhite # 'a' tkAdornment # 'i' tkIndent + # 'I' tkIndent or tkEof # 'p' tkPunct # 'T' always true # 'E' whitespace, indent or eof - # 'e' tkWord or '#' (for enumeration lists) + # 'e' any enumeration sequence or '#' (for enumeration lists) + # 'x' a..z or '#' (for enumeration lists) + # 'n' 0..9 or '#' (for enumeration lists) var i = 0 var j = start - var last = len(expr) - 1 + var last = expr.len - 1 while i <= last: case expr[i] - of 'w': result = p.tok[j].kind == tkWord + of 'w': + let lastIdx = getRefnameIdx(p, j) + result = lastIdx >= j + if result: j = lastIdx of ' ': result = p.tok[j].kind == tkWhite of 'i': result = p.tok[j].kind == tkIndent + of 'I': result = p.tok[j].kind in {tkIndent, tkEof} of 'p': result = p.tok[j].kind == tkPunct of 'a': result = p.tok[j].kind == tkAdornment of 'o': result = p.tok[j].kind == tkOther of 'T': result = true of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent} - of 'e': - result = (p.tok[j].kind == tkWord) or (p.tok[j].symbol == "#") + of 'e', 'x', 'n': + result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#" if result: case p.tok[j].symbol[0] - of 'a'..'z', 'A'..'Z', '#': result = len(p.tok[j].symbol) == 1 - of '0'..'9': result = allCharsInSet(p.tok[j].symbol, {'0'..'9'}) + of '#': result = true + of 'a'..'z', 'A'..'Z': + result = expr[i] in {'e', 'x'} and p.tok[j].symbol.len == 1 + of '0'..'9': + result = expr[i] in {'e', 'n'} and + allCharsInSet(p.tok[j].symbol, {'0'..'9'}) else: result = false else: var c = expr[i] var length = 0 - while (i <= last) and (expr[i] == c): - inc(i) - inc(length) - dec(i) - result = (p.tok[j].kind in {tkPunct, tkAdornment}) and - (len(p.tok[j].symbol) == length) and (p.tok[j].symbol[0] == c) + while i <= last and expr[i] == c: + inc i + inc length + dec i + result = p.tok[j].kind in {tkPunct, tkAdornment} and + p.tok[j].symbol.len == length and p.tok[j].symbol[0] == c if not result: return - inc(j) - inc(i) + inc j + inc i result = true -proc fixupEmbeddedRef(n, a, b: PRstNode) = +proc safeProtocol*(linkStr: var string): string = + # Returns link's protocol and, if it's not safe, clears `linkStr` + result = "" + if scanf(linkStr, "$w:", result): + # if it has a protocol at all, ensure that it's not 'javascript:' or worse: + if cmpIgnoreCase(result, "http") == 0 or + cmpIgnoreCase(result, "https") == 0 or + cmpIgnoreCase(result, "ftp") == 0: + discard "it's fine" + else: + linkStr = "" + +proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool = + # Returns `true` if the link belongs to an allowed protocol var sep = - 1 - for i in countdown(len(n) - 2, 0): + for i in countdown(n.len - 2, 0): if n.sons[i].text == "<": sep = i break - var incr = if (sep > 0) and (n.sons[sep - 1].text[0] == ' '): 2 else: 1 - for i in countup(0, sep - incr): add(a, n.sons[i]) - for i in countup(sep + 1, len(n) - 2): add(b, n.sons[i]) + var incr = if sep > 0 and n.sons[sep - 1].text[0] == ' ': 2 else: 1 + for i in countup(0, sep - incr): a.add(n.sons[i]) + var linkStr = "" + for i in countup(sep + 1, n.len - 2): linkStr.add(n.sons[i].addNodes) + if linkStr != "": + let protocol = safeProtocol(linkStr) + result = linkStr != "" + if not result: + rstMessage(p, mwBrokenLink, protocol, + p.tok[p.idx-3].line, p.tok[p.idx-3].col) + b.add newLeaf(linkStr) + +proc whichRole(p: RstParser, sym: string): RstNodeKind = + result = whichRoleAux(sym) + if result == rnUnknownRole: + rstMessage(p, mwUnsupportedLanguage, sym) + +proc toInlineCode(n: PRstNode, language: string): PRstNode = + ## Creates rnInlineCode and attaches `n` contents as code (in 3rd son). + result = newRstNode(rnInlineCode, info=n.info) + let args = newRstNode(rnDirArg) + var lang = language + if language == "cpp": lang = "c++" + elif language == "csharp": lang = "c#" + args.add newLeaf(lang) + result.add args + result.add PRstNode(nil) + var lb = newRstNode(rnLiteralBlock) + var s: string + for i in n.sons: + assert i.kind == rnLeaf + s.add i.text + lb.add newLeaf(s) + result.add lb + +proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode = + let newN = newRstNode(rnInner, n.sons) + let newSons = @[newN, newLeaf(roleName)] + result = newRstNode(kind, newSons) proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode = - result = n - if isInlineMarkupEnd(p, "_") or isInlineMarkupEnd(p, "__"): - inc(p.idx) + ## Finalizes node `n` that was tentatively determined as interpreted text. + var newKind = n.kind + var newSons = n.sons + + proc finalizeInterpreted(node: PRstNode, newKind: RstNodeKind, + newSons: seq[PRstNode], roleName: string): + PRstNode {.nimcall.} = + # fixes interpreted text (`x` or `y`:role:) to proper internal AST format + if newKind in {rnUnknownRole, rnCodeFragment}: + result = node.toOtherRole(newKind, roleName) + elif newKind == rnInlineCode: + result = node.toInlineCode(language=roleName) + else: + result = newRstNode(newKind, newSons) + + if isInlineMarkupEnd(p, "_", exact=true) or + isInlineMarkupEnd(p, "__", exact=true): + inc p.idx if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">": var a = newRstNode(rnInner) var b = newRstNode(rnInner) - fixupEmbeddedRef(n, a, b) - if len(a) == 0: - result = newRstNode(rnStandaloneHyperlink) - add(result, b) - else: - result = newRstNode(rnHyperlink) - add(result, a) - add(result, b) - setRef(p, rstnodeToRefname(a), b) - elif n.kind == rnInterpretedText: - n.kind = rnRef - else: - result = newRstNode(rnRef) - add(result, n) + if fixupEmbeddedRef(p, n, a, b): + if a.len == 0: # e.g. `<a_named_relative_link>`_ + newKind = rnStandaloneHyperlink + newSons = @[b] + else: # e.g. `link title <http://site>`_ + newKind = rnHyperlink + newSons = @[a, b] + setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias) + else: # include as plain text, not a link + newKind = rnInner + newSons = n.sons + result = newRstNode(newKind, newSons) + else: # some link that will be resolved in `resolveSubs` + newKind = rnRstRef + result = newRstNode(newKind, sons=newSons, info=n.info) elif match(p, p.idx, ":w:"): # a role: - if p.tok[p.idx + 1].symbol == "idx": - n.kind = rnIdx - elif p.tok[p.idx + 1].symbol == "literal": - n.kind = rnInlineLiteral - elif p.tok[p.idx + 1].symbol == "strong": - n.kind = rnStrongEmphasis - elif p.tok[p.idx + 1].symbol == "emphasis": - n.kind = rnEmphasis - elif (p.tok[p.idx + 1].symbol == "sub") or - (p.tok[p.idx + 1].symbol == "subscript"): - n.kind = rnSub - elif (p.tok[p.idx + 1].symbol == "sup") or - (p.tok[p.idx + 1].symbol == "supscript"): - n.kind = rnSup - else: - result = newRstNode(rnGeneralRole) - n.kind = rnInner - add(result, n) - add(result, newRstNode(rnLeaf, p.tok[p.idx + 1].symbol)) - inc(p.idx, 3) + let (roleName, lastIdx) = getRefname(p, p.idx+1) + newKind = whichRole(p, roleName) + result = n.finalizeInterpreted(newKind, newSons, roleName) + p.idx = lastIdx + 2 + else: + result = n.finalizeInterpreted(p.s.currRoleKind, newSons, p.s.currRole) proc matchVerbatim(p: RstParser, start: int, expr: string): int = result = start @@ -652,7 +1366,7 @@ proc matchVerbatim(p: RstParser, start: int, expr: string): int = if j < expr.len: result = 0 proc parseSmiley(p: var RstParser): PRstNode = - if p.tok[p.idx].symbol[0] notin SmileyStartChars: return + if currentTok(p).symbol[0] notin SmileyStartChars: return for key, val in items(Smilies): let m = matchVerbatim(p, p.idx, key) if m > 0: @@ -661,306 +1375,683 @@ proc parseSmiley(p: var RstParser): PRstNode = result.text = val return -when false: - const - urlChars = {'A'..'Z', 'a'..'z', '0'..'9', ':', '#', '@', '%', '/', ';', - '$', '(', ')', '~', '_', '?', '+', '-', '=', '\\', '.', '&', - '\128'..'\255'} - proc isUrl(p: RstParser, i: int): bool = - result = (p.tok[i+1].symbol == ":") and (p.tok[i+2].symbol == "//") and - (p.tok[i+3].kind == tkWord) and - (p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"]) - -proc parseUrl(p: var RstParser, father: PRstNode) = - #if p.tok[p.idx].symbol[strStart] == '<': - if isUrl(p, p.idx): - var n = newRstNode(rnStandaloneHyperlink) - while true: - case p.tok[p.idx].kind - of tkWord, tkAdornment, tkOther: discard - of tkPunct: - if p.tok[p.idx+1].kind notin {tkWord, tkAdornment, tkOther, tkPunct}: + result = p.tok[i+1].symbol == ":" and p.tok[i+2].symbol == "//" and + p.tok[i+3].kind == tkWord and + p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"] + +proc checkParen(token: Token, parensStack: var seq[char]): bool {.inline.} = + ## Returns `true` iff `token` is a closing parenthesis for some + ## previous opening parenthesis saved in `parensStack`. + ## This is according Markdown balanced parentheses rule + ## (https://spec.commonmark.org/0.29/#link-destination) + ## to allow links like + ## https://en.wikipedia.org/wiki/APL_(programming_language), + ## we use it for RST also. + result = false + if token.kind == tkPunct: + let c = token.symbol[0] + if c in {'(', '[', '{'}: # push + parensStack.add c + elif c in {')', ']', '}'}: # try pop + # a case like ([) inside a link is allowed and [ is also `pop`ed: + for i in countdown(parensStack.len - 1, 0): + if (parensStack[i] == '(' and c == ')' or + parensStack[i] == '[' and c == ']' or + parensStack[i] == '{' and c == '}'): + parensStack.setLen i + result = true break - else: break - add(n, newLeaf(p)) - inc(p.idx) - add(father, n) + +proc parseUrl(p: var RstParser): PRstNode = + ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks + result = newRstNode(rnStandaloneHyperlink) + var lastIdx = p.idx + var closedParenIdx = p.idx - 1 # for balanced parens rule + var parensStack: seq[char] + while p.tok[lastIdx].kind in {tkWord, tkPunct, tkOther}: + let isClosing = checkParen(p.tok[lastIdx], parensStack) + if isClosing: + closedParenIdx = lastIdx + inc lastIdx + dec lastIdx + # standalone URL can not end with punctuation in RST + while lastIdx > closedParenIdx and p.tok[lastIdx].kind == tkPunct and + p.tok[lastIdx].symbol != "/": + dec lastIdx + var s = "" + for i in p.idx .. lastIdx: s.add p.tok[i].symbol + result.add s + p.idx = lastIdx + 1 + +proc parseWordOrRef(p: var RstParser, father: PRstNode) = + ## Parses a normal word or may be a reference or URL. + if nextTok(p).kind != tkPunct: # <- main path, a normal word + father.add newLeaf(p) + inc p.idx + elif isUrl(p, p.idx): # URL http://something + father.add parseUrl(p) else: - var n = newLeaf(p) - inc(p.idx) - if p.tok[p.idx].symbol == "_": n = parsePostfix(p, n) - add(father, n) + # check for reference (probably, long one like some.ref.with.dots_ ) + var saveIdx = p.idx + var reference: PRstNode = nil + inc p.idx + while currentTok(p).kind in {tkWord, tkPunct}: + if currentTok(p).kind == tkPunct: + if isInlineMarkupEnd(p, "_", exact=true): + reference = newRstNode(rnRstRef, info=lineInfo(p, saveIdx)) + break + if not validRefnamePunct(currentTok(p).symbol): + break + inc p.idx + if reference != nil: + for i in saveIdx..p.idx-1: reference.add newLeaf(p.tok[i].symbol) + father.add reference + inc p.idx # skip final _ + else: # 1 normal word + father.add newLeaf(p.tok[saveIdx].symbol) + p.idx = saveIdx + 1 proc parseBackslash(p: var RstParser, father: PRstNode) = - assert(p.tok[p.idx].kind == tkPunct) - if p.tok[p.idx].symbol == "\\\\": - add(father, newRstNode(rnLeaf, "\\")) - inc(p.idx) - elif p.tok[p.idx].symbol == "\\": + assert(currentTok(p).kind == tkPunct) + if currentTok(p).symbol == "\\": # XXX: Unicode? - inc(p.idx) - if p.tok[p.idx].kind != tkWhite: add(father, newLeaf(p)) - if p.tok[p.idx].kind != tkEof: inc(p.idx) + inc p.idx + if currentTok(p).kind != tkWhite: father.add(newLeaf(p)) + if currentTok(p).kind != tkEof: inc p.idx else: - add(father, newLeaf(p)) - inc(p.idx) - -when false: - proc parseAdhoc(p: var RstParser, father: PRstNode, verbatim: bool) = - if not verbatim and isURL(p, p.idx): - var n = newRstNode(rnStandaloneHyperlink) - while true: - case p.tok[p.idx].kind - of tkWord, tkAdornment, tkOther: nil - of tkPunct: - if p.tok[p.idx+1].kind notin {tkWord, tkAdornment, tkOther, tkPunct}: - break - else: break - add(n, newLeaf(p)) - inc(p.idx) - add(father, n) - elif not verbatim and roSupportSmilies in p.sharedState.options: - let n = parseSmiley(p) - if s != nil: - add(father, n) - else: - var n = newLeaf(p) - inc(p.idx) - if p.tok[p.idx].symbol == "_": n = parsePostfix(p, n) - add(father, n) + father.add(newLeaf(p)) + inc p.idx proc parseUntil(p: var RstParser, father: PRstNode, postfix: string, interpretBackslash: bool) = let - line = p.tok[p.idx].line - col = p.tok[p.idx].col + line = currentTok(p).line + col = currentTok(p).col inc p.idx while true: - case p.tok[p.idx].kind + case currentTok(p).kind of tkPunct: - if isInlineMarkupEnd(p, postfix): - inc(p.idx) + if isInlineMarkupEnd(p, postfix, exact=false): + let l = currentTok(p).symbol.len + if l > postfix.len: + # handle cases like *emphasis with stars****. (It's valid RST!) + father.add newLeaf(currentTok(p).symbol[0 ..< l - postfix.len]) + elif postfix == "``" and currentTok(p).symbol == "`" and + prevTok(p).symbol == "`": + # handle cases like ``literal\`` - delete ` already added after \ + father.sons.setLen(father.sons.len - 1) + inc p.idx break - elif interpretBackslash: - parseBackslash(p, father) else: - add(father, newLeaf(p)) - inc(p.idx) + if postfix == "`": + if currentTok(p).symbol == "\\": + if nextTok(p).symbol == "\\": + father.add newLeaf("\\") + father.add newLeaf("\\") + inc p.idx, 2 + elif nextTok(p).symbol == "`": # escape ` + father.add newLeaf("`") + inc p.idx, 2 + else: + father.add newLeaf("\\") + inc p.idx + else: + father.add(newLeaf(p)) + inc p.idx + else: + if interpretBackslash: + parseBackslash(p, father) + else: + father.add(newLeaf(p)) + inc p.idx of tkAdornment, tkWord, tkOther: - add(father, newLeaf(p)) - inc(p.idx) + father.add(newLeaf(p)) + inc p.idx of tkIndent: - add(father, newRstNode(rnLeaf, " ")) - inc(p.idx) - if p.tok[p.idx].kind == tkIndent: + father.add newLeaf(" ") + inc p.idx + if currentTok(p).kind == tkIndent: rstMessage(p, meExpected, postfix, line, col) break of tkWhite: - add(father, newRstNode(rnLeaf, " ")) - inc(p.idx) + father.add newLeaf(" ") + inc p.idx else: rstMessage(p, meExpected, postfix, line, col) +proc parseMarkdownCodeblockFields(p: var RstParser): PRstNode = + ## Parses additional (after language string) code block parameters + ## in a format *suggested* in the `CommonMark Spec`_ with handling of `"`. + if currentTok(p).kind == tkIndent: + result = nil + else: + result = newRstNode(rnFieldList) + while currentTok(p).kind notin {tkIndent, tkEof}: + if currentTok(p).kind == tkWhite: + inc p.idx + else: + let field = newRstNode(rnField) + var fieldName = "" + while currentTok(p).kind notin {tkWhite, tkIndent, tkEof} and + currentTok(p).symbol != "=": + fieldName.add currentTok(p).symbol + inc p.idx + field.add(newRstNode(rnFieldName, @[newLeaf(fieldName)])) + if currentTok(p).kind == tkWhite: inc p.idx + let fieldBody = newRstNode(rnFieldBody) + if currentTok(p).symbol == "=": + inc p.idx + if currentTok(p).kind == tkWhite: inc p.idx + var fieldValue = "" + if currentTok(p).symbol == "\"": + while true: + fieldValue.add currentTok(p).symbol + inc p.idx + if currentTok(p).kind == tkEof: + rstMessage(p, meExpected, "\"") + elif currentTok(p).symbol == "\"": + fieldValue.add "\"" + inc p.idx + break + else: + while currentTok(p).kind notin {tkWhite, tkIndent, tkEof}: + fieldValue.add currentTok(p).symbol + inc p.idx + fieldBody.add newLeaf(fieldValue) + field.add(fieldBody) + result.add(field) + +proc mayLoadFile(p: RstParser, result: var PRstNode) = + var filename = strip(getFieldValue(result, "file"), + chars = Whitespace + {'"'}) + if filename != "": + if roSandboxDisabled notin p.s.options: + let tok = p.tok[p.idx-2] + rstMessage(p, meSandboxedDirective, "file", tok.line, tok.col) + var path = p.findRelativeFile(filename) + if path == "": rstMessage(p, meCannotOpenFile, filename) + var n = newRstNode(rnLiteralBlock) + n.add newLeaf(readFile(path)) + result.sons[2] = n + +proc defaultCodeLangNim(p: RstParser, result: var PRstNode) = + # Create a field block if the input block didn't have any. + if result.sons[1].isNil: result.sons[1] = newRstNode(rnFieldList) + assert result.sons[1].kind == rnFieldList + # Hook the extra field and specify the Nim language as value. + var extraNode = newRstNode(rnField, info=lineInfo(p)) + extraNode.add(newRstNode(rnFieldName)) + extraNode.add(newRstNode(rnFieldBody)) + extraNode.sons[0].add newLeaf("default-language") + extraNode.sons[1].add newLeaf("Nim") + result.sons[1].add(extraNode) + proc parseMarkdownCodeblock(p: var RstParser): PRstNode = + result = newRstNodeA(p, rnCodeBlock) + result.sons.setLen(3) + let line = curLine(p) + let baseCol = currentTok(p).col + let baseSym = currentTok(p).symbol # usually just ``` + inc p.idx + result.info = lineInfo(p) var args = newRstNode(rnDirArg) - if p.tok[p.idx].kind == tkWord: - add(args, newLeaf(p)) - inc(p.idx) + if currentTok(p).kind == tkWord: + args.add(newLeaf(p)) + inc p.idx + result.sons[1] = parseMarkdownCodeblockFields(p) + mayLoadFile(p, result) else: args = nil - var n = newRstNode(rnLeaf, "") + var n = newLeaf("") + var isFirstLine = true while true: - case p.tok[p.idx].kind - of tkEof: - rstMessage(p, meExpected, "```") + if currentTok(p).kind == tkEof: + rstMessage(p, meMissingClosing, + "$1 (started at line $2)" % [baseSym, $line]) break - of tkPunct: - if p.tok[p.idx].symbol == "```": - inc(p.idx) - break - else: - add(n.text, p.tok[p.idx].symbol) - inc(p.idx) + elif nextTok(p).kind in {tkPunct, tkAdornment} and + nextTok(p).symbol[0] == baseSym[0] and + nextTok(p).symbol.len >= baseSym.len: + inc p.idx, 2 + break + elif currentTok(p).kind == tkIndent: + if not isFirstLine: + n.text.add "\n" + if currentTok(p).ival > baseCol: + n.text.add " ".repeat(currentTok(p).ival - baseCol) + elif currentTok(p).ival < baseCol: + rstMessage(p, mwRstStyle, + "unexpected de-indentation in Markdown code block") + inc p.idx else: - add(n.text, p.tok[p.idx].symbol) - inc(p.idx) - var lb = newRstNode(rnLiteralBlock) - add(lb, n) - result = newRstNode(rnCodeBlock) - add(result, args) - add(result, PRstNode(nil)) - add(result, lb) + n.text.add(currentTok(p).symbol) + inc p.idx + isFirstLine = false + result.sons[0] = args + if result.sons[2] == nil: + var lb = newRstNode(rnLiteralBlock) + lb.add(n) + result.sons[2] = lb + if result.sons[0].isNil and roNimFile in p.s.options: + defaultCodeLangNim(p, result) proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool = - result = true - var desc, link = "" + # Parses Markdown link. If it's Pandoc auto-link then its second + # son (target) will be in tokenized format (rnInner with leafs). + var desc = newRstNode(rnInner) var i = p.idx + var parensStack: seq[char] template parse(endToken, dest) = + parensStack.setLen 0 inc i # skip begin token while true: - if p.tok[i].kind in {tkEof, tkIndent}: return false - if p.tok[i].symbol == endToken: break - dest.add p.tok[i].symbol + if p.tok[i].kind == tkEof: return false + if p.tok[i].kind == tkIndent and p.tok[i+1].kind == tkIndent: + return false + let isClosing = checkParen(p.tok[i], parensStack) + if p.tok[i].symbol == endToken and not isClosing: + break + let symbol = if p.tok[i].kind == tkIndent: " " else: p.tok[i].symbol + when dest is string: dest.add symbol + else: dest.add newLeaf(symbol) inc i inc i # skip end token parse("]", desc) - if p.tok[i].symbol != "(": return false - parse(")", link) - let child = newRstNode(rnHyperlink) - child.add desc - child.add link - # only commit if we detected no syntax error: - father.add child + if p.tok[i].symbol == "(": + var link = "" + let linkIdx = i + 1 + parse(")", link) + # only commit if we detected no syntax error: + let protocol = safeProtocol(link) + if link == "": + result = false + rstMessage(p, mwBrokenLink, protocol, + p.tok[linkIdx].line, p.tok[linkIdx].col) + else: + let child = newRstNode(rnHyperlink) + child.add newLeaf(desc.addNodes) + child.add link + father.add child + p.idx = i + result = true + elif roPreferMarkdown in p.s.options: + # Use Pandoc's implicit_header_references extension + var n = newRstNode(rnPandocRef) + if p.tok[i].symbol == "[": + var link = newRstNode(rnInner) + let targetIdx = i + 1 + parse("]", link) + n.add desc + if link.len != 0: # [description][target] + n.add link + n.info = lineInfo(p, targetIdx) + else: # [description=target][] + n.add desc + n.info = lineInfo(p, p.idx + 1) + else: # [description=target] + n.add desc + n.add desc # target is the same as description + n.info = lineInfo(p, p.idx + 1) + father.add n + p.idx = i + result = true + else: + result = false + +proc getRstFootnoteType(label: PRstNode): (FootnoteType, int) = + if label.sons.len >= 1 and label.sons[0].kind == rnLeaf and + label.sons[0].text == "#": + if label.sons.len == 1: + result = (fnAutoNumber, -1) + else: + result = (fnAutoNumberLabel, -1) + elif label.len == 1 and label.sons[0].kind == rnLeaf and + label.sons[0].text == "*": + result = (fnAutoSymbol, -1) + elif label.len == 1 and label.sons[0].kind == rnLeaf: + try: + result = (fnManualNumber, parseInt(label.sons[0].text)) + except ValueError: + result = (fnCitation, -1) + else: + result = (fnCitation, -1) + +proc getMdFootnoteType(label: PRstNode): (FootnoteType, int) = + try: + result = (fnManualNumber, parseInt(label.sons[0].text)) + except ValueError: + result = (fnAutoNumberLabel, -1) + +proc getFootnoteType(s: PRstSharedState, label: PRstNode): (FootnoteType, int) = + ## Returns footnote/citation type and manual number (if present). + if isMd(s): getMdFootnoteType(label) + else: getRstFootnoteType(label) + +proc parseRstFootnoteName(p: var RstParser, reference: bool): PRstNode = + ## parse footnote/citation label. Precondition: start at `[`. + ## Label text should be valid ref. name symbol, otherwise nil is returned. + var i = p.idx + 1 + result = newRstNode(rnInner) + while true: + if p.tok[i].kind in {tkEof, tkIndent, tkWhite}: + return nil + if p.tok[i].kind == tkPunct: + case p.tok[i].symbol: + of "]": + if i > p.idx + 1 and (not reference or (p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol == "_")): + inc i # skip ] + if reference: inc i # skip _ + break # to succeed, it's a footnote/citation indeed + else: + return nil + of "#": + if i != p.idx + 1: + return nil + of "*": + if i != p.idx + 1 and p.tok[i].kind != tkPunct and p.tok[i+1].symbol != "]": + return nil + else: + if not validRefnamePunct(p.tok[i].symbol): + return nil + result.add newLeaf(p.tok[i].symbol) + inc i p.idx = i - result = true + +proc isMdFootnoteName(p: RstParser, reference: bool): bool = + ## Pandoc Markdown footnote extension. + let j = p.idx + result = p.tok[j].symbol == "[" and p.tok[j+1].symbol == "^" and + p.tok[j+2].kind == tkWord + +proc parseMdFootnoteName(p: var RstParser, reference: bool): PRstNode = + if isMdFootnoteName(p, reference): + result = newRstNode(rnInner) + var j = p.idx + 2 + while p.tok[j].kind in {tkWord, tkOther} or + validRefnamePunct(p.tok[j].symbol): + result.add newLeaf(p.tok[j].symbol) + inc j + if j == p.idx + 2: + return nil + if p.tok[j].symbol == "]": + if reference: + p.idx = j + 1 # skip ] + else: + if p.tok[j+1].symbol == ":": + p.idx = j + 2 # skip ]: + else: + result = nil + else: + result = nil + else: + result = nil + +proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = + if isMd(p): parseMdFootnoteName(p, reference) + else: + if isInlineMarkupStart(p, "["): parseRstFootnoteName(p, reference) + else: nil + +proc isMarkdownCodeBlock(p: RstParser, idx: int): bool = + let tok = p.tok[idx] + template allowedSymbol: bool = + (tok.symbol[0] == '`' or + roPreferMarkdown in p.s.options and tok.symbol[0] == '~') + result = (roSupportMarkdown in p.s.options and + tok.kind in {tkPunct, tkAdornment} and + allowedSymbol and + tok.symbol.len >= 3) + +proc isMarkdownCodeBlock(p: RstParser): bool = + isMarkdownCodeBlock(p, p.idx) proc parseInline(p: var RstParser, father: PRstNode) = - case p.tok[p.idx].kind + var n: PRstNode # to be used in `if` condition + let saveIdx = p.idx + case currentTok(p).kind of tkPunct: if isInlineMarkupStart(p, "***"): var n = newRstNode(rnTripleEmphasis) parseUntil(p, n, "***", true) - add(father, n) + father.add(n) elif isInlineMarkupStart(p, "**"): var n = newRstNode(rnStrongEmphasis) parseUntil(p, n, "**", true) - add(father, n) + father.add(n) elif isInlineMarkupStart(p, "*"): var n = newRstNode(rnEmphasis) parseUntil(p, n, "*", true) - add(father, n) - elif roSupportMarkdown in p.s.options and p.tok[p.idx].symbol == "```": - inc(p.idx) - add(father, parseMarkdownCodeblock(p)) + father.add(n) + elif isInlineMarkupStart(p, "_`"): + var n = newRstNode(rnInlineTarget) + inc p.idx + parseUntil(p, n, "`", false) + n.anchor = rstnodeToRefname(n) + addAnchorRst(p, name = linkName(n), target = n, + anchorType=manualInlineAnchor) + father.add(n) + elif isMarkdownCodeBlock(p): + father.add(parseMarkdownCodeblock(p)) elif isInlineMarkupStart(p, "``"): var n = newRstNode(rnInlineLiteral) parseUntil(p, n, "``", false) - add(father, n) + father.add(n) + elif match(p, p.idx, ":w:") and + (var lastIdx = getRefnameIdx(p, p.idx + 1); + p.tok[lastIdx+2].symbol == "`"): + let (roleName, _) = getRefname(p, p.idx+1) + let k = whichRole(p, roleName) + var n = newRstNode(k) + p.idx = lastIdx + 2 + if k == rnInlineCode: + n = n.toInlineCode(language=roleName) + parseUntil(p, n, "`", false) # bug #17260 + if k in {rnUnknownRole, rnCodeFragment}: + n = n.toOtherRole(k, roleName) + father.add(n) elif isInlineMarkupStart(p, "`"): - var n = newRstNode(rnInterpretedText) - parseUntil(p, n, "`", true) + var n = newRstNode(rnInterpretedText, info=lineInfo(p, p.idx+1)) + parseUntil(p, n, "`", false) # bug #17260 n = parsePostfix(p, n) - add(father, n) + father.add(n) elif isInlineMarkupStart(p, "|"): - var n = newRstNode(rnSubstitutionReferences) + var n = newRstNode(rnSubstitutionReferences, info=lineInfo(p, p.idx+1)) parseUntil(p, n, "|", false) - add(father, n) + father.add(n) + elif currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and + (n = parseFootnoteName(p, reference=true); n != nil): + var nn = newRstNode(rnFootnoteRef) + nn.info = lineInfo(p, saveIdx+1) + nn.add n + let (fnType, _) = getFootnoteType(p.s, n) + case fnType + of fnAutoSymbol: + p.s.lineFootnoteSymRef.add lineInfo(p) + of fnAutoNumber: + p.s.lineFootnoteNumRef.add lineInfo(p) + else: discard + father.add(nn) elif roSupportMarkdown in p.s.options and - p.tok[p.idx].symbol == "[" and p.tok[p.idx+1].symbol != "[" and + currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and parseMarkdownLink(p, father): discard "parseMarkdownLink already processed it" else: if roSupportSmilies in p.s.options: let n = parseSmiley(p) if n != nil: - add(father, n) + father.add(n) return parseBackslash(p, father) of tkWord: if roSupportSmilies in p.s.options: let n = parseSmiley(p) if n != nil: - add(father, n) + father.add(n) return - parseUrl(p, father) + parseWordOrRef(p, father) of tkAdornment, tkOther, tkWhite: + if isMarkdownCodeBlock(p): + father.add(parseMarkdownCodeblock(p)) + return if roSupportSmilies in p.s.options: let n = parseSmiley(p) if n != nil: - add(father, n) + father.add(n) return - add(father, newLeaf(p)) - inc(p.idx) + father.add(newLeaf(p)) + inc p.idx else: discard proc getDirective(p: var RstParser): string = - if p.tok[p.idx].kind == tkWhite and p.tok[p.idx+1].kind == tkWord: - var j = p.idx - inc(p.idx) - result = p.tok[p.idx].symbol - inc(p.idx) - while p.tok[p.idx].kind in {tkWord, tkPunct, tkAdornment, tkOther}: - if p.tok[p.idx].symbol == "::": break - add(result, p.tok[p.idx].symbol) - inc(p.idx) - if p.tok[p.idx].kind == tkWhite: inc(p.idx) - if p.tok[p.idx].symbol == "::": - inc(p.idx) - if (p.tok[p.idx].kind == tkWhite): inc(p.idx) - else: - p.idx = j # set back - result = "" # error - else: - result = "" - -proc parseComment(p: var RstParser): PRstNode = - case p.tok[p.idx].kind - of tkIndent, tkEof: - if p.tok[p.idx].kind != tkEof and p.tok[p.idx + 1].kind == tkIndent: - inc(p.idx) # empty comment - else: - var indent = p.tok[p.idx].ival - while true: - case p.tok[p.idx].kind - of tkEof: - break - of tkIndent: - if (p.tok[p.idx].ival < indent): break - else: - discard - inc(p.idx) + result = "" + if currentTok(p).kind == tkWhite: + let (name, lastIdx) = getRefname(p, p.idx + 1) + let afterIdx = lastIdx + 1 + if name.len > 0: + if p.tok[afterIdx].symbol == "::": + result = name + p.idx = afterIdx + 1 + if currentTok(p).kind == tkWhite: + inc p.idx + elif currentTok(p).kind != tkIndent: + rstMessage(p, mwRstStyle, + "whitespace or newline expected after directive " & name) + result = result.toLowerAscii() + elif p.tok[afterIdx].symbol == ":": + rstMessage(p, mwRstStyle, + "double colon :: may be missing at end of '" & name & "'", + p.tok[afterIdx].line, p.tok[afterIdx].col) + elif p.tok[afterIdx].kind == tkPunct and p.tok[afterIdx].symbol[0] == ':': + rstMessage(p, mwRstStyle, + "too many colons for a directive (should be ::)", + p.tok[afterIdx].line, p.tok[afterIdx].col) + +proc parseComment(p: var RstParser, col: int): PRstNode = + if currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent: + inc p.idx # empty comment else: - while p.tok[p.idx].kind notin {tkIndent, tkEof}: inc(p.idx) + while currentTok(p).kind != tkEof: + if currentTok(p).kind == tkIndent and currentTok(p).ival > col or + currentTok(p).kind != tkIndent and currentTok(p).col > col: + inc p.idx + else: + break result = nil -type - DirKind = enum # must be ordered alphabetically! - dkNone, dkAuthor, dkAuthors, dkCode, dkCodeBlock, dkContainer, dkContents, - dkFigure, dkImage, dkInclude, dkIndex, dkRaw, dkTitle - -const - DirIds: array[0..12, string] = ["", "author", "authors", "code", - "code-block", "container", "contents", "figure", "image", "include", - "index", "raw", "title"] - -proc getDirKind(s: string): DirKind = - let i = find(DirIds, s) - if i >= 0: result = DirKind(i) - else: result = dkNone - proc parseLine(p: var RstParser, father: PRstNode) = while true: - case p.tok[p.idx].kind + case currentTok(p).kind of tkWhite, tkWord, tkOther, tkPunct: parseInline(p, father) else: break proc parseUntilNewline(p: var RstParser, father: PRstNode) = while true: - case p.tok[p.idx].kind + case currentTok(p).kind of tkWhite, tkWord, tkAdornment, tkOther, tkPunct: parseInline(p, father) of tkEof, tkIndent: break proc parseSection(p: var RstParser, result: PRstNode) {.gcsafe.} + +proc tokenAfterNewline(p: RstParser, start: int): int = + result = start + while true: + case p.tok[result].kind + of tkEof: + break + of tkIndent: + inc result + break + else: inc result + +proc tokenAfterNewline(p: RstParser): int {.inline.} = + result = tokenAfterNewline(p, p.idx) + +proc getWrappableIndent(p: RstParser): int = + ## Gets baseline indentation for bodies of field lists and directives. + ## Handles situations like this (with possible de-indent in [case.3]):: + ## + ## :field: definition [case.1] + ## + ## currInd currentTok(p).col + ## | | + ## v v + ## + ## .. Note:: defItem: [case.2] + ## definition + ## + ## ^ + ## | + ## nextIndent + ## + ## .. Note:: - point1 [case.3] + ## - point 2 + ## + ## ^ + ## | + ## nextIndent + if currentTok(p).kind == tkIndent: + result = currentTok(p).ival + else: + var nextIndent = p.tok[tokenAfterNewline(p)-1].ival + if nextIndent <= currInd(p): # parse only this line [case.1] + result = currentTok(p).col + elif nextIndent >= currentTok(p).col: # may be a definition list [case.2] + result = currentTok(p).col + else: + result = nextIndent # allow parsing next lines [case.3] + +proc getMdBlockIndent(p: RstParser): int = + ## Markdown version of `getWrappableIndent`. + if currentTok(p).kind == tkIndent: + result = currentTok(p).ival + else: + var nextIndent = p.tok[tokenAfterNewline(p)-1].ival + # TODO: Markdown-compliant definition should allow nextIndent == currInd(p): + if nextIndent <= currInd(p): # parse only this line + result = currentTok(p).col + else: + result = nextIndent # allow parsing next lines [case.3] + +proc indFollows(p: RstParser): bool = + result = currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) + +proc parseBlockContent(p: var RstParser, father: var PRstNode, + contentParser: SectionParser): bool {.gcsafe.} = + ## parse the final content part of explicit markup blocks (directives, + ## footnotes, etc). Returns true if succeeded. + if currentTok(p).kind != tkIndent or indFollows(p): + let blockIndent = getWrappableIndent(p) + pushInd(p, blockIndent) + let content = contentParser(p) + popInd(p) + father.add content + result = true + +proc parseSectionWrapper(p: var RstParser): PRstNode = + result = newRstNode(rnInner) + parseSection(p, result) + while result.kind == rnInner and result.len == 1: + result = result.sons[0] + proc parseField(p: var RstParser): PRstNode = ## Returns a parsed rnField node. ## ## rnField nodes have two children nodes, a rnFieldName and a rnFieldBody. - result = newRstNode(rnField) - var col = p.tok[p.idx].col + result = newRstNode(rnField, info=lineInfo(p)) + var col = currentTok(p).col var fieldname = newRstNode(rnFieldName) parseUntil(p, fieldname, ":", false) var fieldbody = newRstNode(rnFieldBody) - if p.tok[p.idx].kind != tkIndent: parseLine(p, fieldbody) - if p.tok[p.idx].kind == tkIndent: - var indent = p.tok[p.idx].ival - if indent > col: - pushInd(p, indent) - parseSection(p, fieldbody) - popInd(p) - add(result, fieldname) - add(result, fieldbody) + if currentTok(p).kind == tkWhite: inc p.idx + let indent = getWrappableIndent(p) + if indent > col: + pushInd(p, indent) + parseSection(p, fieldbody) + popInd(p) + result.add(fieldname) + result.add(fieldbody) proc parseFields(p: var RstParser): PRstNode = ## Parses fields for a section or directive block. @@ -969,16 +2060,16 @@ proc parseFields(p: var RstParser): PRstNode = ## otherwise it will return a node of rnFieldList type with children. result = nil var atStart = p.idx == 0 and p.tok[0].symbol == ":" - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx + 1].symbol == ":") or + if currentTok(p).kind == tkIndent and nextTok(p).symbol == ":" or atStart: - var col = if atStart: p.tok[p.idx].col else: p.tok[p.idx].ival - result = newRstNode(rnFieldList) - if not atStart: inc(p.idx) + var col = if atStart: currentTok(p).col else: currentTok(p).ival + result = newRstNodeA(p, rnFieldList) + if not atStart: inc p.idx while true: - add(result, parseField(p)) - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and - (p.tok[p.idx + 1].symbol == ":"): - inc(p.idx) + result.add(parseField(p)) + if currentTok(p).kind == tkIndent and currentTok(p).ival == col and + nextTok(p).symbol == ":": + inc p.idx else: break @@ -995,13 +2086,12 @@ proc getFieldValue*(n: PRstNode): string = result = addNodes(n.sons[1]).strip proc getFieldValue(n: PRstNode, fieldname: string): string = - result = "" if n.sons[1] == nil: return - if (n.sons[1].kind != rnFieldList): + if n.sons[1].kind != rnFieldList: #InternalError("getFieldValue (2): " & $n.sons[1].kind) # We don't like internal errors here anymore as that would break the forum! return - for i in countup(0, len(n.sons[1]) - 1): + for i in 0 ..< n.sons[1].len: var f = n.sons[1].sons[i] if cmpIgnoreStyle(addNodes(f.sons[0]), fieldname) == 0: result = addNodes(f.sons[1]) @@ -1014,64 +2104,215 @@ proc getArgument(n: PRstNode): string = proc parseDotDot(p: var RstParser): PRstNode {.gcsafe.} proc parseLiteralBlock(p: var RstParser): PRstNode = - result = newRstNode(rnLiteralBlock) - var n = newRstNode(rnLeaf, "") - if p.tok[p.idx].kind == tkIndent: - var indent = p.tok[p.idx].ival - inc(p.idx) + result = newRstNodeA(p, rnLiteralBlock) + var n = newLeaf("") + if currentTok(p).kind == tkIndent: + var indent = currentTok(p).ival + while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines while true: - case p.tok[p.idx].kind + case currentTok(p).kind of tkEof: break of tkIndent: - if (p.tok[p.idx].ival < indent): + if currentTok(p).ival < indent: break else: - add(n.text, "\n") - add(n.text, spaces(p.tok[p.idx].ival - indent)) - inc(p.idx) + n.text.add("\n") + n.text.add(spaces(currentTok(p).ival - indent)) + inc p.idx else: - add(n.text, p.tok[p.idx].symbol) - inc(p.idx) + n.text.add(currentTok(p).symbol) + inc p.idx else: - while not (p.tok[p.idx].kind in {tkIndent, tkEof}): - add(n.text, p.tok[p.idx].symbol) - inc(p.idx) - add(result, n) - -proc getLevel(map: var LevelMap, lvl: var int, c: char): int = - if map[c] == 0: - inc(lvl) - map[c] = lvl - result = map[c] - -proc tokenAfterNewline(p: RstParser): int = - result = p.idx - while true: - case p.tok[result].kind - of tkEof: - break - of tkIndent: - inc(result) - break - else: inc(result) + while currentTok(p).kind notin {tkIndent, tkEof}: + n.text.add(currentTok(p).symbol) + inc p.idx + result.add(n) + +proc parseQuotedLiteralBlock(p: var RstParser): PRstNode = + result = newRstNodeA(p, rnLiteralBlock) + var n = newLeaf("") + if currentTok(p).kind == tkIndent: + var indent = currInd(p) + while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines + var quoteSym = currentTok(p).symbol[0] + while true: + case currentTok(p).kind + of tkEof: + break + of tkIndent: + if currentTok(p).ival < indent: + break + elif currentTok(p).ival == indent: + if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteSym: + n.text.add("\n") + inc p.idx + elif nextTok(p).kind == tkIndent: + break + else: + rstMessage(p, mwRstStyle, "no newline after quoted literal block") + break + else: + rstMessage(p, mwRstStyle, + "unexpected indentation in quoted literal block") + break + else: + n.text.add(currentTok(p).symbol) + inc p.idx + result.add(n) + +proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode = + if kind == lbIndentedLiteralBlock: + result = parseLiteralBlock(p) + else: + result = parseQuotedLiteralBlock(p) + +proc getLevel(p: var RstParser, c: char, hasOverline: bool): int = + ## Returns (preliminary) heading level corresponding to `c` and + ## `hasOverline`. If level does not exist, add it first. + for i, hType in p.s.hLevels: + if hType.symbol == c and hType.hasOverline == hasOverline: + p.s.hLevels[i].line = curLine(p) + p.s.hLevels[i].hasPeers = true + return i + p.s.hLevels.add LevelInfo(symbol: c, hasOverline: hasOverline, + line: curLine(p), hasPeers: false) + result = p.s.hLevels.len - 1 + +proc countTitles(s: PRstSharedState, n: PRstNode) = + ## Fill `s.hTitleCnt` + if n == nil: return + for node in n.sons: + if node != nil: + if node.kind notin {rnOverline, rnSubstitutionDef, rnDefaultRole}: + break + if node.kind == rnOverline: + if s.hLevels[s.hTitleCnt].hasPeers: + break + inc s.hTitleCnt + if s.hTitleCnt >= 2: + break + +proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool = + ## check that underline/overline length is enough for the heading. + ## No support for Unicode. + if p.tok[adornmentIdx].symbol in ["::", "..", "|"]: + return false + if isMarkdownCodeBlock(p, adornmentIdx): + return false + var headlineLen = 0 + var failure = "" + if p.idx < adornmentIdx: # check for underline + if p.idx > 0: + headlineLen = currentTok(p).col - p.tok[adornmentIdx].col + if headlineLen > 0: + rstMessage(p, mwRstStyle, "indentation of heading text allowed" & + " only for overline titles") + for i in p.idx ..< adornmentIdx-1: # adornmentIdx-1 is a linebreak + headlineLen += p.tok[i].symbol.len + result = p.tok[adornmentIdx].symbol.len >= headlineLen and headlineLen != 0 + if not result: + failure = "(underline '" & p.tok[adornmentIdx].symbol & "' is too short)" + else: # p.idx == adornmentIdx, at overline. Check overline and underline + var i = p.idx + 2 + headlineLen = p.tok[i].col - p.tok[adornmentIdx].col + while p.tok[i].kind notin {tkEof, tkIndent}: + headlineLen += p.tok[i].symbol.len + inc i + if p.tok[i].kind == tkIndent and + p.tok[i+1].kind == tkAdornment and + p.tok[i+1].symbol[0] == p.tok[adornmentIdx].symbol[0]: + result = p.tok[adornmentIdx].symbol.len >= headlineLen and + headlineLen != 0 + if result: + result = p.tok[i+1].symbol == p.tok[adornmentIdx].symbol + if not result: + failure = "(underline '" & p.tok[i+1].symbol & "' does not match " & + "overline '" & p.tok[adornmentIdx].symbol & "')" + else: + failure = "(overline '" & p.tok[adornmentIdx].symbol & "' is too short)" + else: # it's not overline/underline section, not reporting error + return false + if not result: + rstMessage(p, meNewSectionExpected, failure) proc isLineBlock(p: RstParser): bool = var j = tokenAfterNewline(p) - result = (p.tok[p.idx].col == p.tok[j].col) and (p.tok[j].symbol == "|") or - (p.tok[j].col > p.tok[p.idx].col) + result = currentTok(p).col == p.tok[j].col and p.tok[j].symbol == "|" or + p.tok[j].col > currentTok(p).col or + p.tok[j].symbol == "\n" + +proc isMarkdownBlockQuote(p: RstParser): bool = + result = currentTok(p).symbol[0] == '>' + +proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind = + ## Checks that the following tokens are either Indented Literal Block or + ## Quoted Literal Block (which is not quite the same as Markdown quote block). + ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks + if currentTok(p).symbol == "::" and nextTok(p).kind == tkIndent: + if currInd(p) > nextTok(p).ival: + result = lbNone + if currInd(p) < nextTok(p).ival: + result = lbIndentedLiteralBlock + elif currInd(p) == nextTok(p).ival: + var i = p.idx + 1 + while p.tok[i].kind == tkIndent: inc i + const validQuotingCharacters = { + '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', + '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', + '_', '`', '{', '|', '}', '~'} + if p.tok[i].kind in {tkPunct, tkAdornment} and + p.tok[i].symbol[0] in validQuotingCharacters: + result = lbQuotedLiteralBlock + else: + result = lbNone + else: + result = lbNone proc predNL(p: RstParser): bool = result = true if p.idx > 0: - result = p.tok[p.idx-1].kind == tkIndent and - p.tok[p.idx-1].ival == currInd(p) + result = prevTok(p).kind == tkIndent and + prevTok(p).ival == currInd(p) proc isDefList(p: RstParser): bool = var j = tokenAfterNewline(p) - result = (p.tok[p.idx].col < p.tok[j].col) and - (p.tok[j].kind in {tkWord, tkOther, tkPunct}) and - (p.tok[j - 2].symbol != "::") + result = currentTok(p).col < p.tok[j].col and + p.tok[j].kind in {tkWord, tkOther, tkPunct} and + p.tok[j - 2].symbol != "::" + +proc `$`(t: Token): string = # for debugging only + result = "(" & $t.kind & " line=" & $t.line & " col=" & $t.col + if t.kind == tkIndent: result = result & " ival=" & $t.ival & ")" + else: result = result & " symbol=" & t.symbol & ")" + +proc skipNewlines(p: RstParser, j: int): int = + result = j + while p.tok[result].kind != tkEof and p.tok[result].kind == tkIndent: + inc result # skip blank lines + +proc skipNewlines(p: var RstParser) = + p.idx = skipNewlines(p, p.idx) + +const maxMdRelInd = 3 ## In Markdown: maximum indentation that does not yet + ## make the indented block a code + +proc isMdRelInd(outerInd, nestedInd: int): bool = + result = outerInd <= nestedInd and nestedInd <= outerInd + maxMdRelInd + +proc isMdDefBody(p: RstParser, j: int, termCol: int): bool = + let defCol = p.tok[j].col + result = p.tok[j].symbol == ":" and + isMdRelInd(termCol, defCol) and + p.tok[j+1].kind == tkWhite and + p.tok[j+2].kind in {tkWord, tkOther, tkPunct} + +proc isMdDefListItem(p: RstParser, idx: int): bool = + var j = tokenAfterNewline(p, idx) + j = skipNewlines(p, j) + let termCol = p.tok[j].col + result = isMdRelInd(currInd(p), termCol) and + isMdDefBody(p, j, termCol) proc isOptionList(p: RstParser): bool = result = match(p, p.idx, "-w") or match(p, p.idx, "--w") or @@ -1085,99 +2326,240 @@ proc isMarkdownHeadlinePattern(s: string): bool = proc isMarkdownHeadline(p: RstParser): bool = if roSupportMarkdown in p.s.options: - if isMarkdownHeadlinePattern(p.tok[p.idx].symbol) and p.tok[p.idx+1].kind == tkWhite: + if isMarkdownHeadlinePattern(currentTok(p).symbol) and nextTok(p).kind == tkWhite: if p.tok[p.idx+2].kind in {tkWord, tkOther, tkPunct}: result = true +proc findPipe(p: RstParser, start: int): bool = + var i = start + while true: + if p.tok[i].symbol == "|": return true + if p.tok[i].kind in {tkIndent, tkEof}: return false + inc i + proc whichSection(p: RstParser): RstNodeKind = - case p.tok[p.idx].kind + if currentTok(p).kind in {tkAdornment, tkPunct}: + # for punctuation sequences that can be both tkAdornment and tkPunct + if isMarkdownCodeBlock(p): + return rnCodeBlock + elif isRst(p) and currentTok(p).symbol == "::": + return rnLiteralBlock + elif currentTok(p).symbol == ".." and + nextTok(p).kind in {tkWhite, tkIndent}: + return rnDirective + case currentTok(p).kind of tkAdornment: - if match(p, p.idx + 1, "ii"): result = rnTransition + if match(p, p.idx + 1, "iI") and currentTok(p).symbol.len >= 4: + result = rnTransition + elif match(p, p.idx, "+a+"): + result = rnGridTable + rstMessage(p, meGridTableNotImplemented) elif match(p, p.idx + 1, " a"): result = rnTable - elif match(p, p.idx + 1, "i"): result = rnOverline - elif isMarkdownHeadline(p): - result = rnHeadline + elif currentTok(p).symbol == "|" and isLineBlock(p): + result = rnLineBlock + elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p): + result = rnMarkdownBlockQuote + elif (match(p, p.idx + 1, "i") and not match(p, p.idx + 2, "I")) and + isAdornmentHeadline(p, p.idx): + result = rnOverline else: - result = rnLeaf + result = rnParagraph of tkPunct: if isMarkdownHeadline(p): + result = rnMarkdownHeadline + elif roSupportMarkdown in p.s.options and predNL(p) and + match(p, p.idx, "| w") and findPipe(p, p.idx+3): + result = rnMarkdownTable + elif isMd(p) and isMdFootnoteName(p, reference=false): + result = rnFootnote + elif currentTok(p).symbol == "|" and isLineBlock(p): + result = rnLineBlock + elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p): + result = rnMarkdownBlockQuote + elif match(p, tokenAfterNewline(p), "aI") and + isAdornmentHeadline(p, tokenAfterNewline(p)): result = rnHeadline - elif match(p, tokenAfterNewline(p), "ai"): - result = rnHeadline - elif p.tok[p.idx].symbol == "::": - result = rnLiteralBlock - elif predNL(p) and - ((p.tok[p.idx].symbol == "+") or (p.tok[p.idx].symbol == "*") or - (p.tok[p.idx].symbol == "-")) and (p.tok[p.idx + 1].kind == tkWhite): + elif currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite: result = rnBulletList - elif (p.tok[p.idx].symbol == "|") and isLineBlock(p): - result = rnLineBlock - elif (p.tok[p.idx].symbol == "..") and predNL(p): - result = rnDirective - elif match(p, p.idx, ":w:") and predNL(p): - # (p.tok[p.idx].symbol == ":") + elif match(p, p.idx, ":w:E"): + # (currentTok(p).symbol == ":") result = rnFieldList - elif match(p, p.idx, "(e) ") or match(p, p.idx, "e. "): + elif match(p, p.idx, "(e) ") or match(p, p.idx, "e) ") or + match(p, p.idx, "e. "): result = rnEnumList - elif match(p, p.idx, "+a+"): - result = rnGridTable - rstMessage(p, meGridTableNotImplemented) - elif isDefList(p): - result = rnDefList elif isOptionList(p): result = rnOptionList + elif isRst(p) and isDefList(p): + result = rnDefList + elif isMd(p) and isMdDefListItem(p, p.idx): + result = rnMdDefList else: result = rnParagraph of tkWord, tkOther, tkWhite: - if match(p, tokenAfterNewline(p), "ai"): result = rnHeadline + let tokIdx = tokenAfterNewline(p) + if match(p, tokIdx, "aI"): + if isAdornmentHeadline(p, tokIdx): result = rnHeadline + else: result = rnParagraph elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList - elif isDefList(p): result = rnDefList + elif isRst(p) and isDefList(p): result = rnDefList + elif isMd(p) and isMdDefListItem(p, p.idx): + result = rnMdDefList else: result = rnParagraph else: result = rnLeaf proc parseLineBlock(p: var RstParser): PRstNode = + ## Returns rnLineBlock with all sons of type rnLineBlockItem result = nil - if p.tok[p.idx + 1].kind == tkWhite: - var col = p.tok[p.idx].col - result = newRstNode(rnLineBlock) - pushInd(p, p.tok[p.idx + 2].col) - inc(p.idx, 2) + if nextTok(p).kind in {tkWhite, tkIndent}: + var col = currentTok(p).col + result = newRstNodeA(p, rnLineBlock) while true: var item = newRstNode(rnLineBlockItem) - parseSection(p, item) - add(result, item) - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and - (p.tok[p.idx + 1].symbol == "|") and - (p.tok[p.idx + 2].kind == tkWhite): - inc(p.idx, 3) + if nextTok(p).kind == tkWhite: + if nextTok(p).symbol.len > 1: # pass additional indentation after '| ' + item.lineIndent = nextTok(p).symbol + inc p.idx, 2 + pushInd(p, p.tok[p.idx].col) + parseSection(p, item) + popInd(p) + else: # tkIndent => add an empty line + item.lineIndent = "\n" + inc p.idx, 1 + result.add(item) + if currentTok(p).kind == tkIndent and currentTok(p).ival == col and + nextTok(p).symbol == "|" and + p.tok[p.idx + 2].kind in {tkWhite, tkIndent}: + inc p.idx, 1 else: break - popInd(p) + +proc parseDoc(p: var RstParser): PRstNode {.gcsafe.} + +proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] = + result = ("", 0, 0) + var i = idx + result.sym &= p.tok[i].symbol + result.depth += p.tok[i].symbol.len + inc result.tokens + inc i + while p.tok[i].kind == tkWhite and i+1 < p.tok.len and + p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol[0] == '>': + result.sym &= p.tok[i].symbol + result.sym &= p.tok[i+1].symbol + result.depth += p.tok[i+1].symbol.len + inc result.tokens, 2 + inc i, 2 + +proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int): + PRstNode = + ## We define *segment* as a group of lines that starts with exactly the + ## same quote symbol. If the following lines don't contain any `>` (*lazy* + ## continuation) they considered as continuation of the current segment. + var q: RstParser # to delete `>` at a start of line and then parse normally + initParser(q, p.s) + q.col = p.col + q.line = p.line + var minCol = int.high # minimum colum num in the segment + while true: # move tokens of segment from `p` to `q` skipping `curSym` + case currentTok(p).kind + of tkEof: + break + of tkIndent: + if nextTok(p).kind in {tkIndent, tkEof}: + break + else: + if nextTok(p).symbol[0] == '>': + var (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1) + if quoteSym == curSym: # the segment continues + var iTok = tokenAfterNewline(p, p.idx+1) + if p.tok[iTok].kind notin {tkEof, tkIndent} and + p.tok[iTok].symbol[0] != '>': + rstMessage(p, mwRstStyle, + "two or more quoted lines are followed by unquoted line " & + $(curLine(p) + 1)) + break + q.tok.add currentTok(p) + var ival = currentTok(p).ival + quoteSym.len + inc p.idx, (1 + quoteTokens) # skip newline and > > > + if currentTok(p).kind == tkWhite: + ival += currentTok(p).symbol.len + inc p.idx + # fix up previous `tkIndent`s to ival (as if >>> were not there) + var j = q.tok.len - 1 + while j >= 0 and q.tok[j].kind == tkIndent: + q.tok[j].ival = ival + dec j + else: # next segment started + break + elif currentTok(p).ival < col: + break + else: # the segment continues, a case like: + # > beginning + # continuation + q.tok.add currentTok(p) + inc p.idx + else: + if currentTok(p).col < minCol: minCol = currentTok(p).col + q.tok.add currentTok(p) + inc p.idx + q.indentStack = @[minCol] + # if initial indentation `minCol` is > 0 then final newlines + # should be omitted so that parseDoc could advance to the end of tokens: + var j = q.tok.len - 1 + while q.tok[j].kind == tkIndent: dec j + q.tok.setLen (j+1) + q.tok.add Token(kind: tkEof, line: currentTok(p).line) + result = parseDoc(q) + +proc parseMarkdownBlockQuote(p: var RstParser): PRstNode = + var (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx) + let col = currentTok(p).col + result = newRstNodeA(p, rnMarkdownBlockQuote) + inc p.idx, quoteTokens # skip first > + while true: + var item = newRstNode(rnMarkdownBlockQuoteItem) + item.quotationDepth = quotationDepth + if currentTok(p).kind == tkWhite: inc p.idx + item.add parseMarkdownQuoteSegment(p, curSym, col) + result.add(item) + if currentTok(p).kind == tkIndent and currentTok(p).ival == col and + nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>': + (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx + 1) + inc p.idx, (1 + quoteTokens) # skip newline and > > > + else: + break proc parseParagraph(p: var RstParser, result: PRstNode) = while true: - case p.tok[p.idx].kind + case currentTok(p).kind of tkIndent: - if p.tok[p.idx + 1].kind == tkIndent: - inc(p.idx) - break - elif (p.tok[p.idx].ival == currInd(p)): - inc(p.idx) + if nextTok(p).kind == tkIndent: + inc p.idx + break # blank line breaks paragraph for both Md & Rst + elif currentTok(p).ival == currInd(p) or ( + isMd(p) and currentTok(p).ival > currInd(p)): + # (Md allows adding additional indentation inside paragraphs) + inc p.idx case whichSection(p) - of rnParagraph, rnLeaf, rnHeadline, rnOverline, rnDirective: - add(result, newRstNode(rnLeaf, " ")) + of rnParagraph, rnLeaf, rnHeadline, rnMarkdownHeadline, + rnOverline, rnDirective: + result.add newLeaf(" ") of rnLineBlock: - addIfNotNil(result, parseLineBlock(p)) - else: break + result.addIfNotNil(parseLineBlock(p)) + of rnMarkdownBlockQuote: + result.addIfNotNil(parseMarkdownBlockQuote(p)) + else: + dec p.idx # allow subsequent block to be parsed as another section + break else: break of tkPunct: - if (p.tok[p.idx].symbol == "::") and - (p.tok[p.idx + 1].kind == tkIndent) and - (currInd(p) < p.tok[p.idx + 1].ival): - add(result, newRstNode(rnLeaf, ":")) - inc(p.idx) # skip '::' - add(result, parseLiteralBlock(p)) + if isRst(p) and ( + let literalBlockKind = whichRstLiteralBlock(p); + literalBlockKind != lbNone): + result.add newLeaf(":") + inc p.idx # skip '::' + result.add(parseRstLiteralBlock(p, literalBlockKind)) break else: parseInline(p, result) @@ -1185,375 +2567,725 @@ proc parseParagraph(p: var RstParser, result: PRstNode) = parseInline(p, result) else: break +proc checkHeadingHierarchy(p: RstParser, lvl: int) = + if lvl - p.s.hCurLevel > 1: # broken hierarchy! + proc descr(l: int): string = + (if p.s.hLevels[l].hasOverline: "overline " else: "underline ") & + repeat(p.s.hLevels[l].symbol, 5) + var msg = "(section level inconsistent: " + msg.add descr(lvl) & " unexpectedly found, " & + "while the following intermediate section level(s) are missing on lines " + msg.add $p.s.hLevels[p.s.hCurLevel].line & ".." & $curLine(p) & ":" + for l in p.s.hCurLevel+1 .. lvl-1: + msg.add " " & descr(l) + if l != lvl-1: msg.add "," + rstMessage(p, meNewSectionExpected, msg & ")") + proc parseHeadline(p: var RstParser): PRstNode = - result = newRstNode(rnHeadline) if isMarkdownHeadline(p): - result.level = p.tok[p.idx].symbol.len - assert(p.tok[p.idx+1].kind == tkWhite) + result = newRstNode(rnMarkdownHeadline) + # Note that level hierarchy is not checked for markdown headings + result.level = currentTok(p).symbol.len + assert(nextTok(p).kind == tkWhite) inc p.idx, 2 parseUntilNewline(p, result) else: + result = newRstNode(rnHeadline) + parseUntilNewline(p, result) + assert(currentTok(p).kind == tkIndent) + assert(nextTok(p).kind == tkAdornment) + var c = nextTok(p).symbol[0] + inc p.idx, 2 + result.level = getLevel(p, c, hasOverline=false) + checkHeadingHierarchy(p, result.level) + p.s.hCurLevel = result.level + addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor) + p.s.tocPart.add result + +proc parseOverline(p: var RstParser): PRstNode = + var c = currentTok(p).symbol[0] + inc p.idx, 2 + result = newRstNode(rnOverline) + while true: parseUntilNewline(p, result) - assert(p.tok[p.idx].kind == tkIndent) - assert(p.tok[p.idx + 1].kind == tkAdornment) - var c = p.tok[p.idx + 1].symbol[0] - inc(p.idx, 2) - result.level = getLevel(p.s.underlineToLevel, p.s.uLevel, c) + if currentTok(p).kind == tkIndent: + inc p.idx + if prevTok(p).ival > currInd(p): + result.add newLeaf(" ") + else: + break + else: + break + result.level = getLevel(p, c, hasOverline=true) + checkHeadingHierarchy(p, result.level) + p.s.hCurLevel = result.level + if currentTok(p).kind == tkAdornment: + inc p.idx + if currentTok(p).kind == tkIndent: inc p.idx + addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor) + p.s.tocPart.add result + +proc fixHeadlines(s: PRstSharedState) = + # Fix up section levels depending on presence of a title and subtitle: + for n in s.tocPart: + if n.kind in {rnHeadline, rnOverline}: + if s.hTitleCnt == 2: + if n.level == 1: # it's the subtitle + n.level = 0 + elif n.level >= 2: # normal sections, start numbering from 1 + n.level -= 1 + elif s.hTitleCnt == 0: + n.level += 1 + # Set headline anchors: + for iHeading in 0 .. s.tocPart.high: + let n: PRstNode = s.tocPart[iHeading] + if n.level >= 1: + n.anchor = rstnodeToRefname(n) + # Fix anchors for uniqueness if `.. contents::` is present + if s.hasToc: + # Find the last higher level section for unique reference name + var sectionPrefix = "" + for i in countdown(iHeading - 1, 0): + if s.tocPart[i].level >= 1 and s.tocPart[i].level < n.level: + sectionPrefix = rstnodeToRefname(s.tocPart[i]) & "-" + break + if sectionPrefix != "": + n.anchor = sectionPrefix & n.anchor + s.tocPart.setLen 0 type - IntSeq = seq[int] + ColSpec = object + start, stop: int + RstCols = seq[ColSpec] + ColumnLimits = tuple # for Markdown + first, last: int + ColSeq = seq[ColumnLimits] + +proc tokStart(p: RstParser, idx: int): int = + result = p.tok[idx].col + +proc tokStart(p: RstParser): int = + result = tokStart(p, p.idx) + +proc tokEnd(p: RstParser, idx: int): int = + result = p.tok[idx].col + p.tok[idx].symbol.len - 1 proc tokEnd(p: RstParser): int = - result = p.tok[p.idx].col + len(p.tok[p.idx].symbol) - 1 + result = tokEnd(p, p.idx) -proc getColumns(p: var RstParser, cols: var IntSeq) = +proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int = + # Fills table column specification (or separator) `cols` and returns + # the next parser index after it. var L = 0 + result = startIdx while true: - inc(L) + inc L setLen(cols, L) - cols[L - 1] = tokEnd(p) - assert(p.tok[p.idx].kind == tkAdornment) - inc(p.idx) - if p.tok[p.idx].kind != tkWhite: break - inc(p.idx) - if p.tok[p.idx].kind != tkAdornment: break - if p.tok[p.idx].kind == tkIndent: inc(p.idx) - # last column has no limit: - cols[L - 1] = 32000 + cols[L - 1].start = tokStart(p, result) + cols[L - 1].stop = tokEnd(p, result) + assert(p.tok[result].kind == tkAdornment) + inc result + if p.tok[result].kind != tkWhite: break + inc result + if p.tok[result].kind != tkAdornment: break + if p.tok[result].kind == tkIndent: inc result -proc parseDoc(p: var RstParser): PRstNode {.gcsafe.} +proc checkColumns(p: RstParser, cols: RstCols) = + var i = p.idx + if p.tok[i].symbol[0] != '=': + stopOrWarn(p, meIllformedTable, + "only tables with `=` columns specification are allowed") + for col in 0 ..< cols.len: + if tokEnd(p, i) != cols[col].stop: + stopOrWarn(p, meIllformedTable, + "end of table column #$1 should end at position $2" % [ + $(col+1), $(cols[col].stop+ColRstOffset)], + p.tok[i].line, tokEnd(p, i)) + inc i + if col == cols.len - 1: + if p.tok[i].kind == tkWhite: + inc i + if p.tok[i].kind notin {tkIndent, tkEof}: + stopOrWarn(p, meIllformedTable, "extraneous column specification") + elif p.tok[i].kind == tkWhite: + inc i + else: + stopOrWarn(p, meIllformedTable, + "no enough table columns", p.tok[i].line, p.tok[i].col) + +proc getSpans(p: RstParser, nextLine: int, + cols: RstCols, unitedCols: RstCols): seq[int] = + ## Calculates how many columns a joined cell occupies. + if unitedCols.len > 0: + result = newSeq[int](unitedCols.len) + var + iCell = 0 + jCell = 0 + uCell = 0 + while jCell < cols.len: + if cols[jCell].stop < unitedCols[uCell].stop: + inc jCell + elif cols[jCell].stop == unitedCols[uCell].stop: + result[uCell] = jCell - iCell + 1 + iCell = jCell + 1 + jCell = jCell + 1 + inc uCell + else: + rstMessage(p, meIllformedTable, + "spanning underline does not match main table columns", + p.tok[nextLine].line, p.tok[nextLine].col) + +proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode = + ## Parses 1 row in RST simple table. + # Consider that columns may be spanning (united by using underline like ----): + let nextLine = tokenAfterNewline(p) + var unitedCols: RstCols + var afterSpan: int + if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-': + afterSpan = getColumns(p, unitedCols, nextLine) + if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar: + # legacy rst.nim compat.: allow punctuation like `----` in main boundaries + afterSpan = nextLine + unitedCols.setLen 0 + else: + afterSpan = nextLine + template colEnd(i): int = + if i == cols.len - 1: high(int) # last column has no limit + elif unitedCols.len > 0: unitedCols[i].stop else: cols[i].stop + template colStart(i): int = + if unitedCols.len > 0: unitedCols[i].start else: cols[i].start + var row = newSeq[string](if unitedCols.len > 0: unitedCols.len else: cols.len) + var spans: seq[int] = getSpans(p, nextLine, cols, unitedCols) + + let line = currentTok(p).line + # Iterate over the lines a single cell may span: + while true: + var nCell = 0 + # distribute tokens between cells in the current line: + while currentTok(p).kind notin {tkIndent, tkEof}: + if tokEnd(p) <= colEnd(nCell): + if tokStart(p) < colStart(nCell): + if currentTok(p).kind != tkWhite: + stopOrWarn(p, meIllformedTable, + "this word crosses table column from the left") + row[nCell].add(currentTok(p).symbol) + else: + row[nCell].add(currentTok(p).symbol) + inc p.idx + else: + if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite: + stopOrWarn(p, meIllformedTable, + "this word crosses table column from the right") + row[nCell].add(currentTok(p).symbol) + inc p.idx + inc nCell + if currentTok(p).kind == tkIndent: inc p.idx + if tokEnd(p) <= colEnd(0): break + # Continued current cells because the 1st column is empty. + if currentTok(p).kind in {tkEof, tkAdornment}: + break + for nCell in countup(1, high(row)): row[nCell].add('\n') + result = newRstNode(rnTableRow) + var q: RstParser + for uCell in 0 ..< row.len: + initParser(q, p.s) + q.col = colStart(uCell) + q.line = line - 1 + getTokens(row[uCell], q.tok) + let cell = newRstNode(rnTableDataCell) + cell.span = if spans.len == 0: 0 else: spans[uCell] + cell.add(parseDoc(q)) + result.add(cell) + if afterSpan > p.idx: + p.idx = afterSpan proc parseSimpleTable(p: var RstParser): PRstNode = - var - cols: IntSeq - row: seq[string] - i, last, line: int - c: char - q: RstParser - a, b: PRstNode - result = newRstNode(rnTable) - cols = @[] - row = @[] - a = nil - c = p.tok[p.idx].symbol[0] + var cols: RstCols + result = newRstNodeA(p, rnTable) + let startIdx = getColumns(p, cols, p.idx) + let colChar = currentTok(p).symbol[0] + checkColumns(p, cols) + p.idx = startIdx + result.colCount = cols.len while true: - if p.tok[p.idx].kind == tkAdornment: - last = tokenAfterNewline(p) - if p.tok[last].kind in {tkEof, tkIndent}: + if currentTok(p).kind == tkAdornment: + checkColumns(p, cols) + p.idx = tokenAfterNewline(p) + if currentTok(p).kind in {tkEof, tkIndent}: # skip last adornment line: - p.idx = last break - getColumns(p, cols) - setLen(row, len(cols)) - if a != nil: - for j in 0..len(a)-1: a.sons[j].kind = rnTableHeaderCell - if p.tok[p.idx].kind == tkEof: break - for j in countup(0, high(row)): row[j] = "" - # the following while loop iterates over the lines a single cell may span: - line = p.tok[p.idx].line - while true: - i = 0 - while not (p.tok[p.idx].kind in {tkIndent, tkEof}): - if (tokEnd(p) <= cols[i]): - add(row[i], p.tok[p.idx].symbol) - inc(p.idx) - else: - if p.tok[p.idx].kind == tkWhite: inc(p.idx) - inc(i) - if p.tok[p.idx].kind == tkIndent: inc(p.idx) - if tokEnd(p) <= cols[0]: break - if p.tok[p.idx].kind in {tkEof, tkAdornment}: break - for j in countup(1, high(row)): add(row[j], '\x0A') + if result.sons.len > 0: result.sons[^1].endsHeader = true + # fix rnTableDataCell -> rnTableHeaderCell for previous table rows: + for nRow in 0 ..< result.sons.len: + for nCell in 0 ..< result.sons[nRow].len: + template cell: PRstNode = result.sons[nRow].sons[nCell] + cell = PRstNode(kind: rnTableHeaderCell, sons: cell.sons, + span: cell.span, anchor: cell.anchor) + if currentTok(p).kind == tkEof: break + let tabRow = parseSimpleTableRow(p, cols, colChar) + result.add tabRow + +proc readTableRow(p: var RstParser): ColSeq = + if currentTok(p).symbol == "|": inc p.idx + while currentTok(p).kind notin {tkIndent, tkEof}: + var limits: ColumnLimits + limits.first = p.idx + while currentTok(p).kind notin {tkIndent, tkEof}: + if currentTok(p).symbol == "|" and prevTok(p).symbol != "\\": break + inc p.idx + limits.last = p.idx + result.add(limits) + if currentTok(p).kind in {tkIndent, tkEof}: break + inc p.idx + p.idx = tokenAfterNewline(p) + +proc getColContents(p: var RstParser, colLim: ColumnLimits): string = + for i in colLim.first ..< colLim.last: + result.add(p.tok[i].symbol) + result.strip + +proc isValidDelimiterRow(p: var RstParser, colNum: int): bool = + let row = readTableRow(p) + if row.len != colNum: return false + for limits in row: + let content = getColContents(p, limits) + if content.len < 3 or not (content.startsWith("--") or content.startsWith(":-")): + return false + return true + +proc parseMarkdownTable(p: var RstParser): PRstNode = + var + row: ColSeq + a, b: PRstNode + q: RstParser + result = newRstNodeA(p, rnMarkdownTable) + + proc parseRow(p: var RstParser, cellKind: RstNodeKind, result: PRstNode) = + row = readTableRow(p) + if result.colCount == 0: result.colCount = row.len # table header + elif row.len < result.colCount: row.setLen(result.colCount) a = newRstNode(rnTableRow) - for j in countup(0, high(row)): + for j in 0 ..< result.colCount: + b = newRstNode(cellKind) initParser(q, p.s) - q.col = cols[j] - q.line = line - 1 - q.filename = p.filename - q.col += getTokens(row[j], false, q.tok) - b = newRstNode(rnTableDataCell) - add(b, parseDoc(q)) - add(a, b) - add(result, a) + q.col = p.col + q.line = currentTok(p).line - 1 + getTokens(getColContents(p, row[j]), q.tok) + b.add(parseDoc(q)) + a.add(b) + result.add(a) + + parseRow(p, rnTableHeaderCell, result) + if not isValidDelimiterRow(p, result.colCount): + rstMessage(p, meMarkdownIllformedTable) + while predNL(p) and currentTok(p).symbol == "|": + parseRow(p, rnTableDataCell, result) proc parseTransition(p: var RstParser): PRstNode = - result = newRstNode(rnTransition) - inc(p.idx) - if p.tok[p.idx].kind == tkIndent: inc(p.idx) - if p.tok[p.idx].kind == tkIndent: inc(p.idx) - -proc parseOverline(p: var RstParser): PRstNode = - var c = p.tok[p.idx].symbol[0] - inc(p.idx, 2) - result = newRstNode(rnOverline) - while true: - parseUntilNewline(p, result) - if p.tok[p.idx].kind == tkIndent: - inc(p.idx) - if p.tok[p.idx - 1].ival > currInd(p): - add(result, newRstNode(rnLeaf, " ")) - else: - break - else: - break - result.level = getLevel(p.s.overlineToLevel, p.s.oLevel, c) - if p.tok[p.idx].kind == tkAdornment: - inc(p.idx) # XXX: check? - if p.tok[p.idx].kind == tkIndent: inc(p.idx) + result = newRstNodeA(p, rnTransition) + inc p.idx + if currentTok(p).kind == tkIndent: inc p.idx + if currentTok(p).kind == tkIndent: inc p.idx proc parseBulletList(p: var RstParser): PRstNode = result = nil - if p.tok[p.idx + 1].kind == tkWhite: - var bullet = p.tok[p.idx].symbol - var col = p.tok[p.idx].col - result = newRstNode(rnBulletList) + if nextTok(p).kind == tkWhite: + var bullet = currentTok(p).symbol + var col = currentTok(p).col + result = newRstNodeA(p, rnBulletList) pushInd(p, p.tok[p.idx + 2].col) - inc(p.idx, 2) + inc p.idx, 2 while true: var item = newRstNode(rnBulletItem) parseSection(p, item) - add(result, item) - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and - (p.tok[p.idx + 1].symbol == bullet) and - (p.tok[p.idx + 2].kind == tkWhite): - inc(p.idx, 3) + result.add(item) + if currentTok(p).kind == tkIndent and currentTok(p).ival == col and + nextTok(p).symbol == bullet and + p.tok[p.idx + 2].kind == tkWhite: + inc p.idx, 3 else: break popInd(p) proc parseOptionList(p: var RstParser): PRstNode = - result = newRstNode(rnOptionList) + result = newRstNodeA(p, rnOptionList) + let col = currentTok(p).col + var order = 1 while true: - if isOptionList(p): + if currentTok(p).col == col and isOptionList(p): var a = newRstNode(rnOptionGroup) var b = newRstNode(rnDescription) var c = newRstNode(rnOptionListItem) - if match(p, p.idx, "//w"): inc(p.idx) - while not (p.tok[p.idx].kind in {tkIndent, tkEof}): - if (p.tok[p.idx].kind == tkWhite) and (len(p.tok[p.idx].symbol) > 1): - inc(p.idx) + if match(p, p.idx, "//w"): inc p.idx + while currentTok(p).kind notin {tkIndent, tkEof}: + if currentTok(p).kind == tkWhite and currentTok(p).symbol.len > 1: + inc p.idx break - add(a, newLeaf(p)) - inc(p.idx) + a.add(newLeaf(p)) + inc p.idx var j = tokenAfterNewline(p) - if (j > 0) and (p.tok[j - 1].kind == tkIndent) and - (p.tok[j - 1].ival > currInd(p)): + if j > 0 and p.tok[j - 1].kind == tkIndent and p.tok[j - 1].ival > currInd(p): pushInd(p, p.tok[j - 1].ival) parseSection(p, b) popInd(p) else: parseLine(p, b) - if (p.tok[p.idx].kind == tkIndent): inc(p.idx) - add(c, a) - add(c, b) - add(result, c) + while currentTok(p).kind == tkIndent: inc p.idx + c.add(a) + c.add(b) + c.order = order; inc order + result.add(c) + else: + if currentTok(p).kind != tkEof: dec p.idx # back to tkIndent + break + +proc parseMdDefinitionList(p: var RstParser): PRstNode = + ## Parses (Pandoc/kramdown/PHPextra) Markdown definition lists. + result = newRstNodeA(p, rnMdDefList) + let termCol = currentTok(p).col + while true: + var item = newRstNode(rnDefItem) + var term = newRstNode(rnDefName) + parseLine(p, term) + skipNewlines(p) + inc p.idx, 2 # skip ":" and space + item.add(term) + while true: + var def = newRstNode(rnDefBody) + let indent = getMdBlockIndent(p) + pushInd(p, indent) + parseSection(p, def) + popInd(p) + item.add(def) + let j = skipNewlines(p, p.idx) + if isMdDefBody(p, j, termCol): # parse next definition body + p.idx = j + 2 # skip ":" and space + else: + break + result.add(item) + let j = skipNewlines(p, p.idx) + if p.tok[j].col == termCol and isMdDefListItem(p, j): + p.idx = j # parse next item else: break proc parseDefinitionList(p: var RstParser): PRstNode = result = nil var j = tokenAfterNewline(p) - 1 - if (j >= 1) and (p.tok[j].kind == tkIndent) and - (p.tok[j].ival > currInd(p)) and (p.tok[j - 1].symbol != "::"): - var col = p.tok[p.idx].col - result = newRstNode(rnDefList) + if j >= 1 and p.tok[j].kind == tkIndent and + p.tok[j].ival > currInd(p) and p.tok[j - 1].symbol != "::": + var col = currentTok(p).col + result = newRstNodeA(p, rnDefList) while true: + if isOptionList(p): + break # option list has priority over def.list j = p.idx var a = newRstNode(rnDefName) parseLine(p, a) - if (p.tok[p.idx].kind == tkIndent) and - (p.tok[p.idx].ival > currInd(p)) and - (p.tok[p.idx + 1].symbol != "::") and - not (p.tok[p.idx + 1].kind in {tkIndent, tkEof}): - pushInd(p, p.tok[p.idx].ival) + if currentTok(p).kind == tkIndent and + currentTok(p).ival > currInd(p) and + nextTok(p).symbol != "::" and + nextTok(p).kind notin {tkIndent, tkEof}: + pushInd(p, currentTok(p).ival) var b = newRstNode(rnDefBody) parseSection(p, b) var c = newRstNode(rnDefItem) - add(c, a) - add(c, b) - add(result, c) + c.add(a) + c.add(b) + result.add(c) popInd(p) else: p.idx = j break - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col): - inc(p.idx) + if currentTok(p).kind == tkIndent and currentTok(p).ival == col: + inc p.idx j = tokenAfterNewline(p) - 1 if j >= 1 and p.tok[j].kind == tkIndent and p.tok[j].ival > col and p.tok[j-1].symbol != "::" and p.tok[j+1].kind != tkIndent: discard else: break - if len(result) == 0: result = nil + if result.len == 0: result = nil proc parseEnumList(p: var RstParser): PRstNode = const - wildcards: array[0..2, string] = ["(e) ", "e) ", "e. "] - wildpos: array[0..2, int] = [1, 0, 0] - result = nil + wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ", + "(x) ", "x) ", "x. "] + # enumerator patterns, where 'x' means letter and 'n' means number + wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3] # number of tokens + wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0] + # position of enumeration sequence (number/letter) in enumerator + let col = currentTok(p).col var w = 0 - while w <= 2: + while w < wildcards.len: if match(p, p.idx, wildcards[w]): break - inc(w) - if w <= 2: - var col = p.tok[p.idx].col - result = newRstNode(rnEnumList) - inc(p.idx, wildpos[w] + 3) - var j = tokenAfterNewline(p) - if (p.tok[j].col == p.tok[p.idx].col) or match(p, j, wildcards[w]): - pushInd(p, p.tok[p.idx].col) - while true: - var item = newRstNode(rnEnumItem) - parseSection(p, item) - add(result, item) - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival == col) and - match(p, p.idx + 1, wildcards[w]): - inc(p.idx, wildpos[w] + 4) - else: + inc w + assert w < wildcards.len + + proc checkAfterNewline(p: RstParser, report: bool): bool = + ## If no indentation on the next line then parse as a normal paragraph + ## according to the RST spec. And report a warning with suggestions + let j = tokenAfterNewline(p, start=p.idx+1) + let requiredIndent = p.tok[p.idx+wildToken[w]].col + if p.tok[j].kind notin {tkIndent, tkEof} and + p.tok[j].col < requiredIndent and + (p.tok[j].col > col or + (p.tok[j].col == col and not match(p, j, wildcards[w]))): + if report: + let n = p.line + p.tok[j].line + let msg = "\n" & """ + not enough indentation on line $2 + (should be at column $3 if it's a continuation of enum. list), + or no blank line after line $1 (if it should be the next paragraph), + or no escaping \ at the beginning of line $1 + (if lines $1..$2 are a normal paragraph, not enum. list)""".dedent + let c = p.col + requiredIndent + ColRstOffset + rstMessage(p, mwRstStyle, msg % [$(n-1), $n, $c], + p.tok[j].line, p.tok[j].col) + result = false + else: + result = true + + if not checkAfterNewline(p, report = true): + return nil + result = newRstNodeA(p, rnEnumList) + let autoEnums = if roSupportMarkdown in p.s.options: @["#", "1"] else: @["#"] + var prevAE = "" # so as not allow mixing auto-enumerators `1` and `#` + var curEnum = 1 + for i in 0 ..< wildToken[w]-1: # add first enumerator with (, ), and . + if p.tok[p.idx + i].symbol == "#": + prevAE = "#" + result.labelFmt.add "1" + else: + result.labelFmt.add p.tok[p.idx + i].symbol + var prevEnum = p.tok[p.idx + wildIndex[w]].symbol + inc p.idx, wildToken[w] + while true: + var item = newRstNode(rnEnumItem) + pushInd(p, currentTok(p).col) + parseSection(p, item) + popInd(p) + result.add(item) + if currentTok(p).kind == tkIndent and currentTok(p).ival == col and + match(p, p.idx+1, wildcards[w]): + # don't report to avoid duplication of warning since for + # subsequent enum. items parseEnumList will be called second time: + if not checkAfterNewline(p, report = false): + break + let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol + # check that it's in sequence: enumerator == next(prevEnum) + if "n" in wildcards[w]: # arabic numeral + let prevEnumI = try: parseInt(prevEnum) except ValueError: 1 + if enumerator in autoEnums: + if prevAE != "" and enumerator != prevAE: + break + prevAE = enumerator + curEnum = prevEnumI + 1 + else: curEnum = (try: parseInt(enumerator) except ValueError: 1) + if curEnum - prevEnumI != 1: break - popInd(p) + prevEnum = enumerator + else: # a..z + let prevEnumI = ord(prevEnum[0]) + if enumerator == "#": curEnum = prevEnumI + 1 + else: curEnum = ord(enumerator[0]) + if curEnum - prevEnumI != 1: + break + prevEnum = $chr(curEnum) + inc p.idx, 1 + wildToken[w] else: - dec(p.idx, wildpos[w] + 3) - result = nil + break + +proc prefix(ftnType: FootnoteType): string = + case ftnType + of fnManualNumber: result = "footnote-" + of fnAutoNumber: result = "footnoteauto-" + of fnAutoNumberLabel: result = "footnote-" + of fnAutoSymbol: result = "footnotesym-" + of fnCitation: result = "citation-" + +proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} = + ## Parses footnotes and citations, always returns 2 sons: + ## + ## 1) footnote label, always containing rnInner with 1 or more sons + ## 2) footnote body, which may be nil + var label: PRstNode + if isRst(p): + inc p.idx # skip space after `..` + label = parseFootnoteName(p, reference=false) + if label == nil: + if isRst(p): + dec p.idx + return nil + result = newRstNode(rnFootnote) + result.add label + let (fnType, i) = getFootnoteType(p.s, label) + var name = "" + var anchor = fnType.prefix + case fnType + of fnManualNumber: + addFootnoteNumManual(p, i) + anchor.add $i + of fnAutoNumber, fnAutoNumberLabel: + name = rstnodeToRefname(label) + addFootnoteNumAuto(p, name) + if fnType == fnAutoNumberLabel: + anchor.add name + else: # fnAutoNumber + result.order = p.s.lineFootnoteNum.len + anchor.add $result.order + of fnAutoSymbol: + addFootnoteSymAuto(p) + result.order = p.s.lineFootnoteSym.len + anchor.add $p.s.lineFootnoteSym.len + of fnCitation: + anchor.add rstnodeToRefname(label) + addAnchorRst(p, anchor, target = result, anchorType = footnoteAnchor) + result.anchor = anchor + if currentTok(p).kind == tkWhite: inc p.idx + discard parseBlockContent(p, result, parseSectionWrapper) + if result.len < 2: + result.add nil proc sonKind(father: PRstNode, i: int): RstNodeKind = result = rnLeaf - if i < len(father): result = father.sons[i].kind + if i < father.len: result = father.sons[i].kind proc parseSection(p: var RstParser, result: PRstNode) = + ## parse top-level RST elements: sections, transitions and body elements. while true: var leave = false assert(p.idx >= 0) - while p.tok[p.idx].kind == tkIndent: - if currInd(p) == p.tok[p.idx].ival: - inc(p.idx) - elif p.tok[p.idx].ival > currInd(p): - pushInd(p, p.tok[p.idx].ival) - var a = newRstNode(rnBlockQuote) - parseSection(p, a) - add(result, a) - popInd(p) + while currentTok(p).kind == tkIndent: + if currInd(p) == currentTok(p).ival: + inc p.idx + elif currentTok(p).ival > currInd(p): + if roPreferMarkdown in p.s.options: # Markdown => normal paragraphs + if currentTok(p).ival - currInd(p) >= 4: + result.add parseLiteralBlock(p) + else: + pushInd(p, currentTok(p).ival) + parseSection(p, result) + popInd(p) + else: # RST mode => block quotes + pushInd(p, currentTok(p).ival) + var a = newRstNodeA(p, rnBlockQuote) + parseSection(p, a) + result.add(a) + popInd(p) else: + while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent: + inc p.idx # skip blank lines leave = true break - if leave or p.tok[p.idx].kind == tkEof: break + if leave or currentTok(p).kind == tkEof: break var a: PRstNode = nil var k = whichSection(p) case k of rnLiteralBlock: - inc(p.idx) # skip '::' + inc p.idx # skip '::' a = parseLiteralBlock(p) of rnBulletList: a = parseBulletList(p) of rnLineBlock: a = parseLineBlock(p) + of rnMarkdownBlockQuote: a = parseMarkdownBlockQuote(p) of rnDirective: a = parseDotDot(p) + of rnFootnote: a = parseFootnote(p) of rnEnumList: a = parseEnumList(p) - of rnLeaf: rstMessage(p, meNewSectionExpected) + of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)") of rnParagraph: discard of rnDefList: a = parseDefinitionList(p) + of rnMdDefList: a = parseMdDefinitionList(p) of rnFieldList: - if p.idx > 0: dec(p.idx) + if p.idx > 0: dec p.idx a = parseFields(p) of rnTransition: a = parseTransition(p) - of rnHeadline: a = parseHeadline(p) + of rnHeadline, rnMarkdownHeadline: a = parseHeadline(p) of rnOverline: a = parseOverline(p) of rnTable: a = parseSimpleTable(p) + of rnMarkdownTable: a = parseMarkdownTable(p) of rnOptionList: a = parseOptionList(p) else: #InternalError("rst.parseSection()") discard if a == nil and k != rnDirective: - a = newRstNode(rnParagraph) + a = newRstNodeA(p, rnParagraph) parseParagraph(p, a) - addIfNotNil(result, a) + result.addIfNotNil(a) if sonKind(result, 0) == rnParagraph and sonKind(result, 1) != rnParagraph: - result.sons[0].kind = rnInner - -proc parseSectionWrapper(p: var RstParser): PRstNode = - result = newRstNode(rnInner) - parseSection(p, result) - while (result.kind == rnInner) and (len(result) == 1): - result = result.sons[0] - -proc `$`(t: Token): string = - result = $t.kind & ' ' & t.symbol + result.sons[0] = newRstNode(rnInner, result.sons[0].sons, + anchor=result.sons[0].anchor) proc parseDoc(p: var RstParser): PRstNode = result = parseSectionWrapper(p) - if p.tok[p.idx].kind != tkEof: - when false: - assert isAllocatedPtr(cast[pointer](p.tok)) - for i in 0 .. high(p.tok): - assert isNil(p.tok[i].symbol) or - isAllocatedPtr(cast[pointer](p.tok[i].symbol)) - echo "index: ", p.idx, " length: ", high(p.tok), "##", - p.tok[p.idx-1], p.tok[p.idx], p.tok[p.idx+1] - #assert isAllocatedPtr(cast[pointer](p.indentStack)) + if currentTok(p).kind != tkEof: rstMessage(p, meGeneralParseError) type DirFlag = enum hasArg, hasOptions, argIsFile, argIsWord DirFlags = set[DirFlag] - SectionParser = proc (p: var RstParser): PRstNode {.nimcall.} -proc parseDirective(p: var RstParser, flags: DirFlags): PRstNode = +proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode = ## Parses arguments and options for a directive block. ## ## A directive block will always have three sons: the arguments for the - ## directive (rnDirArg), the options (rnFieldList) and the block - ## (rnLineBlock). This proc parses the two first nodes, the block is left to + ## directive (rnDirArg), the options (rnFieldList) and the directive + ## content block. This proc parses the two first nodes, the 3rd is left to ## the outer `parseDirective` call. ## ## Both rnDirArg and rnFieldList children nodes might be nil, so you need to ## check them before accessing. - result = newRstNode(rnDirective) + result = newRstNodeA(p, k) + if k == rnCodeBlock: result.info = lineInfo(p) var args: PRstNode = nil var options: PRstNode = nil if hasArg in flags: args = newRstNode(rnDirArg) if argIsFile in flags: while true: - case p.tok[p.idx].kind + case currentTok(p).kind of tkWord, tkOther, tkPunct, tkAdornment: - add(args, newLeaf(p)) - inc(p.idx) + args.add(newLeaf(p)) + inc p.idx else: break elif argIsWord in flags: - while p.tok[p.idx].kind == tkWhite: inc(p.idx) - if p.tok[p.idx].kind == tkWord: - add(args, newLeaf(p)) - inc(p.idx) + while currentTok(p).kind == tkWhite: inc p.idx + if currentTok(p).kind == tkWord: + args.add(newLeaf(p)) + inc p.idx else: args = nil else: parseLine(p, args) - add(result, args) + result.add(args) if hasOptions in flags: - if (p.tok[p.idx].kind == tkIndent) and (p.tok[p.idx].ival >= 3) and - (p.tok[p.idx + 1].symbol == ":"): + if currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) and + nextTok(p).symbol == ":": + pushInd(p, currentTok(p).ival) options = parseFields(p) - add(result, options) - -proc indFollows(p: RstParser): bool = - result = p.tok[p.idx].kind == tkIndent and p.tok[p.idx].ival > currInd(p) + popInd(p) + result.add(options) -proc parseDirective(p: var RstParser, flags: DirFlags, +proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags, contentParser: SectionParser): PRstNode = - ## Returns a generic rnDirective tree. + ## A helper proc that does main work for specific directive procs. + ## Always returns a generic rnDirective tree with these 3 children: ## - ## The children are rnDirArg, rnFieldList and rnLineBlock. Any might be nil. - result = parseDirective(p, flags) - if not isNil(contentParser) and indFollows(p): - pushInd(p, p.tok[p.idx].ival) - var content = contentParser(p) - popInd(p) - add(result, content) + ## 1) rnDirArg + ## 2) rnFieldList + ## 3) a node returned by `contentParser`. + ## + ## .. warning:: Any of the 3 children may be nil. + result = parseDirective(p, k, flags) + if not isNil(contentParser) and + parseBlockContent(p, result, contentParser): + discard "result is updated by parseBlockContent" else: - add(result, PRstNode(nil)) + result.add(PRstNode(nil)) proc parseDirBody(p: var RstParser, contentParser: SectionParser): PRstNode = if indFollows(p): - pushInd(p, p.tok[p.idx].ival) + pushInd(p, currentTok(p).ival) result = contentParser(p) popInd(p) @@ -1580,7 +3312,7 @@ proc dirInclude(p: var RstParser): PRstNode = # encoding (if specified). # result = nil - var n = parseDirective(p, {hasArg, argIsFile, hasOptions}, nil) + var n = parseDirective(p, rnDirective, {hasArg, argIsFile, hasOptions}, nil) var filename = strip(addNodes(n.sons[0])) var path = p.findRelativeFile(filename) if path == "": @@ -1589,15 +3321,15 @@ proc dirInclude(p: var RstParser): PRstNode = # XXX: error handling; recursive file inclusion! if getFieldValue(n, "literal") != "": result = newRstNode(rnLiteralBlock) - add(result, newRstNode(rnLeaf, readFile(path))) + result.add newLeaf(readFile(path)) else: - let inputString = readFile(path).string() + let inputString = readFile(path) let startPosition = block: let searchFor = n.getFieldValue("start-after").strip() if searchFor != "": let pos = inputString.find(searchFor) - if pos != -1: pos + searchFor.len() + if pos != -1: pos + searchFor.len else: 0 else: 0 @@ -1614,15 +3346,16 @@ proc dirInclude(p: var RstParser): PRstNode = var q: RstParser initParser(q, p.s) - q.filename = path - q.col += getTokens( - inputString[startPosition..endPosition].strip(), - false, + let saveFileIdx = p.s.currFileIdx + setCurrFilename(p.s, path) + getTokens( + inputString[startPosition..endPosition], q.tok) # workaround a GCC bug; more like the interior pointer bug? #if find(q.tok[high(q.tok)].symbol, "\0\x01\x02") > 0: # InternalError("Too many binary zeros in include file") result = parseDoc(q) + p.s.currFileIdx = saveFileIdx proc dirCodeBlock(p: var RstParser, nimExtension = false): PRstNode = ## Parses a code block. @@ -1640,57 +3373,54 @@ proc dirCodeBlock(p: var RstParser, nimExtension = false): PRstNode = ## ## As an extension this proc will process the ``file`` extension field and if ## present will replace the code block with the contents of the referenced - ## file. - result = parseDirective(p, {hasArg, hasOptions}, parseLiteralBlock) - var filename = strip(getFieldValue(result, "file")) - if filename != "": - var path = p.findRelativeFile(filename) - if path == "": rstMessage(p, meCannotOpenFile, filename) - var n = newRstNode(rnLiteralBlock) - add(n, newRstNode(rnLeaf, readFile(path))) - result.sons[2] = n + ## file. This behaviour is disabled in sandboxed mode and can be re-enabled + ## with the `roSandboxDisabled` flag. + result = parseDirective(p, rnCodeBlock, {hasArg, hasOptions}, parseLiteralBlock) + mayLoadFile(p, result) - # Extend the field block if we are using our custom extension. + # Extend the field block if we are using our custom Nim extension. if nimExtension: - # Create a field block if the input block didn't have any. - if result.sons[1].isNil: result.sons[1] = newRstNode(rnFieldList) - assert result.sons[1].kind == rnFieldList - # Hook the extra field and specify the Nim language as value. - var extraNode = newRstNode(rnField) - extraNode.add(newRstNode(rnFieldName)) - extraNode.add(newRstNode(rnFieldBody)) - extraNode.sons[0].add(newRstNode(rnLeaf, "default-language")) - extraNode.sons[1].add(newRstNode(rnLeaf, "Nim")) - result.sons[1].add(extraNode) - - result.kind = rnCodeBlock + defaultCodeLangNim(p, result) proc dirContainer(p: var RstParser): PRstNode = - result = parseDirective(p, {hasArg}, parseSectionWrapper) - assert(result.kind == rnDirective) - assert(len(result) == 3) - result.kind = rnContainer + result = parseDirective(p, rnContainer, {hasArg}, parseSectionWrapper) + assert(result.len == 3) proc dirImage(p: var RstParser): PRstNode = - result = parseDirective(p, {hasOptions, hasArg, argIsFile}, nil) - result.kind = rnImage + result = parseDirective(p, rnImage, {hasOptions, hasArg, argIsFile}, nil) proc dirFigure(p: var RstParser): PRstNode = - result = parseDirective(p, {hasOptions, hasArg, argIsFile}, + result = parseDirective(p, rnFigure, {hasOptions, hasArg, argIsFile}, parseSectionWrapper) - result.kind = rnFigure proc dirTitle(p: var RstParser): PRstNode = - result = parseDirective(p, {hasArg}, nil) - result.kind = rnTitle + result = parseDirective(p, rnTitle, {hasArg}, nil) proc dirContents(p: var RstParser): PRstNode = - result = parseDirective(p, {hasArg}, nil) - result.kind = rnContents + result = parseDirective(p, rnContents, {hasArg}, nil) + p.s.hasToc = true proc dirIndex(p: var RstParser): PRstNode = - result = parseDirective(p, {}, parseSectionWrapper) - result.kind = rnIndex + result = parseDirective(p, rnIndex, {}, parseSectionWrapper) + +proc dirAdmonition(p: var RstParser, d: string): PRstNode = + result = parseDirective(p, rnAdmonition, {}, parseSectionWrapper) + result.adType = d + +proc dirDefaultRole(p: var RstParser): PRstNode = + result = parseDirective(p, rnDefaultRole, {hasArg}, nil) + if result.sons[0].len == 0: p.s.currRole = defaultRole(p.s.options) + else: + assert result.sons[0].sons[0].kind == rnLeaf + p.s.currRole = result.sons[0].sons[0].text + p.s.currRoleKind = whichRole(p, p.s.currRole) + +proc dirRole(p: var RstParser): PRstNode = + result = parseDirective(p, rnDirective, {hasArg, hasOptions}, nil) + # just check that language is supported, TODO: real role association + let lang = getFieldValue(result, "language").strip + if lang != "" and getSourceLanguage(lang) == langNone: + rstMessage(p, mwUnsupportedLanguage, lang) proc dirRawAux(p: var RstParser, result: var PRstNode, kind: RstNodeKind, contentParser: SectionParser) = @@ -1702,10 +3432,10 @@ proc dirRawAux(p: var RstParser, result: var PRstNode, kind: RstNodeKind, else: var f = readFile(path) result = newRstNode(kind) - add(result, newRstNode(rnLeaf, f)) + result.add newLeaf(f) else: - result.kind = kind - add(result, parseDirBody(p, contentParser)) + result = newRstNode(kind, result.sons) + result.add(parseDirBody(p, contentParser)) proc dirRaw(p: var RstParser): PRstNode = # @@ -1716,7 +3446,7 @@ proc dirRaw(p: var RstParser): PRstNode = # # html # latex - result = parseDirective(p, {hasOptions, hasArg, argIsWord}) + result = parseDirective(p, rnDirective, {hasOptions, hasArg, argIsWord}) if result.sons[0] != nil: if cmpIgnoreCase(result.sons[0].sons[0].text, "html") == 0: dirRawAux(p, result, rnRawHtml, parseLiteralBlock) @@ -1727,99 +3457,430 @@ proc dirRaw(p: var RstParser): PRstNode = else: dirRawAux(p, result, rnRaw, parseSectionWrapper) +proc dirImportdoc(p: var RstParser): PRstNode = + result = parseDirective(p, rnDirective, {}, parseLiteralBlock) + assert result.sons[2].kind == rnLiteralBlock + assert result.sons[2].sons[0].kind == rnLeaf + let filenames: seq[string] = split(result.sons[2].sons[0].text, seps = {','}) + proc rmSpaces(s: string): string = s.split.join("") + for origFilename in filenames: + p.s.idxImports[origFilename.rmSpaces] = ImportdocInfo(fromInfo: lineInfo(p)) + +proc selectDir(p: var RstParser, d: string): PRstNode = + result = nil + let tok = p.tok[p.idx-2] # report on directive in ".. directive::" + if roSandboxDisabled notin p.s.options: + if d notin SandboxDirAllowlist: + rstMessage(p, meSandboxedDirective, d, tok.line, tok.col) + + case d + of "admonition", "attention", "caution": result = dirAdmonition(p, d) + of "code": result = dirCodeBlock(p) + of "code-block": result = dirCodeBlock(p, nimExtension = true) + of "container": result = dirContainer(p) + of "contents": result = dirContents(p) + of "danger": result = dirAdmonition(p, d) + of "default-role": result = dirDefaultRole(p) + of "error": result = dirAdmonition(p, d) + of "figure": result = dirFigure(p) + of "hint": result = dirAdmonition(p, d) + of "image": result = dirImage(p) + of "important": result = dirAdmonition(p, d) + of "importdoc": result = dirImportdoc(p) + of "include": result = dirInclude(p) + of "index": result = dirIndex(p) + of "note": result = dirAdmonition(p, d) + of "raw": + if roSupportRawDirective in p.s.options: + result = dirRaw(p) + else: + rstMessage(p, meInvalidDirective, d) + of "role": result = dirRole(p) + of "tip": result = dirAdmonition(p, d) + of "title": result = dirTitle(p) + of "warning": result = dirAdmonition(p, d) + else: + rstMessage(p, meInvalidDirective, d, tok.line, tok.col) + proc parseDotDot(p: var RstParser): PRstNode = + # parse "explicit markup blocks" result = nil - var col = p.tok[p.idx].col - inc(p.idx) + var n: PRstNode # to store result, workaround for bug 16855 + var col = currentTok(p).col + inc p.idx var d = getDirective(p) if d != "": pushInd(p, col) - case getDirKind(d) - of dkInclude: result = dirInclude(p) - of dkImage: result = dirImage(p) - of dkFigure: result = dirFigure(p) - of dkTitle: result = dirTitle(p) - of dkContainer: result = dirContainer(p) - of dkContents: result = dirContents(p) - of dkRaw: - if roSupportRawDirective in p.s.options: - result = dirRaw(p) - else: - rstMessage(p, meInvalidDirective, d) - of dkCode: result = dirCodeBlock(p) - of dkCodeBlock: result = dirCodeBlock(p, nimExtension = true) - of dkIndex: result = dirIndex(p) - else: rstMessage(p, meInvalidDirective, d) + result = selectDir(p, d) popInd(p) elif match(p, p.idx, " _"): # hyperlink target: - inc(p.idx, 2) - var a = getReferenceName(p, ":") - if p.tok[p.idx].kind == tkWhite: inc(p.idx) + inc p.idx, 2 + var ending = ":" + if currentTok(p).symbol == "`": + inc p.idx + ending = "`" + var a = getReferenceName(p, ending) + if ending == "`": + if currentTok(p).symbol == ":": + inc p.idx + else: + rstMessage(p, meExpected, ":") + if currentTok(p).kind == tkWhite: inc p.idx var b = untilEol(p) - setRef(p, rstnodeToRefname(a), b) + if len(b) == 0: # set internal anchor + p.curAnchors.add ManualAnchor( + alias: linkName(a), anchor: rstnodeToRefname(a), info: prevLineInfo(p) + ) + else: # external hyperlink + setRef(p, rstnodeToRefname(a), b, refType=hyperlinkAlias) elif match(p, p.idx, " |"): # substitution definitions: - inc(p.idx, 2) + inc p.idx, 2 var a = getReferenceName(p, "|") var b: PRstNode - if p.tok[p.idx].kind == tkWhite: inc(p.idx) - if cmpIgnoreStyle(p.tok[p.idx].symbol, "replace") == 0: - inc(p.idx) + if currentTok(p).kind == tkWhite: inc p.idx + if cmpIgnoreStyle(currentTok(p).symbol, "replace") == 0: + inc p.idx expect(p, "::") b = untilEol(p) - elif cmpIgnoreStyle(p.tok[p.idx].symbol, "image") == 0: - inc(p.idx) + elif cmpIgnoreStyle(currentTok(p).symbol, "image") == 0: + inc p.idx b = dirImage(p) else: - rstMessage(p, meInvalidDirective, p.tok[p.idx].symbol) + rstMessage(p, meInvalidDirective, currentTok(p).symbol) setSub(p, addNodes(a), b) - elif match(p, p.idx, " ["): - # footnotes, citations - inc(p.idx, 2) - var a = getReferenceName(p, "]") - if p.tok[p.idx].kind == tkWhite: inc(p.idx) - var b = untilEol(p) - setRef(p, rstnodeToRefname(a), b) + elif match(p, p.idx, " [") and + (n = parseFootnote(p); n != nil): + result = n else: - result = parseComment(p) - -proc resolveSubs(p: var RstParser, n: PRstNode): PRstNode = + result = parseComment(p, col) + +proc rstParsePass1*(fragment: string, + line, column: int, + sharedState: PRstSharedState): PRstNode = + ## Parses an RST `fragment`. + ## The result should be further processed by + ## preparePass2_ and resolveSubs_ (which is pass 2). + var p: RstParser + initParser(p, sharedState) + p.line = line + p.col = column + getTokens(fragment, p.tok) + result = parseDoc(p) + +proc extractLinkEnd(x: string): string = + ## From links like `path/to/file.html#/%` extract `file.html#/%`. + let i = find(x, '#') + let last = + if i >= 0: i + else: x.len - 1 + let j = rfind(x, '/', start=0, last=last) + if j >= 0: + result = x[j+1 .. ^1] + else: + result = x + +proc loadIdxFile(s: var PRstSharedState, origFilename: string) = + doAssert roSandboxDisabled in s.options + var info: TLineInfo + info.fileIndex = addFilename(s, origFilename) + var (dir, basename, ext) = origFilename.splitFile + if ext notin [".md", ".rst", ".nim", ""]: + rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo, + meCannotOpenFile, origFilename & ": unknown extension") + let idxFilename = dir / basename & ".idx" + let (idxPath, linkRelPath) = s.findRefFile(idxFilename) + s.idxImports[origFilename].linkRelPath = linkRelPath + var + fileEntries: seq[IndexEntry] + title: IndexEntry + try: + (fileEntries, title) = parseIdxFile(idxPath) + except IOError: + rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo, + meCannotOpenFile, idxPath) + except ValueError as e: + s.msgHandler(idxPath, LineRstInit, ColRstInit, meInvalidField, e.msg) + + var isMarkup = false # for sanity check to avoid mixing .md <-> .nim + for entry in fileEntries: + # Though target .idx already has inside it the path to HTML relative + # project's root, we won't rely on it and use `linkRelPath` instead. + let refn = extractLinkEnd(entry.link) + # select either markup (rst/md) or Nim cases: + if entry.kind in {ieMarkupTitle, ieNimTitle}: + s.idxImports[origFilename].title = entry.keyword + case entry.kind + of ieIdxRole, ieHeading, ieMarkupTitle: + if ext == ".nim" and entry.kind == ieMarkupTitle: + rstMessage(s, idxPath, meInvalidField, + $ieMarkupTitle & " in supposedly .nim-derived file") + if entry.kind == ieMarkupTitle: + isMarkup = true + info.line = entry.line.uint16 + addAnchorExtRst(s, key = entry.keyword, refn = refn, + anchorType = headlineAnchor, info=info) + of ieNim, ieNimGroup, ieNimTitle: + if ext in [".md", ".rst"] or isMarkup: + rstMessage(s, idxPath, meInvalidField, + $entry.kind & " in supposedly markup-derived file") + s.nimFileImported = true + var langSym: LangSymbol + if entry.kind in {ieNim, ieNimTitle}: + var q: RstParser + initParser(q, s) + info.line = entry.line.uint16 + setLen(q.tok, 0) + q.idx = 0 + getTokens(entry.linkTitle, q.tok) + var sons = newSeq[PRstNode](q.tok.len) + for i in 0 ..< q.tok.len: sons[i] = newLeaf(q.tok[i].symbol) + let linkTitle = newRstNode(rnInner, sons) + langSym = linkTitle.toLangSymbol + else: # entry.kind == ieNimGroup + langSym = langSymbolGroup(kind=entry.linkTitle, name=entry.keyword) + addAnchorNim(s, external = true, refn = refn, tooltip = entry.linkDesc, + langSym = langSym, priority = -4, # lowest + info = info, module = info.fileIndex) + doAssert s.idxImports[origFilename].title != "" + +proc preparePass2*(s: var PRstSharedState, mainNode: PRstNode, importdoc = true) = + ## Records titles in node `mainNode` and orders footnotes. + countTitles(s, mainNode) + fixHeadlines(s) + orderFootnotes(s) + if importdoc: + for origFilename in s.idxImports.keys: + loadIdxFile(s, origFilename) + +proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode = + # Associate this link alias with its target and change node kind to + # rnHyperlink or rnInternalRef appropriately. + var desc, alias: PRstNode + if n.kind == rnPandocRef: # link like [desc][alias] + desc = n.sons[0] + alias = n.sons[1] + else: # n.kind == rnRstRef, link like `desc=alias`_ + desc = n + alias = n + type LinkDef = object + ar: AnchorRule + priority: int + tooltip: string + target: PRstNode + info: TLineInfo + externFilename: string + # when external anchor: origin filename where anchor was defined + isTitle: bool + proc cmp(x, y: LinkDef): int = + result = cmp(x.priority, y.priority) + if result == 0: + result = cmp(x.target, y.target) + var foundLinks: seq[LinkDef] + let refn = rstnodeToRefname(alias) + var hyperlinks = findRef(s, refn) + for y in hyperlinks: + foundLinks.add LinkDef(ar: arHyperlink, priority: refPriority(y.kind), + target: y.value, info: y.info, + tooltip: "(" & $y.kind & ")") + let substRst = findMainAnchorRst(s, alias.addNodes, n.info) + template getExternFilename(subst: AnchorSubst): string = + if subst.kind == arExternalRst or + (subst.kind == arNim and subst.external): + getFilename(s, subst) + else: "" + for subst in substRst: + var refname, fullRefname: string + if subst.kind == arInternalRst: + refname = subst.target.anchor + fullRefname = refname + else: # arExternalRst + refname = subst.refnameExt + fullRefname = s.idxImports[getFilename(s, subst)].linkRelPath & + "/" & refname + let anchorType = + if subst.kind == arInternalRst: subst.anchorType + else: subst.anchorTypeExt # arExternalRst + foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority, + target: newLeaf(fullRefname), + info: subst.info, + externFilename: getExternFilename(subst), + isTitle: isDocumentationTitle(refname), + tooltip: "(" & $anchorType & ")") + # find anchors automatically generated from Nim symbols + if roNimFile in s.options or s.nimFileImported: + let substNim = findMainAnchorNim(s, signature=alias, n.info) + for subst in substNim: + let fullRefname = + if subst.external: + s.idxImports[getFilename(s, subst)].linkRelPath & + "/" & subst.refname + else: subst.refname + foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority, + target: newLeaf(fullRefname), + externFilename: getExternFilename(subst), + isTitle: isDocumentationTitle(subst.refname), + info: subst.info, tooltip: subst.tooltip) + foundLinks.sort(cmp = cmp, order = Descending) + let aliasStr = addNodes(alias) + if foundLinks.len >= 1: + if foundLinks[0].externFilename != "": + s.idxImports[foundLinks[0].externFilename].used = true + let kind = if foundLinks[0].ar in {arHyperlink, arExternalRst}: rnHyperlink + elif foundLinks[0].ar == arNim: + if foundLinks[0].externFilename == "": rnNimdocRef + else: rnHyperlink + else: rnInternalRef + result = newRstNode(kind) + let documentName = # filename without ext for `.nim`, title for `.md` + if foundLinks[0].ar == arNim: + changeFileExt(foundLinks[0].externFilename.extractFilename, "") + elif foundLinks[0].externFilename != "": + s.idxImports[foundLinks[0].externFilename].title + else: foundLinks[0].externFilename.extractFilename + let linkText = + if foundLinks[0].externFilename != "": + if foundLinks[0].isTitle: newLeaf(addNodes(desc)) + else: newLeaf(documentName & ": " & addNodes(desc)) + else: + newRstNode(rnInner, desc.sons) + result.sons = @[linkText, foundLinks[0].target] + if kind == rnNimdocRef: result.tooltip = foundLinks[0].tooltip + if foundLinks.len > 1: # report ambiguous link + var targets = newSeq[string]() + for l in foundLinks: + var t = " " + if s.filenames.len > 1: + t.add getFilename(s.filenames, l.info.fileIndex) + let n = l.info.line + let c = l.info.col + ColRstOffset + t.add "($1, $2): $3" % [$n, $c, l.tooltip] + targets.add t + rstMessage(s.filenames, s.msgHandler, n.info, mwAmbiguousLink, + "`$1`\n clash:\n$2" % [ + aliasStr, targets.join("\n")]) + else: # nothing found + result = n + rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, aliasStr) + +proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode = + ## Makes pass 2 of RST parsing. + ## Resolves substitutions and anchor aliases, groups footnotes. + ## Takes input node `n` and returns the same node with recursive + ## substitutions in `n.sons` to `result`. result = n if n == nil: return case n.kind of rnSubstitutionReferences: - var x = findSub(p, n) + var x = findSub(s, n) if x >= 0: - result = p.s.subs[x].value + result = s.subs[x].value else: var key = addNodes(n) var e = getEnv(key) - if e != "": result = newRstNode(rnLeaf, e) - else: rstMessage(p, mwUnknownSubstitution, key) - of rnRef: - var y = findRef(p, rstnodeToRefname(n)) - if y != nil: - result = newRstNode(rnHyperlink) - n.kind = rnInner - add(result, n) - add(result, y) + if e != "": result = newLeaf(e) + else: rstMessage(s.filenames, s.msgHandler, n.info, + mwUnknownSubstitution, key) + of rnRstRef, rnPandocRef: + result = resolveLink(s, n) + of rnFootnote: + var (fnType, num) = getFootnoteType(s, n.sons[0]) + case fnType + of fnManualNumber, fnCitation: + discard "no need to alter fixed text" + of fnAutoNumberLabel, fnAutoNumber: + if fnType == fnAutoNumberLabel: + let labelR = rstnodeToRefname(n.sons[0]) + num = getFootnoteNum(s, labelR) + else: + num = getFootnoteNum(s, n.order) + var nn = newRstNode(rnInner) + nn.add newLeaf($num) + result.sons[0] = nn + of fnAutoSymbol: + let sym = getAutoSymbol(s, n.order) + n.sons[0].sons[0].text = sym + n.sons[1] = resolveSubs(s, n.sons[1]) + of rnFootnoteRef: + var (fnType, num) = getFootnoteType(s, n.sons[0]) + template addLabel(number: int | string) = + var nn = newRstNode(rnInner) + nn.add newLeaf($number) + result.add(nn) + var refn = fnType.prefix + # create new rnFootnoteRef, add final label, and finalize target refn: + result = newRstNode(rnFootnoteRef, info = n.info) + case fnType + of fnManualNumber: + addLabel num + refn.add $num + of fnAutoNumber: + inc s.currFootnoteNumRef + addLabel getFootnoteNum(s, s.currFootnoteNumRef) + refn.add $s.currFootnoteNumRef + of fnAutoNumberLabel: + addLabel getFootnoteNum(s, rstnodeToRefname(n)) + refn.add rstnodeToRefname(n) + of fnAutoSymbol: + inc s.currFootnoteSymRef + addLabel getAutoSymbol(s, s.currFootnoteSymRef) + refn.add $s.currFootnoteSymRef + of fnCitation: + result.add n.sons[0] + refn.add rstnodeToRefname(n) + # TODO: correctly report ambiguities + let anchorInfo = findMainAnchorRst(s, refn, n.info) + if anchorInfo.len != 0: + result.add newLeaf(anchorInfo[0].target.anchor) # add link + else: + rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, refn) + result.add newLeaf(refn) # add link of rnLeaf: discard - of rnContents: - p.hasToc = true else: - for i in countup(0, len(n) - 1): n.sons[i] = resolveSubs(p, n.sons[i]) + var regroup = false + for i in 0 ..< n.len: + n.sons[i] = resolveSubs(s, n.sons[i]) + if n.sons[i] != nil and n.sons[i].kind == rnFootnote: + regroup = true + if regroup: # group footnotes together into rnFootnoteGroup + var newSons: seq[PRstNode] + var i = 0 + while i < n.len: + if n.sons[i] != nil and n.sons[i].kind == rnFootnote: + var grp = newRstNode(rnFootnoteGroup) + while i < n.len and n.sons[i].kind == rnFootnote: + grp.sons.add n.sons[i] + inc i + newSons.add grp + else: + newSons.add n.sons[i] + inc i + result.sons = newSons + +proc completePass2*(s: PRstSharedState) = + for (filename, importdocInfo) in s.idxImports.pairs: + if not importdocInfo.used: + rstMessage(s.filenames, s.msgHandler, importdocInfo.fromInfo, + mwUnusedImportdoc, filename) proc rstParse*(text, filename: string, - line, column: int, hasToc: var bool, + line, column: int, options: RstParseOptions, findFile: FindFileHandler = nil, - msgHandler: MsgHandler = nil): PRstNode = - var p: RstParser - initParser(p, newSharedState(options, findFile, msgHandler)) - p.filename = filename - p.line = line - p.col = column + getTokens(text, roSkipPounds in options, p.tok) - result = resolveSubs(p, parseDoc(p)) - hasToc = p.hasToc + findRefFile: FindRefFileHandler = nil, + msgHandler: MsgHandler = nil): + tuple[node: PRstNode, filenames: RstFileTable, hasToc: bool] = + ## Parses the whole `text`. The result is ready for `rstgen.renderRstToOut`, + ## note that 2nd tuple element should be fed to `initRstGenerator` + ## argument `filenames` (it is being filled here at least with `filename` + ## and possibly with other files from RST ``.. include::`` statement). + var sharedState = newRstSharedState(options, filename, findFile, findRefFile, + msgHandler, hasToc=false) + let unresolved = rstParsePass1(text, line, column, sharedState) + preparePass2(sharedState, unresolved) + result.node = resolveSubs(sharedState, unresolved) + completePass2(sharedState) + result.filenames = sharedState.filenames + result.hasToc = sharedState.hasToc |