diff options
Diffstat (limited to 'lib/packages/docutils/rst.nim')
-rw-r--r-- | lib/packages/docutils/rst.nim | 697 |
1 files changed, 498 insertions, 199 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index f81be7a50..706c50689 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -21,8 +21,10 @@ ## turned on by passing ``options:`` [RstParseOptions] to [proc rstParse]. import - os, strutils, rstast, dochelpers, std/enumutils, algorithm, lists, sequtils, - std/private/miscdollars, tables, strscans + std/[os, strutils, enumutils, algorithm, lists, sequtils, + tables, strscans] +import dochelpers, rstidx, rstast +import std/private/miscdollars from highlite import SourceLanguage, getSourceLanguage when defined(nimPreviewSlimSystem): @@ -40,7 +42,7 @@ type roNimFile ## set for Nim files where default interpreted ## text role should be :nim: roSandboxDisabled ## this option enables certain options - ## (e.g. raw, include) + ## (e.g. raw, include, importdoc) ## which are disabled by default as they can ## enable users to read arbitrary data and ## perform XSS if the parser is used in a web @@ -73,11 +75,17 @@ type mwUnsupportedLanguage = "language '$1' not supported", mwUnsupportedField = "field '$1' not supported", mwRstStyle = "RST style: $1", + mwUnusedImportdoc = "importdoc for '$1' is not used", meSandboxedDirective = "disabled directive: '$1'", MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind, arg: string) {.closure, gcsafe.} ## what to do in case of an error FindFileHandler* = proc (filename: string): string {.closure, gcsafe.} + FindRefFileHandler* = + proc (targetRelPath: string): + tuple[targetPath: string, linkRelPath: string] {.closure, gcsafe.} + ## returns where .html or .idx file should be found by its relative path; + ## `linkRelPath` is a prefix to be added before a link anchor from such file proc rstnodeToRefname*(n: PRstNode): string proc addNodes*(n: PRstNode): string @@ -333,7 +341,8 @@ type arInternalRst, ## For automatically generated RST anchors (from ## headings, footnotes, inline internal targets): ## case-insensitive, 1-space-significant (by RST spec) - arNim ## For anchors generated by ``docgen.rst``: Nim-style case + arExternalRst, ## For external .nim doc comments or .rst/.md + arNim ## For anchors generated by ``docgen.nim``: Nim-style case ## sensitivity, etc. (see `proc normalizeNimName`_ for details) arHyperlink, ## For links with manually set anchors in ## form `text <pagename.html#anchor>`_ @@ -343,17 +352,22 @@ type footnoteAnchor = "footnote anchor", headlineAnchor = "implicitly-generated headline anchor" AnchorSubst = object - info: TLineInfo # where the anchor was defined + info: TLineInfo # the file where the anchor was defined priority: int case kind: range[arInternalRst .. arNim] of arInternalRst: anchorType: RstAnchorKind target: PRstNode + of arExternalRst: + anchorTypeExt: RstAnchorKind + refnameExt: string of arNim: + module: FileIndex # anchor's module (generally not the same as file) tooltip: string # displayed tooltip for Nim-generated anchors langSym: LangSymbol refname: string # A reference name that will be inserted directly # into HTML/Latex. + external: bool AnchorSubstTable = Table[string, seq[AnchorSubst]] # use `seq` to account for duplicate anchors FootnoteType = enum @@ -366,12 +380,18 @@ type kind: FootnoteType # discriminator number: int # valid for fnManualNumber (always) and fnAutoNumber, # fnAutoNumberLabel after resolveSubs is called - autoNumIdx: int # order of occurence: fnAutoNumber, fnAutoNumberLabel - autoSymIdx: int # order of occurence: fnAutoSymbol + autoNumIdx: int # order of occurrence: fnAutoNumber, fnAutoNumberLabel + autoSymIdx: int # order of occurrence: fnAutoSymbol label: string # valid for fnAutoNumberLabel RstFileTable* = object filenameToIdx*: Table[string, FileIndex] idxToFilename*: seq[string] + ImportdocInfo = object + used: bool # was this import used? + fromInfo: TLineInfo # place of `.. importdoc::` directive + idxPath: string # full path to ``.idx`` file + linkRelPath: string # prefix before target anchor + title: string # document title obtained from ``.idx`` RstSharedState = object options*: RstParseOptions # parsing options hLevels: LevelMap # hierarchy of heading styles @@ -393,12 +413,17 @@ type footnotes: seq[FootnoteSubst] # correspondence b/w footnote label, # number, order of occurrence msgHandler: MsgHandler # How to handle errors. - findFile: FindFileHandler # How to find files. + findFile: FindFileHandler # How to find files for include. + findRefFile: FindRefFileHandler + # How to find files imported by importdoc. filenames*: RstFileTable # map file name <-> FileIndex (for storing # file names for warnings after 1st stage) currFileIdx*: FileIndex # current index in `filenames` tocPart*: seq[PRstNode] # all the headings of a document hasToc*: bool + idxImports*: Table[string, ImportdocInfo] + # map `importdoc`ed filename -> it's info + nimFileImported*: bool # Was any ``.nim`` module `importdoc`ed ? PRstSharedState* = ref RstSharedState ManualAnchor = object @@ -419,6 +444,7 @@ type ## because RST can have >1 alias per 1 anchor EParseError* = object of ValueError + SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.} const LineRstInit* = 1 ## Initial line number for standalone RST text @@ -452,6 +478,9 @@ proc defaultFindFile*(filename: string): string = if fileExists(filename): result = filename else: result = "" +proc defaultFindRefFile*(filename: string): (string, string) = + (filename, "") + proc defaultRole(options: RstParseOptions): string = if roNimFile in options: "nim" else: "literal" @@ -492,12 +521,19 @@ proc getFilename(filenames: RstFileTable, fid: FileIndex): string = $fid.int, $(filenames.len - 1)]) result = filenames.idxToFilename[fid.int] +proc getFilename(s: PRstSharedState, subst: AnchorSubst): string = + getFilename(s.filenames, subst.info.fileIndex) + +proc getModule(s: PRstSharedState, subst: AnchorSubst): string = + result = getFilename(s.filenames, subst.module) + proc currFilename(s: PRstSharedState): string = getFilename(s.filenames, s.currFileIdx) proc newRstSharedState*(options: RstParseOptions, filename: string, findFile: FindFileHandler, + findRefFile: FindRefFileHandler, msgHandler: MsgHandler, hasToc: bool): PRstSharedState = let r = defaultRole(options) @@ -507,6 +543,9 @@ proc newRstSharedState*(options: RstParseOptions, options: options, msgHandler: if not isNil(msgHandler): msgHandler else: defaultMsgHandler, findFile: if not isNil(findFile): findFile else: defaultFindFile, + findRefFile: + if not isNil(findRefFile): findRefFile + else: defaultFindRefFile, hasToc: hasToc ) setCurrFilename(result, filename) @@ -525,6 +564,14 @@ proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) = proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string) = s.msgHandler(s.currFilename, LineRstInit, ColRstInit, msgKind, arg) +proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string; + line, col: int) = + s.msgHandler(s.currFilename, line, col, msgKind, arg) + +proc rstMessage(s: PRstSharedState, filename: string, msgKind: MsgKind, + arg: string) = + s.msgHandler(filename, LineRstInit, ColRstInit, msgKind, arg) + proc rstMessage*(filenames: RstFileTable, f: MsgHandler, info: TLineInfo, msgKind: MsgKind, arg: string) = ## Print warnings using `info`, i.e. in 2nd-pass warnings for @@ -541,6 +588,31 @@ proc rstMessage(p: RstParser, msgKind: MsgKind) = p.col + currentTok(p).col, msgKind, currentTok(p).symbol) +# Functions `isPureRst` & `stopOrWarn` address differences between +# Markdown and RST: +# * Markdown always tries to continue working. If it is really impossible +# to parse a markup element, its proc just returns `nil` and parsing +# continues for it as for normal text paragraph. +# The downside is that real mistakes/typos are often silently ignored. +# The same applies to legacy `RstMarkdown` mode for nimforum. +# * RST really signals errors. The downside is that it's more intrusive - +# the user must escape special syntax with \ explicitly. +# +# TODO: we need to apply this strategy to all markup elements eventually. + +func isPureRst(p: RstParser): bool = roSupportMarkdown notin p.s.options +func isRst(p: RstParser): bool = roPreferMarkdown notin p.s.options +func isMd(p: RstParser): bool = roPreferMarkdown in p.s.options +func isMd(s: PRstSharedState): bool = roPreferMarkdown in s.options + +proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string) = + let realMsgKind = if isPureRst(p): errorType else: mwRstStyle + rstMessage(p, realMsgKind, arg) + +proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string, line, col: int) = + let realMsgKind = if isPureRst(p): errorType else: mwRstStyle + rstMessage(p, realMsgKind, arg, line, col) + proc currInd(p: RstParser): int = result = p.indentStack[high(p.indentStack)] @@ -756,6 +828,14 @@ proc internalRefPriority(k: RstAnchorKind): int = of footnoteAnchor: result = 4 of headlineAnchor: result = 3 +proc `$`(subst: AnchorSubst): string = # for debug + let s = + case subst.kind + of arInternalRst: "type=" & $subst.anchorType + of arExternalRst: "type=" & $subst.anchorTypeExt + of arNim: "langsym=" & $subst.langSym + result = "(kind=$1, priority=$2, $3)" % [$subst.kind, $subst.priority, s] + proc addAnchorRst(p: var RstParser, name: string, target: PRstNode, anchorType: RstAnchorKind) = ## Associates node `target` (which has field `anchor`) with an @@ -771,31 +851,49 @@ proc addAnchorRst(p: var RstParser, name: string, target: PRstNode, info: prevLineInfo(p), anchorType: anchorType)) p.curAnchors.setLen 0 -proc addAnchorNim*(s: var PRstSharedState, refn: string, tooltip: string, +proc addAnchorExtRst(s: var PRstSharedState, key: string, refn: string, + anchorType: RstAnchorKind, info: TLineInfo) = + let name = key.toLowerAscii + let prio = internalRefPriority(anchorType) + s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add( + AnchorSubst(kind: arExternalRst, refnameExt: refn, priority: prio, + info: info, + anchorTypeExt: anchorType)) + +proc addAnchorNim*(s: var PRstSharedState, external: bool, refn: string, tooltip: string, langSym: LangSymbol, priority: int, - info: TLineInfo) = + info: TLineInfo, module: FileIndex) = ## Adds an anchor `refn`, which follows ## the rule `arNim` (i.e. a symbol in ``*.nim`` file) s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add( - AnchorSubst(kind: arNim, refname: refn, langSym: langSym, + AnchorSubst(kind: arNim, external: external, refname: refn, langSym: langSym, tooltip: tooltip, priority: priority, info: info)) proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode, info: TLineInfo): seq[AnchorSubst] = - let langSym = toLangSymbol(signature) + var langSym: LangSymbol + try: + langSym = toLangSymbol(signature) + except ValueError: # parsing failed, not a Nim symbol + return let substitutions = s.anchors.getOrDefault(langSym.name, newSeq[AnchorSubst]()) if substitutions.len == 0: return - # map symKind (like "proc") -> found symbols/groups: - var found: Table[string, seq[AnchorSubst]] - for s in substitutions: - if s.kind == arNim: - if match(s.langSym, langSym): - found.mgetOrPut(s.langSym.symKind, newSeq[AnchorSubst]()).add s - for symKind, sList in found: + # logic to select only groups instead of concrete symbols + # with overloads, note that the same symbol can be defined + # in multiple modules and `importdoc`ed: + type GroupKey = tuple[symKind: string, origModule: string] + # map (symKind, file) (like "proc", "os.nim") -> found symbols/groups: + var found: Table[GroupKey, seq[AnchorSubst]] + for subst in substitutions: + if subst.kind == arNim: + if match(subst.langSym, langSym): + let key: GroupKey = (subst.langSym.symKind, getModule(s, subst)) + found.mgetOrPut(key, newSeq[AnchorSubst]()).add subst + for key, sList in found: if sList.len == 1: result.add sList[0] else: # > 1, there are overloads, potential ambiguity in this `symKind` @@ -812,14 +910,16 @@ proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode, result.add s foundGroup = true break - doAssert foundGroup, "docgen has not generated the group" + doAssert(foundGroup, + "docgen has not generated the group for $1 (file $2)" % [ + langSym.name, getModule(s, sList[0]) ]) proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo): seq[AnchorSubst] = let name = linkText.toLowerAscii let substitutions = s.anchors.getOrDefault(name, newSeq[AnchorSubst]()) for s in substitutions: - if s.kind == arInternalRst: + if s.kind in {arInternalRst, arExternalRst}: result.add s proc addFootnoteNumManual(p: var RstParser, num: int) = @@ -1426,7 +1526,7 @@ proc parseMarkdownCodeblockFields(p: var RstParser): PRstNode = result = nil else: result = newRstNode(rnFieldList) - while currentTok(p).kind != tkIndent: + while currentTok(p).kind notin {tkIndent, tkEof}: if currentTok(p).kind == tkWhite: inc p.idx else: @@ -1503,6 +1603,7 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode = else: args = nil var n = newLeaf("") + var isFirstLine = true while true: if currentTok(p).kind == tkEof: rstMessage(p, meMissingClosing, @@ -1514,7 +1615,8 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode = inc p.idx, 2 break elif currentTok(p).kind == tkIndent: - n.text.add "\n" + if not isFirstLine: + n.text.add "\n" if currentTok(p).ival > baseCol: n.text.add " ".repeat(currentTok(p).ival - baseCol) elif currentTok(p).ival < baseCol: @@ -1524,6 +1626,7 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode = else: n.text.add(currentTok(p).symbol) inc p.idx + isFirstLine = false result.sons[0] = args if result.sons[2] == nil: var lb = newRstNode(rnLiteralBlock) @@ -1597,7 +1700,7 @@ proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool = else: result = false -proc getFootnoteType(label: PRstNode): (FootnoteType, int) = +proc getRstFootnoteType(label: PRstNode): (FootnoteType, int) = if label.sons.len >= 1 and label.sons[0].kind == rnLeaf and label.sons[0].text == "#": if label.sons.len == 1: @@ -1610,12 +1713,23 @@ proc getFootnoteType(label: PRstNode): (FootnoteType, int) = elif label.len == 1 and label.sons[0].kind == rnLeaf: try: result = (fnManualNumber, parseInt(label.sons[0].text)) - except: + except ValueError: result = (fnCitation, -1) else: result = (fnCitation, -1) -proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = +proc getMdFootnoteType(label: PRstNode): (FootnoteType, int) = + try: + result = (fnManualNumber, parseInt(label.sons[0].text)) + except ValueError: + result = (fnAutoNumberLabel, -1) + +proc getFootnoteType(s: PRstSharedState, label: PRstNode): (FootnoteType, int) = + ## Returns footnote/citation type and manual number (if present). + if isMd(s): getMdFootnoteType(label) + else: getRstFootnoteType(label) + +proc parseRstFootnoteName(p: var RstParser, reference: bool): PRstNode = ## parse footnote/citation label. Precondition: start at `[`. ## Label text should be valid ref. name symbol, otherwise nil is returned. var i = p.idx + 1 @@ -1645,6 +1759,41 @@ proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = inc i p.idx = i +proc isMdFootnoteName(p: RstParser, reference: bool): bool = + ## Pandoc Markdown footnote extension. + let j = p.idx + result = p.tok[j].symbol == "[" and p.tok[j+1].symbol == "^" and + p.tok[j+2].kind == tkWord + +proc parseMdFootnoteName(p: var RstParser, reference: bool): PRstNode = + if isMdFootnoteName(p, reference): + result = newRstNode(rnInner) + var j = p.idx + 2 + while p.tok[j].kind in {tkWord, tkOther} or + validRefnamePunct(p.tok[j].symbol): + result.add newLeaf(p.tok[j].symbol) + inc j + if j == p.idx + 2: + return nil + if p.tok[j].symbol == "]": + if reference: + p.idx = j + 1 # skip ] + else: + if p.tok[j+1].symbol == ":": + p.idx = j + 2 # skip ]: + else: + result = nil + else: + result = nil + else: + result = nil + +proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode = + if isMd(p): parseMdFootnoteName(p, reference) + else: + if isInlineMarkupStart(p, "["): parseRstFootnoteName(p, reference) + else: nil + proc isMarkdownCodeBlock(p: RstParser, idx: int): bool = let tok = p.tok[idx] template allowedSymbol: bool = @@ -1711,16 +1860,12 @@ proc parseInline(p: var RstParser, father: PRstNode) = var n = newRstNode(rnSubstitutionReferences, info=lineInfo(p, p.idx+1)) parseUntil(p, n, "|", false) father.add(n) - elif roSupportMarkdown in p.s.options and - currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and - parseMarkdownLink(p, father): - discard "parseMarkdownLink already processed it" - elif isInlineMarkupStart(p, "[") and nextTok(p).symbol != "[" and + elif currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and (n = parseFootnoteName(p, reference=true); n != nil): var nn = newRstNode(rnFootnoteRef) nn.info = lineInfo(p, saveIdx+1) nn.add n - let (fnType, _) = getFootnoteType(n) + let (fnType, _) = getFootnoteType(p.s, n) case fnType of fnAutoSymbol: p.s.lineFootnoteSymRef.add lineInfo(p) @@ -1728,6 +1873,10 @@ proc parseInline(p: var RstParser, father: PRstNode) = p.s.lineFootnoteNumRef.add lineInfo(p) else: discard father.add(nn) + elif roSupportMarkdown in p.s.options and + currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and + parseMarkdownLink(p, father): + discard "parseMarkdownLink already processed it" else: if roSupportSmilies in p.s.options: let n = parseSmiley(p) @@ -1865,8 +2014,26 @@ proc getMdBlockIndent(p: RstParser): int = else: result = nextIndent # allow parsing next lines [case.3] -template isRst(p: RstParser): bool = roPreferMarkdown notin p.s.options -template isMd(p: RstParser): bool = roPreferMarkdown in p.s.options +proc indFollows(p: RstParser): bool = + result = currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) + +proc parseBlockContent(p: var RstParser, father: var PRstNode, + contentParser: SectionParser): bool {.gcsafe.} = + ## parse the final content part of explicit markup blocks (directives, + ## footnotes, etc). Returns true if succeeded. + if currentTok(p).kind != tkIndent or indFollows(p): + let blockIndent = getWrappableIndent(p) + pushInd(p, blockIndent) + let content = contentParser(p) + popInd(p) + father.add content + result = true + +proc parseSectionWrapper(p: var RstParser): PRstNode = + result = newRstNode(rnInner) + parseSection(p, result) + while result.kind == rnInner and result.len == 1: + result = result.sons[0] proc parseField(p: var RstParser): PRstNode = ## Returns a parsed rnField node. @@ -2052,17 +2219,20 @@ proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool = while p.tok[i].kind notin {tkEof, tkIndent}: headlineLen += p.tok[i].symbol.len inc i - result = p.tok[adornmentIdx].symbol.len >= headlineLen and - headlineLen != 0 - if result: - result = result and p.tok[i].kind == tkIndent and - p.tok[i+1].kind == tkAdornment and - p.tok[i+1].symbol == p.tok[adornmentIdx].symbol - if not result: - failure = "(underline '" & p.tok[i+1].symbol & "' does not match " & - "overline '" & p.tok[adornmentIdx].symbol & "')" - else: - failure = "(overline '" & p.tok[adornmentIdx].symbol & "' is too short)" + if p.tok[i].kind == tkIndent and + p.tok[i+1].kind == tkAdornment and + p.tok[i+1].symbol[0] == p.tok[adornmentIdx].symbol[0]: + result = p.tok[adornmentIdx].symbol.len >= headlineLen and + headlineLen != 0 + if result: + result = p.tok[i+1].symbol == p.tok[adornmentIdx].symbol + if not result: + failure = "(underline '" & p.tok[i+1].symbol & "' does not match " & + "overline '" & p.tok[adornmentIdx].symbol & "')" + else: + failure = "(overline '" & p.tok[adornmentIdx].symbol & "' is too short)" + else: # it's not overline/underline section, not reporting error + return false if not result: rstMessage(p, meNewSectionExpected, failure) @@ -2189,16 +2359,19 @@ proc whichSection(p: RstParser): RstNodeKind = result = rnLineBlock elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p): result = rnMarkdownBlockQuote - elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx): + elif (match(p, p.idx + 1, "i") and not match(p, p.idx + 2, "I")) and + isAdornmentHeadline(p, p.idx): result = rnOverline else: - result = rnLeaf + result = rnParagraph of tkPunct: if isMarkdownHeadline(p): result = rnMarkdownHeadline elif roSupportMarkdown in p.s.options and predNL(p) and match(p, p.idx, "| w") and findPipe(p, p.idx+3): result = rnMarkdownTable + elif isMd(p) and isMdFootnoteName(p, reference=false): + result = rnFootnote elif currentTok(p).symbol == "|" and isLineBlock(p): result = rnLineBlock elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p): @@ -2375,7 +2548,9 @@ proc parseParagraph(p: var RstParser, result: PRstNode) = result.addIfNotNil(parseLineBlock(p)) of rnMarkdownBlockQuote: result.addIfNotNil(parseMarkdownBlockQuote(p)) - else: break + else: + dec p.idx # allow subsequent block to be parsed as another section + break else: break of tkPunct: @@ -2518,11 +2693,11 @@ proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int = proc checkColumns(p: RstParser, cols: RstCols) = var i = p.idx if p.tok[i].symbol[0] != '=': - rstMessage(p, mwRstStyle, + stopOrWarn(p, meIllformedTable, "only tables with `=` columns specification are allowed") for col in 0 ..< cols.len: if tokEnd(p, i) != cols[col].stop: - rstMessage(p, meIllformedTable, + stopOrWarn(p, meIllformedTable, "end of table column #$1 should end at position $2" % [ $(col+1), $(cols[col].stop+ColRstOffset)], p.tok[i].line, tokEnd(p, i)) @@ -2531,12 +2706,12 @@ proc checkColumns(p: RstParser, cols: RstCols) = if p.tok[i].kind == tkWhite: inc i if p.tok[i].kind notin {tkIndent, tkEof}: - rstMessage(p, meIllformedTable, "extraneous column specification") + stopOrWarn(p, meIllformedTable, "extraneous column specification") elif p.tok[i].kind == tkWhite: inc i else: - rstMessage(p, meIllformedTable, "no enough table columns", - p.tok[i].line, p.tok[i].col) + stopOrWarn(p, meIllformedTable, + "no enough table columns", p.tok[i].line, p.tok[i].col) proc getSpans(p: RstParser, nextLine: int, cols: RstCols, unitedCols: RstCols): seq[int] = @@ -2591,17 +2766,18 @@ proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNo if tokEnd(p) <= colEnd(nCell): if tokStart(p) < colStart(nCell): if currentTok(p).kind != tkWhite: - rstMessage(p, meIllformedTable, + stopOrWarn(p, meIllformedTable, "this word crosses table column from the left") - else: - inc p.idx + row[nCell].add(currentTok(p).symbol) else: row[nCell].add(currentTok(p).symbol) - inc p.idx + inc p.idx else: if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite: - rstMessage(p, meIllformedTable, + stopOrWarn(p, meIllformedTable, "this word crosses table column from the right") + row[nCell].add(currentTok(p).symbol) + inc p.idx inc nCell if currentTok(p).kind == tkIndent: inc p.idx if tokEnd(p) <= colEnd(0): break @@ -2764,7 +2940,7 @@ proc parseOptionList(p: var RstParser): PRstNode = break proc parseMdDefinitionList(p: var RstParser): PRstNode = - ## Parses (Pandoc/kramdown/PHPextra) Mardkown definition lists. + ## Parses (Pandoc/kramdown/PHPextra) Markdown definition lists. result = newRstNodeA(p, rnMdDefList) let termCol = currentTok(p).col while true: @@ -2899,13 +3075,13 @@ proc parseEnumList(p: var RstParser): PRstNode = let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol # check that it's in sequence: enumerator == next(prevEnum) if "n" in wildcards[w]: # arabic numeral - let prevEnumI = try: parseInt(prevEnum) except: 1 + let prevEnumI = try: parseInt(prevEnum) except ValueError: 1 if enumerator in autoEnums: if prevAE != "" and enumerator != prevAE: break prevAE = enumerator curEnum = prevEnumI + 1 - else: curEnum = (try: parseInt(enumerator) except: 1) + else: curEnum = (try: parseInt(enumerator) except ValueError: 1) if curEnum - prevEnumI != 1: break prevEnum = enumerator @@ -2920,6 +3096,57 @@ proc parseEnumList(p: var RstParser): PRstNode = else: break +proc prefix(ftnType: FootnoteType): string = + case ftnType + of fnManualNumber: result = "footnote-" + of fnAutoNumber: result = "footnoteauto-" + of fnAutoNumberLabel: result = "footnote-" + of fnAutoSymbol: result = "footnotesym-" + of fnCitation: result = "citation-" + +proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} = + ## Parses footnotes and citations, always returns 2 sons: + ## + ## 1) footnote label, always containing rnInner with 1 or more sons + ## 2) footnote body, which may be nil + var label: PRstNode + if isRst(p): + inc p.idx # skip space after `..` + label = parseFootnoteName(p, reference=false) + if label == nil: + if isRst(p): + dec p.idx + return nil + result = newRstNode(rnFootnote) + result.add label + let (fnType, i) = getFootnoteType(p.s, label) + var name = "" + var anchor = fnType.prefix + case fnType + of fnManualNumber: + addFootnoteNumManual(p, i) + anchor.add $i + of fnAutoNumber, fnAutoNumberLabel: + name = rstnodeToRefname(label) + addFootnoteNumAuto(p, name) + if fnType == fnAutoNumberLabel: + anchor.add name + else: # fnAutoNumber + result.order = p.s.lineFootnoteNum.len + anchor.add $result.order + of fnAutoSymbol: + addFootnoteSymAuto(p) + result.order = p.s.lineFootnoteSym.len + anchor.add $p.s.lineFootnoteSym.len + of fnCitation: + anchor.add rstnodeToRefname(label) + addAnchorRst(p, anchor, target = result, anchorType = footnoteAnchor) + result.anchor = anchor + if currentTok(p).kind == tkWhite: inc p.idx + discard parseBlockContent(p, result, parseSectionWrapper) + if result.len < 2: + result.add nil + proc sonKind(father: PRstNode, i: int): RstNodeKind = result = rnLeaf if i < father.len: result = father.sons[i].kind @@ -2962,6 +3189,7 @@ proc parseSection(p: var RstParser, result: PRstNode) = of rnLineBlock: a = parseLineBlock(p) of rnMarkdownBlockQuote: a = parseMarkdownBlockQuote(p) of rnDirective: a = parseDotDot(p) + of rnFootnote: a = parseFootnote(p) of rnEnumList: a = parseEnumList(p) of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)") of rnParagraph: discard @@ -2987,12 +3215,6 @@ proc parseSection(p: var RstParser, result: PRstNode) = result.sons[0] = newRstNode(rnInner, result.sons[0].sons, anchor=result.sons[0].anchor) -proc parseSectionWrapper(p: var RstParser): PRstNode = - result = newRstNode(rnInner) - parseSection(p, result) - while result.kind == rnInner and result.len == 1: - result = result.sons[0] - proc parseDoc(p: var RstParser): PRstNode = result = parseSectionWrapper(p) if currentTok(p).kind != tkEof: @@ -3002,7 +3224,6 @@ type DirFlag = enum hasArg, hasOptions, argIsFile, argIsWord DirFlags = set[DirFlag] - SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.} proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode = ## Parses arguments and options for a directive block. @@ -3045,21 +3266,6 @@ proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode popInd(p) result.add(options) -proc indFollows(p: RstParser): bool = - result = currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) - -proc parseBlockContent(p: var RstParser, father: var PRstNode, - contentParser: SectionParser): bool {.gcsafe.} = - ## parse the final content part of explicit markup blocks (directives, - ## footnotes, etc). Returns true if succeeded. - if currentTok(p).kind != tkIndent or indFollows(p): - let blockIndent = getWrappableIndent(p) - pushInd(p, blockIndent) - let content = contentParser(p) - popInd(p) - father.add content - result = true - proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags, contentParser: SectionParser): PRstNode = ## A helper proc that does main work for specific directive procs. @@ -3251,6 +3457,15 @@ proc dirRaw(p: var RstParser): PRstNode = else: dirRawAux(p, result, rnRaw, parseSectionWrapper) +proc dirImportdoc(p: var RstParser): PRstNode = + result = parseDirective(p, rnDirective, {}, parseLiteralBlock) + assert result.sons[2].kind == rnLiteralBlock + assert result.sons[2].sons[0].kind == rnLeaf + let filenames: seq[string] = split(result.sons[2].sons[0].text, seps = {','}) + proc rmSpaces(s: string): string = s.split.join("") + for origFilename in filenames: + p.s.idxImports[origFilename.rmSpaces] = ImportdocInfo(fromInfo: lineInfo(p)) + proc selectDir(p: var RstParser, d: string): PRstNode = result = nil let tok = p.tok[p.idx-2] # report on directive in ".. directive::" @@ -3271,6 +3486,7 @@ proc selectDir(p: var RstParser, d: string): PRstNode = of "hint": result = dirAdmonition(p, d) of "image": result = dirImage(p) of "important": result = dirAdmonition(p, d) + of "importdoc": result = dirImportdoc(p) of "include": result = dirInclude(p) of "index": result = dirIndex(p) of "note": result = dirAdmonition(p, d) @@ -3286,54 +3502,6 @@ proc selectDir(p: var RstParser, d: string): PRstNode = else: rstMessage(p, meInvalidDirective, d, tok.line, tok.col) -proc prefix(ftnType: FootnoteType): string = - case ftnType - of fnManualNumber: result = "footnote-" - of fnAutoNumber: result = "footnoteauto-" - of fnAutoNumberLabel: result = "footnote-" - of fnAutoSymbol: result = "footnotesym-" - of fnCitation: result = "citation-" - -proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} = - ## Parses footnotes and citations, always returns 2 sons: - ## - ## 1) footnote label, always containing rnInner with 1 or more sons - ## 2) footnote body, which may be nil - inc p.idx - let label = parseFootnoteName(p, reference=false) - if label == nil: - dec p.idx - return nil - result = newRstNode(rnFootnote) - result.add label - let (fnType, i) = getFootnoteType(label) - var name = "" - var anchor = fnType.prefix - case fnType - of fnManualNumber: - addFootnoteNumManual(p, i) - anchor.add $i - of fnAutoNumber, fnAutoNumberLabel: - name = rstnodeToRefname(label) - addFootnoteNumAuto(p, name) - if fnType == fnAutoNumberLabel: - anchor.add name - else: # fnAutoNumber - result.order = p.s.lineFootnoteNum.len - anchor.add $result.order - of fnAutoSymbol: - addFootnoteSymAuto(p) - result.order = p.s.lineFootnoteSym.len - anchor.add $p.s.lineFootnoteSym.len - of fnCitation: - anchor.add rstnodeToRefname(label) - addAnchorRst(p, anchor, target = result, anchorType = footnoteAnchor) - result.anchor = anchor - if currentTok(p).kind == tkWhite: inc p.idx - discard parseBlockContent(p, result, parseSectionWrapper) - if result.len < 2: - result.add nil - proc parseDotDot(p: var RstParser): PRstNode = # parse "explicit markup blocks" result = nil @@ -3401,77 +3569,200 @@ proc rstParsePass1*(fragment: string, getTokens(fragment, p.tok) result = parseDoc(p) -proc preparePass2*(s: PRstSharedState, mainNode: PRstNode) = +proc extractLinkEnd(x: string): string = + ## From links like `path/to/file.html#/%` extract `file.html#/%`. + let i = find(x, '#') + let last = + if i >= 0: i + else: x.len - 1 + let j = rfind(x, '/', start=0, last=last) + if j >= 0: + result = x[j+1 .. ^1] + else: + result = x + +proc loadIdxFile(s: var PRstSharedState, origFilename: string) = + doAssert roSandboxDisabled in s.options + var info: TLineInfo + info.fileIndex = addFilename(s, origFilename) + var (dir, basename, ext) = origFilename.splitFile + if ext notin [".md", ".rst", ".nim", ""]: + rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo, + meCannotOpenFile, origFilename & ": unknown extension") + let idxFilename = dir / basename & ".idx" + let (idxPath, linkRelPath) = s.findRefFile(idxFilename) + s.idxImports[origFilename].linkRelPath = linkRelPath + var + fileEntries: seq[IndexEntry] + title: IndexEntry + try: + (fileEntries, title) = parseIdxFile(idxPath) + except IOError: + rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo, + meCannotOpenFile, idxPath) + except ValueError as e: + s.msgHandler(idxPath, LineRstInit, ColRstInit, meInvalidField, e.msg) + + var isMarkup = false # for sanity check to avoid mixing .md <-> .nim + for entry in fileEntries: + # Though target .idx already has inside it the path to HTML relative + # project's root, we won't rely on it and use `linkRelPath` instead. + let refn = extractLinkEnd(entry.link) + # select either markup (rst/md) or Nim cases: + if entry.kind in {ieMarkupTitle, ieNimTitle}: + s.idxImports[origFilename].title = entry.keyword + case entry.kind + of ieIdxRole, ieHeading, ieMarkupTitle: + if ext == ".nim" and entry.kind == ieMarkupTitle: + rstMessage(s, idxPath, meInvalidField, + $ieMarkupTitle & " in supposedly .nim-derived file") + if entry.kind == ieMarkupTitle: + isMarkup = true + info.line = entry.line.uint16 + addAnchorExtRst(s, key = entry.keyword, refn = refn, + anchorType = headlineAnchor, info=info) + of ieNim, ieNimGroup, ieNimTitle: + if ext in [".md", ".rst"] or isMarkup: + rstMessage(s, idxPath, meInvalidField, + $entry.kind & " in supposedly markup-derived file") + s.nimFileImported = true + var langSym: LangSymbol + if entry.kind in {ieNim, ieNimTitle}: + var q: RstParser + initParser(q, s) + info.line = entry.line.uint16 + setLen(q.tok, 0) + q.idx = 0 + getTokens(entry.linkTitle, q.tok) + var sons = newSeq[PRstNode](q.tok.len) + for i in 0 ..< q.tok.len: sons[i] = newLeaf(q.tok[i].symbol) + let linkTitle = newRstNode(rnInner, sons) + langSym = linkTitle.toLangSymbol + else: # entry.kind == ieNimGroup + langSym = langSymbolGroup(kind=entry.linkTitle, name=entry.keyword) + addAnchorNim(s, external = true, refn = refn, tooltip = entry.linkDesc, + langSym = langSym, priority = -4, # lowest + info = info, module = info.fileIndex) + doAssert s.idxImports[origFilename].title != "" + +proc preparePass2*(s: var PRstSharedState, mainNode: PRstNode, importdoc = true) = ## Records titles in node `mainNode` and orders footnotes. countTitles(s, mainNode) fixHeadlines(s) orderFootnotes(s) + if importdoc: + for origFilename in s.idxImports.keys: + loadIdxFile(s, origFilename) proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode = - # Associate this link alias with its target and change node kind to - # rnHyperlink or rnInternalRef appropriately. - var desc, alias: PRstNode - if n.kind == rnPandocRef: # link like [desc][alias] - desc = n.sons[0] - alias = n.sons[1] - else: # n.kind == rnRstRef, link like `desc=alias`_ - desc = n - alias = n - type LinkDef = object - ar: AnchorRule - priority: int - tooltip: string - target: PRstNode - info: TLineInfo - proc cmp(x, y: LinkDef): int = - result = cmp(x.priority, y.priority) - if result == 0: - result = cmp(x.target, y.target) - var foundLinks: seq[LinkDef] - let refn = rstnodeToRefname(alias) - var hyperlinks = findRef(s, refn) - for y in hyperlinks: - foundLinks.add LinkDef(ar: arHyperlink, priority: refPriority(y.kind), - target: y.value, info: y.info, - tooltip: "(" & $y.kind & ")") - let substRst = findMainAnchorRst(s, alias.addNodes, n.info) - for subst in substRst: - foundLinks.add LinkDef(ar: arInternalRst, priority: subst.priority, - target: newLeaf(subst.target.anchor), - info: subst.info, - tooltip: "(" & $subst.anchorType & ")") - # find anchors automatically generated from Nim symbols - if roNimFile in s.options: - let substNim = findMainAnchorNim(s, signature=alias, n.info) - for subst in substNim: - foundLinks.add LinkDef(ar: arNim, priority: subst.priority, - target: newLeaf(subst.refname), - info: subst.info, tooltip: subst.tooltip) - foundLinks.sort(cmp = cmp, order = Descending) - let aliasStr = addNodes(alias) - if foundLinks.len >= 1: - let kind = if foundLinks[0].ar == arHyperlink: rnHyperlink - elif foundLinks[0].ar == arNim: rnNimdocRef - else: rnInternalRef - result = newRstNode(kind) - result.sons = @[newRstNode(rnInner, desc.sons), foundLinks[0].target] - if kind == rnNimdocRef: result.tooltip = foundLinks[0].tooltip - if foundLinks.len > 1: # report ambiguous link - var targets = newSeq[string]() - for l in foundLinks: - var t = " " - if s.filenames.len > 1: - t.add getFilename(s.filenames, l.info.fileIndex) - let n = l.info.line - let c = l.info.col + ColRstOffset - t.add "($1, $2): $3" % [$n, $c, l.tooltip] - targets.add t - rstMessage(s.filenames, s.msgHandler, n.info, mwAmbiguousLink, - "`$1`\n clash:\n$2" % [ - aliasStr, targets.join("\n")]) - else: # nothing found - result = n - rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, aliasStr) + # Associate this link alias with its target and change node kind to + # rnHyperlink or rnInternalRef appropriately. + var desc, alias: PRstNode + if n.kind == rnPandocRef: # link like [desc][alias] + desc = n.sons[0] + alias = n.sons[1] + else: # n.kind == rnRstRef, link like `desc=alias`_ + desc = n + alias = n + type LinkDef = object + ar: AnchorRule + priority: int + tooltip: string + target: PRstNode + info: TLineInfo + externFilename: string + # when external anchor: origin filename where anchor was defined + isTitle: bool + proc cmp(x, y: LinkDef): int = + result = cmp(x.priority, y.priority) + if result == 0: + result = cmp(x.target, y.target) + var foundLinks: seq[LinkDef] + let refn = rstnodeToRefname(alias) + var hyperlinks = findRef(s, refn) + for y in hyperlinks: + foundLinks.add LinkDef(ar: arHyperlink, priority: refPriority(y.kind), + target: y.value, info: y.info, + tooltip: "(" & $y.kind & ")") + let substRst = findMainAnchorRst(s, alias.addNodes, n.info) + template getExternFilename(subst: AnchorSubst): string = + if subst.kind == arExternalRst or + (subst.kind == arNim and subst.external): + getFilename(s, subst) + else: "" + for subst in substRst: + var refname, fullRefname: string + if subst.kind == arInternalRst: + refname = subst.target.anchor + fullRefname = refname + else: # arExternalRst + refname = subst.refnameExt + fullRefname = s.idxImports[getFilename(s, subst)].linkRelPath & + "/" & refname + let anchorType = + if subst.kind == arInternalRst: subst.anchorType + else: subst.anchorTypeExt # arExternalRst + foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority, + target: newLeaf(fullRefname), + info: subst.info, + externFilename: getExternFilename(subst), + isTitle: isDocumentationTitle(refname), + tooltip: "(" & $anchorType & ")") + # find anchors automatically generated from Nim symbols + if roNimFile in s.options or s.nimFileImported: + let substNim = findMainAnchorNim(s, signature=alias, n.info) + for subst in substNim: + let fullRefname = + if subst.external: + s.idxImports[getFilename(s, subst)].linkRelPath & + "/" & subst.refname + else: subst.refname + foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority, + target: newLeaf(fullRefname), + externFilename: getExternFilename(subst), + isTitle: isDocumentationTitle(subst.refname), + info: subst.info, tooltip: subst.tooltip) + foundLinks.sort(cmp = cmp, order = Descending) + let aliasStr = addNodes(alias) + if foundLinks.len >= 1: + if foundLinks[0].externFilename != "": + s.idxImports[foundLinks[0].externFilename].used = true + let kind = if foundLinks[0].ar in {arHyperlink, arExternalRst}: rnHyperlink + elif foundLinks[0].ar == arNim: + if foundLinks[0].externFilename == "": rnNimdocRef + else: rnHyperlink + else: rnInternalRef + result = newRstNode(kind) + let documentName = # filename without ext for `.nim`, title for `.md` + if foundLinks[0].ar == arNim: + changeFileExt(foundLinks[0].externFilename.extractFilename, "") + elif foundLinks[0].externFilename != "": + s.idxImports[foundLinks[0].externFilename].title + else: foundLinks[0].externFilename.extractFilename + let linkText = + if foundLinks[0].externFilename != "": + if foundLinks[0].isTitle: newLeaf(addNodes(desc)) + else: newLeaf(documentName & ": " & addNodes(desc)) + else: + newRstNode(rnInner, desc.sons) + result.sons = @[linkText, foundLinks[0].target] + if kind == rnNimdocRef: result.tooltip = foundLinks[0].tooltip + if foundLinks.len > 1: # report ambiguous link + var targets = newSeq[string]() + for l in foundLinks: + var t = " " + if s.filenames.len > 1: + t.add getFilename(s.filenames, l.info.fileIndex) + let n = l.info.line + let c = l.info.col + ColRstOffset + t.add "($1, $2): $3" % [$n, $c, l.tooltip] + targets.add t + rstMessage(s.filenames, s.msgHandler, n.info, mwAmbiguousLink, + "`$1`\n clash:\n$2" % [ + aliasStr, targets.join("\n")]) + else: # nothing found + result = n + rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, aliasStr) proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode = ## Makes pass 2 of RST parsing. @@ -3494,7 +3785,7 @@ proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode = of rnRstRef, rnPandocRef: result = resolveLink(s, n) of rnFootnote: - var (fnType, num) = getFootnoteType(n.sons[0]) + var (fnType, num) = getFootnoteType(s, n.sons[0]) case fnType of fnManualNumber, fnCitation: discard "no need to alter fixed text" @@ -3512,7 +3803,7 @@ proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode = n.sons[0].sons[0].text = sym n.sons[1] = resolveSubs(s, n.sons[1]) of rnFootnoteRef: - var (fnType, num) = getFootnoteType(n.sons[0]) + var (fnType, num) = getFootnoteType(s, n.sons[0]) template addLabel(number: int | string) = var nn = newRstNode(rnInner) nn.add newLeaf($number) @@ -3568,20 +3859,28 @@ proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode = inc i result.sons = newSons +proc completePass2*(s: PRstSharedState) = + for (filename, importdocInfo) in s.idxImports.pairs: + if not importdocInfo.used: + rstMessage(s.filenames, s.msgHandler, importdocInfo.fromInfo, + mwUnusedImportdoc, filename) + proc rstParse*(text, filename: string, line, column: int, options: RstParseOptions, findFile: FindFileHandler = nil, + findRefFile: FindRefFileHandler = nil, msgHandler: MsgHandler = nil): tuple[node: PRstNode, filenames: RstFileTable, hasToc: bool] = ## Parses the whole `text`. The result is ready for `rstgen.renderRstToOut`, ## note that 2nd tuple element should be fed to `initRstGenerator` ## argument `filenames` (it is being filled here at least with `filename` ## and possibly with other files from RST ``.. include::`` statement). - var sharedState = newRstSharedState(options, filename, findFile, + var sharedState = newRstSharedState(options, filename, findFile, findRefFile, msgHandler, hasToc=false) let unresolved = rstParsePass1(text, line, column, sharedState) preparePass2(sharedState, unresolved) result.node = resolveSubs(sharedState, unresolved) + completePass2(sharedState) result.filenames = sharedState.filenames result.hasToc = sharedState.hasToc |