diff options
-rw-r--r-- | compiler/docgen.nim | 7 | ||||
-rw-r--r-- | lib/packages/docutils/rst.nim | 52 | ||||
-rw-r--r-- | tests/stdlib/trst.nim | 161 |
3 files changed, 207 insertions, 13 deletions
diff --git a/compiler/docgen.nim b/compiler/docgen.nim index 977fcf8ef..78fb88f9b 100644 --- a/compiler/docgen.nim +++ b/compiler/docgen.nim @@ -178,7 +178,8 @@ proc newDocumentor*(filename: AbsoluteFile; cache: IdentCache; conf: ConfigRef, result.outDir = conf.outDir.string initRstGenerator(result[], (if conf.cmd != cmdRst2tex: outHtml else: outLatex), conf.configVars, filename.string, - {roSupportRawDirective, roSupportMarkdown, roNimFile}, + {roSupportRawDirective, roSupportMarkdown, + roPreferMarkdown, roNimFile}, docgenFindFile, compilerMsgHandler) if conf.configVars.hasKey("doc.googleAnalytics"): @@ -1380,7 +1381,9 @@ proc commandRstAux(cache: IdentCache, conf: ConfigRef; d.isPureRst = true var rst = parseRst(readFile(filen.string), filen.string, line=LineRstInit, column=ColRstInit, - d.hasToc, {roSupportRawDirective, roSupportMarkdown}, conf) + d.hasToc, + {roSupportRawDirective, roSupportMarkdown, roPreferMarkdown}, + conf) var modDesc = newStringOfCap(30_000) renderRstToOut(d[], rst, modDesc) d.modDesc = rope(modDesc) diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index cb65791ff..6ad466716 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -11,9 +11,9 @@ ## rst ## ================================== ## -## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## Nim-flavored reStructuredText -## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +## Nim-flavored reStructuredText and Markdown +## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## ## This module implements a `reStructuredText`:idx: (RST) parser. ## A large subset is implemented with some limitations_ and @@ -177,6 +177,8 @@ type roSupportRawDirective, ## support the ``raw`` directive (don't support ## it for sandboxing) roSupportMarkdown, ## support additional features of Markdown + roPreferMarkdown, ## parse as Markdown (keeping RST as "extension" + ## to Markdown) -- implies `roSupportMarkdown` roNimFile ## set for Nim files where default interpreted ## text role should be :nim: @@ -277,6 +279,7 @@ type line*, col*, baseIndent*: int skipPounds*: bool adornmentLine*: bool + escapeNext*: bool proc getThing(L: var Lexer, tok: var Token, s: set[char]) = tok.kind = tkWord @@ -314,10 +317,18 @@ proc getPunctAdornment(L: var Lexer, tok: var Token) = tok.col = L.col var pos = L.bufpos let c = L.buf[pos] - while true: + if not L.escapeNext and (c != '\\' or L.adornmentLine): + while true: + tok.symbol.add(L.buf[pos]) + inc pos + if L.buf[pos] != c: break + elif L.escapeNext: tok.symbol.add(L.buf[pos]) inc pos - if L.buf[pos] != c: break + else: # not L.escapeNext and c == '\\' and not L.adornmentLine + tok.symbol.add '\\' + inc pos + L.escapeNext = true inc L.col, pos - L.bufpos L.bufpos = pos if tok.symbol == "\\": tok.kind = tkPunct @@ -429,7 +440,9 @@ proc getTokens(buffer: string, skipPounds: bool, tokens: var TokenSeq): int = while true: inc length setLen(tokens, length) + let toEscape = L.escapeNext rawGetTok(L, tokens[length - 1]) + if toEscape: L.escapeNext = false if tokens[length - 1].kind == tkEof: break if tokens[0].kind == tkWhite: # BUGFIX @@ -981,16 +994,24 @@ proc expect(p: var RstParser, tok: string) = if currentTok(p).symbol == tok: inc p.idx else: rstMessage(p, meExpected, tok) -proc isInlineMarkupEnd(p: RstParser, markup: string): bool = +proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules - result = currentTok(p).symbol == markup + if exact: + result = currentTok(p).symbol == markup + else: + result = currentTok(p).symbol.endsWith markup + if (not result) and markup == "``": + # check that escaping may have splitted `` to 2 tokens ` and ` + result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`" if not result: return # Rule 2: result = prevTok(p).kind notin {tkIndent, tkWhite} if not result: return # Rule 7: result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or - markup in ["``", "`"] and nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof} or + (roPreferMarkdown in p.s.options and + markup in ["``", "`"] and + nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or nextTok(p).symbol[0] in {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'} if not result: return @@ -1130,7 +1151,8 @@ proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode = proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode = var newKind = n.kind var newSons = n.sons - if isInlineMarkupEnd(p, "_") or isInlineMarkupEnd(p, "__"): + if isInlineMarkupEnd(p, "_", exact=true) or + isInlineMarkupEnd(p, "__", exact=true): inc p.idx if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">": var a = newRstNode(rnInner) @@ -1215,7 +1237,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) = inc p.idx while currentTok(p).kind in {tkWord, tkPunct}: if currentTok(p).kind == tkPunct: - if isInlineMarkupEnd(p, "_"): + if isInlineMarkupEnd(p, "_", exact=true): isRef = true break if not validRefnamePunct(currentTok(p).symbol): @@ -1253,7 +1275,15 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string, while true: case currentTok(p).kind of tkPunct: - if isInlineMarkupEnd(p, postfix): + if isInlineMarkupEnd(p, postfix, exact=false): + let l = currentTok(p).symbol.len + if l > postfix.len: + # handle cases like *emphasis with stars****. (It's valid RST!) + father.add newLeaf(currentTok(p).symbol[0 ..< l - postfix.len]) + elif postfix == "``" and currentTok(p).symbol == "`" and + prevTok(p).symbol == "`": + # handle cases like ``literal\`` - delete ` already added after \ + father.sons.setLen(father.sons.len - 1) inc p.idx break else: diff --git a/tests/stdlib/trst.nim b/tests/stdlib/trst.nim index 2398b92a8..fef80dfc7 100644 --- a/tests/stdlib/trst.nim +++ b/tests/stdlib/trst.nim @@ -4,6 +4,10 @@ discard """ [Suite] RST indentation [Suite] RST include directive + +[Suite] RST escaping + +[Suite] RST inline markup ''' """ @@ -267,3 +271,160 @@ And this should **NOT** be visible in `docs.html` """ doAssert "<em>Visible</em>" == rstTohtml(input, {}, defaultConfig()) removeFile("other.rst") + +suite "RST escaping": + test "backspaces": + check("""\ this""".toAst == dedent""" + rnLeaf 'this' + """) + + check("""\\ this""".toAst == dedent""" + rnInner + rnLeaf '\' + rnLeaf ' ' + rnLeaf 'this' + """) + + check("""\\\ this""".toAst == dedent""" + rnInner + rnLeaf '\' + rnLeaf 'this' + """) + + check("""\\\\ this""".toAst == dedent""" + rnInner + rnLeaf '\' + rnLeaf '\' + rnLeaf ' ' + rnLeaf 'this' + """) + +suite "RST inline markup": + test "end-string has repeating symbols": + check("*emphasis content****".toAst == dedent""" + rnEmphasis + rnLeaf 'emphasis' + rnLeaf ' ' + rnLeaf 'content' + rnLeaf '***' + """) + + check("""*emphasis content\****""".toAst == dedent""" + rnEmphasis + rnLeaf 'emphasis' + rnLeaf ' ' + rnLeaf 'content' + rnLeaf '*' + rnLeaf '**' + """) # exact configuration of leafs with * is not really essential, + # only total number of * is essential + + check("**strong content****".toAst == dedent""" + rnStrongEmphasis + rnLeaf 'strong' + rnLeaf ' ' + rnLeaf 'content' + rnLeaf '**' + """) + + check("""**strong content*\****""".toAst == dedent""" + rnStrongEmphasis + rnLeaf 'strong' + rnLeaf ' ' + rnLeaf 'content' + rnLeaf '*' + rnLeaf '*' + rnLeaf '*' + """) + + check("``lit content`````".toAst == dedent""" + rnInlineLiteral + rnLeaf 'lit' + rnLeaf ' ' + rnLeaf 'content' + rnLeaf '```' + """) + + + test """interpreted text can be ended with \` """: + let output = (".. default-role:: literal\n" & """`\``""").toAst + check(output.endsWith """ + rnParagraph + rnInlineLiteral + rnLeaf '`'""" & "\n") + + let output2 = """`\``""".toAst + check(output2 == dedent""" + rnInlineCode + rnDirArg + rnLeaf 'nim' + [nil] + rnLiteralBlock + rnLeaf '`' + """) + + let output3 = """`proc \`+\``""".toAst + check(output3 == dedent""" + rnInlineCode + rnDirArg + rnLeaf 'nim' + [nil] + rnLiteralBlock + rnLeaf 'proc `+`' + """) + + test """inline literals can contain \ anywhere""": + check("""``\``""".toAst == dedent""" + rnInlineLiteral + rnLeaf '\' + """) + + check("""``\\``""".toAst == dedent""" + rnInlineLiteral + rnLeaf '\' + rnLeaf '\' + """) + + check("""``\```""".toAst == dedent""" + rnInlineLiteral + rnLeaf '\' + rnLeaf '`' + """) + + check("""``\\```""".toAst == dedent""" + rnInlineLiteral + rnLeaf '\' + rnLeaf '\' + rnLeaf '`' + """) + + check("""``\````""".toAst == dedent""" + rnInlineLiteral + rnLeaf '\' + rnLeaf '`' + rnLeaf '`' + """) + + test "references with _ at the end": + check(dedent""" + .. _lnk: https + + lnk_""".toAst == + dedent""" + rnHyperlink + rnInner + rnLeaf 'lnk' + rnInner + rnLeaf 'https' + """) + + test "not a hyper link": + check(dedent""" + .. _lnk: https + + lnk___""".toAst == + dedent""" + rnInner + rnLeaf 'lnk' + rnLeaf '___' + """) |