diff options
author | Andrey Makarov <ph.makarov@gmail.com> | 2021-04-29 18:31:15 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-29 17:31:15 +0200 |
commit | 16405083485967a395b8d677833bc26040881b21 (patch) | |
tree | a60fa53745e1ac48c2dc189347c1ad470a0d701b /lib | |
parent | e61381a293ab4986427c5ce8968c6c031c83f34f (diff) | |
download | Nim-16405083485967a395b8d677833bc26040881b21.tar.gz |
more strict RST inline markup parsing (#17827)
* more strict RST inline markup parsing
* add test for unexpected bonus
* introduce `roPreferMarkdown`
Diffstat (limited to 'lib')
-rw-r--r-- | lib/packages/docutils/rst.nim | 52 |
1 file changed, 41 insertions, 11 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index cb65791ff..6ad466716 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -11,9 +11,9 @@ ## rst ## ================================== ## -## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## Nim-flavored reStructuredText -## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +## Nim-flavored reStructuredText and Markdown +## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## ## This module implements a `reStructuredText`:idx: (RST) parser. ## A large subset is implemented with some limitations_ and @@ -177,6 +177,8 @@ type roSupportRawDirective, ## support the ``raw`` directive (don't support ## it for sandboxing) roSupportMarkdown, ## support additional features of Markdown + roPreferMarkdown, ## parse as Markdown (keeping RST as "extension" + ## to Markdown) -- implies `roSupportMarkdown` roNimFile ## set for Nim files where default interpreted ## text role should be :nim: @@ -277,6 +279,7 @@ type line*, col*, baseIndent*: int skipPounds*: bool adornmentLine*: bool + escapeNext*: bool proc getThing(L: var Lexer, tok: var Token, s: set[char]) = tok.kind = tkWord @@ -314,10 +317,18 @@ proc getPunctAdornment(L: var Lexer, tok: var Token) = tok.col = L.col var pos = L.bufpos let c = L.buf[pos] - while true: + if not L.escapeNext and (c != '\\' or L.adornmentLine): + while true: + tok.symbol.add(L.buf[pos]) + inc pos + if L.buf[pos] != c: break + elif L.escapeNext: tok.symbol.add(L.buf[pos]) inc pos - if L.buf[pos] != c: break + else: # not L.escapeNext and c == '\\' and not L.adornmentLine + tok.symbol.add '\\' + inc pos + L.escapeNext = true inc L.col, pos - L.bufpos L.bufpos = pos if tok.symbol == "\\": tok.kind = tkPunct @@ -429,7 +440,9 @@ proc getTokens(buffer: string, skipPounds: bool, tokens: var TokenSeq): int = while true: inc length setLen(tokens, length) + let toEscape = L.escapeNext rawGetTok(L, tokens[length - 1]) + if toEscape: L.escapeNext 
= false if tokens[length - 1].kind == tkEof: break if tokens[0].kind == tkWhite: # BUGFIX @@ -981,16 +994,24 @@ proc expect(p: var RstParser, tok: string) = if currentTok(p).symbol == tok: inc p.idx else: rstMessage(p, meExpected, tok) -proc isInlineMarkupEnd(p: RstParser, markup: string): bool = +proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules - result = currentTok(p).symbol == markup + if exact: + result = currentTok(p).symbol == markup + else: + result = currentTok(p).symbol.endsWith markup + if (not result) and markup == "``": + # check that escaping may have splitted `` to 2 tokens ` and ` + result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`" if not result: return # Rule 2: result = prevTok(p).kind notin {tkIndent, tkWhite} if not result: return # Rule 7: result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or - markup in ["``", "`"] and nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof} or + (roPreferMarkdown in p.s.options and + markup in ["``", "`"] and + nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or nextTok(p).symbol[0] in {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'} if not result: return @@ -1130,7 +1151,8 @@ proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode = proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode = var newKind = n.kind var newSons = n.sons - if isInlineMarkupEnd(p, "_") or isInlineMarkupEnd(p, "__"): + if isInlineMarkupEnd(p, "_", exact=true) or + isInlineMarkupEnd(p, "__", exact=true): inc p.idx if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">": var a = newRstNode(rnInner) @@ -1215,7 +1237,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) = inc p.idx while currentTok(p).kind in {tkWord, tkPunct}: if currentTok(p).kind == tkPunct: - if isInlineMarkupEnd(p, "_"): + if 
isInlineMarkupEnd(p, "_", exact=true): isRef = true break if not validRefnamePunct(currentTok(p).symbol): @@ -1253,7 +1275,15 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string, while true: case currentTok(p).kind of tkPunct: - if isInlineMarkupEnd(p, postfix): + if isInlineMarkupEnd(p, postfix, exact=false): + let l = currentTok(p).symbol.len + if l > postfix.len: + # handle cases like *emphasis with stars****. (It's valid RST!) + father.add newLeaf(currentTok(p).symbol[0 ..< l - postfix.len]) + elif postfix == "``" and currentTok(p).symbol == "`" and + prevTok(p).symbol == "`": + # handle cases like ``literal\`` - delete ` already added after \ + father.sons.setLen(father.sons.len - 1) inc p.idx break else: |