diff options
author | Andrey Makarov <ph.makarov@gmail.com> | 2021-05-21 07:54:20 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-21 06:54:20 +0200 |
commit | 9f7e2e30573a377770fa630f12a47bac09751282 (patch) | |
tree | 65f2691ac1c438d97a041e77692b211606070206 /lib/packages/docutils | |
parent | 6a5973882bb0c4134a0e454ea4ae69dc54815f27 (diff) | |
download | Nim-9f7e2e30573a377770fa630f12a47bac09751282.tar.gz |
docs: make inline markup more compatible with Markdown (#18053)
fixes https://github.com/timotheecour/Nim/issues/739
Diffstat (limited to 'lib/packages/docutils')
-rw-r--r-- | lib/packages/docutils/rst.nim | 145 |
1 files changed, 97 insertions, 48 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index da04e9b54..dae692fb7 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -130,6 +130,32 @@ ## .. warning:: Using Nim-specific features can cause other RST implementations ## to fail on your document. ## +## Idiosyncrasies +## -------------- +## +## Currently we do **not** aim at 100% Markdown or RST compatibility in inline +## markup recognition rules because that would provide very little user value. +## This parser has 2 modes for inline markup: +## +## 1) Markdown-like mode which is enabled by `roPreferMarkdown` option +## (turned **on** by default). +## +## .. Note:: RST features like directives are still turned **on** +## +## 2) Compatibility mode which is RST rules. +## +## .. Note:: in both modes the parser interprets text between single +## backticks (code) identically: +## backslash does not escape; the only exception: ``\`` followed by ` +## does escape so that we can always input a single backtick ` in +## inline code. 
However that makes it impossible to input code with +## ``\`` at the end in *single* backticks, one must use *double* +## backticks:: +## +## `\` -- WRONG +## ``\`` -- GOOD +## So single backticks can always be input: `\`` will turn to ` code +## ## Limitations ## ----------- ## @@ -994,8 +1020,22 @@ proc expect(p: var RstParser, tok: string) = if currentTok(p).symbol == tok: inc p.idx else: rstMessage(p, meExpected, tok) -proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = +proc inlineMarkdownEnd(p: RstParser): bool = + result = prevTok(p).kind notin {tkIndent, tkWhite} + ## (For a special case of ` we don't allow spaces surrounding it + ## unlike original Markdown because this behavior is confusing/useless) + +proc inlineRstEnd(p: RstParser): bool = # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules + # Rule 2: + result = prevTok(p).kind notin {tkIndent, tkWhite} + if not result: return + # Rule 7: + result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or + nextTok(p).symbol[0] in + {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'} + +proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = if exact: result = currentTok(p).symbol == markup else: @@ -1004,55 +1044,58 @@ proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = # check that escaping may have split `` to 2 tokens ` and ` result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`" if not result: return - # Rule 2: - result = prevTok(p).kind notin {tkIndent, tkWhite} + # surroundings check + if markup in ["_", "__"]: + result = inlineRstEnd(p) + else: + if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p) + else: result = inlineRstEnd(p) + +proc rstRuleSurround(p: RstParser): bool = + result = true + # Rules 4 & 5: + if p.idx > 0: + var d: char + var c = prevTok(p).symbol[0] + case c + of '\'', '\"': d = c + of '(': d = ')' + of '[': d 
= ']' + of '{': d = '}' + of '<': d = '>' + else: d = '\0' + if d != '\0': result = nextTok(p).symbol[0] != d + +proc inlineMarkdownStart(p: RstParser): bool = + result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} if not result: return - # Rule 7: - result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or - (roPreferMarkdown in p.s.options and - markup in ["``", "`"] and - nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or - nextTok(p).symbol[0] in - {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'} + # this rst rule is really nice, let us use it in Markdown mode too. + result = rstRuleSurround(p) + +proc inlineRstStart(p: RstParser): bool = + ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules + # Rule 6 + result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or + prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'} if not result: return - # Rule 4: - if p.idx > 0: - # see bug #17260; for now `\` must be written ``\``, likewise with sequences - # ending in an un-escaped `\`; `\\` is legal but not `\\\` for example; - # for this reason we can't use `["``", "`"]` here. 
- if markup != "``" and prevTok(p).symbol == "\\": - result = false + # Rule 1: + result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} + if not result: return + result = rstRuleSurround(p) proc isInlineMarkupStart(p: RstParser, markup: string): bool = - # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules - var d: char if markup != "_`": result = currentTok(p).symbol == markup else: # _` is a 2 token case result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`" if not result: return - # Rule 6: - result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or - (markup in ["``", "`"] and prevTok(p).kind in {tkIndent, tkWhite, tkWord}) or - prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'} - if not result: return - # Rule 1: - result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} - if not result: return - # Rules 4 & 5: - if p.idx > 0: - if prevTok(p).symbol == "\\": - result = false - else: - var c = prevTok(p).symbol[0] - case c - of '\'', '\"': d = c - of '(': d = ')' - of '[': d = ']' - of '{': d = '}' - of '<': d = '>' - else: d = '\0' - if d != '\0': result = nextTok(p).symbol[0] != d + # surroundings check + if markup in ["_", "__", "[", "|"]: + # Note: we require space/punctuation even before [markdown link](...) + result = inlineRstStart(p) + else: + if roPreferMarkdown in p.s.options: result = inlineMarkdownStart(p) + else: result = inlineRstStart(p) proc match(p: RstParser, start: int, expr: string): bool = # regular expressions are: @@ -1263,10 +1306,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) = proc parseBackslash(p: var RstParser, father: PRstNode) = assert(currentTok(p).kind == tkPunct) - if currentTok(p).symbol == "\\\\": - father.add newLeaf("\\") - inc p.idx - elif currentTok(p).symbol == "\\": + if currentTok(p).symbol == "\\": # XXX: Unicode? 
inc p.idx if currentTok(p).kind != tkWhite: father.add(newLeaf(p)) @@ -1297,11 +1337,20 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string, break else: if postfix == "`": - if prevTok(p).symbol == "\\" and currentTok(p).symbol == "`": - father.sons[^1] = newLeaf(p) # instead, we should use lookahead + if currentTok(p).symbol == "\\": + if nextTok(p).symbol == "\\": + father.add newLeaf("\\") + father.add newLeaf("\\") + inc p.idx, 2 + elif nextTok(p).symbol == "`": # escape ` + father.add newLeaf("`") + inc p.idx, 2 + else: + father.add newLeaf("\\") + inc p.idx else: father.add(newLeaf(p)) - inc p.idx + inc p.idx else: if interpretBackslash: parseBackslash(p, father) |