diff options
-rw-r--r-- | doc/regexprs.txt | 2 | ||||
-rw-r--r-- | lib/impure/db_sqlite.nim | 2 | ||||
-rw-r--r-- | lib/packages/docutils/rst.nim | 145 | ||||
-rw-r--r-- | lib/posix/posix_utils.nim | 2 | ||||
-rw-r--r-- | tests/stdlib/trst.nim | 65 | ||||
-rw-r--r-- | tests/stdlib/trstgen.nim | 12 |
6 files changed, 167 insertions, 61 deletions
diff --git a/doc/regexprs.txt b/doc/regexprs.txt index b7370d858..9ec08b810 100644 --- a/doc/regexprs.txt +++ b/doc/regexprs.txt @@ -146,7 +146,7 @@ character meaning After ``\x``, from zero to two hexadecimal digits are read (letters can be in upper or lower case). In UTF-8 mode, any number of hexadecimal digits may appear between ``\x{`` and ``}``, but the value of the character code must be -less than 2**31 (that is, the maximum hexadecimal value is 7FFFFFFF). If +less than 2^31 (that is, the maximum hexadecimal value is 7FFFFFFF). If characters other than hexadecimal digits appear between ``\x{`` and ``}``, or if there is no terminating ``}``, this form of escape is not recognized. Instead, the initial ``\x`` will be interpreted as a basic hexadecimal escape, diff --git a/lib/impure/db_sqlite.nim b/lib/impure/db_sqlite.nim index 832407960..7bd807a12 100644 --- a/lib/impure/db_sqlite.nim +++ b/lib/impure/db_sqlite.nim @@ -152,7 +152,7 @@ ## Instead, a `seq[string]` is returned for each row. ## ## The reasoning is as follows: -## 1. it's close to what many DBs offer natively (char**) +## 1. it's close to what many DBs offer natively (`char**`:c:) ## 2. it hides the number of types that the DB supports ## (int? int64? decimal up to 10 places? geo coords?) ## 3. it's convenient when all you do is to forward the data to somewhere else (echo, log, put the data into a new query) diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index da04e9b54..dae692fb7 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -130,6 +130,32 @@ ## .. warning:: Using Nim-specific features can cause other RST implementations ## to fail on your document. ## +## Idiosyncrasies +## -------------- +## +## Currently we do **not** aim at 100% Markdown or RST compatibility in inline +## markup recognition rules because that would provide very little user value. 
+## This parser has 2 modes for inline markup: +## +## 1) Markdown-like mode which is enabled by `roPreferMarkdown` option +## (turned **on** by default). +## +## .. Note:: RST features like directives are still turned **on** +## +## 2) Compatibility mode which follows RST rules. +## +## .. Note:: in both modes the parser interprets text between single +## backticks (code) identically: +## backslash does not escape; the only exception: ``\`` followed by ` +## does escape so that we can always input a single backtick ` in +## inline code. However that makes it impossible to input code with +## ``\`` at the end in *single* backticks, one must use *double* +## backticks:: +## +## `\` -- WRONG +## ``\`` -- GOOD +## So single backticks can always be input: `\`` will turn to ` code +## ## Limitations ## ----------- ## @@ -994,8 +1020,22 @@ proc expect(p: var RstParser, tok: string) = if currentTok(p).symbol == tok: inc p.idx else: rstMessage(p, meExpected, tok) -proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = +proc inlineMarkdownEnd(p: RstParser): bool = + result = prevTok(p).kind notin {tkIndent, tkWhite} + ## (For a special case of ` we don't allow spaces surrounding it + ## unlike original Markdown because this behavior is confusing/useless) + +proc inlineRstEnd(p: RstParser): bool = # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules + # Rule 2: + result = prevTok(p).kind notin {tkIndent, tkWhite} + if not result: return + # Rule 7: + result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or + nextTok(p).symbol[0] in + {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'} + +proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = if exact: result = currentTok(p).symbol == markup else: @@ -1004,55 +1044,58 @@ proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool = # check that escaping may have splitted `` to 2 tokens ` and ` 
result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`" if not result: return - # Rule 2: - result = prevTok(p).kind notin {tkIndent, tkWhite} + # surroundings check + if markup in ["_", "__"]: + result = inlineRstEnd(p) + else: + if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p) + else: result = inlineRstEnd(p) + +proc rstRuleSurround(p: RstParser): bool = + result = true + # Rules 4 & 5: + if p.idx > 0: + var d: char + var c = prevTok(p).symbol[0] + case c + of '\'', '\"': d = c + of '(': d = ')' + of '[': d = ']' + of '{': d = '}' + of '<': d = '>' + else: d = '\0' + if d != '\0': result = nextTok(p).symbol[0] != d + +proc inlineMarkdownStart(p: RstParser): bool = + result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} if not result: return - # Rule 7: - result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or - (roPreferMarkdown in p.s.options and - markup in ["``", "`"] and - nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or - nextTok(p).symbol[0] in - {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'} + # this rst rule is really nice, let us use it in Markdown mode too. + result = rstRuleSurround(p) + +proc inlineRstStart(p: RstParser): bool = + ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules + # Rule 6 + result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or + prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'} if not result: return - # Rule 4: - if p.idx > 0: - # see bug #17260; for now `\` must be written ``\``, likewise with sequences - # ending in an un-escaped `\`; `\\` is legal but not `\\\` for example; - # for this reason we can't use `["``", "`"]` here. 
- if markup != "``" and prevTok(p).symbol == "\\": - result = false + # Rule 1: + result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} + if not result: return + result = rstRuleSurround(p) proc isInlineMarkupStart(p: RstParser, markup: string): bool = - # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules - var d: char if markup != "_`": result = currentTok(p).symbol == markup else: # _` is a 2 token case result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`" if not result: return - # Rule 6: - result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or - (markup in ["``", "`"] and prevTok(p).kind in {tkIndent, tkWhite, tkWord}) or - prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'} - if not result: return - # Rule 1: - result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof} - if not result: return - # Rules 4 & 5: - if p.idx > 0: - if prevTok(p).symbol == "\\": - result = false - else: - var c = prevTok(p).symbol[0] - case c - of '\'', '\"': d = c - of '(': d = ')' - of '[': d = ']' - of '{': d = '}' - of '<': d = '>' - else: d = '\0' - if d != '\0': result = nextTok(p).symbol[0] != d + # surroundings check + if markup in ["_", "__", "[", "|"]: + # Note: we require space/punctuation even before [markdown link](...) + result = inlineRstStart(p) + else: + if roPreferMarkdown in p.s.options: result = inlineMarkdownStart(p) + else: result = inlineRstStart(p) proc match(p: RstParser, start: int, expr: string): bool = # regular expressions are: @@ -1263,10 +1306,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) = proc parseBackslash(p: var RstParser, father: PRstNode) = assert(currentTok(p).kind == tkPunct) - if currentTok(p).symbol == "\\\\": - father.add newLeaf("\\") - inc p.idx - elif currentTok(p).symbol == "\\": + if currentTok(p).symbol == "\\": # XXX: Unicode? 
inc p.idx if currentTok(p).kind != tkWhite: father.add(newLeaf(p)) @@ -1297,11 +1337,20 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string, break else: if postfix == "`": - if prevTok(p).symbol == "\\" and currentTok(p).symbol == "`": - father.sons[^1] = newLeaf(p) # instead, we should use lookahead + if currentTok(p).symbol == "\\": + if nextTok(p).symbol == "\\": + father.add newLeaf("\\") + father.add newLeaf("\\") + inc p.idx, 2 + elif nextTok(p).symbol == "`": # escape ` + father.add newLeaf("`") + inc p.idx, 2 + else: + father.add newLeaf("\\") + inc p.idx else: father.add(newLeaf(p)) - inc p.idx + inc p.idx else: if interpretBackslash: parseBackslash(p, father) diff --git a/lib/posix/posix_utils.nim b/lib/posix/posix_utils.nim index aeec73a45..c2d5aab56 100644 --- a/lib/posix/posix_utils.nim +++ b/lib/posix/posix_utils.nim @@ -7,7 +7,7 @@ # ## A set of helpers for the POSIX module. -## Raw interfaces are in the other posix*.nim files. +## Raw interfaces are in the other ``posix*.nim`` files. # Where possible, contribute OS-independent procs in `os <os.html>`_ instead. diff --git a/tests/stdlib/trst.nim b/tests/stdlib/trst.nim index 71f5a858b..ec34edc91 100644 --- a/tests/stdlib/trst.nim +++ b/tests/stdlib/trst.nim @@ -23,7 +23,7 @@ import std/private/miscdollars import os proc toAst(input: string, - rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile}, + rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile}, error: ref string = nil, warnings: ref seq[string] = nil): string = ## If `error` is nil then no errors should be generated. 
@@ -36,10 +36,11 @@ proc toAst(input: string, toLocation(message, filename, line, col + ColRstOffset) message.add " $1: $2" % [$mc, a] if mc == mcError: - doAssert error != nil, "unexpected RST error '" & message & "'" + if error == nil: + raise newException(EParseError, "[unexpected error] " & message) error[] = message # we check only first error because subsequent ones may be meaningless - raise newException(EParseError, message) + raise newException(EParseError, "") else: doAssert warnings != nil, "unexpected RST warning '" & message & "'" warnings[].add message @@ -54,8 +55,9 @@ proc toAst(input: string, var rst = rstParse(input, filen, line=LineRstInit, column=ColRstInit, dummyHasToc, rstOptions, myFindFile, testMsgHandler) result = renderRstToStr(rst) - except EParseError: - discard + except EParseError as e: + if e.msg != "": + result = e.msg suite "RST parsing": test "option list has priority over definition list": @@ -326,6 +328,28 @@ suite "RST escaping": """) suite "RST inline markup": + test "* and ** surrounded by spaces are not inline markup": + check("a * b * c ** d ** e".toAst == dedent""" + rnInner + rnLeaf 'a' + rnLeaf ' ' + rnLeaf '*' + rnLeaf ' ' + rnLeaf 'b' + rnLeaf ' ' + rnLeaf '*' + rnLeaf ' ' + rnLeaf 'c' + rnLeaf ' ' + rnLeaf '**' + rnLeaf ' ' + rnLeaf 'd' + rnLeaf ' ' + rnLeaf '**' + rnLeaf ' ' + rnLeaf 'e' + """) + test "end-string has repeating symbols": check("*emphasis content****".toAst == dedent""" rnEmphasis @@ -420,6 +444,37 @@ suite "RST inline markup": rnLeaf 'proc `+`' """) + check("""`\\`""".toAst == + dedent""" + rnInlineCode + rnDirArg + rnLeaf 'nim' + [nil] + rnLiteralBlock + rnLeaf '\\' + """) + + test "Markdown-style code/backtick": + # no whitespace is required before ` + check("`try`...`except`".toAst == + dedent""" + rnInner + rnInlineCode + rnDirArg + rnLeaf 'nim' + [nil] + rnLiteralBlock + rnLeaf 'try' + rnLeaf '...' 
+ rnInlineCode + rnDirArg + rnLeaf 'nim' + [nil] + rnLiteralBlock + rnLeaf 'except' + """) + + test """inline literals can contain \ anywhere""": check("""``\``""".toAst == dedent""" rnInlineLiteral diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim index 667fec780..864728686 100644 --- a/tests/stdlib/trstgen.nim +++ b/tests/stdlib/trstgen.nim @@ -10,7 +10,7 @@ import unittest, strutils, strtabs import std/private/miscdollars proc toHtml(input: string, - rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile}, + rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile}, error: ref string = nil, warnings: ref seq[string] = nil): string = ## If `error` is nil then no errors should be generated. @@ -23,18 +23,20 @@ proc toHtml(input: string, toLocation(message, filename, line, col + ColRstOffset) message.add " $1: $2" % [$mc, a] if mc == mcError: - doAssert error != nil, "unexpected RST error '" & message & "'" + if error == nil: + raise newException(EParseError, "[unexpected error] " & message) error[] = message # we check only first error because subsequent ones may be meaningless - raise newException(EParseError, message) + raise newException(EParseError, "") else: doAssert warnings != nil, "unexpected RST warning '" & message & "'" warnings[].add message try: result = rstToHtml(input, rstOptions, defaultConfig(), msgHandler=testMsgHandler) - except EParseError: - discard + except EParseError as e: + if e.msg != "": + result = e.msg # inline code tags (for parsing originated from highlite.nim) proc id(str: string): string = """<span class="Identifier">""" & str & "</span>" |