summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- doc/regexprs.txt 2
-rw-r--r-- lib/impure/db_sqlite.nim 2
-rw-r--r-- lib/packages/docutils/rst.nim 145
-rw-r--r-- lib/posix/posix_utils.nim 2
-rw-r--r-- tests/stdlib/trst.nim 65
-rw-r--r-- tests/stdlib/trstgen.nim 12
6 files changed, 167 insertions, 61 deletions
diff --git a/doc/regexprs.txt b/doc/regexprs.txt
index b7370d858..9ec08b810 100644
--- a/doc/regexprs.txt
+++ b/doc/regexprs.txt
@@ -146,7 +146,7 @@ character          meaning
 After ``\x``, from zero to two hexadecimal digits are read (letters can be in
 upper or lower case). In UTF-8 mode, any number of hexadecimal digits may
 appear between ``\x{`` and ``}``, but the value of the character code must be
-less than 2**31 (that is, the maximum hexadecimal value is 7FFFFFFF). If
+less than 2^31 (that is, the maximum hexadecimal value is 7FFFFFFF). If
 characters other than hexadecimal digits appear between ``\x{`` and ``}``, or
 if there is no terminating ``}``, this form of escape is not recognized.
 Instead, the initial ``\x`` will be interpreted as a basic hexadecimal escape,
diff --git a/lib/impure/db_sqlite.nim b/lib/impure/db_sqlite.nim
index 832407960..7bd807a12 100644
--- a/lib/impure/db_sqlite.nim
+++ b/lib/impure/db_sqlite.nim
@@ -152,7 +152,7 @@
 ## Instead, a `seq[string]` is returned for each row.
 ##
 ## The reasoning is as follows:
-## 1. it's close to what many DBs offer natively (char**)
+## 1. it's close to what many DBs offer natively (`char**`:c:)
 ## 2. it hides the number of types that the DB supports
 ##    (int? int64? decimal up to 10 places? geo coords?)
 ## 3. it's convenient when all you do is to forward the data to somewhere else (echo, log, put the data into a new query)
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index da04e9b54..dae692fb7 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -130,6 +130,32 @@
 ## .. warning:: Using Nim-specific features can cause other RST implementations
 ##   to fail on your document.
 ##
+## Idiosyncrasies
+## --------------
+##
+## Currently we do **not** aim at 100% Markdown or RST compatibility in inline
+## markup recognition rules because that would provide very little user value.
+## This parser has 2 modes for inline markup:
+##
+## 1) Markdown-like mode which is enabled by `roPreferMarkdown` option
+##    (turned **on** by default).
+##
+##    .. Note:: RST features like directives are still turned **on**
+##
+## 2) Compatibility mode which is RST rules.
+##
+## .. Note:: in both modes the parser interprets text between single
+##    backticks (code) identically:
+##      backslash does not escape; the only exception: ``\`` followed by `
+##      does escape so that we can always input a single backtick ` in
+##      inline code. However that makes it impossible to input code with
+##      ``\`` at the end in *single* backticks, one must use *double*
+##      backticks::
+##
+##        `\`   -- WRONG
+##        ``\`` -- GOOD
+##        So single backticks can always be input: `\`` will turn to ` code
+##
 ## Limitations
 ## -----------
 ##
@@ -994,8 +1020,22 @@ proc expect(p: var RstParser, tok: string) =
   if currentTok(p).symbol == tok: inc p.idx
   else: rstMessage(p, meExpected, tok)
 
-proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
+proc inlineMarkdownEnd(p: RstParser): bool =
+  result = prevTok(p).kind notin {tkIndent, tkWhite}
+  ## (For a special case of ` we don't allow spaces surrounding it
+  ## unlike original Markdown because this behavior is confusing/useless)
+
+proc inlineRstEnd(p: RstParser): bool =
   # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
+  # Rule 2:
+  result = prevTok(p).kind notin {tkIndent, tkWhite}
+  if not result: return
+  # Rule 7:
+  result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
+      nextTok(p).symbol[0] in
+      {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
+
+proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
   if exact:
     result = currentTok(p).symbol == markup
   else:
@@ -1004,55 +1044,58 @@ proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
       # check that escaping may have splitted `` to 2 tokens ` and `
       result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`"
   if not result: return
-  # Rule 2:
-  result = prevTok(p).kind notin {tkIndent, tkWhite}
+  # surroundings check
+  if markup in ["_", "__"]:
+    result = inlineRstEnd(p)
+  else:
+    if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p)
+    else: result = inlineRstEnd(p)
+
+proc rstRuleSurround(p: RstParser): bool =
+  result = true
+  # Rules 4 & 5:
+  if p.idx > 0:
+    var d: char
+    var c = prevTok(p).symbol[0]
+    case c
+    of '\'', '\"': d = c
+    of '(': d = ')'
+    of '[': d = ']'
+    of '{': d = '}'
+    of '<': d = '>'
+    else: d = '\0'
+    if d != '\0': result = nextTok(p).symbol[0] != d
+
+proc inlineMarkdownStart(p: RstParser): bool =
+  result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
   if not result: return
-  # Rule 7:
-  result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
-      (roPreferMarkdown in p.s.options and
-        markup in ["``", "`"] and
-        nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or
-      nextTok(p).symbol[0] in
-      {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
+  # this rst rule is really nice, let us use it in Markdown mode too.
+  result = rstRuleSurround(p)
+
+proc inlineRstStart(p: RstParser): bool =
+  ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
+  # Rule 6
+  result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
+      prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
   if not result: return
-  # Rule 4:
-  if p.idx > 0:
-    # see bug #17260; for now `\` must be written ``\``, likewise with sequences
-    # ending in an un-escaped `\`; `\\` is legal but not `\\\` for example;
-    # for this reason we can't use `["``", "`"]` here.
-    if markup != "``" and prevTok(p).symbol == "\\":
-      result = false
+  # Rule 1:
+  result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
+  if not result: return
+  result = rstRuleSurround(p)
 
 proc isInlineMarkupStart(p: RstParser, markup: string): bool =
-  # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
-  var d: char
   if markup != "_`":
     result = currentTok(p).symbol == markup
   else:  # _` is a 2 token case
     result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
   if not result: return
-  # Rule 6:
-  result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
-      (markup in ["``", "`"] and prevTok(p).kind in {tkIndent, tkWhite, tkWord}) or
-      prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
-  if not result: return
-  # Rule 1:
-  result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
-  if not result: return
-  # Rules 4 & 5:
-  if p.idx > 0:
-    if prevTok(p).symbol == "\\":
-      result = false
-    else:
-      var c = prevTok(p).symbol[0]
-      case c
-      of '\'', '\"': d = c
-      of '(': d = ')'
-      of '[': d = ']'
-      of '{': d = '}'
-      of '<': d = '>'
-      else: d = '\0'
-      if d != '\0': result = nextTok(p).symbol[0] != d
+  # surroundings check
+  if markup in ["_", "__", "[", "|"]:
+    # Note: we require space/punctuation even before [markdown link](...)
+    result = inlineRstStart(p)
+  else:
+    if roPreferMarkdown in p.s.options: result = inlineMarkdownStart(p)
+    else: result = inlineRstStart(p)
 
 proc match(p: RstParser, start: int, expr: string): bool =
   # regular expressions are:
@@ -1263,10 +1306,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) =
 
 proc parseBackslash(p: var RstParser, father: PRstNode) =
   assert(currentTok(p).kind == tkPunct)
-  if currentTok(p).symbol == "\\\\":
-    father.add newLeaf("\\")
-    inc p.idx
-  elif currentTok(p).symbol == "\\":
+  if currentTok(p).symbol == "\\":
     # XXX: Unicode?
     inc p.idx
     if currentTok(p).kind != tkWhite: father.add(newLeaf(p))
@@ -1297,11 +1337,20 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
         break
       else:
         if postfix == "`":
-          if prevTok(p).symbol == "\\" and currentTok(p).symbol == "`":
-            father.sons[^1] = newLeaf(p) # instead, we should use lookahead
+          if currentTok(p).symbol == "\\":
+            if nextTok(p).symbol == "\\":
+              father.add newLeaf("\\")
+              father.add newLeaf("\\")
+              inc p.idx, 2
+            elif nextTok(p).symbol == "`":  # escape `
+              father.add newLeaf("`")
+              inc p.idx, 2
+            else:
+              father.add newLeaf("\\")
+              inc p.idx
           else:
             father.add(newLeaf(p))
-          inc p.idx
+            inc p.idx
         else:
           if interpretBackslash:
             parseBackslash(p, father)
diff --git a/lib/posix/posix_utils.nim b/lib/posix/posix_utils.nim
index aeec73a45..c2d5aab56 100644
--- a/lib/posix/posix_utils.nim
+++ b/lib/posix/posix_utils.nim
@@ -7,7 +7,7 @@
 #
 
 ## A set of helpers for the POSIX module.
-## Raw interfaces are in the other posix*.nim files.
+## Raw interfaces are in the other ``posix*.nim`` files.
 
 # Where possible, contribute OS-independent procs in `os <os.html>`_ instead.
 
diff --git a/tests/stdlib/trst.nim b/tests/stdlib/trst.nim
index 71f5a858b..ec34edc91 100644
--- a/tests/stdlib/trst.nim
+++ b/tests/stdlib/trst.nim
@@ -23,7 +23,7 @@ import std/private/miscdollars
 import os
 
 proc toAst(input: string,
-            rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile},
+            rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile},
             error: ref string = nil,
             warnings: ref seq[string] = nil): string =
   ## If `error` is nil then no errors should be generated.
@@ -36,10 +36,11 @@ proc toAst(input: string,
     toLocation(message, filename, line, col + ColRstOffset)
     message.add " $1: $2" % [$mc, a]
     if mc == mcError:
-      doAssert error != nil, "unexpected RST error '" & message & "'"
+      if error == nil:
+        raise newException(EParseError, "[unexpected error] " & message)
       error[] = message
       # we check only first error because subsequent ones may be meaningless
-      raise newException(EParseError, message)
+      raise newException(EParseError, "")
     else:
       doAssert warnings != nil, "unexpected RST warning '" & message & "'"
       warnings[].add message
@@ -54,8 +55,9 @@ proc toAst(input: string,
     var rst = rstParse(input, filen, line=LineRstInit, column=ColRstInit,
                        dummyHasToc, rstOptions, myFindFile, testMsgHandler)
     result = renderRstToStr(rst)
-  except EParseError:
-    discard
+  except EParseError as e:
+    if e.msg != "":
+      result = e.msg
 
 suite "RST parsing":
   test "option list has priority over definition list":
@@ -326,6 +328,28 @@ suite "RST escaping":
       """)
 
 suite "RST inline markup":
+  test "* and ** surrounded by spaces are not inline markup":
+    check("a * b * c ** d ** e".toAst == dedent"""
+      rnInner
+        rnLeaf  'a'
+        rnLeaf  ' '
+        rnLeaf  '*'
+        rnLeaf  ' '
+        rnLeaf  'b'
+        rnLeaf  ' '
+        rnLeaf  '*'
+        rnLeaf  ' '
+        rnLeaf  'c'
+        rnLeaf  ' '
+        rnLeaf  '**'
+        rnLeaf  ' '
+        rnLeaf  'd'
+        rnLeaf  ' '
+        rnLeaf  '**'
+        rnLeaf  ' '
+        rnLeaf  'e'
+      """)
+
   test "end-string has repeating symbols":
     check("*emphasis content****".toAst == dedent"""
       rnEmphasis
@@ -420,6 +444,37 @@ suite "RST inline markup":
           rnLeaf  'proc `+`'
       """)
 
+    check("""`\\`""".toAst ==
+      dedent"""
+        rnInlineCode
+          rnDirArg
+            rnLeaf  'nim'
+          [nil]
+          rnLiteralBlock
+            rnLeaf  '\\'
+        """)
+
+  test "Markdown-style code/backtick":
+    # no whitespace is required before `
+    check("`try`...`except`".toAst ==
+      dedent"""
+        rnInner
+          rnInlineCode
+            rnDirArg
+              rnLeaf  'nim'
+            [nil]
+            rnLiteralBlock
+              rnLeaf  'try'
+          rnLeaf  '...'
+          rnInlineCode
+            rnDirArg
+              rnLeaf  'nim'
+            [nil]
+            rnLiteralBlock
+              rnLeaf  'except'
+        """)
+
+
   test """inline literals can contain \ anywhere""":
     check("""``\``""".toAst == dedent"""
       rnInlineLiteral
diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim
index 667fec780..864728686 100644
--- a/tests/stdlib/trstgen.nim
+++ b/tests/stdlib/trstgen.nim
@@ -10,7 +10,7 @@ import unittest, strutils, strtabs
 import std/private/miscdollars
 
 proc toHtml(input: string,
-            rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile},
+            rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile},
             error: ref string = nil,
             warnings: ref seq[string] = nil): string =
   ## If `error` is nil then no errors should be generated.
@@ -23,18 +23,20 @@ proc toHtml(input: string,
     toLocation(message, filename, line, col + ColRstOffset)
     message.add " $1: $2" % [$mc, a]
     if mc == mcError:
-      doAssert error != nil, "unexpected RST error '" & message & "'"
+      if error == nil:
+        raise newException(EParseError, "[unexpected error] " & message)
       error[] = message
       # we check only first error because subsequent ones may be meaningless
-      raise newException(EParseError, message)
+      raise newException(EParseError, "")
     else:
       doAssert warnings != nil, "unexpected RST warning '" & message & "'"
       warnings[].add message
   try:
     result = rstToHtml(input, rstOptions, defaultConfig(),
                        msgHandler=testMsgHandler)
-  except EParseError:
-    discard
+  except EParseError as e:
+    if e.msg != "":
+      result = e.msg
 
 # inline code tags (for parsing originated from highlite.nim)
 proc id(str: string): string = """<span class="Identifier">"""  & str & "</span>"