summary refs log tree commit diff stats
path: root/lib/packages/docutils
diff options
context:
space:
mode:
authorAndrey Makarov <ph.makarov@gmail.com>2021-05-21 07:54:20 +0300
committerGitHub <noreply@github.com>2021-05-21 06:54:20 +0200
commit9f7e2e30573a377770fa630f12a47bac09751282 (patch)
tree65f2691ac1c438d97a041e77692b211606070206 /lib/packages/docutils
parent6a5973882bb0c4134a0e454ea4ae69dc54815f27 (diff)
downloadNim-9f7e2e30573a377770fa630f12a47bac09751282.tar.gz
docs: make inline markup more compatible with Markdown (#18053)
fixes https://github.com/timotheecour/Nim/issues/739
Diffstat (limited to 'lib/packages/docutils')
-rw-r--r--lib/packages/docutils/rst.nim145
1 files changed, 97 insertions, 48 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index da04e9b54..dae692fb7 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -130,6 +130,32 @@
 ## .. warning:: Using Nim-specific features can cause other RST implementations
 ##   to fail on your document.
 ##
+## Idiosyncrasies
+## --------------
+##
+## Currently we do **not** aim at 100% Markdown or RST compatibility in inline
+## markup recognition rules because that would provide very little user value.
+## This parser has 2 modes for inline markup:
+##
+## 1) Markdown-like mode which is enabled by `roPreferMarkdown` option
+##    (turned **on** by default).
+##
+##    .. Note:: RST features like directives are still turned **on**
+##
+## 2) Compatibility mode which is RST rules.
+##
+## .. Note:: in both modes the parser interprets text between single
+##    backticks (code) identically:
+##      backslash does not escape; the only exception: ``\`` followed by `
+##      does escape so that we can always input a single backtick ` in
+##      inline code. However that makes it impossible to input code with
+##      ``\`` at the end in *single* backticks, one must use *double*
+##      backticks::
+##
+##        `\`   -- WRONG
+##        ``\`` -- GOOD
+##        So single backticks can always be input: `\`` will turn to ` code
+##
 ## Limitations
 ## -----------
 ##
@@ -994,8 +1020,22 @@ proc expect(p: var RstParser, tok: string) =
   if currentTok(p).symbol == tok: inc p.idx
   else: rstMessage(p, meExpected, tok)
 
-proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
+proc inlineMarkdownEnd(p: RstParser): bool =
+  result = prevTok(p).kind notin {tkIndent, tkWhite}
+  ## (For a special case of ` we don't allow spaces surrounding it
+  ## unlike original Markdown because this behavior is confusing/useless)
+
+proc inlineRstEnd(p: RstParser): bool =
   # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
+  # Rule 2:
+  result = prevTok(p).kind notin {tkIndent, tkWhite}
+  if not result: return
+  # Rule 7:
+  result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
+      nextTok(p).symbol[0] in
+      {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
+
+proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
   if exact:
     result = currentTok(p).symbol == markup
   else:
@@ -1004,55 +1044,58 @@ proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
       # check that escaping may have splitted `` to 2 tokens ` and `
       result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`"
   if not result: return
-  # Rule 2:
-  result = prevTok(p).kind notin {tkIndent, tkWhite}
+  # surroundings check
+  if markup in ["_", "__"]:
+    result = inlineRstEnd(p)
+  else:
+    if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p)
+    else: result = inlineRstEnd(p)
+
+proc rstRuleSurround(p: RstParser): bool =
+  result = true
+  # Rules 4 & 5:
+  if p.idx > 0:
+    var d: char
+    var c = prevTok(p).symbol[0]
+    case c
+    of '\'', '\"': d = c
+    of '(': d = ')'
+    of '[': d = ']'
+    of '{': d = '}'
+    of '<': d = '>'
+    else: d = '\0'
+    if d != '\0': result = nextTok(p).symbol[0] != d
+
+proc inlineMarkdownStart(p: RstParser): bool =
+  result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
   if not result: return
-  # Rule 7:
-  result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
-      (roPreferMarkdown in p.s.options and
-        markup in ["``", "`"] and
-        nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or
-      nextTok(p).symbol[0] in
-      {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
+  # this rst rule is really nice, let us use it in Markdown mode too.
+  result = rstRuleSurround(p)
+
+proc inlineRstStart(p: RstParser): bool =
+  ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
+  # Rule 6
+  result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
+      prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
   if not result: return
-  # Rule 4:
-  if p.idx > 0:
-    # see bug #17260; for now `\` must be written ``\``, likewise with sequences
-    # ending in an un-escaped `\`; `\\` is legal but not `\\\` for example;
-    # for this reason we can't use `["``", "`"]` here.
-    if markup != "``" and prevTok(p).symbol == "\\":
-      result = false
+  # Rule 1:
+  result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
+  if not result: return
+  result = rstRuleSurround(p)
 
 proc isInlineMarkupStart(p: RstParser, markup: string): bool =
-  # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
-  var d: char
   if markup != "_`":
     result = currentTok(p).symbol == markup
   else:  # _` is a 2 token case
     result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
   if not result: return
-  # Rule 6:
-  result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
-      (markup in ["``", "`"] and prevTok(p).kind in {tkIndent, tkWhite, tkWord}) or
-      prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
-  if not result: return
-  # Rule 1:
-  result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
-  if not result: return
-  # Rules 4 & 5:
-  if p.idx > 0:
-    if prevTok(p).symbol == "\\":
-      result = false
-    else:
-      var c = prevTok(p).symbol[0]
-      case c
-      of '\'', '\"': d = c
-      of '(': d = ')'
-      of '[': d = ']'
-      of '{': d = '}'
-      of '<': d = '>'
-      else: d = '\0'
-      if d != '\0': result = nextTok(p).symbol[0] != d
+  # surroundings check
+  if markup in ["_", "__", "[", "|"]:
+    # Note: we require space/punctuation even before [markdown link](...)
+    result = inlineRstStart(p)
+  else:
+    if roPreferMarkdown in p.s.options: result = inlineMarkdownStart(p)
+    else: result = inlineRstStart(p)
 
 proc match(p: RstParser, start: int, expr: string): bool =
   # regular expressions are:
@@ -1263,10 +1306,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) =
 
 proc parseBackslash(p: var RstParser, father: PRstNode) =
   assert(currentTok(p).kind == tkPunct)
-  if currentTok(p).symbol == "\\\\":
-    father.add newLeaf("\\")
-    inc p.idx
-  elif currentTok(p).symbol == "\\":
+  if currentTok(p).symbol == "\\":
     # XXX: Unicode?
     inc p.idx
     if currentTok(p).kind != tkWhite: father.add(newLeaf(p))
@@ -1297,11 +1337,20 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
         break
       else:
         if postfix == "`":
-          if prevTok(p).symbol == "\\" and currentTok(p).symbol == "`":
-            father.sons[^1] = newLeaf(p) # instead, we should use lookahead
+          if currentTok(p).symbol == "\\":
+            if nextTok(p).symbol == "\\":
+              father.add newLeaf("\\")
+              father.add newLeaf("\\")
+              inc p.idx, 2
+            elif nextTok(p).symbol == "`":  # escape `
+              father.add newLeaf("`")
+              inc p.idx, 2
+            else:
+              father.add newLeaf("\\")
+              inc p.idx
           else:
             father.add(newLeaf(p))
-          inc p.idx
+            inc p.idx
         else:
           if interpretBackslash:
             parseBackslash(p, father)