summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--compiler/docgen.nim7
-rw-r--r--lib/packages/docutils/rst.nim52
-rw-r--r--tests/stdlib/trst.nim161
3 files changed, 207 insertions, 13 deletions
diff --git a/compiler/docgen.nim b/compiler/docgen.nim
index 977fcf8ef..78fb88f9b 100644
--- a/compiler/docgen.nim
+++ b/compiler/docgen.nim
@@ -178,7 +178,8 @@ proc newDocumentor*(filename: AbsoluteFile; cache: IdentCache; conf: ConfigRef,
   result.outDir = conf.outDir.string
   initRstGenerator(result[], (if conf.cmd != cmdRst2tex: outHtml else: outLatex),
                    conf.configVars, filename.string,
-                   {roSupportRawDirective, roSupportMarkdown, roNimFile},
+                   {roSupportRawDirective, roSupportMarkdown,
+                    roPreferMarkdown, roNimFile},
                    docgenFindFile, compilerMsgHandler)
 
   if conf.configVars.hasKey("doc.googleAnalytics"):
@@ -1380,7 +1381,9 @@ proc commandRstAux(cache: IdentCache, conf: ConfigRef;
   d.isPureRst = true
   var rst = parseRst(readFile(filen.string), filen.string,
                      line=LineRstInit, column=ColRstInit,
-                     d.hasToc, {roSupportRawDirective, roSupportMarkdown}, conf)
+                     d.hasToc,
+                     {roSupportRawDirective, roSupportMarkdown, roPreferMarkdown},
+                     conf)
   var modDesc = newStringOfCap(30_000)
   renderRstToOut(d[], rst, modDesc)
   d.modDesc = rope(modDesc)
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index cb65791ff..6ad466716 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -11,9 +11,9 @@
 ##                rst
 ## ==================================
 ##
-## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-## Nim-flavored reStructuredText
-## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+## Nim-flavored reStructuredText and Markdown
+## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 ##
 ## This module implements a `reStructuredText`:idx: (RST) parser.
 ## A large subset is implemented with some limitations_ and
@@ -177,6 +177,8 @@ type
     roSupportRawDirective,    ## support the ``raw`` directive (don't support
                               ## it for sandboxing)
     roSupportMarkdown,        ## support additional features of Markdown
+    roPreferMarkdown,         ## parse as Markdown (keeping RST as "extension"
+                              ## to Markdown) -- implies `roSupportMarkdown`
     roNimFile                 ## set for Nim files where default interpreted
                               ## text role should be :nim:
 
@@ -277,6 +279,7 @@ type
     line*, col*, baseIndent*: int
     skipPounds*: bool
     adornmentLine*: bool
+    escapeNext*: bool
 
 proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
   tok.kind = tkWord
@@ -314,10 +317,18 @@ proc getPunctAdornment(L: var Lexer, tok: var Token) =
   tok.col = L.col
   var pos = L.bufpos
   let c = L.buf[pos]
-  while true:
+  if not L.escapeNext and (c != '\\' or L.adornmentLine):
+    while true:
+      tok.symbol.add(L.buf[pos])
+      inc pos
+      if L.buf[pos] != c: break
+  elif L.escapeNext:
     tok.symbol.add(L.buf[pos])
     inc pos
-    if L.buf[pos] != c: break
+  else:  # not L.escapeNext and c == '\\' and not L.adornmentLine
+    tok.symbol.add '\\'
+    inc pos
+    L.escapeNext = true
   inc L.col, pos - L.bufpos
   L.bufpos = pos
   if tok.symbol == "\\": tok.kind = tkPunct
@@ -429,7 +440,9 @@ proc getTokens(buffer: string, skipPounds: bool, tokens: var TokenSeq): int =
   while true:
     inc length
     setLen(tokens, length)
+    let toEscape = L.escapeNext
     rawGetTok(L, tokens[length - 1])
+    if toEscape: L.escapeNext = false
     if tokens[length - 1].kind == tkEof: break
   if tokens[0].kind == tkWhite:
     # BUGFIX
@@ -981,16 +994,24 @@ proc expect(p: var RstParser, tok: string) =
   if currentTok(p).symbol == tok: inc p.idx
   else: rstMessage(p, meExpected, tok)
 
-proc isInlineMarkupEnd(p: RstParser, markup: string): bool =
+proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
   # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
-  result = currentTok(p).symbol == markup
+  if exact:
+    result = currentTok(p).symbol == markup
+  else:
+    result = currentTok(p).symbol.endsWith markup
+    if (not result) and markup == "``":
+      # check that escaping may have splitted `` to 2 tokens ` and `
+      result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`"
   if not result: return
   # Rule 2:
   result = prevTok(p).kind notin {tkIndent, tkWhite}
   if not result: return
   # Rule 7:
   result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
-      markup in ["``", "`"] and nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof} or
+      (roPreferMarkdown in p.s.options and
+        markup in ["``", "`"] and
+        nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or
       nextTok(p).symbol[0] in
       {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
   if not result: return
@@ -1130,7 +1151,8 @@ proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode =
 proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
   var newKind = n.kind
   var newSons = n.sons
-  if isInlineMarkupEnd(p, "_") or isInlineMarkupEnd(p, "__"):
+  if isInlineMarkupEnd(p, "_", exact=true) or
+      isInlineMarkupEnd(p, "__", exact=true):
     inc p.idx
     if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">":
       var a = newRstNode(rnInner)
@@ -1215,7 +1237,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) =
     inc p.idx
     while currentTok(p).kind in {tkWord, tkPunct}:
       if currentTok(p).kind == tkPunct:
-        if isInlineMarkupEnd(p, "_"):
+        if isInlineMarkupEnd(p, "_", exact=true):
           isRef = true
           break
         if not validRefnamePunct(currentTok(p).symbol):
@@ -1253,7 +1275,15 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
   while true:
     case currentTok(p).kind
     of tkPunct:
-      if isInlineMarkupEnd(p, postfix):
+      if isInlineMarkupEnd(p, postfix, exact=false):
+        let l = currentTok(p).symbol.len
+        if l > postfix.len:
+          # handle cases like *emphasis with stars****. (It's valid RST!)
+          father.add newLeaf(currentTok(p).symbol[0 ..< l - postfix.len])
+        elif postfix == "``" and currentTok(p).symbol == "`" and
+            prevTok(p).symbol == "`":
+          # handle cases like ``literal\`` - delete ` already added after \
+          father.sons.setLen(father.sons.len - 1)
         inc p.idx
         break
       else:
diff --git a/tests/stdlib/trst.nim b/tests/stdlib/trst.nim
index 2398b92a8..fef80dfc7 100644
--- a/tests/stdlib/trst.nim
+++ b/tests/stdlib/trst.nim
@@ -4,6 +4,10 @@ discard """
 [Suite] RST indentation
 
 [Suite] RST include directive
+
+[Suite] RST escaping
+
+[Suite] RST inline markup
 '''
 """
 
@@ -267,3 +271,160 @@ And this should **NOT** be visible in `docs.html`
 """
     doAssert "<em>Visible</em>" == rstTohtml(input, {}, defaultConfig())
     removeFile("other.rst")
+
+suite "RST escaping":
+  test "backspaces":
+    check("""\ this""".toAst == dedent"""
+      rnLeaf  'this'
+      """)
+
+    check("""\\ this""".toAst == dedent"""
+      rnInner
+        rnLeaf  '\'
+        rnLeaf  ' '
+        rnLeaf  'this'
+      """)
+
+    check("""\\\ this""".toAst == dedent"""
+      rnInner
+        rnLeaf  '\'
+        rnLeaf  'this'
+      """)
+
+    check("""\\\\ this""".toAst == dedent"""
+      rnInner
+        rnLeaf  '\'
+        rnLeaf  '\'
+        rnLeaf  ' '
+        rnLeaf  'this'
+      """)
+
+suite "RST inline markup":
+  test "end-string has repeating symbols":
+    check("*emphasis content****".toAst == dedent"""
+      rnEmphasis
+        rnLeaf  'emphasis'
+        rnLeaf  ' '
+        rnLeaf  'content'
+        rnLeaf  '***'
+      """)
+
+    check("""*emphasis content\****""".toAst == dedent"""
+      rnEmphasis
+        rnLeaf  'emphasis'
+        rnLeaf  ' '
+        rnLeaf  'content'
+        rnLeaf  '*'
+        rnLeaf  '**'
+      """)  # exact configuration of leafs with * is not really essential,
+            # only total number of * is essential
+
+    check("**strong content****".toAst == dedent"""
+      rnStrongEmphasis
+        rnLeaf  'strong'
+        rnLeaf  ' '
+        rnLeaf  'content'
+        rnLeaf  '**'
+      """)
+
+    check("""**strong content*\****""".toAst == dedent"""
+      rnStrongEmphasis
+        rnLeaf  'strong'
+        rnLeaf  ' '
+        rnLeaf  'content'
+        rnLeaf  '*'
+        rnLeaf  '*'
+        rnLeaf  '*'
+      """)
+
+    check("``lit content`````".toAst == dedent"""
+      rnInlineLiteral
+        rnLeaf  'lit'
+        rnLeaf  ' '
+        rnLeaf  'content'
+        rnLeaf  '```'
+      """)
+
+
+  test """interpreted text can be ended with \` """:
+    let output = (".. default-role:: literal\n" & """`\``""").toAst
+    check(output.endsWith """
+  rnParagraph
+    rnInlineLiteral
+      rnLeaf  '`'""" & "\n")
+
+    let output2 = """`\``""".toAst
+    check(output2 == dedent"""
+      rnInlineCode
+        rnDirArg
+          rnLeaf  'nim'
+        [nil]
+        rnLiteralBlock
+          rnLeaf  '`'
+      """)
+
+    let output3 = """`proc \`+\``""".toAst
+    check(output3 == dedent"""
+      rnInlineCode
+        rnDirArg
+          rnLeaf  'nim'
+        [nil]
+        rnLiteralBlock
+          rnLeaf  'proc `+`'
+      """)
+
+  test """inline literals can contain \ anywhere""":
+    check("""``\``""".toAst == dedent"""
+      rnInlineLiteral
+        rnLeaf  '\'
+      """)
+
+    check("""``\\``""".toAst == dedent"""
+      rnInlineLiteral
+        rnLeaf  '\'
+        rnLeaf  '\'
+      """)
+
+    check("""``\```""".toAst == dedent"""
+      rnInlineLiteral
+        rnLeaf  '\'
+        rnLeaf  '`'
+      """)
+
+    check("""``\\```""".toAst == dedent"""
+      rnInlineLiteral
+        rnLeaf  '\'
+        rnLeaf  '\'
+        rnLeaf  '`'
+      """)
+
+    check("""``\````""".toAst == dedent"""
+      rnInlineLiteral
+        rnLeaf  '\'
+        rnLeaf  '`'
+        rnLeaf  '`'
+      """)
+
+  test "references with _ at the end":
+    check(dedent"""
+      .. _lnk: https
+
+      lnk_""".toAst ==
+      dedent"""
+        rnHyperlink
+          rnInner
+            rnLeaf  'lnk'
+          rnInner
+            rnLeaf  'https'
+      """)
+
+  test "not a hyper link":
+    check(dedent"""
+      .. _lnk: https
+
+      lnk___""".toAst ==
+      dedent"""
+        rnInner
+          rnLeaf  'lnk'
+          rnLeaf  '___'
+      """)