summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--lib/packages/docutils/rst.nim28
-rw-r--r--tests/stdlib/trst.nim50
2 files changed, 67 insertions, 11 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index 223e57bc4..249c310b9 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -1213,23 +1213,29 @@ proc isUrl(p: RstParser, i: int): bool =
     p.tok[i+3].kind == tkWord and
     p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"]
 
+proc parseUrl(p: var RstParser): PRstNode =
+  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks
+  result = newRstNode(rnStandaloneHyperlink)
+  var lastIdx = p.idx
+  while p.tok[lastIdx].kind in {tkWord, tkPunct, tkOther}:
+    inc lastIdx
+  dec lastIdx
+  # standalone URL can not end with punctuation in RST
+  while lastIdx >= p.idx and p.tok[lastIdx].kind == tkPunct and
+      p.tok[lastIdx].symbol != "/":
+    dec lastIdx
+  var s = ""
+  for i in p.idx .. lastIdx: s.add p.tok[i].symbol
+  result.add s
+  p.idx = lastIdx + 1
+
 proc parseWordOrRef(p: var RstParser, father: PRstNode) =
   ## Parses a normal word or may be a reference or URL.
   if nextTok(p).kind != tkPunct:  # <- main path, a normal word
     father.add newLeaf(p)
     inc p.idx
   elif isUrl(p, p.idx):           # URL http://something
-    var n = newRstNode(rnStandaloneHyperlink)
-    while true:
-      case currentTok(p).kind
-      of tkWord, tkAdornment, tkOther: discard
-      of tkPunct:
-        if nextTok(p).kind notin {tkWord, tkAdornment, tkOther, tkPunct}:
-          break
-      else: break
-      n.add(newLeaf(p))
-      inc p.idx
-    father.add(n)
+    father.add parseUrl(p)
   else:
     # check for reference (probably, long one like some.ref.with.dots_ )
     var saveIdx = p.idx
diff --git a/tests/stdlib/trst.nim b/tests/stdlib/trst.nim
index fef80dfc7..905e5143a 100644
--- a/tests/stdlib/trst.nim
+++ b/tests/stdlib/trst.nim
@@ -428,3 +428,53 @@ suite "RST inline markup":
           rnLeaf  'lnk'
           rnLeaf  '___'
       """)
+
+  test "no punctuation in the end of a standalone URI is allowed":
+    check(dedent"""
+        [see (http://no.org)], end""".toAst ==
+      dedent"""
+        rnInner
+          rnLeaf  '['
+          rnLeaf  'see'
+          rnLeaf  ' '
+          rnLeaf  '('
+          rnStandaloneHyperlink
+            rnLeaf  'http://no.org'
+          rnLeaf  ')'
+          rnLeaf  ']'
+          rnLeaf  ','
+          rnLeaf  ' '
+          rnLeaf  'end'
+        """)
+
+    # but `/` at the end is OK
+    check(
+      dedent"""
+        See http://no.org/ end""".toAst ==
+      dedent"""
+        rnInner
+          rnLeaf  'See'
+          rnLeaf  ' '
+          rnStandaloneHyperlink
+            rnLeaf  'http://no.org/'
+          rnLeaf  ' '
+          rnLeaf  'end'
+        """)
+
+    # a more complex URL with some made-up ending '&='.
+    # Github Markdown would include final &= and
+    # so would rst2html.py in contradiction with RST spec.
+    check(
+      dedent"""
+        See https://www.google.com/url?sa=t&source=web&cd=&cad=rja&url=https%3A%2F%2Fnim-lang.github.io%2FNim%2Frst.html%23features&usg=AO&= end""".toAst ==
+      dedent"""
+        rnInner
+          rnLeaf  'See'
+          rnLeaf  ' '
+          rnStandaloneHyperlink
+            rnLeaf  'https://www.google.com/url?sa=t&source=web&cd=&cad=rja&url=https%3A%2F%2Fnim-lang.github.io%2FNim%2Frst.html%23features&usg=AO'
+          rnLeaf  '&'
+          rnLeaf  '='
+          rnLeaf  ' '
+          rnLeaf  'end'
+        """)