summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorAndrey Makarov <ph.makarov@gmail.com>2022-02-08 02:11:53 +0300
committerGitHub <noreply@github.com>2022-02-07 18:11:53 -0500
commit801c0f0369779a47da2ef6618abea11feb390973 (patch)
treeb765a1ad4f68048de51505c8f404448c623d17d3
parent4b0636fba018df407237811f9e687ed4fc249c10 (diff)
downloadNim-801c0f0369779a47da2ef6618abea11feb390973.tar.gz
Fix bug 27 of #17340 (#19433)
Fixes silent disappearance of Markdown (pseudo-)link when it's detected as
unsafe protocol. Now it will be converted to plain text in spirit of
[the specification](https://spec.commonmark.org/0.30/#links).
For that sake the check for protocol is added to rst.nim also.
-rw-r--r--lib/packages/docutils/rst.nim66
-rw-r--r--lib/packages/docutils/rstgen.nim17
-rw-r--r--tests/stdlib/trst.nim27
-rw-r--r--tests/stdlib/trstgen.nim11
4 files changed, 81 insertions, 40 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
index 93c2231dc..d4bfae48f 100644
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -222,7 +222,7 @@
 
 import
   os, strutils, rstast, dochelpers, std/enumutils, algorithm, lists, sequtils,
-  std/private/miscdollars, tables
+  std/private/miscdollars, tables, strscans
 from highlite import SourceLanguage, getSourceLanguage
 
 type
@@ -1347,7 +1347,20 @@ proc match(p: RstParser, start: int, expr: string): bool =
     inc i
   result = true
 
-proc fixupEmbeddedRef(n, a, b: PRstNode) =
+proc safeProtocol*(linkStr: var string): string =
+  # Returns link's protocol and, if it's not safe, clears `linkStr`
+  result = ""
+  if scanf(linkStr, "$w:", result):
+    # if it has a protocol at all, ensure that it's not 'javascript:' or worse:
+    if cmpIgnoreCase(result, "http") == 0 or
+        cmpIgnoreCase(result, "https") == 0 or
+        cmpIgnoreCase(result, "ftp") == 0:
+      discard "it's fine"
+    else:
+      linkStr = ""
+
+proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool =
+  # Returns `true` if the link belongs to an allowed protocol
   var sep = - 1
   for i in countdown(n.len - 2, 0):
     if n.sons[i].text == "<":
@@ -1355,7 +1368,15 @@ proc fixupEmbeddedRef(n, a, b: PRstNode) =
       break
   var incr = if sep > 0 and n.sons[sep - 1].text[0] == ' ': 2 else: 1
   for i in countup(0, sep - incr): a.add(n.sons[i])
-  for i in countup(sep + 1, n.len - 2): b.add(n.sons[i])
+  var linkStr = ""
+  for i in countup(sep + 1, n.len - 2): linkStr.add(n.sons[i].addNodes)
+  if linkStr != "":
+    let protocol = safeProtocol(linkStr)
+    result = linkStr != ""
+    if not result:
+      rstMessage(p, mwBrokenLink, protocol,
+                 p.tok[p.idx-3].line, p.tok[p.idx-3].col)
+  b.add newLeaf(linkStr)
 
 proc whichRole(p: RstParser, sym: string): RstNodeKind =
   result = whichRoleAux(sym)
@@ -1407,14 +1428,17 @@ proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
     if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">":
       var a = newRstNode(rnInner)
       var b = newRstNode(rnInner)
-      fixupEmbeddedRef(n, a, b)
-      if a.len == 0:
-        newKind = rnStandaloneHyperlink
-        newSons = @[b]
-      else:
-        newKind = rnHyperlink
-        newSons = @[a, b]
-        setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias)
+      if fixupEmbeddedRef(p, n, a, b):
+        if a.len == 0:  # e.g. `<a_named_relative_link>`_
+          newKind = rnStandaloneHyperlink
+          newSons = @[b]
+        else:  # e.g. `link title <http://site>`_
+          newKind = rnHyperlink
+          newSons = @[a, b]
+          setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias)
+      else:  # include as plain text, not a link
+        newKind = rnInner
+        newSons = n.sons
       result = newRstNode(newKind, newSons)
     else:  # some link that will be resolved in `resolveSubs`
       newKind = rnRef
@@ -1623,7 +1647,6 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
   result.add(lb)
 
 proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
-  result = true
   var desc, link = ""
   var i = p.idx
 
@@ -1642,14 +1665,21 @@ proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
 
   parse("]", desc)
   if p.tok[i].symbol != "(": return false
+  let linkIdx = i + 1
   parse(")", link)
-  let child = newRstNode(rnHyperlink)
-  child.add desc
-  child.add link
   # only commit if we detected no syntax error:
-  father.add child
-  p.idx = i
-  result = true
+  let protocol = safeProtocol(link)
+  if link == "":
+    result = false
+    rstMessage(p, mwBrokenLink, protocol,
+               p.tok[linkIdx].line, p.tok[linkIdx].col)
+  else:
+    let child = newRstNode(rnHyperlink)
+    child.add desc
+    child.add link
+    father.add child
+    p.idx = i
+    result = true
 
 proc getFootnoteType(label: PRstNode): (FootnoteType, int) =
   if label.sons.len >= 1 and label.sons[0].kind == rnLeaf and
diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
index d2180cb91..d662a667c 100644
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -40,7 +40,7 @@
 ##   can be done by simply searching for [footnoteName].
 
 import strutils, os, hashes, strtabs, rstast, rst, highlite, tables, sequtils,
-  algorithm, parseutils, std/strbasics, strscans
+  algorithm, parseutils, std/strbasics
 
 import ../../std/private/since
 
@@ -826,17 +826,6 @@ proc renderOverline(d: PDoc, n: PRstNode, result: var string) =
                    "\\rstov$4[$5]{$3}$2\n", [$n.level,
         rstnodeToRefname(n).idS, tmp, $chr(n.level - 1 + ord('A')), tocName])
 
-
-proc safeProtocol(linkStr: var string) =
-  var protocol = ""
-  if scanf(linkStr, "$w:", protocol):
-    # if it has a protocol at all, ensure that it's not 'javascript:' or worse:
-    if cmpIgnoreCase(protocol, "http") == 0 or cmpIgnoreCase(protocol, "https") == 0 or
-        cmpIgnoreCase(protocol, "ftp") == 0:
-      discard "it's fine"
-    else:
-      linkStr = ""
-
 proc renderTocEntry(d: PDoc, e: TocEntry, result: var string) =
   dispA(d.target, result,
     "<li><a class=\"reference\" id=\"$1_toc\" href=\"#$1\">$2</a></li>\n",
@@ -901,7 +890,7 @@ proc renderImage(d: PDoc, n: PRstNode, result: var string) =
 
   # support for `:target:` links for images:
   var target = esc(d.target, getFieldValue(n, "target").strip(), escMode=emUrl)
-  safeProtocol(target)
+  discard safeProtocol(target)
 
   if target.len > 0:
     # `htmlOut` needs to be of the following format for link to work for images:
@@ -1205,7 +1194,7 @@ proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string,
     d.escMode = emUrl
     renderRstToOut(d, link, linkStr)
     d.escMode = mode
-  safeProtocol(linkStr)
+  discard safeProtocol(linkStr)
   var textStr = ""
   renderRstToOut(d, text, textStr)
   let nimDocStr = if nimdoc: " nimdoc" else: ""
diff --git a/tests/stdlib/trst.nim b/tests/stdlib/trst.nim
index 771e02477..8b98d1403 100644
--- a/tests/stdlib/trst.nim
+++ b/tests/stdlib/trst.nim
@@ -843,12 +843,7 @@ suite "Warnings":
               rnInner
                 rnLeaf  'foo'
               rnInner
-                rnLeaf  '#'
-                rnLeaf  'foo'
-                rnLeaf  ','
-                rnLeaf  'string'
-                rnLeaf  ','
-                rnLeaf  'string'
+                rnLeaf  '#foo,string,string'
           rnParagraph  anchor='foo'
             rnLeaf  'Paragraph'
             rnLeaf  '.'
@@ -1256,3 +1251,23 @@ suite "RST inline markup":
           rnLeaf  'my {link example'
           rnLeaf  'http://example.com/bracket_(symbol_[)'
       """)
+
+  test "not a Markdown link":
+    # bug #17340 (27) `f` will be considered as a protocol and blocked as unsafe
+    var warnings = new seq[string]
+    check("[T](f: var Foo)".toAst(warnings = warnings) ==
+      dedent"""
+        rnInner
+          rnLeaf  '['
+          rnLeaf  'T'
+          rnLeaf  ']'
+          rnLeaf  '('
+          rnLeaf  'f'
+          rnLeaf  ':'
+          rnLeaf  ' '
+          rnLeaf  'var'
+          rnLeaf  ' '
+          rnLeaf  'Foo'
+          rnLeaf  ')'
+      """)
+    check(warnings[] == @["input(1, 5) Warning: broken link 'f'"])
diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim
index 995c0a151..2c28fe506 100644
--- a/tests/stdlib/trstgen.nim
+++ b/tests/stdlib/trstgen.nim
@@ -1593,8 +1593,15 @@ suite "invalid targets":
   test "invalid links":
     check("(([Nim](https://nim-lang.org/)))".toHtml ==
         """((<a class="reference external" href="https://nim-lang.org/">Nim</a>))""")
-    check("(([Nim](javascript://nim-lang.org/)))".toHtml ==
-        """((<a class="reference external" href="">Nim</a>))""")
+    # unknown protocol is treated just like plain text, not a link
+    var warnings = new seq[string]
+    check("(([Nim](javascript://nim-lang.org/)))".toHtml(warnings=warnings) ==
+        """(([Nim](javascript://nim-lang.org/)))""")
+    check(warnings[] == @["input(1, 9) Warning: broken link 'javascript'"])
+    warnings[].setLen 0
+    check("`Nim <javascript://nim-lang.org/>`_".toHtml(warnings=warnings) ==
+      """Nim &lt;javascript://nim-lang.org/&gt;""")
+    check(warnings[] == @["input(1, 33) Warning: broken link 'javascript'"])
 
 suite "local file inclusion":
   test "cannot include files in sandboxed mode":