summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--changelog.md1
-rw-r--r--lib/pure/uri.nim32
-rw-r--r--tests/stdlib/turi.nim12
3 files changed, 42 insertions, 3 deletions
diff --git a/changelog.md b/changelog.md
index 29b3237d0..7732aec30 100644
--- a/changelog.md
+++ b/changelog.md
@@ -299,6 +299,7 @@
 
 - Added `copyWithin` [for `seq` and `array` for JavaScript targets](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/copyWithin).
 
+- Added optional `strict` argument to `parseUri` of `uri` module to raise a `UriParseError` if input contains newline or tab characters, or [remove them in non-strict case](https://url.spec.whatwg.org/#concept-basic-url-parser).
 
 ## Language changes
 
diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim
index a828298c2..67d5e5933 100644
--- a/lib/pure/uri.nim
+++ b/lib/pure/uri.nim
@@ -51,6 +51,8 @@ type
 
   UriParseError* = object of ValueError
 
+# https://url.spec.whatwg.org/#concept-basic-url-parser
+const unsafeUrlBytesToRemove = {'\t', '\r', '\n'}
 
 proc uriParseError*(msg: string) {.noreturn.} =
   ## Raises a `UriParseError` exception with message `msg`.
@@ -261,7 +263,11 @@ func resetUri(uri: var Uri) =
     else:
       f = false
 
-func parseUri*(uri: string, result: var Uri) =
+func removeUnsafeBytesFromUri(uri: string): string =
+  for c in uri:
+    if c notin unsafeUrlBytesToRemove: result.add c
+
+func parseUri*(uri: string, result: var Uri, strict = true) =
   ## Parses a URI. The `result` variable will be cleared before.
   ##
   ## **See also:**
@@ -273,6 +279,26 @@ func parseUri*(uri: string, result: var Uri) =
     assert res.scheme == "https"
     assert res.hostname == "nim-lang.org"
     assert res.path == "/docs/manual.html"
+
+    # Non-strict
+    res = initUri()
+    parseUri("https://nim-lang\n.org\t/docs/", res, strict=false)
+    assert res.scheme == "https"
+    assert res.hostname == "nim-lang.org"
+    assert res.path == "/docs/"
+
+    # Strict
+    res = initUri()
+    doAssertRaises(UriParseError):
+      parseUri("https://nim-lang\n.org\t/docs/", res)
+
+  var uri = uri
+  if strict:
+    for c in uri:
+      if c in unsafeUrlBytesToRemove: uriParseError("Invalid uri '$#'" % uri)
+  else:
+    uri = removeUnsafeBytesFromUri(uri)
+
   resetUri(result)
 
   var i = 0
@@ -309,7 +335,7 @@ func parseUri*(uri: string, result: var Uri) =
   # Path
   parsePath(uri, i, result)
 
-func parseUri*(uri: string): Uri =
+func parseUri*(uri: string, strict = true): Uri =
   ## Parses a URI and returns it.
   ##
   ## **See also:**
@@ -320,7 +346,7 @@ func parseUri*(uri: string): Uri =
     assert res.password == "Password"
     assert res.scheme == "ftp"
   result = initUri()
-  parseUri(uri, result)
+  parseUri(uri, result, strict)
 
 func removeDotSegments(path: string): string =
   ## Collapses `..` and `.` in `path` in a similar way as done in `os.normalizedPath`
diff --git a/tests/stdlib/turi.nim b/tests/stdlib/turi.nim
index a3b6afe2c..26b88a982 100644
--- a/tests/stdlib/turi.nim
+++ b/tests/stdlib/turi.nim
@@ -141,6 +141,18 @@ template main() =
       doAssert test.port == ""
       doAssert test.path == "/foo/bar/baz.txt"
 
+    block: # Strict
+      doAssertRaises(UriParseError):
+        discard parseUri("https://nim-lang\n.org\t/docs/\nalert('msg\r\n')/?query\n=\tvalue#frag\nment")
+
+      # Non-strict would sanitize newline and tab characters from input
+      let test = parseUri("https://nim-lang\n.org\t/docs/\nalert('msg\r\n')/?query\n=\tvalue#frag\nment", strict=false)
+      assert test.scheme == "https"
+      assert test.hostname == "nim-lang.org"
+      assert test.path == "/docs/alert('msg')/"
+      assert test.query == "query=value"
+      assert test.anchor == "fragment"
+
   block: # combine
     block:
       let concat = combine(parseUri("http://google.com/foo/bar/"), parseUri("baz"))