summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
author Beshr Kayali <beshrkayali@users.noreply.github.com> 2021-05-09 20:24:00 +0200
committer GitHub <noreply@github.com> 2021-05-09 19:24:00 +0100
commit f4dd95f3bee14b69caec63c3be984c4a75f43c8a (patch)
tree 3da6d2362e3858ad2b7ad6af56f1d4623b983951 /lib
parent d84a3b10b5540d77a3501b9269dabeaedad542de (diff)
download Nim-f4dd95f3bee14b69caec63c3be984c4a75f43c8a.tar.gz
Fix parseUri to sanitize urls containing ASCII newline or tab (#17967)
* Fix parseUri to sanitize urls containing ASCII newline or tab

* Fix ups based on review

Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>

* Additional fix ups based on review

- Avoid unnecessary `removeUnsafeBytesFromUri` call if parseUri is strict
- Move some parseUri tests to uri module test file

Co-authored-by: Dominik Picheta <dominikpicheta@googlemail.com>

* Update changelog

Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>
Co-authored-by: Dominik Picheta <dominikpicheta@googlemail.com>
Diffstat (limited to 'lib')
-rw-r--r-- lib/pure/uri.nim 32
1 files changed, 29 insertions, 3 deletions
diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim
index a828298c2..67d5e5933 100644
--- a/lib/pure/uri.nim
+++ b/lib/pure/uri.nim
@@ -51,6 +51,8 @@ type
 
   UriParseError* = object of ValueError
 
+# https://url.spec.whatwg.org/#concept-basic-url-parser
+const unsafeUrlBytesToRemove = {'\t', '\r', '\n'}
 
 proc uriParseError*(msg: string) {.noreturn.} =
   ## Raises a `UriParseError` exception with message `msg`.
@@ -261,7 +263,11 @@ func resetUri(uri: var Uri) =
     else:
       f = false
 
-func parseUri*(uri: string, result: var Uri) =
+func removeUnsafeBytesFromUri(uri: string): string =
+  for c in uri:
+    if c notin unsafeUrlBytesToRemove: result.add c
+
+func parseUri*(uri: string, result: var Uri, strict = true) =
   ## Parses a URI. The `result` variable will be cleared before.
   ##
   ## **See also:**
@@ -273,6 +279,26 @@ func parseUri*(uri: string, result: var Uri) =
     assert res.scheme == "https"
     assert res.hostname == "nim-lang.org"
     assert res.path == "/docs/manual.html"
+
+    # Non-strict
+    res = initUri()
+    parseUri("https://nim-lang\n.org\t/docs/", res, strict=false)
+    assert res.scheme == "https"
+    assert res.hostname == "nim-lang.org"
+    assert res.path == "/docs/"
+
+    # Strict
+    res = initUri()
+    doAssertRaises(UriParseError):
+      parseUri("https://nim-lang\n.org\t/docs/", res)
+
+  var uri = uri
+  if strict:
+    for c in uri:
+      if c in unsafeUrlBytesToRemove: uriParseError("Invalid uri '$#'" % uri)
+  else:
+    uri = removeUnsafeBytesFromUri(uri)
+
   resetUri(result)
 
   var i = 0
@@ -309,7 +335,7 @@ func parseUri*(uri: string, result: var Uri) =
   # Path
   parsePath(uri, i, result)
 
-func parseUri*(uri: string): Uri =
+func parseUri*(uri: string, strict = true): Uri =
   ## Parses a URI and returns it.
   ##
   ## **See also:**
@@ -320,7 +346,7 @@ func parseUri*(uri: string): Uri =
     assert res.password == "Password"
     assert res.scheme == "ftp"
   result = initUri()
-  parseUri(uri, result)
+  parseUri(uri, result, strict)
 
 func removeDotSegments(path: string): string =
   ## Collapses `..` and `.` in `path` in a similar way as done in `os.normalizedPath`