diff options
author | Beshr Kayali <beshrkayali@users.noreply.github.com> | 2021-05-09 20:24:00 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-09 19:24:00 +0100 |
commit | f4dd95f3bee14b69caec63c3be984c4a75f43c8a (patch) | |
tree | 3da6d2362e3858ad2b7ad6af56f1d4623b983951 | |
parent | d84a3b10b5540d77a3501b9269dabeaedad542de (diff) | |
download | Nim-f4dd95f3bee14b69caec63c3be984c4a75f43c8a.tar.gz |
Fix parseUri to sanitize urls containing ASCII newline or tab (#17967)
* Fix parseUri to sanitize urls containing ASCII newline or tab * Fix ups based on review Co-authored-by: Timothee Cour <timothee.cour2@gmail.com> * Additional fix ups based on review - Avoid unnecessary `removeUnsafeBytesFromUri` call if parseUri is strict - Move some parseUri tests to uri module test file Co-authored-by: Dominik Picheta <dominikpicheta@googlemail.com> * Update changelog Co-authored-by: Timothee Cour <timothee.cour2@gmail.com> Co-authored-by: Dominik Picheta <dominikpicheta@googlemail.com>
-rw-r--r-- | changelog.md | 1 | ||||
-rw-r--r-- | lib/pure/uri.nim | 32 | ||||
-rw-r--r-- | tests/stdlib/turi.nim | 12 |
3 files changed, 42 insertions, 3 deletions
diff --git a/changelog.md b/changelog.md index 29b3237d0..7732aec30 100644 --- a/changelog.md +++ b/changelog.md @@ -299,6 +299,7 @@ - Added `copyWithin` [for `seq` and `array` for JavaScript targets](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/copyWithin). +- Added optional `strict` argument to `parseUri` of `uri` module to raise a `UriParseError` if input contains newline or tab characters, or [remove them in non-strict case](https://url.spec.whatwg.org/#concept-basic-url-parser). ## Language changes diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim index a828298c2..67d5e5933 100644 --- a/lib/pure/uri.nim +++ b/lib/pure/uri.nim @@ -51,6 +51,8 @@ type UriParseError* = object of ValueError +# https://url.spec.whatwg.org/#concept-basic-url-parser +const unsafeUrlBytesToRemove = {'\t', '\r', '\n'} proc uriParseError*(msg: string) {.noreturn.} = ## Raises a `UriParseError` exception with message `msg`. @@ -261,7 +263,11 @@ func resetUri(uri: var Uri) = else: f = false -func parseUri*(uri: string, result: var Uri) = +func removeUnsafeBytesFromUri(uri: string): string = + for c in uri: + if c notin unsafeUrlBytesToRemove: result.add c + +func parseUri*(uri: string, result: var Uri, strict = true) = ## Parses a URI. The `result` variable will be cleared before. ## ## **See also:** @@ -273,6 +279,26 @@ func parseUri*(uri: string, result: var Uri) = assert res.scheme == "https" assert res.hostname == "nim-lang.org" assert res.path == "/docs/manual.html" + + # Non-strict + res = initUri() + parseUri("https://nim-lang\n.org\t/docs/", res, strict=false) + assert res.scheme == "https" + assert res.hostname == "nim-lang.org" + assert res.path == "/docs/" + + # Strict + res = initUri() + doAssertRaises(UriParseError): + parseUri("https://nim-lang\n.org\t/docs/", res) + + var uri = uri + if strict: + for c in uri: + if c in unsafeUrlBytesToRemove: uriParseError("Invalid uri '$#'" % uri) + else: + uri = removeUnsafeBytesFromUri(uri) + resetUri(result) var i = 0 @@ -309,7 +335,7 @@ func parseUri*(uri: string, result: var Uri) = # Path parsePath(uri, i, result) -func parseUri*(uri: string): Uri = +func parseUri*(uri: string, strict = true): Uri = ## Parses a URI and returns it. ## ## **See also:** @@ -320,7 +346,7 @@ func parseUri*(uri: string): Uri = assert res.password == "Password" assert res.scheme == "ftp" result = initUri() - parseUri(uri, result) + parseUri(uri, result, strict) func removeDotSegments(path: string): string = ## Collapses `..` and `.` in `path` in a similar way as done in `os.normalizedPath` diff --git a/tests/stdlib/turi.nim b/tests/stdlib/turi.nim index a3b6afe2c..26b88a982 100644 --- a/tests/stdlib/turi.nim +++ b/tests/stdlib/turi.nim @@ -141,6 +141,18 @@ template main() = doAssert test.port == "" doAssert test.path == "/foo/bar/baz.txt" + block: # Strict + doAssertRaises(UriParseError): + discard parseUri("https://nim-lang\n.org\t/docs/\nalert('msg\r\n')/?query\n=\tvalue#frag\nment") + + # Non-strict would sanitize newline and tab characters from input + let test = parseUri("https://nim-lang\n.org\t/docs/\nalert('msg\r\n')/?query\n=\tvalue#frag\nment", strict=false) + assert test.scheme == "https" + assert test.hostname == "nim-lang.org" + assert test.path == "/docs/alert('msg')/" + assert test.query == "query=value" + assert test.anchor == "fragment" + block: # combine block: let concat = combine(parseUri("http://google.com/foo/bar/"), parseUri("baz")) |