diff options
Diffstat (limited to 'lib/pure/uri.nim')
-rw-r--r-- | lib/pure/uri.nim | 270 |
1 files changed, 147 insertions, 123 deletions
diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim index 7f553be1a..725d5bbd9 100644 --- a/lib/pure/uri.nim +++ b/lib/pure/uri.nim @@ -14,40 +14,35 @@ ## as a locator, a name, or both. The term "Uniform Resource Locator" ## (URL) refers to the subset of URIs. ## -## Basic usage -## =========== +## .. warning:: URI parsers in this module do not perform security validation. ## -## Combine URIs -## ------------- -## .. code-block:: -## import uri -## let host = parseUri("https://nim-lang.org") -## let blog = "/blog.html" -## let bloguri = host / blog -## assert $host == "https://nim-lang.org" -## assert $bloguri == "https://nim-lang.org/blog.html" -## -## Access URI item -## --------------- -## .. code-block:: -## import uri -## let res = parseUri("sftp://127.0.0.1:4343") -## if isAbsolute(res): -## assert res.port == "4343" -## else: -## echo "Wrong format" -## -## Data URI Base64 -## --------------- -## -## .. code-block::nim -## doAssert getDataUri("Hello World", "text/plain") == "data:text/plain;charset=utf-8;base64,SGVsbG8gV29ybGQ=" -## doAssert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt" +## # Basic usage + + +## ## Combine URIs +runnableExamples: + let host = parseUri("https://nim-lang.org") + assert $host == "https://nim-lang.org" + assert $(host / "/blog.html") == "https://nim-lang.org/blog.html" + assert $(host / "blog2.html") == "https://nim-lang.org/blog2.html" + +## ## Access URI item +runnableExamples: + let res = parseUri("sftp://127.0.0.1:4343") + assert isAbsolute(res) + assert res.port == "4343" -import std/private/since +## ## Data URI Base64 +runnableExamples: + assert getDataUri("Hello World", "text/plain") == "data:text/plain;charset=utf-8;base64,SGVsbG8gV29ybGQ=" + assert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt" -import strutils, parseutils, base64 -import std/private/decode_helpers + +import std/[strutils, parseutils, base64] +import std/private/[since, decode_helpers] + +when defined(nimPreviewSlimSystem): + import std/assertions type @@ -57,26 +52,26 @@ type scheme*, username*, password*: string hostname*, port*, path*, query*, anchor*: string opaque*: bool - isIpv6: bool # not expose it for compatibility. + isIpv6*: bool UriParseError* = object of ValueError proc uriParseError*(msg: string) {.noreturn.} = - ## Raises a ``UriParseError`` exception with message `msg`. + ## Raises a `UriParseError` exception with message `msg`. raise newException(UriParseError, msg) func encodeUrl*(s: string, usePlus = true): string = ## Encodes a URL according to RFC3986. ## ## This means that characters in the set - ## ``{'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}`` are + ## `{'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}` are ## carried over to the result. - ## All other characters are encoded as ``%xx`` where ``xx`` + ## All other characters are encoded as `%xx` where `xx` ## denotes its hexadecimal value. ## - ## As a special rule, when the value of ``usePlus`` is true, - ## spaces are encoded as ``+`` instead of ``%20``. + ## As a special rule, when the value of `usePlus` is true, + ## spaces are encoded as `+` instead of `%20`. ## ## **See also:** ## * `decodeUrl func<#decodeUrl,string>`_ @@ -98,12 +93,12 @@ func encodeUrl*(s: string, usePlus = true): string = func decodeUrl*(s: string, decodePlus = true): string = ## Decodes a URL according to RFC3986. ## - ## This means that any ``%xx`` (where ``xx`` denotes a hexadecimal - ## value) are converted to the character with ordinal number ``xx``, + ## This means that any `%xx` (where `xx` denotes a hexadecimal + ## value) are converted to the character with ordinal number `xx`, ## and every other character is carried over. - ## If ``xx`` is not a valid hexadecimal value, it is left intact. + ## If `xx` is not a valid hexadecimal value, it is left intact. ## - ## As a special rule, when the value of ``decodePlus`` is true, ``+`` + ## As a special rule, when the value of `decodePlus` is true, `+` ## characters are converted to a space. ## ## **See also:** @@ -133,15 +128,15 @@ func decodeUrl*(s: string, decodePlus = true): string = setLen(result, j) func encodeQuery*(query: openArray[(string, string)], usePlus = true, - omitEq = true): string = + omitEq = true, sep = '&'): string = ## Encodes a set of (key, value) parameters into a URL query string. ## - ## Every (key, value) pair is URL-encoded and written as ``key=value``. If the - ## value is an empty string then the ``=`` is omitted, unless ``omitEq`` is + ## Every (key, value) pair is URL-encoded and written as `key=value`. If the + ## value is an empty string then the `=` is omitted, unless `omitEq` is ## false. - ## The pairs are joined together by a ``&`` character. + ## The pairs are joined together by the `sep` character. ## - ## The ``usePlus`` parameter is passed down to the `encodeUrl` function that + ## The `usePlus` parameter is passed down to the `encodeUrl` function that ## is used for the URL encoding of the string values. ## ## **See also:** @@ -150,9 +145,10 @@ func encodeQuery*(query: openArray[(string, string)], usePlus = true, assert encodeQuery({: }) == "" assert encodeQuery({"a": "1", "b": "2"}) == "a=1&b=2" assert encodeQuery({"a": "1", "b": ""}) == "a=1&b" + assert encodeQuery({"a": "1", "b": ""}, omitEq = false, sep = ';') == "a=1;b=" for elem in query: - # Encode the `key = value` pairs and separate them with a '&' - if result.len > 0: result.add('&') + # Encode the `key = value` pairs and separate them with 'sep' + if result.len > 0: result.add(sep) let (key, val) = elem result.add(encodeUrl(key, usePlus)) # Omit the '=' if the value string is empty @@ -160,23 +156,28 @@ func encodeQuery*(query: openArray[(string, string)], usePlus = true, result.add('=') result.add(encodeUrl(val, usePlus)) -iterator decodeQuery*(data: string): tuple[key, value: TaintedString] = - ## Reads and decodes query string ``data`` and yields the (key, value) pairs the - ## data consists of. +iterator decodeQuery*(data: string, sep = '&'): tuple[key, value: string] = + ## Reads and decodes the query string `data` and yields the `(key, value)` pairs + ## the data consists of. If compiled with `-d:nimLegacyParseQueryStrict`, + ## a `UriParseError` is raised when there is an unencoded `=` character in a decoded + ## value, which was the behavior in Nim < 1.5.1. runnableExamples: - import std/sugar - let s = collect(newSeq): - for k, v in decodeQuery("foo=1&bar=2"): (k, v) - doAssert s == @[("foo", "1"), ("bar", "2")] + import std/sequtils + assert toSeq(decodeQuery("foo=1&bar=2=3")) == @[("foo", "1"), ("bar", "2=3")] + assert toSeq(decodeQuery("foo=1;bar=2=3", ';')) == @[("foo", "1"), ("bar", "2=3")] + assert toSeq(decodeQuery("&a&=b&=&&")) == @[("", ""), ("a", ""), ("", "b"), ("", ""), ("", "")] - proc parseData(data: string, i: int, field: var string): int = + proc parseData(data: string, i: int, field: var string, sep: char): int = result = i while result < data.len: - case data[result] + let c = data[result] + case c of '%': add(field, decodePercent(data, result)) of '+': add(field, ' ') - of '=', '&': break - else: add(field, data[result]) + of '&': break + else: + if c == sep: break + else: add(field, data[result]) inc(result) var i = 0 @@ -185,16 +186,20 @@ iterator decodeQuery*(data: string): tuple[key, value: TaintedString] = # decode everything in one pass: while i < data.len: setLen(name, 0) # reuse memory - i = parseData(data, i, name) + i = parseData(data, i, name, '=') setLen(value, 0) # reuse memory if i < data.len and data[i] == '=': inc(i) # skip '=' - i = parseData(data, i, value) - yield (name.TaintedString, value.TaintedString) - if i < data.len: - if data[i] == '&': inc(i) + when defined(nimLegacyParseQueryStrict): + i = parseData(data, i, value, '=') else: - uriParseError("'&' expected at index '$#' for '$#'" % [$i, data]) + i = parseData(data, i, value, sep) + yield (name, value) + if i < data.len: + when defined(nimLegacyParseQueryStrict): + if data[i] != '&': + uriParseError("'&' expected at index '$#' for '$#'" % [$i, data]) + inc(i) func parseAuthority(authority: string, result: var Uri) = var i = 0 @@ -226,7 +231,6 @@ func parseAuthority(authority: string, result: var Uri) = i.inc func parsePath(uri: string, i: var int, result: var Uri) = - i.inc parseUntil(uri, result.path, {'?', '#'}, i) # The 'mailto' scheme's PATH actually contains the hostname/username @@ -242,21 +246,9 @@ func parsePath(uri: string, i: var int, result: var Uri) = i.inc # Skip '#' i.inc parseUntil(uri, result.anchor, {}, i) -func initUri*(): Uri = - ## Initializes a URI with ``scheme``, ``username``, ``password``, - ## ``hostname``, ``port``, ``path``, ``query`` and ``anchor``. - ## - ## **See also:** - ## * `Uri type <#Uri>`_ for available fields in the URI type - runnableExamples: - var uri2: Uri - assert initUri() == uri2 - result = Uri(scheme: "", username: "", password: "", hostname: "", port: "", - path: "", query: "", anchor: "") - -func initUri*(isIpv6: bool): Uri {.since: (1, 3, 5).} = - ## Initializes a URI with ``scheme``, ``username``, ``password``, - ## ``hostname``, ``port``, ``path``, ``query``, ``anchor`` and ``isIpv6``. +func initUri*(isIpv6 = false): Uri = + ## Initializes a URI with `scheme`, `username`, `password`, + ## `hostname`, `port`, `path`, `query`, `anchor` and `isIpv6`. ## ## **See also:** ## * `Uri type <#Uri>`_ for available fields in the URI type @@ -293,9 +285,9 @@ func parseUri*(uri: string, result: var Uri) = var i = 0 # Check if this is a reference URI (relative URI) - let doubleSlash = uri.len > 1 and uri[1] == '/' + let doubleSlash = uri.len > 1 and uri[0] == '/' and uri[1] == '/' if i < uri.len and uri[i] == '/': - # Make sure ``uri`` doesn't begin with '//'. + # Make sure `uri` doesn't begin with '//'. if not doubleSlash: parsePath(uri, i, result) return @@ -338,9 +330,17 @@ func parseUri*(uri: string): Uri = parseUri(uri, result) func removeDotSegments(path: string): string = + ## Collapses `..` and `.` in `path` in a similar way as done in `os.normalizedPath` + ## Caution: this is buggy. + runnableExamples: + assert removeDotSegments("a1/a2/../a3/a4/a5/./a6/a7/.//./") == "a1/a3/a4/a5/a6/a7/" + assert removeDotSegments("http://www.ai.") == "http://www.ai." + # xxx adapt or reuse `pathnorm.normalizePath(path, '/')` to make this more reliable, but + # taking into account url specificities such as not collapsing leading `//` in scheme + # `https://`. see `turi` for failing tests. if path.len == 0: return "" var collection: seq[string] = @[] - let endsWithSlash = path[path.len-1] == '/' + let endsWithSlash = path.endsWith '/' var i = 0 var currentSegment = "" while i < path.len: @@ -354,7 +354,7 @@ func removeDotSegments(path: string): string = discard collection.pop() i.inc 3 continue - elif path[i+1] == '/': + elif i + 1 < path.len and path[i+1] == '/': i.inc 2 continue currentSegment.add path[i] @@ -446,10 +446,8 @@ func combine*(uris: varargs[Uri]): Uri = func isAbsolute*(uri: Uri): bool = ## Returns true if URI is absolute, false otherwise. runnableExamples: - let foo = parseUri("https://nim-lang.org") - assert isAbsolute(foo) == true - let bar = parseUri("nim-lang") - assert isAbsolute(bar) == false + assert parseUri("https://nim-lang.org").isAbsolute + assert not parseUri("nim-lang").isAbsolute return uri.scheme != "" and (uri.hostname != "" or uri.path != "") func `/`*(x: Uri, path: string): Uri = @@ -497,44 +495,62 @@ func `?`*(u: Uri, query: openArray[(string, string)]): Uri = func `$`*(u: Uri): string = ## Returns the string representation of the specified URI object. runnableExamples: - let foo = parseUri("https://nim-lang.org") - assert $foo == "https://nim-lang.org" - result = "" - if u.scheme.len > 0: - result.add(u.scheme) - if u.opaque: - result.add(":") - else: - result.add("://") - if u.username.len > 0: - result.add(u.username) - if u.password.len > 0: - result.add(":") - result.add(u.password) - result.add("@") + assert $parseUri("https://nim-lang.org") == "https://nim-lang.org" + # Get the len of all the parts. + let schemeLen = u.scheme.len + let usernameLen = u.username.len + let passwordLen = u.password.len + let hostnameLen = u.hostname.len + let portLen = u.port.len + let pathLen = u.path.len + let queryLen = u.query.len + let anchorLen = u.anchor.len + # Prepare a string that fits all the parts and all punctuation chars. + # 12 is the max len required by all possible punctuation chars. + result = newStringOfCap( + schemeLen + usernameLen + passwordLen + hostnameLen + portLen + pathLen + queryLen + anchorLen + 12 + ) + # Insert to result. + if schemeLen > 0: + result.add u.scheme + result.add ':' + if not u.opaque: + result.add '/' + result.add '/' + if usernameLen > 0: + result.add u.username + if passwordLen > 0: + result.add ':' + result.add u.password + result.add '@' if u.hostname.endsWith('/'): if u.isIpv6: - result.add("[" & u.hostname[0 .. ^2] & "]") + result.add '[' + result.add u.hostname[0 .. ^2] + result.add ']' else: - result.add(u.hostname[0 .. ^2]) + result.add u.hostname[0 .. ^2] else: if u.isIpv6: - result.add("[" & u.hostname & "]") + result.add '[' + result.add u.hostname + result.add ']' else: - result.add(u.hostname) - if u.port.len > 0: - result.add(":") - result.add(u.port) - if u.path.len > 0: - if u.hostname.len > 0 and u.path[0] != '/': - result.add('/') - result.add(u.path) - if u.query.len > 0: - result.add("?") - result.add(u.query) - if u.anchor.len > 0: - result.add("#") - result.add(u.anchor) + result.add u.hostname + if portLen > 0: + result.add ':' + result.add u.port + if pathLen > 0: + if hostnameLen > 0 and u.path[0] != '/': + result.add '/' + result.add u.path + if queryLen > 0: + result.add '?' + result.add u.query + if anchorLen > 0: + result.add '#' + result.add u.anchor + proc getDataUri*(data, mime: string, encoding = "utf-8"): string {.since: (1, 3).} = ## Convenience proc for `base64.encode` returns a standard Base64 Data URI (RFC-2397) @@ -543,6 +559,14 @@ proc getDataUri*(data, mime: string, encoding = "utf-8"): string {.since: (1, 3) ## * `mimetypes <mimetypes.html>`_ for `mime` argument ## * https://tools.ietf.org/html/rfc2397 ## * https://en.wikipedia.org/wiki/Data_URI_scheme - runnableExamples: static: doAssert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt" + runnableExamples: static: assert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt" assert encoding.len > 0 and mime.len > 0 # Must *not* be URL-Safe, see RFC-2397 - result = "data:" & mime & ";charset=" & encoding & ";base64," & base64.encode(data) + let base64encoded: string = base64.encode(data) + # ("data:".len + ";charset=".len + ";base64,".len) == 22 + result = newStringOfCap(22 + mime.len + encoding.len + base64encoded.len) + result.add "data:" + result.add mime + result.add ";charset=" + result.add encoding + result.add ";base64," + result.add base64encoded |