#
#
# Nim's Runtime Library
# (c) Copyright 2015 Dominik Picheta
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements URI parsing as specified by RFC 3986.
##
## A Uniform Resource Identifier (URI) provides a simple and extensible
## means for identifying a resource. A URI can be further classified
## as a locator, a name, or both. The term "Uniform Resource Locator"
## (URL) refers to the subset of URIs.
##
## # Basic usage
## ## Combine URIs
runnableExamples:
  # `/` appends a path to a parsed URI; a leading slash on the appended
  # path is optional.
  let host = parseUri("https://nim-lang.org")
  assert $host == "https://nim-lang.org"
  assert $(host / "/blog.html") == "https://nim-lang.org/blog.html"
  assert $(host / "blog2.html") == "https://nim-lang.org/blog2.html"
## ## Access URI item
runnableExamples:
  # The parsed components are plain string fields; note that `port` is a
  # string, not an integer.
  let res = parseUri("sftp://127.0.0.1:4343")
  assert isAbsolute(res)
  assert res.port == "4343"
## ## Data URI Base64
runnableExamples:
  # `getDataUri` Base64-encodes the payload and prefixes it with the MIME
  # type and the charset (`utf-8` unless another encoding is passed).
  assert getDataUri("Hello World", "text/plain") == "data:text/plain;charset=utf-8;base64,SGVsbG8gV29ybGQ="
  assert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt"
import std/[strutils, parseutils, base64]
import std/private/[since, decode_helpers]
when defined(nimPreviewSlimSystem):
import std/assertions
type
  Url* = distinct string ## Distinct type over `string`.

  Uri* = object
    ## The components of a URI, as filled in by `parseUri` and rendered
    ## back to text by `$`.
    scheme*, username*, password*: string
    hostname*, port*, path*, query*, anchor*: string
    opaque*: bool ## Set by `parseUri` when the scheme is not followed by
                  ## `//`; `$` then omits the `//` after the scheme.
    isIpv6*: bool ## Set when the parsed authority contains `[`; `$` then
                  ## wraps the hostname in `[` and `]`.

  UriParseError* = object of ValueError ## Raised by `uriParseError`.
proc uriParseError*(msg: string) {.noreturn.} =
  ## Raises a `UriParseError` whose message is `msg`. Never returns.
  raise (ref UriParseError)(msg: msg)
func encodeUrl*(s: string, usePlus = true): string =
  ## Percent-encodes `s` according to RFC3986.
  ##
  ## The unreserved characters
  ## `{'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}` are copied to the
  ## result unchanged. Every other character is written as `%xx`, where
  ## `xx` is its two-digit hexadecimal value.
  ##
  ## The space character is special: it becomes `+` when `usePlus` is true
  ## and `%20` otherwise.
  ##
  ## **See also:**
  ## * `decodeUrl func<#decodeUrl,string>`_
  runnableExamples:
    assert encodeUrl("https://nim-lang.org") == "https%3A%2F%2Fnim-lang.org"
    assert encodeUrl("https://nim-lang.org/this is a test") == "https%3A%2F%2Fnim-lang.org%2Fthis+is+a+test"
    assert encodeUrl("https://nim-lang.org/this is a test", false) == "https%3A%2F%2Fnim-lang.org%2Fthis%20is%20a%20test"
  # https://tools.ietf.org/html/rfc3986#section-2.3
  const unreserved = {'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}
  let spaceReplacement = if usePlus: "+" else: "%20"
  # Reserve a quarter more than the input length up front so that a few
  # escapes do not force a reallocation.
  result = newStringOfCap(s.len + s.len shr 2)
  for ch in s:
    if ch in unreserved:
      result.add ch
    elif ch == ' ':
      result.add spaceReplacement
    else:
      result.add '%'
      result.add toHex(ord(ch), 2)
func decodeUrl*(s: string, decodePlus = true): string =
  ## Decodes a percent-encoded URL according to RFC3986.
  ##
  ## Each `%xx` sequence (with `xx` a hexadecimal value) is replaced by the
  ## character with ordinal number `xx`; all other characters are copied
  ## unchanged. A `%` that is not followed by a valid hexadecimal value is
  ## left intact.
  ##
  ## When `decodePlus` is true, every `+` is turned into a space.
  ##
  ## **See also:**
  ## * `encodeUrl func<#encodeUrl,string>`_
  runnableExamples:
    assert decodeUrl("https%3A%2F%2Fnim-lang.org") == "https://nim-lang.org"
    assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis+is+a+test") == "https://nim-lang.org/this is a test"
    assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis%20is%20a%20test",
        false) == "https://nim-lang.org/this is a test"
    assert decodeUrl("abc%xyz") == "abc%xyz"
  # The decoded text is never longer than the input, so allocate the full
  # length once and trim to the number of characters written at the end.
  result = newString(s.len)
  var
    readPos = 0
    writePos = 0
  while readPos < s.len:
    let ch = s[readPos]
    result[writePos] =
      if ch == '%':
        # `decodePercent` advances `readPos` past the hex digits it consumes.
        decodePercent(s, readPos)
      elif ch == '+' and decodePlus:
        ' '
      else:
        ch
    inc readPos
    inc writePos
  result.setLen(writePos)
func encodeQuery*(query: openArray[(string, string)], usePlus = true,
                  omitEq = true, sep = '&'): string =
  ## Builds a URL query string from a sequence of (key, value) pairs.
  ##
  ## Keys and values are URL-encoded with `encodeUrl` (which receives
  ## `usePlus`) and written as `key=value`. When a value is empty the `=` is
  ## left out, unless `omitEq` is false. Pairs are separated by `sep`.
  ##
  ## **See also:**
  ## * `encodeUrl func<#encodeUrl,string>`_
  runnableExamples:
    assert encodeQuery({: }) == ""
    assert encodeQuery({"a": "1", "b": "2"}) == "a=1&b=2"
    assert encodeQuery({"a": "1", "b": ""}) == "a=1&b"
    assert encodeQuery({"a": "1", "b": ""}, omitEq = false, sep = ';') == "a=1;b="
  for (key, val) in query.items:
    # The separator is written only once something has been emitted.
    if result.len != 0:
      result.add sep
    result.add encodeUrl(key, usePlus)
    # An empty value is written as a bare key when `omitEq` is set.
    if omitEq and val.len == 0:
      continue
    result.add '='
    result.add encodeUrl(val, usePlus)
iterator decodeQuery*(data: string, sep = '&'): tuple[key, value: string] =
  ## Reads and decodes the query string `data` and yields the `(key, value)`
  ## pairs the data consists of.
  ##
  ## Pairs are delimited by `sep`. Inside a pair the first unencoded `=`
  ## separates the key from the value; a pair without `=` yields an empty
  ## value. `%xx` sequences are decoded and `+` is turned into a space.
  ## Because `%` and `+` are decoded first, they cannot act as `sep`.
  ##
  ## If compiled with `-d:nimLegacyParseQueryStrict`, a `UriParseError` is
  ## raised when there is an unencoded `=` character in a decoded value,
  ## which was the behavior in Nim < 1.5.1.
  runnableExamples:
    import std/sequtils
    assert toSeq(decodeQuery("foo=1&bar=2=3")) == @[("foo", "1"), ("bar", "2=3")]
    assert toSeq(decodeQuery("foo=1;bar=2=3", ';')) == @[("foo", "1"), ("bar", "2=3")]
    assert toSeq(decodeQuery("&a&=b&=&&")) == @[("", ""), ("a", ""), ("", "b"), ("", ""), ("", "")]
    # With a custom separator, `&` is ordinary data and a key without a
    # value still ends at the separator.
    assert toSeq(decodeQuery("a;b=1&2", ';')) == @[("a", ""), ("b", "1&2")]

  proc parseData(data: string, i: int, field: var string,
                 stops: set[char]): int =
    ## Appends the decoded characters of `data` starting at index `i` to
    ## `field` and returns the index of the first character in `stops`
    ## (or `data.len` when none is found).
    result = i
    while result < data.len:
      let c = data[result]
      case c
      of '%': add(field, decodePercent(data, result))
      of '+': add(field, ' ')
      else:
        if c in stops: break
        add(field, c)
      inc(result)

  var i = 0
  var name = ""
  var value = ""
  # decode everything in one pass:
  while i < data.len:
    setLen(name, 0) # reuse memory
    # A key ends at '=' (a value follows) or at `sep` (no value).
    i = parseData(data, i, name, {'=', sep})
    setLen(value, 0) # reuse memory
    if i < data.len and data[i] == '=':
      inc(i) # skip '='
      when defined(nimLegacyParseQueryStrict):
        # Stop on a second '=' too so that it can be reported below.
        i = parseData(data, i, value, {'=', sep})
      else:
        i = parseData(data, i, value, {sep})
    yield (name, value)
    if i < data.len:
      when defined(nimLegacyParseQueryStrict):
        if data[i] != sep:
          uriParseError("'$#' expected at index '$#' for '$#'" % [$sep, $i, data])
      inc(i) # skip the separator
func parseAuthority(authority: string, result: var Uri) =
  ## Splits `authority` into the `username`, `password`, `hostname` and
  ## `port` fields of `result`, reading it one character at a time.
  ##
  ## Text is first collected as hostname and port. When an `@` is met, what
  ## was collected so far turns out to be the user info, so it is moved into
  ## `username`/`password` and collection starts over. Inside `[`...`]` a
  ## `:` is part of the hostname, and `isIpv6` is set.
  var
    collectingPort = false
    insideBrackets = false
  for ch in authority:
    case ch
    of '@':
      # Everything read so far was "user:password", not "host:port".
      swap result.password, result.port
      result.port.setLen(0)
      swap result.username, result.hostname
      result.hostname.setLen(0)
      collectingPort = false
    of ':':
      if insideBrackets:
        result.hostname.add ch
      else:
        collectingPort = true
    of '[':
      insideBrackets = true
      result.isIpv6 = true
    of ']':
      insideBrackets = false
    else:
      if collectingPort:
        result.port.add ch
      else:
        result.hostname.add ch
func parsePath(uri: string, i: var int, result: var Uri) =
  ## Parses the path, query and anchor of `uri` starting at index `i` into
  ## `result`, leaving `i` after the last character consumed.
  template at(ch: char): bool =
    i < uri.len and uri[i] == ch

  inc(i, parseUntil(uri, result.path, {'?', '#'}, i))

  # The 'mailto' scheme's PATH actually contains the hostname/username,
  # so it is re-read as an authority and the path is emptied.
  let isMailto = cmpIgnoreCase(result.scheme, "mailto") == 0
  if isMailto:
    parseAuthority(result.path, result)
    result.path.setLen(0)

  if at('?'):
    inc i # step over '?'
    inc(i, parseUntil(uri, result.query, {'#'}, i))

  if at('#'):
    inc i # step over '#'
    # An empty stop set makes the anchor run to the end of the string.
    inc(i, parseUntil(uri, result.anchor, {}, i))
func initUri*(isIpv6 = false): Uri =
  ## Creates a `Uri` whose `scheme`, `username`, `password`, `hostname`,
  ## `port`, `path`, `query` and `anchor` are all empty and whose `isIpv6`
  ## flag is set to the given value.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  runnableExamples:
    var uri2 = initUri(isIpv6 = true)
    uri2.scheme = "tcp"
    uri2.hostname = "2001:0db8:85a3:0000:0000:8a2e:0370:7334"
    uri2.port = "8080"
    assert $uri2 == "tcp://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080"
  # Fields that are not listed are default-initialized: the strings are
  # empty and `opaque` is false.
  result = Uri(isIpv6: isIpv6)
func resetUri(uri: var Uri) =
  ## Clears every field of `uri` in place: strings are truncated to length
  ## zero and boolean flags are set to false.
  for field in fields(uri):
    when field is bool:
      field = false
    else:
      field.setLen(0)
func parseUri*(uri: string, result: var Uri) =
  ## Parses `uri` into `result`. Any previous content of `result` is
  ## cleared first.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  ## * `initUri func <#initUri>`_ for initializing a URI
  runnableExamples:
    var res = initUri()
    parseUri("https://nim-lang.org/docs/manual.html", res)
    assert res.scheme == "https"
    assert res.hostname == "nim-lang.org"
    assert res.path == "/docs/manual.html"
  resetUri(result)
  var pos = 0
  let leadingDoubleSlash = uri.startsWith("//")

  # A single leading '/' (but not '//') marks a relative reference that
  # consists of a path only.
  if uri.startsWith('/') and not leadingDoubleSlash:
    parsePath(uri, pos, result)
    return

  # Scheme
  inc(pos, parseWhile(uri, result.scheme, Letters + Digits + {'+', '-', '.'}, pos))
  if not leadingDoubleSlash:
    if pos >= uri.len or uri[pos] != ':':
      # No ':' after the candidate scheme: treat the whole input as a
      # relative reference and parse it again from the start.
      pos = 0
      result.scheme.setLen(0)
      parsePath(uri, pos, result)
      return
    inc pos # step over ':'

  # Authority
  if pos + 1 < uri.len and uri[pos] == '/' and uri[pos + 1] == '/':
    inc(pos, 2) # step over "//"
    var authority = ""
    inc(pos, parseUntil(uri, authority, {'/', '?', '#'}, pos))
    if authority.len > 0:
      parseAuthority(authority, result)
  else:
    result.opaque = true

  # Path
  parsePath(uri, pos, result)
func parseUri*(uri: string): Uri =
  ## Parses `uri` and returns the resulting `Uri`.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  runnableExamples:
    let res = parseUri("ftp://Username:Password@Hostname")
    assert res.username == "Username"
    assert res.password == "Password"
    assert res.scheme == "ftp"
  var parsed = initUri()
  parseUri(uri, parsed)
  parsed
func removeDotSegments(path: string): string =
  ## Collapses the `./` and `../` segments of `path`.
  ##
  ## `./` is dropped and `../` additionally removes the segment collected
  ## before it (if any). Both are only recognised at the start of a segment,
  ## so names that merely end in dots (`foo./bar`, `b../c`) are kept as they
  ## are. Empty segments are preserved, which keeps the `//` of `http://`.
  ##
  ## Caution: this is still not a complete implementation of RFC 3986
  ## section 5.2.4. A trailing `.` or `..` that is not followed by `/` is
  ## kept verbatim, and `../` directly after a leading `/` removes that
  ## leading slash.
  runnableExamples:
    assert removeDotSegments("a1/a2/../a3/./a4/") == "a1/a3/a4/"
    assert removeDotSegments("http://www.ai.") == "http://www.ai."
    assert removeDotSegments("foo./bar") == "foo./bar"
    assert removeDotSegments("a/b../c") == "a/b../c"
  # xxx adapt or reuse `pathnorm.normalizePath(path, '/')` to make this more reliable, but
  # taking into account url specificities such as not collapsing leading `//` in scheme
  # `https://`. see `turi` for failing tests.
  if path.len == 0: return ""

  var collection: seq[string] = @[]
  let endsWithSlash = path.endsWith '/'
  var i = 0
  var currentSegment = ""
  while i < path.len:
    case path[i]
    of '/':
      collection.add(currentSegment)
      currentSegment = ""
    of '.':
      # A dot is only special when it begins a segment; otherwise it is an
      # ordinary character of the segment's name.
      if currentSegment.len == 0:
        if i+2 < path.len and path[i+1] == '.' and path[i+2] == '/':
          if collection.len > 0:
            discard collection.pop()
          i.inc 3
          continue
        elif i+1 < path.len and path[i+1] == '/':
          i.inc 2
          continue
      currentSegment.add path[i]
    else:
      currentSegment.add path[i]
    i.inc
  if currentSegment != "":
    collection.add currentSegment

  result = collection.join("/")
  # `join` does not emit the separator after the last segment.
  if endsWithSlash: result.add '/'
func merge(base, reference: Uri): string =
  ## Merges the path of `reference` with the path of `base` and returns the
  ## combined path.
  # http://tools.ietf.org/html/rfc3986#section-5.2.3
  if base.hostname.len > 0 and base.path.len == 0:
    # A base with an authority but no path: root the reference path.
    return "/" & reference.path
  let lastSlash = base.path.rfind('/')
  result =
    if lastSlash < 0:
      reference.path
    else:
      # Keep the base path up to and including its last slash.
      base.path[0 .. lastSlash] & reference.path
func combine*(base: Uri, reference: Uri): Uri =
  ## Combines a base URI with a reference URI.
  ##
  ## This uses the algorithm specified in
  ## `section 5.2.2 of RFC 3986 <http://tools.ietf.org/html/rfc3986#section-5.2.2>`_.
  ##
  ## This means that the slashes inside the base URIs path as well as reference
  ## URIs path affect the resulting URI.
  ##
  ## The authority of the result (including its `isIpv6` flag) is taken from
  ## `reference` when it has a hostname and from `base` otherwise. The anchor
  ## always comes from `reference`.
  ##
  ## **See also:**
  ## * `/ func <#/,Uri,string>`_ for building URIs
  runnableExamples:
    let foo = combine(parseUri("https://nim-lang.org/foo/bar"), parseUri("/baz"))
    assert foo.path == "/baz"
    let bar = combine(parseUri("https://nim-lang.org/foo/bar"), parseUri("baz"))
    assert bar.path == "/foo/baz"
    let qux = combine(parseUri("https://nim-lang.org/foo/bar/"), parseUri("baz"))
    assert qux.path == "/foo/bar/baz"
    let v6 = combine(parseUri("http://[::1]:8080/foo/bar"), parseUri("baz"))
    assert $v6 == "http://[::1]:8080/foo/baz"

  template setAuthority(dest, src): untyped =
    dest.hostname = src.hostname
    dest.username = src.username
    dest.port = src.port
    dest.password = src.password
    # The flag describes the hostname, so it has to travel with it;
    # otherwise `$` would print an IPv6 host without its brackets.
    dest.isIpv6 = src.isIpv6

  result = initUri()
  if reference.scheme != base.scheme and reference.scheme != "":
    # A reference with a scheme of its own is taken over as a whole.
    result = reference
    result.path = removeDotSegments(result.path)
  else:
    if reference.hostname != "":
      setAuthority(result, reference)
      result.path = removeDotSegments(reference.path)
      result.query = reference.query
    else:
      if reference.path == "":
        result.path = base.path
        # An empty reference path keeps the base query unless the
        # reference brings its own.
        if reference.query != "":
          result.query = reference.query
        else:
          result.query = base.query
      else:
        if reference.path.startsWith("/"):
          result.path = removeDotSegments(reference.path)
        else:
          result.path = removeDotSegments(merge(base, reference))
        result.query = reference.query
      setAuthority(result, base)
    result.scheme = base.scheme
  result.anchor = reference.anchor
func combine*(uris: varargs[Uri]): Uri =
  ## Combines several URIs from left to right: the first one is the base
  ## and every following one is resolved against the result so far.
  ##
  ## **See also:**
  ## * `/ func <#/,Uri,string>`_ for building URIs
  runnableExamples:
    let foo = combine(parseUri("https://nim-lang.org/"), parseUri("docs/"),
                      parseUri("manual.html"))
    assert foo.hostname == "nim-lang.org"
    assert foo.path == "/docs/manual.html"
  var combined = uris[0]
  var idx = 1
  while idx < uris.len:
    combined = combine(combined, uris[idx])
    inc idx
  combined
func isAbsolute*(uri: Uri): bool =
  ## Returns true if `uri` has a scheme together with a hostname or a path,
  ## false otherwise.
  runnableExamples:
    assert parseUri("https://nim-lang.org").isAbsolute
    assert not parseUri("nim-lang").isAbsolute
  let hasTarget = uri.hostname.len > 0 or uri.path.len > 0
  uri.scheme.len > 0 and hasTarget
func `/`*(x: Uri, path: string): Uri =
  ## Appends `path` to the path of the URI `x`.
  ##
  ## Unlike the `combine func <#combine,Uri,Uri>`_, exactly one slash ends
  ## up between the two parts, whether or not the URIs path ends with a
  ## slash and whether or not `path` starts with one.
  ##
  ## **See also:**
  ## * `combine func <#combine,Uri,Uri>`_
  runnableExamples:
    let foo = parseUri("https://nim-lang.org/foo/bar") / "/baz"
    assert foo.path == "/foo/bar/baz"
    let bar = parseUri("https://nim-lang.org/foo/bar") / "baz"
    assert bar.path == "/foo/bar/baz"
    let qux = parseUri("https://nim-lang.org/foo/bar/") / "baz"
    assert qux.path == "/foo/bar/baz"
  result = x
  let pathIsRooted = path.startsWith('/')
  if result.path.len == 0:
    # Nothing to append to: the new path must start with a slash.
    if not pathIsRooted:
      result.path = "/"
    result.path.add path
  elif result.path.endsWith('/'):
    # Both sides may supply the slash; drop the one from `path`.
    result.path.add(if pathIsRooted: path.substr(1) else: path)
  else:
    if not pathIsRooted:
      result.path.add '/'
    result.path.add path
func `?`*(u: Uri, query: openArray[(string, string)]): Uri =
  ## Returns a copy of `u` whose query is the encoded form of the given
  ## (key, value) parameters. An existing query is replaced.
  runnableExamples:
    let foo = parseUri("https://example.com") / "foo" ? {"bar": "qux"}
    assert $foo == "https://example.com/foo?bar=qux"
  var updated = u
  updated.query = encodeQuery(query)
  updated
func `$`*(u: Uri): string =
  ## Renders the URI object `u` as a string.
  runnableExamples:
    assert $parseUri("https://nim-lang.org") == "https://nim-lang.org"
  # Reserve room for all the parts plus 12 characters, the most that the
  # punctuation between them can need.
  let partsLen = u.scheme.len + u.username.len + u.password.len +
                 u.hostname.len + u.port.len + u.path.len + u.query.len +
                 u.anchor.len
  result = newStringOfCap(partsLen + 12)

  if u.scheme.len > 0:
    result.add u.scheme
    result.add ':'
    if not u.opaque:
      result.add "//"

  if u.username.len > 0:
    result.add u.username
    if u.password.len > 0:
      result.add ':'
      result.add u.password
    result.add '@'

  # A trailing slash on the hostname is dropped; the path supplies it.
  let host =
    if u.hostname.endsWith('/'): u.hostname[0 .. ^2]
    else: u.hostname
  if u.isIpv6:
    result.add '['
    result.add host
    result.add ']'
  else:
    result.add host

  if u.port.len > 0:
    result.add ':'
    result.add u.port

  if u.path.len > 0:
    # Make sure a slash separates the hostname from a relative path.
    if u.hostname.len > 0 and u.path[0] != '/':
      result.add '/'
    result.add u.path

  if u.query.len > 0:
    result.add '?'
    result.add u.query

  if u.anchor.len > 0:
    result.add '#'
    result.add u.anchor
proc getDataUri*(data, mime: string, encoding = "utf-8"): string {.since: (1, 3).} =
  ## Convenience wrapper around `base64.encode` that returns a standard
  ## Base64 Data URI (RFC-2397) of the form
  ## `data:<mime>;charset=<encoding>;base64,<encoded data>`.
  ##
  ## `mime` and `encoding` must not be empty.
  ##
  ## **See also:**
  ## * `mimetypes <mimetypes.html>`_ for `mime` argument
  ## * https://tools.ietf.org/html/rfc2397
  ## * https://en.wikipedia.org/wiki/Data_URI_scheme
  runnableExamples: static: assert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt"
  assert encoding.len > 0 and mime.len > 0
  const
    schemePrefix = "data:"
    charsetTag = ";charset="
    base64Tag = ";base64,"
  # Must *not* be URL-Safe, see RFC-2397
  let payload: string = base64.encode(data)
  result = newStringOfCap(schemePrefix.len + charsetTag.len + base64Tag.len +
                          mime.len + encoding.len + payload.len)
  result.add schemePrefix
  result.add mime
  result.add charsetTag
  result.add encoding
  result.add base64Tag
  result.add payload