diff options
Diffstat (limited to 'lib/pure/uri.nim')
-rw-r--r-- | lib/pure/uri.nim | 95 |
1 files changed, 54 insertions, 41 deletions
diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim index d2d11253a..dd8040928 100644 --- a/lib/pure/uri.nim +++ b/lib/pure/uri.nim @@ -18,14 +18,12 @@ type hostname*, port*, path*, query*, anchor*: string opaque*: bool -{.deprecated: [TUrl: Url, TUri: Uri].} - {.push warning[deprecated]: off.} -proc `$`*(url: Url): string {.deprecated.} = +proc `$`*(url: Url): string {.deprecated: "use Uri instead".} = ## **Deprecated since 0.9.6**: Use ``Uri`` instead. return string(url) -proc `/`*(a, b: Url): Url {.deprecated.} = +proc `/`*(a, b: Url): Url {.deprecated: "use Uri instead".} = ## Joins two URLs together, separating them with / if needed. ## ## **Deprecated since 0.9.6**: Use ``Uri`` instead. @@ -40,39 +38,50 @@ proc `/`*(a, b: Url): Url {.deprecated.} = urlS.add(bs) result = Url(urlS) -proc add*(url: var Url, a: Url) {.deprecated.} = +proc add*(url: var Url, a: Url) {.deprecated: "use Uri instead".} = ## Appends url to url. ## ## **Deprecated since 0.9.6**: Use ``Uri`` instead. url = url / a {.pop.} -proc encodeUrl*(s: string): string = - ## Encodes a value to be HTTP safe: This means that characters in the set - ## ``{'A'..'Z', 'a'..'z', '0'..'9', '_'}`` are carried over to the result, - ## a space is converted to ``'+'`` and every other character is encoded as - ## ``'%xx'`` where ``xx`` denotes its hexadecimal value. +proc encodeUrl*(s: string, usePlus=true): string = + ## Encodes a URL according to RFC3986. + ## + ## This means that characters in the set + ## ``{'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}`` are + ## carried over to the result. + ## All other characters are encoded as ``''%xx'`` where ``xx`` + ## denotes its hexadecimal value. + ## + ## As a special rule, when the value of ``usePlus`` is true, + ## spaces are encoded as ``'+'`` instead of ``'%20'``. result = newStringOfCap(s.len + s.len shr 2) # assume 12% non-alnum-chars - for i in 0..s.len-1: - case s[i] - of 'a'..'z', 'A'..'Z', '0'..'9', '_': add(result, s[i]) - of ' ': add(result, '+') + let fromSpace = if usePlus: "+" else: "%20" + for c in s: + case c + of 'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~': add(result, c) + of ' ': add(result, fromSpace) else: add(result, '%') - add(result, toHex(ord(s[i]), 2)) - -proc decodeUrl*(s: string): string = - ## Decodes a value from its HTTP representation: This means that a ``'+'`` - ## is converted to a space, ``'%xx'`` (where ``xx`` denotes a hexadecimal - ## value) is converted to the character with ordinal number ``xx``, and + add(result, toHex(ord(c), 2)) + +proc decodeUrl*(s: string, decodePlus=true): string = + ## Decodes a URL according to RFC3986. + ## + ## This means that any ``'%xx'`` (where ``xx`` denotes a hexadecimal + ## value) are converted to the character with ordinal number ``xx``, ## and every other character is carried over. + ## + ## As a special rule, when the value of ``decodePlus`` is true, ``'+'`` + ## characters are converted to a space. proc handleHexChar(c: char, x: var int) {.inline.} = case c of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) else: assert(false) - + result = newString(s.len) var i = 0 var j = 0 @@ -84,7 +93,11 @@ proc decodeUrl*(s: string): string = handleHexChar(s[i+2], x) inc(i, 2) result[j] = chr(x) - of '+': result[j] = ' ' + of '+': + if decodePlus: + result[j] = ' ' + else: + result[j] = s[i] else: result[j] = s[i] inc(i) inc(j) @@ -94,7 +107,7 @@ proc parseAuthority(authority: string, result: var Uri) = var i = 0 var inPort = false var inIPv6 = false - while true: + while i < authority.len: case authority[i] of '@': swap result.password, result.port @@ -111,7 +124,6 @@ proc parseAuthority(authority: string, result: var Uri) = inIPv6 = true of ']': inIPv6 = false - of '\0': break else: if inPort: result.port.add(authority[i]) @@ -128,11 +140,11 @@ proc parsePath(uri: string, i: var int, result: var Uri) = parseAuthority(result.path, result) result.path.setLen(0) - if uri[i] == '?': + if i < uri.len and uri[i] == '?': i.inc # Skip '?' i.inc parseUntil(uri, result.query, {'#'}, i) - if uri[i] == '#': + if i < uri.len and uri[i] == '#': i.inc # Skip '#' i.inc parseUntil(uri, result.anchor, {}, i) @@ -156,7 +168,7 @@ proc parseUri*(uri: string, result: var Uri) = # Check if this is a reference URI (relative URI) let doubleSlash = uri.len > 1 and uri[1] == '/' - if uri[i] == '/': + if i < uri.len and uri[i] == '/': # Make sure ``uri`` doesn't begin with '//'. if not doubleSlash: parsePath(uri, i, result) @@ -164,7 +176,7 @@ proc parseUri*(uri: string, result: var Uri) = # Scheme i.inc parseWhile(uri, result.scheme, Letters + Digits + {'+', '-', '.'}, i) - if uri[i] != ':' and not doubleSlash: + if (i >= uri.len or uri[i] != ':') and not doubleSlash: # Assume this is a reference URI (relative URI) i = 0 result.scheme.setLen(0) @@ -174,7 +186,7 @@ proc parseUri*(uri: string, result: var Uri) = i.inc # Skip ':' # Authority - if uri[i] == '/' and uri[i+1] == '/': + if i+1 < uri.len and uri[i] == '/' and uri[i+1] == '/': i.inc(2) # Skip // var authority = "" i.inc parseUntil(uri, authority, {'/', '?', '#'}, i) @@ -197,13 +209,13 @@ proc removeDotSegments(path: string): string = let endsWithSlash = path[path.len-1] == '/' var i = 0 var currentSegment = "" - while true: + while i < path.len: case path[i] of '/': collection.add(currentSegment) currentSegment = "" of '.': - if path[i+1] == '.' and path[i+2] == '/': + if i+2 < path.len and path[i+1] == '.' and path[i+2] == '/': if collection.len > 0: discard collection.pop() i.inc 3 @@ -212,13 +224,11 @@ proc removeDotSegments(path: string): string = i.inc 2 continue currentSegment.add path[i] - of '\0': - if currentSegment != "": - collection.add currentSegment - break else: currentSegment.add path[i] i.inc + if currentSegment != "": + collection.add currentSegment result = collection.join("/") if endsWithSlash: result.add '/' @@ -320,18 +330,18 @@ proc `/`*(x: Uri, path: string): Uri = result = x if result.path.len == 0: - if path[0] != '/': + if path.len == 0 or path[0] != '/': result.path = "/" result.path.add(path) return - if result.path[result.path.len-1] == '/': - if path[0] == '/': + if result.path.len > 0 and result.path[result.path.len-1] == '/': + if path.len > 0 and path[0] == '/': result.path.add(path[1 .. path.len-1]) else: result.path.add(path) else: - if path[0] != '/': + if path.len == 0 or path[0] != '/': result.path.add '/' result.path.add(path) @@ -373,7 +383,10 @@ when isMainModule: const test1 = "abc\L+def xyz" doAssert encodeUrl(test1) == "abc%0A%2Bdef+xyz" doAssert decodeUrl(encodeUrl(test1)) == test1 - + doAssert encodeUrl(test1, false) == "abc%0A%2Bdef%20xyz" + doAssert decodeUrl(encodeUrl(test1, false), false) == test1 + doAssert decodeUrl(encodeUrl(test1)) == test1 + block: let str = "http://localhost" let test = parseUri(str) @@ -464,7 +477,7 @@ when isMainModule: doAssert test.hostname == "github.com" doAssert test.port == "dom96" doAssert test.path == "/packages" - + block: let str = "file:///foo/bar/baz.txt" let test = parseUri(str) |