summary refs log tree commit diff stats
path: root/lib/pure/uri.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/uri.nim')
-rw-r--r-- lib/pure/uri.nim 95
1 files changed, 54 insertions, 41 deletions
diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim
index d2d11253a..dd8040928 100644
--- a/lib/pure/uri.nim
+++ b/lib/pure/uri.nim
@@ -18,14 +18,12 @@ type
     hostname*, port*, path*, query*, anchor*: string
     opaque*: bool
 
-{.deprecated: [TUrl: Url, TUri: Uri].}
-
 {.push warning[deprecated]: off.}
-proc `$`*(url: Url): string {.deprecated.} =
+proc `$`*(url: Url): string {.deprecated: "use Uri instead".} =
   ## **Deprecated since 0.9.6**: Use ``Uri`` instead.
   return string(url)
 
-proc `/`*(a, b: Url): Url {.deprecated.} =
+proc `/`*(a, b: Url): Url {.deprecated: "use Uri instead".} =
   ## Joins two URLs together, separating them with / if needed.
   ##
   ## **Deprecated since 0.9.6**: Use ``Uri`` instead.
@@ -40,39 +38,50 @@ proc `/`*(a, b: Url): Url {.deprecated.} =
     urlS.add(bs)
   result = Url(urlS)
 
-proc add*(url: var Url, a: Url) {.deprecated.} =
+proc add*(url: var Url, a: Url) {.deprecated: "use Uri instead".} =
   ## Appends url to url.
   ##
   ## **Deprecated since 0.9.6**: Use ``Uri`` instead.
   url = url / a
 {.pop.}
 
-proc encodeUrl*(s: string): string =
-  ## Encodes a value to be HTTP safe: This means that characters in the set
-  ## ``{'A'..'Z', 'a'..'z', '0'..'9', '_'}`` are carried over to the result,
-  ## a space is converted to ``'+'`` and every other character is encoded as
-  ## ``'%xx'`` where ``xx`` denotes its hexadecimal value.
+proc encodeUrl*(s: string, usePlus=true): string =
+  ## Encodes a URL according to RFC3986.
+  ##
+  ## This means that characters in the set
+  ## ``{'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}`` are
+  ## carried over to the result.
+  ## All other characters are encoded as ``'%xx'`` where ``xx``
+  ## denotes its hexadecimal value.
+  ##
+  ## As a special rule, when the value of ``usePlus`` is true,
+  ## spaces are encoded as ``'+'`` instead of ``'%20'``.
   result = newStringOfCap(s.len + s.len shr 2) # assume 12% non-alnum-chars
-  for i in 0..s.len-1:
-    case s[i]
-    of 'a'..'z', 'A'..'Z', '0'..'9', '_': add(result, s[i])
-    of ' ': add(result, '+')
+  let fromSpace = if usePlus: "+" else: "%20"
+  for c in s:
+    case c
+    of 'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~': add(result, c)
+    of ' ': add(result, fromSpace)
     else:
       add(result, '%')
-      add(result, toHex(ord(s[i]), 2))
-      
-proc decodeUrl*(s: string): string =
-  ## Decodes a value from its HTTP representation: This means that a ``'+'``
-  ## is converted to a space, ``'%xx'`` (where ``xx`` denotes a hexadecimal
-  ## value) is converted to the character with ordinal number ``xx``, and
+      add(result, toHex(ord(c), 2))
+
+proc decodeUrl*(s: string, decodePlus=true): string =
+  ## Decodes a URL according to RFC3986.
+  ##
+  ## This means that any ``'%xx'`` (where ``xx`` denotes a hexadecimal
+  ## value) is converted to the character with ordinal number ``xx``,
   ## and every other character is carried over.
+  ##
+  ## As a special rule, when the value of ``decodePlus`` is true, ``'+'``
+  ## characters are converted to a space.
   proc handleHexChar(c: char, x: var int) {.inline.} =
     case c
     of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
     of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
     of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
     else: assert(false)
-    
+
   result = newString(s.len)
   var i = 0
   var j = 0
@@ -84,7 +93,11 @@ proc decodeUrl*(s: string): string =
       handleHexChar(s[i+2], x)
       inc(i, 2)
       result[j] = chr(x)
-    of '+': result[j] = ' '
+    of '+':
+      if decodePlus:
+        result[j] = ' '
+      else:
+        result[j] = s[i]
     else: result[j] = s[i]
     inc(i)
     inc(j)
@@ -94,7 +107,7 @@ proc parseAuthority(authority: string, result: var Uri) =
   var i = 0
   var inPort = false
   var inIPv6 = false
-  while true:
+  while i < authority.len:
     case authority[i]
     of '@':
       swap result.password, result.port
@@ -111,7 +124,6 @@ proc parseAuthority(authority: string, result: var Uri) =
       inIPv6 = true
     of ']':
       inIPv6 = false
-    of '\0': break
     else:
       if inPort:
         result.port.add(authority[i])
@@ -128,11 +140,11 @@ proc parsePath(uri: string, i: var int, result: var Uri) =
     parseAuthority(result.path, result)
     result.path.setLen(0)
 
-  if uri[i] == '?':
+  if i < uri.len and uri[i] == '?':
     i.inc # Skip '?'
     i.inc parseUntil(uri, result.query, {'#'}, i)
 
-  if uri[i] == '#':
+  if i < uri.len and uri[i] == '#':
     i.inc # Skip '#'
     i.inc parseUntil(uri, result.anchor, {}, i)
 
@@ -156,7 +168,7 @@ proc parseUri*(uri: string, result: var Uri) =
 
   # Check if this is a reference URI (relative URI)
   let doubleSlash = uri.len > 1 and uri[1] == '/'
-  if uri[i] == '/':
+  if i < uri.len and uri[i] == '/':
     # Make sure ``uri`` doesn't begin with '//'.
     if not doubleSlash:
       parsePath(uri, i, result)
@@ -164,7 +176,7 @@ proc parseUri*(uri: string, result: var Uri) =
 
   # Scheme
   i.inc parseWhile(uri, result.scheme, Letters + Digits + {'+', '-', '.'}, i)
-  if uri[i] != ':' and not doubleSlash:
+  if (i >= uri.len or uri[i] != ':') and not doubleSlash:
     # Assume this is a reference URI (relative URI)
     i = 0
     result.scheme.setLen(0)
@@ -174,7 +186,7 @@ proc parseUri*(uri: string, result: var Uri) =
     i.inc # Skip ':'
 
   # Authority
-  if uri[i] == '/' and uri[i+1] == '/':
+  if i+1 < uri.len and uri[i] == '/' and uri[i+1] == '/':
     i.inc(2) # Skip //
     var authority = ""
     i.inc parseUntil(uri, authority, {'/', '?', '#'}, i)
@@ -197,13 +209,13 @@ proc removeDotSegments(path: string): string =
   let endsWithSlash = path[path.len-1] == '/'
   var i = 0
   var currentSegment = ""
-  while true:
+  while i < path.len:
     case path[i]
     of '/':
       collection.add(currentSegment)
       currentSegment = ""
     of '.':
-      if path[i+1] == '.' and path[i+2] == '/':
+      if i+2 < path.len and path[i+1] == '.' and path[i+2] == '/':
         if collection.len > 0:
           discard collection.pop()
           i.inc 3
@@ -212,13 +224,11 @@ proc removeDotSegments(path: string): string =
         i.inc 2
         continue
       currentSegment.add path[i]
-    of '\0':
-      if currentSegment != "":
-        collection.add currentSegment
-      break
     else:
       currentSegment.add path[i]
     i.inc
+  if currentSegment != "":
+    collection.add currentSegment
 
   result = collection.join("/")
   if endsWithSlash: result.add '/'
@@ -320,18 +330,18 @@ proc `/`*(x: Uri, path: string): Uri =
   result = x
 
   if result.path.len == 0:
-    if path[0] != '/':
+    if path.len == 0 or path[0] != '/':
       result.path = "/"
     result.path.add(path)
     return
 
-  if result.path[result.path.len-1] == '/':
-    if path[0] == '/':
+  if result.path.len > 0 and result.path[result.path.len-1] == '/':
+    if path.len > 0 and path[0] == '/':
       result.path.add(path[1 .. path.len-1])
     else:
       result.path.add(path)
   else:
-    if path[0] != '/':
+    if path.len == 0 or path[0] != '/':
       result.path.add '/'
     result.path.add(path)
 
@@ -373,7 +383,10 @@ when isMainModule:
     const test1 = "abc\L+def xyz"
     doAssert encodeUrl(test1) == "abc%0A%2Bdef+xyz"
     doAssert decodeUrl(encodeUrl(test1)) == test1
-    
+    doAssert encodeUrl(test1, false) == "abc%0A%2Bdef%20xyz"
+    doAssert decodeUrl(encodeUrl(test1, false), false) == test1
+    doAssert decodeUrl(encodeUrl(test1)) == test1
+
   block:
     let str = "http://localhost"
     let test = parseUri(str)
@@ -464,7 +477,7 @@ when isMainModule:
     doAssert test.hostname == "github.com"
     doAssert test.port == "dom96"
     doAssert test.path == "/packages"
-    
+
   block:
     let str = "file:///foo/bar/baz.txt"
     let test = parseUri(str)