summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--lib/pure/parsecfg.nim3
-rw-r--r--lib/pure/parseurl.nim4
-rw-r--r--lib/pure/uri.nim350
-rw-r--r--lib/system.nim10
-rw-r--r--web/news.txt4
5 files changed, 355 insertions, 16 deletions
diff --git a/lib/pure/parsecfg.nim b/lib/pure/parsecfg.nim
index 727a8efd8..1d61a967b 100644
--- a/lib/pure/parsecfg.nim
+++ b/lib/pure/parsecfg.nim
@@ -350,7 +350,8 @@ proc next*(c: var TCfgParser): TCfgEvent {.rtl, extern: "npc$1".} =
     rawGetTok(c, c.tok)
     if c.tok.kind == tkBracketRi: 
       rawGetTok(c, c.tok)
-    else: 
+    else:
+      reset(result)
       result.kind = cfgError
       result.msg = errorStr(c, "']' expected, but found: " & c.tok.literal)
   of tkInvalid, tkEquals, tkColon, tkBracketRi: 
diff --git a/lib/pure/parseurl.nim b/lib/pure/parseurl.nim
index 357d1df0f..67c6de905 100644
--- a/lib/pure/parseurl.nim
+++ b/lib/pure/parseurl.nim
@@ -20,7 +20,7 @@ type
     scheme, username, password, 
     hostname, port, path, query, anchor: string]
     
-proc parseUrl*(url: string): TUrl =
+proc parseUrl*(url: string): TUrl {.deprecated.} =
   var i = 0
 
   var scheme, username, password: string = ""
@@ -86,7 +86,7 @@ proc parseUrl*(url: string): TUrl =
     
   return (scheme, username, password, hostname, port, path, query, anchor)
 
-proc `$`*(u: TUrl): string =
+proc `$`*(u: TUrl): string {.deprecated.} =
   ## turns the URL `u` into its string representation.
   result = ""
   if u.scheme.len > 0:
diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim
index ee1226a35..7ec823033 100644
--- a/lib/pure/uri.nim
+++ b/lib/pure/uri.nim
@@ -7,17 +7,24 @@
 #    distribution, for details about the copyright.
 #
 
-## **Note**: This module will be deprecated in the future and merged into a
-## new ``url`` module.
+## This module implements URI parsing as specified by RFC 3986.
 
-import strutils
+import strutils, parseutils
 type
   TUrl* = distinct string
 
-proc `$`*(url: TUrl): string = return string(url)
+  TUri* = object
+    scheme*, username*, password*: string 
+    hostname*, port*, path*, query*, anchor*: string
 
-proc `/`*(a, b: TUrl): TUrl =
+proc `$`*(url: TUrl): string {.deprecated.} =
+  ## **Deprecated since 0.9.6**: Use ``TUri`` instead.
+  result = string(url)
+
+proc `/`*(a, b: TUrl): TUrl {.deprecated.} =
   ## Joins two URLs together, separating them with / if needed.
+  ##
+  ## **Deprecated since 0.9.6**: Use ``TUri`` instead.
   var urlS = $a
   var bS = $b
   if urlS == "": return b
@@ -29,9 +36,338 @@ proc `/`*(a, b: TUrl): TUrl =
     urlS.add(bs)
   result = TUrl(urlS)
 
-proc add*(url: var TUrl, a: TUrl) =
+proc add*(url: var TUrl, a: TUrl) {.deprecated.} =
   ## Appends url to url.
+  ##
+  ## **Deprecated since 0.9.6**: Use ``TUri`` instead.
   url = url / a
 
+proc parseAuthority(authority: string, result: var TUri) =
+  ## Splits "user:pass@host:port" into `result`. A bracketed IP literal such
+  ## as "[::1]" is kept whole in the hostname instead of ending at its ':'.
+  var inPort, inBrackets = false
+  for ch in authority:
+    case ch
+    of '\0': break
+    of '@':
+      # What was collected so far turned out to be the user info.
+      result.username = result.hostname
+      result.password = result.port
+      result.hostname = ""
+      result.port = ""
+      inPort = false; inBrackets = false
+    else:
+      if ch == '[': inBrackets = true
+      elif ch == ']': inBrackets = false
+      if ch == ':' and not inBrackets: inPort = true
+      elif inPort: result.port.add(ch)
+      else: result.hostname.add(ch)
+
+proc parsePath(uri: string, i: var int, result: var TUri) =
+  ## Parses the path, then the optional "?query" and "#anchor", of `uri`
+  ## starting at index `i`; `i` is advanced past everything consumed.
+  inc(i, parseUntil(uri, result.path, {'?', '#'}, i))
+
+  # For 'mailto' the "path" is really user@host: re-parse it as an authority.
+  if toLower(result.scheme) == "mailto":
+    parseAuthority(result.path, result)
+    result.path = ""
+
+  if uri[i] == '?':
+    inc(i) # step over '?'
+    inc(i, parseUntil(uri, result.query, {'#'}, i))
+  if uri[i] == '#':
+    inc(i) # step over '#'
+    inc(i, parseUntil(uri, result.anchor, {}, i))
+
+proc initUri(): TUri = # every component starts out as the empty string
+  TUri(scheme: "", username: "", password: "", hostname: "", port: "",
+       path: "", query: "", anchor: "")
+
+proc parseUri*(uri: string): TUri =
+  ## Parses `uri` into its components (RFC 3986). Input without a "scheme:"
+  ## prefix is parsed as a relative reference; an empty authority, as in
+  ## "file:///etc/hosts", leaves the hostname empty.
+  result = initUri()
+  var i = 0
+
+  # Check if this is a reference URI (relative URI)
+  if uri[i] == '/':
+    parsePath(uri, i, result)
+    return
+
+  # Scheme
+  i.inc parseWhile(uri, result.scheme, Letters + Digits + {'+', '-', '.'}, i)
+  if uri[i] != ':':
+    # Assume this is a reference URI (relative URI)
+    i = 0
+    result.scheme = ""
+    parsePath(uri, i, result)
+    return
+  i.inc # Skip ':'
+
+  # Authority
+  if uri[i] == '/' and uri[i+1] == '/':
+    i.inc(2) # Skip //
+    var authority = ""
+    i.inc parseUntil(uri, authority, {'/', '?', '#'}, i)
+    # An empty authority is valid (RFC 3986 allows an empty host).
+    if authority != "": parseAuthority(authority, result)
+
+  # Path
+  parsePath(uri, i, result)
+
+proc removeDotSegments(path: string): string =
+  ## Removes the "." and ".." segments from `path`, in the manner of
+  ## section 5.2.4 of RFC 3986.
+  ##
+  ## Only whole segments count, so "/a./b" and "/a..b" come back unchanged.
+  ## A ".." drops the segment before it, but never the root of an absolute
+  ## path ("/../g" becomes "/g"). A trailing "." or ".." leaves a trailing
+  ## slash ("/a/b/.." becomes "/a/"). An empty `path` yields "".
+  ##
+  ## Paths without dot segments are returned as they are, empty segments
+  ## ("a//b") and a trailing slash included.
+  let segments = path.split('/')
+  # An absolute path splits into a leading "" segment: that one is the root
+  # and has to survive any number of "..".
+  let rootLen = if path.startsWith("/"): 1 else: 0
+  var kept: seq[string] = @[]
+
+  for idx, segment in segments:
+    if segment != "." and segment != "..":
+      kept.add segment
+      continue
+
+    # From here on `segment` is "." or ".."; neither is copied to the output.
+    if segment == ".." and kept.len > rootLen:
+      discard kept.pop()
+    if idx == segments.high:
+      # The dot segment stood for a directory: keep the path ending in '/'.
+      kept.add ""
+
+  result = kept.join("/")
+
+proc merge(base, reference: TUri): string =
+  ## Merges the reference's path onto the base's path, see
+  ## http://tools.ietf.org/html/rfc3986#section-5.2.3
+  if base.hostname != "" and base.path == "":
+    return '/' & reference.path
+  # Keep the base path up to and including its last '/', if it has one.
+  let slashAt = rfind(base.path, "/")
+  result = reference.path
+  if slashAt != -1:
+    result = base.path[0 .. slashAt] & reference.path
+
+proc combine*(base: TUri, reference: TUri): TUri =
+  ## Combines a base URI with a reference URI.
+  ##
+  ## This uses the algorithm specified in
+  ## `section 5.2.2 of RFC 3986 <http://tools.ietf.org/html/rfc3986#section-5.2.2>`_.
+  ##
+  ## This means that the slashes inside the base URI's path as well as reference
+  ## URI's path affect the resulting URI.
+  ##
+  ## For building URIs you may wish to use \`/\` instead.
+  ##
+  ## Examples:
+  ##
+  ## .. code-block:: nimrod
+  ##   let foo = combine(parseUri("http://example.com/foo/bar"), parseUri("/baz"))
+  ##   assert foo.path == "/baz"
+  ##
+  ##   let bar = combine(parseUri("http://example.com/foo/bar"), parseUri("baz"))
+  ##   assert bar.path == "/foo/baz"
+  ##
+  ##   let baz = combine(parseUri("http://example.com/foo/bar/"), parseUri("baz"))
+  ##   assert baz.path == "/foo/bar/baz"
+  
+  template setAuthority(dest, src: expr): stmt =
+    dest.hostname = src.hostname
+    dest.username = src.username
+    dest.port = src.port
+    dest.password = src.password
+
+  result = initUri()
+  if reference.scheme != base.scheme and reference.scheme != "":
+    result = reference  # reference names a different scheme: take it whole
+    result.path = removeDotSegments(result.path)
+  else:
+    if reference.hostname != "":
+      setAuthority(result, reference)
+      result.path = removeDotSegments(reference.path)
+      result.query = reference.query
+    else:
+      if reference.path == "":
+        result.path = base.path
+        if reference.query != "":
+          result.query = reference.query
+        else:
+          result.query = base.query
+      else:
+        if reference.path.startsWith("/"):
+          result.path = removeDotSegments(reference.path)
+        else:
+          result.path = removeDotSegments(merge(base, reference))
+        result.query = reference.query
+      setAuthority(result, base)  # reference has no hostname: inherit base's
+    result.scheme = base.scheme
+  result.anchor = reference.anchor  # the anchor always comes from the reference
+
+proc combine*(uris: varargs[TUri]): TUri =
+  ## Folds `uris` from left to right with the two-argument ``combine``,
+  ## starting from ``uris[0]``.
+  result = uris[0]
+  for idx in 1 .. uris.high: result = combine(result, uris[idx])
+
+proc `/`*(x: TUri, path: string): TUri =
+  ## Concatenates the path specified to the specified URI's path.
+  ##
+  ## Contrary to the ``combine`` procedure you do not have to worry about
+  ## the slashes at the beginning and end of the path and URI's path
+  ## respectively: a '/' is inserted or dropped at the join as needed. The
+  ## URI's path may be empty.
+  ##
+  ## Examples:
+  ##
+  ## .. code-block:: nimrod
+  ##   let foo = parseUri("http://example.com/foo/bar") / "/baz"
+  ##   assert foo.path == "/foo/bar/baz"
+  ##
+  ##   let bar = parseUri("http://example.com/foo/bar") / "baz"
+  ##   assert bar.path == "/foo/bar/baz"
+  ##
+  ##   let baz = parseUri("http://example.com/foo/bar/") / "baz"
+  ##   assert baz.path == "/foo/bar/baz"
+  result = x
+  let baseHasSlash = result.path.endsWith("/")   # false for an empty path
+  let pathHasSlash = path.startsWith("/")
+  if baseHasSlash and pathHasSlash:
+    # Both sides bring a slash: drop the one from `path`.
+    result.path.add(path[1 .. path.len-1])
+  else:
+    if not baseHasSlash and not pathHasSlash: result.path.add '/'
+    result.path.add(path)
+
+proc `$`*(u: TUri): string =
+  ## Returns the string representation of the specified URI object.
+  result = ""
+  if u.scheme.len > 0:
+    # "//" follows the scheme only when an authority or rooted path comes
+    # next; opaque URIs such as "urn:a:b" get a bare ':'.
+    let slashes = u.hostname.len > 0 or u.path.startsWith("/")
+    result.add(u.scheme & (if slashes: "://" else: ":"))
+  if u.username.len > 0:
+    result.add(u.username)
+    if u.password.len > 0: result.add(":" & u.password)
+    result.add("@")
+  result.add(u.hostname)
+  if u.port.len > 0: result.add(":" & u.port)
+  if u.path.len > 0:
+    # A rootless path needs a '/' only to separate it from an authority.
+    if u.hostname.len > 0 and u.path[0] != '/': result.add("/")
+    result.add(u.path)
+  # The '?' and '#' delimiters are not stored in the fields; put them back.
+  if u.query.len > 0: result.add("?" & u.query)
+  if u.anchor.len > 0: result.add("#" & u.anchor)
+
 when isMainModule:
-  assert($("http://".TUrl / "localhost:5000".TUrl) == "http://localhost:5000")
+  block:
+    let test = parseUri("http://localhost:8080/test")
+    doAssert test.scheme == "http"
+    doAssert test.port == "8080"
+    doAssert test.path == "/test"
+    doAssert test.hostname == "localhost"
+
+  block:
+    let test = parseUri("foo://username:password@example.com:8042/over/there" &
+                        "/index.dtb?type=animal&name=narwhal#nose")
+    doAssert test.scheme == "foo"
+    doAssert test.username == "username"
+    doAssert test.password == "password"
+    doAssert test.hostname == "example.com"
+    doAssert test.port == "8042"
+    doAssert test.path == "/over/there/index.dtb"
+    doAssert test.query == "type=animal&name=narwhal"
+    doAssert test.anchor == "nose"
+
+  block:
+    let test = parseUri("urn:example:animal:ferret:nose")
+    doAssert test.scheme == "urn"
+    doAssert test.path == "example:animal:ferret:nose"
+
+  block:
+    let test = parseUri("mailto:username@example.com?subject=Topic")
+    doAssert test.scheme == "mailto"
+    doAssert test.username == "username"
+    doAssert test.hostname == "example.com"
+    doAssert test.query == "subject=Topic"
+
+  block:
+    let test = parseUri("magnet:?xt=urn:sha1:72hsga62ba515sbd62&dn=foobar")
+    doAssert test.scheme == "magnet"
+    doAssert test.query == "xt=urn:sha1:72hsga62ba515sbd62&dn=foobar"
+
+  block:
+    let test = parseUri("/test/foo/bar?q=2#asdf")
+    doAssert test.scheme == ""
+    doAssert test.path == "/test/foo/bar"
+    doAssert test.query == "q=2"
+    doAssert test.anchor == "asdf"
+
+  block:
+    let test = parseUri("test/no/slash")
+    doAssert test.path == "test/no/slash"
+
+  # Remove dot segments tests
+  block:
+    doAssert removeDotSegments("/foo/bar/baz") == "/foo/bar/baz"
+
+  # Combine tests
+  block:
+    let concat = combine(parseUri("http://google.com/foo/bar/"), parseUri("baz"))
+    doAssert concat.path == "/foo/bar/baz"
+    doAssert concat.hostname == "google.com"
+    doAssert concat.scheme == "http"
+
+  block:
+    let concat = combine(parseUri("http://google.com/foo"), parseUri("/baz"))
+    doAssert concat.path == "/baz"
+    doAssert concat.hostname == "google.com"
+    doAssert concat.scheme == "http"
+
+  block:
+    let concat = combine(parseUri("http://google.com/foo/test"), parseUri("bar"))
+    doAssert concat.path == "/foo/bar"
+
+  block:
+    let concat = combine(parseUri("http://google.com/foo/test"), parseUri("/bar"))
+    doAssert concat.path == "/bar"
+
+  block:
+    let concat = combine(parseUri("http://google.com/foo/test"), parseUri("bar"))
+    doAssert concat.path == "/foo/bar"
+
+  block:
+    let concat = combine(parseUri("http://google.com/foo/test/"), parseUri("bar"))
+    doAssert concat.path == "/foo/test/bar"
+
+  block:
+    let concat = combine(parseUri("http://google.com/foo/test/"), parseUri("bar/"))
+    doAssert concat.path == "/foo/test/bar/"
+
+  block:
+    let concat = combine(parseUri("http://google.com/foo/test/"), parseUri("bar/"),
+                         parseUri("baz"))
+    doAssert concat.path == "/foo/test/bar/baz"
+
+  # `/` tests
+  block:
+    let test = parseUri("http://example.com/foo") / "bar/asd"
+    doAssert test.path == "/foo/bar/asd"
+
+  block:
+    let test = parseUri("http://example.com/foo/") / "/bar/asd"
+    doAssert test.path == "/foo/bar/asd"
+
+  
diff --git a/lib/system.nim b/lib/system.nim
index 2fb08563a..440c6474a 100644
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -783,7 +783,7 @@ proc contains*[T](s: TSlice[T], value: T): bool {.noSideEffect, inline.} =
   result = s.a <= value and value <= s.b
 
 template `in` * (x, y: expr): expr {.immediate.} = contains(y, x)
-  ## Suger for contains
+  ## Sugar for contains
   ##
   ## .. code-block:: Nimrod
   ##   assert(1 in (1..3) == true)
@@ -1052,7 +1052,7 @@ proc add *[T](x: var seq[T], y: openArray[T]) {.noSideEffect.} =
   ## containers should also call their adding proc `add` for consistency.
   ## Generic code becomes much easier to write if the Nimrod naming scheme is
   ## respected.
-  var xl = x.len
+  let xl = x.len
   setLen(x, xl + y.len)
   for i in 0..high(y): x[xl+i] = y[i]
 
@@ -1066,20 +1066,20 @@ proc shallowCopy*[T](x: var T, y: T) {.noSideEffect, magic: "ShallowCopy".}
 proc del*[T](x: var seq[T], i: int) {.noSideEffect.} = 
   ## deletes the item at index `i` by putting ``x[high(x)]`` into position `i`.
   ## This is an O(1) operation.
-  var xl = x.len
+  let xl = x.len
   shallowCopy(x[i], x[xl-1])
   setLen(x, xl-1)
   
 proc delete*[T](x: var seq[T], i: int) {.noSideEffect.} = 
   ## deletes the item at index `i` by moving ``x[i+1..]`` by one position.
   ## This is an O(n) operation.
-  var xl = x.len
+  let xl = x.len
   for j in i..xl-2: shallowCopy(x[j], x[j+1]) 
   setLen(x, xl-1)
   
 proc insert*[T](x: var seq[T], item: T, i = 0) {.noSideEffect.} = 
   ## inserts `item` into `x` at position `i`.
-  var xl = x.len
+  let xl = x.len
   setLen(x, xl+1)
   var j = xl-1
   while j >= i:
diff --git a/web/news.txt b/web/news.txt
index a913c94ad..62a923dcd 100644
--- a/web/news.txt
+++ b/web/news.txt
@@ -18,6 +18,8 @@ News
   - ``pas2nim`` moved into its own repository and is now a Babel package.
   - ``system.$`` for floating point types now produces a human friendly string
     representation.
+  - ``uri.TUrl`` as well as the ``parseurl`` module are now deprecated in favour
+    of the new ``TUri`` type in the ``uri`` module.
 
   Library Additions
   -----------------
@@ -26,7 +28,7 @@ News
   - Added module ``threadpool``.
   - ``sequtils.distnct`` has been renamed to ``sequtils.deduplicate``.
   - Added ``algorithm.reversed``
-
+  - Added ``uri.combine`` and ``uri.parseUri``.
 
 2014-04-21 Version 0.9.4 released
 =================================