summary refs log blame commit diff stats
path: root/lib/pure/uri.nim
blob: 7f553be1abc97fc11523235bc0d249ef5c06124c (plain) (tree)
1
2
3
4
5
6
7
8
9

 
                                  
                                           



                                                   
 
                                                               


                                                                       
                                                                     




















                                                         
                                 

                           






                                                                                                                 
 
                        

                                   
                                 
 
 
    
                        
 
               
                                         
                                                    
                 
                                                   
 






                                                             
                                                    




                                                               
                                                             


                                                              


                                                    
                                           



                                                                                                                         
                                                                           


                                             
                                                     

                                                                       

                      
                                   
 
                                                       

                                        
                                                                    
                                                                      
                                               
                                                                   
    
                                                                       
                                         

                  
                                           


                                                                                                            

                                                                         
                                            
 





                           
                                     




                        




                          
                                                                     
                            










                                                                                
                                           
                   
                                  











                                                                 



































                                                                                  
                                                         

                    
                    
                          

                     



                                           

                    





                                         
                          

                    






                                         
                                                          
 


                                                                      
                                                 
                                       
                         
 
                                   


                                                 
                                   


                                               
                      





                                                                   

                            
                                                                              

                                                
                                                       













                                                                              
                             





                      
                                              
                                                                


                                                              
                                                        





                                                          
                  



                                                   
                                                 
                                   



                                                


                                                                             
                                                         

                                                   
                           

                             

                     

             
                                                         


                                                        

                                       

                        



                           
                                  
                                 







                                                              


                       
                                              
                             



                                             
                     




                                    
                                                                  







                                  


                                

                                 



                                  
                                          









                                                    
                                               




                                                                                     

                                                                               
    
                  
                                                  






                                                                                 
 
                                             






























                                                                 
                                        
                                     

                  
                                                  
                   

                                                                           
                                         
                                          
                  
                          

                                     
                                  





                                                      

                                                                    
                                      
                                                                
    
                                                                                      
                                                                   

                  
                  
                                         






                                                               
            

                          
                                       

                         

          

                                                                   



                                            
                                       


                         
                                                            






                                                                      
                           
                                                                   


                                              


                        



                       





                            
                              



                                                 
       



                                        



                      

                                               
                      





                        
 









                                                                                                                   
#
#
#            Nim's Runtime Library
#        (c) Copyright 2015 Dominik Picheta
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements URI parsing as specified by RFC 3986.
##
## A Uniform Resource Identifier (URI) provides a simple and extensible
## means for identifying a resource. A URI can be further classified
## as a locator, a name, or both. The term "Uniform Resource Locator"
## (URL) refers to the subset of URIs.
##
## Basic usage
## ===========
##
## Combine URIs
## -------------
## .. code-block::
##    import uri
##    let host = parseUri("https://nim-lang.org")
##    let blog = "/blog.html"
##    let bloguri = host / blog
##    assert $host == "https://nim-lang.org"
##    assert $bloguri == "https://nim-lang.org/blog.html"
##
## Access URI item
## ---------------
## .. code-block::
##    import uri
##    let res = parseUri("sftp://127.0.0.1:4343")
##    if isAbsolute(res):
##      assert res.port == "4343"
##    else:
##      echo "Wrong format"
##
## Data URI Base64
## ---------------
##
## .. code-block::nim
##    doAssert getDataUri("Hello World", "text/plain") == "data:text/plain;charset=utf-8;base64,SGVsbG8gV29ybGQ="
##    doAssert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt"

import std/private/since

import strutils, parseutils, base64
import std/private/decode_helpers


type
  Url* = distinct string

  Uri* = object
    scheme*, username*, password*: string
    hostname*, port*, path*, query*, anchor*: string
    opaque*: bool
    isIpv6: bool # not expose it for compatibility.

  UriParseError* = object of ValueError


proc uriParseError*(msg: string) {.noreturn.} =
  ## Raises a ``UriParseError`` exception with message `msg`.
  raise newException(UriParseError, msg)

func encodeUrl*(s: string, usePlus = true): string =
  ## Encodes a URL according to RFC3986.
  ##
  ## This means that characters in the set
  ## ``{'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}`` are
  ## carried over to the result.
  ## All other characters are encoded as ``%xx`` where ``xx``
  ## denotes its hexadecimal value.
  ##
  ## As a special rule, when the value of ``usePlus`` is true,
  ## spaces are encoded as ``+`` instead of ``%20``.
  ##
  ## **See also:**
  ## * `decodeUrl func<#decodeUrl,string>`_
  runnableExamples:
    assert encodeUrl("https://nim-lang.org") == "https%3A%2F%2Fnim-lang.org"
    assert encodeUrl("https://nim-lang.org/this is a test") == "https%3A%2F%2Fnim-lang.org%2Fthis+is+a+test"
    assert encodeUrl("https://nim-lang.org/this is a test", false) == "https%3A%2F%2Fnim-lang.org%2Fthis%20is%20a%20test"
  result = newStringOfCap(s.len + s.len shr 2) # assume 12% non-alnum-chars
  let fromSpace = if usePlus: "+" else: "%20"
  for c in s:
    case c
    # https://tools.ietf.org/html/rfc3986#section-2.3
    of 'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~': add(result, c)
    of ' ': add(result, fromSpace)
    else:
      add(result, '%')
      add(result, toHex(ord(c), 2))

func decodeUrl*(s: string, decodePlus = true): string =
  ## Decodes a URL according to RFC3986.
  ##
  ## This means that any ``%xx`` (where ``xx`` denotes a hexadecimal
  ## value) are converted to the character with ordinal number ``xx``,
  ## and every other character is carried over.
  ## If ``xx`` is not a valid hexadecimal value, it is left intact.
  ##
  ## As a special rule, when the value of ``decodePlus`` is true, ``+``
  ## characters are converted to a space.
  ##
  ## **See also:**
  ## * `encodeUrl func<#encodeUrl,string>`_
  runnableExamples:
    assert decodeUrl("https%3A%2F%2Fnim-lang.org") == "https://nim-lang.org"
    assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis+is+a+test") == "https://nim-lang.org/this is a test"
    assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis%20is%20a%20test",
        false) == "https://nim-lang.org/this is a test"
    assert decodeUrl("abc%xyz") == "abc%xyz"

  result = newString(s.len)
  var i = 0
  var j = 0
  while i < s.len:
    case s[i]
    of '%':
      result[j] = decodePercent(s, i)
    of '+':
      if decodePlus:
        result[j] = ' '
      else:
        result[j] = s[i]
    else: result[j] = s[i]
    inc(i)
    inc(j)
  setLen(result, j)

func encodeQuery*(query: openArray[(string, string)], usePlus = true,
    omitEq = true): string =
  ## Encodes a set of (key, value) parameters into a URL query string.
  ##
  ## Every (key, value) pair is URL-encoded and written as ``key=value``. If the
  ## value is an empty string then the ``=`` is omitted, unless ``omitEq`` is
  ## false.
  ## The pairs are joined together by a ``&`` character.
  ##
  ## The ``usePlus`` parameter is passed down to the `encodeUrl` function that
  ## is used for the URL encoding of the string values.
  ##
  ## **See also:**
  ## * `encodeUrl func<#encodeUrl,string>`_
  runnableExamples:
    assert encodeQuery({: }) == ""
    assert encodeQuery({"a": "1", "b": "2"}) == "a=1&b=2"
    assert encodeQuery({"a": "1", "b": ""}) == "a=1&b"
  for elem in query:
    # Encode the `key = value` pairs and separate them with a '&'
    if result.len > 0: result.add('&')
    let (key, val) = elem
    result.add(encodeUrl(key, usePlus))
    # Omit the '=' if the value string is empty
    if not omitEq or val.len > 0:
      result.add('=')
      result.add(encodeUrl(val, usePlus))

iterator decodeQuery*(data: string): tuple[key, value: TaintedString] =
  ## Reads and decodes query string ``data`` and yields the (key, value) pairs the
  ## data consists of.
  runnableExamples:
    import std/sugar
    let s = collect(newSeq):
      for k, v in decodeQuery("foo=1&bar=2"): (k, v)
    doAssert s == @[("foo", "1"), ("bar", "2")]

  proc parseData(data: string, i: int, field: var string): int =
    result = i
    while result < data.len:
      case data[result]
      of '%': add(field, decodePercent(data, result))
      of '+': add(field, ' ')
      of '=', '&': break
      else: add(field, data[result])
      inc(result)

  var i = 0
  var name = ""
  var value = ""
  # decode everything in one pass:
  while i < data.len:
    setLen(name, 0) # reuse memory
    i = parseData(data, i, name)
    setLen(value, 0) # reuse memory
    if i < data.len and data[i] == '=':
      inc(i) # skip '='
      i = parseData(data, i, value)
    yield (name.TaintedString, value.TaintedString)
    if i < data.len:
      if data[i] == '&': inc(i)
      else:
        uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])

func parseAuthority(authority: string, result: var Uri) =
  var i = 0
  var inPort = false
  var inIPv6 = false
  while i < authority.len:
    case authority[i]
    of '@':
      swap result.password, result.port
      result.port.setLen(0)
      swap result.username, result.hostname
      result.hostname.setLen(0)
      inPort = false
    of ':':
      if inIPv6:
        result.hostname.add(authority[i])
      else:
        inPort = true
    of '[':
      inIPv6 = true
      result.isIpv6 = true
    of ']':
      inIPv6 = false
    else:
      if inPort:
        result.port.add(authority[i])
      else:
        result.hostname.add(authority[i])
    i.inc

func parsePath(uri: string, i: var int, result: var Uri) =

  i.inc parseUntil(uri, result.path, {'?', '#'}, i)

  # The 'mailto' scheme's PATH actually contains the hostname/username
  if cmpIgnoreCase(result.scheme, "mailto") == 0:
    parseAuthority(result.path, result)
    result.path.setLen(0)

  if i < uri.len and uri[i] == '?':
    i.inc # Skip '?'
    i.inc parseUntil(uri, result.query, {'#'}, i)

  if i < uri.len and uri[i] == '#':
    i.inc # Skip '#'
    i.inc parseUntil(uri, result.anchor, {}, i)

func initUri*(): Uri =
  ## Initializes a URI with ``scheme``, ``username``, ``password``,
  ## ``hostname``, ``port``, ``path``, ``query`` and ``anchor``.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  runnableExamples:
    var uri2: Uri
    assert initUri() == uri2
  result = Uri(scheme: "", username: "", password: "", hostname: "", port: "",
                path: "", query: "", anchor: "")

func initUri*(isIpv6: bool): Uri {.since: (1, 3, 5).} =
  ## Initializes a URI with ``scheme``, ``username``, ``password``,
  ## ``hostname``, ``port``, ``path``, ``query``, ``anchor`` and ``isIpv6``.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  runnableExamples:
    var uri2 = initUri(isIpv6 = true)
    uri2.scheme = "tcp"
    uri2.hostname = "2001:0db8:85a3:0000:0000:8a2e:0370:7334"
    uri2.port = "8080"
    assert $uri2 == "tcp://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080"
  result = Uri(scheme: "", username: "", password: "", hostname: "", port: "",
                path: "", query: "", anchor: "", isIpv6: isIpv6)

func resetUri(uri: var Uri) =
  for f in uri.fields:
    when f is string:
      f.setLen(0)
    else:
      f = false

func parseUri*(uri: string, result: var Uri) =
  ## Parses a URI. The `result` variable will be cleared before.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  ## * `initUri func <#initUri>`_ for initializing a URI
  runnableExamples:
    var res = initUri()
    parseUri("https://nim-lang.org/docs/manual.html", res)
    assert res.scheme == "https"
    assert res.hostname == "nim-lang.org"
    assert res.path == "/docs/manual.html"
  resetUri(result)

  var i = 0

  # Check if this is a reference URI (relative URI)
  let doubleSlash = uri.len > 1 and uri[1] == '/'
  if i < uri.len and uri[i] == '/':
    # Make sure ``uri`` doesn't begin with '//'.
    if not doubleSlash:
      parsePath(uri, i, result)
      return

  # Scheme
  i.inc parseWhile(uri, result.scheme, Letters + Digits + {'+', '-', '.'}, i)
  if (i >= uri.len or uri[i] != ':') and not doubleSlash:
    # Assume this is a reference URI (relative URI)
    i = 0
    result.scheme.setLen(0)
    parsePath(uri, i, result)
    return
  if not doubleSlash:
    i.inc # Skip ':'

  # Authority
  if i+1 < uri.len and uri[i] == '/' and uri[i+1] == '/':
    i.inc(2) # Skip //
    var authority = ""
    i.inc parseUntil(uri, authority, {'/', '?', '#'}, i)
    if authority.len > 0:
      parseAuthority(authority, result)
  else:
    result.opaque = true

  # Path
  parsePath(uri, i, result)

func parseUri*(uri: string): Uri =
  ## Parses a URI and returns it.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  runnableExamples:
    let res = parseUri("ftp://Username:Password@Hostname")
    assert res.username == "Username"
    assert res.password == "Password"
    assert res.scheme == "ftp"
  result = initUri()
  parseUri(uri, result)

func removeDotSegments(path: string): string =
  if path.len == 0: return ""
  var collection: seq[string] = @[]
  let endsWithSlash = path[path.len-1] == '/'
  var i = 0
  var currentSegment = ""
  while i < path.len:
    case path[i]
    of '/':
      collection.add(currentSegment)
      currentSegment = ""
    of '.':
      if i+2 < path.len and path[i+1] == '.' and path[i+2] == '/':
        if collection.len > 0:
          discard collection.pop()
          i.inc 3
          continue
      elif path[i+1] == '/':
        i.inc 2
        continue
      currentSegment.add path[i]
    else:
      currentSegment.add path[i]
    i.inc
  if currentSegment != "":
    collection.add currentSegment

  result = collection.join("/")
  if endsWithSlash: result.add '/'

func merge(base, reference: Uri): string =
  # http://tools.ietf.org/html/rfc3986#section-5.2.3
  if base.hostname != "" and base.path == "":
    '/' & reference.path
  else:
    let lastSegment = rfind(base.path, "/")
    if lastSegment == -1:
      reference.path
    else:
      base.path[0 .. lastSegment] & reference.path

func combine*(base: Uri, reference: Uri): Uri =
  ## Combines a base URI with a reference URI.
  ##
  ## This uses the algorithm specified in
  ## `section 5.2.2 of RFC 3986 <http://tools.ietf.org/html/rfc3986#section-5.2.2>`_.
  ##
  ## This means that the slashes inside the base URIs path as well as reference
  ## URIs path affect the resulting URI.
  ##
  ## **See also:**
  ## * `/ func <#/,Uri,string>`_ for building URIs
  runnableExamples:
    let foo = combine(parseUri("https://nim-lang.org/foo/bar"), parseUri("/baz"))
    assert foo.path == "/baz"
    let bar = combine(parseUri("https://nim-lang.org/foo/bar"), parseUri("baz"))
    assert bar.path == "/foo/baz"
    let qux = combine(parseUri("https://nim-lang.org/foo/bar/"), parseUri("baz"))
    assert qux.path == "/foo/bar/baz"

  template setAuthority(dest, src): untyped =
    dest.hostname = src.hostname
    dest.username = src.username
    dest.port = src.port
    dest.password = src.password

  result = initUri()
  if reference.scheme != base.scheme and reference.scheme != "":
    result = reference
    result.path = removeDotSegments(result.path)
  else:
    if reference.hostname != "":
      setAuthority(result, reference)
      result.path = removeDotSegments(reference.path)
      result.query = reference.query
    else:
      if reference.path == "":
        result.path = base.path
        if reference.query != "":
          result.query = reference.query
        else:
          result.query = base.query
      else:
        if reference.path.startsWith("/"):
          result.path = removeDotSegments(reference.path)
        else:
          result.path = removeDotSegments(merge(base, reference))
        result.query = reference.query
      setAuthority(result, base)
    result.scheme = base.scheme
  result.anchor = reference.anchor

func combine*(uris: varargs[Uri]): Uri =
  ## Combines multiple URIs together.
  ##
  ## **See also:**
  ## * `/ func <#/,Uri,string>`_ for building URIs
  runnableExamples:
    let foo = combine(parseUri("https://nim-lang.org/"), parseUri("docs/"),
        parseUri("manual.html"))
    assert foo.hostname == "nim-lang.org"
    assert foo.path == "/docs/manual.html"
  result = uris[0]
  for i in 1 ..< uris.len:
    result = combine(result, uris[i])

func isAbsolute*(uri: Uri): bool =
  ## Returns true if URI is absolute, false otherwise.
  runnableExamples:
    let foo = parseUri("https://nim-lang.org")
    assert isAbsolute(foo) == true
    let bar = parseUri("nim-lang")
    assert isAbsolute(bar) == false
  return uri.scheme != "" and (uri.hostname != "" or uri.path != "")

func `/`*(x: Uri, path: string): Uri =
  ## Concatenates the path specified to the specified URIs path.
  ##
  ## Contrary to the `combine func <#combine,Uri,Uri>`_ you do not have to worry about
  ## the slashes at the beginning and end of the path and URIs path
  ## respectively.
  ##
  ## **See also:**
  ## * `combine func <#combine,Uri,Uri>`_
  runnableExamples:
    let foo = parseUri("https://nim-lang.org/foo/bar") / "/baz"
    assert foo.path == "/foo/bar/baz"
    let bar = parseUri("https://nim-lang.org/foo/bar") / "baz"
    assert bar.path == "/foo/bar/baz"
    let qux = parseUri("https://nim-lang.org/foo/bar/") / "baz"
    assert qux.path == "/foo/bar/baz"
  result = x

  if result.path.len == 0:
    if path.len == 0 or path[0] != '/':
      result.path = "/"
    result.path.add(path)
    return

  if result.path.len > 0 and result.path[result.path.len-1] == '/':
    if path.len > 0 and path[0] == '/':
      result.path.add(path[1 .. path.len-1])
    else:
      result.path.add(path)
  else:
    if path.len == 0 or path[0] != '/':
      result.path.add '/'
    result.path.add(path)

func `?`*(u: Uri, query: openArray[(string, string)]): Uri =
  ## Concatenates the query parameters to the specified URI object.
  runnableExamples:
    let foo = parseUri("https://example.com") / "foo" ? {"bar": "qux"}
    assert $foo == "https://example.com/foo?bar=qux"
  result = u
  result.query = encodeQuery(query)

func `$`*(u: Uri): string =
  ## Returns the string representation of the specified URI object.
  runnableExamples:
    let foo = parseUri("https://nim-lang.org")
    assert $foo == "https://nim-lang.org"
  result = ""
  if u.scheme.len > 0:
    result.add(u.scheme)
    if u.opaque:
      result.add(":")
    else:
      result.add("://")
  if u.username.len > 0:
    result.add(u.username)
    if u.password.len > 0:
      result.add(":")
      result.add(u.password)
    result.add("@")
  if u.hostname.endsWith('/'):
    if u.isIpv6:
      result.add("[" & u.hostname[0 .. ^2] & "]")
    else:
      result.add(u.hostname[0 .. ^2])
  else:
    if u.isIpv6:
      result.add("[" & u.hostname & "]")
    else:
      result.add(u.hostname)
  if u.port.len > 0:
    result.add(":")
    result.add(u.port)
  if u.path.len > 0:
    if u.hostname.len > 0 and u.path[0] != '/':
      result.add('/')
    result.add(u.path)
  if u.query.len > 0:
    result.add("?")
    result.add(u.query)
  if u.anchor.len > 0:
    result.add("#")
    result.add(u.anchor)

proc getDataUri*(data, mime: string, encoding = "utf-8"): string {.since: (1, 3).} =
  ## Convenience proc for `base64.encode` returns a standard Base64 Data URI (RFC-2397)
  ##
  ## **See also:**
  ## * `mimetypes <mimetypes.html>`_ for `mime` argument
  ## * https://tools.ietf.org/html/rfc2397
  ## * https://en.wikipedia.org/wiki/Data_URI_scheme
  runnableExamples: static: doAssert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt"
  assert encoding.len > 0 and mime.len > 0 # Must *not* be URL-Safe, see RFC-2397
  result = "data:" & mime & ";charset=" & encoding & ";base64," & base64.encode(data)