about summary refs log tree commit diff stats
path: root/src/types
diff options
context:
space:
mode:
authorbptato <nincsnevem662@gmail.com>2021-12-26 22:17:24 +0100
committerbptato <nincsnevem662@gmail.com>2021-12-26 22:17:24 +0100
commit647089a99e3c44c4115274a8822ca0a4dd947d67 (patch)
tree1fa85f4e12676ef8532ff2c9eaaa065a0bcd263a /src/types
parent9c688a75adcd647723a993f04cb964d62e7f05a4 (diff)
downloadchawan-647089a99e3c44c4115274a8822ca0a4dd947d67.tar.gz
Proper URL handling
Diffstat (limited to 'src/types')
-rw-r--r--src/types/url.nim719
1 files changed, 632 insertions, 87 deletions
diff --git a/src/types/url.nim b/src/types/url.nim
index 8eec6090..6dea5064 100644
--- a/src/types/url.nim
+++ b/src/types/url.nim
@@ -3,17 +3,18 @@ import strutils
 import tables
 import options
 import unicode
+import math
 
 import utils/twtstr
 
 type
   UrlState = enum
-    STATE_OVERRIDE, SCHEME_START_STATE, SCHEME_STATE, NO_SCHEME_STATE,
-    FILE_STATE, SPECIAL_RELATIVE_OR_AUTHORITY_STATE,
-    SPECIAL_AUTHORITY_SLASHES_STATE, PATH_OR_AUTHORITY_STATE,
-    OPAQUE_PATH_STATE, FRAGMENT_STATE, RELATIVE_STATE,
+    SCHEME_START_STATE, SCHEME_STATE, NO_SCHEME_STATE, FILE_STATE,
+    SPECIAL_RELATIVE_OR_AUTHORITY_STATE, SPECIAL_AUTHORITY_SLASHES_STATE,
+    PATH_OR_AUTHORITY_STATE, OPAQUE_PATH_STATE, FRAGMENT_STATE, RELATIVE_STATE,
     SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE, AUTHORITY_STATE, PATH_STATE,
-    RELATIVE_SLASH_STATE, QUERY_STATE
+    RELATIVE_SLASH_STATE, QUERY_STATE, HOST_STATE, HOSTNAME_STATE,
+    FILE_HOST_STATE, PORT_STATE, PATH_START_STATE, FILE_SLASH_STATE
 
   Blob* = object
 
@@ -27,42 +28,267 @@ type
     else:
       ss*: seq[string]
 
+  Host = object
+    domain: string # ASCII domain name; "" is used for the empty host
+    ipv4: Option[uint32] # set when the host was parsed as an IPv4 address
+    ipv6: Option[array[8, uint16]] # set for a bracketed IPv6 literal
+    opaquehost: string # presumably the host of a non-special URL; confirm
+
   Url* = object
-    failure*: bool
-    isnull: bool
     encoding: int #TODO
-    scheme: string
+    scheme*: string
     username: string
     password: string
-    port: int
-    host: string
-    path: UrlPath
+    port: Option[uint16]
+    host: Option[Host]
+    path*: UrlPath
     query: Option[string]
     fragment: Option[string]
     blob: Option[BlobUrlEntry]
 
-const NullUrl = Url(isnull: true)
 const EmptyPath = UrlPath(opaque: true, s: "")
+const EmptyHost = Host(domain: "").some
 
 const SpecialSchemes = {
-  "ftp": 21,
-  "file": -1,
-  "http": 80,
-  "https": 443,
-  "ws": 80,
-  "wss": 443,
+  "ftp": 21u16.some,
+  "file": none(uint16),
+  "http": 80u16.some,
+  "https": 443u16.some,
+  "ws": 80u16.some,
+  "wss": 443u16.some,
 }.toTable()
 
-proc shorten(url: var Url) =
+func parseIpv6(input: string): Option[array[8, uint16]] =
+  ## Parses the text between the brackets of an IPv6 literal into its eight
+  ## 16-bit pieces, following the IPv6 parser of the WHATWG URL Standard.
+  ##
+  ## Supports a single "::" compression and a trailing dotted-decimal IPv4
+  ## part. Returns `none` for any malformed input, including the empty
+  ## string.
+  var pieceindex = 0
+  var compress = -1 # index of the piece that follows "::", -1 if none seen
+  var pointer = 0
+  var address: array[8, uint16]
+
+  template c(i = 0): char = input[pointer + i]
+  template has(i = 0): bool = (pointer + i < input.len)
+  template failure(): Option[array[8, uint16]] = none(array[8, uint16])
+  # Check `has` first: an empty input (e.g. from "[]") must fail cleanly
+  # further down instead of indexing past the end of the string.
+  if has and c == ':':
+    if not has(1) or c(1) != ':':
+      #TODO validation error
+      return failure
+    pointer += 2
+    inc pieceindex
+    compress = pieceindex
+  while has:
+    if pieceindex == 8:
+      #TODO validation error
+      return failure
+    if c == ':':
+      if compress != -1:
+        #TODO validation error
+        return failure
+      inc pointer
+      inc pieceindex
+      compress = pieceindex
+      continue
+    # Read up to four hex digits into one piece.
+    var value: uint16 = 0
+    var length = 0
+    while length < 4 and has and c in HexDigits:
+      value = value * 0x10 + uint16(c.hexValue)
+      inc pointer
+      inc length
+    if has and c == '.':
+      # Embedded IPv4 address: rewind and re-read the digits as decimal.
+      if length == 0:
+        #TODO validation error
+        return failure
+      pointer -= length
+      if pieceindex > 6:
+        #TODO validation error
+        return failure
+      var numbersseen = 0
+      while has:
+        var ipv4piece = -1
+        if numbersseen > 0:
+          if c == '.' and numbersseen < 4:
+            inc pointer
+          else:
+            #TODO validation error
+            return failure
+        if not has or c notin Digits:
+          #TODO validation error
+          return failure
+        while has and c in Digits:
+          if ipv4piece == -1:
+            ipv4piece = c.decValue
+          elif ipv4piece == 0:
+            # leading zeros are not allowed in the IPv4 part
+            #TODO validation error
+            return failure
+          else:
+            ipv4piece = ipv4piece * 10 + c.decValue
+          if ipv4piece > 255:
+            #TODO validation error
+            return failure
+          inc pointer
+        # Two decimal numbers are packed into each 16-bit piece.
+        address[pieceindex] = address[pieceindex] * 0x100 + uint16(ipv4piece)
+        inc numbersseen
+        if numbersseen == 2 or numbersseen == 4:
+          inc pieceindex
+      if numbersseen != 4:
+        #TODO validation error
+        return failure
+      break
+    elif has and c == ':':
+      inc pointer
+      if not has:
+        #TODO validation error
+        return failure
+    elif has:
+      # A piece may only be followed by ':', '.' or the end of input.
+      # Without this check a fifth hex digit started a new piece, so
+      # "12345::" was accepted as "1234:5::".
+      #TODO validation error
+      return failure
+    address[pieceindex] = value
+    inc pieceindex
+  if compress != -1:
+    # Move the pieces that followed "::" to the end of the address; the
+    # gap left behind stays zero.
+    var swaps = pieceindex - compress
+    pieceindex = 7
+    while pieceindex != 0 and swaps > 0:
+      let sp = address[pieceindex]
+      address[pieceindex] = address[compress + swaps - 1]
+      address[compress + swaps - 1] = sp
+      dec pieceindex
+      dec swaps
+  elif pieceindex != 8:
+    #TODO validation error
+    return failure
+  return address.some
+
+func parseIpv4Number(s: string): tuple[num: int, validationError: bool] =
+  ## Parses one dot-separated part of an IPv4 address.
+  ##
+  ## A leading "0x"/"0X" selects hexadecimal; any other leading "0" on a
+  ## part of two or more characters selects octal. Both forms set
+  ## `validationError`. Failure is reported as `(-1, true)` and happens for
+  ## an empty part, for a character that is not a digit of the selected
+  ## radix, and for a value that does not fit in `int`.
+  if s == "": return (-1, true)
+  var input = s
+  var radix = 10
+  var validationerror = false
+  if input.len >= 2 and input[0] == '0':
+    validationerror = true
+    if input[1] in {'x', 'X'}:
+      input = input.substr(2)
+      radix = 16
+    else:
+      input = input.substr(1)
+      radix = 8
+  if input == "":
+    # "0x" on its own, or what is left after stripping the octal "0"
+    return (0, true)
+
+  # Accumulate by hand instead of calling parseInt/parseOctInt/parseHexInt:
+  # those also accept signs, '_' separators and radix prefixes, none of
+  # which may appear in an IPv4 number.
+  var output = 0
+  for ch in input:
+    let digit =
+      case ch
+      of '0'..'9': ord(ch) - ord('0')
+      of 'a'..'f': ord(ch) - ord('a') + 10
+      of 'A'..'F': ord(ch) - ord('A') + 10
+      else: radix # never a valid digit, rejected by the check below
+    if digit >= radix or output > (high(int) - digit) div radix:
+      return (-1, true)
+    output = output * radix + digit
+  return (output, validationerror)
+
+func parseIpv4(input: string): Option[uint32] =
+  ## Parses a dotted IPv4 address of one to four parts (one trailing dot is
+  ## tolerated) into a 32-bit number. Each part goes through
+  ## `parseIpv4Number`; the last part may span all remaining bytes.
+  ## Returns `none` on failure.
+  var segments = input.split('.')
+  var sawValidationError = false
+  if segments[^1] == "":
+    sawValidationError = true
+    if segments.len > 1:
+      segments.setLen(segments.len - 1)
+  if segments.len > 4:
+    #TODO validation error
+    return none(uint32)
+  var values: seq[int]
+  for idx, segment in segments:
+    let (num, flagged) = parseIpv4Number(segment)
+    if num < 0:
+      #TODO validation error
+      return none(uint32)
+    if num > 255:
+      sawValidationError = true
+      # only the final part is allowed to exceed one byte
+      if idx != segments.high:
+        #TODO validation error
+        return none(uint32)
+    if flagged:
+      sawValidationError = true
+    values.add(num)
+  #TODO validation error if sawValidationError
+  let last = values.pop()
+  # the final part has to fit into the bytes the other parts leave free
+  if last >= 256^(4 - values.len):
+    #TODO validation error
+    return none(uint32)
+  var ipv4 = uint32(last)
+  for idx, value in values:
+    ipv4 += uint32(value) * (256u32 ^ (3 - idx))
+  return ipv4.some
+
+const ForbiddenHostChars = {
+  chr(0x00), '\t', '\n', '\r', ' ', '#', '%', '/', ':', '<', '>', '?', '@',
+  '[', '\\', ']', '^', '|'
+}
+func opaqueParseHost(input: string): Option[Host] =
+  ## Parses the host of a non-special URL: rejects forbidden host
+  ## characters, then stores the input percent-encoded with
+  ## `ControlPercentEncodeSet` as an opaque host.
+  ## Returns `none` if a forbidden character is present.
+  for c in input:
+    # '%' is exempt here: it may introduce an already percent-encoded byte
+    if c != '%' and c in ForbiddenHostChars:
+      #TODO validation error
+      return none(Host)
+    #TODO If input contains a code point that is not a URL code point and not
+    #U+0025 (%), validation error.
+    #TODO If input contains a U+0025 (%) and the two code points following it
+    #are not ASCII hex digits, validation error.
+  var o = ""
+  for c in input:
+    o.percentEncode(c, ControlPercentEncodeSet)
+  # The encoded host was previously built and then dropped, so every call
+  # fell through to the implicit `none` result.
+  return Host(opaquehost: o).some
+
+func endsInNumber(input: string): bool =
+  ## Reports whether the last dot-separated label of `input` (ignoring one
+  ## trailing dot) looks like an IPv4 number: either all ASCII digits, or
+  ## "0x"/"0X" followed by zero or more hex digits. Hosts for which this
+  ## is true must be parsed as IPv4 addresses rather than domains.
+  var parts = input.split('.')
+  if parts[^1] == "":
+    if parts.len == 1: return false
+    discard parts.pop()
+  let last = parts[^1]
+  if last == "":
+    return false
+  if last.len >= 2 and last[0] == '0' and last[1] in {'x', 'X'}:
+    # A bare "0x" counts as well; the IPv4 number parser reads it as 0.
+    return last.substr(2).allCharsInSet(HexDigits)
+  return last.allCharsInSet(Digits)
+
+func parseHost(input: string, isnotspecial = false): Option[Host] =
+  ## Parses a host string into a `Host`.
+  ##
+  ## * `[...]` is parsed as an IPv6 literal.
+  ## * With `isnotspecial` set, the input goes to `opaqueParseHost`.
+  ## * Otherwise the input is percent-decoded and run through
+  ##   `domainToAscii`; if the result ends in a number it is parsed as an
+  ##   IPv4 address, else it is kept as a domain.
+  ##
+  ## Returns `none` for empty input and whenever any of these steps fails.
+  if input.len == 0: return none(Host)
+  if input[0] == '[':
+    if input[^1] != ']':
+      #TODO validation error
+      return none(Host)
+    let ipv6 = parseIpv6(input.substr(1, input.high - 1))
+    # A failed IPv6 parse must fail the host, not yield an empty Host.
+    if ipv6.isnone:
+      return none(Host)
+    return Host(ipv6: ipv6).some
+  if isnotspecial: #TODO ??
+    return opaqueParseHost(input)
+  let domain = percentDecode(input)
+  let asciiDomain = domain.domainToAscii()
+  if asciiDomain.isnone:
+    #TODO validation error
+    return none(Host)
+  for c in asciiDomain.get:
+    if c in ForbiddenHostChars:
+      #TODO validation error
+      return none(Host)
+  if asciiDomain.get.len > 0 and asciiDomain.get.endsInNumber():
+    let ipv4 = parseIpv4(asciiDomain.get)
+    # Same as above: an invalid IPv4 address is a host parse failure.
+    if ipv4.isnone:
+      return none(Host)
+    return Host(ipv4: ipv4).some
+  return Host(domain: asciiDomain.get).some
+
+func isempty(host: Host): bool =
+  ## True when `host` carries no IPv4 or IPv6 address and both its domain
+  ## and opaque host strings are empty.
+  if host.ipv4.issome or host.ipv6.issome:
+    return false
+  return host.domain.len == 0 and host.opaquehost.len == 0
+
+proc shorten_path(url: var Url) {.inline.} =
   assert not url.path.opaque
 
   if url.scheme == "file" and url.path.ss.len == 1 and url.path.ss[0][0] in Letters and url.path.ss[0][1] == ':':
     return
-  if url.path.ss.len >= 0:
+  if url.path.ss.len > 0:
     discard url.path.ss.pop()
 
+proc append(path: var UrlPath, s: string) =
+  ## Adds `s` as the last entry of the segment list `path.ss`.
+  add(path.ss, s)
+
+template includes_credentials(url: Url): bool =
+  ## True when `url` has a non-empty username or a non-empty password.
+  not (url.username == "" and url.password == "")
+
 #TODO encoding
-proc basicParseUrl*(input: string, base: Url = NullUrl, url: Url = Url(), override: bool = false): Url =
+proc basicParseUrl*(input: string, base = none(Url), url: var Url = Url(), override: bool = false): Option[Url] =
   #TODO If input contains any leading or trailing C0 control or space, validation error.
   #TODO If input contains any ASCII tab or newline, validation error.
   let input = input.strip(true, false, {chr(0x00)..chr(0x1F), ' '}).strip(true, false, {'\t', '\n'})
@@ -72,20 +298,23 @@ proc basicParseUrl*(input: string, base: Url = NullUrl, url: Url = Url(), overri
   var passwordtokenseen = false
   var pointer = 0
   var state = SCHEME_START_STATE
-  if override:
-    state = STATE_OVERRIDE
-  var url = url
-  template c(): char = input[pointer]
-  template c(i: int): char = input[pointer + i]
-  template has(i: int): bool = (pointer + i < input.len)
-  template return_failure() = return Url(failure: true)
-  template includes_credentials(url: Url): bool = url.username != "" or url.password != ""
-  template default_port(url: Url): bool = url.scheme in SpecialSchemes and url.port == SpecialSchemes[url.scheme]
+
+  template c(i = 0): char = input[pointer + i]
+  template has(i = 0): bool = (pointer + i < input.len)
+  template is_special(url: Url): bool = url.scheme in SpecialSchemes
+  template default_port(url: Url): Option[uint16] = SpecialSchemes[url.scheme]
   template start_over() = pointer = -1
-  while pointer < input.len:
+  template starts_with_windows_drive_letter(s: string): bool = s.len >= 2 and s[0] in Letters and (s[1] == ':' or s[1] == '|')
+  template is_normalized_windows_drive_letter(s: string): bool = s.len == 2 and s[0] in Letters and (s[1] == ':')
+  template is_windows_drive_letter(s: string): bool = s.len == 2 and s[0] in Letters and (s[1] == ':' or s[1] == '|')
+  template is_double_dot_path_segment(s: string): bool = s == ".." or s.equalsIgnoreCase(".%2e") or s.equalsIgnoreCase("%2e.") or s.equalsIgnoreCase("%2e%2e")
+  template is_single_dot_path_segment(s: string): bool = s == "." or s.equalsIgnoreCase("%2e")
+  template is_empty(path: UrlPath): bool = path.ss.len == 0
+
+  while pointer <= input.len:
     case state
     of SCHEME_START_STATE:
-      if c.isAlphaAscii():
+      if has and c.isAlphaAscii():
         buffer &= c.tolower()
         state = SCHEME_STATE
       elif not override:
@@ -93,32 +322,32 @@ proc basicParseUrl*(input: string, base: Url = NullUrl, url: Url = Url(), overri
         dec pointer
       else:
         #TODO validation error
-        return_failure
+        return none(Url)
     of SCHEME_STATE:
-      if c in Letters + {'+', '-', '.'}:
+      if has and c in Letters + {'+', '-', '.'}:
         buffer &= c.tolower()
-      elif c == ':':
+      elif has and c == ':':
         if override:
           if url.scheme in SpecialSchemes and buffer notin SpecialSchemes:
-            return url
+            return url.some
           if url.scheme notin SpecialSchemes and buffer in SpecialSchemes:
-            return url
-          if (url.includes_credentials or url.port != -1) and buffer == "file":
-            return url
-          if url.scheme == "file" and url.host == "":
-            return url
+            return url.some
+          if (url.includes_credentials or url.port.issome) and buffer == "file":
+            return url.some
+          if url.scheme == "file" and url.host.get.isempty:
+            return url.some
         url.scheme = buffer
         if override:
-          if url.default_port:
-            url.port = -1
-          return url
+          if url.default_port == url.port:
+            url.port = none(uint16)
+          return url.some
         buffer = ""
         if url.scheme == "file":
           #TODO If remaining does not start with "//", validation error.
           state = FILE_STATE
-        elif url.scheme in SpecialSchemes and not base.isnull and base.scheme == url.scheme:
+        elif url.is_special and not base.isnone and base.get.scheme == url.scheme:
           state = SPECIAL_RELATIVE_OR_AUTHORITY_STATE
-        elif url.scheme in SpecialSchemes:
+        elif url.is_special:
           state = SPECIAL_AUTHORITY_SLASHES_STATE
         elif has(1) and c(1) == '/':
           state = PATH_OR_AUTHORITY_STATE
@@ -131,25 +360,25 @@ proc basicParseUrl*(input: string, base: Url = NullUrl, url: Url = Url(), overri
         start_over
       else:
         #TODO validation error
-        return_failure
+        return none(Url)
     of NO_SCHEME_STATE:
-      if base.isnull or base.path.opaque and c != '#':
+      if base.isnone or base.get.path.opaque and (not has or c != '#'):
         #TODO validation error
-        return_failure
-      elif base.path.opaque and c == '#':
-        url.scheme = base.scheme
-        url.path = base.path
-        url.query = base.query
-        url.fragment = some("")
+        return none(Url)
+      elif base.get.path.opaque and has and c == '#':
+        url.scheme = base.get.scheme
+        url.path = base.get.path
+        url.query = base.get.query
+        url.fragment = "".some
         state = FRAGMENT_STATE
-      elif base.scheme != "file":
+      elif base.get.scheme != "file":
         state = RELATIVE_STATE
         dec pointer
       else:
         state = FILE_STATE
         dec pointer
     of SPECIAL_RELATIVE_OR_AUTHORITY_STATE:
-      if c == '/' and has(1) and c(1) == '/':
+      if has(1) and c == '/' and c(1) == '/':
         state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE
         inc pointer
       else:
@@ -163,46 +392,46 @@ proc basicParseUrl*(input: string, base: Url = NullUrl, url: Url = Url(), overri
         state = PATH_STATE
         dec pointer
     of RELATIVE_STATE:
-      assert base.scheme != "file"
-      url.scheme = base.scheme
-      if c == '/':
+      assert base.get.scheme != "file"
+      url.scheme = base.get.scheme
+      if has and c == '/':
         state = RELATIVE_SLASH_STATE
-      elif url.scheme in SpecialSchemes and c == '\\':
+      elif url.is_special and has and c == '\\':
         #TODO validation error
         state = RELATIVE_SLASH_STATE
       else:
-        url.username = base.username
-        url.password = base.password
-        url.host = base.host
-        url.port = base.port
-        url.path = base.path
-        url.query = base.query
-        if c == '?':
+        url.username = base.get.username
+        url.password = base.get.password
+        url.host = base.get.host
+        url.port = base.get.port
+        url.path = base.get.path
+        url.query = base.get.query
+        if has and c == '?':
           url.query = "".some
           state = QUERY_STATE
-        elif c == '#':
+        elif has and c == '#':
           url.fragment = "".some
           state = FRAGMENT_STATE
         else:
           url.query = none(string)
-          url.shorten()
+          url.shorten_path()
           state = PATH_STATE
           dec pointer
     of RELATIVE_SLASH_STATE:
-      if url.scheme in SpecialSchemes and c in {'/', '\\'}:
+      if url.is_special and has and c in {'/', '\\'}:
         #TODO if c is \ validation error
         state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE
-      elif c == '/':
+      elif has and c == '/':
         state = AUTHORITY_STATE
       else:
-        url.username = base.username
-        url.password = base.password
-        url.host = base.host
-        url.port = base.port
+        url.username = base.get.username
+        url.password = base.get.password
+        url.host = base.get.host
+        url.port = base.get.port
         state = PATH_STATE
         dec pointer
     of SPECIAL_AUTHORITY_SLASHES_STATE:
-      if c == '/' and has(1) and c(1) == '/':
+      if has(1) and c == '/' and c(1) == '/':
         state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE
         inc pointer
       else:
@@ -210,14 +439,14 @@ proc basicParseUrl*(input: string, base: Url = NullUrl, url: Url = Url(), overri
         state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE
         dec pointer
     of SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE:
-      if c notin {'/', '\\'}:
+      if has and c notin {'/', '\\'}:
         state = AUTHORITY_STATE
         dec pointer
       else:
         #TODO validation error
         discard
     of AUTHORITY_STATE:
-      if c == '@':
+      if has and c == '@':
         #TODO validation error
         if atsignseen:
           buffer = "%40" & buffer
@@ -228,18 +457,334 @@ proc basicParseUrl*(input: string, base: Url = NullUrl, url: Url = Url(), overri
               passwordtokenseen = true
               inc i
               continue
-            var r: Rune
-            fastRuneAt(buffer, i, r)
-            #TODO finish this mess
-            #encodedcodepoints = 
-
+            if passwordtokenseen:
+              url.password.percentEncode(c, UserInfoPercentEncodeSet)
+            else:
+              url.username.percentEncode(c, UserInfoPercentEncodeSet)
+          buffer = ""
+      elif not has or c in {'/', '?', '#'} or (url.is_special and c == '\\'):
+        if atsignseen and buffer == "":
+          #TODO validation error
+          return none(Url)
+        pointer -= buffer.len + 1
+        buffer = ""
+        state = HOST_STATE
+      else:
+        buffer &= c
+    of HOST_STATE, HOSTNAME_STATE:
+      if override and url.scheme == "file":
+        dec pointer
+        state = FILE_HOST_STATE
+      elif has and c == ':' and not insidebrackets:
+        if buffer == "":
+          #TODO validation error
+          return none(Url)
+        let host = parseHost(buffer)
+        if host.isnone:
+          return none(Url)
+        url.host = host
+        buffer = ""
+        state = PORT_STATE
+      elif (not has or c in {'/', '?', '#'}) or
+        (url.is_special and c == '\\'):
+        dec pointer
+        if url.is_special and buffer == "":
+          #TODO validation error
+          return none(Url)
+        elif override and buffer == "" and (url.includes_credentials or url.port.issome):
+          return
+        let host = parseHost(buffer)
+        if host.isnone:
+          return none(Url)
+        url.host = host
+        buffer = ""
+        state = PATH_STATE
+        if override:
+          return
+      else:
+        if c == '[':
+          insidebrackets = true
+        elif c == ']':
+          insidebrackets = false
+        buffer &= c
+    of PORT_STATE:
+      if has and c in Digits:
+        buffer &= c
+      elif (not has or c in {'/', '?', '#'}) or
+        (url.is_special and c == '\\') or override:
+        if buffer != "":
+          let i = parseInt32(buffer)
+          if i notin 0..65535:
+            #TODO validation error
+            return none(Url)
+          let port = cast[uint16](i).some
+          url.port = if url.default_port == port: none(uint16) else: port
+          buffer = ""
+        if override:
+          return
+        state = PATH_START_STATE
+        dec pointer
+      else:
+        #TODO validation error
+        return none(Url)
+    of FILE_STATE:
+      url.scheme = "file"
+      url.host = EmptyHost
+      if has and (c == '/' or c == '\\'):
+        #TODO if c == '\\' validation error
+        state = FILE_SLASH_STATE
+      elif base.issome and base.get.scheme == "file":
+        url.host = base.get.host
+        url.path = base.get.path
+        url.query = base.get.query
+        if has:
+          if c == '?':
+            url.query = "".some
+            state = QUERY_STATE
+          elif c == '#':
+            url.fragment = "".some
+            state = FRAGMENT_STATE
+          else:
+            url.query = none(string)
+            if not input.substr(pointer).starts_with_windows_drive_letter():
+              url.shorten_path()
+            else:
+              #TODO validation error
+              url.path.ss.setLen(0)
+            state = PATH_STATE
+            dec pointer
+      else:
+        state = PATH_STATE
+        dec pointer
+    of FILE_SLASH_STATE:
+      if has and (c == '/' or c == '\\'):
+        #TODO if c == '\\' validation error
+        state = FILE_HOST_STATE
+      else:
+        if base.issome and base.get.scheme == "file":
+          url.host = base.get.host
+          let bpath = base.get.path.ss
+          if not input.substr(pointer).starts_with_windows_drive_letter() and bpath.len > 0 and bpath[0].is_normalized_windows_drive_letter():
+            url.path.append(bpath[0])
+          state = PATH_STATE
+          dec pointer
+    of FILE_HOST_STATE:
+      if (not has or c in {'/', '?', '#'}):
+        dec pointer
+        if not override and buffer.is_windows_drive_letter:
+          #TODO validation error
+          state = PATH_STATE
+        elif buffer == "":
+          url.host = Host(domain: "").some
+          if override:
+            return
+          state = PATH_START_STATE
+        else:
+          var host = parseHost(buffer)
+          if host.isnone:
+            return none(Url)
+          if host.get.domain == "localhost":
+            host.get.domain = ""
+          url.host = host
+          if override:
+            return
+          buffer = ""
+          state = PATH_START_STATE
+      else:
+        buffer &= c
+    of PATH_START_STATE:
+      if url.is_special:
+        #TODO if c == '\\' validation error
+        state = PATH_STATE
+        if not has or c notin {'/', '\\'}:
+          dec pointer
+      elif not override and has and c == '?':
+        url.query = "".some
+        state = QUERY_STATE
+      elif not override and has and c == '#':
+        url.fragment = "".some
+        state = FRAGMENT_STATE
+      elif has:
+        state = PATH_STATE
+        if c != '/':
+          dec pointer
+      elif override and url.host.isnone:
+        url.path.append("")
+    of PATH_STATE:
+      if not has or c == '/' or (url.is_special and c == '\\') or
+          (not override and c in {'?', '#'}):
+        #TODO if url.is_special and c == '\\' validation error
+        let slash_cond = not has or (c != '/' and not (url.is_special and c == '\\'))
+        if buffer.is_double_dot_path_segment:
+          url.shorten_path()
+          if slash_cond:
+            url.path.append("")
+        elif buffer.is_single_dot_path_segment and slash_cond:
+          url.path.append("")
+        elif not buffer.is_single_dot_path_segment:
+          if url.scheme == "file" and url.path.is_empty and buffer.is_windows_drive_letter:
+            buffer[1] = ':'
+          url.path.append(buffer)
+        buffer = ""
+        if has:
+          if c == '?':
+            url.query = "".some
+            state = QUERY_STATE
+          elif c == '#':
+            url.fragment = "".some
+            state = FRAGMENT_STATE
+      else:
+        #TODO If c is not a URL code point and not U+0025 (%), validation error.
+        #TODO If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
+        buffer.percentEncode(c, PathPercentEncodeSet)
+    of OPAQUE_PATH_STATE:
+      if has:
+        if c == '?':
+          url.query = "".some
+          state = QUERY_STATE
+        elif c == '#':
+          url.fragment = "".some
+          state = FRAGMENT_STATE
+        else:
+          #TODO If c is not the EOF code point, not a URL code point, and not U+0025 (%), validation error.
+          #TODO If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
+          url.path.append(percentEncode(c, ControlPercentEncodeSet))
+    of QUERY_STATE:
+      #TODO encoding
+      if not has or (not override and c == '#'):
+        let querypercentencodeset = if url.is_special: SpecialQueryPercentEncodeSet else: QueryPercentEncodeSet
+        url.query.get.percentEncode(buffer, querypercentencodeset)
+        buffer = ""
+        if has and c == '#':
+          url.fragment = "".some
+          state = FRAGMENT_STATE
+      elif has:
+        #TODO If c is not a URL code point and not U+0025 (%), validation error.
+        #TODO If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
+        buffer &= c
+    of FRAGMENT_STATE:
+      if has:
+        #TODO If c is not a URL code point and not U+0025 (%), validation error.
+        #TODO If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
+        url.fragment.get.percentEncode(c, FragmentPercentEncodeSet)
     inc pointer
+  return url.some
 
-proc parseURL(input: string, base: Url = NullUrl, url: Url = Url(), override: bool = false): Url =
+func anchor*(url: Url): string =
+  ## Returns the fragment of `url`, or "" when it has none.
+  result = if url.fragment.isnone: "" else: url.fragment.get
+
+proc parseUrl*(input: string, base = none(Url), url: var Url, override: bool = false): Option[Url] =
   var url = basicParseUrl(input, base, url, override)
-  if url.failure:
+  if url.isnone:
+    return url
+  if url.get.scheme != "blob":
     return url
-  if url.scheme != "blob":
+  url.get.blob = BlobUrlEntry().some
+  return url
+
+proc parseUrl*(input: string, base = none(Url), override: bool = false): Option[Url] =
+  var url = Url().some
+  url = basicParseUrl(input, base, url.get, override)
+  if url.isnone:
+    return url
+  if url.get.scheme != "blob":
     return url
-  url.blob = some(BlobUrlEntry())
+  url.get.blob = BlobUrlEntry().some
   return url
+
+func serializeip(ipv4: uint32): string =
+  ## Formats a 32-bit IPv4 address as dotted decimal, most significant
+  ## byte first.
+  var octets: array[4, string]
+  var rest = ipv4
+  # peel bytes off the low end and fill the slots from right to left
+  for slot in countdown(3, 0):
+    octets[slot] = $(rest mod 256)
+    rest = rest div 256
+  assert rest == 0
+  return octets.join(".")
+
+func findZeroSeq(ipv6: array[8, uint16]): int =
+  ## Returns the index of the first longest run of two or more consecutive
+  ## zero pieces in `ipv6`, i.e. the run to write as "::" when serializing.
+  ## Returns -1 when there is no such run; a lone zero is never compressed.
+  var besti = -1
+  var bestn = 1 # a run has to be longer than this to be picked
+  var runi = -1
+  var runn = 0 # length of the zero run currently being scanned
+  for i, piece in ipv6:
+    if piece == 0:
+      if runn == 0:
+        runi = i
+      inc runn
+      # strict comparison keeps the earliest run when lengths tie
+      if runn > bestn:
+        bestn = runn
+        besti = runi
+    else:
+      runn = 0
+  return besti
+
+func serializeip(ipv6: array[8, uint16]): string =
+  ## Formats an IPv6 address as colon-separated hexadecimal pieces without
+  ## the surrounding brackets. The zero run chosen by `findZeroSeq` is
+  ## written as "::"; every other piece is written in its shortest
+  ## lower-case hexadecimal form.
+  let compress = findZeroSeq(ipv6)
+  var ignore0 = false # true while skipping the zeros of the "::" run
+  for i in low(ipv6)..high(ipv6):
+    if ignore0:
+      if ipv6[i] == 0: continue
+      else: ignore0 = false
+    if i == compress:
+      # The preceding piece already emitted one ':' unless the run starts
+      # the address.
+      if i == 0:
+        result &= "::"
+      else:
+        result &= ':'
+      ignore0 = true
+      continue
+    # strutils.toHex pads to the full width in upper case ("00AB"); trim
+    # that down to the shortest lower-case form ("ab").
+    var piece = toHex(ipv6[i]).toLowerAscii()
+    piece = piece.strip(leading = true, trailing = false, chars = {'0'})
+    if piece == "":
+      piece = "0"
+    result &= piece
+    if i != high(ipv6):
+      result &= ':'
+
+func serialize(host: Host): string =
+  ## Serializes `host` using the first representation that is present:
+  ## IPv4, then IPv6 (wrapped in brackets), then a non-empty opaque host,
+  ## and finally the domain.
+  if host.ipv4.issome:
+    result = serializeip(host.ipv4.get)
+  elif host.ipv6.issome:
+    result = "[" & serializeip(host.ipv6.get) & "]"
+  elif host.opaquehost != "":
+    result = host.opaquehost
+  else:
+    result = host.domain
+
+func serialize*(path: UrlPath): string {.inline.} =
+  ## Serializes a path: an opaque path is returned as stored, a segment
+  ## list is written with a '/' in front of every segment.
+  if not path.opaque:
+    for segment in path.ss:
+      result.add('/')
+      result.add(segment)
+  else:
+    result = path.s
+
+func serialize*(url: Url, excludefragment = false): string =
+  ## Serializes `url` as scheme, optional "//" authority (credentials,
+  ## host, port), path, query and fragment. The fragment is left out when
+  ## `excludefragment` is true.
+  result = url.scheme
+  result.add(':')
+  if url.host.isnone:
+    # With no host, a path whose first segment is empty is prefixed with
+    # "/." so the output does not begin with "//".
+    if not url.path.opaque and url.path.ss.len > 1 and url.path.ss[0] == "":
+      result.add("/.")
+  else:
+    result.add("//")
+    if url.includes_credentials:
+      result.add(url.username)
+      if url.password.len > 0:
+        result.add(':')
+        result.add(url.password)
+      result.add('@')
+    result.add(url.host.get.serialize)
+    if url.port.issome:
+      result.add(':')
+      result.add($url.port.get)
+  result.add(url.path.serialize())
+  if url.query.issome:
+    result.add('?')
+    result.add(url.query.get)
+  if url.fragment.issome and not excludefragment:
+    result.add('#')
+    result.add(url.fragment.get)
+
+func serialize*(url: Option[Url], excludefragment = false): string =
+  ## Serializes an optional URL; `none` becomes the empty string.
+  ## `excludefragment` is passed on to the `Url` overload (it used to be
+  ## accepted but ignored, so the fragment was always included).
+  if url.isnone:
+    return ""
+  return url.get.serialize(excludefragment)
+
+func equals*(a, b: Url, excludefragment = false): bool =
+  ## Compares two URLs by their serialized form, optionally leaving the
+  ## fragment out of the comparison.
+  let lhs = a.serialize(excludefragment)
+  let rhs = b.serialize(excludefragment)
+  result = lhs == rhs
+
+func `$`*(url: Url): string {.inline.} =
+  ## String conversion for `Url`: its full serialization.
+  serialize(url)
+
+func `$`*(path: UrlPath): string {.inline.} =
+  ## String conversion for `UrlPath`: its serialization.
+  serialize(path)