about summary refs log blame commit diff stats
path: root/src/types/url.nim
blob: f7cd550f75cda12eba5408f4c3020bc4b21adc92 (plain) (tree)
1
2
3
4
5
6
7
8
9
                                               



               
           
 
                    
                 


                   
                 


                                                                               
                                                                        

                                                                   
 
                        
                             







                      





                                  





                                          
                       
                   

                               


                        
                          

                              
                                            
 






                          


                             
                                              
                                       

                        





                       

           





























                                                                       
                                                    










































                                                                             






                                

                              

                               












































































































                                                                             































                                                                       
                                
 

























                                                                                              
                                        

                            
                                                                                                                                             
          
                         

                             
                                           



                  

                                                                                        
                                                                                                                   

                                                                             
 
              

                                                                      







                                                                                                    
                                     
                                

                             




                                                                              
                                      







                                                                                                                                                              

                          
                                  






                               
                        
                    
                                                          
                             
                            

                                                                          
                           
                                                                          




                                                                                

                           


                                          



                                                                        
                                                                                  
                                                     
                            


                                                 
                     








                                   
                        
                       
                                                                       
                              





                                                     
                              
                                     





                                           
                                             












                                                      


                                      
                                    
                                                


                                    






                                        

                             
                              



                                  
                            


                            
                                                     

                                                      
                            

                               



                                        


                                       
                                             






                                                      
                                        





                               
                          


                                 









                                                                   



































                                                                                         
                                














                                                   
                                              

                                  
                                             
                                                                                            

















































                                                                                                                                              
                                                 

























































































                                                                                                               
                                                                                                          



                                                                                
                                                                                                          
                                                                   
               
                 
 




                                

                                                             
                                                     


                              
              


                                    
                                                                                         




                                                     
              
                                    
            











































                                                  
                                 


















                                                   






                                                           













                                                               
                                                                                     




                                
                                                    




















                                                                            
                                                        

                                                              
 























                                                                    





                                             


































                                                                                  
                                                                                                                          















                                                            



































                                                                               

                                                             
                 
                                    

                                                                
                                  


                                                         
                       

                                                       
                                             
                                
                                                                 
                    
 
                                 






                                     

                              
                                         
                                                                   

            
                   
       







                                       
                 









                                           
 
                                             

                         


                                                               
 
                                                      



                                                                 
                                                      



                                                                 
                                         





                                                       
                                                       



                                                                       









                                                                           
                                         


                        
                                                       






                                                                         
                                             

                             
                                                               




                                                                             
                                           



                             
                                                           








                                                                        
                                         



                                
                                                       



                                             
                         

                                                                           
                                    

                                   
# See https://url.spec.whatwg.org/#url-parsing.
import strutils
import tables
import options
import unicode
import math

import js/javascript
import types/blob
import utils/twtstr

type
  # States of the URL parsing state machine; basicParseUrl switches on these.
  URLState = enum
    SCHEME_START_STATE, SCHEME_STATE, NO_SCHEME_STATE, FILE_STATE,
    SPECIAL_RELATIVE_OR_AUTHORITY_STATE, SPECIAL_AUTHORITY_SLASHES_STATE,
    PATH_OR_AUTHORITY_STATE, OPAQUE_PATH_STATE, FRAGMENT_STATE, RELATIVE_STATE,
    SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE, AUTHORITY_STATE, PATH_STATE,
    RELATIVE_SLASH_STATE, QUERY_STATE, HOST_STATE, HOSTNAME_STATE,
    FILE_HOST_STATE, PORT_STATE, PATH_START_STATE, FILE_SLASH_STATE

  # Attached to a URL by parseURL when its scheme is "blob".
  BlobUrlEntry* = object
    obj: Blob #TODO blob urls

  # A URL path. When `opaque` is true the path is one string (`s`);
  # otherwise it is a list of path segments (`ss`).
  UrlPath* = object
    case opaque*: bool
    of true:
      s*: string
    else:
      ss*: seq[string]

  # A parsed host. serialize(host) prefers ipv4, then ipv6, then opaquehost,
  # and falls back to domain; isempty(host) is true when all four are unset.
  Host = object
    domain: string
    ipv4: Option[uint32]
    ipv6: Option[array[8, uint16]]
    opaquehost: string

  # Ordered list of name/value pairs, optionally tied to a URL.
  URLSearchParams* = ref object
    list*: seq[tuple[name, value: string]]
    url: Option[URL]

  # URL record filled in by basicParseUrl. Fields tagged {.jsget.} use a
  # pragma from the JS binding layer (presumably exposing them as JS getters
  # -- TODO confirm against js/javascript).
  URL* = ref URLObj
  URLObj* = object
    encoding: int #TODO
    scheme*: string
    username* {.jsget.}: string
    password* {.jsget.}: string
    # none when no port was given or when it equals the scheme's default
    # port (see PORT_STATE in basicParseUrl).
    port: Option[uint16]
    host: Option[Host]
    path*: UrlPath
    query*: Option[string]
    fragment: Option[string]
    blob: Option[BlobUrlEntry]
    searchParams* {.jsget.}: URLSearchParams

  # Origin tuple wrapped in an Option; presumably `none` stands for an
  # opaque origin -- TODO confirm with the code that constructs it.
  Origin* = Option[tuple[
    scheme: string,
    host: Host,
    port: Option[uint16],
    domain: Option[string]
  ]]

# jsDestructor is provided by js/javascript; presumably it registers the
# types with the JS binding layer for finalization -- TODO confirm.
jsDestructor(URL)
jsDestructor(URLSearchParams)

# An opaque path holding the empty string.
const EmptyPath = UrlPath(opaque: true, s: "")
# A present host whose domain is the empty string.
const EmptyHost = Host(domain: "").some

# Maps each "special" scheme to its default port; "file" has no default port.
# basicParseUrl tests membership here to decide whether a URL is special.
const SpecialSchemes = {
  "ftp": 21u16.some,
  "file": none(uint16),
  "http": 80u16.some,
  "https": 443u16.some,
  "ws": 80u16.some,
  "wss": 443u16.some,
}.toTable()

func parseIpv6(input: string): Option[array[8, uint16]] =
  ## Parses the textual form of an IPv6 address (without the surrounding
  ## brackets) into its eight 16-bit pieces.
  ##
  ## Supports a single "::" compression and a trailing dotted-decimal IPv4
  ## part. Returns `none` on any syntax error, including empty input.
  var pieceindex = 0
  var compress = -1 # index of the piece where "::" was seen, -1 if none
  var pointer = 0
  var address: array[8, uint16]

  template c(i = 0): char = input[pointer + i]
  template has(i = 0): bool = (pointer + i < input.len)
  template failure(): Option[array[8, uint16]] = none(array[8, uint16])
  # `has` guards against empty input; indexing input[0] would otherwise
  # raise instead of reporting a parse failure.
  if has and c == ':':
    if not has(1) or c(1) != ':':
      #TODO validation error
      return failure
    pointer += 2
    inc pieceindex
    compress = pieceindex
  while has:
    if pieceindex == 8:
      #TODO validation error
      return failure
    if c == ':':
      # A second "::" is not allowed.
      if compress != -1:
        #TODO validation error
        return failure
      inc pointer
      inc pieceindex
      compress = pieceindex
      continue
    # Read up to four hex digits into one piece.
    var value: uint16 = 0
    var length = 0
    while length < 4 and has and c in AsciiHexDigit:
      value = value * 0x10 + uint16(c.hexValue)
      inc pointer
      inc length
    if has and c == '.':
      # The digits just read belong to an embedded IPv4 address; rewind and
      # re-read them as decimal.
      if length == 0:
        #TODO validation error
        return failure
      pointer -= length
      # The IPv4 part occupies two pieces, so it must start at index <= 6.
      if pieceindex > 6:
        #TODO validation error
        return failure
      var numbersseen = 0
      while has:
        var ipv4piece = -1
        if numbersseen > 0:
          if c == '.' and numbersseen < 4:
            inc pointer
          else:
            #TODO validation error
            return failure
        if not has or c notin Digits:
          #TODO validation error
          return failure
        while has and c in Digits:
          if ipv4piece == -1:
            ipv4piece = c.decValue
          elif ipv4piece == 0:
            # Leading zeroes are rejected.
            #TODO validation error
            return failure
          else:
            ipv4piece = ipv4piece * 10 + c.decValue
          if ipv4piece > 255:
            #TODO validation error
            return failure
          inc pointer
        # Two IPv4 numbers are packed into each 16-bit piece.
        address[pieceindex] = address[pieceindex] * 0x100 + uint16(ipv4piece)
        inc numbersseen
        if numbersseen == 2 or numbersseen == 4:
          inc pieceindex
      if numbersseen != 4:
        #TODO validation error
        return failure
      break
    elif has:
      if c == ':':
        inc pointer
        # A single trailing ':' is invalid.
        if not has:
          #TODO validation error
          return failure
      else:
        #TODO validation error
        return failure
    address[pieceindex] = value
    inc pieceindex
  if compress != -1:
    # Move the pieces that followed "::" to the end of the address, leaving
    # the zeroes in the middle.
    var swaps = pieceindex - compress
    pieceindex = 7
    while pieceindex != 0 and swaps > 0:
      let sp = address[pieceindex]
      address[pieceindex] = address[compress + swaps - 1]
      address[compress + swaps - 1] = sp
      dec pieceindex
      dec swaps
  elif pieceindex != 8:
    #TODO validation error
    return failure
  return address.some

func parseIpv4Number(s: string): tuple[num: int, validationError: bool] =
  ## Parses one dot-separated part of an IPv4 address.
  ##
  ## A leading "0x"/"0X" selects hexadecimal and any other leading "0" (with
  ## at least one more character) selects octal; both set `validationError`.
  ## Returns `num == -1` on failure: empty input, a character that is not a
  ## digit of the selected radix, or a value that does not fit in an int.
  ##
  ## Digits are checked one by one, so signs ("+1"), '_' separators and
  ## nested radix prefixes are rejected rather than silently accepted.
  if s == "": return (-1, true)
  var input = s
  var radix = 10
  var validationerror = false
  if input.len >= 2 and input[0] == '0':
    validationerror = true
    if input[1] in {'x', 'X'}:
      input = input.substr(2)
      radix = 16
    else:
      input = input.substr(1)
      radix = 8
  # A bare "0x" (or "0" followed by nothing after stripping) counts as zero.
  if input == "":
    return (0, true)

  var output = 0
  for ch in input:
    let digit =
      case ch
      of '0'..'9': ord(ch) - ord('0')
      of 'a'..'f': ord(ch) - ord('a') + 10
      of 'A'..'F': ord(ch) - ord('A') + 10
      else: -1
    if digit < 0 or digit >= radix:
      return (-1, true)
    # Report overflow as failure; such a value can never be a valid part.
    if output > (high(int) - digit) div radix:
      return (-1, true)
    output = output * radix + digit
  return (output, validationerror)

func parseIpv4(input: string): Option[uint32] =
  ## Parses a dotted IPv4 address of one to four parts into a 32-bit value.
  ##
  ## Every part but the last must be at most 255; the last part fills all
  ## remaining low-order bytes. A single trailing dot is tolerated.
  ## Returns `none` on failure.
  var validationerror = false
  var parts = input.split('.')
  if parts[^1] == "":
    validationerror = true
    if parts.len > 1:
      discard parts.pop()
  if parts.len > 4:
    #TODO validation error
    return none(uint32)
  var numbers: seq[int]
  for idx, part in parts:
    let (num, partError) = parseIpv4Number(part)
    if num < 0:
      #TODO validation error
      return none(uint32)
    if num > 255:
      validationerror = true
      # Only the final part may exceed one byte.
      if idx != high(parts):
        #TODO validation error
        return none(uint32)
    if partError:
      validationerror = true
    numbers.add(num)
  #TODO validation error if validationerror
  # The last number must fit in the bytes not claimed by earlier parts.
  if numbers[^1] >= 256^(5-numbers.len):
    #TODO validation error
    return none(uint32)
  var ipv4 = uint32(numbers[^1])
  discard numbers.pop()
  for idx, num in numbers:
    ipv4 += uint32(num) * (256u32 ^ (3 - idx))
  return ipv4.some

# Characters that may not appear in a (percent-decoded) domain.
const ForbiddenHostChars = {
  chr(0x00), '\t', '\n', '\r', ' ', '#', '%', '/', ':', '<', '>', '?', '@',
  '[', '\\', ']', '^', '|'
}
func opaqueParseHost(input: string): Option[Host] =
  ## Parses the host of a non-special URL into an opaque host.
  ##
  ## Returns `none` if `input` contains a forbidden character; '%' is
  ## permitted here because opaque hosts may carry percent-escapes.
  ## Otherwise returns a Host whose `opaquehost` is `input` percent-encoded
  ## with ControlPercentEncodeSet.
  for c in input:
    if c != '%' and c in ForbiddenHostChars:
      return none(Host)
    #TODO If input contains a code point that is not a URL code point and not
    #U+0025 (%), validation error.
    #TODO If input contains a U+0025 (%) and the two code points following it
    #are not ASCII hex digits, validation error.
  var o = ""
  for c in input:
    o.percentEncode(c, ControlPercentEncodeSet)
  # Hand the encoded host back; without this the implicit result is `none`.
  return Host(opaquehost: o).some

func endsInNumber(input: string): bool =
  ## Reports whether the last dot-separated label of `input` looks like an
  ## IPv4 number, in which case the host must be parsed as an IPv4 address.
  ##
  ## A single trailing dot is ignored. The last label counts as a number if
  ## it is non-empty and consists only of ASCII digits, or if it is "0x"/"0X"
  ## followed by zero or more hexadecimal digits.
  const HexChars = {'0'..'9', 'a'..'f', 'A'..'F'}
  var parts = input.split('.')
  if parts[^1] == "":
    if parts.len == 1: return false
    discard parts.pop()
  let last = parts[^1]
  if last == "":
    return false
  var onlyDigits = true
  for c in last:
    if c notin Digits:
      onlyDigits = false
      break
  if onlyDigits:
    return true
  # Hexadecimal form: the prefix must be exactly "0x"/"0X" and every
  # remaining character must be a hex digit.
  if last.len >= 2 and last[0] == '0' and last[1] in {'x', 'X'}:
    for i in 2 ..< last.len:
      if last[i] notin HexChars:
        return false
    return true
  return false

func domainToAscii*(domain: string, bestrict = false): Option[string] =
  ## Converts `domain` to its ASCII form.
  ##
  ## The full unicodeToAscii conversion only runs when `bestrict` is set or
  ## when some label contains a non-ASCII character or starts with "xn--";
  ## otherwise the domain is simply lowercased. Returns `none` when the
  ## conversion fails or yields an empty string.
  var needsprocessing = false
  block scan:
    for label in domain.split('.'):
      if label.startsWith("xn--"):
        needsprocessing = true
        break scan
      for ch in label:
        if ch notin Ascii:
          needsprocessing = true
          break scan
  if not (bestrict or needsprocessing):
    return domain.tolower().some
  #Note: we don't implement STD3 separately, it's always true
  let converted = domain.unicodeToAscii(false, true, true, false, bestrict)
  if converted.isnone or converted.get == "":
    #TODO validation error
    return none(string)
  return converted

func parseHost(input: string, isnotspecial = false): Option[Host] =
  ## Parses a host string into a Host.
  ##
  ## * "[...]" is parsed as an IPv6 address.
  ## * With `isnotspecial` set, the input is parsed as an opaque host.
  ## * Otherwise the input is percent-decoded and converted to an ASCII
  ##   domain; if that domain ends in a number it is parsed as IPv4.
  ##
  ## Returns `none` for empty input and on any parse failure. A failed IPv4
  ## or IPv6 parse is reported as `none` rather than as a Host with no
  ## address set.
  if input.len == 0: return
  if input[0] == '[':
    if input[^1] != ']':
      #TODO validation error
      return none(Host)
    let ipv6 = parseIpv6(input.substr(1, input.high - 1))
    if ipv6.isnone:
      return none(Host)
    return Host(ipv6: ipv6).some
  if isnotspecial: #TODO ??
    return opaqueParseHost(input)
  let domain = percentDecode(input)
  let asciiDomain = domain.domainToAscii()
  if asciiDomain.isnone:
    #TODO validation error
    return none(Host)
  for c in asciiDomain.get:
    if c in ForbiddenHostChars:
      #TODO validation error
      return none(Host)
  if asciiDomain.get.len > 0 and asciiDomain.get.endsInNumber():
    let ipv4 = parseIpv4(asciiDomain.get)
    if ipv4.isnone:
      return none(Host)
    return Host(ipv4: ipv4).some
  return Host(domain: asciiDomain.get).some

func isempty(host: Host): bool =
  ## True when the host has no domain, no opaque host and no IP address.
  let noName = host.domain.len == 0 and host.opaquehost.len == 0
  let noAddress = host.ipv4.isnone and host.ipv6.isnone
  noName and noAddress

proc shorten_path(url: Url) {.inline.} =
  ## Drops the last segment of the URL's (non-opaque) path, if there is one.
  ## A file URL whose only segment is a normalized Windows drive letter
  ## ("C:") is left untouched.
  assert not url.path.opaque

  let count = url.path.ss.len
  if url.scheme == "file" and count == 1:
    let only = url.path.ss[0]
    if only.len == 2 and only[0] in Letters and only[1] == ':':
      return
  if count > 0:
    discard url.path.ss.pop()

proc append(path: var UrlPath, s: string) =
  ## Adds `s` to the path: as a new segment for a segmented path, or
  ## concatenated onto the string of an opaque path.
  if not path.opaque:
    path.ss.add(s)
    return
  path.s &= s

template includes_credentials(url: Url): bool =
  ## True when the URL has a non-empty username or password.
  not (url.username == "" and url.password == "")
template is_windows_drive_letter(s: string): bool =
  ## True for exactly two characters: an ASCII letter followed by ':' or '|'.
  s.len == 2 and s[0] in Letters and s[1] in {':', '|'}
template canHaveUsernamePasswordPort(url: URL): bool =
  ## False when the URL has no host, has the "file" scheme, or has a host
  ## that serializes to the empty string; true otherwise.
  not (url.host.isnone or url.scheme == "file" or
    url.host.get.serialize() == "")

#TODO encoding
proc basicParseUrl*(input: string, base = none(URL), url: URL = URL(),
    stateOverride = none(URLState)): Option[Url] =
  ## Runs the URL parsing state machine over `input`, filling in `url`.
  ##
  ## * `base`: URL against which a relative `input` is resolved.
  ## * `url`: record to fill in; it is modified in place.
  ## * `stateOverride`: when set, parsing starts in that state and stops once
  ##   the corresponding component has been handled.
  ##
  ## Returns `some(url)` on success and `none` on failure. Note that several
  ## state-override exits use a bare `return`, which also yields `none` even
  ## though `url` may already have been updated.
  #TODO If input contains any leading or trailing C0 control or space, validation error.
  #TODO If input contains any ASCII tab or newline, validation error.
  # NOTE(review): both strip calls pass trailing = false, so only leading
  # characters are removed, and tabs/newlines inside the input are kept --
  # confirm whether that is intended.
  let input = input.strip(true, false, {chr(0x00)..chr(0x1F), ' '}).strip(true, false, {'\t', '\n'})
  var buffer = ""
  var atsignseen = false
  var insidebrackets = false
  var passwordtokenseen = false
  var pointer = 0
  let override = stateOverride.issome
  var state = SCHEME_START_STATE
  if override:
    state = stateOverride.get

  # `c` is the character at the pointer (plus offset); it must only be read
  # when `has` is true. `not has` plays the role of the EOF code point.
  template c(i = 0): char = input[pointer + i]
  template has(i = 0): bool = (pointer + i < input.len)
  template is_special(url: Url): bool = url.scheme in SpecialSchemes
  template default_port(url: Url): Option[uint16] = SpecialSchemes[url.scheme]
  # The loop's trailing `inc pointer` brings the pointer back to 0.
  template start_over() = pointer = -1
  template starts_with_windows_drive_letter(s: string): bool = s.len >= 2 and s[0] in Letters and (s[1] == ':' or s[1] == '|')
  template is_normalized_windows_drive_letter(s: string): bool = s.len == 2 and s[0] in Letters and (s[1] == ':')
  template is_windows_drive_letter(s: string): bool = s.len == 2 and s[0] in Letters and (s[1] == ':' or s[1] == '|')
  template is_double_dot_path_segment(s: string): bool = s == ".." or s.equalsIgnoreCase(".%2e") or s.equalsIgnoreCase("%2e.") or s.equalsIgnoreCase("%2e%2e")
  template is_single_dot_path_segment(s: string): bool = s == "." or s.equalsIgnoreCase("%2e")
  template is_empty(path: UrlPath): bool = path.ss.len == 0

  # The loop runs one step past the last character so that every state gets
  # a chance to handle end of input.
  while pointer <= input.len:
    case state
    of SCHEME_START_STATE:
      if has and c.isAlphaAscii():
        buffer &= c.tolower()
        state = SCHEME_STATE
      elif not override:
        state = NO_SCHEME_STATE
        dec pointer
      else:
        #TODO validation error
        return none(Url)
    of SCHEME_STATE:
      if has and c in AsciiAlphaNumeric + {'+', '-', '.'}:
        buffer &= c.tolower()
      elif has and c == ':':
        if override:
          if url.scheme in SpecialSchemes and buffer notin SpecialSchemes:
            return url.some
          if url.scheme notin SpecialSchemes and buffer in SpecialSchemes:
            return url.some
          if (url.includes_credentials or url.port.issome) and buffer == "file":
            return url.some
          if url.scheme == "file" and url.host.get.isempty:
            return url.some
        url.scheme = buffer
        if override:
          if url.default_port == url.port:
            url.port = none(uint16)
          return url.some
        buffer = ""
        if url.scheme == "file":
          #TODO If remaining does not start with "//", validation error.
          state = FILE_STATE
        elif url.is_special and not base.isnone and base.get.scheme == url.scheme:
          state = SPECIAL_RELATIVE_OR_AUTHORITY_STATE
        elif url.is_special:
          state = SPECIAL_AUTHORITY_SLASHES_STATE
        elif has(1) and c(1) == '/':
          state = PATH_OR_AUTHORITY_STATE
          inc pointer
        else:
          url.path = EmptyPath
          state = OPAQUE_PATH_STATE
      elif not override:
        # Not a scheme after all: reparse the whole input without one.
        buffer = ""
        state = NO_SCHEME_STATE
        start_over
      else:
        #TODO validation error
        return none(Url)
    of NO_SCHEME_STATE:
      if base.isnone or base.get.path.opaque and (not has or c != '#'):
        #TODO validation error
        return none(Url)
      elif base.get.path.opaque and has and c == '#':
        url.scheme = base.get.scheme
        url.path = base.get.path
        url.query = base.get.query
        url.fragment = "".some
        state = FRAGMENT_STATE
      elif base.get.scheme != "file":
        state = RELATIVE_STATE
        dec pointer
      else:
        state = FILE_STATE
        dec pointer
    of SPECIAL_RELATIVE_OR_AUTHORITY_STATE:
      if has(1) and c == '/' and c(1) == '/':
        state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE
        inc pointer
      else:
        #TODO validation error
        state = RELATIVE_STATE
        dec pointer
    of PATH_OR_AUTHORITY_STATE:
      # `has` is required: for an input such as "foo:/" the pointer is
      # already at end of input when this state is entered.
      if has and c == '/':
        state = AUTHORITY_STATE
      else:
        state = PATH_STATE
        dec pointer
    of RELATIVE_STATE:
      assert base.get.scheme != "file"
      url.scheme = base.get.scheme
      if has and c == '/':
        state = RELATIVE_SLASH_STATE
      elif url.is_special and has and c == '\\':
        #TODO validation error
        state = RELATIVE_SLASH_STATE
      else:
        url.username = base.get.username
        url.password = base.get.password
        url.host = base.get.host
        url.port = base.get.port
        url.path = base.get.path
        url.query = base.get.query
        if has and c == '?':
          url.query = "".some
          state = QUERY_STATE
        elif has and c == '#':
          url.fragment = "".some
          state = FRAGMENT_STATE
        else:
          url.query = none(string)
          url.shorten_path()
          state = PATH_STATE
          dec pointer
    of RELATIVE_SLASH_STATE:
      if url.is_special and has and c in {'/', '\\'}:
        #TODO if c is \ validation error
        state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE
      elif has and c == '/':
        state = AUTHORITY_STATE
      else:
        url.username = base.get.username
        url.password = base.get.password
        url.host = base.get.host
        url.port = base.get.port
        state = PATH_STATE
        dec pointer
    of SPECIAL_AUTHORITY_SLASHES_STATE:
      if has(1) and c == '/' and c(1) == '/':
        state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE
        inc pointer
      else:
        #TODO validation error
        state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE
        dec pointer
    of SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE:
      if not has or c notin {'/', '\\'}:
        state = AUTHORITY_STATE
        dec pointer
      else:
        #TODO validation error
        discard
    of AUTHORITY_STATE:
      if has and c == '@':
        #TODO validation error
        if atsignseen:
          buffer = "%40" & buffer
        atsignseen = true
        # Everything before the first ':' is the username, the rest the
        # password.
        for c in buffer:
          if c == ':' and not passwordtokenseen:
            passwordtokenseen = true
            continue
          if passwordtokenseen:
            url.password.percentEncode(c, UserInfoPercentEncodeSet)
          else:
            url.username.percentEncode(c, UserInfoPercentEncodeSet)
        buffer = ""
      elif not has or c in {'/', '?', '#'} or (url.is_special and c == '\\'):
        if atsignseen and buffer == "":
          #TODO validation error
          return none(Url)
        # Rewind to the start of the buffered text and re-read it as a host.
        pointer -= buffer.len + 1
        buffer = ""
        state = HOST_STATE
      else:
        buffer &= c
    of HOST_STATE, HOSTNAME_STATE:
      if override and url.scheme == "file":
        dec pointer
        state = FILE_HOST_STATE
      elif has and c == ':' and not insidebrackets:
        if buffer == "":
          #TODO validation error
          return none(Url)
        let host = parseHost(buffer)
        if host.isnone:
          return none(Url)
        url.host = host
        buffer = ""
        state = PORT_STATE
      elif (not has or c in {'/', '?', '#'}) or
        (url.is_special and c == '\\'):
        dec pointer
        if url.is_special and buffer == "":
          #TODO validation error
          return none(Url)
        elif override and buffer == "" and (url.includes_credentials or url.port.issome):
          return
        let host = parseHost(buffer)
        if host.isnone:
          return none(Url)
        url.host = host
        buffer = ""
        state = PATH_START_STATE
        if override:
          return
      else:
        # ':' inside "[...]" belongs to an IPv6 address, not to a port.
        if c == '[':
          insidebrackets = true
        elif c == ']':
          insidebrackets = false
        buffer &= c
    of PORT_STATE:
      if has and c in Digits:
        buffer &= c
      elif (not has or c in {'/', '?', '#'}) or
        (url.is_special and c == '\\') or override:
        if buffer != "":
          let i = parseInt32(buffer)
          if i.isNone or i.get notin 0..65535:
            #TODO validation error
            return none(Url)
          let port = cast[uint16](i.get).some
          # The scheme's default port is stored as "no port".
          url.port = if url.is_special and url.default_port == port: none(uint16) else: port
          buffer = ""
        if override:
          return
        state = PATH_START_STATE
        dec pointer
      else:
        #TODO validation error
        return none(Url)
    of FILE_STATE:
      url.scheme = "file"
      url.host = EmptyHost
      if has and (c == '/' or c == '\\'):
        #TODO if c == '\\' validation error
        state = FILE_SLASH_STATE
      elif base.issome and base.get.scheme == "file":
        url.host = base.get.host
        url.path = base.get.path
        url.query = base.get.query
        if has:
          if c == '?':
            url.query = "".some
            state = QUERY_STATE
          elif c == '#':
            url.fragment = "".some
            state = FRAGMENT_STATE
          else:
            url.query = none(string)
            if not input.substr(pointer).starts_with_windows_drive_letter():
              url.shorten_path()
            else:
              #TODO validation error
              url.path.ss.setLen(0)
            state = PATH_STATE
            dec pointer
      else:
        state = PATH_STATE
        dec pointer
    of FILE_SLASH_STATE:
      if has and (c == '/' or c == '\\'):
        #TODO if c == '\\' validation error
        state = FILE_HOST_STATE
      else:
        if base.issome and base.get.scheme == "file":
          url.host = base.get.host
          let bpath = base.get.path.ss
          if not input.substr(pointer).starts_with_windows_drive_letter() and bpath.len > 0 and bpath[0].is_normalized_windows_drive_letter():
            url.path.append(bpath[0])
        # Continue with the path whether or not a file base exists;
        # otherwise "file:/path" without a base would never leave this state.
        state = PATH_STATE
        dec pointer
    of FILE_HOST_STATE:
      if (not has or c in {'/', '\\', '?', '#'}):
        dec pointer
        if not override and buffer.is_windows_drive_letter:
          # The buffer is kept: the drive letter becomes the first path
          # segment.
          #TODO validation error
          state = PATH_STATE
        elif buffer == "":
          url.host = Host(domain: "").some
          if override:
            return
          state = PATH_START_STATE
        else:
          var host = parseHost(buffer)
          if host.isnone:
            return none(Url)
          if host.get.domain == "localhost":
            host.get.domain = ""
          url.host = host
          if override:
            return
          buffer = ""
          state = PATH_START_STATE
      else:
        buffer &= c
    of PATH_START_STATE:
      if url.is_special:
        #TODO if c == '\\' validation error
        state = PATH_STATE
        if not has or c notin {'/', '\\'}:
          dec pointer
      elif not override and has and c == '?':
        url.query = "".some
        state = QUERY_STATE
      elif not override and has and c == '#':
        url.fragment = "".some
        state = FRAGMENT_STATE
      elif has:
        state = PATH_STATE
        if c != '/':
          dec pointer
      elif override and url.host.isnone:
        url.path.append("")
    of PATH_STATE:
      if not has or c == '/' or (url.is_special and c == '\\') or
          (not override and c in {'?', '#'}):
        #TODO if url.is_special and c == '\\' validation error
        # True when the segment is not terminated by a path separator.
        let slash_cond = not has or (c != '/' and not (url.is_special and c == '\\'))
        if buffer.is_double_dot_path_segment:
          url.shorten_path()
          if slash_cond:
            url.path.append("")
        elif buffer.is_single_dot_path_segment and slash_cond:
          url.path.append("")
        elif not buffer.is_single_dot_path_segment:
          if url.scheme == "file" and url.path.is_empty and buffer.is_windows_drive_letter:
            # Normalize "C|" to "C:".
            buffer[1] = ':'
          url.path.append(buffer)
        buffer = ""
        if has:
          if c == '?':
            url.query = "".some
            state = QUERY_STATE
          elif c == '#':
            url.fragment = "".some
            state = FRAGMENT_STATE
      else:
        #TODO If c is not a URL code point and not U+0025 (%), validation error.
        #TODO If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
        buffer.percentEncode(c, PathPercentEncodeSet)
    of OPAQUE_PATH_STATE:
      if has:
        if c == '?':
          url.query = "".some
          state = QUERY_STATE
        elif c == '#':
          url.fragment = "".some
          state = FRAGMENT_STATE
        else:
          #TODO If c is not the EOF code point, not a URL code point, and not U+0025 (%), validation error.
          #TODO If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
          url.path.append(percentEncode(c, ControlPercentEncodeSet))
    of QUERY_STATE:
      #TODO encoding
      if not has or (not override and c == '#'):
        let querypercentencodeset = if url.is_special: SpecialQueryPercentEncodeSet else: QueryPercentEncodeSet
        url.query.get.percentEncode(buffer, querypercentencodeset)
        buffer = ""
        if has and c == '#':
          url.fragment = "".some
          state = FRAGMENT_STATE
      elif has:
        #TODO If c is not a URL code point and not U+0025 (%), validation error.
        #TODO If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
        buffer &= c
    of FRAGMENT_STATE:
      if has:
        #TODO If c is not a URL code point and not U+0025 (%), validation error.
        #TODO If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
        url.fragment.get.percentEncode(c, FragmentPercentEncodeSet)
    inc pointer
  return url.some

func anchor*(url: Url): string =
  ## Returns the URL's fragment, or the empty string when no fragment is set.
  url.fragment.get("")

proc parseURL*(input: string, base = none(Url), url: var URL,
    override = none(URLState)): Option[URL] =
  ## Runs `basicParseUrl` on `input` with the given `base`, target `url` and
  ## state `override`. Returns none when parsing fails. When the parsed
  ## scheme is "blob", a fresh `BlobUrlEntry` is attached to the result.
  result = basicParseUrl(input, base, url, override)
  if result.isSome and result.get.scheme == "blob":
    result.get.blob = BlobUrlEntry().some

proc parseURL*(input: string, base = none(Url), override = none(URLState)): Option[Url] =
  ## Parses `input` into a newly created URL object via `basicParseUrl`.
  ## Returns none when parsing fails. When the parsed scheme is "blob", a
  ## fresh `BlobUrlEntry` is attached to the result.
  var fresh = Url()
  result = basicParseUrl(input, base, fresh, override)
  if result.isSome and result.get.scheme == "blob":
    result.get.blob = BlobUrlEntry().some

func serializeip(ipv4: uint32): string =
  ## Serializes a 32-bit IPv4 address in dotted-decimal notation, most
  ## significant octet first (e.g. 0x7F000001 -> "127.0.0.1").
  var octets: array[4, string]
  var rest = ipv4
  # Peel octets off the low end and store them back to front.
  for slot in countdown(3, 0):
    octets[slot] = $(rest and 0xFFu32)
    rest = rest shr 8
  assert rest == 0
  result = octets[0]
  for k in 1..3:
    result &= '.'
    result &= octets[k]

func findZeroSeq(ipv6: array[8, uint16]): int =
  ## Returns the index of the first piece of the first longest run of zero
  ## pieces in `ipv6`, or -1 when no run is longer than one piece.
  ##
  ## This is the "compress" index of the URL Standard's IPv6 serializer: a
  ## lone zero piece must not be compressed, and ties go to the earliest run.
  var maxi = -1
  var maxn = 1 # a run only qualifies when it is strictly longer than 1
  var newi = -1
  var newn = 0
  for i, piece in ipv6:
    if piece == 0:
      if newn == 0:
        newi = i # a new run starts here
      inc newn
      # Strict comparison keeps the earliest run among equally long ones.
      if newn > maxn:
        maxn = newn
        maxi = newi
    else:
      newn = 0
  return maxi

func serializeip(ipv6: array[8, uint16]): string =
  ## Serializes an IPv6 address as colon-separated lowercase hex pieces,
  ## collapsing the zero run selected by `findZeroSeq` (without the
  ## surrounding brackets).
  let compress = findZeroSeq(ipv6)
  var skipping = false # true while inside the compressed zero run
  for idx, piece in ipv6:
    if skipping:
      if piece == 0:
        continue
      skipping = false
    if idx == compress:
      # A run at the very start needs both colons; otherwise the previous
      # piece already emitted its trailing ':'.
      result &= (if idx == 0: "::" else: ":")
      skipping = true
    else:
      result &= toHexLower(piece)
      if idx != high(ipv6):
        result &= ':'

func serialize(host: Host): string =
  ## Serializes a host. Precedence: IPv4 address, then IPv6 address (wrapped
  ## in square brackets), then a non-empty opaque host, then the domain.
  if host.ipv4.issome:
    result = serializeip(host.ipv4.get)
  elif host.ipv6.issome:
    result = "[" & serializeip(host.ipv6.get) & "]"
  elif host.opaquehost != "":
    result = host.opaquehost
  else:
    result = host.domain

func serialize*(path: UrlPath): string {.inline.} =
  ## Serializes a path: an opaque path is returned verbatim, otherwise every
  ## segment is emitted prefixed by '/'.
  if not path.opaque:
    for segment in path.ss:
      result.add('/')
      result.add(segment)
  else:
    result = path.s

func serialize_unicode*(path: UrlPath): string {.inline.} =
  ## Like `serialize`, but the opaque path, or each individual segment, is
  ## passed through `percentDecode` before being emitted.
  if not path.opaque:
    for segment in path.ss:
      result.add('/')
      result.add(percentDecode(segment))
  else:
    result = percentDecode(path.s)

func serialize_unicode_dos*(path: UrlPath): string {.inline.} =
  ## Percent-decoded serialization using backslashes as separators. A
  ## leading segment that is a Windows drive letter is emitted as-is, with
  ## no separator in front of it and without percent-decoding. Opaque paths
  ## are simply percent-decoded.
  if path.opaque:
    return percentDecode(path.s)
  var start = 0
  if path.ss.len > 0 and path.ss[0].is_windows_drive_letter:
    result &= path.ss[0]
    start = 1
  for i in start ..< path.ss.len:
    result &= '\\'
    result &= percentDecode(path.ss[i])

func serialize*(url: Url, excludefragment = false, excludepassword = false): string =
  ## Serializes `url` to a string.
  ##
  ## * `excludefragment`: omit the "#fragment" part.
  ## * `excludepassword`: omit ":password" from the credentials.
  result = url.scheme
  result &= ':'
  if url.host.issome:
    result &= "//"
    if url.includes_credentials:
      result &= url.username
      if url.password != "" and not excludepassword:
        result &= ':'
        result &= url.password
      result &= '@'
    result &= url.host.get.serialize
    if url.port.issome:
      result &= ':'
      result &= $url.port.get
  elif not url.path.opaque and url.path.ss.len > 1 and url.path.ss[0] == "":
    # No host and a path starting with an empty segment: "/." is inserted
    # before the path.
    result &= "/."
  result &= url.path.serialize()
  if url.query.issome:
    result &= '?'
    result &= url.query.get
  if url.fragment.issome and not excludefragment:
    result &= '#'
    result &= url.fragment.get

func serialize*(url: Option[Url], excludefragment = false): string =
  ## Serializes an optional URL; none yields the empty string.
  ## `excludefragment` is forwarded to the URL serializer so that the
  ## fragment is actually omitted when requested.
  if url.isnone:
    return ""
  return url.get.serialize(excludefragment)

func equals*(a, b: Url, excludefragment = false): bool =
  ## Two URLs are equal when their serializations match; with
  ## `excludefragment` set, fragments are left out of the comparison.
  let lhs = a.serialize(excludefragment)
  let rhs = b.serialize(excludefragment)
  lhs == rhs

func `$`*(url: URL): string {.jsfunc.} =
  ## String conversion of a URL: its full serialization.
  return url.serialize()

func `$`*(path: UrlPath): string {.inline.} =
  ## String conversion of a URL path: its serialization.
  return path.serialize()

func href(url: URL): string {.jsfget.} =
  ## Getter for `href`: the serialized URL.
  url.serialize()

func toJSON(url: URL): string {.jsfunc.} =
  ## Returns the serialized URL.
  ##
  ## The URL Standard defines `toJSON()` as a method, and JSON.stringify
  ## only uses it when it is callable, so it is bound with `jsfunc` (like
  ## `$`) rather than with the `jsfget` getter pragma.
  return $url

# Copies the state of a into b.
proc cloneInto(a, b: URL) =
  ## Overwrites every field of `b` with `a`'s. When `a` has search params,
  ## `b` receives its own copy whose `url` points back at `b`; otherwise
  ## `b.searchParams` ends up nil as well.
  b[] = a[]
  if a.searchParams == nil: #TODO ideally this would never be true
    return
  let params = URLSearchParams()
  b.searchParams = params
  params[] = a.searchParams[]
  params.url = some(b)

proc newURL*(url: URL): URL =
  ## Creates a new URL object and copies `url` into it with `cloneInto`.
  result = URL()
  url.cloneInto(result)

proc setHref(url: URL, s: string): Err[JSError] {.jsfset: "href".} =
  ## Setter for `href`: parses `s` and, on success, copies the parsed URL
  ## over `url`. Returns a TypeError when `s` does not parse.
  # NOTE(review): the freshly parsed URL appears to carry no searchParams,
  # so cloneInto would leave url.searchParams nil afterwards - confirm.
  let parsed = basicParseUrl(s)
  if parsed.isSome:
    parsed.get.cloneInto(url)
  else:
    return err(newTypeError(s & " is not a valid URL"))

func isIP*(url: URL): bool =
  ## True when the URL has a host and that host is an IPv4 or IPv6 address.
  if url.host.isSome:
    let h = url.host.get
    result = h.ipv4.isSome or h.ipv6.isSome

#https://url.spec.whatwg.org/#concept-urlencoded-parser
proc parseApplicationXWWWFormUrlEncoded(input: string): seq[(string, string)] =
  ## Parses an application/x-www-form-urlencoded string into a list of
  ## (name, value) pairs.
  ##
  ## `input` is split on '&' and empty chunks are skipped. Each chunk is
  ## split at its first '='; a chunk without '=' becomes a name with an
  ## empty value. '+' is turned into a space before percent-decoding.
  for s in input.split('&'):
    if s == "":
      continue
    var name, value: string
    let eq = s.find('=')
    if eq == -1:
      name = s
    else:
      # The name may legitimately be empty (e.g. "=v"); it must not fall
      # back to the whole chunk in that case.
      name = s.substr(0, eq - 1)
      value = s.substr(eq + 1)
    for c in name.mitems:
      if c == '+':
        c = ' '
    for c in value.mitems:
      if c == '+':
        c = ' '
    result.add((percentDecode(name), percentDecode(value)))

#https://url.spec.whatwg.org/#concept-urlencoded-serializer
proc serializeApplicationXWWWFormUrlEncoded*(kvs: seq[(string, string)]): string =
  ## Serializes (name, value) pairs as "name=value" joined by '&', with both
  ## parts percent-encoded using `ApplicationXWWWFormUrlEncodedSet`.
  for pair in kvs:
    if result.len > 0:
      result &= '&'
    result.percentEncode(pair[0], ApplicationXWWWFormUrlEncodedSet, true)
    result &= '='
    result.percentEncode(pair[1], ApplicationXWWWFormUrlEncodedSet, true)

proc initURLSearchParams(params: URLSearchParams, init: string) =
  ## Replaces the list of `params` with the pairs parsed from the
  ## form-urlencoded string `init`.
  let parsed = parseApplicationXWWWFormUrlEncoded(init)
  params.list = parsed

proc newURLSearchParams[T: seq[(string, string)]|Table[string, string]|string](init: T = ""): URLSearchParams {.jsctor.} =
  ## Constructs URLSearchParams from a list of pairs, a table, or a query
  ## string. For a string, one leading '?' is dropped before parsing.
  result = URLSearchParams()
  when T is string:
    let skip = if init.len > 0 and init[0] == '?': 1 else: 0
    result.initURLSearchParams(init.substr(skip))
  elif T is seq[(string, string)]:
    result.list = init
  elif T is Table[string, string]:
    for k, v in init:
      result.list.add((k, v))

proc `$`*(params: URLSearchParams): string {.jsfunc.} =
  ## String conversion: the form-urlencoded serialization of the list.
  params.list.serializeApplicationXWWWFormUrlEncoded()

proc update(params: URLSearchParams) =
  ## Writes the serialized params back into the associated URL's query, if
  ## there is an associated URL. An empty serialization clears the query.
  if params.url.isSome:
    let serializedQuery = $params
    params.url.get.query =
      if serializedQuery == "": none(string)
      else: some(serializedQuery)

proc append*(params: URLSearchParams, name: string, value: string) {.jsfunc.} =
  ## Adds a (name, value) pair at the end of the list, then runs `update`.
  let entry = (name, value)
  params.list.add(entry)
  params.update()

proc delete*(params: URLSearchParams, name: string) {.jsfunc.} =
  ## Removes every pair whose name equals `name`, then runs `update` so the
  ## associated URL's query reflects the removal (as `append` does).
  # Iterate backwards so deletions do not shift unvisited indices.
  for i in countdown(params.list.high, 0):
    if params.list[i][0] == name:
      params.list.delete(i)
  params.update()

proc get*(params: URLSearchParams, name: string): Option[string] {.jsfunc.} =
  ## Returns the value of the first pair named `name`, or none when there
  ## is no such pair.
  result = none(string)
  for i in 0 ..< params.list.len:
    if params.list[i][0] == name:
      return some(params.list[i][1])

proc getAll*(params: URLSearchParams, name: string): seq[string] {.jsfunc.} =
  ## Returns the values of all pairs named `name`, in list order.
  for i in 0 ..< params.list.len:
    if params.list[i][0] == name:
      result.add(params.list[i][1])

proc set*(params: URLSearchParams, name: string, value: string) {.jsfunc.} =
  ## Sets the value for `name`, following the URL Standard's
  ## URLSearchParams `set()`: the first pair named `name` gets `value` and
  ## any later pairs with that name are removed; if there is no such pair,
  ## a new one is appended. `update` is run afterwards.
  var first = -1
  for i in 0..params.list.high:
    if params.list[i][0] == name:
      first = i
      break
  if first == -1:
    params.list.add((name, value))
  else:
    params.list[first][1] = value
    # Drop later duplicates; walk backwards so deletions do not shift the
    # indices still to be visited.
    for i in countdown(params.list.high, first + 1):
      if params.list[i][0] == name:
        params.list.delete(i)
  params.update()

proc newURL*(s: string, base: Option[string] = none(string)):
    Result[URL, JSError] {.jsctor.} =
  ## Constructs a URL from `s`, optionally resolved against `base`.
  ##
  ## Returns a TypeError when `base` (if given) or `s` fails to parse. On
  ## success the URL's searchParams are created, linked back to the URL and
  ## initialised from its query - on both the base and the base-less path,
  ## so later accesses to `searchParams` never see nil for URLs built here.
  var baseUrl = none(URL)
  if base.issome:
    baseUrl = parseURL(base.get)
    if baseUrl.isNone:
      return err(newTypeError(base.get & " is not a valid URL"))
  let url = parseURL(s, baseUrl)
  if url.isNone:
    return err(newTypeError(s & " is not a valid URL"))
  url.get.searchParams = newURLSearchParams()
  url.get.searchParams.url = url
  url.get.searchParams.initURLSearchParams(url.get.query.get(""))
  return ok(url.get)

proc origin0*(url: URL): Origin =
  ## Computes the origin of `url`. Leaving `result` at its default value
  ## stands for an opaque origin.
  ##
  ## * "blob": the path is parsed as a URL and that URL's origin is
  ##   returned; an unparsable path yields an opaque origin.
  ## * "ftp", "http", "https", "ws", "wss": (scheme, host, port, none).
  ## * anything else, including "file": opaque.
  case url.scheme
  of "blob":
    if url.blob.issome:
      #TODO
      discard
    let pathURL = parseURL($url.path)
    if pathURL.issome:
      result = pathURL.get.origin0
  of "ftp", "http", "https", "ws", "wss":
    result = some((url.scheme, url.host.get, url.port, none(string)))
  else:
    # "file" (#???) and every other scheme: opaque
    discard

proc `==`*(a, b: Origin): bool =
  ## Origins compare equal only when both are non-opaque (some) and their
  ## contents are equal; an opaque origin never equals anything.
  a.isSome and b.isSome and a.get == b.get

proc `$`*(origin: Origin): string =
  ## Serializes an origin as "scheme://host[:port]"; an opaque origin
  ## serializes to "null".
  if origin.isNone:
    return "null"
  let tup = origin.get
  result = tup.scheme & "://" & tup.host.serialize()
  if tup.port.isSome:
    result &= ':' & $tup.port.get

proc origin*(url: URL): string {.jsfget.} =
  ## Getter for `origin`: the serialized origin of the URL.
  let computed = url.origin0
  $computed

proc protocol*(url: URL): string {.jsfget.} =
  ## Getter for `protocol`: the scheme followed by ':'.
  result = url.scheme
  result &= ':'

proc setProtocol*(url: URL, s: string) {.jsfset: "protocol".} =
  ## Setter for `protocol`: re-runs the parser on `s` followed by ':',
  ## starting in the scheme start state and writing into `url`. The parse
  ## result is discarded.
  let input = s & ':'
  discard basicParseUrl(input, url = url,
    stateOverride = some(SCHEME_START_STATE))

proc username(url: URL, username: string) {.jsfset.} =
  ## Setter for `username`: stores the value percent-encoded with the
  ## userinfo set. Does nothing when `canHaveUsernamePasswordPort` is false.
  if url.canHaveUsernamePasswordPort:
    url.username = username.percentEncode(UserInfoPercentEncodeSet)

proc password(url: URL, password: string) {.jsfset.} =
  ## Setter for `password`: stores the value percent-encoded with the
  ## userinfo set. Does nothing when `canHaveUsernamePasswordPort` is false.
  if url.canHaveUsernamePasswordPort:
    url.password = password.percentEncode(UserInfoPercentEncodeSet)

proc host*(url: URL): string {.jsfget.} =
  ## Getter for `host`: the serialized host, followed by ":port" when a
  ## port is set. Empty when the URL has no host.
  if url.host.isnone:
    return ""
  result = url.host.get.serialize()
  if url.port.issome:
    result &= ':' & $url.port.get

proc setHost*(url: URL, s: string) {.jsfset: "host".} =
  ## Setter for `host`: re-runs the parser on `s` starting in the host
  ## state and writing into `url`. Ignored for URLs with an opaque path.
  if not url.path.opaque:
    discard basicParseUrl(s, url = url, stateOverride = some(HOST_STATE))

proc hostname*(url: URL): string {.jsfget.} =
  ## Getter for `hostname`: the serialized host without the port, or the
  ## empty string when the URL has no host.
  if url.host.isSome:
    result = url.host.get.serialize()
  else:
    result = ""

proc setHostname*(url: URL, s: string) {.jsfset: "hostname".} =
  ## Setter for `hostname`: re-runs the parser on `s` starting in the
  ## hostname state and writing into `url`. Ignored for URLs with an opaque
  ## path.
  if not url.path.opaque:
    discard basicParseUrl(s, url = url, stateOverride = some(HOSTNAME_STATE))

proc port*(url: URL): string {.jsfget.} =
  ## Getter for `port`: the port as a decimal string, or the empty string
  ## when no port is set.
  if url.port.issome: $url.port.get
  else: ""

proc setPort*(url: URL, s: string) {.jsfset: "port".} =
  ## Setter for `port`: an empty string clears the port; anything else is
  ## fed to the parser starting in the port state. Does nothing when
  ## `canHaveUsernamePasswordPort` is false.
  if not url.canHaveUsernamePasswordPort:
    return
  if s != "":
    discard basicParseUrl(s, url = url, stateOverride = some(PORT_STATE))
  else:
    url.port = none(uint16)

proc pathname*(url: URL): string {.jsfget.} =
  ## Getter for `pathname`: the serialized path.
  serialize(url.path)

proc setPathname*(url: URL, s: string) {.jsfset: "pathname".} =
  ## Setter for `pathname`: empties the segment list, then re-runs the
  ## parser on `s` starting in the path start state. Ignored for URLs with
  ## an opaque path.
  if not url.path.opaque:
    url.path.ss.setLen(0)
    discard basicParseUrl(s, url = url, stateOverride = some(PATH_START_STATE))

proc search*(url: URL): string {.jsfget.} =
  ## Getter for `search`: '?' followed by the query, or the empty string
  ## when the query is absent or empty.
  let query = url.query.get("")
  if query != "":
    result = "?" & query

proc setSearch*(url: URL, s: string) {.jsfset: "search".} =
  ## Setter for `search`.
  ##
  ## An empty `s` clears the query and empties the searchParams list.
  ## Otherwise one leading '?' is dropped, the query is reset and re-parsed
  ## starting in the query state, and the searchParams list is rebuilt from
  ## the new string.
  ##
  ## `url.searchParams` can be nil (see `cloneInto`), so it is only touched
  ## when present instead of being dereferenced unconditionally.
  if s == "":
    url.query = none(string)
    if url.searchParams != nil:
      url.searchParams.list.setLen(0)
    return
  let s = if s[0] == '?': s.substr(1) else: s
  url.query = some("")
  discard basicParseUrl(s, url = url, stateOverride = some(QUERY_STATE))
  if url.searchParams != nil:
    url.searchParams.list = parseApplicationXWWWFormUrlEncoded(s)

proc hash*(url: URL): string {.jsfget.} =
  ## Getter for `hash`: '#' followed by the fragment, or the empty string
  ## when the fragment is absent or empty.
  let fragment = url.fragment.get("")
  if fragment != "":
    result = '#' & fragment

proc setHash*(url: URL, s: string) {.jsfset: "hash".} =
  ## Setter for `hash`: an empty string removes the fragment; otherwise one
  ## leading '#' is dropped, the fragment is reset to empty and the rest is
  ## fed to the parser starting in the fragment state.
  if s.len == 0:
    url.fragment = none(string)
    return
  let input = if s[0] == '#': s.substr(1) else: s
  url.fragment = some("")
  discard basicParseUrl(input, url = url, stateOverride = some(FRAGMENT_STATE))

proc addURLModule*(ctx: JSContext) =
  ## Registers the `URL` and `URLSearchParams` types with the JS context
  ## `ctx` via `registerType`.
  ctx.registerType(URL)
  ctx.registerType(URLSearchParams)