diff options
author | bptato <nincsnevem662@gmail.com> | 2021-12-21 17:11:58 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2021-12-21 17:11:58 +0100 |
commit | 8a1611e1c6c81b4ee5b7f33f3f539ee1db53045e (patch) | |
tree | 998ca1deeb00092f70de183515b2df847e1dfaa0 /src/types | |
parent | bbb14729f8b0c612f79ba96566d0118fc8c2290d (diff) | |
download | chawan-8a1611e1c6c81b4ee5b7f33f3f539ee1db53045e.tar.gz |
Honestly I'm not sure what I added here...
Diffstat (limited to 'src/types')
-rw-r--r-- | src/types/url.nim | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/src/types/url.nim b/src/types/url.nim new file mode 100644 index 00000000..8eec6090 --- /dev/null +++ b/src/types/url.nim @@ -0,0 +1,245 @@ +#See https://url.spec.whatwg.org/#url-parsing. +import strutils +import tables +import options +import unicode + +import utils/twtstr + +type + UrlState = enum + STATE_OVERRIDE, SCHEME_START_STATE, SCHEME_STATE, NO_SCHEME_STATE, + FILE_STATE, SPECIAL_RELATIVE_OR_AUTHORITY_STATE, + SPECIAL_AUTHORITY_SLASHES_STATE, PATH_OR_AUTHORITY_STATE, + OPAQUE_PATH_STATE, FRAGMENT_STATE, RELATIVE_STATE, + SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE, AUTHORITY_STATE, PATH_STATE, + RELATIVE_SLASH_STATE, QUERY_STATE + + Blob* = object + + BlobUrlEntry* = object + obj: Blob #TODO + + UrlPath* = object + case opaque*: bool + of true: + s*: string + else: + ss*: seq[string] + + Url* = object + failure*: bool + isnull: bool + encoding: int #TODO + scheme: string + username: string + password: string + port: int + host: string + path: UrlPath + query: Option[string] + fragment: Option[string] + blob: Option[BlobUrlEntry] + +const NullUrl = Url(isnull: true) +const EmptyPath = UrlPath(opaque: true, s: "") + +const SpecialSchemes = { + "ftp": 21, + "file": -1, + "http": 80, + "https": 443, + "ws": 80, + "wss": 443, +}.toTable() + +proc shorten(url: var Url) = + assert not url.path.opaque + + if url.scheme == "file" and url.path.ss.len == 1 and url.path.ss[0][0] in Letters and url.path.ss[0][1] == ':': + return + if url.path.ss.len >= 0: + discard url.path.ss.pop() + +#TODO encoding +proc basicParseUrl*(input: string, base: Url = NullUrl, url: Url = Url(), override: bool = false): Url = + #TODO If input contains any leading or trailing C0 control or space, validation error. + #TODO If input contains any ASCII tab or newline, validation error. + let input = input.strip(true, false, {chr(0x00)..chr(0x1F), ' '}).strip(true, false, {'\t', '\n'}) + var buffer = "" + var atsignseen = false + var insidebrackets = false + var passwordtokenseen = false + var pointer = 0 + var state = SCHEME_START_STATE + if override: + state = STATE_OVERRIDE + var url = url + template c(): char = input[pointer] + template c(i: int): char = input[pointer + i] + template has(i: int): bool = (pointer + i < input.len) + template return_failure() = return Url(failure: true) + template includes_credentials(url: Url): bool = url.username != "" or url.password != "" + template default_port(url: Url): bool = url.scheme in SpecialSchemes and url.port == SpecialSchemes[url.scheme] + template start_over() = pointer = -1 + while pointer < input.len: + case state + of SCHEME_START_STATE: + if c.isAlphaAscii(): + buffer &= c.tolower() + state = SCHEME_STATE + elif not override: + state = NO_SCHEME_STATE + dec pointer + else: + #TODO validation error + return_failure + of SCHEME_STATE: + if c in Letters + {'+', '-', '.'}: + buffer &= c.tolower() + elif c == ':': + if override: + if url.scheme in SpecialSchemes and buffer notin SpecialSchemes: + return url + if url.scheme notin SpecialSchemes and buffer in SpecialSchemes: + return url + if (url.includes_credentials or url.port != -1) and buffer == "file": + return url + if url.scheme == "file" and url.host == "": + return url + url.scheme = buffer + if override: + if url.default_port: + url.port = -1 + return url + buffer = "" + if url.scheme == "file": + #TODO If remaining does not start with "//", validation error. + state = FILE_STATE + elif url.scheme in SpecialSchemes and not base.isnull and base.scheme == url.scheme: + state = SPECIAL_RELATIVE_OR_AUTHORITY_STATE + elif url.scheme in SpecialSchemes: + state = SPECIAL_AUTHORITY_SLASHES_STATE + elif has(1) and c(1) == '/': + state = PATH_OR_AUTHORITY_STATE + else: + url.path = EmptyPath + state = OPAQUE_PATH_STATE + elif not override: + buffer = "" + state = NO_SCHEME_STATE + start_over + else: + #TODO validation error + return_failure + of NO_SCHEME_STATE: + if base.isnull or base.path.opaque and c != '#': + #TODO validation error + return_failure + elif base.path.opaque and c == '#': + url.scheme = base.scheme + url.path = base.path + url.query = base.query + url.fragment = some("") + state = FRAGMENT_STATE + elif base.scheme != "file": + state = RELATIVE_STATE + dec pointer + else: + state = FILE_STATE + dec pointer + of SPECIAL_RELATIVE_OR_AUTHORITY_STATE: + if c == '/' and has(1) and c(1) == '/': + state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE + inc pointer + else: + #TODO validation error + state = RELATIVE_STATE + dec pointer + of PATH_OR_AUTHORITY_STATE: + if c == '/': + state = AUTHORITY_STATE + else: + state = PATH_STATE + dec pointer + of RELATIVE_STATE: + assert base.scheme != "file" + url.scheme = base.scheme + if c == '/': + state = RELATIVE_SLASH_STATE + elif url.scheme in SpecialSchemes and c == '\\': + #TODO validation error + state = RELATIVE_SLASH_STATE + else: + url.username = base.username + url.password = base.password + url.host = base.host + url.port = base.port + url.path = base.path + url.query = base.query + if c == '?': + url.query = "".some + state = QUERY_STATE + elif c == '#': + url.fragment = "".some + state = FRAGMENT_STATE + else: + url.query = none(string) + url.shorten() + state = PATH_STATE + dec pointer + of RELATIVE_SLASH_STATE: + if url.scheme in SpecialSchemes and c in {'/', '\\'}: + #TODO if c is \ validation error + state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE + elif c == '/': + state = AUTHORITY_STATE + else: + url.username = base.username + url.password = base.password + url.host = base.host + url.port = base.port + state = PATH_STATE + dec pointer + of SPECIAL_AUTHORITY_SLASHES_STATE: + if c == '/' and has(1) and c(1) == '/': + state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE + inc pointer + else: + #TODO validation error + state = SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE + dec pointer + of SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE: + if c notin {'/', '\\'}: + state = AUTHORITY_STATE + dec pointer + else: + #TODO validation error + discard + of AUTHORITY_STATE: + if c == '@': + #TODO validation error + if atsignseen: + buffer = "%40" & buffer + atsignseen = true + var i = 0 + while i < buffer.len: + if c == ':' and not passwordtokenseen: + passwordtokenseen = true + inc i + continue + var r: Rune + fastRuneAt(buffer, i, r) + #TODO finish this mess + #encodedcodepoints = + + inc pointer + +proc parseURL(input: string, base: Url = NullUrl, url: Url = Url(), override: bool = false): Url = + var url = basicParseUrl(input, base, url, override) + if url.failure: + return url + if url.scheme != "blob": + return url + url.blob = some(BlobUrlEntry()) + return url |