diff options
author | bptato <nincsnevem662@gmail.com> | 2024-07-24 22:41:07 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-07-24 22:41:28 +0200 |
commit | 5a16c29c8d68e8907a1ecc93ba1cf810efaefe1e (patch) | |
tree | f729bc809f3b20e38af794260959f77ffaf178f4 /src | |
parent | de2a70dc814658a8c72e7da6180ea5e16a8b985b (diff) | |
download | chawan-5a16c29c8d68e8907a1ecc93ba1cf810efaefe1e.tar.gz |
url: misc fixes & improvements
* fix various parsing bugs * rewrite state machine * other small optimizations
Diffstat (limited to 'src')
-rw-r--r-- | src/types/url.nim | 1032 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 38 |
2 files changed, 580 insertions, 490 deletions
diff --git a/src/types/url.nim b/src/types/url.nim index f6e28d10..fc0d49d0 100644 --- a/src/types/url.nim +++ b/src/types/url.nim @@ -19,11 +19,8 @@ include res/map/idna_gen type URLState = enum - usSchemeStart, usScheme, usNoScheme, usFile, usSpecialRelativeOrAuthority, - usSpecialAuthoritySlashes, usPathOrAuthority, usOpaquePath, usFragment, - usRelative, usSpecialAuthorityIgnoreSlashes, usAuthority, usPath, - usRelativePath, usQuery, usHost, usHostname, usFileHost, usPort, - usPathStart, usFileSlash + usFail, usDone, usSchemeStart, usNoScheme, usFile, usFragment, usAuthority, + usPath, usQuery, usHost, usHostname, usPort, usPathStart BlobURLEntry* = object obj: Blob #TODO blob urls @@ -35,24 +32,32 @@ type else: ss*: seq[string] + HostType = enum + htNone, htDomain, htIpv4, htIpv6, htOpaque + Host = object - domain: string - ipv4: Option[uint32] - ipv6: Option[array[8, uint16]] - opaquehost: string + case t: HostType + of htNone: + discard + of htDomain: + domain: string + of htIpv4: + ipv4: uint32 + of htIpv6: + ipv6: array[8, uint16] + of htOpaque: + opaque: string URLSearchParams* = ref object list*: seq[tuple[name, value: string]] url: Option[URL] - URL* = ref URLObj - URLObj* = object - encoding: int #TODO + URL* = ref object scheme*: string username* {.jsget.}: string password* {.jsget.}: string port: Option[uint16] - host: Option[Host] + host: Host path*: URLPath query*: Option[string] fragment: Option[string] @@ -79,18 +84,18 @@ jsDestructor(URL) jsDestructor(URLSearchParams) const EmptyPath = URLPath(opaque: true, s: "") -const EmptyHost = Host(domain: "").some +const EmptyHost = Host(t: htDomain, domain: "") const SpecialSchemes = { - "ftp": 21u16.some, + "ftp": some(21u16), "file": none(uint16), - "http": 80u16.some, - "https": 443u16.some, - "ws": 80u16.some, - "wss": 443u16.some, + "http": some(80u16), + "https": some(443u16), + "ws": some(80u16), + "wss": some(443u16), }.toTable() -func parseIpv6(input: string): Option[array[8, uint16]] = +func parseIpv6(input: openArray[char]): Option[array[8, uint16]] = var pieceindex = 0 var compress = -1 var pointer = 0 @@ -217,19 +222,20 @@ func parseIpv4(input: string): Option[uint32] = for i in 0 ..< numbers.high: let n = uint32(numbers[i]) ipv4 += n * (1u32 shl ((3 - i) * 8)) - return ipv4.some + return some(ipv4) const ForbiddenHostChars = { - char(0x00), '\t', '\n', '\r', ' ', '#', '%', '/', ':', '<', '>', '?', '@', - '[', '\\', ']', '^', '|' + char(0x00), '\t', '\n', '\r', ' ', '#', '/', ':', '<', '>', '?', '@', '[', + '\\', ']', '^', '|' } -func opaqueParseHost(input: string): Option[Host] = +const ForbiddenDomainChars = ForbiddenHostChars + {'%'} +func opaqueParseHost(input: string): Host = var o = "" for c in input: if c in ForbiddenHostChars: - return none(Host) + return Host(t: htNone) o.percentEncode(c, ControlPercentEncodeSet) - return some(Host(opaquehost: o)) + return Host(t: htOpaque, opaque: o) func endsInNumber(input: string): bool = if input.len == 0: @@ -288,18 +294,18 @@ func getIdnaMapped(r: Rune): string = let n = MappedMapHigh.searchInMap(i) return $MappedMapHigh[n].mapped -func processIdna(str: string; beStrict: bool): Option[string] = +func processIdna(str: string; beStrict: bool): string = # CheckHyphens = false # CheckBidi = true # CheckJoiners = true # UseSTD3ASCIIRules = beStrict (but STD3 is not implemented) # Transitional_Processing = false # VerifyDnsLength = beStrict - var mapped: seq[Rune] + var mapped: seq[Rune] = @[] for r in str.runes(): let status = getIdnaTableStatus(r) case status - of itsDisallowed: return none(string) #error + of itsDisallowed: return "" #error of itsIgnored: discard of itsMapped: mapped &= getIdnaMapped(r).toRunes() of itsDeviation: mapped &= r @@ -311,7 +317,7 @@ func processIdna(str: string; beStrict: bool): Option[string] = cr_init(addr cr, nil, passRealloc) let r = unicode_general_category(addr cr, "Mark") assert r == 0 - var labels: seq[string] + var labels = "" for label in ($mapped).split('.'): if label.startsWith("xn--"): try: @@ -319,93 +325,95 @@ func processIdna(str: string; beStrict: bool): Option[string] = let x0 = s.toRunes() let x1 = normalize(x0) if x0 != x1: - return none(string) #error + return "" #error # CheckHyphens is false if x0.len > 0: let cps = cast[ptr UncheckedArray[u32pair]](cr.points) let c = uint32(x0[0]) let L = cr.len div 2 - 1 if cps.toOpenArray(0, L).binarySearch(c, cmpRange) != -1: - return none(string) #error + return "" #error for r in x0: if r == Rune('.'): - return none(string) #error + return "" #error let status = getIdnaTableStatus(r) if status in {itsDisallowed, itsIgnored, itsMapped}: - return none(string) #error + return "" #error #TODO check joiners #TODO check bidi - labels.add(s) + if labels.len > 0: + labels &= '.' + labels &= s except PunyError: - return none(string) #error + return "" #error else: - labels.add(label) + if labels.len > 0: + labels &= '.' + labels &= label cr_free(addr cr) - return some(labels.join('.')) + return labels -func unicodeToAscii(s: string; beStrict: bool): Option[string] = +func unicodeToAscii(s: string; beStrict: bool): string = let processed = s.processIdna(beStrict) - if processed.isNone: - return none(string) #error - var labels: seq[string] + var labels = "" var all = 0 - for label in processed.get.split('.'): + for label in processed.split('.'): + var s = "" if AllChars - Ascii in s: try: - let converted = "xn--" & punycode.encode(label) - labels.add(converted) + s = "xn--" & punycode.encode(label) except PunyError: - return none(string) #error + return "" #error else: - labels.add(label) + s = label if beStrict: # VerifyDnsLength - let rl = labels[^1].runeLen() + let rl = s.runeLen() if rl notin 1..63: - return none(string) + return "" all += rl + if labels.len > 0: + labels &= '.' + labels &= s if beStrict: # VerifyDnsLength if all notin 1..253: - return none(string) #error - return some(labels.join('.')) + return "" #error + return labels -func domainToAscii(domain: string; bestrict = false): Option[string] = +func domainToAscii(domain: string; bestrict = false): string = var needsprocessing = false for s in domain.split('.'): if s.startsWith("xn--") or AllChars - Ascii in s: needsprocessing = true break if bestrict or needsprocessing: - #Note: we don't implement STD3 separately, it's always true - let res = domain.unicodeToAscii(bestrict) - if res.isNone or res.get == "": - return none(string) - return res - else: - return some(domain.toLowerAscii()) + # Note: we don't implement STD3 separately, it's always true + return domain.unicodeToAscii(bestrict) + return domain.toLowerAscii() -func parseHost(input: string; special: bool): Option[Host] = - if input.len == 0: return +func parseHost(input: string; special: bool): Host = + if input.len == 0: + return Host(t: htNone) if input[0] == '[': if input[^1] != ']': - return none(Host) - return some(Host(ipv6: parseIpv6(input.substr(1, input.high - 1)))) + return Host(t: htNone) + let ipv6 = parseIpv6(input.toOpenArray(1, input.high - 1)) + if ipv6.isNone: + return Host(t: htNone) + return Host( + t: htIpv6, + ipv6: ipv6.get + ) if not special: return opaqueParseHost(input) let domain = percentDecode(input) let asciiDomain = domain.domainToAscii() - if asciiDomain.isNone: - return none(Host) - if ForbiddenHostChars in asciiDomain.get: - return none(Host) - if asciiDomain.get.len > 0 and asciiDomain.get.endsInNumber(): - let ipv4 = parseIpv4(asciiDomain.get) + if asciiDomain == "" or ForbiddenDomainChars in asciiDomain: + return Host(t: htNone) + if asciiDomain.endsInNumber(): + let ipv4 = parseIpv4(asciiDomain) if ipv4.isSome: - return some(Host(ipv4: ipv4)) - return some(Host(domain: asciiDomain.get)) - -func isempty(host: Host): bool = - return host.domain == "" and host.ipv4.isNone and host.ipv6.isNone and - host.opaquehost == "" + return Host(t: htIpv4, ipv4: ipv4.get) + return Host(t: htDomain, domain: asciiDomain) proc shortenPath(url: URL) {.inline.} = assert not url.path.opaque @@ -422,393 +430,482 @@ proc append(path: var URLPath; s: string) = else: path.ss.add(s) -template includes_credentials(url: URL): bool = - url.username != "" or url.password != "" +func includesCredentials(url: URL): bool = + return url.username != "" or url.password != "" template is_windows_drive_letter(s: string): bool = s.len == 2 and s[0] in AsciiAlpha and (s[1] == ':' or s[1] == '|') template canHaveUsernamePasswordPort(url: URL): bool = - url.host.isSome and url.host.get.serialize() != "" and url.scheme != "file" + url.host.serialize() != "" and url.scheme != "file" + +proc parseOpaquePath(input: openArray[char]; pointer: var int; url: URL): + URLState = + while pointer < input.len: + let c = input[pointer] + if c == '?': + url.query = some("") + inc pointer + return usQuery + elif c == '#': + url.fragment = some("") + inc pointer + return usFragment + else: + url.path.s.percentEncode(c, ControlPercentEncodeSet) + inc pointer + return usDone -#TODO encoding -proc basicParseURL*(input: string; base = none(URL); url: URL = URL(); - stateOverride = none(URLState)): Option[URL] = - const NoStrip = AllChars - C0Controls - {' '} - let starti0 = input.find(NoStrip) - let starti = if starti0 == -1: 0 else: starti0 - let endi0 = input.rfind(NoStrip) - let endi = if endi0 == -1: input.len else: endi0 + 1 +proc parseSpecialAuthorityIgnoreSlashes(input: openArray[char]; + pointer: var int): URLState = + while pointer < input.len and input[pointer] in {'/', '\\'}: + inc pointer + return usAuthority + +proc parseRelativeSlash(input: openArray[char]; pointer: var int; + isSpecial: var bool; base, url: URL): URLState = + if isSpecial and pointer < input.len and input[pointer] in {'/', '\\'}: + inc pointer + return input.parseSpecialAuthorityIgnoreSlashes(pointer) + if pointer < input.len and input[pointer] == '/': + inc pointer + return usAuthority + url.username = base.username + url.password = base.password + url.host = base.host + url.port = base.port + return usPath + +proc parseRelative(input: openArray[char]; pointer: var int; + isSpecial: var bool; base, url: URL): URLState = + assert base.scheme != "file" + url.scheme = base.scheme + isSpecial = url.scheme in SpecialSchemes + if pointer < input.len and input[pointer] == '/' or + isSpecial and pointer < input.len and input[pointer] == '\\': + inc pointer + return input.parseRelativeSlash(pointer, isSpecial, base, url) + url.username = base.username + url.password = base.password + url.host = base.host + url.port = base.port + url.path = base.path + url.query = base.query + if pointer < input.len and input[pointer] == '?': + url.query = some("") + inc pointer + return usQuery + if pointer < input.len and input[pointer] == '#': + url.fragment = some("") + inc pointer + return usFragment + url.query = none(string) + url.shortenPath() + return usPath + +proc parseSpecialRelativeOrAuthority(input: openArray[char]; pointer: var int; + isSpecial: var bool; base, url: URL): URLState = + if pointer + 1 < input.len and input[pointer] == '/' and + input[pointer + 1] == '/': + pointer += 2 + return input.parseSpecialAuthorityIgnoreSlashes(pointer) + return input.parseRelative(pointer, isSpecial, base, url) + +proc parsePathOrAuthority(input: openArray[char]; pointer: var int): URLState = + if pointer < input.len and input[pointer] == '/': + inc pointer + return usAuthority + return usPath + +proc parseScheme(input: openArray[char]; pointer: var int; isSpecial: var bool; + firstc: char; base: Option[URL]; url: URL; override: bool): URLState = + var buffer = $firstc + var i = pointer + while i < input.len: + let c = input[i] + if c in {'\t', '\n'}: + discard + elif c in AsciiAlphaNumeric + {'+', '-', '.'}: + buffer &= c.toLowerAscii() + elif c == ':': + if override: + if isSpecial != (buffer in SpecialSchemes): + return usNoScheme + if (url.includesCredentials or url.port.isSome) and buffer == "file": + return usNoScheme + if url.host.t == htNone and url.scheme == "file": + return usNoScheme + url.scheme = buffer + isSpecial = url.scheme in SpecialSchemes + if override: + if isSpecial and SpecialSchemes[url.scheme] == url.port: + url.port = none(uint16) + return usNoScheme + pointer = i + 1 + if url.scheme == "file": + return usFile + if isSpecial and base.isSome and base.get.scheme == url.scheme: + return input.parseSpecialRelativeOrAuthority(pointer, isSpecial, + base.get, url) + if isSpecial: + # special authority slashes state + if pointer + 1 < input.len and input[pointer] == '/' and + input[pointer + 1] == '/': + pointer += 2 + return input.parseSpecialAuthorityIgnoreSlashes(pointer) + if i + 1 < input.len and input[i + 1] == '/': + inc pointer + return input.parsePathOrAuthority(pointer) + url.path = EmptyPath + return input.parseOpaquePath(pointer, url) + else: + break + inc i + return usNoScheme + +proc parseSchemeStart(input: openArray[char]; pointer: var int; + isSpecial: var bool; base: Option[URL]; url: URL; + override: bool): URLState = + var state = usNoScheme + if pointer < input.len and (let c = input[pointer]; c in AsciiAlpha): + # continue to scheme state + inc pointer + state = input.parseScheme(pointer, isSpecial, c.toLowerAscii(), base, url, + override) + if state == usNoScheme: + pointer = 0 # start over + if override: + return state + while pointer < input.len and input[pointer] in {'\t', '\n'}: + inc pointer + if state == usNoScheme: + if base.isNone: + return usFail + if base.get.path.opaque and (pointer >= input.len or input[pointer] != '#'): + return usFail + if base.get.path.opaque and pointer < input.len and input[pointer] == '#': + url.scheme = base.get.scheme + isSpecial = url.scheme in SpecialSchemes + url.path = base.get.path + url.query = base.get.query + url.fragment = some("") + inc pointer + return usFragment + if base.get.scheme != "file": + return input.parseRelative(pointer, isSpecial, base.get, url) + return usFile + return state + +proc parseAuthority(input: openArray[char]; pointer: var int; isSpecial: bool; + url: URL): URLState = + var atSignSeen = false + var passwordSeen = false var buffer = "" - var atsignseen = false - var insidebrackets = false - var passwordtokenseen = false - var pointer = starti - let override = stateOverride.isSome - var state = usSchemeStart + var beforeBuffer = pointer + while pointer < input.len: + let c = input[pointer] + if c in {'/', '?', '#'} or isSpecial and c == '\\': + break + if c == '@': + if atSignSeen: + buffer = "%40" & buffer + atSignSeen = true + for c in buffer: + if c == ':' and not passwordSeen: + passwordSeen = true + continue + if passwordSeen: + url.password.percentEncode(c, UserInfoPercentEncodeSet) + else: + url.username.percentEncode(c, UserInfoPercentEncodeSet) + buffer = "" + beforeBuffer = pointer + 1 + else: + buffer &= c + inc pointer + if atSignSeen and buffer == "": + return usFail + pointer = beforeBuffer + return usHost + +proc parseFileHost(input: openArray[char]; pointer: var int; isSpecial: bool; + url: URL; override: bool): URLState = + let buffer = input.until({'/', '\\', '?', '#'}, pointer) + pointer += buffer.len + if not override and buffer.is_windows_drive_letter: + return usPath + if buffer == "": + url.host = Host(t: htDomain, domain: "") + else: + let host = parseHost(buffer, isSpecial) + if host.t == htNone: + return usFail + url.host = host + if url.host.t == htDomain and url.host.domain == "localhost": + url.host.domain = "" + if override: + return usFail + return usPathStart + +proc parseHostState(input: openArray[char]; pointer: var int; isSpecial: bool; + url: URL; override: bool; state: URLState): URLState = + if override and url.scheme == "file": + return input.parseFileHost(pointer, isSpecial, url, override) + var insideBrackets = false + var buffer = "" + while pointer < input.len: + let c = input[pointer] + if c == ':' and not insideBrackets: + if override and state == usHostname: + return usFail + let host = parseHost(buffer, isSpecial) + if host.t == htNone: + return usFail + url.host = host + inc pointer + return usPort + elif c in {'/', '?', '#'} or isSpecial and c == '\\': + break + else: + if c == '[': + insideBrackets = true + elif c == ']': + insideBrackets = false + buffer &= c + inc pointer + if isSpecial and buffer == "": + return usFail + if override and buffer == "" and (url.includesCredentials or url.port.isSome): + return usFail + let host = parseHost(buffer, isSpecial) + if host.t == htNone: + return usFail + url.host = host if override: - state = stateOverride.get + return usFail + return usPathStart - template c(i = 0): char = input[pointer + i] - template has(i = 0): bool = (pointer + i < endi) - template is_special(url: URL): bool = url.scheme in SpecialSchemes - template default_port(url: URL): Option[uint16] = SpecialSchemes[url.scheme] - template start_over() = - pointer = starti - continue # skip pointer inc - template starts_with_windows_drive_letter(i: int): bool = - i + 2 <= endi and input[i] in AsciiAlpha and input[i + 1] in {':', '|'} +proc parsePort(input: openArray[char]; pointer: var int; isSpecial: bool; + url: URL; override: bool): URLState = + var buffer = "" + var i = pointer + while i < input.len: + let c = input[i] + if c in AsciiDigit: + buffer &= c + elif c in {'/', '?', '#'} or isSpecial and c == '\\' or override: + break + else: + return usFail + inc i + pointer = i + if buffer != "": + let i = parseInt32(buffer) + if i.isNone or i.get notin 0..65535: + return usFail + let port = some(uint16(i.get)) + url.port = if isSpecial and SpecialSchemes[url.scheme] == port: + none(uint16) + else: + port + if override: + return usFail + return usPathStart + +func startsWithWinDriveLetter(input: openArray[char]; i: int): bool = + if i + 1 >= input.len: + return false + return input[i] in AsciiAlpha and input[i + 1] in {':', '|'} + +proc parseFileSlash(input: openArray[char]; pointer: var int; isSpecial: bool; + base: Option[URL]; url: URL; override: bool): URLState = + if pointer < input.len and input[pointer] in {'/', '\\'}: + inc pointer + return input.parseFileHost(pointer, isSpecial, url, override) template is_normalized_windows_drive_letter(s: string): bool = s.len == 2 and s[0] in AsciiAlpha and s[1] == ':' - template is_double_dot_path_segment(s: string): bool = - s == ".." or s.equalsIgnoreCase(".%2e") or s.equalsIgnoreCase("%2e.") or - s.equalsIgnoreCase("%2e%2e") - template is_single_dot_path_segment(s: string): bool = - s == "." or s.equalsIgnoreCase("%2e") - template is_empty(path: URLPath): bool = path.ss.len == 0 - - while pointer <= endi: - assert pointer >= starti - if pointer < endi and input[pointer] in {'\n', '\t'}: - inc pointer - continue - case state - of usSchemeStart: - if has and c in AsciiAlpha: - buffer &= c.toLowerAscii() - state = usScheme - elif not override: - state = usNoScheme - dec pointer - else: - return none(URL) - of usScheme: - if has and c in AsciiAlphaNumeric + {'+', '-', '.'}: - buffer &= c.toLowerAscii() - elif has and c == ':': - if override: - if url.scheme in SpecialSchemes and buffer notin SpecialSchemes: - return url.some - if url.scheme notin SpecialSchemes and buffer in SpecialSchemes: - return url.some - if (url.includes_credentials or url.port.isSome) and - buffer == "file": - return url.some - if url.scheme == "file" and url.host.get.isempty: - return url.some - url.scheme = buffer - if override: - if url.default_port == url.port: - url.port = none(uint16) - return url.some - buffer = "" - if url.scheme == "file": - state = usFile - elif url.is_special and not base.isNone and - base.get.scheme == url.scheme: - state = usSpecialRelativeOrAuthority - elif url.is_special: - state = usSpecialAuthoritySlashes - elif has(1) and c(1) == '/': - state = usPathOrAuthority - inc pointer - else: - url.path = EmptyPath - state = usOpaquePath - elif not override: - buffer = "" - state = usNoScheme - start_over - else: - return none(URL) - of usNoScheme: - if base.isNone or base.get.path.opaque and (not has or c != '#'): - return none(URL) - elif base.get.path.opaque and has and c == '#': - url.scheme = base.get.scheme - url.path = base.get.path - url.query = base.get.query - url.fragment = "".some - state = usFragment - elif base.get.scheme != "file": - state = usRelative - dec pointer - else: - state = usFile - dec pointer - of usSpecialRelativeOrAuthority: - if has(1) and c == '/' and c(1) == '/': - state = usSpecialAuthorityIgnoreSlashes + if base.isSome and base.get.scheme == "file": + url.host = base.get.host + let bpath = base.get.path.ss + if not input.startsWithWinDriveLetter(pointer) and bpath.len > 0 and + bpath[0].is_normalized_windows_drive_letter(): + url.path.append(bpath[0]) + return usPath + +proc parseFile(input: openArray[char]; pointer: var int; base: Option[URL]; + url: URL; override: bool): URLState = + url.scheme = "file" + url.host = EmptyHost + if pointer < input.len and input[pointer] in {'/', '\\'}: + inc pointer + return input.parseFileSlash(pointer, isSpecial = true, base, url, override) + if base.isSome and base.get.scheme == "file": + url.host = base.get.host + url.path = base.get.path + url.query = base.get.query + if pointer < input.len: + let c = input[pointer] + if c == '?': + url.query = some("") inc pointer - else: - state = usRelative - dec pointer - of usPathOrAuthority: - if has and c == '/': - state = usAuthority - else: - state = usPath - dec pointer - of usRelative: - assert base.get.scheme != "file" - url.scheme = base.get.scheme - if has and c == '/': - state = usRelativePath - elif url.is_special and has and c == '\\': - state = usRelativePath - else: - url.username = base.get.username - url.password = base.get.password - url.host = base.get.host - url.port = base.get.port - url.path = base.get.path - url.query = base.get.query - if has and c == '?': - url.query = "".some - state = usQuery - elif has and c == '#': - url.fragment = "".some - state = usFragment - else: - url.query = none(string) - url.shortenPath() - state = usPath - dec pointer - of usRelativePath: - if url.is_special and has and c in {'/', '\\'}: - state = usSpecialAuthorityIgnoreSlashes - elif has and c == '/': - state = usAuthority - else: - url.username = base.get.username - url.password = base.get.password - url.host = base.get.host - url.port = base.get.port - state = usPath - dec pointer - of usSpecialAuthoritySlashes: - if has(1) and c == '/' and c(1) == '/': - state = usSpecialAuthorityIgnoreSlashes + return usQuery + elif c == '#': + url.fragment = some("") inc pointer + return usFragment else: - state = usSpecialAuthorityIgnoreSlashes - dec pointer - of usSpecialAuthorityIgnoreSlashes: - if not has or c notin {'/', '\\'}: - state = usAuthority - dec pointer - of usAuthority: - if has and c == '@': - if atsignseen: - buffer = "%40" & buffer - atsignseen = true - for c in buffer: - if c == ':' and not passwordtokenseen: - passwordtokenseen = true - continue - if passwordtokenseen: - url.password.percentEncode(c, UserInfoPercentEncodeSet) - else: - url.username.percentEncode(c, UserInfoPercentEncodeSet) - buffer = "" - elif not has or c in {'/', '?', '#'} or (url.is_special and c == '\\'): - if atsignseen and buffer == "": - return none(URL) - pointer -= buffer.len + 1 - buffer = "" - state = usHost - else: - buffer &= c - of usHost, usHostname: - if override and url.scheme == "file": - dec pointer - state = usFileHost - elif has and c == ':' and not insidebrackets: - if buffer == "": - return none(URL) - let host = parseHost(buffer, url.is_special) - if host.isNone: - return none(URL) - url.host = host - buffer = "" - state = usPort - elif (not has or c in {'/', '?', '#'}) or - (url.is_special and c == '\\'): - dec pointer - if url.is_special and buffer == "": - return none(URL) - elif override and buffer == "" and - (url.includes_credentials or url.port.isSome): - return - let host = parseHost(buffer, url.is_special) - if host.isNone: - return none(URL) - url.host = host - buffer = "" - state = usPathStart - if override: - return - else: - if c == '[': - insidebrackets = true - elif c == ']': - insidebrackets = false - buffer &= c - of usPort: - if has and c in AsciiDigit: - buffer &= c - elif (not has or c in {'/', '?', '#'}) or - (url.is_special and c == '\\') or override: - if buffer != "": - let i = parseInt32(buffer) - if i.isNone or i.get notin 0..65535: - return none(URL) - let port = uint16(i.get).some - url.port = if url.is_special and url.default_port == port: - none(uint16) - else: - port - buffer = "" - if override: - return - state = usPathStart - dec pointer - else: - return none(URL) - of usFile: - url.scheme = "file" - url.host = EmptyHost - if has and (c == '/' or c == '\\'): - state = usFileSlash - elif base.isSome and base.get.scheme == "file": - url.host = base.get.host - url.path = base.get.path - url.query = base.get.query - if has: - if c == '?': - url.query = "".some - state = usQuery - elif c == '#': - url.fragment = "".some - state = usFragment - else: - url.query = none(string) - if not starts_with_windows_drive_letter(pointer): - url.shortenPath() - else: - url.path.ss.setLen(0) - state = usPath - dec pointer - else: - state = usPath - dec pointer - of usFileSlash: - if has and (c == '/' or c == '\\'): - state = usFileHost - else: - if base.isSome and base.get.scheme == "file": - url.host = base.get.host - let bpath = base.get.path.ss - if not starts_with_windows_drive_letter(pointer) and - bpath.len > 0 and bpath[0].is_normalized_windows_drive_letter(): - url.path.append(bpath[0]) - state = usPath - dec pointer - of usFileHost: - if (not has or c in {'/', '\\', '?', '#'}): - dec pointer - if not override and buffer.is_windows_drive_letter: - state = usPath - elif buffer == "": - url.host = Host(domain: "").some - if override: - return - state = usPathStart + url.query = none(string) + if not input.startsWithWinDriveLetter(pointer): + url.shortenPath() else: - var host = parseHost(buffer, url.is_special) - if host.isNone: - return none(URL) - if host.get.domain == "localhost": - host.get.domain = "" - url.host = host - if override: - return - buffer = "" - state = usPathStart - else: - buffer &= c - of usPathStart: - if url.is_special: - state = usPath - if not has or c notin {'/', '\\'}: - dec pointer - elif not override and has and c == '?': - url.query = "".some + url.path.ss.setLen(0) + return usPath + +proc parsePathStart(input: openArray[char]; pointer: var int; isSpecial: bool; + url: URL; override: bool): URLState = + if isSpecial: + if pointer < input.len and input[pointer] in {'/', '\\'}: + inc pointer + return usPath + if pointer < input.len: + let c = input[pointer] + if not override: + if c == '?': + url.query = some("") + inc pointer + return usQuery + if c == '#': + url.fragment = some("") + inc pointer + return usFragment + if c == '/': + inc pointer + return usPath + if override and url.host.t == htNone: + url.path.append("") + inc pointer + return usDone + +proc parsePath(input: openArray[char]; pointer: var int; isSpecial: bool; + url: URL; override: bool): URLState = + var state = usPath + var buffer = "" + template is_single_dot_path_segment(s: string): bool = + s == "." or s.equalsIgnoreCase("%2e") + template is_double_dot_path_segment(s: string): bool = + s == ".." or s.equalsIgnoreCase(".%2e") or s.equalsIgnoreCase("%2e.") or + s.equalsIgnoreCase("%2e%2e") + while pointer < input.len: + let c = input[pointer] + if c == '/' or isSpecial and c == '\\' or not override and c in {'?', '#'}: + if c == '?': + url.query = some("") state = usQuery - elif not override and has and c == '#': - url.fragment = "".some + inc pointer + break + elif c == '#': + url.fragment = some("") state = usFragment - elif has: - state = usPath - if c != '/': - dec pointer - elif override and url.host.isNone: - url.path.append("") - of usPath: - if not has or c == '/' or (url.is_special and c == '\\') or - (not override and c in {'?', '#'}): - let slash_cond = not has or (c != '/' and not url.is_special and - c != '\\') - if buffer.is_double_dot_path_segment: - url.shortenPath() - if slash_cond: - url.path.append("") - elif buffer.is_single_dot_path_segment and slash_cond: + inc pointer + break + let slashCond = c != '/' and (not isSpecial or c != '\\') + if buffer.is_double_dot_path_segment: + url.shortenPath() + if slashCond: url.path.append("") - elif not buffer.is_single_dot_path_segment: - if url.scheme == "file" and url.path.is_empty and - buffer.is_windows_drive_letter: - buffer[1] = ':' - url.path.append(buffer) - buffer = "" - if has: - if c == '?': - url.query = "".some - state = usQuery - elif c == '#': - url.fragment = "".some - state = usFragment - else: - buffer.percentEncode(c, PathPercentEncodeSet) - of usOpaquePath: - if has: - if c == '?': - url.query = "".some - state = usQuery - elif c == '#': - url.fragment = "".some - state = usFragment - else: - url.path.append(percentEncode(c, ControlPercentEncodeSet)) - of usQuery: - #TODO encoding - if not has or (not override and c == '#'): - let querypercentencodeset = if url.is_special: - SpecialQueryPercentEncodeSet - else: - QueryPercentEncodeSet - url.query.get.percentEncode(buffer, querypercentencodeset) - buffer = "" - if has and c == '#': - url.fragment = "".some - state = usFragment - elif has: - buffer &= c - of usFragment: - if has: - url.fragment.get.percentEncode(c, FragmentPercentEncodeSet) + elif buffer.is_single_dot_path_segment and slashCond: + url.path.append("") + elif not buffer.is_single_dot_path_segment: + if url.scheme == "file" and url.path.ss.len == 0 and + buffer.is_windows_drive_letter: + buffer[1] = ':' + url.path.append(buffer) + buffer = "" + else: + buffer.percentEncode(c, PathPercentEncodeSet) inc pointer - return url.some + let slashCond = pointer >= input.len or input[pointer] != '/' and + (not isSpecial or input[pointer] != '\\') + if buffer.is_double_dot_path_segment: + url.shortenPath() + if slashCond: + url.path.append("") + elif buffer.is_single_dot_path_segment and slashCond: + url.path.append("") + elif not buffer.is_single_dot_path_segment: + if url.scheme == "file" and url.path.ss.len == 0 and + buffer.is_windows_drive_letter: + buffer[1] = ':' + url.path.append(buffer) + return state + +proc parseQuery(input: openArray[char]; pointer: var int; isSpecial: bool; + url: URL; override: bool): URLState = + #TODO encoding + var buffer = "" + var i = pointer + while i < input.len: + let c = input[i] + if not override and c == '#': + break + buffer &= c + inc i + pointer = i + let querypercentencodeset = if isSpecial: + SpecialQueryPercentEncodeSet + else: + QueryPercentEncodeSet + url.query.get.percentEncode(buffer, querypercentencodeset) + if pointer < input.len: + url.fragment = some("") + inc pointer + return usFragment + return usDone + +proc basicParseURL0(input: openArray[char]; base = none(URL); url = URL(); + stateOverride = none(URLState)): Option[URL] = + var pointer = 0 + var isSpecial = url.scheme in SpecialSchemes + let input = input.deleteChars({'\n', '\t'}) + let override = stateOverride.isSome + var state = stateOverride.get(usSchemeStart) + if state == usSchemeStart: + state = input.parseSchemeStart(pointer, isSpecial, base, url, override) + if override: + return none(URL) + if state == usAuthority: + state = input.parseAuthority(pointer, isSpecial, url) + if state in {usHost, usHostname}: + state = input.parseHostState(pointer, isSpecial, url, override, state) + if state == usPort: + state = input.parsePort(pointer, isSpecial, url, override) + if state == usFile: + isSpecial = true + state = input.parseFile(pointer, base, url, override) + if state == usPathStart: + state = input.parsePathStart(pointer, isSpecial, url, override) + if state == usPath: + state = input.parsePath(pointer, isSpecial, url, override) + if state == usQuery: + state = input.parseQuery(pointer, isSpecial, url, override) + if state == usFragment: + while pointer < input.len: + url.fragment.get.percentEncode(input[pointer], FragmentPercentEncodeSet) + inc pointer + if state == usFail: + return none(URL) + return some(url) + +#TODO encoding +proc basicParseURL*(input: string; base = none(URL); url = URL(); + stateOverride = none(URLState)): Option[URL] = + const NoStrip = AllChars - C0Controls - {' '} + let starti0 = input.find(NoStrip) + let starti = if starti0 == -1: 0 else: starti0 + let endi0 = input.rfind(NoStrip) + let endi = if endi0 == -1: input.len else: endi0 + 1 + return input.toOpenArray(starti, endi - 1).basicParseURL0(base, url, + stateOverride) func anchor*(url: URL): string = if url.fragment.isSome: @@ -856,7 +953,7 @@ func findZeroSeq(ipv6: array[8, uint16]): int = func serializeip(ipv6: array[8, uint16]): string = let compress = findZeroSeq(ipv6) var ignore0 = false - result = "" + result = "[" for i, n in ipv6: if ignore0: if n == 0: @@ -873,15 +970,15 @@ func serializeip(ipv6: array[8, uint16]): string = result &= toHexLower(n) if i != ipv6.high: result &= ':' + result &= ']' func serialize(host: Host): string = - if host.ipv4.isSome: - return serializeip(host.ipv4.get) - if host.ipv6.isSome: - return "[" & serializeip(host.ipv6.get) & "]" - if host.opaquehost != "": - return host.opaquehost - return host.domain + case host.t + of htNone: return "" + of htDomain: return host.domain + of htIpv4: return host.ipv4.serializeip() + of htIpv6: return host.ipv6.serializeip() + of htOpaque: return host.opaque func serialize*(path: URLPath): string {.inline.} = if path.opaque: @@ -919,14 +1016,14 @@ else: func serialize*(url: URL; excludefragment = false; excludepassword = false): string = result = url.scheme & ':' - if url.host.isSome: + if url.host.t != htNone: result &= "//" - if url.includes_credentials: + if url.includesCredentials: result &= url.username if not excludepassword and url.password != "": result &= ':' & url.password result &= '@' - result &= url.host.get.serialize + result &= url.host.serialize() if url.port.isSome: result &= ':' & $url.port.get elif not url.path.opaque and url.path.ss.len > 1 and url.path.ss[0] == "": @@ -974,10 +1071,7 @@ proc setHref(url: URL; s: string): Err[JSError] {.jsfset: "href".} = purl.get.cloneInto(url) func isIP*(url: URL): bool = - if url.host.isNone: - return false - let host = url.host.get - return host.ipv4.isSome or host.ipv6.isSome + return url.host.t in {htIpv4, htIpv6} #https://url.spec.whatwg.org/#concept-urlencoded-serializer proc parseFromURLEncoded(input: string): seq[(string, string)] = @@ -1106,13 +1200,23 @@ proc origin*(url: URL): Origin = of "ftp", "http", "https", "ws", "wss": return Origin( t: otTuple, - tup: (url.scheme, url.host.get, url.port, none(string)) + tup: (url.scheme, url.host, url.port, none(string)) ) of "file": return Origin(t: otOpaque, s: $url) else: return Origin(t: otOpaque, s: $url) +proc `==`(a, b: Host): bool = + if a.t != b.t: + return false + case a.t + of htNone: return true + of htDomain: return a.domain == b.domain + of htOpaque: return a.opaque == b.opaque + of htIpv4: return a.ipv4 == b.ipv4 + of htIpv6: return a.ipv6 == b.ipv6 + proc `==`*(a, b: Origin): bool {.error.} = discard @@ -1157,11 +1261,11 @@ proc password(url: URL; password: string) {.jsfset.} = url.password = password.percentEncode(UserInfoPercentEncodeSet) proc host*(url: URL): string {.jsfget.} = - if url.host.isNone: + if url.host.t == htNone: return "" if url.port.isNone: - return url.host.get.serialize() - return url.host.get.serialize() & ':' & $url.port.get + return url.host.serialize() + return url.host.serialize() & ':' & $url.port.get proc setHost*(url: URL; s: string) {.jsfset: "host".} = if url.path.opaque: @@ -1169,9 +1273,7 @@ proc setHost*(url: URL; s: string) {.jsfset: "host".} = discard basicParseURL(s, url = url, stateOverride = some(usHost)) proc hostname*(url: URL): string {.jsfget.} = - if url.host.isNone: - return "" - return url.host.get.serialize() + return url.host.serialize() proc setHostname*(url: URL; s: string) {.jsfset: "hostname".} = if url.path.opaque: diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index a4345251..14e93bda 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -147,7 +147,7 @@ func stripAndCollapse*(s: string): string = else: result &= ' ' -func until*(s: string; c: set[char]; starti = 0): string = +func until*(s: openArray[char]; c: set[char]; starti = 0): string = result = "" for i in starti ..< s.len: if s[i] in c: @@ -364,14 +364,10 @@ proc percentEncode*(append: var string; c: char; set: set[char]; append.pushHex(c) proc percentEncode*(append: var string; s: string; set: set[char]; - spaceAsPlus = false) {.inline.} = + spaceAsPlus = false) = for c in s: append.percentEncode(c, set, spaceAsPlus) -func percentEncode*(c: char; set: set[char]; spaceAsPlus = false): string - {.inline.} = - result.percentEncode(c, set, spaceAsPlus) - func percentEncode*(s: string; set: set[char]; spaceAsPlus = false): string = result.percentEncode(s, set, spaceAsPlus) @@ -413,14 +409,11 @@ func dqEscape*(s: string): string = func join*(ss: openArray[string]; sep: char): string = if ss.len == 0: return "" - var n = ss.high - 1 - for i in 0..high(ss): - n += ss[i].len - result = newStringOfCap(n) - result &= ss[0] - for i in 1..high(ss): - result &= sep - result &= ss[i] + var s = ss[0] + for i in 1 ..< ss.len: + s &= sep + s &= ss[i] + return s proc passRealloc*(opaque, p: pointer; size: csize_t): pointer {.cdecl.} = return realloc(p, size) @@ -512,21 +505,16 @@ proc expandPath*(path: string): string = return $p.pw_dir / path.substr(usr.len) return path -func deleteChars*(s: string; todel: set[char]): string = - let i = s.find(todel) - if i == -1: - return s - var rs = s.substr(0, i - 1) - for j in i + 1 ..< s.len: - if s[j] in todel: - continue - rs &= s[j] - return rs +func deleteChars*(s: openArray[char]; todel: set[char]): string = + result = newStringOfCap(s.len) + for c in s: + if c notin todel: + result &= c func replaceControls*(s: string): string = result = newStringOfCap(s.len) for c in s: - if c in Controls - {' '}: + if c in Controls: result &= '^' result &= c.getControlLetter() else: |