# See https://url.spec.whatwg.org/#url-parsing.
import std/algorithm
import std/options
import std/strutils
import std/tables
import std/unicode

import lib/punycode
import monoucha/fromjs
import monoucha/javascript
import monoucha/jserror
import monoucha/libunicode
import monoucha/quickjs
import types/blob
import types/opt
import utils/luwrap
import utils/map
import utils/twtstr

include res/map/idna_gen

type
  # States of the URL parser state machine.
  URLState = enum
    usFail, usDone, usSchemeStart, usNoScheme, usFile, usFragment, usAuthority,
    usPath, usQuery, usHost, usHostname, usPort, usPathStart

  BlobURLEntry* = object
    obj: Blob #TODO blob urls

  # A URL path: either one opaque string or a list of segments.
  URLPath* = object
    case opaque*: bool
    of true:
      s*: string
    else:
      ss*: seq[string]

  HostType = enum
    htNone, htDomain, htIpv4, htIpv6, htOpaque

  # Parsed host. htNone is what the host parsers in this file return on
  # failure.
  Host = object
    case t: HostType
    of htNone: discard
    of htDomain: domain: string
    of htIpv4: ipv4: uint32
    of htIpv6: ipv6: array[8, uint16]
    of htOpaque: opaque: string

  URLSearchParams* = ref object
    list*: seq[tuple[name, value: string]]
    url: URL

  URL* = ref object
    scheme*: string
    username* {.jsget.}: string
    password* {.jsget.}: string
    port: Option[uint16]
    host: Host
    path*: URLPath
    query*: Option[string]
    fragment: Option[string]
    blob: Option[BlobURLEntry]
    searchParamsInternal: URLSearchParams

  OriginType* = enum
    otOpaque, otTuple

  TupleOrigin* = tuple
    scheme: string
    host: Host
    port: Option[uint16]
    domain: Option[string]

  Origin* = ref object
    case t*: OriginType
    of otOpaque: s: string
    of otTuple: tup: TupleOrigin

jsDestructor(URL)
jsDestructor(URLSearchParams)

const EmptyPath = URLPath(opaque: true, s: "")
const EmptyHost = Host(t: htDomain, domain: "")

# Schemes treated as "special", mapped to their default port (if any).
const SpecialSchemes = {
  "ftp": some(21u16),
  "file": none(uint16),
  "http": some(80u16),
  "https": some(443u16),
  "ws": some(80u16),
  "wss": some(443u16),
}.toTable()

func parseIpv6(input: openArray[char]): Option[array[8, uint16]] =
  ## Parses the inside of a bracketed IPv6 literal (the caller strips the
  ## brackets) into its eight 16-bit pieces. Returns `none` on any
  ## malformed input, including an empty one.
  var pieceindex = 0
  var compress = -1 # piece index where "::" was seen, or -1
  var pointer = 0
  var address: array[8, uint16]
  template c(i = 0): char = input[pointer + i]
  template has(i = 0): bool = (pointer + i < input.len)
  template failure(): Option[array[8, uint16]] = none(array[8, uint16])
  # `has` must be checked first: for an empty input (host "[]") reading `c`
  # would index past the end of the array.
  if has and c == ':':
    if not has(1) or c(1) != ':':
      return failure
    pointer += 2
    inc pieceindex
    compress = pieceindex
  while has:
    if pieceindex == 8:
      return failure
    if c == ':':
      if compress != -1: # only one "::" is allowed
        return failure
      inc pointer
      inc pieceindex
      compress = pieceindex
      continue
    var value: uint16 = 0
    var length = 0
    while length < 4 and has and c in AsciiHexDigit:
      value = value * 0x10 + uint16(c.hexValue)
      inc pointer
      inc length
    if has and c == '.':
      # Embedded dotted IPv4 tail: rewind and re-read the digits as decimal.
      if length == 0:
        return failure
      pointer -= length
      if pieceindex > 6: # the IPv4 tail needs two free pieces
        return failure
      var numbersseen = 0
      while has:
        var ipv4piece = -1
        if numbersseen > 0:
          if c == '.' and numbersseen < 4:
            inc pointer
          else:
            return failure
        if not has or c notin AsciiDigit:
          return failure
        while has and c in AsciiDigit:
          if ipv4piece == -1:
            ipv4piece = c.decValue
          elif ipv4piece == 0: # leading zeroes are rejected
            return failure
          else:
            ipv4piece = ipv4piece * 10 + c.decValue
          if ipv4piece > 255:
            return failure
          inc pointer
        address[pieceindex] = address[pieceindex] * 0x100 + uint16(ipv4piece)
        inc numbersseen
        if numbersseen == 2 or numbersseen == 4:
          inc pieceindex
      if numbersseen != 4:
        return failure
      break
    elif has:
      if c == ':':
        inc pointer
        if not has: # a single trailing ':' is invalid
          return failure
      else:
        return failure
    address[pieceindex] = value
    inc pieceindex
  if compress != -1:
    # Move the pieces parsed after "::" to the end of the address; the gap
    # left behind stays zero.
    var swaps = pieceindex - compress
    pieceindex = 7
    while pieceindex != 0 and swaps > 0:
      let sp = address[pieceindex]
      address[pieceindex] = address[compress + swaps - 1]
      address[compress + swaps - 1] = sp
      dec pieceindex
      dec swaps
  elif pieceindex != 8:
    return failure
  return address.some

func parseIpv4Number(s: string): uint32 =
  ## Parses one dot-separated part of an IPv4 address. A "0x"/"0X" prefix
  ## selects hexadecimal, any other leading '0' (when at least two characters
  ## long) selects octal, everything else is decimal.
  ## Returns 0 if nothing is left after stripping the prefix, and
  ## `uint32.high` if the remaining digits cannot be parsed.
  var input = s
  var R = 10
  if input.len >= 2 and input[0] == '0':
    if input[1] in {'x', 'X'}:
      input.delete(0..1)
      R = 16
    else:
      input.delete(0..0)
      R = 8
  if input == "":
    return 0
  case R
  of 8: return parseOctUInt32(input, allowSign = false).get(uint32.high)
  of 10: return parseUInt32(input, allowSign = false).get(uint32.high)
  of 16: return parseHexUInt32(input, allowSign = false).get(uint32.high)
  else: return 0

func parseIpv4(input: string): Option[uint32] =
  ## Parses a dotted IPv4 address made of one to four numeric parts, with an
  ## optional single trailing dot. Returns `none` on failure.
  var numbers: seq[uint32] = @[]
  var prevEmpty = false
  var i = 0
  for part in input.split('.'):
    # An empty part is only tolerated as the very last one (trailing dot).
    if i > 4 or prevEmpty:
      return none(uint32)
    inc i
    if part == "":
      prevEmpty = true
      continue
    let num = parseIpv4Number(part)
    # This also rejects the uint32.high failure sentinel of parseIpv4Number.
    if num notin 0u32..255u32:
      return none(uint32)
    numbers.add(num)
  # Without this guard "1.2.3.4.0" (five numbers, last one zero) used to be
  # accepted as 1.2.3.4, and an input without any number indexed an empty seq.
  if numbers.len notin 1..4:
    return none(uint32)
  # Compare in 64 bits so that the single-number case does not shift a
  # 32-bit value by 32 bits.
  if uint64(numbers[^1]) >= 1u64 shl ((5 - numbers.len) * 8):
    return none(uint32)
  var ipv4 = uint32(numbers[^1])
  for i in 0 ..< numbers.high:
    let n = uint32(numbers[i])
    ipv4 += n * (1u32 shl ((3 - i) * 8))
  return some(ipv4)

const ForbiddenHostChars = {
  char(0x00), '\t', '\n', '\r', ' ', '#', '/', ':', '<', '>', '?', '@', '[',
  '\\', ']', '^', '|'
}
const ForbiddenDomainChars = ForbiddenHostChars + {'%'}

func opaqueParseHost(input: string): Host =
  ## Host parser used for non-special schemes: fails (htNone) if any
  ## forbidden host character is present, otherwise percent-encodes the input
  ## with ControlPercentEncodeSet and returns it as an opaque host.
  var o = ""
  for c in input:
    if c in ForbiddenHostChars:
      return Host(t: htNone)
    o.percentEncode(c, ControlPercentEncodeSet)
  return Host(t: htOpaque, opaque: o)

func endsInNumber(input: string): bool =
  ## Reports whether the last dot-separated label of `input` (ignoring one
  ## trailing dot) consists only of decimal digits, or of "0x"/"0X" followed
  ## by hex digits. Returns false when no '.' precedes that label.
  if input.len == 0:
    return false
  var i = input.high
  if input[i] == '.':
    dec i
  i = input.rfind('.', last = i)
  if i < 0:
    return false
  inc i
  if i + 1 < input.len and input[i] == '0' and input[i + 1] in {'x', 'X'}:
    # hex?
    i += 2
    while i < input.len and input[i] != '.':
      if input[i] notin AsciiHexDigit:
        return false
      inc i
  else:
    while i < input.len and input[i] != '.':
      if input[i] notin AsciiDigit:
        return false
      inc i
  return true

type
  IDNATableStatus = enum
    itsValid, itsIgnored, itsMapped, itsDeviation, itsDisallowed

func getIdnaTableStatus(r: Rune): IDNATableStatus =
  ## Looks up the IDNA mapping table status of `r` in the generated tables
  ## (the *Low tables for code points that fit in 16 bits, *High otherwise).
  ## Anything not found in a table is reported as valid.
  let i = uint32(r)
  if i <= high(uint16):
    let u = uint16(i)
    if u in IgnoredLow:
      return itsIgnored
    if u in DisallowedLow or DisallowedRangesLow.isInRange(u):
      return itsDisallowed
    if MappedMapLow.isInMap(u):
      return itsMapped
  else:
    if i in IgnoredHigh:
      return itsIgnored
    if i in DisallowedHigh or DisallowedRangesHigh.isInRange(i):
      return itsDisallowed
    if MappedMapHigh.isInMap(uint32(i)):
      return itsMapped
  return itsValid

func getIdnaMapped(r: Rune): string =
  ## Returns the replacement string of a mapped code point. Only meaningful
  ## for runes whose status is itsMapped.
  let i = uint32(r)
  if i <= high(uint16):
    let u = uint16(i)
    let n = MappedMapLow.searchInMap(u)
    if n != -1:
      return $MappedMapLow[n].mapped
  let n = MappedMapHigh.searchInMap(i)
  return $MappedMapHigh[n].mapped

func processIdna(str: string; beStrict: bool): string =
  ## Maps, normalizes and validates a domain name, decoding "xn--" labels
  ## back to Unicode. Returns "" on error.
  # CheckHyphens = false
  # CheckBidi = true
  # CheckJoiners = true
  # UseSTD3ASCIIRules = beStrict (but STD3 is not implemented)
  # Transitional_Processing = false
  # VerifyDnsLength = beStrict
  var mapped: seq[Rune] = @[]
  for r in str.runes():
    let status = getIdnaTableStatus(r)
    case status
    of itsDisallowed: return "" #error
    of itsIgnored: discard
    of itsMapped: mapped &= getIdnaMapped(r).toRunes()
    of itsDeviation: mapped &= r
    of itsValid: mapped &= r
  if mapped.len == 0: return
  mapped = mapped.normalize()
  var cr: CharRange
  {.cast(noSideEffect).}:
    cr_init(addr cr, nil, passRealloc)
    let r = unicode_general_category(addr cr, "Mark")
    assert r == 0
  # Free the range table on every exit path; previously the error returns
  # below skipped cr_free and leaked it.
  defer: cr_free(addr cr)
  var labels = ""
  for label in ($mapped).split('.'):
    if label.startsWith("xn--"):
      try:
        let s = punycode.decode(label.substr("xn--".len))
        let x0 = s.toRunes()
        let x1 = normalize(x0)
        if x0 != x1:
          return "" #error
        # CheckHyphens is false
        if x0.len > 0:
          # Reject labels whose first code point falls in one of the ranges
          # loaded into cr above (requested as category "Mark").
          let cps = cast[ptr UncheckedArray[u32pair]](cr.points)
          let c = uint32(x0[0])
          let L = cr.len div 2 - 1
          if cps.toOpenArray(0, L).binarySearch(c, cmpRange) != -1:
            return "" #error
        for r in x0:
          if r == Rune('.'):
            return "" #error
          let status = getIdnaTableStatus(r)
          if status in {itsDisallowed, itsIgnored, itsMapped}:
            return "" #error
          #TODO check joiners
          #TODO check bidi
        if labels.len > 0:
          labels &= '.'
        labels &= s
      except PunyError:
        return "" #error
    else:
      if labels.len > 0:
        labels &= '.'
      labels &= label
  return labels

func unicodeToAscii(s: string; beStrict: bool): string =
  ## Runs processIdna on `s`, then Punycode-encodes every label that
  ## contains a non-ASCII character. Returns "" on error.
  let processed = s.processIdna(beStrict)
  var labels = ""
  var all = 0
  for label in processed.split('.'):
    var s = ""
    # Test the label itself. Testing the still-empty `s` made this branch
    # unreachable, so non-ASCII labels were never encoded.
    if AllChars - Ascii in label:
      try:
        s = "xn--" & punycode.encode(label)
      except PunyError:
        return "" #error
    else:
      s = label
    if beStrict: # VerifyDnsLength
      let rl = s.runeLen()
      if rl notin 1..63:
        return ""
      all += rl
    if labels.len > 0:
      labels &= '.'
    labels &= s
  if beStrict: # VerifyDnsLength
    if all notin 1..253:
      return "" #error
  return labels

func domainToAscii(domain: string; bestrict = false): string =
  ## Converts a domain to its ASCII form. Domains that are pure ASCII and
  ## have no "xn--" label are simply lowercased unless `bestrict` is set.
  var needsprocessing = false
  for s in domain.split('.'):
    if s.startsWith("xn--") or AllChars - Ascii in s:
      needsprocessing = true
      break
  if bestrict or needsprocessing:
    # Note: we don't implement STD3 separately, it's always true
    return domain.unicodeToAscii(bestrict)
  return domain.toLowerAscii()

func parseHost(input: string; special: bool): Host =
  ## Parses a host string. `special` selects domain/IPv4 parsing; otherwise
  ## the input becomes an opaque host. Bracketed input is parsed as IPv6 in
  ## both cases. Returns a Host with `t == htNone` on failure.
  if input.len == 0:
    return Host(t: htNone)
  if input[0] == '[':
    if input[^1] != ']':
      return Host(t: htNone)
    let ipv6 = parseIpv6(input.toOpenArray(1, input.high - 1))
    if ipv6.isNone:
      return Host(t: htNone)
    return Host(
      t: htIpv6,
      ipv6: ipv6.get
    )
  if not special:
    return opaqueParseHost(input)
  let domain = percentDecode(input)
  let asciiDomain = domain.domainToAscii()
  if asciiDomain == "" or ForbiddenDomainChars in asciiDomain:
    return Host(t: htNone)
  if asciiDomain.endsInNumber():
    # NOTE(review): when IPv4 parsing fails this falls through and treats
    # the input as a domain - confirm that this is intended.
    let ipv4 = parseIpv4(asciiDomain)
    if ipv4.isSome:
      return Host(t: htIpv4, ipv4: ipv4.get)
  return Host(t: htDomain, domain: asciiDomain)

proc shortenPath(url: URL) =
  ## Removes the last path segment, except that a lone Windows drive letter
  ## segment ("C:") of a file URL is kept.
  assert not url.path.opaque
  if url.scheme == "file" and url.path.ss.len == 1 and
      url.path.ss[0].len == 2 and url.path.ss[0][0] in AsciiAlpha and
      url.path.ss[0][1] == ':':
    return
  if url.path.ss.len > 0:
    discard url.path.ss.pop()

proc append(path: var URLPath; s: string) =
  ## Appends `s` to the opaque string, or adds it as a new segment.
  if path.opaque:
    path.s &= s
  else:
    path.ss.add(s)

func includesCredentials(url: URL): bool =
  ## True if the URL has a non-empty username or password.
  return url.username != "" or url.password != ""

# True for a two-character string made of an ASCII letter and ':' or '|'.
template is_windows_drive_letter(s: string): bool =
  s.len == 2 and s[0] in AsciiAlpha and (s[1] == ':' or s[1] == '|')

# True if the serialized host is non-empty and the scheme is not "file".
template canHaveUsernamePasswordPort(url: URL): bool =
  url.host.serialize() != "" and url.scheme != "file"

proc parseOpaquePath(input: openArray[char]; pointer: var int; url: URL):
    URLState =
  ## Consumes an opaque path starting at `pointer`, percent-encoding each
  ## character into `url.path.s`. Stops after '?' or '#' (initializing the
  ## query or fragment and returning the matching state), or returns usDone
  ## at the end of input. `pointer` is left just past the consumed input.
  while pointer < input.len:
    let c = input[pointer]
    if c == '?':
      url.query = some("")
      inc pointer
      return usQuery
    elif c == '#':
      url.fragment = some("")
      inc pointer
      return usFragment
    else:
      url.path.s.percentEncode(c, ControlPercentEncodeSet)
    inc pointer
  return usDone

# NOTE(review): the definition below is cut off at this point in the file.
proc parseSpecialAuthority
import std/strutils
import std/times
import io/urlfilter
import js/error
import js/javascript
import js/regex
import types/url
import types/opt
import utils/twtstr
type
  # A single cookie. Most fields carry the {.jsget.} pragma; `created` does
  # not.
  Cookie* = ref object
    created: int64 # unix time
    name {.jsget.}: string
    value {.jsget.}: string
    expires {.jsget.}: int64 # unix time
    secure {.jsget.}: bool
    httponly {.jsget.}: bool
    samesite {.jsget.}: bool
    domain {.jsget.}: string
    path {.jsget.}: string
  # A list of cookies together with a URL filter.
  # NOTE(review): presumably `filter` decides which URLs may use this jar -
  # confirm against the callers.
  CookieJar* = ref object
    filter*: URLFilter
    cookies*: seq[Cookie]
jsDestructor(Cookie)
proc parseCookieDate(val: string): Option[DateTime] =
# cookie-date
const Delimiters = {'\t', ' '..'/', ';'..'@', '['..'`', '{'..'~'}
const NonDigit = AllChars - AsciiDigit
var foundTime = false
var foundDayOfMonth = false
var foundMonth = false
var foundYear = false
# date-token-list
var time: array[3, int]
var dayOfMonth: int
var month: int
var year: int
for dateToken in val.split(Delimiters):
if dateToken == "": continue # *delimiter
if not foundTime:
block timeBlock: # test for time
let hmsTime = dateToken.until(NonDigit - {':'})
var i = 0
for timeField in hmsTime.split(':'):
if i > 2: break timeBlock # too many time fields
# 1*2DIGIT
if timeField.len != 1 and timeField.len != 2: break timeBlock
var timeFields: array[3, int]
for c in timeField:
if c notin AsciiDigit: break timeBlock
timeFields[i] *= 10
timeFields[i] += c.decValue
time = timeFields
inc i
if i != 3: break timeBlock
foundTime = true
continue
if not foundDayOfMonth:
block dayOfMonthBlock: # test for day-of-month
let digits = dateToken.until(NonDigit)
if digits.len != 1 and digits.len != 2: break dayOfMonthBlock
var n = 0
for c in digits:
if c notin AsciiDigit: break dayOfMonthBlock
n *= 10
n += c.decValue
dayOfMonth = n
foundDayOfMonth = true
continue
if not foundMonth:
block monthBlock: # test for month
if dateToken.len < 3: break monthBlock
case dateToken.substr(0, 2).toLowerAscii()
of "jan": month = 1
of "feb": month = 2
of "mar": month = 3
of "apr": month = 4
of "may": month = 5
of "jun": month = 6
of "jul": month = 7
of "aug": month = 8
of "sep": month = 9
of "oct": month = 10
of "nov": month = 11
of "dec": month = 12
else: break monthBlock
foundMonth = true
continue
if not foundYear:
block <