diff options
author | bptato <nincsnevem662@gmail.com> | 2024-04-18 20:53:36 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-04-18 21:12:50 +0200 |
commit | c23ea622d1b34d3d290670e60e231f4f236fec50 (patch) | |
tree | 739abbf0490c28f5446469d7244a20a9a0a03502 /src | |
parent | 38db6ab5be80b255fe40df715adc3b5852875cdd (diff) | |
download | chawan-c23ea622d1b34d3d290670e60e231f4f236fec50.tar.gz |
url, twtstr: correct number parsing
* do not use std's parse*Int; they accept weird stuff that we do not want to accept in any case
* fix bug in parseHost where a parseIpv4 failure would result in an empty host
* do not use isDigit, isAlphaAscii
* improve parse*IntImpl error handling
Diffstat (limited to 'src')
-rw-r--r-- | src/config/toml.nim | 17 | ||||
-rw-r--r-- | src/loader/loader.nim | 13 | ||||
-rw-r--r-- | src/types/url.nim | 32 | ||||
-rw-r--r-- | src/utils/charcategory.nim | 1 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 80 |
5 files changed, 75 insertions, 68 deletions
diff --git a/src/config/toml.nim b/src/config/toml.nim index f28c4916..f8ab9a08 100644 --- a/src/config/toml.nim +++ b/src/config/toml.nim @@ -1,6 +1,5 @@ import std/options import std/streams -import std/strutils import std/tables import std/times import std/unicode @@ -441,21 +440,19 @@ proc consumeNumber(state: var TomlParser; c: char): TomlResult = case numType of NUMBER_INTEGER: let val = parseInt64(repr) - if not val.isSome: + if val.isNone: return state.err("invalid integer") return ok(TomlValue(t: tvtInteger, i: val.get)) of NUMBER_HEX: - try: - let val = parseHexInt(repr) - return ok(TomlValue(t: tvtInteger, i: val)) - except ValueError: + let val = parseHexInt64(repr) + if val.isNone: return state.err("invalid hexadecimal number") + return ok(TomlValue(t: tvtInteger, i: val.get)) of NUMBER_OCT: - try: - let val = parseOctInt(repr) - return ok(TomlValue(t: tvtInteger, i: val)) - except ValueError: + let val = parseOctInt64(repr) + if val.isNone: return state.err("invalid octal number") + return ok(TomlValue(t: tvtInteger, i: val.get)) of NUMBER_FLOAT: let val = parseFloat64(repr) return ok(TomlValue(t: tvtFloat, f: val)) diff --git a/src/loader/loader.nim b/src/loader/loader.nim index 6a8e9164..21511c56 100644 --- a/src/loader/loader.nim +++ b/src/loader/loader.nim @@ -362,14 +362,11 @@ func find(cacheMap: seq[CachedItem]; id: int): int = proc loadFromCache(ctx: LoaderContext; client: ClientData; handle: LoaderHandle; request: Request) = - var id = -1 - var startFrom = 0 - try: - id = parseInt(request.url.pathname) - if request.url.query.isSome: - startFrom = parseInt(request.url.query.get) - except ValueError: - discard + let id = parseInt32(request.url.pathname).get(-1) + let startFrom = if request.url.query.isSome: + parseInt32(request.url.query.get).get(0) + else: + 0 let n = client.cacheMap.find(id) if n != -1: let ps = newPosixStream(client.cacheMap[n].path, O_RDONLY, 0) diff --git a/src/types/url.nim b/src/types/url.nim index b72fb8af..3426bf39 
100644 --- a/src/types/url.nim +++ b/src/types/url.nim @@ -166,7 +166,7 @@ func parseIpv6(input: string): Option[array[8, uint16]] = return failure return address.some -func parseIpv4Number(s: string): int = +func parseIpv4Number(s: string): uint32 = var input = s var R = 10 if input.len >= 2 and input[0] == '0': @@ -178,19 +178,16 @@ func parseIpv4Number(s: string): int = R = 8 if input == "": return 0 - var output = 0 - try: - case R - of 8: output = parseOctInt(input) - of 10: output = parseInt(input) - of 16: output = parseHexInt(input) - else: discard - except ValueError: - return -1 + var output = 0u32 + case R + of 8: output = parseOctUInt32(input, allowSign = false).get(uint32.high) + of 10: output = parseUInt32(input, allowSign = false).get(uint32.high) + of 16: output = parseHexUInt32(input, allowSign = false).get(uint32.high) + else: discard return output func parseIpv4(input: string): Option[uint32] = - var numbers: seq[int] = @[] + var numbers: seq[uint32] = @[] var prevEmpty = false var i = 0 for part in input.split('.'): @@ -201,10 +198,10 @@ func parseIpv4(input: string): Option[uint32] = prevEmpty = true continue let num = parseIpv4Number(part) - if num notin 0..255: + if num notin 0u32..255u32: return none(uint32) numbers.add(num) - if numbers[^1] >= 1 shl ((5 - numbers.len) * 8): + if numbers[^1] >= 1u32 shl ((5 - numbers.len) * 8): return none(uint32) var ipv4 = uint32(numbers[^1]) for i in 0 ..< numbers.high: @@ -392,7 +389,7 @@ func parseHost(input: string; special: bool): Option[Host] = if input[0] == '[': if input[^1] != ']': return none(Host) - return Host(ipv6: parseIpv6(input.substr(1, input.high - 1))).some + return some(Host(ipv6: parseIpv6(input.substr(1, input.high - 1)))) if not special: return opaqueParseHost(input) let domain = percentDecode(input) @@ -403,8 +400,9 @@ func parseHost(input: string; special: bool): Option[Host] = return none(Host) if asciiDomain.get.len > 0 and asciiDomain.get.endsInNumber(): let ipv4 = 
parseIpv4(asciiDomain.get) - return Host(ipv4: ipv4).some - return Host(domain: asciiDomain.get).some + if ipv4.isSome: + return some(Host(ipv4: ipv4)) + return some(Host(domain: asciiDomain.get)) func isempty(host: Host): bool = return host.domain == "" and host.ipv4.isNone and host.ipv6.isNone and @@ -477,7 +475,7 @@ proc basicParseURL*(input: string; base = none(URL); url: URL = URL(); continue case state of usSchemeStart: - if has and c.isAlphaAscii(): + if has and c in AsciiAlpha: buffer &= c.toLowerAscii() state = usScheme elif not override: diff --git a/src/utils/charcategory.nim b/src/utils/charcategory.nim index 55cbb930..e2c2a045 100644 --- a/src/utils/charcategory.nim +++ b/src/utils/charcategory.nim @@ -7,5 +7,6 @@ const AsciiAlpha* = (AsciiUpperAlpha + AsciiLowerAlpha) const NonAscii* = {char(0x80)..char(0xFF)} const AsciiDigit* = {'0'..'9'} const AsciiAlphaNumeric* = AsciiAlpha + AsciiDigit +const AsciiOctDigit* = {'0'..'7'} const AsciiHexDigit* = (AsciiDigit + {'a'..'f', 'A'..'F'}) const AsciiWhitespace* = {' ', '\n', '\r', '\t', '\f'} diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index 31ceeec2..713e8f40 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -310,7 +310,8 @@ func japaneseNumber*(i: int): string = dec n # Implements https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#signed-integers -func parseIntImpl[T: SomeSignedInt](s: string): Option[T] = +func parseIntImpl[T: SomeSignedInt](s: string; allowed: set[char]; radix: T): + Option[T] = var sign: T = 1 var i = 0 if i < s.len and s[i] == '-': @@ -318,48 +319,61 @@ func parseIntImpl[T: SomeSignedInt](s: string): Option[T] = inc i elif i < s.len and s[i] == '+': inc i - if i == s.len or s[i] notin AsciiDigit: + if i == s.len: return none(T) - var integer = T(decValue(s[i])) - inc i - while i < s.len and isDigit(s[i]): - if unlikely(integer != 0 and high(T) div 10 < integer): - return none(T) # overflow - integer *= 10 - let c = T(decValue(s[i])) - if 
unlikely(high(T) - c < integer): - return none(T) # overflow + var integer: T = 0 + while i < s.len: + if s[i] notin allowed: + return none(T) # invalid + let c = T(hexValue(s[i])) + if integer != 0: + if unlikely(T.high div radix - c < integer or + T.low div radix + c > integer): + return none(T) # overflow + integer *= radix integer += c inc i return some(sign * integer) +func parseIntImpl[T: SomeSignedInt](s: string): Option[T] = + return parseIntImpl[T](s, AsciiDigit, 10) + func parseInt32*(s: string): Option[int32] = return parseIntImpl[int32](s) func parseInt64*(s: string): Option[int64] = return parseIntImpl[int64](s) -func parseUIntImpl[T: SomeUnsignedInt](s: string; allowSign: static bool): - Option[T] = +func parseOctInt64*(s: string): Option[int64] = + return parseIntImpl[int64](s, AsciiOctDigit, 8) + +func parseHexInt64*(s: string): Option[int64] = + return parseIntImpl[int64](s, AsciiHexDigit, 16) + +func parseUIntImpl[T: SomeUnsignedInt](s: string; allowSign: static bool; + allowed: set[char]; radix: T): Option[T] = var i = 0 when allowSign: if i < s.len and s[i] == '+': inc i - if i == s.len or s[i] notin AsciiDigit: - return none(T) - var integer = T(decValue(s[i])) - inc i - while i < s.len and s[i] in AsciiDigit: - if unlikely(integer != 0 and high(T) div 10 < integer): - return none(T) # overflow - integer *= 10 - let c = T(decValue(s[i])) - if unlikely(high(T) - c < integer): + if i == s.len: + return none(T) + var integer: T = 0 + while i < s.len: + if s[i] notin allowed: + return none(T) # invalid + let c = T(hexValue(s[i])) + if integer != 0 and unlikely(high(T) div radix - c < integer): return none(T) # overflow - integer += T(c) + integer *= radix + integer += c inc i return some(integer) +func parseUIntImpl[T: SomeUnsignedInt](s: string; allowSign: static bool): + Option[T] = + return parseUIntImpl[T](s, allowSign, AsciiDigit, 10) + func parseUInt8*(s: string; allowSign: static bool): Option[uint8] = return parseUIntImpl[uint8](s, 
allowSign) @@ -369,6 +383,12 @@ func parseUInt16*(s: string; allowSign: static bool): Option[uint16] = func parseUInt32*(s: string; allowSign: static bool): Option[uint32] = return parseUIntImpl[uint32](s, allowSign) +func parseOctUInt32*(s: string; allowSign: static bool): Option[uint32] = + return parseUIntImpl[uint32](s, allowSign, AsciiOctDigit, 8) + +func parseHexUInt32*(s: string; allowSign: static bool): Option[uint32] = + return parseUIntImpl[uint32](s, allowSign, AsciiHexDigit, 16) + #TODO not sure where this algorithm is from... # (probably from CSS) func parseFloat64*(s: string): float64 = @@ -378,27 +398,23 @@ func parseFloat64*(s: string): float64 = var integer: float64 = 0 var f: float64 = 0 var e: float64 = 0 - var i = 0 if i < s.len and s[i] == '-': sign = -1f64 inc i elif i < s.len and s[i] == '+': inc i - - while i < s.len and isDigit(s[i]): + while i < s.len and s[i] in AsciiDigit: integer *= 10 integer += float64(decValue(s[i])) inc i - if i < s.len and s[i] == '.': inc i - while i < s.len and isDigit(s[i]): + while i < s.len and s[i] in AsciiDigit: f *= 10 f += float64(decValue(s[i])) inc i inc d - if i < s.len and (s[i] == 'e' or s[i] == 'E'): inc i if i < s.len and s[i] == '-': @@ -406,12 +422,10 @@ func parseFloat64*(s: string): float64 = inc i elif i < s.len and s[i] == '+': inc i - - while i < s.len and isDigit(s[i]): + while i < s.len and s[i] in AsciiDigit: e *= 10 e += float64(decValue(s[i])) inc i - return sign * (integer + f * pow(10, float64(-d))) * pow(10, (float64(t) * e)) const ControlPercentEncodeSet* = Controls + NonAscii |