diff options
author | bptato <nincsnevem662@gmail.com> | 2024-04-18 20:53:36 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-04-18 21:12:50 +0200 |
commit | c23ea622d1b34d3d290670e60e231f4f236fec50 (patch) | |
tree | 739abbf0490c28f5446469d7244a20a9a0a03502 /src/utils | |
parent | 38db6ab5be80b255fe40df715adc3b5852875cdd (diff) | |
download | chawan-c23ea622d1b34d3d290670e60e231f4f236fec50.tar.gz |
url, twtstr: correct number parsing
* do not use std's parse*Int; they accept weird stuff that we do not want to accept in any case
* fix bug in parseHost where a parseIpv4 failure would result in an empty host
* do not use isDigit, isAlphaAscii
* improve parse*IntImpl error handling
Diffstat (limited to 'src/utils')
-rw-r--r-- | src/utils/charcategory.nim | 1 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 80 |
2 files changed, 48 insertions, 33 deletions
diff --git a/src/utils/charcategory.nim b/src/utils/charcategory.nim index 55cbb930..e2c2a045 100644 --- a/src/utils/charcategory.nim +++ b/src/utils/charcategory.nim @@ -7,5 +7,6 @@ const AsciiAlpha* = (AsciiUpperAlpha + AsciiLowerAlpha) const NonAscii* = {char(0x80)..char(0xFF)} const AsciiDigit* = {'0'..'9'} const AsciiAlphaNumeric* = AsciiAlpha + AsciiDigit +const AsciiOctDigit* = {'0'..'7'} const AsciiHexDigit* = (AsciiDigit + {'a'..'f', 'A'..'F'}) const AsciiWhitespace* = {' ', '\n', '\r', '\t', '\f'} diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index 31ceeec2..713e8f40 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -310,7 +310,8 @@ func japaneseNumber*(i: int): string = dec n # Implements https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#signed-integers -func parseIntImpl[T: SomeSignedInt](s: string): Option[T] = +func parseIntImpl[T: SomeSignedInt](s: string; allowed: set[char]; radix: T): + Option[T] = var sign: T = 1 var i = 0 if i < s.len and s[i] == '-': @@ -318,48 +319,61 @@ func parseIntImpl[T: SomeSignedInt](s: string): Option[T] = inc i elif i < s.len and s[i] == '+': inc i - if i == s.len or s[i] notin AsciiDigit: + if i == s.len: return none(T) - var integer = T(decValue(s[i])) - inc i - while i < s.len and isDigit(s[i]): - if unlikely(integer != 0 and high(T) div 10 < integer): - return none(T) # overflow - integer *= 10 - let c = T(decValue(s[i])) - if unlikely(high(T) - c < integer): - return none(T) # overflow + var integer: T = 0 + while i < s.len: + if s[i] notin allowed: + return none(T) # invalid + let c = T(hexValue(s[i])) + if integer != 0: + if unlikely(T.high div radix - c < integer or + T.low div radix + c > integer): + return none(T) # overflow + integer *= radix integer += c inc i return some(sign * integer) +func parseIntImpl[T: SomeSignedInt](s: string): Option[T] = + return parseIntImpl[T](s, AsciiDigit, 10) + func parseInt32*(s: string): Option[int32] = return parseIntImpl[int32](s) 
func parseInt64*(s: string): Option[int64] = return parseIntImpl[int64](s) -func parseUIntImpl[T: SomeUnsignedInt](s: string; allowSign: static bool): - Option[T] = +func parseOctInt64*(s: string): Option[int64] = + return parseIntImpl[int64](s, AsciiOctDigit, 8) + +func parseHexInt64*(s: string): Option[int64] = + return parseIntImpl[int64](s, AsciiHexDigit, 16) + +func parseUIntImpl[T: SomeUnsignedInt](s: string; allowSign: static bool; + allowed: set[char]; radix: T): Option[T] = var i = 0 when allowSign: if i < s.len and s[i] == '+': inc i - if i == s.len or s[i] notin AsciiDigit: - return none(T) - var integer = T(decValue(s[i])) - inc i - while i < s.len and s[i] in AsciiDigit: - if unlikely(integer != 0 and high(T) div 10 < integer): - return none(T) # overflow - integer *= 10 - let c = T(decValue(s[i])) - if unlikely(high(T) - c < integer): + if i == s.len: + return none(T) + var integer: T = 0 + while i < s.len: + if s[i] notin allowed: + return none(T) # invalid + let c = T(hexValue(s[i])) + if integer != 0 and unlikely(high(T) div radix - c < integer): return none(T) # overflow - integer += T(c) + integer *= radix + integer += c inc i return some(integer) +func parseUIntImpl[T: SomeUnsignedInt](s: string; allowSign: static bool): + Option[T] = + return parseUIntImpl[T](s, allowSign, AsciiDigit, 10) + func parseUInt8*(s: string; allowSign: static bool): Option[uint8] = return parseUIntImpl[uint8](s, allowSign) @@ -369,6 +383,12 @@ func parseUInt16*(s: string; allowSign: static bool): Option[uint16] = func parseUInt32*(s: string; allowSign: static bool): Option[uint32] = return parseUIntImpl[uint32](s, allowSign) +func parseOctUInt32*(s: string; allowSign: static bool): Option[uint32] = + return parseUIntImpl[uint32](s, allowSign, AsciiOctDigit, 8) + +func parseHexUInt32*(s: string; allowSign: static bool): Option[uint32] = + return parseUIntImpl[uint32](s, allowSign, AsciiHexDigit, 16) + #TODO not sure where this algorithm is from... 
# (probably from CSS) func parseFloat64*(s: string): float64 = @@ -378,27 +398,23 @@ func parseFloat64*(s: string): float64 = var integer: float64 = 0 var f: float64 = 0 var e: float64 = 0 - var i = 0 if i < s.len and s[i] == '-': sign = -1f64 inc i elif i < s.len and s[i] == '+': inc i - - while i < s.len and isDigit(s[i]): + while i < s.len and s[i] in AsciiDigit: integer *= 10 integer += float64(decValue(s[i])) inc i - if i < s.len and s[i] == '.': inc i - while i < s.len and isDigit(s[i]): + while i < s.len and s[i] in AsciiDigit: f *= 10 f += float64(decValue(s[i])) inc i inc d - if i < s.len and (s[i] == 'e' or s[i] == 'E'): inc i if i < s.len and s[i] == '-': @@ -406,12 +422,10 @@ func parseFloat64*(s: string): float64 = inc i elif i < s.len and s[i] == '+': inc i - - while i < s.len and isDigit(s[i]): + while i < s.len and s[i] in AsciiDigit: e *= 10 e += float64(decValue(s[i])) inc i - return sign * (integer + f * pow(10, float64(-d))) * pow(10, (float64(t) * e)) const ControlPercentEncodeSet* = Controls + NonAscii |