about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: bptato <nincsnevem662@gmail.com> 2024-04-18 20:53:36 +0200
committer: bptato <nincsnevem662@gmail.com> 2024-04-18 21:12:50 +0200
commit: c23ea622d1b34d3d290670e60e231f4f236fec50 (patch)
tree: 739abbf0490c28f5446469d7244a20a9a0a03502 /src
parent: 38db6ab5be80b255fe40df715adc3b5852875cdd (diff)
download: chawan-c23ea622d1b34d3d290670e60e231f4f236fec50.tar.gz
url, twtstr: correct number parsing
* do not use std's parse*Int; they accept weird stuff that we do not
  want to accept in any case
* fix bug in parseHost where a parseIpv4 failure would result in an
  empty host
* do not use isDigit, isAlphaAscii
* improve parse*IntImpl error handling
Diffstat (limited to 'src')
-rw-r--r-- src/config/toml.nim 17
-rw-r--r-- src/loader/loader.nim 13
-rw-r--r-- src/types/url.nim 32
-rw-r--r-- src/utils/charcategory.nim 1
-rw-r--r-- src/utils/twtstr.nim 80
5 files changed, 75 insertions, 68 deletions
diff --git a/src/config/toml.nim b/src/config/toml.nim
index f28c4916..f8ab9a08 100644
--- a/src/config/toml.nim
+++ b/src/config/toml.nim
@@ -1,6 +1,5 @@
 import std/options
 import std/streams
-import std/strutils
 import std/tables
 import std/times
 import std/unicode
@@ -441,21 +440,19 @@ proc consumeNumber(state: var TomlParser; c: char): TomlResult =
   case numType
   of NUMBER_INTEGER:
     let val = parseInt64(repr)
-    if not val.isSome:
+    if val.isNone:
       return state.err("invalid integer")
     return ok(TomlValue(t: tvtInteger, i: val.get))
   of NUMBER_HEX:
-    try:
-      let val = parseHexInt(repr)
-      return ok(TomlValue(t: tvtInteger, i: val))
-    except ValueError:
+    let val = parseHexInt64(repr)
+    if val.isNone:
       return state.err("invalid hexadecimal number")
+    return ok(TomlValue(t: tvtInteger, i: val.get))
   of NUMBER_OCT:
-    try:
-      let val = parseOctInt(repr)
-      return ok(TomlValue(t: tvtInteger, i: val))
-    except ValueError:
+    let val = parseOctInt64(repr)
+    if val.isNone:
       return state.err("invalid octal number")
+    return ok(TomlValue(t: tvtInteger, i: val.get))
   of NUMBER_FLOAT:
     let val = parseFloat64(repr)
     return ok(TomlValue(t: tvtFloat, f: val))
diff --git a/src/loader/loader.nim b/src/loader/loader.nim
index 6a8e9164..21511c56 100644
--- a/src/loader/loader.nim
+++ b/src/loader/loader.nim
@@ -362,14 +362,11 @@ func find(cacheMap: seq[CachedItem]; id: int): int =
 
 proc loadFromCache(ctx: LoaderContext; client: ClientData; handle: LoaderHandle;
     request: Request) =
-  var id = -1
-  var startFrom = 0
-  try:
-    id = parseInt(request.url.pathname)
-    if request.url.query.isSome:
-      startFrom = parseInt(request.url.query.get)
-  except ValueError:
-    discard
+  let id = parseInt32(request.url.pathname).get(-1)
+  let startFrom = if request.url.query.isSome:
+    parseInt32(request.url.query.get).get(0)
+  else:
+    0
   let n = client.cacheMap.find(id)
   if n != -1:
     let ps = newPosixStream(client.cacheMap[n].path, O_RDONLY, 0)
diff --git a/src/types/url.nim b/src/types/url.nim
index b72fb8af..3426bf39 100644
--- a/src/types/url.nim
+++ b/src/types/url.nim
@@ -166,7 +166,7 @@ func parseIpv6(input: string): Option[array[8, uint16]] =
     return failure
   return address.some
 
-func parseIpv4Number(s: string): int =
+func parseIpv4Number(s: string): uint32 =
   var input = s
   var R = 10
   if input.len >= 2 and input[0] == '0':
@@ -178,19 +178,16 @@ func parseIpv4Number(s: string): int =
       R = 8
   if input == "":
     return 0
-  var output = 0
-  try:
-    case R
-    of 8: output = parseOctInt(input)
-    of 10: output = parseInt(input)
-    of 16: output = parseHexInt(input)
-    else: discard
-  except ValueError:
-    return -1
+  var output = 0u32
+  case R
+  of 8: output = parseOctUInt32(input, allowSign = false).get(uint32.high)
+  of 10: output = parseUInt32(input, allowSign = false).get(uint32.high)
+  of 16: output = parseHexUInt32(input, allowSign = false).get(uint32.high)
+  else: discard
   return output
 
 func parseIpv4(input: string): Option[uint32] =
-  var numbers: seq[int] = @[]
+  var numbers: seq[uint32] = @[]
   var prevEmpty = false
   var i = 0
   for part in input.split('.'):
@@ -201,10 +198,10 @@ func parseIpv4(input: string): Option[uint32] =
       prevEmpty = true
       continue
     let num = parseIpv4Number(part)
-    if num notin 0..255:
+    if num notin 0u32..255u32:
       return none(uint32)
     numbers.add(num)
-  if numbers[^1] >= 1 shl ((5 - numbers.len) * 8):
+  if numbers[^1] >= 1u32 shl ((5 - numbers.len) * 8):
     return none(uint32)
   var ipv4 = uint32(numbers[^1])
   for i in 0 ..< numbers.high:
@@ -392,7 +389,7 @@ func parseHost(input: string; special: bool): Option[Host] =
   if input[0] == '[':
     if input[^1] != ']':
       return none(Host)
-    return Host(ipv6: parseIpv6(input.substr(1, input.high - 1))).some
+    return some(Host(ipv6: parseIpv6(input.substr(1, input.high - 1))))
   if not special:
     return opaqueParseHost(input)
   let domain = percentDecode(input)
@@ -403,8 +400,9 @@ func parseHost(input: string; special: bool): Option[Host] =
     return none(Host)
   if asciiDomain.get.len > 0 and asciiDomain.get.endsInNumber():
     let ipv4 = parseIpv4(asciiDomain.get)
-    return Host(ipv4: ipv4).some
-  return Host(domain: asciiDomain.get).some
+    if ipv4.isSome:
+      return some(Host(ipv4: ipv4))
+  return some(Host(domain: asciiDomain.get))
 
 func isempty(host: Host): bool =
   return host.domain == "" and host.ipv4.isNone and host.ipv6.isNone and
@@ -477,7 +475,7 @@ proc basicParseURL*(input: string; base = none(URL); url: URL = URL();
       continue
     case state
     of usSchemeStart:
-      if has and c.isAlphaAscii():
+      if has and c in AsciiAlpha:
         buffer &= c.toLowerAscii()
         state = usScheme
       elif not override:
diff --git a/src/utils/charcategory.nim b/src/utils/charcategory.nim
index 55cbb930..e2c2a045 100644
--- a/src/utils/charcategory.nim
+++ b/src/utils/charcategory.nim
@@ -7,5 +7,6 @@ const AsciiAlpha* = (AsciiUpperAlpha + AsciiLowerAlpha)
 const NonAscii* = {char(0x80)..char(0xFF)}
 const AsciiDigit* = {'0'..'9'}
 const AsciiAlphaNumeric* = AsciiAlpha + AsciiDigit
+const AsciiOctDigit* = {'0'..'7'}
 const AsciiHexDigit* = (AsciiDigit + {'a'..'f', 'A'..'F'})
 const AsciiWhitespace* = {' ', '\n', '\r', '\t', '\f'}
diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim
index 31ceeec2..713e8f40 100644
--- a/src/utils/twtstr.nim
+++ b/src/utils/twtstr.nim
@@ -310,7 +310,8 @@ func japaneseNumber*(i: int): string =
     dec n
 
 # Implements https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#signed-integers
-func parseIntImpl[T: SomeSignedInt](s: string): Option[T] =
+func parseIntImpl[T: SomeSignedInt](s: string; allowed: set[char]; radix: T):
+    Option[T] =
   var sign: T = 1
   var i = 0
   if i < s.len and s[i] == '-':
@@ -318,48 +319,61 @@ func parseIntImpl[T: SomeSignedInt](s: string): Option[T] =
     inc i
   elif i < s.len and s[i] == '+':
     inc i
-  if i == s.len or s[i] notin AsciiDigit:
+  if i == s.len:
     return none(T)
-  var integer = T(decValue(s[i]))
-  inc i
-  while i < s.len and isDigit(s[i]):
-    if unlikely(integer != 0 and high(T) div 10 < integer):
-      return none(T) # overflow
-    integer *= 10
-    let c = T(decValue(s[i]))
-    if unlikely(high(T) - c < integer):
-      return none(T) # overflow
+  var integer: T = 0
+  while i < s.len:
+    if s[i] notin allowed:
+      return none(T) # invalid
+    let c = T(hexValue(s[i]))
+    if integer != 0:
+      if unlikely(T.high div radix - c < integer or
+          T.low div radix + c > integer):
+        return none(T) # overflow
+    integer *= radix
     integer += c
     inc i
   return some(sign * integer)
 
+func parseIntImpl[T: SomeSignedInt](s: string): Option[T] =
+  return parseIntImpl[T](s, AsciiDigit, 10)
+
 func parseInt32*(s: string): Option[int32] =
   return parseIntImpl[int32](s)
 
 func parseInt64*(s: string): Option[int64] =
   return parseIntImpl[int64](s)
 
-func parseUIntImpl[T: SomeUnsignedInt](s: string; allowSign: static bool):
-    Option[T] =
+func parseOctInt64*(s: string): Option[int64] =
+  return parseIntImpl[int64](s, AsciiOctDigit, 8)
+
+func parseHexInt64*(s: string): Option[int64] =
+  return parseIntImpl[int64](s, AsciiHexDigit, 16)
+
+func parseUIntImpl[T: SomeUnsignedInt](s: string; allowSign: static bool;
+    allowed: set[char]; radix: T): Option[T] =
   var i = 0
   when allowSign:
     if i < s.len and s[i] == '+':
       inc i
-    if i == s.len or s[i] notin AsciiDigit:
-      return none(T)
-  var integer = T(decValue(s[i]))
-  inc i
-  while i < s.len and s[i] in AsciiDigit:
-    if unlikely(integer != 0 and high(T) div 10 < integer):
-      return none(T) # overflow
-    integer *= 10
-    let c = T(decValue(s[i]))
-    if unlikely(high(T) - c < integer):
+  if i == s.len:
+    return none(T)
+  var integer: T = 0
+  while i < s.len:
+    if s[i] notin allowed:
+      return none(T) # invalid
+    let c = T(hexValue(s[i]))
+    if integer != 0 and unlikely(high(T) div radix - c < integer):
       return none(T) # overflow
-    integer += T(c)
+    integer *= radix
+    integer += c
     inc i
   return some(integer)
 
+func parseUIntImpl[T: SomeUnsignedInt](s: string; allowSign: static bool):
+    Option[T] =
+  return parseUIntImpl[T](s, allowSign, AsciiDigit, 10)
+
 func parseUInt8*(s: string; allowSign: static bool): Option[uint8] =
   return parseUIntImpl[uint8](s, allowSign)
 
@@ -369,6 +383,12 @@ func parseUInt16*(s: string; allowSign: static bool): Option[uint16] =
 func parseUInt32*(s: string; allowSign: static bool): Option[uint32] =
   return parseUIntImpl[uint32](s, allowSign)
 
+func parseOctUInt32*(s: string; allowSign: static bool): Option[uint32] =
+  return parseUIntImpl[uint32](s, allowSign, AsciiOctDigit, 8)
+
+func parseHexUInt32*(s: string; allowSign: static bool): Option[uint32] =
+  return parseUIntImpl[uint32](s, allowSign, AsciiHexDigit, 16)
+
 #TODO not sure where this algorithm is from...
 # (probably from CSS)
 func parseFloat64*(s: string): float64 =
@@ -378,27 +398,23 @@ func parseFloat64*(s: string): float64 =
   var integer: float64 = 0
   var f: float64 = 0
   var e: float64 = 0
-
   var i = 0
   if i < s.len and s[i] == '-':
     sign = -1f64
     inc i
   elif i < s.len and s[i] == '+':
     inc i
-
-  while i < s.len and isDigit(s[i]):
+  while i < s.len and s[i] in AsciiDigit:
     integer *= 10
     integer += float64(decValue(s[i]))
     inc i
-
   if i < s.len and s[i] == '.':
     inc i
-    while i < s.len and isDigit(s[i]):
+    while i < s.len and s[i] in AsciiDigit:
       f *= 10
       f += float64(decValue(s[i]))
       inc i
       inc d
-
   if i < s.len and (s[i] == 'e' or s[i] == 'E'):
     inc i
     if i < s.len and s[i] == '-':
@@ -406,12 +422,10 @@ func parseFloat64*(s: string): float64 =
       inc i
     elif i < s.len and s[i] == '+':
       inc i
-
-    while i < s.len and isDigit(s[i]):
+    while i < s.len and s[i] in AsciiDigit:
       e *= 10
       e += float64(decValue(s[i]))
       inc i
-
   return sign * (integer + f * pow(10, float64(-d))) * pow(10, (float64(t) * e))
 
 const ControlPercentEncodeSet* = Controls + NonAscii