summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- changelog.md 7
-rw-r--r-- lib/pure/parseutils.nim 38
-rw-r--r-- lib/pure/strutils.nim 70
-rw-r--r-- tests/stdlib/tstrutil.nim 49
4 files changed, 111 insertions, 53 deletions
diff --git a/changelog.md b/changelog.md
index 959990900..8919cf702 100644
--- a/changelog.md
+++ b/changelog.md
@@ -48,6 +48,10 @@
 - For string inputs, ``strutils.isUpperAscii`` and ``strutils.isLowerAscii`` now
   require a second mandatory parameter ``skipNonAlpha``.
 
+- The procs ``parseHexInt`` and ``parseOctInt`` now raise ``ValueError`` on empty
+  strings and on strings containing only a valid prefix, e.g. "0x" for hex integers.
+
+
 #### Breaking changes in the compiler
 
 - The undocumented ``#? braces`` parsing mode was removed.
@@ -72,6 +76,8 @@
 - Added the procs ``math.floorMod`` and ``math.floorDiv`` for floor based integer division.
 - Added the procs ``rationals.`div```, ``rationals.`mod```, ``rationals.floorDiv`` and ``rationals.floorMod`` for rationals.
 - Added the proc ``math.prod`` for product of elements in openArray.
+- Added the proc ``parseBinInt`` to parse a binary integer from a string, which returns the value.
+- ``parseOct`` and ``parseBin`` in parseutils now also support the ``maxLen`` argument, similar to ``parseHex``.
 
 ### Library changes
 
@@ -100,7 +106,6 @@
 - Added the parameter ``val`` for the ``CritBitTree[T].incl`` proc.
 - The proc ``tgamma`` was renamed to ``gamma``. ``tgamma`` is deprecated.
 
-
 ### Language additions
 
 - Dot calls combined with explicit generic instantiations can now be written
diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim
index d54f1454b..e633d8cf7 100644
--- a/lib/pure/parseutils.nim
+++ b/lib/pure/parseutils.nim
@@ -47,12 +47,14 @@ proc parseHex*(s: string, number: var int, start = 0; maxLen = 0): int {.
   ##   discard parseHex("0x38", value)
   ##   assert value == -200
   ##
-  ## If 'maxLen==0' the length of the hexadecimal number has no
-  ## upper bound. Not more than ```maxLen`` characters are parsed.
+  ## If ``maxLen == 0`` the length of the hexadecimal number has no upper bound.
+  ## Else no more than ``maxLen`` characters (including any prefix) are parsed,
+  ## starting at ``start`` and never reading past the end of the string.
   var i = start
   var foundDigit = false
-  let last = if maxLen == 0: s.len else: i+maxLen
-  if i+1 < last and s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
+  # get last index based on minimum `start + maxLen` or `s.len`
+  let last = min(s.len, if maxLen == 0: s.len else: i+maxLen)
+  if i+1 < last and s[i] == '0' and (s[i+1] in {'x', 'X'}): inc(i, 2)
   elif i < last and s[i] == '#': inc(i)
   while i < last:
     case s[i]
@@ -70,14 +72,20 @@ proc parseHex*(s: string, number: var int, start = 0; maxLen = 0): int {.
     inc(i)
   if foundDigit: result = i-start
 
-proc parseOct*(s: string, number: var int, start = 0): int  {.
+proc parseOct*(s: string, number: var int, start = 0, maxLen = 0): int  {.
   rtl, extern: "npuParseOct", noSideEffect.} =
-  ## parses an octal number and stores its value in ``number``. Returns
+  ## Parses an octal number and stores its value in ``number``. Returns
   ## the number of the parsed characters or 0 in case of an error.
+  ##
+  ## If ``maxLen == 0`` the length of the octal number has no upper bound.
+  ## Else no more than ``maxLen`` characters (including any prefix) are parsed,
+  ## starting at ``start`` and never reading past the end of the string.
   var i = start
   var foundDigit = false
-  if i+1 < s.len and s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2)
-  while i < s.len:
+  # get last index based on minimum `start + maxLen` or `s.len`
+  let last = min(s.len, if maxLen == 0: s.len else: i+maxLen)
+  if i+1 < last and s[i] == '0' and (s[i+1] in {'o', 'O'}): inc(i, 2)
+  while i < last:
     case s[i]
     of '_': discard
     of '0'..'7':
@@ -87,14 +95,20 @@ proc parseOct*(s: string, number: var int, start = 0): int  {.
     inc(i)
   if foundDigit: result = i-start
 
-proc parseBin*(s: string, number: var int, start = 0): int  {.
+proc parseBin*(s: string, number: var int, start = 0, maxLen = 0): int  {.
   rtl, extern: "npuParseBin", noSideEffect.} =
-  ## parses an binary number and stores its value in ``number``. Returns
+  ## Parses a binary number and stores its value in ``number``. Returns
   ## the number of the parsed characters or 0 in case of an error.
+  ##
+  ## If ``maxLen == 0`` the length of the binary number has no upper bound.
+  ## Else no more than ``maxLen`` characters (including any prefix) are parsed,
+  ## starting at ``start`` and never reading past the end of the string.
   var i = start
   var foundDigit = false
-  if i+1 < s.len and s[i] == '0' and (s[i+1] == 'b' or s[i+1] == 'B'): inc(i, 2)
-  while i < s.len:
+  # get last index based on minimum `start + maxLen` or `s.len`
+  let last = min(s.len, if maxLen == 0: s.len else: i+maxLen)
+  if i+1 < last and s[i] == '0' and (s[i+1] in {'b', 'B'}): inc(i, 2)
+  while i < last:
     case s[i]
     of '_': discard
     of '0'..'1':
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index bea0a0243..5de013c26 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -844,7 +844,7 @@ proc parseInt*(s: string): int {.noSideEffect, procvar,
   ## Parses a decimal integer value contained in `s`.
   ##
   ## If `s` is not a valid integer, `ValueError` is raised.
-  var L = parseutils.parseInt(s, result, 0)
+  let L = parseutils.parseInt(s, result, 0)
   if L != s.len or L == 0:
     raise newException(ValueError, "invalid integer: " & s)
 
@@ -853,7 +853,7 @@ proc parseBiggestInt*(s: string): BiggestInt {.noSideEffect, procvar,
   ## Parses a decimal integer value contained in `s`.
   ##
   ## If `s` is not a valid integer, `ValueError` is raised.
-  var L = parseutils.parseBiggestInt(s, result, 0)
+  let L = parseutils.parseBiggestInt(s, result, 0)
   if L != s.len or L == 0:
     raise newException(ValueError, "invalid integer: " & s)
 
@@ -862,7 +862,7 @@ proc parseUInt*(s: string): uint {.noSideEffect, procvar,
   ## Parses a decimal unsigned integer value contained in `s`.
   ##
   ## If `s` is not a valid integer, `ValueError` is raised.
-  var L = parseutils.parseUInt(s, result, 0)
+  let L = parseutils.parseUInt(s, result, 0)
   if L != s.len or L == 0:
     raise newException(ValueError, "invalid unsigned integer: " & s)
 
@@ -871,7 +871,7 @@ proc parseBiggestUInt*(s: string): BiggestUInt {.noSideEffect, procvar,
   ## Parses a decimal unsigned integer value contained in `s`.
   ##
   ## If `s` is not a valid integer, `ValueError` is raised.
-  var L = parseutils.parseBiggestUInt(s, result, 0)
+  let L = parseutils.parseBiggestUInt(s, result, 0)
   if L != s.len or L == 0:
     raise newException(ValueError, "invalid unsigned integer: " & s)
 
@@ -880,33 +880,42 @@ proc parseFloat*(s: string): float {.noSideEffect, procvar,
   ## Parses a decimal floating point value contained in `s`. If `s` is not
   ## a valid floating point number, `ValueError` is raised. ``NAN``,
   ## ``INF``, ``-INF`` are also supported (case insensitive comparison).
-  var L = parseutils.parseFloat(s, result, 0)
+  let L = parseutils.parseFloat(s, result, 0)
   if L != s.len or L == 0:
     raise newException(ValueError, "invalid float: " & s)
 
+proc parseBinInt*(s: string): int {.noSideEffect, procvar,
+  rtl, extern: "nsuParseBinInt".} =
+  ## Parses a binary integer value contained in `s`.
+  ##
+  ## If `s` is not a valid binary integer, `ValueError` is raised. `s` can have
+  ## one of the following optional prefixes: ``0b``, ``0B``. Underscores within
+  ## `s` are ignored.
+  let L = parseutils.parseBin(s, result, 0)
+  if L != s.len or L == 0:
+    raise newException(ValueError, "invalid binary integer: " & s)
+
+proc parseOctInt*(s: string): int {.noSideEffect,
+  rtl, extern: "nsuParseOctInt".} =
+  ## Parses an octal integer value contained in `s`.
+  ##
+  ## If `s` is not a valid oct integer, `ValueError` is raised. `s` can have one
+  ## of the following optional prefixes: ``0o``, ``0O``.  Underscores within
+  ## `s` are ignored.
+  let L = parseutils.parseOct(s, result, 0)
+  if L != s.len or L == 0:
+    raise newException(ValueError, "invalid oct integer: " & s)  
+
 proc parseHexInt*(s: string): int {.noSideEffect, procvar,
   rtl, extern: "nsuParseHexInt".} =
   ## Parses a hexadecimal integer value contained in `s`.
   ##
-  ## If `s` is not a valid integer, `ValueError` is raised. `s` can have one
+  ## If `s` is not a valid hex integer, `ValueError` is raised. `s` can have one
   ## of the following optional prefixes: ``0x``, ``0X``, ``#``.  Underscores
   ## within `s` are ignored.
-  var i = 0
-  if i+1 < s.len and s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
-  elif i < s.len and s[i] == '#': inc(i)
-  while i < s.len:
-    case s[i]
-    of '_': inc(i)
-    of '0'..'9':
-      result = result shl 4 or (ord(s[i]) - ord('0'))
-      inc(i)
-    of 'a'..'f':
-      result = result shl 4 or (ord(s[i]) - ord('a') + 10)
-      inc(i)
-    of 'A'..'F':
-      result = result shl 4 or (ord(s[i]) - ord('A') + 10)
-      inc(i)
-    else: raise newException(ValueError, "invalid integer: " & s)
+  let L = parseutils.parseHex(s, result, 0)
+  if L != s.len or L == 0:
+    raise newException(ValueError, "invalid hex integer: " & s)
 
 proc generateHexCharToValueMap(): string =
   ## Generate a string to map a hex digit to uint value
@@ -1616,23 +1625,6 @@ proc delete*(s: var string, first, last: int) {.noSideEffect,
     inc(j)
   setLen(s, newLen)
 
-proc parseOctInt*(s: string): int {.noSideEffect,
-  rtl, extern: "nsuParseOctInt".} =
-  ## Parses an octal integer value contained in `s`.
-  ##
-  ## If `s` is not a valid integer, `ValueError` is raised. `s` can have one
-  ## of the following optional prefixes: ``0o``, ``0O``.  Underscores within
-  ## `s` are ignored.
-  var i = 0
-  if i+1 < s.len and s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2)
-  while i < s.len:
-    case s[i]
-    of '_': inc(i)
-    of '0'..'7':
-      result = result shl 3 or (ord(s[i]) - ord('0'))
-      inc(i)
-    else: raise newException(ValueError, "invalid integer: " & s)
-
 proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect,
   rtl, extern: "nsuToOct".} =
   ## Converts `x` into its octal representation.
diff --git a/tests/stdlib/tstrutil.nim b/tests/stdlib/tstrutil.nim
index 6f78a91ac..4d4081d39 100644
--- a/tests/stdlib/tstrutil.nim
+++ b/tests/stdlib/tstrutil.nim
@@ -7,6 +7,14 @@ discard """
 import
   strutils
 
+import macros
+
+template rejectParse(e) =
+  try:
+    discard e
+    raise newException(AssertionError, "This was supposed to fail: $#!" % astToStr(e))
+  except ValueError: discard
+
 proc testStrip() =
   write(stdout, strip("  ha  "))
 
@@ -148,7 +156,6 @@ proc testDelete =
   delete(s, 0, 0)
   assert s == "1236789ABCDEFG"
 
-
 proc testIsAlphaNumeric =
   assert isAlphaNumeric("abcdABC1234") == true
   assert isAlphaNumeric("a") == true
@@ -203,10 +210,50 @@ proc testCountLines =
   assertCountLines("\nabc\n123")
   assertCountLines("\nabc\n123\n")
 
+proc testParseInts =
+  # binary
+  assert "0b1111".parseBinInt == 15
+  assert "0B1111".parseBinInt == 15
+  assert "1111".parseBinInt == 15
+  assert "1110".parseBinInt == 14
+  assert "1_1_1_1".parseBinInt == 15
+  assert "0b1_1_1_1".parseBinInt == 15
+  rejectParse "".parseBinInt
+  rejectParse "_".parseBinInt
+  rejectParse "0b".parseBinInt
+  rejectParse "0b1234".parseBinInt
+  # hex
+  assert "0x72".parseHexInt == 114
+  assert "0X72".parseHexInt == 114
+  assert "#72".parseHexInt == 114
+  assert "72".parseHexInt == 114
+  assert "FF".parseHexInt == 255
+  assert "ff".parseHexInt == 255
+  assert "fF".parseHexInt == 255  
+  assert "0x7_2".parseHexInt == 114
+  rejectParse "".parseHexInt
+  rejectParse "_".parseHexInt
+  rejectParse "0x".parseHexInt
+  rejectParse "0xFFG".parseHexInt
+  rejectParse "reject".parseHexInt
+  # octal
+  assert "0o17".parseOctInt == 15
+  assert "0O17".parseOctInt == 15
+  assert "17".parseOctInt == 15
+  assert "10".parseOctInt == 8
+  assert "0o1_0_0".parseOctInt == 64
+  rejectParse "".parseOctInt
+  rejectParse "_".parseOctInt
+  rejectParse "0o".parseOctInt
+  rejectParse "9".parseOctInt
+  rejectParse "0o9".parseOctInt
+  rejectParse "reject".parseOctInt
+
 testDelete()
 testFind()
 testRFind()
 testCountLines()
+testParseInts()
 
 assert(insertSep($1000_000) == "1_000_000")
 assert(insertSep($232) == "232")