diff options
-rw-r--r-- | changelog.md | 7 | ||||
-rw-r--r-- | lib/pure/parseutils.nim | 38 | ||||
-rw-r--r-- | lib/pure/strutils.nim | 70 | ||||
-rw-r--r-- | tests/stdlib/tstrutil.nim | 49 |
4 files changed, 111 insertions, 53 deletions
diff --git a/changelog.md b/changelog.md index 959990900..8919cf702 100644 --- a/changelog.md +++ b/changelog.md @@ -48,6 +48,10 @@ - For string inputs, ``strutils.isUpperAscii`` and ``strutils.isLowerAscii`` now require a second mandatory parameter ``skipNonAlpha``. +- The procs ``parseHexInt`` and ``parseOctInt`` now fail on empty strings + and strings containing only valid prefixes, e.g. "0x" for hex integers. + + #### Breaking changes in the compiler - The undocumented ``#? braces`` parsing mode was removed. @@ -72,6 +76,8 @@ - Added the procs ``math.floorMod`` and ``math.floorDiv`` for floor based integer division. - Added the procs ``rationals.`div```, ``rationals.`mod```, ``rationals.floorDiv`` and ``rationals.floorMod`` for rationals. - Added the proc ``math.prod`` for product of elements in openArray. +- Added the proc ``parseBinInt`` to parse a binary integer from a string, which returns the value. +- ``parseOct`` and ``parseBin`` in parseutils now also support the ``maxLen`` argument similar to ``parseHex``. ### Library changes @@ -100,7 +106,6 @@ - Added the parameter ``val`` for the ``CritBitTree[T].incl`` proc. - The proc ``tgamma`` was renamed to ``gamma``. ``tgamma`` is deprecated. - ### Language additions - Dot calls combined with explicit generic instantiations can now be written diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim index d54f1454b..e633d8cf7 100644 --- a/lib/pure/parseutils.nim +++ b/lib/pure/parseutils.nim @@ -47,12 +47,14 @@ proc parseHex*(s: string, number: var int, start = 0; maxLen = 0): int {. ## discard parseHex("0x38", value) ## assert value == -200 ## - ## If 'maxLen==0' the length of the hexadecimal number has no - ## upper bound. Not more than ```maxLen`` characters are parsed. + ## If ``maxLen == 0`` the length of the hexadecimal number has no upper bound. + ## Else no more than ``maxLen`` characters are parsed, up to the + ## length of the string. 
var i = start var foundDigit = false - let last = if maxLen == 0: s.len else: i+maxLen - if i+1 < last and s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2) + # get last index based on minimum `start + maxLen` or `s.len` + let last = min(s.len, if maxLen == 0: s.len else: i+maxLen) + if i+1 < last and s[i] == '0' and (s[i+1] in {'x', 'X'}): inc(i, 2) elif i < last and s[i] == '#': inc(i) while i < last: case s[i] @@ -70,14 +72,20 @@ proc parseHex*(s: string, number: var int, start = 0; maxLen = 0): int {. inc(i) if foundDigit: result = i-start -proc parseOct*(s: string, number: var int, start = 0): int {. +proc parseOct*(s: string, number: var int, start = 0, maxLen = 0): int {. rtl, extern: "npuParseOct", noSideEffect.} = - ## parses an octal number and stores its value in ``number``. Returns + ## Parses an octal number and stores its value in ``number``. Returns ## the number of the parsed characters or 0 in case of an error. + ## + ## If ``maxLen == 0`` the length of the octal number has no upper bound. + ## Else no more than ``maxLen`` characters are parsed, up to the + ## length of the string. var i = start var foundDigit = false - if i+1 < s.len and s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2) - while i < s.len: + # get last index based on minimum `start + maxLen` or `s.len` + let last = min(s.len, if maxLen == 0: s.len else: i+maxLen) + if i+1 < last and s[i] == '0' and (s[i+1] in {'o', 'O'}): inc(i, 2) + while i < last: case s[i] of '_': discard of '0'..'7': @@ -87,14 +95,20 @@ proc parseOct*(s: string, number: var int, start = 0): int {. inc(i) if foundDigit: result = i-start -proc parseBin*(s: string, number: var int, start = 0): int {. +proc parseBin*(s: string, number: var int, start = 0, maxLen = 0): int {. rtl, extern: "npuParseBin", noSideEffect.} = - ## parses an binary number and stores its value in ``number``. Returns + ## Parses a binary number and stores its value in ``number``. 
Returns ## the number of the parsed characters or 0 in case of an error. + ## + ## If ``maxLen == 0`` the length of the binary number has no upper bound. + ## Else no more than ``maxLen`` characters are parsed, up to the + ## length of the string. var i = start var foundDigit = false - if i+1 < s.len and s[i] == '0' and (s[i+1] == 'b' or s[i+1] == 'B'): inc(i, 2) - while i < s.len: + # get last index based on minimum `start + maxLen` or `s.len` + let last = min(s.len, if maxLen == 0: s.len else: i+maxLen) + if i+1 < last and s[i] == '0' and (s[i+1] in {'b', 'B'}): inc(i, 2) + while i < last: case s[i] of '_': discard of '0'..'1': diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index bea0a0243..5de013c26 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -844,7 +844,7 @@ proc parseInt*(s: string): int {.noSideEffect, procvar, ## Parses a decimal integer value contained in `s`. ## ## If `s` is not a valid integer, `ValueError` is raised. - var L = parseutils.parseInt(s, result, 0) + let L = parseutils.parseInt(s, result, 0) if L != s.len or L == 0: raise newException(ValueError, "invalid integer: " & s) @@ -853,7 +853,7 @@ proc parseBiggestInt*(s: string): BiggestInt {.noSideEffect, procvar, ## Parses a decimal integer value contained in `s`. ## ## If `s` is not a valid integer, `ValueError` is raised. - var L = parseutils.parseBiggestInt(s, result, 0) + let L = parseutils.parseBiggestInt(s, result, 0) if L != s.len or L == 0: raise newException(ValueError, "invalid integer: " & s) @@ -862,7 +862,7 @@ proc parseUInt*(s: string): uint {.noSideEffect, procvar, ## Parses a decimal unsigned integer value contained in `s`. ## ## If `s` is not a valid integer, `ValueError` is raised. 
- var L = parseutils.parseUInt(s, result, 0) + let L = parseutils.parseUInt(s, result, 0) if L != s.len or L == 0: raise newException(ValueError, "invalid unsigned integer: " & s) @@ -871,7 +871,7 @@ proc parseBiggestUInt*(s: string): BiggestUInt {.noSideEffect, procvar, ## Parses a decimal unsigned integer value contained in `s`. ## ## If `s` is not a valid integer, `ValueError` is raised. - var L = parseutils.parseBiggestUInt(s, result, 0) + let L = parseutils.parseBiggestUInt(s, result, 0) if L != s.len or L == 0: raise newException(ValueError, "invalid unsigned integer: " & s) @@ -880,33 +880,42 @@ proc parseFloat*(s: string): float {.noSideEffect, procvar, ## Parses a decimal floating point value contained in `s`. If `s` is not ## a valid floating point number, `ValueError` is raised. ``NAN``, ## ``INF``, ``-INF`` are also supported (case insensitive comparison). - var L = parseutils.parseFloat(s, result, 0) + let L = parseutils.parseFloat(s, result, 0) if L != s.len or L == 0: raise newException(ValueError, "invalid float: " & s) +proc parseBinInt*(s: string): int {.noSideEffect, procvar, + rtl, extern: "nsuParseBinInt".} = + ## Parses a binary integer value contained in `s`. + ## + ## If `s` is not a valid binary integer, `ValueError` is raised. `s` can have + ## one of the following optional prefixes: ``0b``, ``0B``. Underscores within + ## `s` are ignored. + let L = parseutils.parseBin(s, result, 0) + if L != s.len or L == 0: + raise newException(ValueError, "invalid binary integer: " & s) + +proc parseOctInt*(s: string): int {.noSideEffect, + rtl, extern: "nsuParseOctInt".} = + ## Parses an octal integer value contained in `s`. + ## + ## If `s` is not a valid oct integer, `ValueError` is raised. `s` can have one + ## of the following optional prefixes: ``0o``, ``0O``. Underscores within + ## `s` are ignored. 
+ let L = parseutils.parseOct(s, result, 0) + if L != s.len or L == 0: + raise newException(ValueError, "invalid oct integer: " & s) + proc parseHexInt*(s: string): int {.noSideEffect, procvar, rtl, extern: "nsuParseHexInt".} = ## Parses a hexadecimal integer value contained in `s`. ## - ## If `s` is not a valid integer, `ValueError` is raised. `s` can have one + ## If `s` is not a valid hex integer, `ValueError` is raised. `s` can have one ## of the following optional prefixes: ``0x``, ``0X``, ``#``. Underscores ## within `s` are ignored. - var i = 0 - if i+1 < s.len and s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2) - elif i < s.len and s[i] == '#': inc(i) - while i < s.len: - case s[i] - of '_': inc(i) - of '0'..'9': - result = result shl 4 or (ord(s[i]) - ord('0')) - inc(i) - of 'a'..'f': - result = result shl 4 or (ord(s[i]) - ord('a') + 10) - inc(i) - of 'A'..'F': - result = result shl 4 or (ord(s[i]) - ord('A') + 10) - inc(i) - else: raise newException(ValueError, "invalid integer: " & s) + let L = parseutils.parseHex(s, result, 0) + if L != s.len or L == 0: + raise newException(ValueError, "invalid hex integer: " & s) proc generateHexCharToValueMap(): string = ## Generate a string to map a hex digit to uint value @@ -1616,23 +1625,6 @@ proc delete*(s: var string, first, last: int) {.noSideEffect, inc(j) setLen(s, newLen) -proc parseOctInt*(s: string): int {.noSideEffect, - rtl, extern: "nsuParseOctInt".} = - ## Parses an octal integer value contained in `s`. - ## - ## If `s` is not a valid integer, `ValueError` is raised. `s` can have one - ## of the following optional prefixes: ``0o``, ``0O``. Underscores within - ## `s` are ignored. 
- var i = 0 - if i+1 < s.len and s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2) - while i < s.len: - case s[i] - of '_': inc(i) - of '0'..'7': - result = result shl 3 or (ord(s[i]) - ord('0')) - inc(i) - else: raise newException(ValueError, "invalid integer: " & s) - proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect, rtl, extern: "nsuToOct".} = ## Converts `x` into its octal representation. diff --git a/tests/stdlib/tstrutil.nim b/tests/stdlib/tstrutil.nim index 6f78a91ac..4d4081d39 100644 --- a/tests/stdlib/tstrutil.nim +++ b/tests/stdlib/tstrutil.nim @@ -7,6 +7,14 @@ discard """ import strutils +import macros + +template rejectParse(e) = + try: + discard e + raise newException(AssertionError, "This was supposed to fail: $#!" % astToStr(e)) + except ValueError: discard + proc testStrip() = write(stdout, strip(" ha ")) @@ -148,7 +156,6 @@ proc testDelete = delete(s, 0, 0) assert s == "1236789ABCDEFG" - proc testIsAlphaNumeric = assert isAlphaNumeric("abcdABC1234") == true assert isAlphaNumeric("a") == true @@ -203,10 +210,50 @@ proc testCountLines = assertCountLines("\nabc\n123") assertCountLines("\nabc\n123\n") +proc testParseInts = + # binary + assert "0b1111".parseBinInt == 15 + assert "0B1111".parseBinInt == 15 + assert "1111".parseBinInt == 15 + assert "1110".parseBinInt == 14 + assert "1_1_1_1".parseBinInt == 15 + assert "0b1_1_1_1".parseBinInt == 15 + rejectParse "".parseBinInt + rejectParse "_".parseBinInt + rejectParse "0b".parseBinInt + rejectParse "0b1234".parseBinInt + # hex + assert "0x72".parseHexInt == 114 + assert "0X72".parseHexInt == 114 + assert "#72".parseHexInt == 114 + assert "72".parseHexInt == 114 + assert "FF".parseHexInt == 255 + assert "ff".parseHexInt == 255 + assert "fF".parseHexInt == 255 + assert "0x7_2".parseHexInt == 114 + rejectParse "".parseHexInt + rejectParse "_".parseHexInt + rejectParse "0x".parseHexInt + rejectParse "0xFFG".parseHexInt + rejectParse "reject".parseHexInt + # octal + assert 
"0o17".parseOctInt == 15 + assert "0O17".parseOctInt == 15 + assert "17".parseOctInt == 15 + assert "10".parseOctInt == 8 + assert "0o1_0_0".parseOctInt == 64 + rejectParse "".parseOctInt + rejectParse "_".parseOctInt + rejectParse "0o".parseOctInt + rejectParse "9".parseOctInt + rejectParse "0o9".parseOctInt + rejectParse "reject".parseOctInt + testDelete() testFind() testRFind() testCountLines() +testParseInts() assert(insertSep($1000_000) == "1_000_000") assert(insertSep($232) == "232") |