1 files changed, 227 insertions, 74 deletions
diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim
index fb4bc19af..ba09347a2 100644
--- a/lib/pure/parseutils.nim
+++ b/lib/pure/parseutils.nim
@@ -7,9 +7,46 @@
 #    distribution, for details about the copyright.
 #
 
-## This module contains helpers for parsing tokens, numbers, identifiers, etc.
+## This module contains helpers for parsing tokens, numbers, integers, floats,
+## identifiers, etc.
 ##
 ## To unpack raw bytes look at the `streams <streams.html>`_ module.
+##
+##
+## .. code-block::
+##    import parseutils
+##
+##    let logs = @["2019-01-10: OK_", "2019-01-11: FAIL_", "2019-01: aaaa"]
+##
+##    for log in logs:
+##      var res: string
+##      if parseUntil(log, res, ':') == 10: # YYYY-MM-DD == 10
+##        echo res & " - " & captureBetween(log, ' ', '_')
+##        # => 2019-01-10 - OK
+##
+##
+## .. code-block::
+##    import parseutils
+##    from strutils import Digits, parseInt
+##
+##    let userInput1 = "2019 school start"
+##    let userInput2 = "3 years back"
+##
+##    let startYear = input1[0..skipWhile(input1, Digits)-1] # 2019
+##    let yearsBack = input2[0..skipWhile(input2, Digits)-1] # 3
+##
+##    echo "Examination is in " & $(parseInt(startYear) + parseInt(yearsBack))
+##
+##
+## **See also:**
+## * `strutils module<strutils.html>`_ for combined and identical parsing proc's
+## * `json module<json.html>`_ for a JSON parser
+## * `parsecfg module<parsecfg.html>`_ for a configuration file parser
+## * `parsecsv module<parsecsv.html>`_ for a simple CSV (comma separated value) parser
+## * `parseopt module<parseopt.html>`_ for a command line parser
+## * `parsexml module<parsexml.html>`_ for a XML / HTML parser
+## * `other parsers<lib.html#pure-libraries-parsers>`_ for other parsers
+
 
 {.deadCodeElim: on.}  # dce option deprecated
 
@@ -35,21 +72,20 @@ proc parseHex*(s: string, number: var int, start = 0; maxLen = 0): int {.
   ## proc is sensitive to the already existing value of ``number`` and will
   ## likely not do what you want unless you make sure ``number`` is zero. You
   ## can use this feature to *chain* calls, though the result int will quickly
-  ## overflow. Example:
-  ##
-  ## .. code-block:: nim
-  ##   var value = 0
-  ##   discard parseHex("0x38", value)
-  ##   assert value == 56
-  ##   discard parseHex("0x34", value)
-  ##   assert value == 56 * 256 + 52
-  ##   value = -1
-  ##   discard parseHex("0x38", value)
-  ##   assert value == -200
+  ## overflow.
   ##
   ## If ``maxLen == 0`` the length of the hexadecimal number has no upper bound.
   ## Else no more than ``start + maxLen`` characters are parsed, up to the
   ## length of the string.
+  runnableExamples:
+    var value = 0
+    discard parseHex("0x38", value)
+    assert value == 56
+    discard parseHex("0x34", value)
+    assert value == 56 * 256 + 52
+    value = -1
+    discard parseHex("0x38", value)
+    assert value == -200
   var i = start
   var foundDigit = false
   # get last index based on minimum `start + maxLen` or `s.len`
@@ -80,6 +116,11 @@ proc parseOct*(s: string, number: var int, start = 0, maxLen = 0): int  {.
   ## If ``maxLen == 0`` the length of the octal number has no upper bound.
   ## Else no more than ``start + maxLen`` characters are parsed, up to the
   ## length of the string.
+  runnableExamples:
+    var res: int
+    doAssert parseOct("12", res) == 2
+    doAssert res == 10
+    doAssert parseOct("9", res) == 0
   var i = start
   var foundDigit = false
   # get last index based on minimum `start + maxLen` or `s.len`
@@ -95,7 +136,7 @@ proc parseOct*(s: string, number: var int, start = 0, maxLen = 0): int  {.
     inc(i)
   if foundDigit: result = i-start
 
-proc parseBin*(s: string, number: var int, start = 0, maxLen = 0): int  {.
+proc parseBin*(s: string, number: var int, start = 0, maxLen = 0): int {.
   rtl, extern: "npuParseBin", noSideEffect.} =
   ## Parses an binary number and stores its value in ``number``. Returns
   ## the number of the parsed characters or 0 in case of an error.
@@ -103,6 +144,10 @@ proc parseBin*(s: string, number: var int, start = 0, maxLen = 0): int  {.
   ## If ``maxLen == 0`` the length of the binary number has no upper bound.
   ## Else no more than ``start + maxLen`` characters are parsed, up to the
   ## length of the string.
+  runnableExamples:
+    var res: int
+    doAssert parseBin("010011100110100101101101", res) == 24
+    doAssert parseBin("3", res) == 0
   var i = start
   var foundDigit = false
   # get last index based on minimum `start + maxLen` or `s.len`
@@ -119,8 +164,16 @@ proc parseBin*(s: string, number: var int, start = 0, maxLen = 0): int  {.
   if foundDigit: result = i-start
 
 proc parseIdent*(s: string, ident: var string, start = 0): int =
-  ## parses an identifier and stores it in ``ident``. Returns
+  ## Parses an identifier and stores it in ``ident``. Returns
   ## the number of the parsed characters or 0 in case of an error.
+  runnableExamples:
+    var res: string
+    doAssert parseIdent("Hello World", res, 0) == 5
+    doAssert res == "Hello"
+    doAssert parseIdent("Hello World", res, 1) == 4
+    doAssert res == "ello"
+    doAssert parseIdent("Hello World", res, 6) == 5
+    doAssert res == "World"
   var i = start
   if i < s.len and s[i] in IdentStartChars:
     inc(i)
@@ -129,8 +182,13 @@ proc parseIdent*(s: string, ident: var string, start = 0): int =
     result = i-start
 
 proc parseIdent*(s: string, start = 0): string =
-  ## parses an identifier and returns it or an empty string in
+  ## Parses an identifier and returns it or an empty string in
   ## case of an error.
+  runnableExamples:
+    doAssert parseIdent("Hello World", 0) == "Hello"
+    doAssert parseIdent("Hello World", 1) == "ello"
+    doAssert parseIdent("Hello World", 5) == ""
+    doAssert parseIdent("Hello World", 6) == "World"
   result = ""
   var i = start
   if i < s.len and s[i] in IdentStartChars:
@@ -138,33 +196,35 @@ proc parseIdent*(s: string, start = 0): string =
     while i < s.len and s[i] in IdentChars: inc(i)
     result = substr(s, start, i-1)
 
-proc parseToken*(s: string, token: var string, validChars: set[char],
-                 start = 0): int {.inline, deprecated.} =
-  ## parses a token and stores it in ``token``. Returns
-  ## the number of the parsed characters or 0 in case of an error. A token
-  ## consists of the characters in `validChars`.
-  ##
-  ## **Deprecated since version 0.8.12**: Use ``parseWhile`` instead.
-  var i = start
-  while i < s.len and s[i] in validChars: inc(i)
-  result = i-start
-  token = substr(s, start, i-1)
-
 proc skipWhitespace*(s: string, start = 0): int {.inline.} =
-  ## skips the whitespace starting at ``s[start]``. Returns the number of
+  ## Skips the whitespace starting at ``s[start]``. Returns the number of
   ## skipped characters.
+  runnableExamples:
+    doAssert skipWhitespace("Hello World", 0) == 0
+    doAssert skipWhitespace(" Hello World", 0) == 1
+    doAssert skipWhitespace("Hello World", 5) == 1
+    doAssert skipWhitespace("Hello  World", 5) == 2
   while start+result < s.len and s[start+result] in Whitespace: inc(result)
 
 proc skip*(s, token: string, start = 0): int {.inline.} =
-  ## skips the `token` starting at ``s[start]``. Returns the length of `token`
+  ## Skips the `token` starting at ``s[start]``. Returns the length of `token`
   ## or 0 if there was no `token` at ``s[start]``.
+  runnableExamples:
+    doAssert skip("2019-01-22", "2019", 0) == 4
+    doAssert skip("2019-01-22", "19", 0) == 0
+    doAssert skip("2019-01-22", "19", 2) == 2
+    doAssert skip("CAPlow", "CAP", 0) == 3
+    doAssert skip("CAPlow", "cap", 0) == 0
   while start+result < s.len and result < token.len and
       s[result+start] == token[result]:
     inc(result)
   if result != token.len: result = 0
 
 proc skipIgnoreCase*(s, token: string, start = 0): int =
-  ## same as `skip` but case is ignored for token matching.
+  ## Same as `skip` but case is ignored for token matching.
+  runnableExamples:
+    doAssert skipIgnoreCase("CAPlow", "CAP", 0) == 3
+    doAssert skipIgnoreCase("CAPlow", "cap", 0) == 3
   while start+result < s.len and result < token.len and
       toLower(s[result+start]) == toLower(token[result]): inc(result)
   if result != token.len: result = 0
@@ -173,24 +233,45 @@ proc skipUntil*(s: string, until: set[char], start = 0): int {.inline.} =
   ## Skips all characters until one char from the set `until` is found
   ## or the end is reached.
   ## Returns number of characters skipped.
+  runnableExamples:
+    doAssert skipUntil("Hello World", {'W', 'e'}, 0) == 1
+    doAssert skipUntil("Hello World", {'W'}, 0) == 6
+    doAssert skipUntil("Hello World", {'W', 'd'}, 0) == 6
   while start+result < s.len and s[result+start] notin until: inc(result)
 
 proc skipUntil*(s: string, until: char, start = 0): int {.inline.} =
   ## Skips all characters until the char `until` is found
   ## or the end is reached.
   ## Returns number of characters skipped.
+  runnableExamples:
+    doAssert skipUntil("Hello World", 'o', 0) == 4
+    doAssert skipUntil("Hello World", 'o', 4) == 0
+    doAssert skipUntil("Hello World", 'W', 0) == 6
+    doAssert skipUntil("Hello World", 'w', 0) == 11
   while start+result < s.len and s[result+start] != until: inc(result)
 
 proc skipWhile*(s: string, toSkip: set[char], start = 0): int {.inline.} =
   ## Skips all characters while one char from the set `token` is found.
   ## Returns number of characters skipped.
+  runnableExamples:
+    doAssert skipWhile("Hello World", {'H', 'e'}) == 2
+    doAssert skipWhile("Hello World", {'e'}) == 0
+    doAssert skipWhile("Hello World", {'W', 'o', 'r'}, 6) == 3
   while start+result < s.len and s[result+start] in toSkip: inc(result)
 
 proc parseUntil*(s: string, token: var string, until: set[char],
                  start = 0): int {.inline.} =
-  ## parses a token and stores it in ``token``. Returns
+  ## Parses a token and stores it in ``token``. Returns
   ## the number of the parsed characters or 0 in case of an error. A token
   ## consists of the characters notin `until`.
+  runnableExamples:
+    var myToken: string
+    doAssert parseUntil("Hello World", myToken, {'W', 'o', 'r'}) == 4
+    doAssert myToken == "Hell"
+    doAssert parseUntil("Hello World", myToken, {'W', 'r'}) == 6
+    doAssert myToken == "Hello "
+    doAssert parseUntil("Hello World", myToken, {'W', 'r'}, 3) == 3
+    doAssert myToken == "lo "
   var i = start
   while i < s.len and s[i] notin until: inc(i)
   result = i-start
@@ -198,9 +279,17 @@ proc parseUntil*(s: string, token: var string, until: set[char],
 
 proc parseUntil*(s: string, token: var string, until: char,
                  start = 0): int {.inline.} =
-  ## parses a token and stores it in ``token``. Returns
+  ## Parses a token and stores it in ``token``. Returns
   ## the number of the parsed characters or 0 in case of an error. A token
   ## consists of any character that is not the `until` character.
+  runnableExamples:
+    var myToken: string
+    doAssert parseUntil("Hello World", myToken, 'W') == 6
+    doAssert myToken == "Hello "
+    doAssert parseUntil("Hello World", myToken, 'o') == 4
+    doAssert myToken == "Hell"
+    doAssert parseUntil("Hello World", myToken, 'o', 2) == 2
+    doAssert myToken == "ll"
   var i = start
   while i < s.len and s[i] != until: inc(i)
   result = i-start
@@ -208,9 +297,15 @@ proc parseUntil*(s: string, token: var string, until: char,
 
 proc parseUntil*(s: string, token: var string, until: string,
                  start = 0): int {.inline.} =
-  ## parses a token and stores it in ``token``. Returns
+  ## Parses a token and stores it in ``token``. Returns
   ## the number of the parsed characters or 0 in case of an error. A token
   ## consists of any character that comes before the `until`  token.
+  runnableExamples:
+    var myToken: string
+    doAssert parseUntil("Hello World", myToken, "Wor") == 6
+    doAssert myToken == "Hello "
+    doAssert parseUntil("Hello World", myToken, "Wor", 2) == 4
+    doAssert myToken == "llo "
   if until.len == 0:
     token.setLen(0)
     return 0
@@ -227,9 +322,15 @@ proc parseUntil*(s: string, token: var string, until: string,
 
 proc parseWhile*(s: string, token: var string, validChars: set[char],
                  start = 0): int {.inline.} =
-  ## parses a token and stores it in ``token``. Returns
+  ## Parses a token and stores it in ``token``. Returns
   ## the number of the parsed characters or 0 in case of an error. A token
   ## consists of the characters in `validChars`.
+  runnableExamples:
+    var myToken: string
+    doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 0) == 0
+    doAssert myToken.len() == 0
+    doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 6) == 3
+    doAssert myToken == "Wor"
   var i = start
   while i < s.len and s[i] in validChars: inc(i)
   result = i-start
@@ -238,12 +339,21 @@ proc parseWhile*(s: string, token: var string, validChars: set[char],
 proc captureBetween*(s: string, first: char, second = '\0', start = 0): string =
   ## Finds the first occurrence of ``first``, then returns everything from there
   ## up to ``second`` (if ``second`` is '\0', then ``first`` is used).
+  runnableExamples:
+    doAssert captureBetween("Hello World", 'e') == "llo World"
+    doAssert captureBetween("Hello World", 'e', 'r') == "llo Wo"
+    doAssert captureBetween("Hello World", 'l', start = 6) == "d"
   var i = skipUntil(s, first, start)+1+start
   result = ""
   discard s.parseUntil(result, if second == '\0': first else: second, i)
 
-{.push overflowChecks: on.}
-# this must be compiled with overflow checking turned on:
+proc integerOutOfRangeError() {.noinline.} =
+  raise newException(ValueError, "Parsed integer outside of valid range")
+
+# See #6752
+when defined(js):
+  {.push overflowChecks: off.}
+
 proc rawParseInt(s: string, b: var BiggestInt, start = 0): int =
   var
     sign: BiggestInt = -1
@@ -256,48 +366,67 @@ proc rawParseInt(s: string, b: var BiggestInt, start = 0): int =
   if i < s.len and s[i] in {'0'..'9'}:
     b = 0
     while i < s.len and s[i] in {'0'..'9'}:
-      b = b * 10 - (ord(s[i]) - ord('0'))
+      let c = ord(s[i]) - ord('0')
+      if b >= (low(BiggestInt) + c) div 10:
+        b = b * 10 - c
+      else:
+        integerOutOfRangeError()
       inc(i)
       while i < s.len and s[i] == '_': inc(i) # underscores are allowed and ignored
-    b = b * sign
-    result = i - start
-{.pop.} # overflowChecks
+    if sign == -1 and b == low(BiggestInt):
+      integerOutOfRangeError()
+    else:
+      b = b * sign
+      result = i - start
+
+when defined(js):
+  {.pop.} # overflowChecks: off
 
 proc parseBiggestInt*(s: string, number: var BiggestInt, start = 0): int {.
-  rtl, extern: "npuParseBiggestInt", noSideEffect.} =
-  ## parses an integer starting at `start` and stores the value into `number`.
+  rtl, extern: "npuParseBiggestInt", noSideEffect, raises: [ValueError].} =
+  ## Parses an integer starting at `start` and stores the value into `number`.
   ## Result is the number of processed chars or 0 if there is no integer.
-  ## `OverflowError` is raised if an overflow occurs.
+  ## `ValueError` is raised if the parsed integer is out of the valid range.
+  runnableExamples:
+    var res: BiggestInt
+    doAssert parseBiggestInt("9223372036854775807", res, 0) == 19
+    doAssert res == 9223372036854775807
   var res: BiggestInt
   # use 'res' for exception safety (don't write to 'number' in case of an
   # overflow exception):
   result = rawParseInt(s, res, start)
-  number = res
+  if result != 0:
+    number = res
 
 proc parseInt*(s: string, number: var int, start = 0): int {.
-  rtl, extern: "npuParseInt", noSideEffect.} =
-  ## parses an integer starting at `start` and stores the value into `number`.
+  rtl, extern: "npuParseInt", noSideEffect, raises: [ValueError].} =
+  ## Parses an integer starting at `start` and stores the value into `number`.
   ## Result is the number of processed chars or 0 if there is no integer.
-  ## `OverflowError` is raised if an overflow occurs.
+  ## `ValueError` is raised if the parsed integer is out of the valid range.
+  runnableExamples:
+    var res: int
+    doAssert parseInt("2019", res, 0) == 4
+    doAssert res == 2019
+    doAssert parseInt("2019", res, 2) == 2
+    doAssert res == 19
   var res: BiggestInt
   result = parseBiggestInt(s, res, start)
-  if (sizeof(int) <= 4) and
-      ((res < low(int)) or (res > high(int))):
-    raise newException(OverflowError, "overflow")
-  elif result != 0:
+  when sizeof(int) <= 4:
+    if res < low(int) or res > high(int):
+      integerOutOfRangeError()
+  if result != 0:
     number = int(res)
 
-proc parseSaturatedNatural*(s: string, b: var int, start = 0): int =
-  ## parses a natural number into ``b``. This cannot raise an overflow
+proc parseSaturatedNatural*(s: string, b: var int, start = 0): int {.
+  raises: [].}=
+  ## Parses a natural number into ``b``. This cannot raise an overflow
   ## error. ``high(int)`` is returned for an overflow.
   ## The number of processed character is returned.
   ## This is usually what you really want to use instead of `parseInt`:idx:.
-  ## Example:
-  ##
-  ## .. code-block:: nim
-  ##   var res = 0
-  ##   discard parseSaturatedNatural("848", res)
-  ##   doAssert res == 848
+  runnableExamples:
+    var res = 0
+    discard parseSaturatedNatural("848", res)
+    doAssert res == 848
   var i = start
   if i < s.len and s[i] == '+': inc(i)
   if i < s.len and s[i] in {'0'..'9'}:
@@ -312,12 +441,13 @@ proc parseSaturatedNatural*(s: string, b: var int, start = 0): int =
       while i < s.len and s[i] == '_': inc(i) # underscores are allowed and ignored
     result = i - start
 
-# overflowChecks doesn't work with BiggestUInt
 proc rawParseUInt(s: string, b: var BiggestUInt, start = 0): int =
   var
     res = 0.BiggestUInt
     prev = 0.BiggestUInt
     i = start
+  if i < s.len - 1 and s[i] == '-' and s[i + 1] in {'0'..'9'}:
+    integerOutOfRangeError()
   if i < s.len and s[i] == '+': inc(i) # Allow
   if i < s.len and s[i] in {'0'..'9'}:
     b = 0
@@ -325,56 +455,75 @@ proc rawParseUInt(s: string, b: var BiggestUInt, start = 0): int =
       prev = res
       res = res * 10 + (ord(s[i]) - ord('0')).BiggestUInt
       if prev > res:
-        return 0 # overflowChecks emulation
+        integerOutOfRangeError()
       inc(i)
       while i < s.len and s[i] == '_': inc(i) # underscores are allowed and ignored
     b = res
     result = i - start
 
 proc parseBiggestUInt*(s: string, number: var BiggestUInt, start = 0): int {.
-  rtl, extern: "npuParseBiggestUInt", noSideEffect.} =
-  ## parses an unsigned integer starting at `start` and stores the value
+  rtl, extern: "npuParseBiggestUInt", noSideEffect, raises: [ValueError].} =
+  ## Parses an unsigned integer starting at `start` and stores the value
   ## into `number`.
-  ## Result is the number of processed chars or 0 if there is no integer
-  ## or overflow detected.
+  ## `ValueError` is raised if the parsed integer is out of the valid range.
+  runnableExamples:
+    var res: BiggestUInt
+    doAssert parseBiggestUInt("12", res, 0) == 2
+    doAssert res == 12
+    doAssert parseBiggestUInt("1111111111111111111", res, 0) == 19
+    doAssert res == 1111111111111111111'u64
   var res: BiggestUInt
   # use 'res' for exception safety (don't write to 'number' in case of an
   # overflow exception):
   result = rawParseUInt(s, res, start)
-  number = res
+  if result != 0:
+    number = res
 
 proc parseUInt*(s: string, number: var uint, start = 0): int {.
-  rtl, extern: "npuParseUInt", noSideEffect.} =
-  ## parses an unsigned integer starting at `start` and stores the value
+  rtl, extern: "npuParseUInt", noSideEffect, raises: [ValueError].} =
+  ## Parses an unsigned integer starting at `start` and stores the value
   ## into `number`.
-  ## Result is the number of processed chars or 0 if there is no integer or
-  ## overflow detected.
+  ## `ValueError` is raised if the parsed integer is out of the valid range.
+  runnableExamples:
+    var res: uint
+    doAssert parseUInt("3450", res) == 4
+    doAssert res == 3450
+    doAssert parseUInt("3450", res, 2) == 2
+    doAssert res == 50
   var res: BiggestUInt
   result = parseBiggestUInt(s, res, start)
   when sizeof(BiggestUInt) > sizeof(uint) and sizeof(uint) <= 4:
     if res > 0xFFFF_FFFF'u64:
-      raise newException(OverflowError, "overflow")
+      integerOutOfRangeError()
   if result != 0:
     number = uint(res)
 
 proc parseBiggestFloat*(s: string, number: var BiggestFloat, start = 0): int {.
   magic: "ParseBiggestFloat", importc: "nimParseBiggestFloat", noSideEffect.}
-  ## parses a float starting at `start` and stores the value into `number`.
+  ## Parses a float starting at `start` and stores the value into `number`.
   ## Result is the number of processed chars or 0 if a parsing error
   ## occurred.
 
 proc parseFloat*(s: string, number: var float, start = 0): int {.
   rtl, extern: "npuParseFloat", noSideEffect.} =
-  ## parses a float starting at `start` and stores the value into `number`.
+  ## Parses a float starting at `start` and stores the value into `number`.
   ## Result is the number of processed chars or 0 if there occurred a parsing
   ## error.
+  runnableExamples:
+    var res: float
+    doAssert parseFloat("32", res, 0) == 2
+    doAssert res == 32.0
+    doAssert parseFloat("32.57", res, 0) == 5
+    doAssert res == 32.57
+    doAssert parseFloat("32.57", res, 3) == 2
+    doAssert res == 57.00
   var bf: BiggestFloat
   result = parseBiggestFloat(s, bf, start)
   if result != 0:
     number = bf
 
 type
-  InterpolatedKind* = enum   ## describes for `interpolatedFragments`
+  InterpolatedKind* = enum   ## Describes for `interpolatedFragments`
                              ## which part of the interpolated string is
                              ## yielded; for example in "str$$$var${expr}"
     ikStr,                   ## ``str`` part of the interpolated string
@@ -490,4 +639,8 @@ when isMainModule:
   doAssert(parseSaturatedNatural("1_000_000", value) == 9)
   doAssert value == 1_000_000
 
+  var i64Value: int64
+  discard parseBiggestInt("9223372036854775807", i64Value)
+  doAssert i64Value == 9223372036854775807
+
 {.pop.}