summary refs log tree commit diff stats
path: root/lib/pure/parseutils.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/parseutils.nim')
-rw-r--r--lib/pure/parseutils.nim124
1 files changed, 96 insertions, 28 deletions
diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim
index 2bb23c626..2ca255fa0 100644
--- a/lib/pure/parseutils.nim
+++ b/lib/pure/parseutils.nim
@@ -12,29 +12,28 @@
 ##
 ## To unpack raw bytes look at the `streams <streams.html>`_ module.
 ##
-## .. code-block:: nim
-##    :test:
+##   ```nim test
+##   let logs = @["2019-01-10: OK_", "2019-01-11: FAIL_", "2019-01: aaaa"]
+##   var outp: seq[string]
 ##
-##    let logs = @["2019-01-10: OK_", "2019-01-11: FAIL_", "2019-01: aaaa"]
-##    var outp: seq[string]
+##   for log in logs:
+##     var res: string
+##     if parseUntil(log, res, ':') == 10: # YYYY-MM-DD == 10
+##       outp.add(res & " - " & captureBetween(log, ' ', '_'))
+##   doAssert outp == @["2019-01-10 - OK", "2019-01-11 - FAIL"]
+##   ```
 ##
-##    for log in logs:
-##      var res: string
-##      if parseUntil(log, res, ':') == 10: # YYYY-MM-DD == 10
-##        outp.add(res & " - " & captureBetween(log, ' ', '_'))
-##    doAssert outp == @["2019-01-10 - OK", "2019-01-11 - FAIL"]
+##   ```nim test
+##   from std/strutils import Digits, parseInt
 ##
-## .. code-block:: nim
-##    :test:
-##    from std/strutils import Digits, parseInt
-##
-##    let
-##      input1 = "2019 school start"
-##      input2 = "3 years back"
-##      startYear = input1[0 .. skipWhile(input1, Digits)-1] # 2019
-##      yearsBack = input2[0 .. skipWhile(input2, Digits)-1] # 3
-##      examYear = parseInt(startYear) + parseInt(yearsBack)
-##    doAssert "Examination is in " & $examYear == "Examination is in 2022"
+##   let
+##     input1 = "2019 school start"
+##     input2 = "3 years back"
+##     startYear = input1[0 .. skipWhile(input1, Digits)-1] # 2019
+##     yearsBack = input2[0 .. skipWhile(input2, Digits)-1] # 3
+##     examYear = parseInt(startYear) + parseInt(yearsBack)
+##   doAssert "Examination is in " & $examYear == "Examination is in 2022"
+##   ```
 ##
 ## **See also:**
 ## * `strutils module<strutils.html>`_ for combined and identical parsing proc's
@@ -461,6 +460,8 @@ proc parseBiggestInt*(s: openArray[char], number: var BiggestInt): int {.
     var res: BiggestInt
     doAssert parseBiggestInt("9223372036854775807", res) == 19
     doAssert res == 9223372036854775807
+    doAssert parseBiggestInt("-2024_05_09", res) == 11
+    doAssert res == -20240509
   var res = BiggestInt(0)
   # use 'res' for exception safety (don't write to 'number' in case of an
   # overflow exception):
@@ -475,10 +476,8 @@ proc parseInt*(s: openArray[char], number: var int): int {.
   ## `ValueError` is raised if the parsed integer is out of the valid range.
   runnableExamples:
     var res: int
-    doAssert parseInt("2019", res, 0) == 4
-    doAssert res == 2019
-    doAssert parseInt("2019", res, 2) == 2
-    doAssert res == 19
+    doAssert parseInt("-2024_05_02", res) == 11
+    doAssert res == -20240502
   var res = BiggestInt(0)
   result = parseBiggestInt(s, res)
   when sizeof(int) <= 4:
@@ -592,6 +591,71 @@ proc parseFloat*(s: openArray[char], number: var float): int {.
   if result != 0:
     number = bf
 
+func toLowerAscii(c: char): char =
+  if c in {'A'..'Z'}: char(uint8(c) xor 0b0010_0000'u8) else: c
+
+func parseSize*(s: openArray[char], size: var int64, alwaysBin=false): int =
+  ## Parse a size qualified by binary or metric units into `size`.  This format
+  ## is often called "human readable".  Result is the number of processed chars
+  ## or 0 on parse errors and size is rounded to the nearest integer.  Trailing
+  ## garbage like "/s" in "1k/s" is allowed and detected by `result < s.len`.
+  ##
+  ## To simplify use, following non-rare wild conventions, and since fractional
+  ## data like milli-bytes is so rare, unit matching is case-insensitive but for
+  ## the 'i' distinguishing binary-metric from metric (which cannot be 'I').
+  ##
+  ## An optional trailing 'B|b' is ignored but processed.  I.e., you must still
+  ## know if units are bytes | bits or infer this fact via the case of s[^1] (if
+  ## users can even be relied upon to use 'B' for byte and 'b' for bit or have
+  ## that be s[^1]).
+  ##
+  ## If `alwaysBin==true` then scales are always binary-metric, but e.g. "KiB"
+  ## is still accepted for clarity.  If the value would exceed the range of
+  ## `int64`, `size` saturates to `int64.high`.  Supported metric prefix chars
+  ## include k, m, g, t, p, e, z, y (but z & y saturate unless the number is a
+  ## small fraction).
+  ##
+  ## **See also:**
+  ## * https://en.wikipedia.org/wiki/Binary_prefix
+  ## * `formatSize module<strutils.html>`_ for formatting
+  runnableExamples:
+    var res: int64  # caller must still know if 'b' refers to bytes|bits
+    doAssert parseSize("10.5 MB", res) == 7
+    doAssert res == 10_500_000  # decimal metric Mega prefix
+    doAssert parseSize("64 mib", res) == 6
+    doAssert res == 67108864    # 64 shl 20
+    doAssert parseSize("1G/h", res, true) == 2 # '/' stops parse
+    doAssert res == 1073741824  # 1 shl 30, forced binary metric
+  const prefix = "b" & "kmgtpezy"       # byte|bit & lowCase metric-ish prefixes
+  const scaleM = [1.0, 1e3, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21, 1e24] # 10^(3*idx)
+  const scaleB = [1.0, 1024, 1048576, 1073741824, 1099511627776.0,  # 2^(10*idx)
+                  1125899906842624.0, 1152921504606846976.0,        # ldexp?
+                  1.180591620717411303424e21, 1.208925819614629174706176e24]
+  var number: float
+  var scale = 1.0
+  result = parseFloat(s, number)
+  if number < 0:                        # While parseFloat accepts negatives ..
+    result = 0                          #.. we do not since sizes cannot be < 0
+  if result > 0:
+    let start = result                  # Save spot to maybe unwind white to EOS
+    while result < s.len and s[result] in Whitespace:
+      inc result
+    if result < s.len:                  # Illegal starting char => unity
+      if (let si = prefix.find(s[result].toLowerAscii); si >= 0):
+        inc result                      # Now parse the scale
+        scale = if alwaysBin: scaleB[si] else: scaleM[si]
+        if result < s.len and s[result] == 'i':
+          scale = scaleB[si]            # Switch from default to binary-metric
+          inc result
+        if result < s.len and s[result].toLowerAscii == 'b':
+          inc result                    # Skip optional '[bB]'
+    else:                               # Unwind result advancement when there..
+      result = start                    #..is no unit to the end of `s`.
+    var sizeF = number * scale + 0.5    # Saturate to int64.high when too big
+    size = if sizeF > 9223372036854774784.0: int64.high else: sizeF.int64
+# Above constant=2^63-1024 avoids C UB; github.com/nim-lang/Nim/issues/20102 or
+# stackoverflow.com/questions/20923556/math-pow2-63-1-math-pow2-63-512-is-true
+
 type
   InterpolatedKind* = enum ## Describes for `interpolatedFragments`
                            ## which part of the interpolated string is
@@ -928,6 +992,10 @@ proc parseBiggestInt*(s: string, number: var BiggestInt, start = 0): int {.noSid
     var res: BiggestInt
     doAssert parseBiggestInt("9223372036854775807", res, 0) == 19
     doAssert res == 9223372036854775807
+    doAssert parseBiggestInt("-2024_05_09", res) == 11
+    doAssert res == -20240509
+    doAssert parseBiggestInt("-2024_05_02", res, 7) == 4
+    doAssert res == 502
   parseBiggestInt(s.toOpenArray(start, s.high), number)
 
 proc parseInt*(s: string, number: var int, start = 0): int {.noSideEffect, raises: [ValueError].} =
@@ -936,10 +1004,10 @@ proc parseInt*(s: string, number: var int, start = 0): int {.noSideEffect, raise
   ## `ValueError` is raised if the parsed integer is out of the valid range.
   runnableExamples:
     var res: int
-    doAssert parseInt("2019", res, 0) == 4
-    doAssert res == 2019
-    doAssert parseInt("2019", res, 2) == 2
-    doAssert res == 19
+    doAssert parseInt("-2024_05_02", res) == 11
+    doAssert res == -20240502
+    doAssert parseInt("-2024_05_02", res, 7) == 4
+    doAssert res == 502
   parseInt(s.toOpenArray(start, s.high), number)