diff options
Diffstat (limited to 'lib/pure/parseutils.nim')
-rw-r--r-- | lib/pure/parseutils.nim | 124 |
1 files changed, 96 insertions, 28 deletions
diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim index 2bb23c626..2ca255fa0 100644 --- a/lib/pure/parseutils.nim +++ b/lib/pure/parseutils.nim @@ -12,29 +12,28 @@ ## ## To unpack raw bytes look at the `streams <streams.html>`_ module. ## -## .. code-block:: nim -## :test: +## ```nim test +## let logs = @["2019-01-10: OK_", "2019-01-11: FAIL_", "2019-01: aaaa"] +## var outp: seq[string] ## -## let logs = @["2019-01-10: OK_", "2019-01-11: FAIL_", "2019-01: aaaa"] -## var outp: seq[string] +## for log in logs: +## var res: string +## if parseUntil(log, res, ':') == 10: # YYYY-MM-DD == 10 +## outp.add(res & " - " & captureBetween(log, ' ', '_')) +## doAssert outp == @["2019-01-10 - OK", "2019-01-11 - FAIL"] +## ``` ## -## for log in logs: -## var res: string -## if parseUntil(log, res, ':') == 10: # YYYY-MM-DD == 10 -## outp.add(res & " - " & captureBetween(log, ' ', '_')) -## doAssert outp == @["2019-01-10 - OK", "2019-01-11 - FAIL"] +## ```nim test +## from std/strutils import Digits, parseInt ## -## .. code-block:: nim -## :test: -## from std/strutils import Digits, parseInt -## -## let -## input1 = "2019 school start" -## input2 = "3 years back" -## startYear = input1[0 .. skipWhile(input1, Digits)-1] # 2019 -## yearsBack = input2[0 .. skipWhile(input2, Digits)-1] # 3 -## examYear = parseInt(startYear) + parseInt(yearsBack) -## doAssert "Examination is in " & $examYear == "Examination is in 2022" +## let +## input1 = "2019 school start" +## input2 = "3 years back" +## startYear = input1[0 .. skipWhile(input1, Digits)-1] # 2019 +## yearsBack = input2[0 .. skipWhile(input2, Digits)-1] # 3 +## examYear = parseInt(startYear) + parseInt(yearsBack) +## doAssert "Examination is in " & $examYear == "Examination is in 2022" +## ``` ## ## **See also:** ## * `strutils module<strutils.html>`_ for combined and identical parsing proc's @@ -461,6 +460,8 @@ proc parseBiggestInt*(s: openArray[char], number: var BiggestInt): int {. var res: BiggestInt doAssert parseBiggestInt("9223372036854775807", res) == 19 doAssert res == 9223372036854775807 + doAssert parseBiggestInt("-2024_05_09", res) == 11 + doAssert res == -20240509 var res = BiggestInt(0) # use 'res' for exception safety (don't write to 'number' in case of an # overflow exception): @@ -475,10 +476,8 @@ proc parseInt*(s: openArray[char], number: var int): int {. ## `ValueError` is raised if the parsed integer is out of the valid range. runnableExamples: var res: int - doAssert parseInt("2019", res, 0) == 4 - doAssert res == 2019 - doAssert parseInt("2019", res, 2) == 2 - doAssert res == 19 + doAssert parseInt("-2024_05_02", res) == 11 + doAssert res == -20240502 var res = BiggestInt(0) result = parseBiggestInt(s, res) when sizeof(int) <= 4: @@ -592,6 +591,71 @@ proc parseFloat*(s: openArray[char], number: var float): int {. if result != 0: number = bf +func toLowerAscii(c: char): char = + if c in {'A'..'Z'}: char(uint8(c) xor 0b0010_0000'u8) else: c + +func parseSize*(s: openArray[char], size: var int64, alwaysBin=false): int = + ## Parse a size qualified by binary or metric units into `size`. This format + ## is often called "human readable". Result is the number of processed chars + ## or 0 on parse errors and size is rounded to the nearest integer. Trailing + ## garbage like "/s" in "1k/s" is allowed and detected by `result < s.len`. + ## + ## To simplify use, following non-rare wild conventions, and since fractional + ## data like milli-bytes is so rare, unit matching is case-insensitive but for + ## the 'i' distinguishing binary-metric from metric (which cannot be 'I'). + ## + ## An optional trailing 'B|b' is ignored but processed. I.e., you must still + ## know if units are bytes | bits or infer this fact via the case of s[^1] (if + ## users can even be relied upon to use 'B' for byte and 'b' for bit or have + ## that be s[^1]). + ## + ## If `alwaysBin==true` then scales are always binary-metric, but e.g. "KiB" + ## is still accepted for clarity. If the value would exceed the range of + ## `int64`, `size` saturates to `int64.high`. Supported metric prefix chars + ## include k, m, g, t, p, e, z, y (but z & y saturate unless the number is a + ## small fraction). + ## + ## **See also:** + ## * https://en.wikipedia.org/wiki/Binary_prefix + ## * `formatSize module<strutils.html>`_ for formatting + runnableExamples: + var res: int64 # caller must still know if 'b' refers to bytes|bits + doAssert parseSize("10.5 MB", res) == 7 + doAssert res == 10_500_000 # decimal metric Mega prefix + doAssert parseSize("64 mib", res) == 6 + doAssert res == 67108864 # 64 shl 20 + doAssert parseSize("1G/h", res, true) == 2 # '/' stops parse + doAssert res == 1073741824 # 1 shl 30, forced binary metric + const prefix = "b" & "kmgtpezy" # byte|bit & lowCase metric-ish prefixes + const scaleM = [1.0, 1e3, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21, 1e24] # 10^(3*idx) + const scaleB = [1.0, 1024, 1048576, 1073741824, 1099511627776.0, # 2^(10*idx) + 1125899906842624.0, 1152921504606846976.0, # ldexp? + 1.180591620717411303424e21, 1.208925819614629174706176e24] + var number: float + var scale = 1.0 + result = parseFloat(s, number) + if number < 0: # While parseFloat accepts negatives .. + result = 0 #.. we do not since sizes cannot be < 0 + if result > 0: + let start = result # Save spot to maybe unwind white to EOS + while result < s.len and s[result] in Whitespace: + inc result + if result < s.len: # Illegal starting char => unity + if (let si = prefix.find(s[result].toLowerAscii); si >= 0): + inc result # Now parse the scale + scale = if alwaysBin: scaleB[si] else: scaleM[si] + if result < s.len and s[result] == 'i': + scale = scaleB[si] # Switch from default to binary-metric + inc result + if result < s.len and s[result].toLowerAscii == 'b': + inc result # Skip optional '[bB]' + else: # Unwind result advancement when there.. + result = start #..is no unit to the end of `s`. + var sizeF = number * scale + 0.5 # Saturate to int64.high when too big + size = if sizeF > 9223372036854774784.0: int64.high else: sizeF.int64 +# Above constant=2^63-1024 avoids C UB; github.com/nim-lang/Nim/issues/20102 or +# stackoverflow.com/questions/20923556/math-pow2-63-1-math-pow2-63-512-is-true + type InterpolatedKind* = enum ## Describes for `interpolatedFragments` ## which part of the interpolated string is @@ -928,6 +992,10 @@ proc parseBiggestInt*(s: string, number: var BiggestInt, start = 0): int {.noSid var res: BiggestInt doAssert parseBiggestInt("9223372036854775807", res, 0) == 19 doAssert res == 9223372036854775807 + doAssert parseBiggestInt("-2024_05_09", res) == 11 + doAssert res == -20240509 + doAssert parseBiggestInt("-2024_05_02", res, 7) == 4 + doAssert res == 502 parseBiggestInt(s.toOpenArray(start, s.high), number) proc parseInt*(s: string, number: var int, start = 0): int {.noSideEffect, raises: [ValueError].} = @@ -936,10 +1004,10 @@ proc parseInt*(s: string, number: var int, start = 0): int {.noSideEffect, raise ## `ValueError` is raised if the parsed integer is out of the valid range. runnableExamples: var res: int - doAssert parseInt("2019", res, 0) == 4 - doAssert res == 2019 - doAssert parseInt("2019", res, 2) == 2 - doAssert res == 19 + doAssert parseInt("-2024_05_02", res) == 11 + doAssert res == -20240502 + doAssert parseInt("-2024_05_02", res, 7) == 4 + doAssert res == 502 parseInt(s.toOpenArray(start, s.high), number) |