diff options
Diffstat (limited to 'lib/pure/strutils.nim')
-rw-r--r-- | lib/pure/strutils.nim | 488 |
1 files changed, 315 insertions, 173 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index cc0f474f4..dbb4db781 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -32,10 +32,6 @@ when defined(nimOldSplit): else: {.pragma: deprecatedSplit.} -type - CharSet* {.deprecated.} = set[char] # for compatibility with Nim -{.deprecated: [TCharSet: CharSet].} - const Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'} ## All the characters that count as whitespace. @@ -78,40 +74,40 @@ proc isAlphaAscii*(c: char): bool {.noSideEffect, procvar, return c in Letters proc isAlphaNumeric*(c: char): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsAlphaNumericChar".}= + rtl, extern: "nsuIsAlphaNumericChar".} = ## Checks whether or not `c` is alphanumeric. ## ## This checks a-z, A-Z, 0-9 ASCII characters only. - return c in Letters or c in Digits + return c in Letters+Digits proc isDigit*(c: char): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsDigitChar".}= + rtl, extern: "nsuIsDigitChar".} = ## Checks whether or not `c` is a number. ## ## This checks 0-9 ASCII characters only. return c in Digits proc isSpaceAscii*(c: char): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsSpaceAsciiChar".}= + rtl, extern: "nsuIsSpaceAsciiChar".} = ## Checks whether or not `c` is a whitespace character. return c in Whitespace proc isLowerAscii*(c: char): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsLowerAsciiChar".}= + rtl, extern: "nsuIsLowerAsciiChar".} = ## Checks whether or not `c` is a lower case character. ## ## This checks ASCII characters only. return c in {'a'..'z'} proc isUpperAscii*(c: char): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsUpperAsciiChar".}= + rtl, extern: "nsuIsUpperAsciiChar".} = ## Checks whether or not `c` is an upper case character. ## ## This checks ASCII characters only. 
return c in {'A'..'Z'} proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsAlphaAsciiStr".}= + rtl, extern: "nsuIsAlphaAsciiStr".} = ## Checks whether or not `s` is alphabetical. ## ## This checks a-z, A-Z ASCII characters only. @@ -123,10 +119,10 @@ proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar, result = true for c in s: - result = c.isAlphaAscii() and result + if not c.isAlphaAscii(): return false proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsAlphaNumericStr".}= + rtl, extern: "nsuIsAlphaNumericStr".} = ## Checks whether or not `s` is alphanumeric. ## ## This checks a-z, A-Z, 0-9 ASCII characters only. @@ -138,10 +134,11 @@ proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar, result = true for c in s: - result = c.isAlphaNumeric() and result + if not c.isAlphaNumeric(): + return false proc isDigit*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsDigitStr".}= + rtl, extern: "nsuIsDigitStr".} = ## Checks whether or not `s` is a numeric value. ## ## This checks 0-9 ASCII characters only. @@ -153,10 +150,11 @@ proc isDigit*(s: string): bool {.noSideEffect, procvar, result = true for c in s: - result = c.isDigit() and result + if not c.isDigit(): + return false proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsSpaceAsciiStr".}= + rtl, extern: "nsuIsSpaceAsciiStr".} = ## Checks whether or not `s` is completely whitespace. ## ## Returns true if all characters in `s` are whitespace @@ -170,7 +168,7 @@ proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar, return false proc isLowerAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsLowerAsciiStr".}= + rtl, extern: "nsuIsLowerAsciiStr".} = ## Checks whether or not `s` contains all lower case characters. ## ## This checks ASCII characters only. 
@@ -185,7 +183,7 @@ proc isLowerAscii*(s: string): bool {.noSideEffect, procvar, true proc isUpperAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsUpperAsciiStr".}= + rtl, extern: "nsuIsUpperAsciiStr".} = ## Checks whether or not `s` contains all upper case characters. ## ## This checks ASCII characters only. @@ -504,16 +502,15 @@ template splitCommon(s, sep, maxsplit, sepLen) = var last = 0 var splits = maxsplit - if len(s) > 0: - while last <= len(s): - var first = last - while last < len(s) and not stringHasSep(s, last, sep): - inc(last) - if splits == 0: last = len(s) - yield substr(s, first, last-1) - if splits == 0: break - dec(splits) - inc(last, sepLen) + while last <= len(s): + var first = last + while last < len(s) and not stringHasSep(s, last, sep): + inc(last) + if splits == 0: last = len(s) + yield substr(s, first, last-1) + if splits == 0: break + dec(splits) + inc(last, sepLen) template oldSplit(s, seps, maxsplit) = var last = 0 @@ -576,15 +573,46 @@ iterator split*(s: string, seps: set[char] = Whitespace, else: splitCommon(s, seps, maxsplit, 1) -iterator splitWhitespace*(s: string): string = - ## Splits at whitespace. - oldSplit(s, Whitespace, -1) +iterator splitWhitespace*(s: string, maxsplit: int = -1): string = + ## Splits the string ``s`` at whitespace stripping leading and trailing + ## whitespace if necessary. If ``maxsplit`` is specified and is positive, + ## no more than ``maxsplit`` splits is made. + ## + ## The following code: + ## + ## .. code-block:: nim + ## let s = " foo \t bar baz " + ## for ms in [-1, 1, 2, 3]: + ## echo "------ maxsplit = ", ms, ":" + ## for item in s.splitWhitespace(maxsplit=ms): + ## echo '"', item, '"' + ## + ## ...results in: + ## + ## .. 
code-block:: + ## ------ maxsplit = -1: + ## "foo" + ## "bar" + ## "baz" + ## ------ maxsplit = 1: + ## "foo" + ## "bar baz " + ## ------ maxsplit = 2: + ## "foo" + ## "bar" + ## "baz " + ## ------ maxsplit = 3: + ## "foo" + ## "bar" + ## "baz" + ## + oldSplit(s, Whitespace, maxsplit) -proc splitWhitespace*(s: string): seq[string] {.noSideEffect, +proc splitWhitespace*(s: string, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuSplitWhitespace".} = - ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_ + ## The same as the `splitWhitespace <#splitWhitespace.i,string,int>`_ ## iterator, but is a proc that returns a sequence of substrings. - accumulateResult(splitWhitespace(s)) + accumulateResult(splitWhitespace(s, maxsplit)) iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a single separator. @@ -640,36 +668,35 @@ template rsplitCommon(s, sep, maxsplit, sepLen) = splits = maxsplit startPos = 0 - if len(s) > 0: - # go to -1 in order to get separators at the beginning - while first >= -1: - while first >= 0 and not stringHasSep(s, first, sep): - dec(first) + # go to -1 in order to get separators at the beginning + while first >= -1: + while first >= 0 and not stringHasSep(s, first, sep): + dec(first) - if splits == 0: - # No more splits means set first to the beginning - first = -1 + if splits == 0: + # No more splits means set first to the beginning + first = -1 - if first == -1: - startPos = 0 - else: - startPos = first + sepLen + if first == -1: + startPos = 0 + else: + startPos = first + sepLen - yield substr(s, startPos, last) + yield substr(s, startPos, last) - if splits == 0: - break + if splits == 0: + break - dec(splits) - dec(first) + dec(splits) + dec(first) - last = first + last = first iterator rsplit*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string = ## Splits the string `s` into substrings from the right using a ## string separator. 
Works exactly the same as `split iterator - ## <#split.i,string,char>`_ except in reverse order. + ## <#split.i,string,char,int>`_ except in reverse order. ## ## .. code-block:: nim ## for piece in "foo bar".rsplit(WhiteSpace): @@ -689,7 +716,7 @@ iterator rsplit*(s: string, sep: char, maxsplit: int = -1): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator - ## <#split.i,string,char>`_ except in reverse order. + ## <#split.i,string,char,int>`_ except in reverse order. ## ## .. code-block:: nim ## for piece in "foo:bar".rsplit(':'): @@ -708,7 +735,7 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1, keepSeparators: bool = false): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator - ## <#split.i,string,string>`_ except in reverse order. + ## <#split.i,string,string,int>`_ except in reverse order. ## ## .. code-block:: nim ## for piece in "foothebar".rsplit("the"): @@ -789,14 +816,20 @@ proc countLines*(s: string): int {.noSideEffect, proc split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {. noSideEffect, rtl, extern: "nsuSplitCharSet".} = - ## The same as the `split iterator <#split.i,string,set[char]>`_, but is a + ## The same as the `split iterator <#split.i,string,set[char],int>`_, but is a ## proc that returns a sequence of substrings. + runnableExamples: + doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"] + doAssert "".split({' '}) == @[""] accumulateResult(split(s, seps, maxsplit)) proc split*(s: string, sep: char, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuSplitChar".} = - ## The same as the `split iterator <#split.i,string,char>`_, but is a proc + ## The same as the `split iterator <#split.i,string,char,int>`_, but is a proc ## that returns a sequence of substrings. 
+ runnableExamples: + doAssert "a,b,c".split(',') == @["a", "b", "c"] + doAssert "".split(' ') == @[""] accumulateResult(split(s, sep, maxsplit)) proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEffect, @@ -804,7 +837,14 @@ proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEff ## Splits the string `s` into substrings using a string separator. ## ## Substrings are separated by the string `sep`. This is a wrapper around the - ## `split iterator <#split.i,string,string>`_. + ## `split iterator <#split.i,string,string,int>`_. + runnableExamples: + doAssert "a,b,c".split(",") == @["a", "b", "c"] + doAssert "a man a plan a canal panama".split("a ") == @["", "man ", "plan ", "canal panama"] + doAssert "".split("Elon Musk") == @[""] + doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"] + + doAssert "a largely spaced sentence".split(" ", maxsplit=1) == @["a", " largely spaced sentence"] doAssert(sep.len > 0) accumulateResult(split(s, sep, maxsplit)) @@ -812,7 +852,7 @@ proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEff proc rsplit*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} = - ## The same as the `rsplit iterator <#rsplit.i,string,set[char]>`_, but is a + ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a ## proc that returns a sequence of substrings. ## ## A possible common use case for `rsplit` is path manipulation, @@ -834,7 +874,7 @@ proc rsplit*(s: string, seps: set[char] = Whitespace, proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuRSplitChar".} = - ## The same as the `split iterator <#rsplit.i,string,char>`_, but is a proc + ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a proc ## that returns a sequence of substrings. 
## ## A possible common use case for `rsplit` is path manipulation, @@ -856,7 +896,7 @@ proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuRSplitString".} = - ## The same as the `split iterator <#rsplit.i,string,string>`_, but is a proc + ## The same as the `rsplit iterator <#rsplit.i,string,string,int>`_, but is a proc ## that returns a sequence of substrings. ## ## A possible common use case for `rsplit` is path manipulation, @@ -873,6 +913,13 @@ proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] ## .. code-block:: nim ## @["Root#Object#Method", "Index"] ## + runnableExamples: + doAssert "a largely spaced sentence".rsplit(" ", maxsplit=1) == @["a largely spaced", "sentence"] + + doAssert "a,b,c".rsplit(",") == @["a", "b", "c"] + doAssert "a man a plan a canal panama".rsplit("a ") == @["", "man ", "plan ", "canal panama"] + doAssert "".rsplit("Elon Musk") == @[""] + doAssert "a largely spaced sentence".rsplit(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"] accumulateResult(rsplit(s, sep, maxsplit)) result.reverse() @@ -1062,8 +1109,8 @@ proc align*(s: string, count: Natural, padding = ' '): string {. ## ## `padding` characters (by default spaces) are added before `s` resulting in ## right alignment. If ``s.len >= count``, no spaces are added and `s` is - ## returned unchanged. If you need to left align a string use the `repeatChar - ## proc <#repeatChar>`_. Example: + ## returned unchanged. If you need to left align a string use the `alignLeft + ## proc <#alignLeft>`_. Example: ## ## .. code-block:: nim ## assert align("abc", 4) == " abc" @@ -1078,6 +1125,28 @@ proc align*(s: string, count: Natural, padding = ' '): string {. else: result = s +proc alignLeft*(s: string, count: Natural, padding = ' '): string {.noSideEffect.} = + ## Left-Aligns a string `s` with `padding`, so that it is of length `count`. 
+ ## + ## `padding` characters (by default spaces) are added after `s` resulting in + ## left alignment. If ``s.len >= count``, no spaces are added and `s` is + ## returned unchanged. If you need to right align a string use the `align + ## proc <#align>`_. Example: + ## + ## .. code-block:: nim + ## assert alignLeft("abc", 4) == "abc " + ## assert alignLeft("a", 0) == "a" + ## assert alignLeft("1232", 6) == "1232 " + ## assert alignLeft("1232", 6, '#') == "1232##" + if s.len < count: + result = newString(count) + if s.len > 0: + result[0 .. (s.len - 1)] = s + for i in s.len ..< count: + result[i] = padding + else: + result = s + iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[ token: string, isSep: bool] = ## Tokenizes the string `s` into substrings. @@ -1175,7 +1244,7 @@ proc unindent*(s: string, count: Natural, padding: string = " "): string var indentCount = 0 for j in 0..<count.int: indentCount.inc - if line[j .. j + <padding.len] != padding: + if line[j .. j + padding.len-1] != padding: indentCount = j break result.add(line[indentCount*padding.len .. ^1]) @@ -1250,14 +1319,13 @@ proc addSep*(dest: var string, sep = ", ", startLen: Natural = 0) ## This is often useful for generating some code where the items need to ## be *separated* by `sep`. `sep` is only added if `dest` is longer than ## `startLen`. The following example creates a string describing - ## an array of integers: - ## - ## .. code-block:: nim - ## var arr = "[" - ## for x in items([2, 3, 5, 7, 11]): - ## addSep(arr, startLen=len("[")) - ## add(arr, $x) - ## add(arr, "]") + ## an array of integers. + runnableExamples: + var arr = "[" + for x in items([2, 3, 5, 7, 11]): + addSep(arr, startLen=len("[")) + add(arr, $x) + add(arr, "]") if dest.len > startLen: add(dest, sep) proc allCharsInSet*(s: string, theSet: set[char]): bool = @@ -1306,18 +1374,36 @@ proc join*[T: not string](a: openArray[T], sep: string = ""): string {. 
add(result, $x) type - SkipTable = array[char, int] - -{.push profiler: off.} -proc preprocessSub(sub: string, a: var SkipTable) = - var m = len(sub) - for i in 0..0xff: a[chr(i)] = m+1 - for i in 0..m-1: a[sub[i]] = m-i -{.pop.} + SkipTable* = array[char, int] -proc findAux(s, sub: string, start, last: int, a: SkipTable): int = - # Fast "quick search" algorithm: - var +proc initSkipTable*(a: var SkipTable, sub: string) + {.noSideEffect, rtl, extern: "nsuInitSkipTable".} = + ## Preprocess table `a` for `sub`. + let m = len(sub) + let m1 = m + 1 + var i = 0 + while i <= 0xff-7: + a[chr(i + 0)] = m1 + a[chr(i + 1)] = m1 + a[chr(i + 2)] = m1 + a[chr(i + 3)] = m1 + a[chr(i + 4)] = m1 + a[chr(i + 5)] = m1 + a[chr(i + 6)] = m1 + a[chr(i + 7)] = m1 + i += 8 + + for i in 0..m-1: + a[sub[i]] = m-i + +proc find*(a: SkipTable, s, sub: string, start: Natural = 0, last: Natural = 0): int + {.noSideEffect, rtl, extern: "nsuFindStrA".} = + ## Searches for `sub` in `s` inside range `start`..`last` using preprocessed table `a`. + ## If `last` is unspecified, it defaults to `s.high`. + ## + ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + let + last = if last==0: s.high else: last m = len(sub) n = last + 1 # search: @@ -1337,17 +1423,6 @@ when not (defined(js) or defined(nimdoc) or defined(nimscript)): else: const hasCStringBuiltin = false -proc find*(s, sub: string, start: Natural = 0, last: Natural = 0): int {.noSideEffect, - rtl, extern: "nsuFindStr".} = - ## Searches for `sub` in `s` inside range `start`..`last`. - ## If `last` is unspecified, it defaults to `s.high`. - ## - ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. 
- var a {.noinit.}: SkipTable - let last = if last==0: s.high else: last - preprocessSub(sub, a) - result = findAux(s, sub, start, last, a) - proc find*(s: string, sub: char, start: Natural = 0, last: Natural = 0): int {.noSideEffect, rtl, extern: "nsuFindChar".} = ## Searches for `sub` in `s` inside range `start`..`last`. @@ -1366,9 +1441,24 @@ proc find*(s: string, sub: char, start: Natural = 0, last: Natural = 0): int {.n else: for i in start..last: if sub == s[i]: return i - return -1 +proc find*(s, sub: string, start: Natural = 0, last: Natural = 0): int {.noSideEffect, + rtl, extern: "nsuFindStr".} = + ## Searches for `sub` in `s` inside range `start`..`last`. + ## If `last` is unspecified, it defaults to `s.high`. + ## + ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + if sub.len > s.len: + return -1 + + if sub.len == 1: + return find(s, sub[0], start, last) + + var a {.noinit.}: SkipTable + initSkipTable(a, sub) + result = find(a, s, sub, start, last) + proc find*(s: string, chars: set[char], start: Natural = 0, last: Natural = 0): int {.noSideEffect, rtl, extern: "nsuFindCharSet".} = ## Searches for `chars` in `s` inside range `start`..`last`. @@ -1500,11 +1590,11 @@ proc replace*(s, sub: string, by = ""): string {.noSideEffect, ## Replaces `sub` in `s` by the string `by`. var a {.noinit.}: SkipTable result = "" - preprocessSub(sub, a) + initSkipTable(a, sub) let last = s.high var i = 0 while true: - var j = findAux(s, sub, i, last, a) + var j = find(a, s, sub, i, last) if j < 0: break add result, substr(s, i, j - 1) add result, by @@ -1534,11 +1624,11 @@ proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect, const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'} var a {.noinit.}: SkipTable result = "" - preprocessSub(sub, a) + initSkipTable(a, sub) var i = 0 let last = s.high while true: - var j = findAux(s, sub, i, last, a) + var j = find(a, s, sub, i, last) if j < 0: break # word boundary? 
if (j == 0 or s[j-1] notin wordChars) and @@ -1653,7 +1743,9 @@ proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect, ## ## Even though the algorithm works with any string `s`, it is only useful ## if `s` contains a number. - ## Example: ``insertSep("1000000") == "1_000_000"`` + runnableExamples: + doAssert insertSep("1000000") == "1_000_000" + var L = (s.len-1) div digits + s.len result = newString(L) var j = 0 @@ -1669,29 +1761,15 @@ proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect, proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, rtl, extern: "nsuEscape".} = - ## Escapes a string `s`. - ## - ## This does these operations (at the same time): - ## * replaces any ``\`` by ``\\`` - ## * replaces any ``'`` by ``\'`` - ## * replaces any ``"`` by ``\"`` - ## * replaces any other character in the set ``{'\0'..'\31', '\127'..'\255'}`` - ## by ``\xHH`` where ``HH`` is its hexadecimal value. - ## The procedure has been designed so that its output is usable for many - ## different common syntaxes. The resulting string is prefixed with - ## `prefix` and suffixed with `suffix`. Both may be empty strings. - ## **Note**: This is not correct for producing Ansi C code! + ## Escapes a string `s`. See `system.addEscapedChar <system.html#addEscapedChar>`_ + ## for the escaping scheme. + ## + ## The resulting string is prefixed with `prefix` and suffixed with `suffix`. + ## Both may be empty strings. 
result = newStringOfCap(s.len + s.len shr 2) result.add(prefix) for c in items(s): - case c - of '\0'..'\31', '\127'..'\255': - add(result, "\\x") - add(result, toHex(ord(c), 2)) - of '\\': add(result, "\\\\") - of '\'': add(result, "\\'") - of '\"': add(result, "\\\"") - else: add(result, c) + result.addEscapedChar(c) add(result, suffix) proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, @@ -1741,6 +1819,8 @@ proc validIdentifier*(s: string): bool {.noSideEffect, ## ## A valid identifier starts with a character of the set `IdentStartChars` ## and is followed by any number of characters of the set `IdentChars`. + runnableExamples: + doAssert "abc_def08".validIdentifier if s[0] in IdentStartChars: for i in 1..s.len-1: if s[i] notin IdentChars: return false @@ -1751,7 +1831,7 @@ proc editDistance*(a, b: string): int {.noSideEffect, ## Returns the edit distance between `a` and `b`. ## ## This uses the `Levenshtein`:idx: distance algorithm with only a linear - ## memory overhead. This implementation is highly optimized! + ## memory overhead. var len1 = a.len var len2 = b.len if len1 > len2: @@ -1850,7 +1930,7 @@ type {.deprecated: [TFloatFormat: FloatFormatMode].} proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, - precision: range[0..32] = 16; + precision: range[-1..32] = 16; decimalSep = '.'): string {. noSideEffect, rtl, extern: "nsu$1".} = ## Converts a floating point value `f` to a string. @@ -1862,7 +1942,7 @@ proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, ## `precision`'s default value is the maximum number of meaningful digits ## after the decimal point for Nim's ``biggestFloat`` type. ## - ## If ``precision == 0``, it tries to format it nicely. + ## If ``precision == -1``, it tries to format it nicely. 
when defined(js): var res: cstring case format @@ -1884,7 +1964,7 @@ proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, buf {.noinit.}: array[0..2500, char] L: cint frmtstr[0] = '%' - if precision > 0: + if precision >= 0: frmtstr[1] = '#' frmtstr[2] = '.' frmtstr[3] = '*' @@ -1907,9 +1987,18 @@ proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, # but nothing else is possible: if buf[i] in {'.', ','}: result[i] = decimalsep else: result[i] = buf[i] + when defined(windows): + # VS pre 2015 violates the C standard: "The exponent always contains at + # least two digits, and only as many more digits as necessary to + # represent the exponent." [C11 §7.21.6.1] + # The following post-processing fixes this behavior. + if result.len > 4 and result[^4] == '+' and result[^3] == '0': + result[^3] = result[^2] + result[^2] = result[^1] + result.setLen(result.len - 1) proc formatFloat*(f: float, format: FloatFormatMode = ffDefault, - precision: range[0..32] = 16; decimalSep = '.'): string {. + precision: range[-1..32] = 16; decimalSep = '.'): string {. noSideEffect, rtl, extern: "nsu$1".} = ## Converts a floating point value `f` to a string. ## @@ -1920,7 +2009,12 @@ proc formatFloat*(f: float, format: FloatFormatMode = ffDefault, ## `precision`'s default value is the maximum number of meaningful digits ## after the decimal point for Nim's ``float`` type. ## - ## If ``precision == 0``, it tries to format it nicely. + ## If ``precision == -1``, it tries to format it nicely. 
+ runnableExamples: + let x = 123.456 + doAssert x.formatFloat() == "123.4560000000000" + doAssert x.formatFloat(ffDecimal, 4) == "123.4560" + doAssert x.formatFloat(ffScientific, 2) == "1.23e+02" result = formatBiggestFloat(f, format, precision, decimalSep) proc trimZeros*(x: var string) {.noSideEffect.} = @@ -1955,18 +2049,13 @@ proc formatSize*(bytes: int64, ## ## `includeSpace` can be set to true to include the (SI preferred) space ## between the number and the unit (e.g. 1 KiB). - ## - ## Examples: - ## - ## .. code-block:: nim - ## - ## formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB" - ## formatSize((2.234*1024*1024).int) == "2.234MiB" - ## formatSize(4096, includeSpace=true) == "4 KiB" - ## formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB" - ## formatSize(4096) == "4KiB" - ## formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB" - ## + runnableExamples: + doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB" + doAssert formatSize((2.234*1024*1024).int) == "2.234MiB" + doAssert formatSize(4096, includeSpace=true) == "4 KiB" + doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB" + doAssert formatSize(4096) == "4KiB" + doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB" const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"] const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"] var @@ -2060,7 +2149,7 @@ proc formatEng*(f: BiggestFloat, ## formatEng(4100, unit="V") == "4.1e3 V" ## formatEng(4100, unit="") == "4.1e3 " # Space with unit="" ## - ## `decimalSep` is used as the decimal separator + ## `decimalSep` is used as the decimal separator. var absolute: BiggestFloat significand: BiggestFloat @@ -2271,69 +2360,116 @@ proc format*(formatstr: string, a: varargs[string, `$`]): string {.noSideEffect, proc removeSuffix*(s: var string, chars: set[char] = Newlines) {. 
rtl, extern: "nsuRemoveSuffixCharSet".} = - ## Removes the first matching character from the string (in-place) given a - ## set of characters. If the set of characters is only equal to `Newlines` - ## then it will remove both the newline and return feed. - ## .. code-block:: nim - ## var - ## userInput = "Hello World!\r\n" - ## otherInput = "Hello!?!" - ## userInput.removeSuffix - ## userInput == "Hello World!" - ## userInput.removeSuffix({'!', '?'}) - ## userInput == "Hello World" - ## otherInput.removeSuffix({'!', '?'}) - ## otherInput == "Hello!?" + ## Removes all characters from `chars` from the end of the string `s` + ## (in-place). + runnableExamples: + var userInput = "Hello World!*~\r\n" + userInput.removeSuffix + doAssert userInput == "Hello World!*~" + userInput.removeSuffix({'~', '*'}) + doAssert userInput == "Hello World!" + + var otherInput = "Hello!?!" + otherInput.removeSuffix({'!', '?'}) + doAssert otherInput == "Hello" if s.len == 0: return - var last = len(s) - 1 - if chars == Newlines: - if s[last] == '\10': - last -= 1 - if s[last] == '\13': - last -= 1 - else: - if s[last] in chars: - last -= 1 + var last = s.high + while last > -1 and s[last] in chars: last -= 1 s.setLen(last + 1) proc removeSuffix*(s: var string, c: char) {. rtl, extern: "nsuRemoveSuffixChar".} = - ## Removes a single character (in-place) from a string. - ## .. code-block:: nim - ## var - ## table = "users" - ## table.removeSuffix('s') - ## table == "user" + ## Removes all occurrences of a single character (in-place) from the end + ## of a string. + ## + runnableExamples: + var table = "users" + table.removeSuffix('s') + doAssert table == "user" + + var dots = "Trailing dots......." + dots.removeSuffix('.') + doAssert dots == "Trailing dots" removeSuffix(s, chars = {c}) proc removeSuffix*(s: var string, suffix: string) {. rtl, extern: "nsuRemoveSuffixString".} = ## Remove the first matching suffix (in-place) from a string. - ## .. 
code-block:: nim - ## var - ## answers = "yeses" - ## answers.removeSuffix("es") - ## answers == "yes" + runnableExamples: + var answers = "yeses" + answers.removeSuffix("es") + doAssert answers == "yes" var newLen = s.len if s.endsWith(suffix): newLen -= len(suffix) s.setLen(newLen) +proc removePrefix*(s: var string, chars: set[char] = Newlines) {. + rtl, extern: "nsuRemovePrefixCharSet".} = + ## Removes all characters from `chars` from the start of the string `s` + ## (in-place). + ## + runnableExamples: + var userInput = "\r\n*~Hello World!" + userInput.removePrefix + doAssert userInput == "*~Hello World!" + userInput.removePrefix({'~', '*'}) + doAssert userInput == "Hello World!" + + var otherInput = "?!?Hello!?!" + otherInput.removePrefix({'!', '?'}) + doAssert otherInput == "Hello!?!" + var start = 0 + while start < s.len and s[start] in chars: start += 1 + if start > 0: s.delete(0, start - 1) + +proc removePrefix*(s: var string, c: char) {. + rtl, extern: "nsuRemovePrefixChar".} = + ## Removes all occurrences of a single character (in-place) from the start + ## of a string. + ## + runnableExamples: + var ident = "pControl" + ident.removePrefix('p') + doAssert ident == "Control" + removePrefix(s, chars = {c}) + +proc removePrefix*(s: var string, prefix: string) {. + rtl, extern: "nsuRemovePrefixString".} = + ## Remove the first matching prefix (in-place) from a string. 
+ ## + runnableExamples: + var answers = "yesyes" + answers.removePrefix("yes") + doAssert answers == "yes" + if s.startsWith(prefix): + s.delete(0, prefix.len - 1) + when isMainModule: doAssert align("abc", 4) == " abc" doAssert align("a", 0) == "a" doAssert align("1232", 6) == " 1232" doAssert align("1232", 6, '#') == "##1232" + doAssert alignLeft("abc", 4) == "abc " + doAssert alignLeft("a", 0) == "a" + doAssert alignLeft("1232", 6) == "1232 " + doAssert alignLeft("1232", 6, '#') == "1232##" + let inp = """ this is a long text -- muchlongerthan10chars and here it goes""" outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes" doAssert wordWrap(inp, 10, false) == outp + doAssert formatBiggestFloat(1234.567, ffDecimal, -1) == "1234.567000" + doAssert formatBiggestFloat(1234.567, ffDecimal, 0) == "1235." + doAssert formatBiggestFloat(1234.567, ffDecimal, 1) == "1234.6" doAssert formatBiggestFloat(0.00000000001, ffDecimal, 11) == "0.00000000001" doAssert formatBiggestFloat(0.00000000001, ffScientific, 1, ',') in ["1,0e-11", "1,0e-011"] + # bug #6589 + doAssert formatFloat(123.456, ffScientific, precision = -1) == "1.234560e+02" doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c" doAssert "${1}12 ${-1}$2" % ["a", "b"] == "a12 bb" @@ -2497,6 +2633,12 @@ bar doAssert s.split(' ', maxsplit=1) == @["", "this is an example "] doAssert s.split(" ", maxsplit=4) == @["", "this", "is", "an", "example "] + doAssert s.splitWhitespace() == @["this", "is", "an", "example"] + doAssert s.splitWhitespace(maxsplit=1) == @["this", "is an example "] + doAssert s.splitWhitespace(maxsplit=2) == @["this", "is", "an example "] + doAssert s.splitWhitespace(maxsplit=3) == @["this", "is", "an", "example "] + doAssert s.splitWhitespace(maxsplit=4) == @["this", "is", "an", "example"] + block: # formatEng tests doAssert formatEng(0, 2, trim=false) == "0.00" doAssert formatEng(0, 2) == "0" |