diff options
Diffstat (limited to 'lib/pure/strutils.nim')
-rw-r--r-- | lib/pure/strutils.nim | 188 |
1 files changed, 116 insertions, 72 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 6c1e49564..81be7db17 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -70,12 +70,12 @@ runnableExamples: ## easier substring extraction than regular expressions -import parseutils -from math import pow, floor, log10 -from algorithm import fill, reverse +import std/parseutils +from std/math import pow, floor, log10 +from std/algorithm import fill, reverse import std/enumutils -from unicode import toLower, toUpper +from std/unicode import toLower, toUpper export toLower, toUpper include "system/inclrtl" @@ -129,11 +129,11 @@ const ## Not very useful by its own, you can use it to create *inverted* sets to ## make the `find func<#find,string,set[char],Natural,int>`_ ## find **invalid** characters in strings. Example: - ## - ## .. code-block:: nim + ## ```nim ## let invalid = AllChars - Digits ## doAssert "01234".find(invalid) == -1 ## doAssert "01A34".find(invalid) == 2 + ## ``` func isAlphaAscii*(c: char): bool {.rtl, extern: "nsuIsAlphaAsciiChar".} = ## Checks whether or not character `c` is alphabetical. @@ -334,9 +334,9 @@ func normalize*(s: string): string {.rtl, extern: "nsuNormalize".} = func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} = ## Compares two strings in a case insensitive manner. Returns: ## - ## | 0 if a == b - ## | < 0 if a < b - ## | > 0 if a > b + ## | `0` if a == b + ## | `< 0` if a < b + ## | `> 0` if a > b runnableExamples: doAssert cmpIgnoreCase("FooBar", "foobar") == 0 doAssert cmpIgnoreCase("bar", "Foo") < 0 @@ -354,9 +354,9 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} = ## ## Returns: ## - ## | 0 if a == b - ## | < 0 if a < b - ## | > 0 if a > b + ## | `0` if a == b + ## | `< 0` if a < b + ## | `> 0` if a > b runnableExamples: doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0 doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0 @@ -423,14 +423,12 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## ## Substrings are separated by the character `sep`. ## The code: - ## - ## .. code-block:: nim + ## ```nim ## for word in split(";;this;is;an;;example;;;", ';'): ## writeLine(stdout, word) - ## + ## ``` ## Results in: - ## - ## .. code-block:: + ## ``` ## "" ## "" ## "this" @@ -441,6 +439,7 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## "" ## "" ## "" + ## ``` ## ## See also: ## * `rsplit iterator<#rsplit.i,string,char,int>`_ @@ -455,41 +454,46 @@ iterator split*(s: string, seps: set[char] = Whitespace, ## ## Substrings are separated by a substring containing only `seps`. ## - ## .. code-block:: nim + ## ```nim ## for word in split("this\lis an\texample"): ## writeLine(stdout, word) + ## ``` ## ## ...generates this output: ## - ## .. code-block:: + ## ``` ## "this" ## "is" ## "an" ## "example" + ## ``` ## ## And the following code: ## - ## .. code-block:: nim + ## ```nim ## for word in split("this:is;an$example", {';', ':', '$'}): ## writeLine(stdout, word) + ## ``` ## ## ...produces the same output as the first example. The code: ## - ## .. code-block:: nim + ## ```nim ## let date = "2012-11-20T22:08:08.398990" ## let separators = {' ', '-', ':', 'T'} ## for number in split(date, separators): ## writeLine(stdout, number) + ## ``` ## ## ...results in: ## - ## .. code-block:: + ## ``` ## "2012" ## "11" ## "20" ## "22" ## "08" ## "08.398990" + ## ``` ## ## .. note:: Empty separator set results in returning an original string, ## following the interpretation "split by no element". @@ -507,16 +511,18 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string = ## Substrings are separated by the string `sep`. ## The code: ## - ## .. code-block:: nim + ## ```nim ## for word in split("thisDATAisDATAcorrupted", "DATA"): ## writeLine(stdout, word) + ## ``` ## ## Results in: ## - ## .. code-block:: + ## ``` ## "this" ## "is" ## "corrupted" + ## ``` ## ## .. note:: Empty separator string results in returning an original string, ## following the interpretation "split by no element". @@ -559,17 +565,19 @@ iterator rsplit*(s: string, sep: char, maxsplit: int = -1): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator - ## <#split.i,string,char,int>`_ except in reverse order. + ## <#split.i,string,char,int>`_ except in **reverse** order. ## - ## .. code-block:: nim + ## ```nim ## for piece in "foo:bar".rsplit(':'): ## echo piece + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ``` ## "bar" ## "foo" + ## ``` ## ## Substrings are separated from the right by the char `sep`. ## @@ -584,17 +592,19 @@ iterator rsplit*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator - ## <#split.i,string,char,int>`_ except in reverse order. + ## <#split.i,string,char,int>`_ except in **reverse** order. ## - ## .. code-block:: nim + ## ```nim ## for piece in "foo bar".rsplit(WhiteSpace): ## echo piece + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ``` ## "bar" ## "foo" + ## ``` ## ## Substrings are separated from the right by the set of chars `seps` ## @@ -612,17 +622,19 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1, keepSeparators: bool = false): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator - ## <#split.i,string,string,int>`_ except in reverse order. + ## <#split.i,string,string,int>`_ except in **reverse** order. ## - ## .. code-block:: nim + ## ```nim ## for piece in "foothebar".rsplit("the"): ## echo piece + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ``` ## "bar" ## "foo" + ## ``` ## ## Substrings are separated from the right by the string `sep` ## @@ -648,13 +660,14 @@ iterator splitLines*(s: string, keepEol = false): string = ## ## Example: ## - ## .. code-block:: nim + ## ```nim ## for line in splitLines("\nthis\nis\nan\n\nexample\n"): ## writeLine(stdout, line) + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## "" ## "this" ## "is" @@ -662,6 +675,7 @@ iterator splitLines*(s: string, keepEol = false): string = ## "" ## "example" ## "" + ## ``` ## ## See also: ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ @@ -694,16 +708,17 @@ iterator splitWhitespace*(s: string, maxsplit: int = -1): string = ## ## The following code: ## - ## .. code-block:: nim + ## ```nim ## let s = " foo \t bar baz " ## for ms in [-1, 1, 2, 3]: ## echo "------ maxsplit = ", ms, ":" ## for item in s.splitWhitespace(maxsplit=ms): ## echo '"', item, '"' + ## ``` ## ## ...results in: ## - ## .. code-block:: + ## ``` ## ------ maxsplit = -1: ## "foo" ## "bar" @@ -719,6 +734,7 @@ iterator splitWhitespace*(s: string, maxsplit: int = -1): string = ## "foo" ## "bar" ## "baz" + ## ``` ## ## See also: ## * `splitLines iterator<#splitLines.i,string>`_ @@ -789,7 +805,7 @@ func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl, extern: "nsuRSplitChar".} = ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a func - ## that returns a sequence of substrings. + ## that returns a sequence of substrings in original order. ## ## A possible common use case for `rsplit` is path manipulation, ## particularly on systems that don't use a common delimiter. @@ -797,13 +813,15 @@ func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl, ## For example, if a system had `#` as a delimiter, you could ## do the following to get the tail of the path: ## - ## .. code-block:: nim + ## ```nim ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1) + ## ``` ## ## Results in `tailSplit` containing: ## - ## .. code-block:: nim + ## ```nim ## @["Root#Object#Method", "Index"] + ## ``` ## ## See also: ## * `rsplit iterator <#rsplit.i,string,char,int>`_ @@ -817,7 +835,7 @@ func rsplit*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {.rtl, extern: "nsuRSplitCharSet".} = ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a - ## func that returns a sequence of substrings. + ## func that returns a sequence of substrings in original order. ## ## A possible common use case for `rsplit` is path manipulation, ## particularly on systems that don't use a common delimiter. @@ -825,13 +843,15 @@ func rsplit*(s: string, seps: set[char] = Whitespace, ## For example, if a system had `#` as a delimiter, you could ## do the following to get the tail of the path: ## - ## .. code-block:: nim + ## ```nim ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1) + ## ``` ## ## Results in `tailSplit` containing: ## - ## .. code-block:: nim + ## ```nim ## @["Root#Object#Method", "Index"] + ## ``` ## ## .. note:: Empty separator set results in returning an original string, ## following the interpretation "split by no element". @@ -847,7 +867,7 @@ func rsplit*(s: string, seps: set[char] = Whitespace, func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, extern: "nsuRSplitString".} = ## The same as the `rsplit iterator <#rsplit.i,string,string,int,bool>`_, but is a func - ## that returns a sequence of substrings. + ## that returns a sequence of substrings in original order. ## ## A possible common use case for `rsplit` is path manipulation, ## particularly on systems that don't use a common delimiter. @@ -855,13 +875,15 @@ func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, ## For example, if a system had `#` as a delimiter, you could ## do the following to get the tail of the path: ## - ## .. code-block:: nim + ## ```nim ## var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1) + ## ``` ## ## Results in `tailSplit` containing: ## - ## .. code-block:: nim + ## ```nim ## @["Root#Object#Method", "Index"] + ## ``` ## ## .. note:: Empty separator string results in returning an original string, ## following the interpretation "split by no element". @@ -1285,7 +1307,7 @@ func parseEnum*[T: enum](s: string): T = ## type contains multiple fields with the same string value. ## ## Raises `ValueError` for an invalid value in `s`. The comparison is - ## done in a style insensitive way. + ## done in a style insensitive way (first letter is still case-sensitive). runnableExamples: type MyEnum = enum @@ -1305,7 +1327,7 @@ func parseEnum*[T: enum](s: string, default: T): T = ## type contains multiple fields with the same string value. ## ## Uses `default` for an invalid value in `s`. The comparison is done in a - ## style insensitive way. + ## style insensitive way (first letter is still case-sensitive). runnableExamples: type MyEnum = enum @@ -1786,8 +1808,9 @@ func addSep*(dest: var string, sep = ", ", startLen: Natural = 0) {.inline.} = ## ## A shorthand for: ## - ## .. code-block:: nim + ## ```nim ## if dest.len > startLen: add(dest, sep) + ## ``` ## ## This is often useful for generating some code where the items need to ## be *separated* by `sep`. `sep` is only added if `dest` is longer than @@ -1852,7 +1875,7 @@ func join*(a: openArray[string], sep: string = ""): string {.rtl, else: result = "" -func join*[T: not string](a: openArray[T], sep: string = ""): string = +proc join*[T: not string](a: openArray[T], sep: string = ""): string = ## Converts all elements in the container `a` to strings using `$`, ## and concatenates them with `sep`. runnableExamples: @@ -2258,17 +2281,31 @@ func replaceWord*(s, sub: string, by = ""): string {.rtl, add result, substr(s, i) func multiReplace*(s: string, replacements: varargs[(string, string)]): string = - ## Same as replace, but specialized for doing multiple replacements in a single - ## pass through the input string. + ## Same as `replace<#replace,string,string,string>`_, but specialized for + ## doing multiple replacements in a single pass through the input string. + ## + ## `multiReplace` scans the input string from left to right and replaces the + ## matching substrings in the same order as passed in the argument list. ## - ## `multiReplace` performs all replacements in a single pass, this means it - ## can be used to swap the occurrences of "a" and "b", for instance. + ## The implications of the order of scanning the string and matching the + ## replacements: + ## - In case of multiple matches at a given position, the earliest + ## replacement is applied. + ## - Overlaps are not handled. After performing a replacement, the scan + ## continues from the character after the matched substring. If the + ## resulting string then contains a possible match starting in a newly + ## placed substring, the additional replacement is not performed. ## ## If the resulting string is not longer than the original input string, ## only a single memory allocation is required. ## - ## The order of the replacements does matter. Earlier replacements are - ## preferred over later replacements in the argument list. + runnableExamples: + # Swapping occurrences of 'a' and 'b': + doAssert multireplace("abba", [("a", "b"), ("b", "a")]) == "baab" + + # The second replacement ("ab") is matched and performed first, the scan then + # continues from 'c', so the "bc" replacement is never matched and thus skipped. + doAssert multireplace("abc", [("bc", "x"), ("ab", "_b")]) == "_bc" result = newStringOfCap(s.len) var i = 0 var fastChk: set[char] = {} @@ -2417,8 +2454,8 @@ func validIdentifier*(s: string): bool {.rtl, extern: "nsuValidIdentifier".} = # floating point formatting: when not defined(js): - func c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>", - importc: "sprintf", varargs.} + func c_snprintf(buf: cstring, n: csize_t, frmt: cstring): cint {.header: "<stdio.h>", + importc: "snprintf", varargs.} type FloatFormatMode* = enum @@ -2451,7 +2488,7 @@ func formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, when defined(js): var precision = precision if precision == -1: - # use the same default precision as c_sprintf + # use the same default precision as c_snprintf precision = 6 var res: cstring case format @@ -2482,11 +2519,11 @@ func formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, frmtstr[3] = '*' frmtstr[4] = floatFormatToChar[format] frmtstr[5] = '\0' - L = c_sprintf(cast[cstring](addr buf), cast[cstring](addr frmtstr), precision, f) + L = c_snprintf(cast[cstring](addr buf), csize_t(2501), cast[cstring](addr frmtstr), precision, f) else: frmtstr[1] = floatFormatToChar[format] frmtstr[2] = '\0' - L = c_sprintf(cast[cstring](addr buf), cast[cstring](addr frmtstr), f) + L = c_snprintf(cast[cstring](addr buf), csize_t(2501), cast[cstring](addr frmtstr), f) result = newString(L) for i in 0 ..< L: # Depending on the locale either dot or comma is produced, @@ -2634,13 +2671,13 @@ func formatEng*(f: BiggestFloat, ## decimal point or (if `trim` is true) the maximum number of digits to be ## shown. ## - ## .. code-block:: nim - ## + ## ```nim ## formatEng(0, 2, trim=false) == "0.00" ## formatEng(0, 2) == "0" ## formatEng(0.053, 0) == "53e-3" ## formatEng(52731234, 2) == "52.73e6" ## formatEng(-52731234, 2) == "-52.73e6" + ## ``` ## ## If `siPrefix` is set to true, the number will be displayed with the SI ## prefix corresponding to the exponent. For example 4100 will be displayed @@ -2655,8 +2692,7 @@ func formatEng*(f: BiggestFloat, ## different to appending the unit to the result as the location of the space ## is altered depending on whether there is an exponent. ## - ## .. code-block:: nim - ## + ## ```nim ## formatEng(4100, siPrefix=true, unit="V") == "4.1 kV" ## formatEng(4.1, siPrefix=true, unit="V") == "4.1 V" ## formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space @@ -2666,6 +2702,7 @@ func formatEng*(f: BiggestFloat, ## formatEng(4100) == "4.1e3" ## formatEng(4100, unit="V") == "4.1e3 V" ## formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 " # Space with useUnitSpace=true + ## ``` ## ## `decimalSep` is used as the decimal separator. ## @@ -2829,13 +2866,15 @@ func `%`*(formatstr: string, a: openArray[string]): string {.rtl, ## ## This is best explained by an example: ## - ## .. code-block:: nim + ## ```nim ## "$1 eats $2." % ["The cat", "fish"] + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## "The cat eats fish." + ## ``` ## ## The substitution variables (the thing after the `$`) are enumerated ## from 1 to `a.len`. @@ -2843,21 +2882,24 @@ func `%`*(formatstr: string, a: openArray[string]): string {.rtl, ## The notation `$#` can be used to refer to the next substitution ## variable: ## - ## .. code-block:: nim + ## ```nim ## "$# eats $#." % ["The cat", "fish"] + ## ``` ## ## Substitution variables can also be words (that is ## `[A-Za-z_]+[A-Za-z0-9_]*`) in which case the arguments in `a` with even ## indices are keys and with odd indices are the corresponding values. ## An example: ## - ## .. code-block:: nim + ## ```nim ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## "The cat eats fish." + ## ``` ## ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is ## raised if an ill-formed format string has been passed to the `%` operator. @@ -2955,13 +2997,14 @@ iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[ ## Substrings are separated by a substring containing only `seps`. ## Example: ## - ## .. code-block:: nim + ## ```nim ## for word in tokenize(" this is an example "): ## writeLine(stdout, word) + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## (" ", true) ## ("this", false) ## (" ", true) @@ -2971,6 +3014,7 @@ iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[ ## (" ", true) ## ("example", false) ## (" ", true) + ## ``` var i = 0 while true: var j = i |