diff options
Diffstat (limited to 'lib/pure/strutils.nim')
-rw-r--r-- | lib/pure/strutils.nim | 632 |
1 files changed, 409 insertions, 223 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 4098749ce..81be7db17 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -70,17 +70,21 @@ runnableExamples: ## easier substring extraction than regular expressions -import parseutils -from math import pow, floor, log10 -from algorithm import reverse +import std/parseutils +from std/math import pow, floor, log10 +from std/algorithm import fill, reverse import std/enumutils -from unicode import toLower, toUpper +from std/unicode import toLower, toUpper export toLower, toUpper include "system/inclrtl" -import std/private/since -from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl, startsWithImpl, endsWithImpl +import std/private/[since, jsutils] +from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl, + startsWithImpl, endsWithImpl + +when defined(nimPreviewSlimSystem): + import std/assertions const @@ -91,6 +95,15 @@ const Letters* = {'A'..'Z', 'a'..'z'} ## The set of letters. + UppercaseLetters* = {'A'..'Z'} + ## The set of uppercase ASCII letters. + + LowercaseLetters* = {'a'..'z'} + ## The set of lowercase ASCII letters. + + PunctuationChars* = {'!'..'/', ':'..'@', '['..'`', '{'..'~'} + ## The set of all ASCII punctuation characters. + Digits* = {'0'..'9'} ## The set of digits. @@ -107,17 +120,20 @@ const ## The set of characters a newline terminator can start with (carriage ## return, line feed). + PrintableChars* = Letters + Digits + PunctuationChars + Whitespace + ## The set of all printable ASCII characters (letters, digits, whitespace, and punctuation characters). + AllChars* = {'\x00'..'\xFF'} ## A set with all the possible characters. ## ## Not very useful by its own, you can use it to create *inverted* sets to ## make the `find func<#find,string,set[char],Natural,int>`_ ## find **invalid** characters in strings. Example: - ## - ## .. code-block:: nim + ## ```nim ## let invalid = AllChars - Digits ## doAssert "01234".find(invalid) == -1 ## doAssert "01A34".find(invalid) == 2 + ## ``` func isAlphaAscii*(c: char): bool {.rtl, extern: "nsuIsAlphaAsciiChar".} = ## Checks whether or not character `c` is alphabetical. @@ -169,7 +185,7 @@ func isLowerAscii*(c: char): bool {.rtl, extern: "nsuIsLowerAsciiChar".} = doAssert isLowerAscii('e') == true doAssert isLowerAscii('E') == false doAssert isLowerAscii('7') == false - return c in {'a'..'z'} + return c in LowercaseLetters func isUpperAscii*(c: char): bool {.rtl, extern: "nsuIsUpperAsciiChar".} = ## Checks whether or not `c` is an upper case character. @@ -183,8 +199,7 @@ func isUpperAscii*(c: char): bool {.rtl, extern: "nsuIsUpperAsciiChar".} = doAssert isUpperAscii('e') == false doAssert isUpperAscii('E') == true doAssert isUpperAscii('7') == false - return c in {'A'..'Z'} - + return c in UppercaseLetters func toLowerAscii*(c: char): char {.rtl, extern: "nsuToLowerAsciiChar".} = ## Returns the lower case version of character `c`. @@ -199,7 +214,7 @@ func toLowerAscii*(c: char): char {.rtl, extern: "nsuToLowerAsciiChar".} = runnableExamples: doAssert toLowerAscii('A') == 'a' doAssert toLowerAscii('e') == 'e' - if c in {'A'..'Z'}: + if c in UppercaseLetters: result = char(uint8(c) xor 0b0010_0000'u8) else: result = c @@ -236,7 +251,7 @@ func toUpperAscii*(c: char): char {.rtl, extern: "nsuToUpperAsciiChar".} = runnableExamples: doAssert toUpperAscii('a') == 'A' doAssert toUpperAscii('E') == 'E' - if c in {'a'..'z'}: + if c in LowercaseLetters: result = char(uint8(c) xor 0b0010_0000'u8) else: result = c @@ -273,14 +288,20 @@ func nimIdentNormalize*(s: string): string = ## ## That means to convert to lower case and remove any '_' on all characters ## except first one. + ## + ## .. Warning:: Backticks (`) are not handled: they remain *as is* and + ## spaces are preserved. See `nimIdentBackticksNormalize + ## <dochelpers.html#nimIdentBackticksNormalize,string>`_ for + ## an alternative approach. runnableExamples: doAssert nimIdentNormalize("Foo_bar") == "Foobar" result = newString(s.len) - if s.len > 0: - result[0] = s[0] + if s.len == 0: + return + result[0] = s[0] var j = 1 for i in 1..len(s) - 1: - if s[i] in {'A'..'Z'}: + if s[i] in UppercaseLetters: result[j] = chr(ord(s[i]) + (ord('a') - ord('A'))) inc j elif s[i] != '_': @@ -302,7 +323,7 @@ func normalize*(s: string): string {.rtl, extern: "nsuNormalize".} = result = newString(s.len) var j = 0 for i in 0..len(s) - 1: - if s[i] in {'A'..'Z'}: + if s[i] in UppercaseLetters: result[j] = chr(ord(s[i]) + (ord('a') - ord('A'))) inc j elif s[i] != '_': @@ -313,9 +334,9 @@ func normalize*(s: string): string {.rtl, extern: "nsuNormalize".} = func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} = ## Compares two strings in a case insensitive manner. Returns: ## - ## | 0 if a == b - ## | < 0 if a < b - ## | > 0 if a > b + ## | `0` if a == b + ## | `< 0` if a < b + ## | `> 0` if a > b runnableExamples: doAssert cmpIgnoreCase("FooBar", "foobar") == 0 doAssert cmpIgnoreCase("bar", "Foo") < 0 @@ -333,9 +354,9 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} = ## ## Returns: ## - ## | 0 if a == b - ## | < 0 if a < b - ## | > 0 if a > b + ## | `0` if a == b + ## | `< 0` if a < b + ## | `> 0` if a > b runnableExamples: doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0 doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0 @@ -345,11 +366,14 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} = # --------- Private templates for different split separators ----------- func substrEq(s: string, pos: int, substr: string): bool = - var i = 0 + # Always returns false for empty `substr` var length = substr.len - while i < length and pos+i < s.len and s[pos+i] == substr[i]: - inc i - return i == length + if length > 0: + var i = 0 + while i < length and pos+i < s.len and s[pos+i] == substr[i]: + inc i + i == length + else: false template stringHasSep(s: string, index: int, seps: set[char]): bool = s[index] in seps @@ -399,14 +423,12 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## ## Substrings are separated by the character `sep`. ## The code: - ## - ## .. code-block:: nim + ## ```nim ## for word in split(";;this;is;an;;example;;;", ';'): ## writeLine(stdout, word) - ## + ## ``` ## Results in: - ## - ## .. code-block:: + ## ``` ## "" ## "" ## "this" @@ -417,6 +439,7 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## "" ## "" ## "" + ## ``` ## ## See also: ## * `rsplit iterator<#rsplit.i,string,char,int>`_ @@ -431,41 +454,49 @@ iterator split*(s: string, seps: set[char] = Whitespace, ## ## Substrings are separated by a substring containing only `seps`. ## - ## .. code-block:: nim + ## ```nim ## for word in split("this\lis an\texample"): ## writeLine(stdout, word) + ## ``` ## ## ...generates this output: ## - ## .. code-block:: + ## ``` ## "this" ## "is" ## "an" ## "example" + ## ``` ## ## And the following code: ## - ## .. code-block:: nim + ## ```nim ## for word in split("this:is;an$example", {';', ':', '$'}): ## writeLine(stdout, word) + ## ``` ## ## ...produces the same output as the first example. The code: ## - ## .. code-block:: nim + ## ```nim ## let date = "2012-11-20T22:08:08.398990" ## let separators = {' ', '-', ':', 'T'} ## for number in split(date, separators): ## writeLine(stdout, number) + ## ``` ## ## ...results in: ## - ## .. code-block:: + ## ``` ## "2012" ## "11" ## "20" ## "22" ## "08" ## "08.398990" + ## ``` + ## + ## .. note:: Empty separator set results in returning an original string, + ## following the interpretation "split by no element". ## ## See also: ## * `rsplit iterator<#rsplit.i,string,set[char],int>`_ @@ -480,23 +511,30 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string = ## Substrings are separated by the string `sep`. ## The code: ## - ## .. code-block:: nim + ## ```nim ## for word in split("thisDATAisDATAcorrupted", "DATA"): ## writeLine(stdout, word) + ## ``` ## ## Results in: ## - ## .. code-block:: + ## ``` ## "this" ## "is" ## "corrupted" + ## ``` + ## + ## .. note:: Empty separator string results in returning an original string, + ## following the interpretation "split by no element". ## ## See also: ## * `rsplit iterator<#rsplit.i,string,string,int,bool>`_ ## * `splitLines iterator<#splitLines.i,string>`_ ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ ## * `split func<#split,string,string,int>`_ - splitCommon(s, sep, maxsplit, sep.len) + let sepLen = if sep.len == 0: 1 # prevents infinite loop + else: sep.len + splitCommon(s, sep, maxsplit, sepLen) template rsplitCommon(s, sep, maxsplit, sepLen) = @@ -527,17 +565,19 @@ iterator rsplit*(s: string, sep: char, maxsplit: int = -1): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator - ## <#split.i,string,char,int>`_ except in reverse order. + ## <#split.i,string,char,int>`_ except in **reverse** order. ## - ## .. code-block:: nim + ## ```nim ## for piece in "foo:bar".rsplit(':'): ## echo piece + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ``` ## "bar" ## "foo" + ## ``` ## ## Substrings are separated from the right by the char `sep`. ## @@ -552,20 +592,25 @@ iterator rsplit*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator - ## <#split.i,string,char,int>`_ except in reverse order. + ## <#split.i,string,char,int>`_ except in **reverse** order. ## - ## .. code-block:: nim + ## ```nim ## for piece in "foo bar".rsplit(WhiteSpace): ## echo piece + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ``` ## "bar" ## "foo" + ## ``` ## ## Substrings are separated from the right by the set of chars `seps` ## + ## .. note:: Empty separator set results in returning an original string, + ## following the interpretation "split by no element". + ## ## See also: ## * `split iterator<#split.i,string,set[char],int>`_ ## * `splitLines iterator<#splitLines.i,string>`_ @@ -577,44 +622,52 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1, keepSeparators: bool = false): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator - ## <#split.i,string,string,int>`_ except in reverse order. + ## <#split.i,string,string,int>`_ except in **reverse** order. ## - ## .. code-block:: nim + ## ```nim ## for piece in "foothebar".rsplit("the"): ## echo piece + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ``` ## "bar" ## "foo" + ## ``` ## ## Substrings are separated from the right by the string `sep` ## + ## .. note:: Empty separator string results in returning an original string, + ## following the interpretation "split by no element". + ## ## See also: ## * `split iterator<#split.i,string,string,int>`_ ## * `splitLines iterator<#splitLines.i,string>`_ ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ ## * `rsplit func<#rsplit,string,string,int>`_ - rsplitCommon(s, sep, maxsplit, sep.len) + let sepLen = if sep.len == 0: 1 # prevents infinite loop + else: sep.len + rsplitCommon(s, sep, maxsplit, sepLen) iterator splitLines*(s: string, keepEol = false): string = ## Splits the string `s` into its containing lines. ## ## Every `character literal <manual.html#lexical-analysis-character-literals>`_ ## newline combination (CR, LF, CR-LF) is supported. The result strings - ## contain no trailing end of line characters unless parameter `keepEol` + ## contain no trailing end of line characters unless the parameter `keepEol` ## is set to `true`. ## ## Example: ## - ## .. code-block:: nim + ## ```nim ## for line in splitLines("\nthis\nis\nan\n\nexample\n"): ## writeLine(stdout, line) + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## "" ## "this" ## "is" @@ -622,6 +675,7 @@ iterator splitLines*(s: string, keepEol = false): string = ## "" ## "example" ## "" + ## ``` ## ## See also: ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ @@ -654,16 +708,17 @@ iterator splitWhitespace*(s: string, maxsplit: int = -1): string = ## ## The following code: ## - ## .. code-block:: nim + ## ```nim ## let s = " foo \t bar baz " ## for ms in [-1, 1, 2, 3]: ## echo "------ maxsplit = ", ms, ":" ## for item in s.splitWhitespace(maxsplit=ms): ## echo '"', item, '"' + ## ``` ## ## ...results in: ## - ## .. code-block:: + ## ``` ## ------ maxsplit = -1: ## "foo" ## "bar" @@ -679,6 +734,7 @@ iterator splitWhitespace*(s: string, maxsplit: int = -1): string = ## "foo" ## "bar" ## "baz" + ## ``` ## ## See also: ## * `splitLines iterator<#splitLines.i,string>`_ @@ -707,6 +763,9 @@ func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[ ## The same as the `split iterator <#split.i,string,set[char],int>`_ (see its ## documentation), but is a func that returns a sequence of substrings. ## + ## .. note:: Empty separator set results in returning an original string, + ## following the interpretation "split by no element". + ## ## See also: ## * `split iterator <#split.i,string,set[char],int>`_ ## * `rsplit func<#rsplit,string,set[char],int>`_ @@ -715,6 +774,7 @@ func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[ runnableExamples: doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"] doAssert "".split({' '}) == @[""] + doAssert "empty seps return unsplit s".split({}) == @["empty seps return unsplit s"] accResult(split(s, seps, maxsplit)) func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, @@ -724,6 +784,9 @@ func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, ## Substrings are separated by the string `sep`. This is a wrapper around the ## `split iterator <#split.i,string,string,int>`_. ## + ## .. note:: Empty separator string results in returning an original string, + ## following the interpretation "split by no element". + ## ## See also: ## * `split iterator <#split.i,string,string,int>`_ ## * `rsplit func<#rsplit,string,string,int>`_ @@ -736,14 +799,13 @@ func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"] doAssert "a largely spaced sentence".split(" ", maxsplit = 1) == @["a", " largely spaced sentence"] - doAssert(sep.len > 0) - + doAssert "empty sep returns unsplit s".split("") == @["empty sep returns unsplit s"] accResult(split(s, sep, maxsplit)) func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl, extern: "nsuRSplitChar".} = ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a func - ## that returns a sequence of substrings. + ## that returns a sequence of substrings in original order. ## ## A possible common use case for `rsplit` is path manipulation, ## particularly on systems that don't use a common delimiter. @@ -751,13 +813,15 @@ func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl, ## For example, if a system had `#` as a delimiter, you could ## do the following to get the tail of the path: ## - ## .. code-block:: nim + ## ```nim ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1) + ## ``` ## ## Results in `tailSplit` containing: ## - ## .. code-block:: nim + ## ```nim ## @["Root#Object#Method", "Index"] + ## ``` ## ## See also: ## * `rsplit iterator <#rsplit.i,string,char,int>`_ @@ -771,7 +835,7 @@ func rsplit*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {.rtl, extern: "nsuRSplitCharSet".} = ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a - ## func that returns a sequence of substrings. + ## func that returns a sequence of substrings in original order. ## ## A possible common use case for `rsplit` is path manipulation, ## particularly on systems that don't use a common delimiter. @@ -779,13 +843,18 @@ func rsplit*(s: string, seps: set[char] = Whitespace, ## For example, if a system had `#` as a delimiter, you could ## do the following to get the tail of the path: ## - ## .. code-block:: nim + ## ```nim ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1) + ## ``` ## ## Results in `tailSplit` containing: ## - ## .. code-block:: nim + ## ```nim ## @["Root#Object#Method", "Index"] + ## ``` + ## + ## .. note:: Empty separator set results in returning an original string, + ## following the interpretation "split by no element". ## ## See also: ## * `rsplit iterator <#rsplit.i,string,set[char],int>`_ @@ -798,7 +867,7 @@ func rsplit*(s: string, seps: set[char] = Whitespace, func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, extern: "nsuRSplitString".} = ## The same as the `rsplit iterator <#rsplit.i,string,string,int,bool>`_, but is a func - ## that returns a sequence of substrings. + ## that returns a sequence of substrings in original order. ## ## A possible common use case for `rsplit` is path manipulation, ## particularly on systems that don't use a common delimiter. @@ -806,13 +875,18 @@ func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, ## For example, if a system had `#` as a delimiter, you could ## do the following to get the tail of the path: ## - ## .. code-block:: nim + ## ```nim ## var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1) + ## ``` ## ## Results in `tailSplit` containing: ## - ## .. code-block:: nim + ## ```nim ## @["Root#Object#Method", "Index"] + ## ``` + ## + ## .. note:: Empty separator string results in returning an original string, + ## following the interpretation "split by no element". ## ## See also: ## * `rsplit iterator <#rsplit.i,string,string,int,bool>`_ @@ -828,6 +902,7 @@ func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, doAssert "".rsplit("Elon Musk") == @[""] doAssert "a largely spaced sentence".rsplit(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"] + doAssert "empty sep returns unsplit s".rsplit("") == @["empty sep returns unsplit s"] accResult(rsplit(s, sep, maxsplit)) result.reverse() @@ -923,14 +998,26 @@ func toHex*[T: SomeInteger](x: T, len: Positive): string = doAssert b.toHex(4) == "1001" doAssert toHex(62, 3) == "03E" doAssert toHex(-8, 6) == "FFFFF8" - toHexImpl(cast[BiggestUInt](x), len, x < 0) + whenJsNoBigInt64: + toHexImpl(cast[BiggestUInt](x), len, x < 0) + do: + when T is SomeSignedInt: + toHexImpl(cast[BiggestUInt](BiggestInt(x)), len, x < 0) + else: + toHexImpl(BiggestUInt(x), len, x < 0) func toHex*[T: SomeInteger](x: T): string = ## Shortcut for `toHex(x, T.sizeof * 2)` runnableExamples: doAssert toHex(1984'i64) == "00000000000007C0" doAssert toHex(1984'i16) == "07C0" - toHexImpl(cast[BiggestUInt](x), 2*sizeof(T), x < 0) + whenJsNoBigInt64: + toHexImpl(cast[BiggestUInt](x), 2*sizeof(T), x < 0) + do: + when T is SomeSignedInt: + toHexImpl(cast[BiggestUInt](BiggestInt(x)), 2*sizeof(T), x < 0) + else: + toHexImpl(BiggestUInt(x), 2*sizeof(T), x < 0) func toHex*(s: string): string {.rtl.} = ## Converts a bytes string to its hexadecimal representation. @@ -1220,7 +1307,7 @@ func parseEnum*[T: enum](s: string): T = ## type contains multiple fields with the same string value. ## ## Raises `ValueError` for an invalid value in `s`. The comparison is - ## done in a style insensitive way. + ## done in a style insensitive way (first letter is still case-sensitive). runnableExamples: type MyEnum = enum @@ -1240,7 +1327,7 @@ func parseEnum*[T: enum](s: string, default: T): T = ## type contains multiple fields with the same string value. ## ## Uses `default` for an invalid value in `s`. The comparison is done in a - ## style insensitive way. + ## style insensitive way (first letter is still case-sensitive). runnableExamples: type MyEnum = enum @@ -1476,12 +1563,40 @@ func dedent*(s: string, count: Natural = indentation(s)): string {.rtl, doAssert x == "Hello\n There\n" unindent(s, count, " ") -func delete*(s: var string, first, last: int) {.rtl, extern: "nsuDelete".} = - ## Deletes in `s` (must be declared as `var`) the characters at positions - ## `first .. last` (both ends included). - ## - ## This modifies `s` itself, it does not return a copy. - runnableExamples: +func delete*(s: var string, slice: Slice[int]) = + ## Deletes the items `s[slice]`, raising `IndexDefect` if the slice contains + ## elements out of range. + ## + ## This operation moves all elements after `s[slice]` in linear time, and + ## is the string analog to `sequtils.delete`. + runnableExamples: + var a = "abcde" + doAssertRaises(IndexDefect): a.delete(4..5) + assert a == "abcde" + a.delete(4..4) + assert a == "abcd" + a.delete(1..2) + assert a == "ad" + a.delete(1..<1) # empty slice + assert a == "ad" + when compileOption("boundChecks"): + if not (slice.a < s.len and slice.a >= 0 and slice.b < s.len): + raise newException(IndexDefect, $(slice: slice, len: s.len)) + if slice.b >= slice.a: + var i = slice.a + var j = slice.b + 1 + var newLen = s.len - j + i + # if j < s.len: moveMem(addr s[i], addr s[j], s.len - j) # pending benchmark + while i < newLen: + s[i] = s[j] + inc(i) + inc(j) + setLen(s, newLen) + +func delete*(s: var string, first, last: int) {.rtl, extern: "nsuDelete", + deprecated: "use `delete(s, first..last)`".} = + ## Deletes in `s` the characters at positions `first .. last` (both ends included). + runnableExamples("--warning:deprecated:off"): var a = "abracadabra" a.delete(4, 5) @@ -1502,7 +1617,6 @@ func delete*(s: var string, first, last: int) {.rtl, extern: "nsuDelete".} = inc(j) setLen(s, newLen) - func startsWith*(s: string, prefix: char): bool {.inline.} = ## Returns true if `s` starts with character `prefix`. ## @@ -1600,7 +1714,7 @@ func removePrefix*(s: var string, chars: set[char] = Newlines) {.rtl, var start = 0 while start < s.len and s[start] in chars: start += 1 - if start > 0: s.delete(0, start - 1) + if start > 0: s.delete(0..start - 1) func removePrefix*(s: var string, c: char) {.rtl, extern: "nsuRemovePrefixChar".} = @@ -1627,8 +1741,8 @@ func removePrefix*(s: var string, prefix: string) {.rtl, var answers = "yesyes" answers.removePrefix("yes") doAssert answers == "yes" - if s.startsWith(prefix): - s.delete(0, prefix.len - 1) + if s.startsWith(prefix) and prefix.len > 0: + s.delete(0..prefix.len - 1) func removeSuffix*(s: var string, chars: set[char] = Newlines) {.rtl, extern: "nsuRemoveSuffixCharSet".} = @@ -1694,8 +1808,9 @@ func addSep*(dest: var string, sep = ", ", startLen: Natural = 0) {.inline.} = ## ## A shorthand for: ## - ## .. code-block:: nim + ## ```nim ## if dest.len > startLen: add(dest, sep) + ## ``` ## ## This is often useful for generating some code where the items need to ## be *separated* by `sep`. `sep` is only added if `dest` is longer than @@ -1760,7 +1875,7 @@ func join*(a: openArray[string], sep: string = ""): string {.rtl, else: result = "" -func join*[T: not string](a: openArray[T], sep: string = ""): string {.rtl.} = +proc join*[T: not string](a: openArray[T], sep: string = ""): string = ## Converts all elements in the container `a` to strings using `$`, ## and concatenates them with `sep`. runnableExamples: @@ -1773,36 +1888,44 @@ func join*[T: not string](a: openArray[T], sep: string = ""): string {.rtl.} = add(result, $x) type - SkipTable* = array[char, int] + SkipTable* = array[char, int] ## Character table for efficient substring search. func initSkipTable*(a: var SkipTable, sub: string) {.rtl, extern: "nsuInitSkipTable".} = - ## Preprocess table `a` for `sub`. + ## Initializes table `a` for efficient search of substring `sub`. + ## + ## See also: + ## * `initSkipTable func<#initSkipTable,string>`_ + ## * `find func<#find,SkipTable,string,string,Natural,int>`_ + # TODO: this should be the `default()` initializer for the type. let m = len(sub) - var i = 0 - while i <= 0xff-7: - a[chr(i + 0)] = m - a[chr(i + 1)] = m - a[chr(i + 2)] = m - a[chr(i + 3)] = m - a[chr(i + 4)] = m - a[chr(i + 5)] = m - a[chr(i + 6)] = m - a[chr(i + 7)] = m - i += 8 + fill(a, m) for i in 0 ..< m - 1: a[sub[i]] = m - 1 - i -func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {. +func initSkipTable*(sub: string): SkipTable {.noinit, rtl, + extern: "nsuInitNewSkipTable".} = + ## Returns a new table initialized for `sub`. + ## + ## See also: + ## * `initSkipTable func<#initSkipTable,SkipTable,string>`_ + ## * `find func<#find,SkipTable,string,string,Natural,int>`_ + initSkipTable(result, sub) + +func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = -1): int {. rtl, extern: "nsuFindStrA".} = ## Searches for `sub` in `s` inside range `start..last` using preprocessed ## table `a`. If `last` is unspecified, it defaults to `s.high` (the last ## element). ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + ## + ## See also: + ## * `initSkipTable func<#initSkipTable,string>`_ + ## * `initSkipTable func<#initSkipTable,SkipTable,string>`_ let - last = if last == 0: s.high else: last + last = if last < 0: s.high else: last subLast = sub.len - 1 if subLast == -1: @@ -1812,6 +1935,7 @@ func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {. # This is an implementation of the Boyer-Moore Horspool algorithms # https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm + result = -1 var skip = start while last - skip >= subLast: @@ -1821,7 +1945,6 @@ func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {. return skip dec i inc skip, a[s[skip + subLast]] - return -1 when not (defined(js) or defined(nimdoc) or defined(nimscript)): func c_memchr(cstr: pointer, c: char, n: csize_t): pointer {. @@ -1830,68 +1953,97 @@ when not (defined(js) or defined(nimdoc) or defined(nimscript)): else: const hasCStringBuiltin = false -func find*(s: string, sub: char, start: Natural = 0, last = 0): int {.rtl, +func find*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl, extern: "nsuFindChar".} = ## Searches for `sub` in `s` inside range `start..last` (both ends included). - ## If `last` is unspecified, it defaults to `s.high` (the last element). + ## If `last` is unspecified or negative, it defaults to `s.high` (the last element). ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. ## Otherwise the index returned is relative to `s[0]`, not `start`. - ## Use `s[start..last].rfind` for a `start`-origin index. + ## Subtract `start` from the result for a `start`-origin index. ## ## See also: - ## * `rfind func<#rfind,string,char,Natural>`_ + ## * `rfind func<#rfind,string,char,Natural,int>`_ ## * `replace func<#replace,string,char,char>`_ - let last = if last == 0: s.high else: last - when nimvm: + result = -1 + let last = if last < 0: s.high else: last + + template findImpl = for i in int(start)..last: - if sub == s[i]: return i + if s[i] == sub: + return i + + when nimvm: + findImpl() else: when hasCStringBuiltin: - let L = last-start+1 - if L > 0: - let found = c_memchr(s[start].unsafeAddr, sub, cast[csize_t](L)) + let length = last-start+1 + if length > 0: + let found = c_memchr(s[start].unsafeAddr, sub, cast[csize_t](length)) if not found.isNil: - return cast[ByteAddress](found) -% cast[ByteAddress](s.cstring) + return cast[int](found) -% cast[int](s.cstring) else: - for i in int(start)..last: - if sub == s[i]: return i - return -1 + findImpl() -func find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {. +func find*(s: string, chars: set[char], start: Natural = 0, last = -1): int {. rtl, extern: "nsuFindCharSet".} = ## Searches for `chars` in `s` inside range `start..last` (both ends included). - ## If `last` is unspecified, it defaults to `s.high` (the last element). + ## If `last` is unspecified or negative, it defaults to `s.high` (the last element). ## ## If `s` contains none of the characters in `chars`, -1 is returned. ## Otherwise the index returned is relative to `s[0]`, not `start`. - ## Use `s[start..last].find` for a `start`-origin index. + ## Subtract `start` from the result for a `start`-origin index. ## ## See also: - ## * `rfind func<#rfind,string,set[char],Natural>`_ + ## * `rfind func<#rfind,string,set[char],Natural,int>`_ ## * `multiReplace func<#multiReplace,string,varargs[]>`_ - let last = if last == 0: s.high else: last + result = -1 + let last = if last < 0: s.high else: last for i in int(start)..last: - if s[i] in chars: return i - return -1 - -func find*(s, sub: string, start: Natural = 0, last = 0): int {.rtl, + if s[i] in chars: + return i + +when defined(linux): + proc memmem(haystack: pointer, haystacklen: csize_t, + needle: pointer, needlelen: csize_t): pointer {.importc, header: """#define _GNU_SOURCE +#include <string.h>""".} +elif defined(bsd) or (defined(macosx) and not defined(ios)): + proc memmem(haystack: pointer, haystacklen: csize_t, + needle: pointer, needlelen: csize_t): pointer {.importc, header: "#include <string.h>".} + +func find*(s, sub: string, start: Natural = 0, last = -1): int {.rtl, extern: "nsuFindStr".} = ## Searches for `sub` in `s` inside range `start..last` (both ends included). - ## If `last` is unspecified, it defaults to `s.high` (the last element). + ## If `last` is unspecified or negative, it defaults to `s.high` (the last element). ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. ## Otherwise the index returned is relative to `s[0]`, not `start`. - ## Use `s[start..last].find` for a `start`-origin index. + ## Subtract `start` from the result for a `start`-origin index. ## ## See also: - ## * `rfind func<#rfind,string,string,Natural>`_ + ## * `rfind func<#rfind,string,string,Natural,int>`_ ## * `replace func<#replace,string,string,string>`_ - if sub.len > s.len: return -1 + if sub.len > s.len - start: return -1 if sub.len == 1: return find(s, sub[0], start, last) - var a {.noinit.}: SkipTable - initSkipTable(a, sub) - result = find(a, s, sub, start, last) + + template useSkipTable = + result = find(initSkipTable(sub), s, sub, start, last) + + when nimvm: + useSkipTable() + else: + when declared(memmem): + let subLen = sub.len + if last < 0 and start < s.len and subLen != 0: + let found = memmem(s[start].unsafeAddr, csize_t(s.len - start), sub.cstring, csize_t(subLen)) + result = if not found.isNil: + cast[int](found) -% cast[int](s.cstring) + else: + -1 + else: + useSkipTable() + else: + useSkipTable() func rfind*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl, extern: "nsuRFindChar".} = @@ -1902,7 +2054,7 @@ func rfind*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl, ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. ## Otherwise the index returned is relative to `s[0]`, not `start`. - ## Use `s[start..last].find` for a `start`-origin index. + ## Subtract `start` from the result for a `start`-origin index. ## ## See also: ## * `find func<#find,string,char,Natural,int>`_ @@ -1920,7 +2072,7 @@ func rfind*(s: string, chars: set[char], start: Natural = 0, last = -1): int {. ## ## If `s` contains none of the characters in `chars`, -1 is returned. ## Otherwise the index returned is relative to `s[0]`, not `start`. - ## Use `s[start..last].rfind` for a `start`-origin index. + ## Subtract `start` from the result for a `start`-origin index. ## ## See also: ## * `find func<#find,string,set[char],Natural,int>`_ @@ -1938,11 +2090,14 @@ func rfind*(s, sub: string, start: Natural = 0, last = -1): int {.rtl, ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. ## Otherwise the index returned is relative to `s[0]`, not `start`. - ## Use `s[start..last].rfind` for a `start`-origin index. + ## Subtract `start` from the result for a `start`-origin index. ## ## See also: ## * `find func<#find,string,string,Natural,int>`_ if sub.len == 0: + let rightIndex: Natural = if last < 0: s.len else: last + return max(start, rightIndex) + if sub.len > s.len - start: return -1 let last = if last == -1: s.high else: last result = 0 @@ -1998,7 +2153,7 @@ func countLines*(s: string): int {.rtl, extern: "nsuCountLines".} = ## Returns the number of lines in the string `s`. ## ## This is the same as `len(splitLines(s))`, but much more efficient - ## because it doesn't modify the string creating temporal objects. Every + ## because it doesn't modify the string creating temporary objects. Every ## `character literal <manual.html#lexical-analysis-character-literals>`_ ## newline combination (CR, LF, CR-LF) is supported. ## @@ -2064,8 +2219,7 @@ func replace*(s, sub: string, by = ""): string {.rtl, # copy the rest: add result, substr(s, i) else: - var a {.noinit.}: SkipTable - initSkipTable(a, sub) + var a = initSkipTable(sub) let last = s.high var i = 0 while true: @@ -2105,9 +2259,8 @@ func replaceWord*(s, sub: string, by = ""): string {.rtl, ## replaced. if sub.len == 0: return s const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'} - var a {.noinit.}: SkipTable result = "" - initSkipTable(a, sub) + var a = initSkipTable(sub) var i = 0 let last = s.high let sublen = sub.len @@ -2128,17 +2281,31 @@ func replaceWord*(s, sub: string, by = ""): string {.rtl, add result, substr(s, i) func multiReplace*(s: string, replacements: varargs[(string, string)]): string = - ## Same as replace, but specialized for doing multiple replacements in a single - ## pass through the input string. + ## Same as `replace<#replace,string,string,string>`_, but specialized for + ## doing multiple replacements in a single pass through the input string. + ## + ## `multiReplace` scans the input string from left to right and replaces the + ## matching substrings in the same order as passed in the argument list. ## - ## `multiReplace` performs all replacements in a single pass, this means it - ## can be used to swap the occurrences of "a" and "b", for instance. + ## The implications of the order of scanning the string and matching the + ## replacements: + ## - In case of multiple matches at a given position, the earliest + ## replacement is applied. + ## - Overlaps are not handled. After performing a replacement, the scan + ## continues from the character after the matched substring. If the + ## resulting string then contains a possible match starting in a newly + ## placed substring, the additional replacement is not performed. ## ## If the resulting string is not longer than the original input string, ## only a single memory allocation is required. ## - ## The order of the replacements does matter. Earlier replacements are - ## preferred over later replacements in the argument list. + runnableExamples: + # Swapping occurrences of 'a' and 'b': + doAssert multireplace("abba", [("a", "b"), ("b", "a")]) == "baab" + + # The second replacement ("ab") is matched and performed first, the scan then + # continues from 'c', so the "bc" replacement is never matched and thus skipped. + doAssert multireplace("abc", [("bc", "x"), ("ab", "_b")]) == "_bc" result = newStringOfCap(s.len) var i = 0 var fastChk: set[char] = {} @@ -2173,7 +2340,7 @@ func insertSep*(s: string, sep = '_', digits = 3): string {.rtl, doAssert insertSep("1000000") == "1_000_000" result = newStringOfCap(s.len) let hasPrefix = isDigit(s[s.low]) == false - var idx:int + var idx: int if hasPrefix: result.add s[s.low] for i in (s.low + 1)..s.high: @@ -2187,7 +2354,7 @@ func insertSep*(s: string, sep = '_', digits = 3): string {.rtl, result.setLen(L + idx) var j = 0 dec(L) - for i in countdown(partsLen-1,0): + for i in countdown(partsLen-1, 0): if j == digits: result[L + idx] = sep dec(L) @@ -2198,13 +2365,21 @@ func insertSep*(s: string, sep = '_', digits = 3): string {.rtl, func escape*(s: string, prefix = "\"", suffix = "\""): string {.rtl, extern: "nsuEscape".} = - ## Escapes a string `s`. See `system.addEscapedChar - ## <system.html#addEscapedChar,string,char>`_ for the escaping scheme. + ## Escapes a string `s`. + ## + ## .. note:: The escaping scheme is different from + ## `system.addEscapedChar`. + ## + ## * replaces `'\0'..'\31'` and `'\127'..'\255'` by `\xHH` where `HH` is its hexadecimal value + ## * replaces ``\`` by `\\` + ## * replaces `'` by `\'` + ## * replaces `"` by `\"` ## ## The resulting string is prefixed with `prefix` and suffixed with `suffix`. ## Both may be empty strings. ## ## See also: + ## * `addEscapedChar proc<system.html#addEscapedChar,string,char>`_ ## * `unescape func<#unescape,string,string,string>`_ for the opposite ## operation result = newStringOfCap(s.len + s.len shr 2) @@ -2279,8 +2454,8 @@ func validIdentifier*(s: string): bool {.rtl, extern: "nsuValidIdentifier".} = # floating point formatting: when not defined(js): - func c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>", - importc: "sprintf", varargs} + func c_snprintf(buf: cstring, n: csize_t, frmt: cstring): cint {.header: "<stdio.h>", + importc: "snprintf", varargs.} type FloatFormatMode* = enum @@ -2307,60 +2482,63 @@ func formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, doAssert x.formatBiggestFloat() == "123.4560000000000" doAssert x.formatBiggestFloat(ffDecimal, 4) == "123.4560" doAssert x.formatBiggestFloat(ffScientific, 2) == "1.23e+02" - when defined(js): - var precision = precision - if precision == -1: - # use the same default precision as c_sprintf - precision = 6 - var res: cstring - case format - of ffDefault: - {.emit: "`res` = `f`.toString();".} - of ffDecimal: - {.emit: "`res` = `f`.toFixed(`precision`);".} - of ffScientific: - {.emit: "`res` = `f`.toExponential(`precision`);".} - result = $res - if 1.0 / f == -Inf: - # JavaScript removes the "-" from negative Zero, add it back here - result = "-" & $res - for i in 0 ..< result.len: - # Depending on the locale either dot or comma is produced, - # but nothing else is possible: - if result[i] in {'.', ','}: result[i] = decimalSep + when nimvm: + discard "implemented in the vmops" else: - const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e'] - var - frmtstr {.noinit.}: array[0..5, char] - buf {.noinit.}: array[0..2500, char] - L: cint - frmtstr[0] = '%' - if precision >= 0: - frmtstr[1] = '#' - frmtstr[2] = '.' - frmtstr[3] = '*' - frmtstr[4] = floatFormatToChar[format] - frmtstr[5] = '\0' - L = c_sprintf(addr buf, addr frmtstr, precision, f) + when defined(js): + var precision = precision + if precision == -1: + # use the same default precision as c_snprintf + precision = 6 + var res: cstring + case format + of ffDefault: + {.emit: "`res` = `f`.toString();".} + of ffDecimal: + {.emit: "`res` = `f`.toFixed(`precision`);".} + of ffScientific: + {.emit: "`res` = `f`.toExponential(`precision`);".} + result = $res + if 1.0 / f == -Inf: + # JavaScript removes the "-" from negative Zero, add it back here + result = "-" & $res + for i in 0 ..< result.len: + # Depending on the locale either dot or comma is produced, + # but nothing else is possible: + if result[i] in {'.', ','}: result[i] = decimalSep else: - frmtstr[1] = floatFormatToChar[format] - frmtstr[2] = '\0' - L = c_sprintf(addr buf, addr frmtstr, f) - result = newString(L) - for i in 0 ..< L: - # Depending on the locale either dot or comma is produced, - # but nothing else is possible: - if buf[i] in {'.', ','}: result[i] = decimalSep - else: result[i] = buf[i] - when defined(windows): - # VS pre 2015 violates the C standard: "The exponent always contains at - # least two digits, and only as many more digits as necessary to - # represent the exponent." [C11 §7.21.6.1] - # The following post-processing fixes this behavior. - if result.len > 4 and result[^4] == '+' and result[^3] == '0': - result[^3] = result[^2] - result[^2] = result[^1] - result.setLen(result.len - 1) + const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e'] + var + frmtstr {.noinit.}: array[0..5, char] + buf {.noinit.}: array[0..2500, char] + L: cint + frmtstr[0] = '%' + if precision >= 0: + frmtstr[1] = '#' + frmtstr[2] = '.' + frmtstr[3] = '*' + frmtstr[4] = floatFormatToChar[format] + frmtstr[5] = '\0' + L = c_snprintf(cast[cstring](addr buf), csize_t(2501), cast[cstring](addr frmtstr), precision, f) + else: + frmtstr[1] = floatFormatToChar[format] + frmtstr[2] = '\0' + L = c_snprintf(cast[cstring](addr buf), csize_t(2501), cast[cstring](addr frmtstr), f) + result = newString(L) + for i in 0 ..< L: + # Depending on the locale either dot or comma is produced, + # but nothing else is possible: + if buf[i] in {'.', ','}: result[i] = decimalSep + else: result[i] = buf[i] + when defined(windows): + # VS pre 2015 violates the C standard: "The exponent always contains at + # least two digits, and only as many more digits as necessary to + # represent the exponent." [C11 §7.21.6.1] + # The following post-processing fixes this behavior. + if result.len > 4 and result[^4] == '+' and result[^3] == '0': + result[^3] = result[^2] + result[^2] = result[^1] + result.setLen(result.len - 1) func formatFloat*(f: float, format: FloatFormatMode = ffDefault, precision: range[-1..32] = 16; decimalSep = '.'): string {. @@ -2400,7 +2578,8 @@ func trimZeros*(x: var string; decimalSep = '.') = var pos = last while pos >= 0 and x[pos] == '0': dec(pos) if pos > sPos: inc(pos) - x.delete(pos, last) + if last >= pos: + x.delete(pos..last) type BinaryPrefixMode* = enum ## The different names for binary prefixes. @@ -2492,13 +2671,13 @@ func formatEng*(f: BiggestFloat, ## decimal point or (if `trim` is true) the maximum number of digits to be ## shown. ## - ## .. code-block:: nim - ## + ## ```nim ## formatEng(0, 2, trim=false) == "0.00" ## formatEng(0, 2) == "0" ## formatEng(0.053, 0) == "53e-3" ## formatEng(52731234, 2) == "52.73e6" ## formatEng(-52731234, 2) == "-52.73e6" + ## ``` ## ## If `siPrefix` is set to true, the number will be displayed with the SI ## prefix corresponding to the exponent. For example 4100 will be displayed @@ -2513,8 +2692,7 @@ func formatEng*(f: BiggestFloat, ## different to appending the unit to the result as the location of the space ## is altered depending on whether there is an exponent. ## - ## .. code-block:: nim - ## + ## ```nim ## formatEng(4100, siPrefix=true, unit="V") == "4.1 kV" ## formatEng(4.1, siPrefix=true, unit="V") == "4.1 V" ## formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space @@ -2524,6 +2702,7 @@ func formatEng*(f: BiggestFloat, ## formatEng(4100) == "4.1e3" ## formatEng(4100, unit="V") == "4.1e3 V" ## formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 " # Space with useUnitSpace=true + ## ``` ## ## `decimalSep` is used as the decimal separator. ## @@ -2611,8 +2790,8 @@ func findNormalized(x: string, inArray: openArray[string]): int = # security hole... return -1 -func invalidFormatString() {.noinline.} = - raise newException(ValueError, "invalid format string") +func invalidFormatString(formatstr: string) {.noinline.} = + raise newException(ValueError, "invalid format string: " & formatstr) func addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.rtl, extern: "nsuAddf".} = @@ -2624,7 +2803,7 @@ func addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.rtl, if formatstr[i] == '$' and i+1 < len(formatstr): case formatstr[i+1] of '#': - if num > a.high: invalidFormatString() + if num > a.high: invalidFormatString(formatstr) add s, a[num] inc i, 2 inc num @@ -2640,7 +2819,7 @@ func addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.rtl, j = j * 10 + ord(formatstr[i]) - ord('0') inc(i) let idx = if not negative: j-1 else: a.len-j - if idx < 0 or idx > a.high: invalidFormatString() + if idx < 0 or idx > a.high: invalidFormatString(formatstr) add s, a[idx] of '{': var j = i+2 @@ -2657,27 +2836,27 @@ func addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.rtl, inc(j) if isNumber == 1: let idx = if not negative: k-1 else: a.len-k - if idx < 0 or idx > a.high: invalidFormatString() + if idx < 0 or idx > a.high: invalidFormatString(formatstr) add s, a[idx] else: var x = findNormalized(substr(formatstr, i+2, j-1), a) if x >= 0 and x < high(a): add s, a[x+1] - else: invalidFormatString() + else: invalidFormatString(formatstr) i = j+1 of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': var j = i+1 while j < formatstr.len and formatstr[j] in PatternChars: inc(j) var x = findNormalized(substr(formatstr, i+1, j-1), a) if x >= 0 and x < high(a): add s, a[x+1] - else: invalidFormatString() + else: invalidFormatString(formatstr) i = j else: - invalidFormatString() + invalidFormatString(formatstr) else: add s, formatstr[i] inc(i) -func `%` *(formatstr: string, a: openArray[string]): string {.rtl, +func `%`*(formatstr: string, a: openArray[string]): string {.rtl, extern: "nsuFormatOpenArray".} = ## Interpolates a format string with the values from `a`. ## @@ -2687,13 +2866,15 @@ func `%` *(formatstr: string, a: openArray[string]): string {.rtl, ## ## This is best explained by an example: ## - ## .. code-block:: nim + ## ```nim ## "$1 eats $2." % ["The cat", "fish"] + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## "The cat eats fish." + ## ``` ## ## The substitution variables (the thing after the `$`) are enumerated ## from 1 to `a.len`. @@ -2701,21 +2882,24 @@ func `%` *(formatstr: string, a: openArray[string]): string {.rtl, ## The notation `$#` can be used to refer to the next substitution ## variable: ## - ## .. code-block:: nim + ## ```nim ## "$# eats $#." % ["The cat", "fish"] + ## ``` ## ## Substitution variables can also be words (that is ## `[A-Za-z_]+[A-Za-z0-9_]*`) in which case the arguments in `a` with even ## indices are keys and with odd indices are the corresponding values. ## An example: ## - ## .. code-block:: nim + ## ```nim ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## "The cat eats fish." + ## ``` ## ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is ## raised if an ill-formed format string has been passed to the `%` operator. @@ -2725,7 +2909,7 @@ func `%` *(formatstr: string, a: openArray[string]): string {.rtl, result = newStringOfCap(formatstr.len + a.len shl 4) addf(result, formatstr, a) -func `%` *(formatstr, a: string): string {.rtl, +func `%`*(formatstr, a: string): string {.rtl, extern: "nsuFormatSingleElem".} = ## This is the same as `formatstr % [a]` (see ## `% func<#%25,string,openArray[string]>`_). @@ -2781,7 +2965,7 @@ func strip*(s: string, leading = true, trailing = true, result = substr(s, first, last) func stripLineEnd*(s: var string) = - ## Returns `s` stripped from one of these suffixes: + ## Strips one of these suffixes from `s` in-place: ## `\r, \n, \r\n, \f, \v` (at most once instance). ## For example, can be useful in conjunction with `osproc.execCmdEx`. ## aka: `chomp`:idx: @@ -2813,13 +2997,14 @@ iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[ ## Substrings are separated by a substring containing only `seps`. ## Example: ## - ## .. code-block:: nim + ## ```nim ## for word in tokenize(" this is an example "): ## writeLine(stdout, word) + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## (" ", true) ## ("this", false) ## (" ", true) @@ -2829,6 +3014,7 @@ iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[ ## (" ", true) ## ("example", false) ## (" ", true) + ## ``` var i = 0 while true: var j = i |