diff options
Diffstat (limited to 'lib/pure/strutils.nim')
-rw-r--r-- | lib/pure/strutils.nim | 2032 |
1 files changed, 1263 insertions, 769 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 4d0fe800e..8385eb24e 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -7,11 +7,70 @@ # distribution, for details about the copyright. # -## This module contains various string utility routines. -## See the module `re <re.html>`_ for regular expression support. -## See the module `pegs <pegs.html>`_ for PEG support. +## The system module defines several common functions for working with strings, +## such as: +## * ``$`` for converting other data-types to strings +## * ``&`` for string concatenation +## * ``add`` for adding a new character or a string to the existing one +## * ``in`` (alias for ``contains``) and ``notin`` for checking if a character +## is in a string +## +## This module builds upon that, providing additional functionality in form of +## procedures, iterators and templates for strings. +## +## .. code-block:: +## import strutils +## +## let +## numbers = @[867, 5309] +## multiLineString = "first line\nsecond line\nthird line" +## +## let jenny = numbers.join("-") +## assert jenny == "867-5309" +## +## assert splitLines(multiLineString) == +## @["first line", "second line", "third line"] +## assert split(multiLineString) == @["first", "line", "second", +## "line", "third", "line"] +## assert indent(multiLineString, 4) == +## " first line\n second line\n third line" +## assert 'z'.repeat(5) == "zzzzz" +## +## The chaining of functions is possible thanks to the +## `method call syntax<manual.html#procedures-method-call-syntax>`_: +## +## .. code-block:: +## import strutils +## from sequtils import map +## +## let jenny = "867-5309" +## assert jenny.split('-').map(parseInt) == @[867, 5309] +## +## assert "Beetlejuice".indent(1).repeat(3).strip == +## "Beetlejuice Beetlejuice Beetlejuice" +## ## This module is available for the `JavaScript target ## <backends.html#the-javascript-target>`_. +## +## ---- +## +## **See also:** +## * `strformat module<strformat.html>`_ for string interpolation and formatting +## * `unicode module<unicode.html>`_ for Unicode UTF-8 handling +## * `sequtils module<collections/sequtils.html>`_ for operations on container +## types (including strings) +## * `parseutils module<parseutils.html>`_ for lower-level parsing of tokens, +## numbers, identifiers, etc. +## * `parseopt module<parseopt.html>`_ for command-line parsing +## * `strtabs module<strtabs.html>`_ for efficient hash tables +## (dictionaries, in some programming languages) mapping from strings to strings +## * `pegs module<pegs.html>`_ for PEG (Parsing Expression Grammar) support +## * `ropes module<ropes.html>`_ for rope data type, which can represent very +## long strings efficiently +## * `re module<re.html>`_ for regular expression (regex) support +## * `strscans<strscans.html>`_ for ``scanf`` and ``scanp`` macros, which offer +## easier substring extraction than regular expressions + import parseutils from math import pow, floor, log10 @@ -38,7 +97,8 @@ else: const Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'} - ## All the characters that count as whitespace. + ## All the characters that count as whitespace (space, tab, vertical tab, + ## carriage return, new line, form feed) Letters* = {'A'..'Z', 'a'..'z'} ## the set of letters @@ -56,14 +116,15 @@ const ## the set of characters an identifier can start with NewLines* = {'\13', '\10'} - ## the set of characters a newline terminator can start with + ## the set of characters a newline terminator can start with (carriage + ## return, line feed) AllChars* = {'\x00'..'\xFF'} ## A set with all the possible characters. ## ## Not very useful by its own, you can use it to create *inverted* sets to - ## make the `find() proc <#find,string,set[char],int>`_ find **invalid** - ## characters in strings. Example: + ## make the `find proc<#find,string,set[char],Natural,int>`_ + ## find **invalid** characters in strings. Example: ## ## .. code-block:: nim ## let invalid = AllChars - Digits @@ -72,9 +133,10 @@ const proc isAlphaAscii*(c: char): bool {.noSideEffect, procvar, rtl, extern: "nsuIsAlphaAsciiChar".}= - ## Checks whether or not `c` is alphabetical. + ## Checks whether or not character `c` is alphabetical. ## ## This checks a-z, A-Z ASCII characters only. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. runnableExamples: doAssert isAlphaAscii('e') == true doAssert isAlphaAscii('E') == true @@ -108,6 +170,7 @@ proc isSpaceAscii*(c: char): bool {.noSideEffect, procvar, runnableExamples: doAssert isSpaceAscii('n') == false doAssert isSpaceAscii(' ') == true + doAssert isSpaceAscii('\t') == true return c in Whitespace proc isLowerAscii*(c: char): bool {.noSideEffect, procvar, @@ -115,6 +178,10 @@ proc isLowerAscii*(c: char): bool {.noSideEffect, procvar, ## Checks whether or not `c` is a lower case character. ## ## This checks ASCII characters only. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. + ## + ## See also: + ## * `toLowerAscii proc<#toLowerAscii,char>`_ runnableExamples: doAssert isLowerAscii('e') == true doAssert isLowerAscii('E') == false @@ -126,138 +193,28 @@ proc isUpperAscii*(c: char): bool {.noSideEffect, procvar, ## Checks whether or not `c` is an upper case character. ## ## This checks ASCII characters only. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. + ## + ## See also: + ## * `toUpperAscii proc<#toUpperAscii,char>`_ runnableExamples: doAssert isUpperAscii('e') == false doAssert isUpperAscii('E') == true doAssert isUpperAscii('7') == false return c in {'A'..'Z'} -template isImpl(call) = - if s.len == 0: return false - result = true - for c in s: - if not call(c): return false - -proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsAlphaAsciiStr", - deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = - ## Checks whether or not `s` is alphabetical. - ## - ## This checks a-z, A-Z ASCII characters only. - ## Returns true if all characters in `s` are - ## alphabetic and there is at least one character - ## in `s`. - runnableExamples: - doAssert isAlphaAscii("fooBar") == true - doAssert isAlphaAscii("fooBar1") == false - doAssert isAlphaAscii("foo Bar") == false - isImpl isAlphaAscii - -proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsAlphaNumericStr", - deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = - ## Checks whether or not `s` is alphanumeric. - ## - ## This checks a-z, A-Z, 0-9 ASCII characters only. - ## Returns true if all characters in `s` are - ## alpanumeric and there is at least one character - ## in `s`. - runnableExamples: - doAssert isAlphaNumeric("fooBar") == true - doAssert isAlphaNumeric("fooBar") == true - doAssert isAlphaNumeric("foo Bar") == false - isImpl isAlphaNumeric - -proc isDigit*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsDigitStr", - deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = - ## Checks whether or not `s` is a numeric value. - ## - ## This checks 0-9 ASCII characters only. - ## Returns true if all characters in `s` are - ## numeric and there is at least one character - ## in `s`. - runnableExamples: - doAssert isDigit("1908") == true - doAssert isDigit("fooBar1") == false - isImpl isDigit - -proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsSpaceAsciiStr", - deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = - ## Checks whether or not `s` is completely whitespace. - ## - ## Returns true if all characters in `s` are whitespace - ## characters and there is at least one character in `s`. - runnableExamples: - doAssert isSpaceAscii(" ") == true - doAssert isSpaceAscii("") == false - isImpl isSpaceAscii - -template isCaseImpl(s, charProc, skipNonAlpha) = - var hasAtleastOneAlphaChar = false - if s.len == 0: return false - for c in s: - if skipNonAlpha: - var charIsAlpha = c.isAlphaAscii() - if not hasAtleastOneAlphaChar: - hasAtleastOneAlphaChar = charIsAlpha - if charIsAlpha and (not charProc(c)): - return false - else: - if not charProc(c): - return false - return if skipNonAlpha: hasAtleastOneAlphaChar else: true - -proc isLowerAscii*(s: string, skipNonAlpha: bool): bool {. - deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = - ## Checks whether ``s`` is lower case. - ## - ## This checks ASCII characters only. - ## - ## If ``skipNonAlpha`` is true, returns true if all alphabetical - ## characters in ``s`` are lower case. Returns false if none of the - ## characters in ``s`` are alphabetical. - ## - ## If ``skipNonAlpha`` is false, returns true only if all characters - ## in ``s`` are alphabetical and lower case. - ## - ## For either value of ``skipNonAlpha``, returns false if ``s`` is - ## an empty string. - runnableExamples: - doAssert isLowerAscii("1foobar", false) == false - doAssert isLowerAscii("1foobar", true) == true - doAssert isLowerAscii("1fooBar", true) == false - isCaseImpl(s, isLowerAscii, skipNonAlpha) - -proc isUpperAscii*(s: string, skipNonAlpha: bool): bool {. - deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = - ## Checks whether ``s`` is upper case. - ## - ## This checks ASCII characters only. - ## - ## If ``skipNonAlpha`` is true, returns true if all alphabetical - ## characters in ``s`` are upper case. Returns false if none of the - ## characters in ``s`` are alphabetical. - ## - ## If ``skipNonAlpha`` is false, returns true only if all characters - ## in ``s`` are alphabetical and upper case. - ## - ## For either value of ``skipNonAlpha``, returns false if ``s`` is - ## an empty string. - runnableExamples: - doAssert isUpperAscii("1FOO", false) == false - doAssert isUpperAscii("1FOO", true) == true - doAssert isUpperAscii("1Foo", true) == false - isCaseImpl(s, isUpperAscii, skipNonAlpha) proc toLowerAscii*(c: char): char {.noSideEffect, procvar, rtl, extern: "nsuToLowerAsciiChar".} = - ## Returns the lower case version of ``c``. + ## Returns the lower case version of character ``c``. ## ## This works only for the letters ``A-Z``. See `unicode.toLower ## <unicode.html#toLower>`_ for a version that works for any Unicode ## character. + ## + ## See also: + ## * `isLowerAscii proc<#isLowerAscii,char>`_ + ## * `toLowerAscii proc<#toLowerAscii,string>`_ for converting a string runnableExamples: doAssert toLowerAscii('A') == 'a' doAssert toLowerAscii('e') == 'e' @@ -273,22 +230,30 @@ template toImpl(call) = proc toLowerAscii*(s: string): string {.noSideEffect, procvar, rtl, extern: "nsuToLowerAsciiStr".} = - ## Converts `s` into lower case. + ## Converts string `s` into lower case. ## ## This works only for the letters ``A-Z``. See `unicode.toLower ## <unicode.html#toLower>`_ for a version that works for any Unicode ## character. + ## + ## See also: + ## * `normalize proc<#normalize,string>`_ runnableExamples: doAssert toLowerAscii("FooBar!") == "foobar!" toImpl toLowerAscii proc toUpperAscii*(c: char): char {.noSideEffect, procvar, rtl, extern: "nsuToUpperAsciiChar".} = - ## Converts `c` into upper case. + ## Converts character `c` into upper case. ## ## This works only for the letters ``A-Z``. See `unicode.toUpper ## <unicode.html#toUpper>`_ for a version that works for any Unicode ## character. + ## + ## See also: + ## * `isLowerAscii proc<#isLowerAscii,char>`_ + ## * `toUpperAscii proc<#toUpperAscii,string>`_ for converting a string + ## * `capitalizeAscii proc<#capitalizeAscii,string>`_ runnableExamples: doAssert toUpperAscii('a') == 'A' doAssert toUpperAscii('E') == 'E' @@ -299,20 +264,27 @@ proc toUpperAscii*(c: char): char {.noSideEffect, procvar, proc toUpperAscii*(s: string): string {.noSideEffect, procvar, rtl, extern: "nsuToUpperAsciiStr".} = - ## Converts `s` into upper case. + ## Converts string `s` into upper case. ## ## This works only for the letters ``A-Z``. See `unicode.toUpper ## <unicode.html#toUpper>`_ for a version that works for any Unicode ## character. + ## + ## See also: + ## * `capitalizeAscii proc<#capitalizeAscii,string>`_ runnableExamples: doAssert toUpperAscii("FooBar!") == "FOOBAR!" toImpl toUpperAscii proc capitalizeAscii*(s: string): string {.noSideEffect, procvar, rtl, extern: "nsuCapitalizeAscii".} = - ## Converts the first character of `s` into upper case. + ## Converts the first character of string `s` into upper case. ## ## This works only for the letters ``A-Z``. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. + ## + ## See also: + ## * `toUpperAscii proc<#toUpperAscii,char>`_ runnableExamples: doAssert capitalizeAscii("foo") == "Foo" doAssert capitalizeAscii("-bar") == "-bar" @@ -325,6 +297,9 @@ proc normalize*(s: string): string {.noSideEffect, procvar, ## ## That means to convert it to lower case and remove any '_'. This ## should NOT be used to normalize Nim identifier names. + ## + ## See also: + ## * `toLowerAscii proc<#toLowerAscii,string>`_ runnableExamples: doAssert normalize("Foo_bar") == "foobar" doAssert normalize("Foo Bar") == "foo bar" @@ -343,9 +318,9 @@ proc cmpIgnoreCase*(a, b: string): int {.noSideEffect, rtl, extern: "nsuCmpIgnoreCase", procvar.} = ## Compares two strings in a case insensitive manner. Returns: ## - ## | 0 iff a == b - ## | < 0 iff a < b - ## | > 0 iff a > b + ## | 0 if a == b + ## | < 0 if a < b + ## | > 0 if a > b runnableExamples: doAssert cmpIgnoreCase("FooBar", "foobar") == 0 doAssert cmpIgnoreCase("bar", "Foo") < 0 @@ -365,12 +340,14 @@ proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect, rtl, extern: "nsuCmpIgnoreStyle", procvar.} = ## Semantically the same as ``cmp(normalize(a), normalize(b))``. It ## is just optimized to not allocate temporary strings. This should - ## NOT be used to compare Nim identifier names. use `macros.eqIdent` - ## for that. Returns: + ## NOT be used to compare Nim identifier names. + ## Use `macros.eqIdent<macros.html#eqIdent,string,string>`_ for that. + ## + ## Returns: ## - ## | 0 iff a == b - ## | < 0 iff a < b - ## | > 0 iff a > b + ## | 0 if a == b + ## | < 0 if a < b + ## | > 0 if a > b runnableExamples: doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0 doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0 @@ -394,51 +371,8 @@ proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect, inc i inc j -proc strip*(s: string, leading = true, trailing = true, - chars: set[char] = Whitespace): string - {.noSideEffect, rtl, extern: "nsuStrip".} = - ## Strips leading or trailing `chars` from `s` and returns - ## the resulting string. - ## - ## If `leading` is true, leading `chars` are stripped. - ## If `trailing` is true, trailing `chars` are stripped. - ## If both are false, the string is returned unchanged. - runnableExamples: - doAssert " vhellov ".strip().strip(trailing = false, chars = {'v'}) == "hellov" - var - first = 0 - last = len(s)-1 - if leading: - while first <= last and s[first] in chars: inc(first) - if trailing: - while last >= 0 and s[last] in chars: dec(last) - result = substr(s, first, last) - -proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} = - ## Converts a character `c` to its octal representation. - ## - ## The resulting string may not have a leading zero. Its length is always - ## exactly 3. - runnableExamples: - doAssert toOctal('!') == "041" - result = newString(3) - var val = ord(c) - for i in countdown(2, 0): - result[i] = chr(val mod 8 + ord('0')) - val = val div 8 -proc isNilOrEmpty*(s: string): bool {.noSideEffect, procvar, rtl, - extern: "nsuIsNilOrEmpty", - deprecated: "use 'x.len == 0' instead".} = - ## Checks if `s` is nil or empty. - result = len(s) == 0 - -proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nsuIsNilOrWhitespace".} = - ## Checks if `s` is nil or consists entirely of whitespace characters. - result = true - for c in s: - if not c.isSpaceAscii(): - return false +# --------- Private templates for different split separators ----------- proc substrEq(s: string, pos: int, substr: string): bool = var i = 0 @@ -447,8 +381,6 @@ proc substrEq(s: string, pos: int, substr: string): bool = inc i return i == length -# --------- Private templates for different split separators ----------- - template stringHasSep(s: string, index: int, seps: set[char]): bool = s[index] in seps @@ -459,7 +391,7 @@ template stringHasSep(s: string, index: int, sep: string): bool = s.substrEq(index, sep) template splitCommon(s, sep, maxsplit, sepLen) = - ## Common code for split procedures + ## Common code for split procs var last = 0 var splits = maxsplit @@ -487,6 +419,42 @@ template oldSplit(s, seps, maxsplit) = if splits == 0: break dec(splits) +template accResult(iter: untyped) = + result = @[] + for x in iter: add(result, x) + + +iterator split*(s: string, sep: char, maxsplit: int = -1): string = + ## Splits the string `s` into substrings using a single separator. + ## + ## Substrings are separated by the character `sep`. + ## The code: + ## + ## .. code-block:: nim + ## for word in split(";;this;is;an;;example;;;", ';'): + ## writeLine(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: + ## "" + ## "" + ## "this" + ## "is" + ## "an" + ## "" + ## "example" + ## "" + ## "" + ## "" + ## + ## See also: + ## * `rsplit iterator<#rsplit.i,string,char,int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `split proc<#split,string,char,int>`_ + splitCommon(s, sep, maxsplit, 1) + iterator split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a group of separators. @@ -529,79 +497,13 @@ iterator split*(s: string, seps: set[char] = Whitespace, ## "08" ## "08.398990" ## + ## See also: + ## * `rsplit iterator<#rsplit.i,string,set[char],int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `split proc<#split,string,set[char],int>`_ splitCommon(s, seps, maxsplit, 1) -iterator splitWhitespace*(s: string, maxsplit: int = -1): string = - ## Splits the string ``s`` at whitespace stripping leading and trailing - ## whitespace if necessary. If ``maxsplit`` is specified and is positive, - ## no more than ``maxsplit`` splits is made. - ## - ## The following code: - ## - ## .. code-block:: nim - ## let s = " foo \t bar baz " - ## for ms in [-1, 1, 2, 3]: - ## echo "------ maxsplit = ", ms, ":" - ## for item in s.splitWhitespace(maxsplit=ms): - ## echo '"', item, '"' - ## - ## ...results in: - ## - ## .. code-block:: - ## ------ maxsplit = -1: - ## "foo" - ## "bar" - ## "baz" - ## ------ maxsplit = 1: - ## "foo" - ## "bar baz " - ## ------ maxsplit = 2: - ## "foo" - ## "bar" - ## "baz " - ## ------ maxsplit = 3: - ## "foo" - ## "bar" - ## "baz" - ## - oldSplit(s, Whitespace, maxsplit) - -template accResult(iter: untyped) = - result = @[] - for x in iter: add(result, x) - -proc splitWhitespace*(s: string, maxsplit: int = -1): seq[string] {.noSideEffect, - rtl, extern: "nsuSplitWhitespace".} = - ## The same as the `splitWhitespace <#splitWhitespace.i,string,int>`_ - ## iterator, but is a proc that returns a sequence of substrings. - accResult(splitWhitespace(s, maxsplit)) - -iterator split*(s: string, sep: char, maxsplit: int = -1): string = - ## Splits the string `s` into substrings using a single separator. - ## - ## Substrings are separated by the character `sep`. - ## The code: - ## - ## .. code-block:: nim - ## for word in split(";;this;is;an;;example;;;", ';'): - ## writeLine(stdout, word) - ## - ## Results in: - ## - ## .. code-block:: - ## "" - ## "" - ## "this" - ## "is" - ## "an" - ## "" - ## "example" - ## "" - ## "" - ## "" - ## - splitCommon(s, sep, maxsplit, 1) - iterator split*(s: string, sep: string, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a string separator. ## @@ -619,8 +521,14 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string = ## "is" ## "corrupted" ## + ## See also: + ## * `rsplit iterator<#rsplit.i,string,string,int,bool>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `split proc<#split,string,string,int>`_ splitCommon(s, sep, maxsplit, sep.len) + template rsplitCommon(s, sep, maxsplit, sepLen) = ## Common code for rsplit functions var @@ -645,14 +553,14 @@ template rsplitCommon(s, sep, maxsplit, sepLen) = dec(first) last = first -iterator rsplit*(s: string, seps: set[char] = Whitespace, +iterator rsplit*(s: string, sep: char, maxsplit: int = -1): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator ## <#split.i,string,char,int>`_ except in reverse order. ## ## .. code-block:: nim - ## for piece in "foo bar".rsplit(WhiteSpace): + ## for piece in "foo:bar".rsplit(':'): ## echo piece ## ## Results in: @@ -661,17 +569,23 @@ iterator rsplit*(s: string, seps: set[char] = Whitespace, ## "bar" ## "foo" ## - ## Substrings are separated from the right by the set of chars `seps` - rsplitCommon(s, seps, maxsplit, 1) + ## Substrings are separated from the right by the char `sep`. + ## + ## See also: + ## * `split iterator<#split.i,string,char,int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `rsplit proc<#rsplit,string,char,int>`_ + rsplitCommon(s, sep, maxsplit, 1) -iterator rsplit*(s: string, sep: char, +iterator rsplit*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string = ## Splits the string `s` into substrings from the right using a ## string separator. Works exactly the same as `split iterator ## <#split.i,string,char,int>`_ except in reverse order. ## ## .. code-block:: nim - ## for piece in "foo:bar".rsplit(':'): + ## for piece in "foo bar".rsplit(WhiteSpace): ## echo piece ## ## Results in: @@ -680,8 +594,14 @@ iterator rsplit*(s: string, sep: char, ## "bar" ## "foo" ## - ## Substrings are separated from the right by the char `sep` - rsplitCommon(s, sep, maxsplit, 1) + ## Substrings are separated from the right by the set of chars `seps` + ## + ## See also: + ## * `split iterator<#split.i,string,set[char],int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `rsplit proc<#rsplit,string,set[char],int>`_ + rsplitCommon(s, seps, maxsplit, 1) iterator rsplit*(s: string, sep: string, maxsplit: int = -1, keepSeparators: bool = false): string = @@ -700,6 +620,12 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1, ## "foo" ## ## Substrings are separated from the right by the string `sep` + ## + ## See also: + ## * `split iterator<#split.i,string,string,int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `rsplit proc<#rsplit,string,string,int>`_ rsplitCommon(s, sep, maxsplit, sep.len) iterator splitLines*(s: string, keepEol = false): string = @@ -726,6 +652,10 @@ iterator splitLines*(s: string, keepEol = false): string = ## "" ## "example" ## "" + ## + ## See also: + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `splitLines proc<#splitLines,string>`_ var first = 0 var last = 0 var eolpos = 0 @@ -747,76 +677,102 @@ iterator splitLines*(s: string, keepEol = false): string = first = last -proc splitLines*(s: string, keepEol = false): seq[string] {.noSideEffect, - rtl, extern: "nsuSplitLines".} = - ## The same as the `splitLines <#splitLines.i,string>`_ iterator, but is a - ## proc that returns a sequence of substrings. - accResult(splitLines(s, keepEol=keepEol)) - -proc countLines*(s: string): int {.noSideEffect, - rtl, extern: "nsuCountLines".} = - ## Returns the number of lines in the string `s`. +iterator splitWhitespace*(s: string, maxsplit: int = -1): string = + ## Splits the string ``s`` at whitespace stripping leading and trailing + ## whitespace if necessary. If ``maxsplit`` is specified and is positive, + ## no more than ``maxsplit`` splits is made. ## - ## This is the same as ``len(splitLines(s))``, but much more efficient - ## because it doesn't modify the string creating temporal objects. Every - ## `character literal <manual.html#character-literals>`_ newline combination - ## (CR, LF, CR-LF) is supported. + ## The following code: ## - ## In this context, a line is any string seperated by a newline combination. - ## A line can be an empty string. - runnableExamples: - doAssert countLines("First line\l and second line.") == 2 - result = 1 - var i = 0 - while i < s.len: - case s[i] - of '\c': - if i+1 < s.len and s[i+1] == '\l': inc i - inc result - of '\l': inc result - else: discard - inc i + ## .. code-block:: nim + ## let s = " foo \t bar baz " + ## for ms in [-1, 1, 2, 3]: + ## echo "------ maxsplit = ", ms, ":" + ## for item in s.splitWhitespace(maxsplit=ms): + ## echo '"', item, '"' + ## + ## ...results in: + ## + ## .. code-block:: + ## ------ maxsplit = -1: + ## "foo" + ## "bar" + ## "baz" + ## ------ maxsplit = 1: + ## "foo" + ## "bar baz " + ## ------ maxsplit = 2: + ## "foo" + ## "bar" + ## "baz " + ## ------ maxsplit = 3: + ## "foo" + ## "bar" + ## "baz" + ## + ## See also: + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace proc<#splitWhitespace,string,int>`_ + oldSplit(s, Whitespace, maxsplit) + -proc split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {. - noSideEffect, rtl, extern: "nsuSplitCharSet".} = - ## The same as the `split iterator <#split.i,string,set[char],int>`_, but is a - ## proc that returns a sequence of substrings. - runnableExamples: - doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"] - doAssert "".split({' '}) == @[""] - accResult(split(s, seps, maxsplit)) proc split*(s: string, sep: char, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuSplitChar".} = - ## The same as the `split iterator <#split.i,string,char,int>`_, but is a proc - ## that returns a sequence of substrings. + ## The same as the `split iterator <#split.i,string,char,int>`_ (see its + ## documentation), but is a proc that returns a sequence of substrings. + ## + ## See also: + ## * `split iterator <#split.i,string,char,int>`_ + ## * `rsplit proc<#rsplit,string,char,int>`_ + ## * `splitLines proc<#splitLines,string>`_ + ## * `splitWhitespace proc<#splitWhitespace,string,int>`_ runnableExamples: doAssert "a,b,c".split(',') == @["a", "b", "c"] doAssert "".split(' ') == @[""] accResult(split(s, sep, maxsplit)) +proc split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {. + noSideEffect, rtl, extern: "nsuSplitCharSet".} = + ## The same as the `split iterator <#split.i,string,set[char],int>`_ (see its + ## documentation), but is a proc that returns a sequence of substrings. + ## + ## See also: + ## * `split iterator <#split.i,string,set[char],int>`_ + ## * `rsplit proc<#rsplit,string,set[char],int>`_ + ## * `splitLines proc<#splitLines,string>`_ + ## * `splitWhitespace proc<#splitWhitespace,string,int>`_ + runnableExamples: + doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"] + doAssert "".split({' '}) == @[""] + accResult(split(s, seps, maxsplit)) + proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuSplitString".} = ## Splits the string `s` into substrings using a string separator. ## ## Substrings are separated by the string `sep`. This is a wrapper around the ## `split iterator <#split.i,string,string,int>`_. + ## + ## See also: + ## * `split iterator <#split.i,string,string,int>`_ + ## * `rsplit proc<#rsplit,string,string,int>`_ + ## * `splitLines proc<#splitLines,string>`_ + ## * `splitWhitespace proc<#splitWhitespace,string,int>`_ runnableExamples: doAssert "a,b,c".split(",") == @["a", "b", "c"] doAssert "a man a plan a canal panama".split("a ") == @["", "man ", "plan ", "canal panama"] doAssert "".split("Elon Musk") == @[""] doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"] - doAssert "a largely spaced sentence".split(" ", maxsplit=1) == @["a", " largely spaced sentence"] doAssert(sep.len > 0) accResult(split(s, sep, maxsplit)) -proc rsplit*(s: string, seps: set[char] = Whitespace, - maxsplit: int = -1): seq[string] - {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} = - ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a - ## proc that returns a sequence of substrings. +proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] + {.noSideEffect, rtl, extern: "nsuRSplitChar".} = + ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a proc + ## that returns a sequence of substrings. ## ## A possible common use case for `rsplit` is path manipulation, ## particularly on systems that don't use a common delimiter. @@ -825,20 +781,26 @@ proc rsplit*(s: string, seps: set[char] = Whitespace, ## do the following to get the tail of the path: ## ## .. code-block:: nim - ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1) + ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1) ## ## Results in `tailSplit` containing: ## ## .. code-block:: nim ## @["Root#Object#Method", "Index"] ## - accResult(rsplit(s, seps, maxsplit)) + ## See also: + ## * `rsplit iterator <#rsplit.i,string,char,int>`_ + ## * `split proc<#split,string,char,int>`_ + ## * `splitLines proc<#splitLines,string>`_ + ## * `splitWhitespace proc<#splitWhitespace,string,int>`_ + accResult(rsplit(s, sep, maxsplit)) result.reverse() -proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] - {.noSideEffect, rtl, extern: "nsuRSplitChar".} = - ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a proc - ## that returns a sequence of substrings. +proc rsplit*(s: string, seps: set[char] = Whitespace, + maxsplit: int = -1): seq[string] + {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} = + ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a + ## proc that returns a sequence of substrings. ## ## A possible common use case for `rsplit` is path manipulation, ## particularly on systems that don't use a common delimiter. @@ -847,19 +809,24 @@ proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] ## do the following to get the tail of the path: ## ## .. code-block:: nim - ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1) + ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1) ## ## Results in `tailSplit` containing: ## ## .. code-block:: nim ## @["Root#Object#Method", "Index"] ## - accResult(rsplit(s, sep, maxsplit)) + ## See also: + ## * `rsplit iterator <#rsplit.i,string,set[char],int>`_ + ## * `split proc<#split,string,set[char],int>`_ + ## * `splitLines proc<#splitLines,string>`_ + ## * `splitWhitespace proc<#splitWhitespace,string,int>`_ + accResult(rsplit(s, seps, maxsplit)) result.reverse() proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuRSplitString".} = - ## The same as the `rsplit iterator <#rsplit.i,string,string,int>`_, but is a proc + ## The same as the `rsplit iterator <#rsplit.i,string,string,int,bool>`_, but is a proc ## that returns a sequence of substrings. ## ## A possible common use case for `rsplit` is path manipulation, @@ -876,9 +843,13 @@ proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] ## .. code-block:: nim ## @["Root#Object#Method", "Index"] ## + ## See also: + ## * `rsplit iterator <#rsplit.i,string,string,int,bool>`_ + ## * `split proc<#split,string,string,int>`_ + ## * `splitLines proc<#splitLines,string>`_ + ## * `splitWhitespace proc<#splitWhitespace,string,int>`_ runnableExamples: doAssert "a largely spaced sentence".rsplit(" ", maxsplit=1) == @["a largely spaced", "sentence"] - doAssert "a,b,c".rsplit(",") == @["a", "b", "c"] doAssert "a man a plan a canal panama".rsplit("a ") == @["", "man ", "plan ", "canal panama"] doAssert "".rsplit("Elon Musk") == @[""] @@ -886,6 +857,75 @@ proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] accResult(rsplit(s, sep, maxsplit)) result.reverse() +proc splitLines*(s: string, keepEol = false): seq[string] {.noSideEffect, + rtl, extern: "nsuSplitLines".} = + ## The same as the `splitLines iterator<#splitLines.i,string>`_ (see its + ## documentation), but is a proc that returns a sequence of substrings. + ## + ## See also: + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace proc<#splitWhitespace,string,int>`_ + ## * `countLines proc<#countLines,string>`_ + accResult(splitLines(s, keepEol=keepEol)) + +proc splitWhitespace*(s: string, maxsplit: int = -1): seq[string] {.noSideEffect, + rtl, extern: "nsuSplitWhitespace".} = + ## The same as the `splitWhitespace iterator <#splitWhitespace.i,string,int>`_ + ## (see its documentation), but is a proc that returns a sequence of substrings. + ## + ## See also: + ## * `splitWhitespace iterator <#splitWhitespace.i,string,int>`_ + ## * `splitLines proc<#splitLines,string>`_ + accResult(splitWhitespace(s, maxsplit)) + +proc toBin*(x: BiggestInt, len: Positive): string {.noSideEffect, + rtl, extern: "nsuToBin".} = + ## Converts `x` into its binary representation. + ## + ## The resulting string is always `len` characters long. No leading ``0b`` + ## prefix is generated. + runnableExamples: + let + a = 29 + b = 257 + doAssert a.toBin(8) == "00011101" + doAssert b.toBin(8) == "00000001" + doAssert b.toBin(9) == "100000001" + var + mask: BiggestInt = 1 + shift: BiggestInt = 0 + assert(len > 0) + result = newString(len) + for j in countdown(len-1, 0): + result[j] = chr(int((x and mask) shr shift) + ord('0')) + shift = shift + 1 + mask = mask shl 1 + +proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect, + rtl, extern: "nsuToOct".} = + ## Converts `x` into its octal representation. + ## + ## The resulting string is always `len` characters long. No leading ``0o`` + ## prefix is generated. + ## + ## Do not confuse it with `toOctal proc<#toOctal,char>`_. + runnableExamples: + let + a = 62 + b = 513 + doAssert a.toOct(3) == "076" + doAssert b.toOct(3) == "001" + doAssert b.toOct(5) == "01001" + var + mask: BiggestInt = 7 + shift: BiggestInt = 0 + assert(len > 0) + result = newString(len) + for j in countdown(len-1, 0): + result[j] = chr(int((x and mask) shr shift) + ord('0')) + shift = shift + 3 + mask = mask shl 3 + proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect, rtl, extern: "nsuToHex".} = ## Converts `x` to its hexadecimal representation. @@ -893,8 +933,12 @@ proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect, ## The resulting string will be exactly `len` characters long. No prefix like ## ``0x`` is generated. `x` is treated as an unsigned value. runnableExamples: - doAssert toHex(1984, 6) == "0007C0" - doAssert toHex(1984, 2) == "C0" + let + a = 62 + b = 4097 + doAssert a.toHex(3) == "03E" + doAssert b.toHex(3) == "001" + doAssert b.toHex(4) == "1001" const HexChars = "0123456789ABCDEF" var @@ -917,6 +961,18 @@ proc toHex*(s: string): string {.noSideEffect, rtl.} = ## ## The output is twice the input long. No prefix like ## ``0x`` is generated. + ## + ## See also: + ## * `parseHexStr proc<#parseHexStr,string>`_ for the reverse operation + runnableExamples: + let + a = "1" + b = "A" + c = "\0\255" + doAssert a.toHex() == "31" + doAssert b.toHex() == "41" + doAssert c.toHex() == "00FF" + const HexChars = "0123456789ABCDEF" result = newString(s.len * 2) for pos, c in s: @@ -925,6 +981,25 @@ proc toHex*(s: string): string {.noSideEffect, rtl.} = n = n shr 4 result[pos * 2] = HexChars[n] +proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} = + ## Converts a character `c` to its octal representation. + ## + ## The resulting string may not have a leading zero. Its length is always + ## exactly 3. + ## + ## Do not confuse it with `toOct proc<#toOct,BiggestInt,Positive>`_. + runnableExamples: + doAssert toOctal('1') == "061" + doAssert toOctal('A') == "101" + doAssert toOctal('a') == "141" + doAssert toOctal('!') == "041" + + result = newString(3) + var val = ord(c) + for i in countdown(2, 0): + result[i] = chr(val mod 8 + ord('0')) + val = val div 8 + proc intToStr*(x: int, minchars: Positive = 1): string {.noSideEffect, rtl, extern: "nsuIntToStr".} = ## Converts `x` to its decimal representation. @@ -980,9 +1055,10 @@ proc parseBiggestUInt*(s: string): BiggestUInt {.noSideEffect, procvar, proc parseFloat*(s: string): float {.noSideEffect, procvar, rtl, extern: "nsuParseFloat".} = - ## Parses a decimal floating point value contained in `s`. If `s` is not - ## a valid floating point number, `ValueError` is raised. ``NAN``, - ## ``INF``, ``-INF`` are also supported (case insensitive comparison). + ## Parses a decimal floating point value contained in `s`. + ## + ## If `s` is not a valid floating point number, `ValueError` is raised. + ##``NAN``, ``INF``, ``-INF`` are also supported (case insensitive comparison). runnableExamples: doAssert parseFloat("3.14") == 3.14 doAssert parseFloat("inf") == 1.0/0 @@ -997,6 +1073,13 @@ proc parseBinInt*(s: string): int {.noSideEffect, procvar, ## If `s` is not a valid binary integer, `ValueError` is raised. `s` can have ## one of the following optional prefixes: ``0b``, ``0B``. Underscores within ## `s` are ignored. + runnableExamples: + let + a = "0b11_0101" + b = "111" + doAssert a.parseBinInt() == 53 + doAssert b.parseBinInt() == 7 + let L = parseutils.parseBin(s, result, 0) if L != s.len or L == 0: raise newException(ValueError, "invalid binary integer: " & s) @@ -1042,11 +1125,20 @@ proc parseHexStr*(s: string): string {.noSideEffect, procvar, rtl, extern: "nsuParseHexStr".} = ## Convert hex-encoded string to byte string, e.g.: ## - ## .. code-block:: nim - ## hexToStr("00ff") == "\0\255" - ## ## Raises ``ValueError`` for an invalid hex values. The comparison is ## case-insensitive. + ## + ## See also: + ## * `toHex proc<#toHex,string>`_ for the reverse operation + runnableExamples: + let + a = "41" + b = "3161" + c = "00ff" + doAssert parseHexStr(a) == "A" + doAssert parseHexStr(b) == "1a" + doAssert parseHexStr(c) == "\0\255" + if s.len mod 2 != 0: raise newException(ValueError, "Incorrect hex string len") result = newString(s.len div 2) @@ -1067,6 +1159,10 @@ proc parseBool*(s: string): bool = ## returns `true`. If ``s`` is one of the following values: ``n, no, false, ## 0, off``, then returns `false`. If ``s`` is something else a ## ``ValueError`` exception is raised. + runnableExamples: + let a = "n" + doAssert parseBool(a) == false + case normalize(s) of "y", "yes", "true", "1", "on": result = true of "n", "no", "false", "0", "off": result = false @@ -1095,39 +1191,41 @@ proc parseEnum*[T: enum](s: string, default: T): T = proc repeat*(c: char, count: Natural): string {.noSideEffect, rtl, extern: "nsuRepeatChar".} = ## Returns a string of length `count` consisting only of - ## the character `c`. You can use this proc to left align strings. Example: - ## - ## .. code-block:: nim - ## proc tabexpand(indent: int, text: string, tabsize: int = 4) = - ## echo '\t'.repeat(indent div tabsize), ' '.repeat(indent mod tabsize), - ## text - ## - ## tabexpand(4, "At four") - ## tabexpand(5, "At five") - ## tabexpand(6, "At six") + ## the character `c`. + runnableExamples: + let a = 'z' + doAssert a.repeat(5) == "zzzzz" result = newString(count) for i in 0..count-1: result[i] = c proc repeat*(s: string, n: Natural): string {.noSideEffect, rtl, extern: "nsuRepeatStr".} = - ## Returns String `s` concatenated `n` times. Example: - ## - ## .. code-block:: nim - ## echo "+++ STOP ".repeat(4), "+++" + ## Returns string `s` concatenated `n` times. + runnableExamples: + doAssert "+ foo +".repeat(3) == "+ foo ++ foo ++ foo +" + result = newStringOfCap(n * s.len) for i in 1..n: result.add(s) -template spaces*(n: Natural): string = repeat(' ', n) - ## Returns a String with `n` space characters. You can use this proc - ## to left align strings. Example: +proc spaces*(n: Natural): string {.inline.} = + ## Returns a string with `n` space characters. You can use this proc + ## to left align strings. ## - ## .. code-block:: nim - ## let - ## width = 15 - ## text1 = "Hello user!" - ## text2 = "This is a very long string" - ## echo text1 & spaces(max(0, width - text1.len)) & "|" - ## echo text2 & spaces(max(0, width - text2.len)) & "|" + ## See also: + ## * `align proc<#align,string,Natural,Char>`_ + ## * `alignLeft proc<#alignLeft,string,Natural,Char>`_ + ## * `indent proc<#indent,string,Natural,string>`_ + ## * `center proc<#center,string,int,char>`_ + runnableExamples: + let + width = 15 + text1 = "Hello user!" + text2 = "This is a very long string" + doAssert text1 & spaces(max(0, width - text1.len)) & "|" == + "Hello user! |" + doAssert text2 & spaces(max(0, width - text2.len)) & "|" == + "This is a very long string|" + repeat(' ', n) proc align*(s: string, count: Natural, padding = ' '): string {. noSideEffect, rtl, extern: "nsuAlignString".} = @@ -1136,13 +1234,18 @@ proc align*(s: string, count: Natural, padding = ' '): string {. ## `padding` characters (by default spaces) are added before `s` resulting in ## right alignment. If ``s.len >= count``, no spaces are added and `s` is ## returned unchanged. If you need to left align a string use the `alignLeft - ## proc <#alignLeft>`_. Example: + ## proc <#alignLeft,string,Natural,Char>`_. ## - ## .. code-block:: nim - ## assert align("abc", 4) == " abc" - ## assert align("a", 0) == "a" - ## assert align("1232", 6) == " 1232" - ## assert align("1232", 6, '#') == "##1232" + ## See also: + ## * `alignLeft proc<#alignLeft,string,Natural,Char>`_ + ## * `spaces proc<#spaces,Natural>`_ + ## * `indent proc<#indent,string,Natural,string>`_ + ## * `center proc<#center,string,int,char>`_ + runnableExamples: + assert align("abc", 4) == " abc" + assert align("a", 0) == "a" + assert align("1232", 6) == " 1232" + assert align("1232", 6, '#') == "##1232" if s.len < count: result = newString(count) let spaces = count - s.len @@ -1157,13 +1260,18 @@ proc alignLeft*(s: string, count: Natural, padding = ' '): string {.noSideEffect ## `padding` characters (by default spaces) are added after `s` resulting in ## left alignment. If ``s.len >= count``, no spaces are added and `s` is ## returned unchanged. If you need to right align a string use the `align - ## proc <#align>`_. Example: + ## proc <#align,string,Natural,Char>`_. ## - ## .. code-block:: nim - ## assert alignLeft("abc", 4) == "abc " - ## assert alignLeft("a", 0) == "a" - ## assert alignLeft("1232", 6) == "1232 " - ## assert alignLeft("1232", 6, '#') == "1232##" + ## See also: + ## * `align proc<#align,string,Natural,Char>`_ + ## * `spaces proc<#spaces,Natural>`_ + ## * `indent proc<#indent,string,Natural,string>`_ + ## * `center proc<#center,string,int,char>`_ + runnableExamples: + assert alignLeft("abc", 4) == "abc " + assert alignLeft("a", 0) == "a" + assert alignLeft("1232", 6) == "1232 " + assert alignLeft("1232", 6, '#') == "1232##" if s.len < count: result = newString(count) if s.len > 0: @@ -1173,83 +1281,55 @@ proc alignLeft*(s: string, count: Natural, padding = ' '): string {.noSideEffect else: result = s -iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[ - token: string, isSep: bool] = - ## Tokenizes the string `s` into substrings. - ## - ## Substrings are separated by a substring containing only `seps`. - ## Examples: - ## - ## .. code-block:: nim - ## for word in tokenize(" this is an example "): - ## writeLine(stdout, word) +proc center*(s: string, width: int, fillChar: char = ' '): string {. + noSideEffect, rtl, extern: "nsuCenterString".} = + ## Return the contents of `s` centered in a string `width` long using + ## `fillChar` (default: space) as padding. ## - ## Results in: + ## The original string is returned if `width` is less than or equal + ## to `s.len`. ## - ## .. code-block:: nim - ## (" ", true) - ## ("this", false) - ## (" ", true) - ## ("is", false) - ## (" ", true) - ## ("an", false) - ## (" ", true) - ## ("example", false) - ## (" ", true) - var i = 0 - while true: - var j = i - var isSep = j < s.len and s[j] in seps - while j < s.len and (s[j] in seps) == isSep: inc(j) - if j > i: - yield (substr(s, i, j-1), isSep) - else: - break - i = j - -proc wordWrap*(s: string, maxLineWidth = 80, - splitLongWords = true, - seps: set[char] = Whitespace, - newLine = "\n"): string {. - noSideEffect, rtl, extern: "nsuWordWrap", - deprecated: "use wrapWords in std/wordwrap instead".} = - ## Word wraps `s`. - result = newStringOfCap(s.len + s.len shr 6) - var spaceLeft = maxLineWidth - var lastSep = "" - for word, isSep in tokenize(s, seps): - if isSep: - lastSep = word - spaceLeft = spaceLeft - len(word) - continue - if len(word) > spaceLeft: - if splitLongWords and len(word) > maxLineWidth: - result.add(substr(word, 0, spaceLeft-1)) - var w = spaceLeft - var wordLeft = len(word) - spaceLeft - while wordLeft > 0: - result.add(newLine) - var L = min(maxLineWidth, wordLeft) - spaceLeft = maxLineWidth - L - result.add(substr(word, w, w+L-1)) - inc(w, L) - dec(wordLeft, L) - else: - spaceLeft = maxLineWidth - len(word) - result.add(newLine) - result.add(word) + ## See also: + ## * `align proc<#align,string,Natural,Char>`_ + ## * `alignLeft proc<#alignLeft,string,Natural,Char>`_ + ## * `spaces proc<#spaces,Natural>`_ + ## * `indent proc<#indent,string,Natural,string>`_ + runnableExamples: + let a = "foo" + doAssert a.center(2) == "foo" + doAssert a.center(5) == " foo " + doAssert a.center(6) == " foo " + if width <= s.len: return s + result = newString(width) + # Left padding will be one fillChar + # smaller if there are an odd number + # of characters + let + charsLeft = (width - s.len) + leftPadding = charsLeft div 2 + for i in 0 ..< width: + if i >= leftPadding and i < leftPadding + s.len: + # we are where the string should be located + result[i] = s[i-leftPadding] else: - spaceLeft = spaceLeft - len(word) - result.add(lastSep & word) - lastSep.setLen(0) + # we are either before or after where + # the string s should go + result[i] = fillChar proc indent*(s: string, count: Natural, padding: string = " "): string {.noSideEffect, rtl, extern: "nsuIndent".} = ## Indents each line in ``s`` by ``count`` amount of ``padding``. ## ## **Note:** This does not preserve the new line characters used in ``s``. + ## + ## See also: + ## * `align proc<#align,string,Natural,Char>`_ + ## * `alignLeft proc<#alignLeft,string,Natural,Char>`_ + ## * `spaces proc<#spaces,Natural>`_ + ## * `unindent proc<#unindent,string,Natural,string>`_ runnableExamples: - doAssert indent("First line\c\l and second line.", 2) == " First line\l and second line." + doAssert indent("First line\c\l and second line.", 2) == + " First line\l and second line." result = "" var i = 0 for line in s.splitLines(): @@ -1266,8 +1346,15 @@ proc unindent*(s: string, count: Natural, padding: string = " "): string ## Sometimes called `dedent`:idx: ## ## **Note:** This does not preserve the new line characters used in ``s``. + ## + ## See also: + ## * `align proc<#align,string,Natural,Char>`_ + ## * `alignLeft proc<#alignLeft,string,Natural,Char>`_ + ## * `spaces proc<#spaces,Natural>`_ + ## * `indent proc<#indent,string,Natural,string>`_ runnableExamples: - doAssert unindent(" First line\l and second line", 3) == "First line\land second line" + doAssert unindent(" First line\l and second line", 3) == + "First line\land second line" result = "" var i = 0 for line in s.splitLines(): @@ -1286,37 +1373,105 @@ proc unindent*(s: string): string {.noSideEffect, rtl, extern: "nsuUnindentAll".} = ## Removes all indentation composed of whitespace from each line in ``s``. ## - ## For example: + ## See also: + ## * `align proc<#align,string,Natural,Char>`_ + ## * `alignLeft proc<#alignLeft,string,Natural,Char>`_ + ## * `spaces proc<#spaces,Natural>`_ + ## * `indent proc<#indent,string,Natural,string>`_ + runnableExamples: + let x = """ + Hello + There + """.unindent() + + doAssert x == "Hello\nThere\n" + unindent(s, 1000) # TODO: Passing a 1000 is a bit hackish. + +proc delete*(s: var string, first, last: int) {.noSideEffect, + rtl, extern: "nsuDelete".} = + ## Deletes in `s` (must be declared as ``var``) the characters at positions + ## ``first ..last`` (both ends included). ## - ## .. code-block:: nim - ## const x = """ - ## Hello - ## There - ## """.unindent() + ## This modifies `s` itself, it does not return a copy. + runnableExamples: + var a = "abracadabra" + + a.delete(4, 5) + doAssert a == "abradabra" + + a.delete(1, 6) + doAssert a == "ara" + + var i = first + var j = last+1 + var newLen = len(s)-j+i + while i < newLen: + s[i] = s[j] + inc(i) + inc(j) + setLen(s, newLen) + + +proc startsWith*(s: string, prefix: char): bool {.noSideEffect, inline.} = + ## Returns true if ``s`` starts with character ``prefix``. ## - ## doAssert x == "Hello\nThere\n" - unindent(s, 1000) # TODO: Passing a 1000 is a bit hackish. + ## See also: + ## * `endsWith proc<#endsWith,string,char>`_ + ## * `continuesWith proc<#continuesWith,string,string,Natural>`_ + ## * `removePrefix proc<#removePrefix,string,char>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.startsWith('a') == true + doAssert a.startsWith('b') == false + result = s.len > 0 and s[0] == prefix proc startsWith*(s, prefix: string): bool {.noSideEffect, rtl, extern: "nsuStartsWith".} = - ## Returns true iff ``s`` starts with ``prefix``. + ## Returns true if ``s`` starts with string ``prefix``. ## ## If ``prefix == ""`` true is returned. + ## + ## See also: + ## * `endsWith proc<#endsWith,string,string>`_ + ## * `continuesWith proc<#continuesWith,string,string,Natural>`_ + ## * `removePrefix proc<#removePrefix,string,string>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.startsWith("abra") == true + doAssert a.startsWith("bra") == false var i = 0 while true: if i >= prefix.len: return true if i >= s.len or s[i] != prefix[i]: return false inc(i) -proc startsWith*(s: string, prefix: char): bool {.noSideEffect, inline.} = - ## Returns true iff ``s`` starts with ``prefix``. - result = s.len > 0 and s[0] == prefix +proc endsWith*(s: string, suffix: char): bool {.noSideEffect, inline.} = + ## Returns true if ``s`` ends with ``suffix``. + ## + ## See also: + ## * `startsWith proc<#startsWith,string,char>`_ + ## * `continuesWith proc<#continuesWith,string,string,Natural>`_ + ## * `removeSuffix proc<#removeSuffix,string,char>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.endsWith('a') == true + doAssert a.endsWith('b') == false + result = s.len > 0 and s[s.high] == suffix proc endsWith*(s, suffix: string): bool {.noSideEffect, rtl, extern: "nsuEndsWith".} = - ## Returns true iff ``s`` ends with ``suffix``. + ## Returns true if ``s`` ends with ``suffix``. ## ## If ``suffix == ""`` true is returned. + ## + ## See also: + ## * `startsWith proc<#startsWith,string,string>`_ + ## * `continuesWith proc<#continuesWith,string,string,Natural>`_ + ## * `removeSuffix proc<#removeSuffix,string,string>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.endsWith("abra") == true + doAssert a.endsWith("dab") == false var i = 0 var j = len(s) - len(suffix) while i+j <% s.len: @@ -1324,21 +1479,136 @@ proc endsWith*(s, suffix: string): bool {.noSideEffect, inc(i) if i >= suffix.len: return true -proc endsWith*(s: string, suffix: char): bool {.noSideEffect, inline.} = - ## Returns true iff ``s`` ends with ``suffix``. - result = s.len > 0 and s[s.high] == suffix - proc continuesWith*(s, substr: string, start: Natural): bool {.noSideEffect, rtl, extern: "nsuContinuesWith".} = - ## Returns true iff ``s`` continues with ``substr`` at position ``start``. + ## Returns true if ``s`` continues with ``substr`` at position ``start``. ## ## If ``substr == ""`` true is returned. + ## + ## See also: + ## * `startsWith proc<#startsWith,string,string>`_ + ## * `endsWith proc<#endsWith,string,string>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.continuesWith("ca", 4) == true + doAssert a.continuesWith("ca", 5) == false + doAssert a.continuesWith("dab", 6) == true var i = 0 while true: if i >= substr.len: return true if i+start >= s.len or s[i+start] != substr[i]: return false inc(i) + +proc removePrefix*(s: var string, chars: set[char] = Newlines) {. + rtl, extern: "nsuRemovePrefixCharSet".} = + ## Removes all characters from `chars` from the start of the string `s` + ## (in-place). + ## + ## See also: + ## * `removeSuffix proc<#removeSuffix,string,set[char]>`_ + runnableExamples: + var userInput = "\r\n*~Hello World!" + userInput.removePrefix + doAssert userInput == "*~Hello World!" + userInput.removePrefix({'~', '*'}) + doAssert userInput == "Hello World!" + + var otherInput = "?!?Hello!?!" + otherInput.removePrefix({'!', '?'}) + doAssert otherInput == "Hello!?!" + + var start = 0 + while start < s.len and s[start] in chars: start += 1 + if start > 0: s.delete(0, start - 1) + +proc removePrefix*(s: var string, c: char) {. + rtl, extern: "nsuRemovePrefixChar".} = + ## Removes all occurrences of a single character (in-place) from the start + ## of a string. + ## + ## See also: + ## * `removeSuffix proc<#removeSuffix,string,char>`_ + ## * `startsWith proc<#startsWith,string,char>`_ + runnableExamples: + var ident = "pControl" + ident.removePrefix('p') + doAssert ident == "Control" + removePrefix(s, chars = {c}) + +proc removePrefix*(s: var string, prefix: string) {. + rtl, extern: "nsuRemovePrefixString".} = + ## Remove the first matching prefix (in-place) from a string. + ## + ## See also: + ## * `removeSuffix proc<#removeSuffix,string,string>`_ + ## * `startsWith proc<#startsWith,string,string>`_ + runnableExamples: + var answers = "yesyes" + answers.removePrefix("yes") + doAssert answers == "yes" + if s.startsWith(prefix): + s.delete(0, prefix.len - 1) + +proc removeSuffix*(s: var string, chars: set[char] = Newlines) {. + rtl, extern: "nsuRemoveSuffixCharSet".} = + ## Removes all characters from `chars` from the end of the string `s` + ## (in-place). + ## + ## See also: + ## * `removePrefix proc<#removePrefix,string,set[char]>`_ + runnableExamples: + var userInput = "Hello World!*~\r\n" + userInput.removeSuffix + doAssert userInput == "Hello World!*~" + userInput.removeSuffix({'~', '*'}) + doAssert userInput == "Hello World!" + + var otherInput = "Hello!?!" + otherInput.removeSuffix({'!', '?'}) + doAssert otherInput == "Hello" + + if s.len == 0: return + var last = s.high + while last > -1 and s[last] in chars: last -= 1 + s.setLen(last + 1) + +proc removeSuffix*(s: var string, c: char) {. + rtl, extern: "nsuRemoveSuffixChar".} = + ## Removes all occurrences of a single character (in-place) from the end + ## of a string. + ## + ## See also: + ## * `removePrefix proc<#removePrefix,string,char>`_ + ## * `endsWith proc<#endsWith,string,char>`_ + runnableExamples: + var table = "users" + table.removeSuffix('s') + doAssert table == "user" + + var dots = "Trailing dots......." + dots.removeSuffix('.') + doAssert dots == "Trailing dots" + + removeSuffix(s, chars = {c}) + +proc removeSuffix*(s: var string, suffix: string) {. + rtl, extern: "nsuRemoveSuffixString".} = + ## Remove the first matching suffix (in-place) from a string. + ## + ## See also: + ## * `removePrefix proc<#removePrefix,string,string>`_ + ## * `endsWith proc<#endsWith,string,string>`_ + runnableExamples: + var answers = "yeses" + answers.removeSuffix("es") + doAssert answers == "yes" + var newLen = s.len + if s.endsWith(suffix): + newLen -= len(suffix) + s.setLen(newLen) + + proc addSep*(dest: var string, sep = ", ", startLen: Natural = 0) {.noSideEffect, inline.} = ## Adds a separator to `dest` only if its length is bigger than `startLen`. @@ -1353,24 +1623,28 @@ proc addSep*(dest: var string, sep = ", ", startLen: Natural = 0) ## `startLen`. The following example creates a string describing ## an array of integers. runnableExamples: - var arr = "[" - for x in items([2, 3, 5, 7, 11]): - addSep(arr, startLen=len("[")) - add(arr, $x) - add(arr, "]") + var arr = "[" + for x in items([2, 3, 5, 7, 11]): + addSep(arr, startLen=len("[")) + add(arr, $x) + add(arr, "]") + doAssert arr == "[2, 3, 5, 7, 11]" + if dest.len > startLen: add(dest, sep) proc allCharsInSet*(s: string, theSet: set[char]): bool = - ## Returns true iff each character of `s` is in the set `theSet`. + ## Returns true if every character of `s` is in the set `theSet`. runnableExamples: doAssert allCharsInSet("aeea", {'a', 'e'}) == true doAssert allCharsInSet("", {'a', 'e'}) == true + for c in items(s): if c notin theSet: return false return true proc abbrev*(s: string, possibilities: openArray[string]): int = - ## Returns the index of the first item in ``possibilities`` which starts with ``s``, if not ambiguous. + ## Returns the index of the first item in ``possibilities`` which starts + ## with ``s``, if not ambiguous. ## ## Returns -1 if no item has been found and -2 if multiple items match. runnableExamples: @@ -1378,6 +1652,7 @@ proc abbrev*(s: string, possibilities: openArray[string]): int = doAssert abbrev("foo", ["college", "faculty", "industry"]) == -1 # Not found doAssert abbrev("fac", ["college", "faculty", "faculties"]) == -2 # Ambiguous doAssert abbrev("college", ["college", "colleges", "industry"]) == 0 + result = -1 # none found for i in 0..possibilities.len-1: if possibilities[i].startsWith(s): @@ -1391,9 +1666,10 @@ proc abbrev*(s: string, possibilities: openArray[string]): int = proc join*(a: openArray[string], sep: string = ""): string {. noSideEffect, rtl, extern: "nsuJoinSep".} = - ## Concatenates all strings in `a` separating them with `sep`. + ## Concatenates all strings in the container `a`, separating them with `sep`. runnableExamples: doAssert join(["A", "B", "Conclusion"], " -> ") == "A -> B -> Conclusion" + if len(a) > 0: var L = sep.len * (a.len-1) for i in 0..high(a): inc(L, a[i].len) @@ -1407,10 +1683,11 @@ proc join*(a: openArray[string], sep: string = ""): string {. proc join*[T: not string](a: openArray[T], sep: string = ""): string {. noSideEffect, rtl.} = - ## Converts all elements in `a` to strings using `$` and concatenates them - ## with `sep`. + ## Converts all elements in the container `a` to strings using `$`, + ## and concatenates them with `sep`. runnableExamples: doAssert join([1, 2, 3], " -> ") == "1 -> 2 -> 3" + result = "" for i, x in a: if i > 0: @@ -1441,14 +1718,13 @@ proc initSkipTable*(a: var SkipTable, sub: string) proc find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {.noSideEffect, rtl, extern: "nsuFindStrA".} = - ## Searches for `sub` in `s` inside range `start`..`last` using preprocessed table `a`. - ## If `last` is unspecified, it defaults to `s.high`. + ## Searches for `sub` in `s` inside range `start`..`last` using preprocessed + ## table `a`. If `last` is unspecified, it defaults to `s.high` (the last + ## element). ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. - let last = if last==0: s.high else: last - sLen = last - start + 1 subLast = sub.len - 1 if subLast == -1: @@ -1467,7 +1743,6 @@ proc find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int return skip dec i inc skip, a[s[skip + subLast]] - return -1 when not (defined(js) or defined(nimdoc) or defined(nimscript)): @@ -1479,10 +1754,14 @@ else: proc find*(s: string, sub: char, start: Natural = 0, last = 0): int {.noSideEffect, rtl, extern: "nsuFindChar".} = - ## Searches for `sub` in `s` inside range `start`..`last`. - ## If `last` is unspecified, it defaults to `s.high`. + ## Searches for `sub` in `s` inside range ``start..last`` (both ends included). + ## If `last` is unspecified, it defaults to `s.high` (the last element). ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + ## + ## See also: + ## * `rfind proc<#rfind,string,char,int>`_ + ## * `replace proc<#replace,string,char,char>`_ let last = if last==0: s.high else: last when nimvm: for i in int(start)..last: @@ -1499,34 +1778,72 @@ proc find*(s: string, sub: char, start: Natural = 0, last = 0): int {.noSideEffe if sub == s[i]: return i return -1 +proc find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.noSideEffect, + rtl, extern: "nsuFindCharSet".} = + ## Searches for `chars` in `s` inside range ``start..last`` (both ends included). + ## If `last` is unspecified, it defaults to `s.high` (the last element). + ## + ## If `s` contains none of the characters in `chars`, -1 is returned. + ## + ## See also: + ## * `rfind proc<#rfind,string,set[char],int>`_ + ## * `multiReplace proc<#multiReplace,string,varargs[]>`_ + let last = if last==0: s.high else: last + for i in int(start)..last: + if s[i] in chars: return i + return -1 + proc find*(s, sub: string, start: Natural = 0, last = 0): int {.noSideEffect, rtl, extern: "nsuFindStr".} = - ## Searches for `sub` in `s` inside range `start`..`last`. - ## If `last` is unspecified, it defaults to `s.high`. + ## Searches for `sub` in `s` inside range ``start..last`` (both ends included). + ## If `last` is unspecified, it defaults to `s.high` (the last element). ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + ## + ## See also: + ## * `rfind proc<#rfind,string,string,int>`_ + ## * `replace proc<#replace,string,string,string>`_ if sub.len > s.len: return -1 if sub.len == 1: return find(s, sub[0], start, last) var a {.noinit.}: SkipTable initSkipTable(a, sub) result = find(a, s, sub, start, last) -proc find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.noSideEffect, - rtl, extern: "nsuFindCharSet".} = - ## Searches for `chars` in `s` inside range `start`..`last`. - ## If `last` is unspecified, it defaults to `s.high`. +proc rfind*(s: string, sub: char, start: int = -1): int {.noSideEffect, + rtl.} = + ## Searches for characer `sub` in `s` in reverse, starting at position `start` + ## (default: the last character) and going backwards to the first character. ## - ## If `s` contains none of the characters in `chars`, -1 is returned. - let last = if last==0: s.high else: last - for i in int(start)..last: + ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + ## + ## See also: + ## * `find proc<#find,string,char,int,int>`_ + let realStart = if start == -1: s.len-1 else: start + for i in countdown(realStart, 0): + if sub == s[i]: return i + return -1 + +proc rfind*(s: string, chars: set[char], start: int = -1): int {.noSideEffect.} = + ## Searches for `chars` in `s` in reverse, starting at position `start` + ## (default: the last character) and going backwards to the first character. + ## + ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + ## + ## See also: + ## * `find proc<#find,string,set[char],Natural,int>`_ + let realStart = if start == -1: s.len-1 else: start + for i in countdown(realStart, 0): if s[i] in chars: return i return -1 proc rfind*(s, sub: string, start: int = -1): int {.noSideEffect.} = - ## Searches for `sub` in `s` in reverse, starting at `start` and going - ## backwards to 0. + ## Searches for string `sub` in `s` in reverse, starting at position `start` + ## (default: the last character) and going backwards to the first character. ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + ## + ## See also: + ## * `find proc<#find,string,string,Natural,int>`_ if sub.len == 0: return -1 let realStart = if start == -1: s.len else: start @@ -1539,54 +1856,34 @@ proc rfind*(s, sub: string, start: int = -1): int {.noSideEffect.} = if result != -1: return return -1 -proc rfind*(s: string, sub: char, start: int = -1): int {.noSideEffect, - rtl.} = - ## Searches for `sub` in `s` in reverse starting at position `start`. - ## - ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. - let realStart = if start == -1: s.len-1 else: start - for i in countdown(realStart, 0): - if sub == s[i]: return i - return -1 -proc rfind*(s: string, chars: set[char], start: int = -1): int {.noSideEffect.} = - ## Searches for `chars` in `s` in reverse starting at position `start`. +proc count*(s: string, sub: char): int {.noSideEffect, + rtl, extern: "nsuCountChar".} = + ## Count the occurrences of the character `sub` in the string `s`. ## - ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. - let realStart = if start == -1: s.len-1 else: start - for i in countdown(realStart, 0): - if s[i] in chars: return i - return -1 + ## See also: + ## * `countLines proc<#countLines,string>`_ + for c in s: + if c == sub: inc result -proc center*(s: string, width: int, fillChar: char = ' '): string {. - noSideEffect, rtl, extern: "nsuCenterString".} = - ## Return the contents of `s` centered in a string `width` long using - ## `fillChar` as padding. +proc count*(s: string, subs: set[char]): int {.noSideEffect, + rtl, extern: "nsuCountCharSet".} = + ## Count the occurrences of the group of character `subs` in the string `s`. ## - ## The original string is returned if `width` is less than or equal - ## to `s.len`. - if width <= s.len: return s - result = newString(width) - # Left padding will be one fillChar - # smaller if there are an odd number - # of characters - let - charsLeft = (width - s.len) - leftPadding = charsLeft div 2 - for i in 0 ..< width: - if i >= leftPadding and i < leftPadding + s.len: - # we are where the string should be located - result[i] = s[i-leftPadding] - else: - # we are either before or after where - # the string s should go - result[i] = fillChar + ## See also: + ## * `countLines proc<#countLines,string>`_ + doAssert card(subs) > 0 + for c in s: + if c in subs: inc result proc count*(s: string, sub: string, overlapping: bool = false): int {. noSideEffect, rtl, extern: "nsuCountString".} = ## Count the occurrences of a substring `sub` in the string `s`. ## Overlapping occurrences of `sub` only count when `overlapping` - ## is set to true. + ## is set to true (default: false). + ## + ## See also: + ## * `countLines proc<#countLines,string>`_ doAssert sub.len > 0 var i = 0 while true: @@ -1596,43 +1893,58 @@ proc count*(s: string, sub: string, overlapping: bool = false): int {. else: i += sub.len inc result -proc count*(s: string, sub: char): int {.noSideEffect, - rtl, extern: "nsuCountChar".} = - ## Count the occurrences of the character `sub` in the string `s`. - for c in s: - if c == sub: inc result - -proc count*(s: string, subs: set[char]): int {.noSideEffect, - rtl, extern: "nsuCountCharSet".} = - ## Count the occurrences of the group of character `subs` in the string `s`. - doAssert card(subs) > 0 - for c in s: - if c in subs: inc result - -proc quoteIfContainsWhite*(s: string): string {.deprecated.} = - ## Returns ``'"' & s & '"'`` if `s` contains a space and does not - ## start with a quote, else returns `s`. +proc countLines*(s: string): int {.noSideEffect, + rtl, extern: "nsuCountLines".} = + ## Returns the number of lines in the string `s`. + ## + ## This is the same as ``len(splitLines(s))``, but much more efficient + ## because it doesn't modify the string creating temporal objects. Every + ## `character literal <manual.html#lexical-analysis-character-literals>`_ + ## newline combination (CR, LF, CR-LF) is supported. ## - ## **DEPRECATED** as it was confused for shell quoting function. For this - ## application use `osproc.quoteShell <osproc.html#quoteShell>`_. - if find(s, {' ', '\t'}) >= 0 and s[0] != '"': result = '"' & s & '"' - else: result = s + ## In this context, a line is any string seperated by a newline combination. + ## A line can be an empty string. + ## + ## See also: + ## * `splitLines proc<#splitLines,string>`_ + runnableExamples: + doAssert countLines("First line\l and second line.") == 2 + result = 1 + var i = 0 + while i < s.len: + case s[i] + of '\c': + if i+1 < s.len and s[i+1] == '\l': inc i + inc result + of '\l': inc result + else: discard + inc i -proc contains*(s: string, c: char): bool {.noSideEffect.} = - ## Same as ``find(s, c) >= 0``. - return find(s, c) >= 0 proc contains*(s, sub: string): bool {.noSideEffect.} = ## Same as ``find(s, sub) >= 0``. + ## + ## See also: + ## * `find proc<#find,string,string,Natural,int>`_ return find(s, sub) >= 0 proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} = ## Same as ``find(s, chars) >= 0``. + ## + ## See also: + ## * `find proc<#find,string,set[char],Natural,int>`_ return find(s, chars) >= 0 proc replace*(s, sub: string, by = ""): string {.noSideEffect, rtl, extern: "nsuReplaceStr".} = ## Replaces `sub` in `s` by the string `by`. + ## + ## See also: + ## * `find proc<#find,string,string,Natural,int>`_ + ## * `replace proc<#replace,string,char,char>`_ for replacing + ## single characters + ## * `replaceWord proc<#replaceWord,string,string,string>`_ + ## * `multiReplace proc<#multiReplace,string,varargs[]>`_ result = "" let subLen = sub.len if subLen == 0: @@ -1670,6 +1982,11 @@ proc replace*(s: string, sub, by: char): string {.noSideEffect, ## Replaces `sub` in `s` by the character `by`. ## ## Optimized version of `replace <#replace,string,string>`_ for characters. + ## + ## See also: + ## * `find proc<#find,string,char,Natural,int>`_ + ## * `replaceWord proc<#replaceWord,string,string,string>`_ + ## * `multiReplace proc<#multiReplace,string,varargs[]>`_ result = newString(s.len) var i = 0 while i < s.len: @@ -1682,7 +1999,7 @@ proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect, ## Replaces `sub` in `s` by the string `by`. ## ## Each occurrence of `sub` has to be surrounded by word boundaries - ## (comparable to ``\\w`` in regular expressions), otherwise it is not + ## (comparable to ``\b`` in regular expressions), otherwise it is not ## replaced. if sub.len == 0: return s const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'} @@ -1712,14 +2029,14 @@ proc multiReplace*(s: string, replacements: varargs[(string, string)]): string { ## Same as replace, but specialized for doing multiple replacements in a single ## pass through the input string. ## - ## multiReplace performs all replacements in a single pass, this means it can be used - ## to swap the occurences of "a" and "b", for instance. + ## `multiReplace` performs all replacements in a single pass, this means it + ## can be used to swap the occurences of "a" and "b", for instance. ## - ## If the resulting string is not longer than the original input string, only a single - ## memory allocation is required. + ## If the resulting string is not longer than the original input string, + ## only a single memory allocation is required. ## - ## The order of the replacements does matter. Earlier replacements are preferred over later - ## replacements in the argument list. + ## The order of the replacements does matter. Earlier replacements are + ## preferred over later replacements in the argument list. result = newStringOfCap(s.len) var i = 0 var fastChk: set[char] = {} @@ -1741,55 +2058,12 @@ proc multiReplace*(s: string, replacements: varargs[(string, string)]): string { add result, s[i] inc(i) -proc delete*(s: var string, first, last: int) {.noSideEffect, - rtl, extern: "nsuDelete".} = - ## Deletes in `s` the characters at position `first` .. `last`. - ## - ## This modifies `s` itself, it does not return a copy. - var i = first - var j = last+1 - var newLen = len(s)-j+i - while i < newLen: - s[i] = s[j] - inc(i) - inc(j) - setLen(s, newLen) -proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect, - rtl, extern: "nsuToOct".} = - ## Converts `x` into its octal representation. - ## - ## The resulting string is always `len` characters long. No leading ``0o`` - ## prefix is generated. - var - mask: BiggestInt = 7 - shift: BiggestInt = 0 - assert(len > 0) - result = newString(len) - for j in countdown(len-1, 0): - result[j] = chr(int((x and mask) shr shift) + ord('0')) - shift = shift + 3 - mask = mask shl 3 - -proc toBin*(x: BiggestInt, len: Positive): string {.noSideEffect, - rtl, extern: "nsuToBin".} = - ## Converts `x` into its binary representation. - ## - ## The resulting string is always `len` characters long. No leading ``0b`` - ## prefix is generated. - var - mask: BiggestInt = 1 - shift: BiggestInt = 0 - assert(len > 0) - result = newString(len) - for j in countdown(len-1, 0): - result[j] = chr(int((x and mask) shr shift) + ord('0')) - shift = shift + 1 - mask = mask shl 1 proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect, rtl, extern: "nsuInsertSep".} = - ## Inserts the separator `sep` after `digits` digits from right to left. + ## Inserts the separator `sep` after `digits` characters (default: 3) + ## from right to left. ## ## Even though the algorithm works with any string `s`, it is only useful ## if `s` contains a number. @@ -1811,11 +2085,15 @@ proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect, proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, rtl, extern: "nsuEscape".} = - ## Escapes a string `s`. See `system.addEscapedChar <system.html#addEscapedChar>`_ - ## for the escaping scheme. + ## Escapes a string `s`. See `system.addEscapedChar + ## <system.html#addEscapedChar,string,char>`_ for the escaping scheme. ## ## The resulting string is prefixed with `prefix` and suffixed with `suffix`. ## Both may be empty strings. + ## + ## See also: + ## * `unescape proc<#unescape,string,string,string>`_ for the opposite + ## operation result = newStringOfCap(s.len + s.len shr 2) result.add(prefix) for c in items(s): @@ -1833,8 +2111,8 @@ proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, rtl, extern: "nsuUnescape".} = ## Unescapes a string `s`. ## - ## This complements `escape <#escape>`_ as it performs the opposite - ## operations. + ## This complements `escape proc<#escape,string,string,string>`_ + ## as it performs the opposite operations. ## ## If `s` does not begin with ``prefix`` and end with ``suffix`` a ## ValueError exception will be raised. @@ -1880,101 +2158,12 @@ proc validIdentifier*(s: string): bool {.noSideEffect, ## and is followed by any number of characters of the set `IdentChars`. runnableExamples: doAssert "abc_def08".validIdentifier + if s.len > 0 and s[0] in IdentStartChars: for i in 1..s.len-1: if s[i] notin IdentChars: return false return true -{.push warning[Deprecated]: off.} -proc editDistance*(a, b: string): int {.noSideEffect, - rtl, extern: "nsuEditDistance", - deprecated: "use editdistance.editDistanceAscii instead".} = - ## Returns the edit distance between `a` and `b`. - ## - ## This uses the `Levenshtein`:idx: distance algorithm with only a linear - ## memory overhead. - var len1 = a.len - var len2 = b.len - if len1 > len2: - # make `b` the longer string - return editDistance(b, a) - - # strip common prefix: - var s = 0 - while s < len1 and a[s] == b[s]: - inc(s) - dec(len1) - dec(len2) - # strip common suffix: - while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]: - dec(len1) - dec(len2) - # trivial cases: - if len1 == 0: return len2 - if len2 == 0: return len1 - - # another special case: - if len1 == 1: - for j in s..s+len2-1: - if a[s] == b[j]: return len2 - 1 - return len2 - - inc(len1) - inc(len2) - var half = len1 shr 1 - # initalize first row: - #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int))) - var row: seq[int] - newSeq(row, len2) - var e = s + len2 - 1 # end marker - for i in 1..len2 - half - 1: row[i] = i - row[0] = len1 - half - 1 - for i in 1 .. len1 - 1: - var char1 = a[i + s - 1] - var char2p: int - var D, x: int - var p: int - if i >= len1 - half: - # skip the upper triangle: - var offset = i - len1 + half - char2p = offset - p = offset - var c3 = row[p] + ord(char1 != b[s + char2p]) - inc(p) - inc(char2p) - x = row[p] + 1 - D = x - if x > c3: x = c3 - row[p] = x - inc(p) - else: - p = 1 - char2p = 0 - D = i - x = i - if i <= half + 1: - # skip the lower triangle: - e = len2 + i - half - 2 - # main: - while p <= e: - dec(D) - var c3 = D + ord(char1 != b[char2p + s]) - inc(char2p) - inc(x) - if x > c3: x = c3 - D = row[p] + 1 - if x > D: x = D - row[p] = x - inc(p) - # lower triangle sentinel: - if i <= half: - dec(D) - var c3 = D + ord(char1 != b[char2p + s]) - inc(x) - if x > c3: x = c3 - row[p] = x - result = row[e] -{.pop.} # floating point formating: when not defined(js): @@ -1982,10 +2171,11 @@ when not defined(js): importc: "sprintf", varargs, noSideEffect.} type - FloatFormatMode* = enum ## the different modes of floating point formating - ffDefault, ## use the shorter floating point notation - ffDecimal, ## use decimal floating point notation - ffScientific ## use scientific notation (using ``e`` character) + FloatFormatMode* = enum + ## the different modes of floating point formating + ffDefault, ## use the shorter floating point notation + ffDecimal, ## use decimal floating point notation + ffScientific ## use scientific notation (using ``e`` character) proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, precision: range[-1..32] = 16; @@ -2001,6 +2191,11 @@ proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, ## after the decimal point for Nim's ``biggestFloat`` type. ## ## If ``precision == -1``, it tries to format it nicely. + runnableExamples: + let x = 123.456 + doAssert x.formatBiggestFloat() == "123.4560000000000" + doAssert x.formatBiggestFloat(ffDecimal, 4) == "123.4560" + doAssert x.formatBiggestFloat(ffScientific, 2) == "1.23e+02" when defined(js): var precision = precision if precision == -1: @@ -2080,11 +2275,18 @@ proc formatFloat*(f: float, format: FloatFormatMode = ffDefault, doAssert x.formatFloat() == "123.4560000000000" doAssert x.formatFloat(ffDecimal, 4) == "123.4560" doAssert x.formatFloat(ffScientific, 2) == "1.23e+02" + result = formatBiggestFloat(f, format, precision, decimalSep) proc trimZeros*(x: var string) {.noSideEffect.} = ## Trim trailing zeros from a formatted floating point - ## value (`x`). Modifies the passed value. + ## value `x` (must be declared as ``var``). + ## + ## This modifies `x` itself, it does not return a copy. + runnableExamples: + var x = "123.456000000" + x.trimZeros() + doAssert x == "123.456" var spl: seq[string] if x.contains('.') or x.contains(','): if x.contains('e'): @@ -2114,6 +2316,9 @@ proc formatSize*(bytes: int64, ## ## `includeSpace` can be set to true to include the (SI preferred) space ## between the number and the unit (e.g. 1 KiB). + ## + ## See also: + ## * `strformat module<strformat.html>`_ for string interpolation and formatting runnableExamples: doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB" doAssert formatSize((2.234*1024*1024).int) == "2.234MiB" @@ -2121,6 +2326,7 @@ proc formatSize*(bytes: int64, doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB" doAssert formatSize(4096) == "4KiB" doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB" + const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"] const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"] var @@ -2216,6 +2422,9 @@ proc formatEng*(f: BiggestFloat, ## formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 " # Space with useUnitSpace=true ## ## `decimalSep` is used as the decimal separator. + ## + ## See also: + ## * `strformat module<strformat.html>`_ for string interpolation and formatting var absolute: BiggestFloat significand: BiggestFloat @@ -2403,110 +2612,69 @@ proc `%` *(formatstr: string, a: openArray[string]): string {.noSideEffect, ## ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is ## raised if an ill-formed format string has been passed to the `%` operator. + ## + ## See also: + ## * `strformat module<strformat.html>`_ for string interpolation and formatting result = newStringOfCap(formatstr.len + a.len shl 4) addf(result, formatstr, a) proc `%` *(formatstr, a: string): string {.noSideEffect, rtl, extern: "nsuFormatSingleElem".} = - ## This is the same as ``formatstr % [a]``. + ## This is the same as ``formatstr % [a]`` (see + ## `% proc<#%25,string,openArray[string]>`_). result = newStringOfCap(formatstr.len + a.len) addf(result, formatstr, [a]) proc format*(formatstr: string, a: varargs[string, `$`]): string {.noSideEffect, rtl, extern: "nsuFormatVarargs".} = - ## This is the same as ``formatstr % a`` except that it supports + ## This is the same as ``formatstr % a`` (see + ## `% proc<#%25,string,openArray[string]>`_) except that it supports ## auto stringification. + ## + ## See also: + ## * `strformat module<strformat.html>`_ for string interpolation and formatting result = newStringOfCap(formatstr.len + a.len) addf(result, formatstr, a) {.pop.} -proc removeSuffix*(s: var string, chars: set[char] = Newlines) {. - rtl, extern: "nsuRemoveSuffixCharSet".} = - ## Removes all characters from `chars` from the end of the string `s` - ## (in-place). - runnableExamples: - var userInput = "Hello World!*~\r\n" - userInput.removeSuffix - doAssert userInput == "Hello World!*~" - userInput.removeSuffix({'~', '*'}) - doAssert userInput == "Hello World!" - var otherInput = "Hello!?!" - otherInput.removeSuffix({'!', '?'}) - doAssert otherInput == "Hello" - if s.len == 0: return - var last = s.high - while last > -1 and s[last] in chars: last -= 1 - s.setLen(last + 1) -proc removeSuffix*(s: var string, c: char) {. - rtl, extern: "nsuRemoveSuffixChar".} = - ## Removes all occurrences of a single character (in-place) from the end - ## of a string. +proc strip*(s: string, leading = true, trailing = true, + chars: set[char] = Whitespace): string + {.noSideEffect, rtl, extern: "nsuStrip".} = + ## Strips leading or trailing `chars` (default: whitespace characters) + ## from `s` and returns the resulting string. ## - runnableExamples: - var table = "users" - table.removeSuffix('s') - doAssert table == "user" - - var dots = "Trailing dots......." - dots.removeSuffix('.') - doAssert dots == "Trailing dots" - removeSuffix(s, chars = {c}) - -proc removeSuffix*(s: var string, suffix: string) {. - rtl, extern: "nsuRemoveSuffixString".} = - ## Remove the first matching suffix (in-place) from a string. - runnableExamples: - var answers = "yeses" - answers.removeSuffix("es") - doAssert answers == "yes" - var newLen = s.len - if s.endsWith(suffix): - newLen -= len(suffix) - s.setLen(newLen) - -proc removePrefix*(s: var string, chars: set[char] = Newlines) {. - rtl, extern: "nsuRemovePrefixCharSet".} = - ## Removes all characters from `chars` from the start of the string `s` - ## (in-place). + ## If `leading` is true (default), leading `chars` are stripped. + ## If `trailing` is true (default), trailing `chars` are stripped. + ## If both are false, the string is returned unchanged. ## + ## See also: + ## * `stripLineEnd proc<#stripLineEnd,string>`_ runnableExamples: - var userInput = "\r\n*~Hello World!" - userInput.removePrefix - doAssert userInput == "*~Hello World!" - userInput.removePrefix({'~', '*'}) - doAssert userInput == "Hello World!" + let a = " vhellov " + let b = strip(a) + doAssert b == "vhellov" - var otherInput = "?!?Hello!?!" - otherInput.removePrefix({'!', '?'}) - doAssert otherInput == "Hello!?!" - var start = 0 - while start < s.len and s[start] in chars: start += 1 - if start > 0: s.delete(0, start - 1) + doAssert a.strip(leading = false) == " vhellov" + doAssert a.strip(trailing = false) == "vhellov " -proc removePrefix*(s: var string, c: char) {. - rtl, extern: "nsuRemovePrefixChar".} = - ## Removes all occurrences of a single character (in-place) from the start - ## of a string. - ## - runnableExamples: - var ident = "pControl" - ident.removePrefix('p') - doAssert ident == "Control" - removePrefix(s, chars = {c}) + doAssert b.strip(chars = {'v'}) == "hello" + doAssert b.strip(leading = false, chars = {'v'}) == "vhello" -proc removePrefix*(s: var string, prefix: string) {. - rtl, extern: "nsuRemovePrefixString".} = - ## Remove the first matching prefix (in-place) from a string. - ## - runnableExamples: - var answers = "yesyes" - answers.removePrefix("yes") - doAssert answers == "yes" - if s.startsWith(prefix): - s.delete(0, prefix.len - 1) + let c = "blaXbla" + doAssert c.strip(chars = {'b', 'a'}) == "laXbl" + doAssert c.strip(chars = {'b', 'a', 'l'}) == "X" + + var + first = 0 + last = len(s)-1 + if leading: + while first <= last and s[first] in chars: inc(first) + if trailing: + while last >= 0 and s[last] in chars: dec(last) + result = substr(s, first, last) proc stripLineEnd*(s: var string) = ## Returns ``s`` stripped from one of these suffixes: @@ -2520,6 +2688,7 @@ proc stripLineEnd*(s: var string) = s = "foo\r\n" s.stripLineEnd doAssert s == "foo" + if s.len > 0: case s[^1] of '\n': @@ -2532,6 +2701,331 @@ proc stripLineEnd*(s: var string) = else: discard + +iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[ + token: string, isSep: bool] = + ## Tokenizes the string `s` into substrings. + ## + ## Substrings are separated by a substring containing only `seps`. + ## Example: + ## + ## .. code-block:: nim + ## for word in tokenize(" this is an example "): + ## writeLine(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: nim + ## (" ", true) + ## ("this", false) + ## (" ", true) + ## ("is", false) + ## (" ", true) + ## ("an", false) + ## (" ", true) + ## ("example", false) + ## (" ", true) + var i = 0 + while true: + var j = i + var isSep = j < s.len and s[j] in seps + while j < s.len and (s[j] in seps) == isSep: inc(j) + if j > i: + yield (substr(s, i, j-1), isSep) + else: + break + i = j + + + + + +# -------------------------------------------------------------------------- +# Deprecated procs + +{.push warning[Deprecated]: off.} +proc editDistance*(a, b: string): int {.noSideEffect, + rtl, extern: "nsuEditDistance", + deprecated: "use editdistance.editDistanceAscii instead".} = + ## **Deprecated**: Use `editdistance module<editdistance.html>`_ + ## + ## Returns the edit distance between `a` and `b`. + ## + ## This uses the `Levenshtein`:idx: distance algorithm with only a linear + ## memory overhead. + var len1 = a.len + var len2 = b.len + if len1 > len2: + # make `b` the longer string + return editDistance(b, a) + + # strip common prefix: + var s = 0 + while s < len1 and a[s] == b[s]: + inc(s) + dec(len1) + dec(len2) + # strip common suffix: + while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]: + dec(len1) + dec(len2) + # trivial cases: + if len1 == 0: return len2 + if len2 == 0: return len1 + + # another special case: + if len1 == 1: + for j in s..s+len2-1: + if a[s] == b[j]: return len2 - 1 + return len2 + + inc(len1) + inc(len2) + var half = len1 shr 1 + # initalize first row: + #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int))) + var row: seq[int] + newSeq(row, len2) + var e = s + len2 - 1 # end marker + for i in 1..len2 - half - 1: row[i] = i + row[0] = len1 - half - 1 + for i in 1 .. len1 - 1: + var char1 = a[i + s - 1] + var char2p: int + var D, x: int + var p: int + if i >= len1 - half: + # skip the upper triangle: + var offset = i - len1 + half + char2p = offset + p = offset + var c3 = row[p] + ord(char1 != b[s + char2p]) + inc(p) + inc(char2p) + x = row[p] + 1 + D = x + if x > c3: x = c3 + row[p] = x + inc(p) + else: + p = 1 + char2p = 0 + D = i + x = i + if i <= half + 1: + # skip the lower triangle: + e = len2 + i - half - 2 + # main: + while p <= e: + dec(D) + var c3 = D + ord(char1 != b[char2p + s]) + inc(char2p) + inc(x) + if x > c3: x = c3 + D = row[p] + 1 + if x > D: x = D + row[p] = x + inc(p) + # lower triangle sentinel: + if i <= half: + dec(D) + var c3 = D + ord(char1 != b[char2p + s]) + inc(x) + if x > c3: x = c3 + row[p] = x + result = row[e] +{.pop.} + +proc isNilOrEmpty*(s: string): bool {.noSideEffect, procvar, rtl, + extern: "nsuIsNilOrEmpty", + deprecated: "use 'x.len == 0' instead".} = + ## **Deprecated**: use 'x.len == 0' + ## + ## Checks if `s` is nil or empty. + result = len(s) == 0 + +proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nsuIsNilOrWhitespace".} = + ## Checks if `s` is nil or consists entirely of whitespace characters. + result = true + for c in s: + if not c.isSpaceAscii(): + return false + +template isImpl(call) = + if s.len == 0: return false + result = true + for c in s: + if not call(c): return false + +proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar, + rtl, extern: "nsuIsAlphaAsciiStr", + deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = + ## **Deprecated**: Deprecated since version 0.20 since its semantics are unclear + ## + ## Checks whether or not `s` is alphabetical. + ## + ## This checks a-z, A-Z ASCII characters only. + ## Returns true if all characters in `s` are + ## alphabetic and there is at least one character + ## in `s`. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. + runnableExamples: + doAssert isAlphaAscii("fooBar") == true + doAssert isAlphaAscii("fooBar1") == false + doAssert isAlphaAscii("foo Bar") == false + isImpl isAlphaAscii + +proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar, + rtl, extern: "nsuIsAlphaNumericStr", + deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = + ## **Deprecated**: Deprecated since version 0.20 since its semantics are unclear + ## + ## Checks whether or not `s` is alphanumeric. + ## + ## This checks a-z, A-Z, 0-9 ASCII characters only. + ## Returns true if all characters in `s` are + ## alpanumeric and there is at least one character + ## in `s`. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. + runnableExamples: + doAssert isAlphaNumeric("fooBar") == true + doAssert isAlphaNumeric("fooBar1") == true + doAssert isAlphaNumeric("foo Bar") == false + isImpl isAlphaNumeric + +proc isDigit*(s: string): bool {.noSideEffect, procvar, + rtl, extern: "nsuIsDigitStr", + deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = + ## **Deprecated**: Deprecated since version 0.20 since its semantics are unclear + ## + ## Checks whether or not `s` is a numeric value. + ## + ## This checks 0-9 ASCII characters only. + ## Returns true if all characters in `s` are + ## numeric and there is at least one character + ## in `s`. + runnableExamples: + doAssert isDigit("1908") == true + doAssert isDigit("fooBar1") == false + isImpl isDigit + +proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar, + rtl, extern: "nsuIsSpaceAsciiStr", + deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = + ## **Deprecated**: Deprecated since version 0.20 since its semantics are unclear + ## + ## Checks whether or not `s` is completely whitespace. + ## + ## Returns true if all characters in `s` are whitespace + ## characters and there is at least one character in `s`. + runnableExamples: + doAssert isSpaceAscii(" ") == true + doAssert isSpaceAscii("") == false + isImpl isSpaceAscii + +template isCaseImpl(s, charProc, skipNonAlpha) = + var hasAtleastOneAlphaChar = false + if s.len == 0: return false + for c in s: + if skipNonAlpha: + var charIsAlpha = c.isAlphaAscii() + if not hasAtleastOneAlphaChar: + hasAtleastOneAlphaChar = charIsAlpha + if charIsAlpha and (not charProc(c)): + return false + else: + if not charProc(c): + return false + return if skipNonAlpha: hasAtleastOneAlphaChar else: true + +proc isLowerAscii*(s: string, skipNonAlpha: bool): bool {. + deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = + ## **Deprecated**: Deprecated since version 0.20 since its semantics are unclear + ## + ## Checks whether ``s`` is lower case. + ## + ## This checks ASCII characters only. + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## characters in ``s`` are lower case. Returns false if none of the + ## characters in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all characters + ## in ``s`` are alphabetical and lower case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. + runnableExamples: + doAssert isLowerAscii("1foobar", false) == false + doAssert isLowerAscii("1foobar", true) == true + doAssert isLowerAscii("1fooBar", true) == false + isCaseImpl(s, isLowerAscii, skipNonAlpha) + +proc isUpperAscii*(s: string, skipNonAlpha: bool): bool {. + deprecated: "Deprecated since version 0.20 since its semantics are unclear".} = + ## **Deprecated**: Deprecated since version 0.20 since its semantics are unclear + ## + ## Checks whether ``s`` is upper case. + ## + ## This checks ASCII characters only. + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## characters in ``s`` are upper case. Returns false if none of the + ## characters in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all characters + ## in ``s`` are alphabetical and upper case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. + runnableExamples: + doAssert isUpperAscii("1FOO", false) == false + doAssert isUpperAscii("1FOO", true) == true + doAssert isUpperAscii("1Foo", true) == false + isCaseImpl(s, isUpperAscii, skipNonAlpha) + +proc wordWrap*(s: string, maxLineWidth = 80, + splitLongWords = true, + seps: set[char] = Whitespace, + newLine = "\n"): string {. + noSideEffect, rtl, extern: "nsuWordWrap", + deprecated: "use wrapWords in std/wordwrap instead".} = + ## **Deprecated**: use wrapWords in std/wordwrap instead + ## + ## Word wraps `s`. + result = newStringOfCap(s.len + s.len shr 6) + var spaceLeft = maxLineWidth + var lastSep = "" + for word, isSep in tokenize(s, seps): + if isSep: + lastSep = word + spaceLeft = spaceLeft - len(word) + continue + if len(word) > spaceLeft: + if splitLongWords and len(word) > maxLineWidth: + result.add(substr(word, 0, spaceLeft-1)) + var w = spaceLeft + var wordLeft = len(word) - spaceLeft + while wordLeft > 0: + result.add(newLine) + var L = min(maxLineWidth, wordLeft) + spaceLeft = maxLineWidth - L + result.add(substr(word, w, w+L-1)) + inc(w, L) + dec(wordLeft, L) + else: + spaceLeft = maxLineWidth - len(word) + result.add(newLine) + result.add(word) + else: + spaceLeft = spaceLeft - len(word) + result.add(lastSep & word) + lastSep.setLen(0) + + + when isMainModule: proc nonStaticTests = doAssert formatBiggestFloat(1234.567, ffDecimal, -1) == "1234.567000" |