diff options
author | zetashift <rskaraya@gmail.com> | 2021-02-26 15:08:48 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-26 15:08:48 +0100 |
commit | c7d6e4c6a6078075433828ccec6a7f4351d6a096 (patch) | |
tree | aa6aef3a95c4c3bfb58841e2d3ead77b9ed8647e | |
parent | 63f1c38f4e0c6f8700a8877d5f3f6f2ac0fc16e6 (diff) | |
download | Nim-c7d6e4c6a6078075433828ccec6a7f4351d6a096.tar.gz |
Fix unicode.split with separators examples (#17176)
* Fix unicode.split with separators examples

  The example at https://nim-lang.org/docs/unicode.html#split.i%2Cstring%2CRune%2Cint didn't work with the set constructor `{}`, so it was replaced with a string that's converted to an openArray-compatible type containing `Rune`s.
* Add runnableExamples to unicode.split
* Add runnableExamples to split with single separator too
* Simplify runnableExamples unicode.split
* Improve the rest of the runnableExamples with the simplified code
* Simplify runnableExamples of unicode.split even more
* Formatted unicode.split example
* Update lib/pure/unicode.nim

Co-authored-by: zetashift <rishi2@laptop.localdomain>
Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>
-rw-r--r-- | lib/pure/unicode.nim | 83 |
1 files changed, 25 insertions, 58 deletions
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index 8939186a7..903f01fb4 100644 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -955,43 +955,25 @@ iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces, ## Splits the unicode string ``s`` into substrings using a group of separators. ## ## Substrings are separated by a substring containing only ``seps``. - ## - ## .. code-block:: nim - ## for word in split("this\lis an\texample"): - ## writeLine(stdout, word) - ## - ## ...generates this output: - ## - ## .. code-block:: - ## "this" - ## "is" - ## "an" - ## "example" - ## - ## And the following code: - ## - ## .. code-block:: nim - ## for word in split("this:is;an$example", {';', ':', '$'}): - ## writeLine(stdout, word) - ## - ## ...produces the same output as the first example. The code: - ## - ## .. code-block:: nim - ## let date = "2012-11-20T22:08:08.398990" - ## let separators = {' ', '-', ':', 'T'} - ## for number in split(date, separators): - ## writeLine(stdout, number) - ## - ## ...results in: - ## - ## .. code-block:: - ## "2012" - ## "11" - ## "20" - ## "22" - ## "08" - ## "08.398990" - ## + runnableExamples: + import std/sequtils + + assert toSeq("hÃllo\lthis\lis an\texample\l是".split) == + @["hÃllo", "this", "is", "an", "example", "是"] + + # And the following code splits the same string using a sequence of Runes. + assert toSeq(split("añyóng:hÃllo;是$example", ";:$".toRunes)) == + @["añyóng", "hÃllo", "是", "example"] + + # example with a `Rune` separator and unused one `;`: + assert toSeq(split("ab是de:f:", ";:是".toRunes)) == @["ab", "de", "f", ""] + + # Another example that splits a string containing a date. 
+ let date = "2012-11-20T22:08:08.398990" + + assert toSeq(split(date, " -:T".toRunes)) == + @["2012", "11", "20", "22", "08", "08.398990"] + splitCommon(s, seps, maxsplit) iterator splitWhitespace*(s: string): string = @@ -1010,28 +992,13 @@ proc splitWhitespace*(s: string): seq[string] {.noSideEffect, iterator split*(s: string, sep: Rune, maxsplit: int = -1): string = ## Splits the unicode string ``s`` into substrings using a single separator. - ## ## Substrings are separated by the rune ``sep``. - ## The code: - ## - ## .. code-block:: nim - ## for word in split(";;this;is;an;;example;;;", ';'): - ## writeLine(stdout, word) - ## - ## Results in: - ## - ## .. code-block:: - ## "" - ## "" - ## "this" - ## "is" - ## "an" - ## "" - ## "example" - ## "" - ## "" - ## "" - ## + runnableExamples: + import std/sequtils + + assert toSeq(split(";;hÃllo;this;is;an;;example;;;是", ";".runeAt(0))) == + @["", "", "hÃllo", "this", "is", "an", "", "example", "", "", "是"] + splitCommon(s, sep, maxsplit) proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1): |