diff options
author | Joey Payne <jyapayne@gmail.com> | 2016-06-16 13:40:56 -0600 |
---|---|---|
committer | Joey Payne <jyapayne@gmail.com> | 2016-07-01 07:24:30 -0600 |
commit | 76f81d4aa4fa79de19019b77d0ca972b68e7be6e (patch) | |
tree | 6e3fd15d227f9a85c28ebdbc1e036947659446ad /lib/pure/strutils.nim | |
parent | d8ee2c240920db62537ad0861f336e700f72d162 (diff) | |
download | Nim-76f81d4aa4fa79de19019b77d0ca972b68e7be6e.tar.gz |
Fix #4305: Make split proc for set[char] consistent
Diffstat (limited to 'lib/pure/strutils.nim')
-rw-r--r-- | lib/pure/strutils.nim | 133 |
1 file changed, 63 insertions, 70 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 7d1b1a3d9..623ab3199 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -341,17 +341,49 @@ proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl, extern: " if not c.isSpace(): return false +proc substrEq(s: string, pos: int, substr: string): bool = + var i = 0 + var length = substr.len + while i < length and s[pos+i] == substr[i]: + inc i + + return i == length + +# --------- Private templates for different split separators ----------- + +template stringHasSep(s: string, index: int, seps: set[char]): bool = + s[index] in seps + +template stringHasSep(s: string, index: int, sep: char): bool = + s[index] == sep + +template stringHasSep(s: string, index: int, sep: string): bool = + s.substrEq(index, sep) + +template splitCommon(s, sep, maxsplit, sepLen) = + ## Common code for split procedures + var last = 0 + var splits = maxsplit + + if len(s) > 0: + while last <= len(s): + var first = last + while last < len(s) and not stringHasSep(s, last, sep): + inc(last) + if splits == 0: last = len(s) + yield substr(s, first, last-1) + if splits == 0: break + dec(splits) + inc(last, sepLen) + iterator split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a group of separators. ## - ## Substrings are separated by a substring containing only `seps`. Note - ## that whole sequences of characters found in ``seps`` will be counted as - ## a single split point and leading/trailing separators will be ignored. - ## The following example: + ## Substrings are separated by a substring containing only `seps`. ## ## .. code-block:: nim - ## for word in split(" this is an example "): + ## for word in split("this\lis an\texample"): ## writeLine(stdout, word) ## ## ...generates this output: @@ -365,7 +397,7 @@ iterator split*(s: string, seps: set[char] = Whitespace, ## And the following code: ## ## .. 
code-block:: nim - ## for word in split(";;this;is;an;;example;;;", {';'}): + ## for word in split("this:is;an$example", {';', ':', '$'}): ## writeLine(stdout, word) ## ## ...produces the same output as the first example. The code: @@ -386,26 +418,13 @@ iterator split*(s: string, seps: set[char] = Whitespace, ## "08" ## "08.398990" ## - var last = 0 - var splits = maxsplit - assert(not ('\0' in seps)) - while last < len(s): - while s[last] in seps: inc(last) - var first = last - while last < len(s) and s[last] notin seps: inc(last) # BUGFIX! - if first <= last-1: - if splits == 0: last = len(s) - yield substr(s, first, last-1) - if splits == 0: break - dec(splits) + splitCommon(s, seps, maxsplit, 1) iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a single separator. ## ## Substrings are separated by the character `sep`. - ## Unlike the version of the iterator which accepts a set of separator - ## characters, this proc will not coalesce groups of the - ## separator, returning a string for each found character. The code: + ## The code: ## ## .. code-block:: nim ## for word in split(";;this;is;an;;example;;;", ';'): @@ -425,56 +444,27 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## "" ## "" ## - var last = 0 - var splits = maxsplit - assert('\0' != sep) - if len(s) > 0: - # `<=` is correct here for the edge cases! - while last <= len(s): - var first = last - while last < len(s) and s[last] != sep: inc(last) - if splits == 0: last = len(s) - yield substr(s, first, last-1) - if splits == 0: break - dec(splits) - inc(last) - -proc substrEq(s: string, pos: int, substr: string): bool = - var i = 0 - var length = substr.len - while i < length and s[pos+i] == substr[i]: - inc i - - return i == length + splitCommon(s, sep, maxsplit, 1) iterator split*(s: string, sep: string, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a string separator. 
## ## Substrings are separated by the string `sep`. - var last = 0 - var splits = maxsplit - - if len(s) > 0: - while last <= len(s): - var first = last - while last < len(s) and not s.substrEq(last, sep): - inc(last) - if splits == 0: last = len(s) - yield substr(s, first, last-1) - if splits == 0: break - dec(splits) - inc(last, sep.len) - -# --------- Private templates for different rsplit separators ----------- - -template stringHasSep(s: string, index: int, seps: set[char]): bool = - s[index] in seps - -template stringHasSep(s: string, index: int, sep: char): bool = - s[index] == sep + ## The code: + ## + ## .. code-block:: nim + ## for word in split("thisDATAisDATAcorrupted", "DATA"): + ## writeLine(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: + ## "this" + ## "is" + ## "corrupted" + ## -template stringHasSep(s: string, index: int, sep: string): bool = - s.substrEq(index, sep) + splitCommon(s, sep, maxsplit, sep.len) template rsplitCommon(s, sep, maxsplit, sepLen) = ## Common code for rsplit functions @@ -2244,11 +2234,14 @@ bar bar """.unindent() == "foo\nfoo\nbar\n" - let s = " this is an example " - doAssert s.split() == @["this", "is", "an", "example"] - doAssert s.split(maxsplit=4) == @["this", "is", "an", "example"] - doAssert s.split(' ', maxsplit=4) == @["", "this", "", "", "is an example "] - doAssert s.split(" ", maxsplit=4) == @["", "this", "", "", "is an example "] + let s = " this is an example " + let s2 = ":this;is;an:example;;" + + doAssert s.split() == @["", "this", "is", "an", "example", "", ""] + doAssert s2.split(seps={':', ';'}) == @["", "this", "is", "an", "example", "", ""] + doAssert s.split(maxsplit=4) == @["", "this", "is", "an", "example "] + doAssert s.split(' ', maxsplit=1) == @["", "this is an example "] + doAssert s.split(" ", maxsplit=4) == @["", "this", "is", "an", "example "] block: # formatEng tests doAssert formatEng(0, 2, trim=false) == "0.00" |