diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2016-07-02 17:55:57 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-07-02 17:55:57 +0200 |
commit | b56e5e159a166e290fe202eae6197494d6f5acd8 (patch) | |
tree | 0c652e9fd419b829c4d56611209b2f4109a78eec /lib | |
parent | 97b59506d6652f56e8680735884aa7b4029f0b4d (diff) | |
parent | 79a8a5ee72b7aca09e1dfdb03744020ebd4dae2b (diff) | |
download | Nim-b56e5e159a166e290fe202eae6197494d6f5acd8.tar.gz |
Merge pull request #4361 from jyapayne/fix_split
Fix #4305: Make split proc for set[char] consistent
Diffstat (limited to 'lib')
-rw-r--r-- | lib/pure/strutils.nim | 158 |
1 files changed, 88 insertions, 70 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 38807b29a..757268cd1 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -26,6 +26,12 @@ include "system/inclrtl" {.pop.} +# Support old split with set[char] +when defined(nimOldSplit): + {.pragma: deprecatedSplit, deprecated.} +else: + {.pragma: deprecatedSplit.} + type CharSet* {.deprecated.} = set[char] # for compatibility with Nim {.deprecated: [TCharSet: CharSet].} @@ -472,17 +478,65 @@ proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl, extern: " if not c.isSpace(): return false +proc substrEq(s: string, pos: int, substr: string): bool = + var i = 0 + var length = substr.len + while i < length and s[pos+i] == substr[i]: + inc i + + return i == length + +# --------- Private templates for different split separators ----------- + +template stringHasSep(s: string, index: int, seps: set[char]): bool = + s[index] in seps + +template stringHasSep(s: string, index: int, sep: char): bool = + s[index] == sep + +template stringHasSep(s: string, index: int, sep: string): bool = + s.substrEq(index, sep) + +template splitCommon(s, sep, maxsplit, sepLen) = + ## Common code for split procedures + var last = 0 + var splits = maxsplit + + if len(s) > 0: + while last <= len(s): + var first = last + while last < len(s) and not stringHasSep(s, last, sep): + inc(last) + if splits == 0: last = len(s) + yield substr(s, first, last-1) + if splits == 0: break + dec(splits) + inc(last, sepLen) + +when defined(nimOldSplit): + template oldSplit(s, seps, maxsplit) = + ## Deprecated split[char] for transition period + var last = 0 + var splits = maxsplit + assert(not ('\0' in seps)) + while last < len(s): + while s[last] in seps: inc(last) + var first = last + while last < len(s) and s[last] notin seps: inc(last) # BUGFIX! + if first <= last-1: + if splits == 0: last = len(s) + yield substr(s, first, last-1) + if splits == 0: break + dec(splits) + iterator split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a group of separators. ## - ## Substrings are separated by a substring containing only `seps`. Note - ## that whole sequences of characters found in ``seps`` will be counted as - ## a single split point and leading/trailing separators will be ignored. - ## The following example: + ## Substrings are separated by a substring containing only `seps`. ## ## .. code-block:: nim - ## for word in split(" this is an example "): + ## for word in split("this\lis an\texample"): ## writeLine(stdout, word) ## ## ...generates this output: @@ -496,7 +550,7 @@ iterator split*(s: string, seps: set[char] = Whitespace, ## And the following code: ## ## .. code-block:: nim - ## for word in split(";;this;is;an;;example;;;", {';'}): + ## for word in split("this:is;an$example", {';', ':', '$'}): ## writeLine(stdout, word) ## ## ...produces the same output as the first example. The code: @@ -517,26 +571,16 @@ iterator split*(s: string, seps: set[char] = Whitespace, ## "08" ## "08.398990" ## - var last = 0 - var splits = maxsplit - assert(not ('\0' in seps)) - while last < len(s): - while s[last] in seps: inc(last) - var first = last - while last < len(s) and s[last] notin seps: inc(last) # BUGFIX! - if first <= last-1: - if splits == 0: last = len(s) - yield substr(s, first, last-1) - if splits == 0: break - dec(splits) + when defined(nimOldSplit): + oldSplit(s, seps, maxsplit) + else: + splitCommon(s, seps, maxsplit, 1) iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a single separator. ## ## Substrings are separated by the character `sep`. - ## Unlike the version of the iterator which accepts a set of separator - ## characters, this proc will not coalesce groups of the - ## separator, returning a string for each found character. The code: + ## The code: ## ## .. code-block:: nim ## for word in split(";;this;is;an;;example;;;", ';'): @@ -556,56 +600,27 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## "" ## "" ## - var last = 0 - var splits = maxsplit - assert('\0' != sep) - if len(s) > 0: - # `<=` is correct here for the edge cases! - while last <= len(s): - var first = last - while last < len(s) and s[last] != sep: inc(last) - if splits == 0: last = len(s) - yield substr(s, first, last-1) - if splits == 0: break - dec(splits) - inc(last) - -proc substrEq(s: string, pos: int, substr: string): bool = - var i = 0 - var length = substr.len - while i < length and s[pos+i] == substr[i]: - inc i - - return i == length + splitCommon(s, sep, maxsplit, 1) iterator split*(s: string, sep: string, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a string separator. ## ## Substrings are separated by the string `sep`. - var last = 0 - var splits = maxsplit - - if len(s) > 0: - while last <= len(s): - var first = last - while last < len(s) and not s.substrEq(last, sep): - inc(last) - if splits == 0: last = len(s) - yield substr(s, first, last-1) - if splits == 0: break - dec(splits) - inc(last, sep.len) - -# --------- Private templates for different rsplit separators ----------- - -template stringHasSep(s: string, index: int, seps: set[char]): bool = - s[index] in seps - -template stringHasSep(s: string, index: int, sep: char): bool = - s[index] == sep + ## The code: + ## + ## .. code-block:: nim + ## for word in split("thisDATAisDATAcorrupted", "DATA"): + ## writeLine(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: + ## "this" + ## "is" + ## "corrupted" + ## -template stringHasSep(s: string, index: int, sep: string): bool = - s.substrEq(index, sep) + splitCommon(s, sep, maxsplit, sep.len) template rsplitCommon(s, sep, maxsplit, sepLen) = ## Common code for rsplit functions @@ -2375,11 +2390,14 @@ bar bar """.unindent() == "foo\nfoo\nbar\n" - let s = " this is an example " - doAssert s.split() == @["this", "is", "an", "example"] - doAssert s.split(maxsplit=4) == @["this", "is", "an", "example"] - doAssert s.split(' ', maxsplit=4) == @["", "this", "", "", "is an example "] - doAssert s.split(" ", maxsplit=4) == @["", "this", "", "", "is an example "] + let s = " this is an example " + let s2 = ":this;is;an:example;;" + + doAssert s.split() == @["", "this", "is", "an", "example", "", ""] + doAssert s2.split(seps={':', ';'}) == @["", "this", "is", "an", "example", "", ""] + doAssert s.split(maxsplit=4) == @["", "this", "is", "an", "example "] + doAssert s.split(' ', maxsplit=1) == @["", "this is an example "] + doAssert s.split(" ", maxsplit=4) == @["", "this", "is", "an", "example "] block: # formatEng tests doAssert formatEng(0, 2, trim=false) == "0.00" |