diff options
author | Kaushal Modi <kaushal.modi@gmail.com> | 2018-06-06 17:44:11 -0400 |
---|---|---|
committer | Kaushal Modi <kaushal.modi@gmail.com> | 2018-06-08 15:14:29 -0400 |
commit | 24df909d8a953f2b7ba0e0d1adf3a256042cd9bc (patch) | |
tree | 8ee5c50bf6084fa1c60a3c29f8601946230666d2 /lib/pure/strutils.nim | |
parent | 3e799d7876110d970c365d61c05e887729488e2f (diff) | |
download | Nim-24df909d8a953f2b7ba0e0d1adf3a256042cd9bc.tar.gz |
Make isUpper (and variants) work for strings with non-alpha chars
The other variants are isLower, isUpperAscii and isLowerAscii.

Fixes https://github.com/nim-lang/Nim/issues/7963.

This commit changes the behavior and signatures of:

- isUpper, isLower in the unicode module
- isUpperAscii, isLowerAscii in the strutils module

A second mandatory parameter skipNonAlpha is added to these 4 procs. (This change applies only to the case where the input is a *string*.)

---

With skipNonAlpha set to true, the behavior mimics the Python isupper and islower behavior, i.e. non-alphabetic chars/runes are ignored when checking if the string is upper-case or lower-case.

Before this commit:

    doAssert(not isUpper("A B"))

After this commit:

    doAssert(not isUpper("A B", false)) <-- old behavior
    doAssert isUpper("A B", true)

The two expressions below are equivalent (both require every character to be alphabetic and upper-case):

    isUpper("A B", false)
    isAlpha("A B") and isUpper("A B", true)

.. and similarly for the other 3 procs.
Diffstat (limited to 'lib/pure/strutils.nim')
-rw-r--r-- | lib/pure/strutils.nim | 81 |
1 files changed, 62 insertions, 19 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index a4fd20fdb..bea0a0243 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -150,23 +150,52 @@ proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar, ## characters and there is at least one character in `s`. isImpl isSpaceAscii -proc isLowerAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsLowerAsciiStr".} = - ## Checks whether or not `s` contains all lower case characters. +template isCaseImpl(s, charProc, skipNonAlpha) = + var hasAtleastOneAlphaChar = false + if s.len == 0: return false + for c in s: + if skipNonAlpha: + var charIsAlpha = c.isAlphaAscii() + if not hasAtleastOneAlphaChar: + hasAtleastOneAlphaChar = charIsAlpha + if charIsAlpha and (not charProc(c)): + return false + else: + if not charProc(c): + return false + return if skipNonAlpha: hasAtleastOneAlphaChar else: true + +proc isLowerAscii*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is lower case. ## ## This checks ASCII characters only. - ## Returns true if all characters in `s` are lower case - ## and there is at least one character in `s`. - isImpl isLowerAscii + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## characters in ``s`` are lower case. Returns false if none of the + ## characters in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all characters + ## in ``s`` are alphabetical and lower case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + isCaseImpl(s, isLowerAscii, skipNonAlpha) -proc isUpperAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsUpperAsciiStr".} = - ## Checks whether or not `s` contains all upper case characters. +proc isUpperAscii*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is upper case. ## ## This checks ASCII characters only. 
- ## Returns true if all characters in `s` are upper case - ## and there is at least one character in `s`. - isImpl isUpperAscii + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## characters in ``s`` are upper case. Returns false if none of the + ## characters in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all characters + ## in ``s`` are alphabetical and upper case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + isCaseImpl(s, isUpperAscii, skipNonAlpha) proc toLowerAscii*(c: char): char {.noSideEffect, procvar, rtl, extern: "nsuToLowerAsciiChar".} = @@ -2516,19 +2545,34 @@ when isMainModule: doAssert(not isLowerAscii('A')) doAssert(not isLowerAscii('5')) doAssert(not isLowerAscii('&')) + doAssert(not isLowerAscii(' ')) - doAssert isLowerAscii("abcd") - doAssert(not isLowerAscii("abCD")) - doAssert(not isLowerAscii("33aa")) + doAssert isLowerAscii("abcd", false) + doAssert(not isLowerAscii("33aa", false)) + doAssert(not isLowerAscii("a b", false)) + + doAssert(not isLowerAscii("abCD", true)) + doAssert isLowerAscii("33aa", true) + doAssert isLowerAscii("a b", true) + doAssert isLowerAscii("1, 2, 3 go!", true) + doAssert(not isLowerAscii(" ", true)) + doAssert(not isLowerAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets doAssert isUpperAscii('A') doAssert(not isUpperAscii('b')) doAssert(not isUpperAscii('5')) doAssert(not isUpperAscii('%')) - doAssert isUpperAscii("ABC") - doAssert(not isUpperAscii("AAcc")) - doAssert(not isUpperAscii("A#$")) + doAssert isUpperAscii("ABC", false) + doAssert(not isUpperAscii("A#$", false)) + doAssert(not isUpperAscii("A B", false)) + + doAssert(not isUpperAscii("AAcc", true)) + doAssert isUpperAscii("A#$", true) + doAssert isUpperAscii("A B", true) + doAssert isUpperAscii("1, 2, 3 GO!", true) + doAssert(not isUpperAscii(" ", true)) + doAssert(not isUpperAscii("(*&#@(^#$ ", true)) # None of the string 
chars are alphabets doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"] doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"] @@ -2601,4 +2645,3 @@ bar nonStaticTests() staticTests() static: staticTests() - |