diff options
author | Kaushal Modi <kaushal.modi@gmail.com> | 2018-06-06 17:44:11 -0400 |
---|---|---|
committer | Kaushal Modi <kaushal.modi@gmail.com> | 2018-06-08 15:14:29 -0400 |
commit | 24df909d8a953f2b7ba0e0d1adf3a256042cd9bc (patch) | |
tree | 8ee5c50bf6084fa1c60a3c29f8601946230666d2 /lib/pure | |
parent | 3e799d7876110d970c365d61c05e887729488e2f (diff) | |
download | Nim-24df909d8a953f2b7ba0e0d1adf3a256042cd9bc.tar.gz |
Make isUpper (and variants) work for strings with non-alpha chars
The other variants are isLower, isUpperAscii and isLowerAscii Fixes https://github.com/nim-lang/Nim/issues/7963. This commit changes the behavior and signatures of: - isUpper, isLower in the unicode module - isUpperAscii, isLowerAscii in the strutils module A second mandatory parameter skipNonAlpha is added to these 4 procs. (This change affects only for the case where the input is a *string*.) --- With skipNonAlpha set to true, the behavior mimics the Python isupper and islower behavior i.e. non-alphabetic chars/runes are ignored when checking if the string is upper-case or lower-case. Before this commit: doAssert(not isUpper("A B")) After this commit: doAssert(not isUpper("A B", false)) <-- old behavior doAssert isUpper("A B", true) Below two are equivalent: isUpper("A B", true) isAlpha("A B") and isUpper("A B", false) .. and the similar for other 3 procs.
Diffstat (limited to 'lib/pure')
-rw-r--r-- | lib/pure/strutils.nim | 81 | ||||
-rw-r--r-- | lib/pure/unicode.nim | 114 |
2 files changed, 146 insertions, 49 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index a4fd20fdb..bea0a0243 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -150,23 +150,52 @@ proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar, ## characters and there is at least one character in `s`. isImpl isSpaceAscii -proc isLowerAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsLowerAsciiStr".} = - ## Checks whether or not `s` contains all lower case characters. +template isCaseImpl(s, charProc, skipNonAlpha) = + var hasAtleastOneAlphaChar = false + if s.len == 0: return false + for c in s: + if skipNonAlpha: + var charIsAlpha = c.isAlphaAscii() + if not hasAtleastOneAlphaChar: + hasAtleastOneAlphaChar = charIsAlpha + if charIsAlpha and (not charProc(c)): + return false + else: + if not charProc(c): + return false + return if skipNonAlpha: hasAtleastOneAlphaChar else: true + +proc isLowerAscii*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is lower case. ## ## This checks ASCII characters only. - ## Returns true if all characters in `s` are lower case - ## and there is at least one character in `s`. - isImpl isLowerAscii + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## characters in ``s`` are lower case. Returns false if none of the + ## characters in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all characters + ## in ``s`` are alphabetical and lower case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + isCaseImpl(s, isLowerAscii, skipNonAlpha) -proc isUpperAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsUpperAsciiStr".} = - ## Checks whether or not `s` contains all upper case characters. +proc isUpperAscii*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is upper case. ## ## This checks ASCII characters only. - ## Returns true if all characters in `s` are upper case - ## and there is at least one character in `s`. - isImpl isUpperAscii + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## characters in ``s`` are upper case. Returns false if none of the + ## characters in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all characters + ## in ``s`` are alphabetical and upper case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + isCaseImpl(s, isUpperAscii, skipNonAlpha) proc toLowerAscii*(c: char): char {.noSideEffect, procvar, rtl, extern: "nsuToLowerAsciiChar".} = @@ -2516,19 +2545,34 @@ when isMainModule: doAssert(not isLowerAscii('A')) doAssert(not isLowerAscii('5')) doAssert(not isLowerAscii('&')) + doAssert(not isLowerAscii(' ')) - doAssert isLowerAscii("abcd") - doAssert(not isLowerAscii("abCD")) - doAssert(not isLowerAscii("33aa")) + doAssert isLowerAscii("abcd", false) + doAssert(not isLowerAscii("33aa", false)) + doAssert(not isLowerAscii("a b", false)) + + doAssert(not isLowerAscii("abCD", true)) + doAssert isLowerAscii("33aa", true) + doAssert isLowerAscii("a b", true) + doAssert isLowerAscii("1, 2, 3 go!", true) + doAssert(not isLowerAscii(" ", true)) + doAssert(not isLowerAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets doAssert isUpperAscii('A') doAssert(not isUpperAscii('b')) doAssert(not isUpperAscii('5')) doAssert(not isUpperAscii('%')) - doAssert isUpperAscii("ABC") - doAssert(not isUpperAscii("AAcc")) - doAssert(not isUpperAscii("A#$")) + doAssert isUpperAscii("ABC", false) + doAssert(not isUpperAscii("A#$", false)) + doAssert(not isUpperAscii("A B", false)) + + doAssert(not isUpperAscii("AAcc", true)) + doAssert isUpperAscii("A#$", true) + doAssert isUpperAscii("A B", true) + doAssert isUpperAscii("1, 2, 3 GO!", true) + doAssert(not isUpperAscii(" ", true)) + doAssert(not isUpperAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"] doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"] @@ -2601,4 +2645,3 @@ bar nonStaticTests() staticTests() static: staticTests() - diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index bfd01be55..978f569ac 100644 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -1392,7 +1392,7 @@ proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = (c >= 0xfe20 and c <= 0xfe2f)) template runeCheck(s, runeProc) = - ## Common code for rune.isLower, rune.isUpper, etc + ## Common code for isAlpha and isSpace. result = if len(s) == 0: false else: true var @@ -1403,16 +1403,6 @@ template runeCheck(s, runeProc) = fastRuneAt(s, i, rune, doInc=true) result = runeProc(rune) and result -proc isUpper*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nuc$1Str".} = - ## Returns true iff `s` contains all upper case unicode characters. - runeCheck(s, isUpper) - -proc isLower*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nuc$1Str".} = - ## Returns true iff `s` contains all lower case unicode characters. - runeCheck(s, isLower) - proc isAlpha*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nuc$1Str".} = ## Returns true iff `s` contains all alphabetic unicode characters. @@ -1423,6 +1413,56 @@ proc isSpace*(s: string): bool {.noSideEffect, procvar, ## Returns true iff `s` contains all whitespace unicode characters. runeCheck(s, isWhiteSpace) +template runeCaseCheck(s, runeProc, skipNonAlpha) = + ## Common code for rune.isLower and rune.isUpper. + if len(s) == 0: return false + + var + i = 0 + rune: Rune + hasAtleastOneAlphaRune = false + + while i < len(s): + fastRuneAt(s, i, rune, doInc=true) + if skipNonAlpha: + var runeIsAlpha = isAlpha(rune) + if not hasAtleastOneAlphaRune: + hasAtleastOneAlphaRune = runeIsAlpha + if runeIsAlpha and (not runeProc(rune)): + return false + else: + if not runeProc(rune): + return false + return if skipNonAlpha: hasAtleastOneAlphaRune else: true + +proc isLower*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is lower case. + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## runes in ``s`` are lower case. Returns false if none of the + ## runes in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all runes in + ## ``s`` are alphabetical and lower case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + runeCaseCheck(s, isLower, skipNonAlpha) + +proc isUpper*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is upper case. + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## runes in ``s`` are upper case. Returns false if none of the + ## runes in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all runes in + ## ``s`` are alphabetical and upper case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + runeCaseCheck(s, isUpper, skipNonAlpha) + template convertRune(s, runeProc) = ## Convert runes in `s` using `runeProc` as the converter. result = newString(len(s)) @@ -1755,25 +1795,39 @@ when isMainModule: doAssert(not isSpace("")) doAssert(not isSpace("ΑΓc \td")) - doAssert isLower("a") - doAssert isLower("γ") - doAssert(not isLower("Γ")) - doAssert(not isLower("4")) - doAssert(not isLower("")) - - doAssert isLower("abcdγ") - doAssert(not isLower("abCDΓ")) - doAssert(not isLower("33aaΓ")) - - doAssert isUpper("Γ") - doAssert(not isUpper("b")) - doAssert(not isUpper("α")) - doAssert(not isUpper("✓")) - doAssert(not isUpper("")) - - doAssert isUpper("ΑΒΓ") - doAssert(not isUpper("AAccβ")) - doAssert(not isUpper("A#$β")) + doAssert(not isLower(' '.Rune)) + + doAssert isLower("a", false) + doAssert isLower("γ", true) + doAssert(not isLower("Γ", false)) + doAssert(not isLower("4", true)) + doAssert(not isLower("", false)) + doAssert isLower("abcdγ", false) + doAssert(not isLower("33aaΓ", false)) + doAssert(not isLower("a b", false)) + + doAssert(not isLower("abCDΓ", true)) + doAssert isLower("a b", true) + doAssert isLower("1, 2, 3 go!", true) + doAssert(not isLower(" ", true)) + doAssert(not isLower("(*&#@(^#$✓ ", true)) # None of the string runes are alphabets + + doAssert(not isUpper(' '.Rune)) + + doAssert isUpper("Γ", false) + doAssert(not isUpper("α", false)) + doAssert(not isUpper("", false)) + doAssert isUpper("ΑΒΓ", false) + doAssert(not isUpper("A#$β", false)) + doAssert(not isUpper("A B", false)) + + doAssert(not isUpper("b", true)) + doAssert(not isUpper("✓", true)) + doAssert(not isUpper("AAccβ", true)) + doAssert isUpper("A B", true) + doAssert isUpper("1, 2, 3 GO!", true) + doAssert(not isUpper(" ", true)) + doAssert(not isUpper("(*&#@(^#$✓ ", true)) # None of the string runes are alphabets doAssert toUpper("Γ") == "Γ" doAssert toUpper("b") == "B" |