diff options
-rw-r--r-- | changelog.md | 6 | ||||
-rw-r--r-- | lib/pure/strutils.nim | 81 | ||||
-rw-r--r-- | lib/pure/unicode.nim | 114 |
3 files changed, 152 insertions, 49 deletions
diff --git a/changelog.md b/changelog.md index aae275c1c..cb2a4b91d 100644 --- a/changelog.md +++ b/changelog.md @@ -42,6 +42,12 @@ - ``math.`mod` `` for floats now behaves the same as ``mod`` for integers (previously it used floor division like Python). Use ``math.floorMod`` for the old behavior. +- For string inputs, ``unicode.isUpper`` and ``unicode.isLower`` now require a + second mandatory parameter ``skipNonAlpha``. + +- For string inputs, ``strutils.isUpperAscii`` and ``strutils.isLowerAscii`` now + require a second mandatory parameter ``skipNonAlpha``. + #### Breaking changes in the compiler - The undocumented ``#? braces`` parsing mode was removed. diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index a4fd20fdb..bea0a0243 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -150,23 +150,52 @@ proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar, ## characters and there is at least one character in `s`. isImpl isSpaceAscii -proc isLowerAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsLowerAsciiStr".} = - ## Checks whether or not `s` contains all lower case characters. +template isCaseImpl(s, charProc, skipNonAlpha) = + var hasAtleastOneAlphaChar = false + if s.len == 0: return false + for c in s: + if skipNonAlpha: + var charIsAlpha = c.isAlphaAscii() + if not hasAtleastOneAlphaChar: + hasAtleastOneAlphaChar = charIsAlpha + if charIsAlpha and (not charProc(c)): + return false + else: + if not charProc(c): + return false + return if skipNonAlpha: hasAtleastOneAlphaChar else: true + +proc isLowerAscii*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is lower case. ## ## This checks ASCII characters only. - ## Returns true if all characters in `s` are lower case - ## and there is at least one character in `s`. - isImpl isLowerAscii + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## characters in ``s`` are lower case. Returns false if none of the + ## characters in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all characters + ## in ``s`` are alphabetical and lower case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + isCaseImpl(s, isLowerAscii, skipNonAlpha) -proc isUpperAscii*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nsuIsUpperAsciiStr".} = - ## Checks whether or not `s` contains all upper case characters. +proc isUpperAscii*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is upper case. ## ## This checks ASCII characters only. - ## Returns true if all characters in `s` are upper case - ## and there is at least one character in `s`. - isImpl isUpperAscii + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## characters in ``s`` are upper case. Returns false if none of the + ## characters in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all characters + ## in ``s`` are alphabetical and upper case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + isCaseImpl(s, isUpperAscii, skipNonAlpha) proc toLowerAscii*(c: char): char {.noSideEffect, procvar, rtl, extern: "nsuToLowerAsciiChar".} = @@ -2516,19 +2545,34 @@ when isMainModule: doAssert(not isLowerAscii('A')) doAssert(not isLowerAscii('5')) doAssert(not isLowerAscii('&')) + doAssert(not isLowerAscii(' ')) - doAssert isLowerAscii("abcd") - doAssert(not isLowerAscii("abCD")) - doAssert(not isLowerAscii("33aa")) + doAssert isLowerAscii("abcd", false) + doAssert(not isLowerAscii("33aa", false)) + doAssert(not isLowerAscii("a b", false)) + + doAssert(not isLowerAscii("abCD", true)) + doAssert isLowerAscii("33aa", true) + doAssert isLowerAscii("a b", true) + doAssert isLowerAscii("1, 2, 3 go!", true) + doAssert(not isLowerAscii(" ", true)) + doAssert(not isLowerAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets doAssert isUpperAscii('A') doAssert(not isUpperAscii('b')) doAssert(not isUpperAscii('5')) doAssert(not isUpperAscii('%')) - doAssert isUpperAscii("ABC") - doAssert(not isUpperAscii("AAcc")) - doAssert(not isUpperAscii("A#$")) + doAssert isUpperAscii("ABC", false) + doAssert(not isUpperAscii("A#$", false)) + doAssert(not isUpperAscii("A B", false)) + + doAssert(not isUpperAscii("AAcc", true)) + doAssert isUpperAscii("A#$", true) + doAssert isUpperAscii("A B", true) + doAssert isUpperAscii("1, 2, 3 GO!", true) + doAssert(not isUpperAscii(" ", true)) + doAssert(not isUpperAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"] doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"] @@ -2601,4 +2645,3 @@ bar nonStaticTests() staticTests() static: staticTests() - diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index bfd01be55..978f569ac 100644 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -1392,7 +1392,7 @@ proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = (c >= 0xfe20 and c <= 0xfe2f)) template runeCheck(s, runeProc) = - ## Common code for rune.isLower, rune.isUpper, etc + ## Common code for isAlpha and isSpace. result = if len(s) == 0: false else: true var @@ -1403,16 +1403,6 @@ template runeCheck(s, runeProc) = fastRuneAt(s, i, rune, doInc=true) result = runeProc(rune) and result -proc isUpper*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nuc$1Str".} = - ## Returns true iff `s` contains all upper case unicode characters. - runeCheck(s, isUpper) - -proc isLower*(s: string): bool {.noSideEffect, procvar, - rtl, extern: "nuc$1Str".} = - ## Returns true iff `s` contains all lower case unicode characters. - runeCheck(s, isLower) - proc isAlpha*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nuc$1Str".} = ## Returns true iff `s` contains all alphabetic unicode characters. @@ -1423,6 +1413,56 @@ proc isSpace*(s: string): bool {.noSideEffect, procvar, ## Returns true iff `s` contains all whitespace unicode characters. runeCheck(s, isWhiteSpace) +template runeCaseCheck(s, runeProc, skipNonAlpha) = + ## Common code for rune.isLower and rune.isUpper. + if len(s) == 0: return false + + var + i = 0 + rune: Rune + hasAtleastOneAlphaRune = false + + while i < len(s): + fastRuneAt(s, i, rune, doInc=true) + if skipNonAlpha: + var runeIsAlpha = isAlpha(rune) + if not hasAtleastOneAlphaRune: + hasAtleastOneAlphaRune = runeIsAlpha + if runeIsAlpha and (not runeProc(rune)): + return false + else: + if not runeProc(rune): + return false + return if skipNonAlpha: hasAtleastOneAlphaRune else: true + +proc isLower*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is lower case. + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## runes in ``s`` are lower case. Returns false if none of the + ## runes in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all runes in + ## ``s`` are alphabetical and lower case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + runeCaseCheck(s, isLower, skipNonAlpha) + +proc isUpper*(s: string, skipNonAlpha: bool): bool = + ## Checks whether ``s`` is upper case. + ## + ## If ``skipNonAlpha`` is true, returns true if all alphabetical + ## runes in ``s`` are upper case. Returns false if none of the + ## runes in ``s`` are alphabetical. + ## + ## If ``skipNonAlpha`` is false, returns true only if all runes in + ## ``s`` are alphabetical and upper case. + ## + ## For either value of ``skipNonAlpha``, returns false if ``s`` is + ## an empty string. + runeCaseCheck(s, isUpper, skipNonAlpha) + template convertRune(s, runeProc) = ## Convert runes in `s` using `runeProc` as the converter. result = newString(len(s)) @@ -1755,25 +1795,39 @@ when isMainModule: doAssert(not isSpace("")) doAssert(not isSpace("ΑΓc \td")) - doAssert isLower("a") - doAssert isLower("γ") - doAssert(not isLower("Γ")) - doAssert(not isLower("4")) - doAssert(not isLower("")) - - doAssert isLower("abcdγ") - doAssert(not isLower("abCDΓ")) - doAssert(not isLower("33aaΓ")) - - doAssert isUpper("Γ") - doAssert(not isUpper("b")) - doAssert(not isUpper("α")) - doAssert(not isUpper("✓")) - doAssert(not isUpper("")) - - doAssert isUpper("ΑΒΓ") - doAssert(not isUpper("AAccβ")) - doAssert(not isUpper("A#$β")) + doAssert(not isLower(' '.Rune)) + + doAssert isLower("a", false) + doAssert isLower("γ", true) + doAssert(not isLower("Γ", false)) + doAssert(not isLower("4", true)) + doAssert(not isLower("", false)) + doAssert isLower("abcdγ", false) + doAssert(not isLower("33aaΓ", false)) + doAssert(not isLower("a b", false)) + + doAssert(not isLower("abCDΓ", true)) + doAssert isLower("a b", true) + doAssert isLower("1, 2, 3 go!", true) + doAssert(not isLower(" ", true)) + doAssert(not isLower("(*&#@(^#$✓ ", true)) # None of the string runes are alphabets + + doAssert(not isUpper(' '.Rune)) + + doAssert isUpper("Γ", false) + doAssert(not isUpper("α", false)) + doAssert(not isUpper("", false)) + doAssert isUpper("ΑΒΓ", false) + doAssert(not isUpper("A#$β", false)) + doAssert(not isUpper("A B", false)) + + doAssert(not isUpper("b", true)) + doAssert(not isUpper("✓", true)) + doAssert(not isUpper("AAccβ", true)) + doAssert isUpper("A B", true) + doAssert isUpper("1, 2, 3 GO!", true) + doAssert(not isUpper(" ", true)) + doAssert(not isUpper("(*&#@(^#$✓ ", true)) # None of the string runes are alphabets doAssert toUpper("Γ") == "Γ" doAssert toUpper("b") == "B" |