summary refs log tree commit diff stats
path: root/lib/pure/strutils.nim
diff options
context:
space:
mode:
authorKaushal Modi <kaushal.modi@gmail.com>2018-06-06 17:44:11 -0400
committerKaushal Modi <kaushal.modi@gmail.com>2018-06-08 15:14:29 -0400
commit24df909d8a953f2b7ba0e0d1adf3a256042cd9bc (patch)
tree8ee5c50bf6084fa1c60a3c29f8601946230666d2 /lib/pure/strutils.nim
parent3e799d7876110d970c365d61c05e887729488e2f (diff)
downloadNim-24df909d8a953f2b7ba0e0d1adf3a256042cd9bc.tar.gz
Make isUpper (and variants) work for strings with non-alpha chars
The other variants are isLower, isUpperAscii and isLowerAscii

Fixes https://github.com/nim-lang/Nim/issues/7963.

This commit changes the behavior and signatures of:

- isUpper, isLower in the unicode module
- isUpperAscii, isLowerAscii in the strutils module

A second mandatory parameter skipNonAlpha is added to these 4 procs.

(This change affects only for the case where the input is a *string*.)

---

With skipNonAlpha set to true, the behavior mimics the Python isupper and
islower behavior i.e. non-alphabetic chars/runes are ignored when checking if
the string is upper-case or lower-case.

    Before this commit:

      doAssert(not isUpper("A B"))

    After this commit:

      doAssert(not isUpper("A B", false))    <-- old behavior
      doAssert isUpper("A B", true)

      Below two are equivalent:

                           isUpper("A B", true)

        isAlpha("A B") and isUpper("A B", false)

.. and the similar for other 3 procs.
Diffstat (limited to 'lib/pure/strutils.nim')
-rw-r--r--lib/pure/strutils.nim81
1 files changed, 62 insertions, 19 deletions
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index a4fd20fdb..bea0a0243 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -150,23 +150,52 @@ proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
   ## characters and there is at least one character in `s`.
   isImpl isSpaceAscii
 
-proc isLowerAscii*(s: string): bool {.noSideEffect, procvar,
-  rtl, extern: "nsuIsLowerAsciiStr".} =
-  ## Checks whether or not `s` contains all lower case characters.
+template isCaseImpl(s, charProc, skipNonAlpha) =
+  var hasAtleastOneAlphaChar = false
+  if s.len == 0: return false
+  for c in s:
+    if skipNonAlpha:
+      var charIsAlpha = c.isAlphaAscii()
+      if not hasAtleastOneAlphaChar:
+        hasAtleastOneAlphaChar = charIsAlpha
+      if charIsAlpha and (not charProc(c)):
+        return false
+    else:
+      if not charProc(c):
+        return false
+  return if skipNonAlpha: hasAtleastOneAlphaChar else: true
+
+proc isLowerAscii*(s: string, skipNonAlpha: bool): bool =
+  ## Checks whether ``s`` is lower case.
   ##
   ## This checks ASCII characters only.
-  ## Returns true if all characters in `s` are lower case
-  ## and there is at least one character  in `s`.
-  isImpl isLowerAscii
+  ##
+  ## If ``skipNonAlpha`` is true, returns true if all alphabetical
+  ## characters in ``s`` are lower case.  Returns false if none of the
+  ## characters in ``s`` are alphabetical.
+  ##
+  ## If ``skipNonAlpha`` is false, returns true only if all characters
+  ## in ``s`` are alphabetical and lower case.
+  ##
+  ## For either value of ``skipNonAlpha``, returns false if ``s`` is
+  ## an empty string.
+  isCaseImpl(s, isLowerAscii, skipNonAlpha)
 
-proc isUpperAscii*(s: string): bool {.noSideEffect, procvar,
-  rtl, extern: "nsuIsUpperAsciiStr".} =
-  ## Checks whether or not `s` contains all upper case characters.
+proc isUpperAscii*(s: string, skipNonAlpha: bool): bool =
+  ## Checks whether ``s`` is upper case.
   ##
   ## This checks ASCII characters only.
-  ## Returns true if all characters in `s` are upper case
-  ## and there is at least one character in `s`.
-  isImpl isUpperAscii
+  ##
+  ## If ``skipNonAlpha`` is true, returns true if all alphabetical
+  ## characters in ``s`` are upper case.  Returns false if none of the
+  ## characters in ``s`` are alphabetical.
+  ##
+  ## If ``skipNonAlpha`` is false, returns true only if all characters
+  ## in ``s`` are alphabetical and upper case.
+  ##
+  ## For either value of ``skipNonAlpha``, returns false if ``s`` is
+  ## an empty string.
+  isCaseImpl(s, isUpperAscii, skipNonAlpha)
 
 proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
   rtl, extern: "nsuToLowerAsciiChar".} =
@@ -2516,19 +2545,34 @@ when isMainModule:
     doAssert(not isLowerAscii('A'))
     doAssert(not isLowerAscii('5'))
     doAssert(not isLowerAscii('&'))
+    doAssert(not isLowerAscii(' '))
 
-    doAssert isLowerAscii("abcd")
-    doAssert(not isLowerAscii("abCD"))
-    doAssert(not isLowerAscii("33aa"))
+    doAssert isLowerAscii("abcd", false)
+    doAssert(not isLowerAscii("33aa", false))
+    doAssert(not isLowerAscii("a b", false))
+
+    doAssert(not isLowerAscii("abCD", true))
+    doAssert isLowerAscii("33aa", true)
+    doAssert isLowerAscii("a b", true)
+    doAssert isLowerAscii("1, 2, 3 go!", true)
+    doAssert(not isLowerAscii(" ", true))
+    doAssert(not isLowerAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets
 
     doAssert isUpperAscii('A')
     doAssert(not isUpperAscii('b'))
     doAssert(not isUpperAscii('5'))
     doAssert(not isUpperAscii('%'))
 
-    doAssert isUpperAscii("ABC")
-    doAssert(not isUpperAscii("AAcc"))
-    doAssert(not isUpperAscii("A#$"))
+    doAssert isUpperAscii("ABC", false)
+    doAssert(not isUpperAscii("A#$", false))
+    doAssert(not isUpperAscii("A B", false))
+
+    doAssert(not isUpperAscii("AAcc", true))
+    doAssert isUpperAscii("A#$", true)
+    doAssert isUpperAscii("A B", true)
+    doAssert isUpperAscii("1, 2, 3 GO!", true)
+    doAssert(not isUpperAscii(" ", true))
+    doAssert(not isUpperAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets
 
     doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"]
     doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"]
@@ -2601,4 +2645,3 @@ bar
   nonStaticTests()
   staticTests()
   static: staticTests()
-