Merge pull request #4276 from jyapayne/devel

Add extra string functions to strutils to satisfy part of #4218 and #4251
author: Andreas Rumpf <rumpf_a@web.de> 2016-06-14 09:38:45 +0200
committer: GitHub <noreply@github.com> 2016-06-14 09:38:45 +0200
commit: c9c4b6c41d9465599f4a627c09fe626cab727270 (patch)
tree: ce1068c16a5cd5a0824b24ea7392eb62c55c7392 /lib
parent: 93424420d48929965929901c2d5698de189c1a34 (diff)
parent: 84d9081e21783151b40c0320ca50df4845043559 (diff)
download: Nim-c9c4b6c41d9465599f4a627c09fe626cab727270.tar.gz
3 files changed, 526 insertions, 34 deletions
diff --git a/lib/pure/strmisc.nim b/lib/pure/strmisc.nim
new file mode 100644
index 000000000..359014d8c
--- /dev/null
+++ b/lib/pure/strmisc.nim
@@ -0,0 +1,56 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2016 Joey Payne
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module contains various string utility routines that are uncommonly
+## used in comparison to `strutils <strutils.html>`_.
+
+import strutils
+
+{.deadCodeElim: on.}
+
+proc partition*(s: string, sep: string,
+                right: bool = false): (string, string, string)
+                {.noSideEffect, procvar.} =
+  ## Split the string at the first or last occurrence of `sep` into a 3-tuple
+  ##
+  ## Returns a 3 string tuple of (beforeSep, `sep`, afterSep) or
+  ## (`s`, "", "") if `sep` is not found and `right` is false or
+  ## ("", "", `s`) if `sep` is not found and `right` is true
+
+  let position = if right: s.rfind(sep) else: s.find(sep)
+
+  if position != -1:
+    let
+      beforeSep = s[0 ..< position]
+      afterSep = s[position + sep.len ..< s.len]
+
+    return (beforeSep, sep, afterSep)
+
+  return if right: ("", "", s) else: (s, "", "")
+
+proc rpartition*(s: string, sep: string): (string, string, string)
+                {.noSideEffect, procvar.} =
+  ## Split the string at the last occurrence of `sep` into a 3-tuple
+  ##
+  ## Returns a 3 string tuple of (beforeSep, `sep`, afterSep) or
+  ## ("", "", `s`) if `sep` is not found
+  return partition(s, sep, right = true)
+
+when isMainModule:
+  doAssert partition("foo:bar", ":") == ("foo", ":", "bar")
+  doAssert partition("foobarbar", "bar") == ("foo", "bar", "bar")
+  doAssert partition("foobarbar", "bank") == ("foobarbar", "", "")
+  doAssert partition("foobarbar", "foo") == ("", "foo", "barbar")
+  doAssert partition("foofoobar", "bar") == ("foofoo", "bar", "")
+
+  doAssert rpartition("foo:bar", ":") == ("foo", ":", "bar")
+  doAssert rpartition("foobarbar", "bar") == ("foobar", "bar", "")
+  doAssert rpartition("foobarbar", "bank") == ("", "", "foobarbar")
+  doAssert rpartition("foobarbar", "foo") == ("", "foo", "barbar")
+  doAssert rpartition("foofoobar", "bar") == ("foofoo", "bar", "")
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 86af6ac88..708f9ed4b 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -15,6 +15,7 @@
 
 import parseutils
 from math import pow, round, floor, log10
+from algorithm import reverse
 
 {.deadCodeElim: on.}
 
@@ -325,7 +326,8 @@ proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} =
     result[i] = chr(val mod 8 + ord('0'))
     val = val div 8
 
-iterator split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string =
+iterator split*(s: string, seps: set[char] = Whitespace,
+                maxsplit: int = -1): string =
   ## Splits the string `s` into substrings using a group of separators.
   ##
   ## Substrings are separated by a substring containing only `seps`. Note
@@ -422,10 +424,13 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string =
       dec(splits)
       inc(last)
 
-proc substrEq(s: string, a, L: int, x: string): bool =
+proc substrEq(s: string, pos: int, substr: string): bool =
   var i = 0
-  while i < L and s[a+i] == x[i]: inc i
-  result = i == L
+  var length = substr.len
+  while i < length and s[pos+i] == substr[i]:
+    inc i
+
+  return i == length
 
 iterator split*(s: string, sep: string, maxsplit: int = -1): string =
   ## Splits the string `s` into substrings using a string separator.
@@ -433,10 +438,11 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string =
   ## Substrings are separated by the string `sep`.
   var last = 0
   var splits = maxsplit
+
   if len(s) > 0:
     while last <= len(s):
       var first = last
-      while last < len(s) and not s.substrEq(last, sep.len, sep):
+      while last < len(s) and not s.substrEq(last, sep):
         inc(last)
       if splits == 0: last = len(s)
       yield substr(s, first, last-1)
@@ -444,6 +450,108 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string =
       dec(splits)
       inc(last, sep.len)
 
+# --------- Private templates for different rsplit separators -----------
+
+template stringHasSep(s: string, index: int, seps: set[char]): bool =
+  s[index] in seps
+
+template stringHasSep(s: string, index: int, sep: char): bool =
+  s[index] == sep
+
+template stringHasSep(s: string, index: int, sep: string): bool =
+  s.substrEq(index, sep)
+
+template rsplitCommon(s, sep, maxsplit, sepLen) =
+  ## Common code for rsplit functions
+  var
+    last = s.len - 1
+    first = last
+    splits = maxsplit
+    startPos = 0
+
+  if len(s) > 0:
+    # go to -1 in order to get separators at the beginning
+    while first >= -1:
+      while first >= 0 and not stringHasSep(s, first, sep):
+        dec(first)
+
+      if splits == 0:
+        # No more splits means set first to the beginning
+        first = -1
+
+      if first == -1:
+        startPos = 0
+      else:
+        startPos = first + sepLen
+
+      yield substr(s, startPos, last)
+
+      if splits == 0:
+        break
+
+      dec(splits)
+      dec(first)
+
+      last = first
+
+iterator rsplit*(s: string, seps: set[char] = Whitespace,
+                 maxsplit: int = -1): string =
+  ## Splits the string `s` into substrings from the right using a
+  ## string separator. Works exactly the same as `split iterator
+  ## <#split.i,string,char>`_ except in reverse order.
+  ##
+  ## .. code-block:: nim
+  ##   for piece in "foo bar".rsplit(WhiteSpace):
+  ##     echo piece
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nim
+  ##   "bar"
+  ##   "foo"
+  ##
+  ## Substrings are separated from the right by the set of chars `seps`
+
+  rsplitCommon(s, seps, maxsplit, 1)
+
+iterator rsplit*(s: string, sep: char,
+                 maxsplit: int = -1): string =
+  ## Splits the string `s` into substrings from the right using a
+  ## string separator. Works exactly the same as `split iterator
+  ## <#split.i,string,char>`_ except in reverse order.
+  ##
+  ## .. code-block:: nim
+  ##   for piece in "foo:bar".rsplit(':'):
+  ##     echo piece
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nim
+  ##   "bar"
+  ##   "foo"
+  ##
+  ## Substrings are separated from the right by the char `sep`
+  rsplitCommon(s, sep, maxsplit, 1)
+
+iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
+                 keepSeparators: bool = false): string =
+  ## Splits the string `s` into substrings from the right using a
+  ## string separator. Works exactly the same as `split iterator
+  ## <#split.i,string,string>`_ except in reverse order.
+  ##
+  ## .. code-block:: nim
+  ##   for piece in "foothebar".rsplit("the"):
+  ##     echo piece
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nim
+  ##   "bar"
+  ##   "foo"
+  ##
+  ## Substrings are separated from the right by the string `sep`
+  rsplitCommon(s, sep, maxsplit, sep.len)
+
 iterator splitLines*(s: string): string =
   ## Splits the string `s` into its containing lines.
   ##
@@ -531,6 +639,73 @@ proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEff
   ## `split iterator <#split.i,string,string>`_.
   accumulateResult(split(s, sep, maxsplit))
 
+proc rsplit*(s: string, seps: set[char] = Whitespace,
+             maxsplit: int = -1): seq[string]
+             {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} =
+  ## The same as the `rsplit iterator <#rsplit.i,string,set[char]>`_, but is a
+  ## proc that returns a sequence of substrings.
+  ##
+  ## A possible common use case for `rsplit` is path manipulation,
+  ## particularly on systems that don't use a common delimiter.
+  ##
+  ## For example, if a system had `#` as a delimiter, you could
+  ## do the following to get the tail of the path:
+  ##
+  ## .. code-block:: nim
+  ##   var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1)
+  ##
+  ## Results in `tailSplit` containing:
+  ##
+  ## .. code-block:: nim
+  ##   @["Root#Object#Method", "Index"]
+  ##
+  accumulateResult(rsplit(s, seps, maxsplit))
+  result.reverse()
+
+proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string]
+             {.noSideEffect, rtl, extern: "nsuRSplitChar".} =
+  ## The same as the `split iterator <#rsplit.i,string,char>`_, but is a proc
+  ## that returns a sequence of substrings.
+  ##
+  ## A possible common use case for `rsplit` is path manipulation,
+  ## particularly on systems that don't use a common delimiter.
+  ##
+  ## For example, if a system had `#` as a delimiter, you could
+  ## do the following to get the tail of the path:
+  ##
+  ## .. code-block:: nim
+  ##   var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1)
+  ##
+  ## Results in `tailSplit` containing:
+  ##
+  ## .. code-block:: nim
+  ##   @["Root#Object#Method", "Index"]
+  ##
+  accumulateResult(rsplit(s, sep, maxsplit))
+  result.reverse()
+
+proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string]
+             {.noSideEffect, rtl, extern: "nsuRSplitString".} =
+  ## The same as the `split iterator <#rsplit.i,string,string>`_, but is a proc
+  ## that returns a sequence of substrings.
+  ##
+  ## A possible common use case for `rsplit` is path manipulation,
+  ## particularly on systems that don't use a common delimiter.
+  ##
+  ## For example, if a system had `#` as a delimiter, you could
+  ## do the following to get the tail of the path:
+  ##
+  ## .. code-block:: nim
+  ##   var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1)
+  ##
+  ## Results in `tailSplit` containing:
+  ##
+  ## .. code-block:: nim
+  ##   @["Root#Object#Method", "Index"]
+  ##
+  accumulateResult(rsplit(s, sep, maxsplit))
+  result.reverse()
+
 proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect,
   rtl, extern: "nsuToHex".} =
   ## Converts `x` to its hexadecimal representation.
@@ -1035,6 +1210,34 @@ proc rfind*(s: string, sub: char, start: int = -1): int {.noSideEffect,
     if sub == s[i]: return i
   return -1
 
+proc center*(s: string, width: int, fillChar: char = ' '): string {.
+  noSideEffect, rtl, extern: "nsuCenterString".} =
+  ## Return the contents of `s` centered in a string `width` long using
+  ## `fillChar` as padding.
+  ##
+  ## The original string is returned if `width` is less than or equal
+  ## to `s.len`.
+  if width <= s.len:
+    return s
+
+  result = newString(width)
+
+  # Left padding will be one fillChar
+  # smaller if there are an odd number
+  # of characters
+  let
+    charsLeft = (width - s.len)
+    leftPadding = charsLeft div 2
+
+  for i in 0 ..< width:
+    if i >= leftPadding and i < leftPadding + s.len:
+      # we are where the string should be located
+      result[i] = s[i-leftPadding]
+    else:
+      # we are either before or after where
+      # the string s should go
+      result[i] = fillChar
+
 proc count*(s: string, sub: string, overlapping: bool = false): int {.
   noSideEffect, rtl, extern: "nsuCountString".} =
   ## Count the occurrences of a substring `sub` in the string `s`.
@@ -1116,6 +1319,38 @@ proc replace*(s: string, sub, by: char): string {.noSideEffect,
     else: result[i] = s[i]
     inc(i)
 
+proc expandTabs*(s: string, tabSize: int = 8): string {.noSideEffect,
+  procvar, rtl, extern: "nsuExpandTabsStr".} =
+  ## Expand tab characters in `s` by `tabSize` spaces
+
+  if len(s) == 0:
+    return s
+
+  result = newStringOfCap(s.len + s.len shr 2)
+
+  var pos = 0
+
+  template addSpaces(n) =
+    for j in 0 ..< n:
+      result.add(' ')
+      pos += 1
+
+  for i in 0 ..< len(s):
+    let c = s[i]
+
+    if c == '\t':
+      let
+        denominator = if tabSize > 0: tabSize else: 1
+        numSpaces = tabSize - pos mod denominator
+
+      addSpaces(numSpaces)
+    else:
+      result.add(c)
+      pos += 1
+
+    if c == '\l':
+      pos = 0
+
 proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect,
   rtl, extern: "nsuReplaceWord".} =
   ## Replaces `sub` in `s` by the string `by`.
@@ -1899,6 +2134,11 @@ when isMainModule:
 
   doAssert parseEnum("invalid enum value", enC) == enC
 
+  doAssert center("foo", 13) == "     foo     "
+  doAssert center("foo", 0) == "foo"
+  doAssert center("foo", 3, fillChar = 'a') == "foo"
+  doAssert center("foo", 10, fillChar = '\t') == "\t\t\tfoo\t\t\t\t"
+
   doAssert count("foofoofoo", "foofoo") == 1
   doAssert count("foofoofoo", "foofoo", overlapping = true) == 2
   doAssert count("foofoofoo", 'f') == 3
@@ -1967,6 +2207,22 @@ when isMainModule:
   doAssert(not isUpper("AAcc"))
   doAssert(not isUpper("A#$"))
 
+  doAssert expandTabs("\t", 4) == "    "
+  doAssert expandTabs("\tfoo\t", 4) == "    foo "
+  doAssert expandTabs("\tfoo\tbar", 4) == "    foo bar"
+  doAssert expandTabs("\tfoo\tbar\t", 4) == "    foo bar "
+  doAssert expandTabs("", 4) == ""
+  doAssert expandTabs("", 0) == ""
+  doAssert expandTabs("\t\t\t", 0) == ""
+
+  doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"]
+  doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"]
+  doAssert rsplit(" foo bar ", seps=Whitespace, maxsplit=1) == @[" foo bar", ""]
+  doAssert rsplit(":foo:bar", sep=':') == @["", "foo", "bar"]
+  doAssert rsplit(":foo:bar", sep=':', maxsplit=2) == @["", "foo", "bar"]
+  doAssert rsplit(":foo:bar", sep=':', maxsplit=3) == @["", "foo", "bar"]
+  doAssert rsplit("foothebar", sep="the") == @["foo", "bar"]
+
   doAssert(unescape(r"\x013", "", "") == "\x013")
 
   doAssert join(["foo", "bar", "baz"]) == "foobarbaz"
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim
index 5d302c9dc..ac25dccef 100644
--- a/lib/pure/unicode.nim
+++ b/lib/pure/unicode.nim
@@ -135,45 +135,62 @@ proc runeAt*(s: string, i: Natural): Rune =
   ## Returns the unicode character in ``s`` at byte index ``i``
   fastRuneAt(s, i, result, false)
 
-proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} =
-  ## Converts a rune into its UTF-8 representation
+template fastToUTF8Copy*(c: Rune, s: var string, pos: int, doInc = true) =
+  ## Copies UTF-8 representation of `c` into the preallocated string `s`
+  ## starting at position `pos`. If `doInc == true`, `pos` is incremented
+  ## by the number of bytes that have been processed.
+  ##
+  ## To be the most efficient, make sure `s` is preallocated
+  ## with an additional amount equal to the byte length of
+  ## `c`.
   var i = RuneImpl(c)
   if i <=% 127:
-    result = newString(1)
-    result[0] = chr(i)
+    s.setLen(pos+1)
+    s[pos+0] = chr(i)
+    when doInc: inc(pos)
   elif i <=% 0x07FF:
-    result = newString(2)
-    result[0] = chr((i shr 6) or 0b110_00000)
-    result[1] = chr((i and ones(6)) or 0b10_0000_00)
+    s.setLen(pos+2)
+    s[pos+0] = chr((i shr 6) or 0b110_00000)
+    s[pos+1] = chr((i and ones(6)) or 0b10_0000_00)
+    when doInc: inc(pos, 2)
   elif i <=% 0xFFFF:
-    result = newString(3)
-    result[0] = chr(i shr 12 or 0b1110_0000)
-    result[1] = chr(i shr 6 and ones(6) or 0b10_0000_00)
-    result[2] = chr(i and ones(6) or 0b10_0000_00)
+    s.setLen(pos+3)
+    s[pos+0] = chr(i shr 12 or 0b1110_0000)
+    s[pos+1] = chr(i shr 6 and ones(6) or 0b10_0000_00)
+    s[pos+2] = chr(i and ones(6) or 0b10_0000_00)
+    when doInc: inc(pos, 3)
   elif i <=% 0x001FFFFF:
-    result = newString(4)
-    result[0] = chr(i shr 18 or 0b1111_0000)
-    result[1] = chr(i shr 12 and ones(6) or 0b10_0000_00)
-    result[2] = chr(i shr 6 and ones(6) or 0b10_0000_00)
-    result[3] = chr(i and ones(6) or 0b10_0000_00)
+    s.setLen(pos+4)
+    s[pos+0] = chr(i shr 18 or 0b1111_0000)
+    s[pos+1] = chr(i shr 12 and ones(6) or 0b10_0000_00)
+    s[pos+2] = chr(i shr 6 and ones(6) or 0b10_0000_00)
+    s[pos+3] = chr(i and ones(6) or 0b10_0000_00)
+    when doInc: inc(pos, 4)
   elif i <=% 0x03FFFFFF:
-    result = newString(5)
-    result[0] = chr(i shr 24 or 0b111110_00)
-    result[1] = chr(i shr 18 and ones(6) or 0b10_0000_00)
-    result[2] = chr(i shr 12 and ones(6) or 0b10_0000_00)
-    result[3] = chr(i shr 6 and ones(6) or 0b10_0000_00)
-    result[4] = chr(i and ones(6) or 0b10_0000_00)
+    s.setLen(pos+5)
+    s[pos+0] = chr(i shr 24 or 0b111110_00)
+    s[pos+1] = chr(i shr 18 and ones(6) or 0b10_0000_00)
+    s[pos+2] = chr(i shr 12 and ones(6) or 0b10_0000_00)
+    s[pos+3] = chr(i shr 6 and ones(6) or 0b10_0000_00)
+    s[pos+4] = chr(i and ones(6) or 0b10_0000_00)
+    when doInc: inc(pos, 5)
   elif i <=% 0x7FFFFFFF:
-    result = newString(6)
-    result[0] = chr(i shr 30 or 0b1111110_0)
-    result[1] = chr(i shr 24 and ones(6) or 0b10_0000_00)
-    result[2] = chr(i shr 18 and ones(6) or 0b10_0000_00)
-    result[3] = chr(i shr 12 and ones(6) or 0b10_0000_00)
-    result[4] = chr(i shr 6 and ones(6) or 0b10_0000_00)
-    result[5] = chr(i and ones(6) or 0b10_0000_00)
+    s.setLen(pos+6)
+    s[pos+0] = chr(i shr 30 or 0b1111110_0)
+    s[pos+1] = chr(i shr 24 and ones(6) or 0b10_0000_00)
+    s[pos+2] = chr(i shr 18 and ones(6) or 0b10_0000_00)
+    s[pos+3] = chr(i shr 12 and ones(6) or 0b10_0000_00)
+    s[pos+4] = chr(i shr 6 and ones(6) or 0b10_0000_00)
+    s[pos+5] = chr(i and ones(6) or 0b10_0000_00)
+    when doInc: inc(pos, 6)
   else:
     discard # error, exception?
 
+proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} =
+  ## Converts a rune into its UTF-8 representation
+  result = ""
+  fastToUTF8Copy(c, result, 0, false)
+
 proc `$`*(rune: Rune): string =
   ## Converts a Rune to a string
   rune.toUTF8
@@ -1352,6 +1369,136 @@ proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
     (c >= 0x20d0 and c <= 0x20ff) or
     (c >= 0xfe20 and c <= 0xfe2f))
 
+proc swapCase*(s: string): string {.noSideEffect, procvar,
+  rtl, extern: "nuc$1".} =
+  ## Swaps the case of unicode characters in `s`
+  ##
+  ## Returns a new string such that the cases of all unicode characters
+  ## are swapped if possible
+
+  var
+    i = 0
+    lastIndex = 0
+    rune: Rune
+
+  result = newString(len(s))
+
+  while i < len(s):
+    lastIndex = i
+
+    fastRuneAt(s, i, rune)
+
+    if rune.isUpper():
+      rune = rune.toLower()
+    elif rune.isLower():
+      rune = rune.toUpper()
+
+    rune.fastToUTF8Copy(result, lastIndex)
+
+proc translate*(s: string, replacements: proc(key: string): string): string {.
+  rtl, extern: "nuc$1".} =
+  ## Translates words in a string using the `replacements` proc to substitute
+  ## words inside `s` with their replacements
+  ##
+  ## `replacements` is any proc that takes a word and returns
+  ## a new word to fill it's place.
+
+  # Allocate memory for the new string based on the old one.
+  # If the new string length is less than the old, no allocations
+  # will be needed. If the new string length is greater than the
+  # old, then maybe only one allocation is needed
+  result = newStringOfCap(s.len)
+
+  var
+    index = 0
+    lastIndex = 0
+    wordStart = 0
+    inWord = false
+    rune: Rune
+
+  while index < len(s):
+    lastIndex = index
+
+    fastRuneAt(s, index, rune)
+
+    let whiteSpace = rune.isWhiteSpace()
+
+    if whiteSpace and inWord:
+      # If we've reached the end of a word
+      let word = s[wordStart ..< lastIndex]
+      result.add(replacements(word))
+      result.add($rune)
+
+      inWord = false
+    elif not whiteSpace and not inWord:
+      # If we've hit a non space character and
+      # are not currently in a word, track
+      # the starting index of the word
+      inWord = true
+      wordStart = lastIndex
+    elif whiteSpace:
+      result.add($rune)
+
+  if wordStart < len(s) and inWord:
+    # Get the trailing word at the end
+    let word = s[wordStart .. ^1]
+    result.add(replacements(word))
+
+proc title*(s: string): string {.noSideEffect, procvar,
+  rtl, extern: "nuc$1".} =
+  ## Converts `s` to a unicode title.
+  ##
+  ## Returns a new string such that the first character
+  ## in each word inside `s` is capitalized
+
+  var
+    i = 0
+    lastIndex = 0
+    rune: Rune
+
+  result = newString(len(s))
+
+  var firstRune = true
+
+  while i < len(s):
+    lastIndex = i
+
+    fastRuneAt(s, i, rune)
+
+    if not rune.isWhiteSpace() and firstRune:
+      rune = rune.toUpper()
+      firstRune = false
+    elif rune.isWhiteSpace():
+      firstRune = true
+
+    rune.fastToUTF8Copy(result, lastIndex)
+
+proc isTitle*(s: string): bool {.noSideEffect, procvar,
+  rtl, extern: "nuc$1Str".}=
+  ## Checks whether or not `s` is a unicode title.
+  ##
+  ## Returns true if the first character in each word inside `s`
+  ## are upper case and there is at least one character in `s`.
+  if s.len() == 0:
+    return false
+
+  result = true
+
+  var
+    i = 0
+    rune: Rune
+
+  var firstRune = true
+
+  while i < len(s) and result:
+    fastRuneAt(s, i, rune, doInc=true)
+
+    if not rune.isWhiteSpace() and firstRune:
+      result = rune.isUpper() and result
+      firstRune = false
+    elif rune.isWhiteSpace():
+      firstRune = true
+
 iterator runes*(s: string): Rune =
   ## Iterates over any unicode character of the string ``s``
   var
@@ -1451,6 +1598,39 @@ when isMainModule:
     compared = (someString == $someRunes)
   doAssert compared == true
 
+  proc test_replacements(word: string): string =
+    case word
+    of "two":
+      return "2"
+    of "foo":
+      return "BAR"
+    of "βeta":
+      return "beta"
+    of "alpha":
+      return "αlpha"
+    else:
+      return "12345"
+
+  doAssert translate("two not alpha foo βeta", test_replacements) == "2 12345 αlpha BAR beta"
+  doAssert translate("  two not foo βeta  ", test_replacements) == "  2 12345 BAR beta  "
+
+  doAssert title("foo bar") == "Foo Bar"
+  doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma"
+  doAssert title("") == ""
+
+  doAssert isTitle("Foo")
+  doAssert(not isTitle("Foo bar"))
+  doAssert(not isTitle("αlpha Βeta"))
+  doAssert(isTitle("Αlpha Βeta Γamma"))
+  doAssert(not isTitle("fFoo"))
+
+  doAssert swapCase("FooBar") == "fOObAR"
+  doAssert swapCase(" ") == " "
+  doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA"
+  doAssert swapCase("a✓B") == "A✓b"
+  doAssert swapCase("") == ""
+
+
   doAssert reversed("Reverse this!") == "!siht esreveR"
   doAssert reversed("先秦兩漢") == "漢兩秦先"
   doAssert reversed("as⃝df̅") == "f̅ds⃝a"
author	Andreas Rumpf <rumpf_a@web.de>	2016-06-14 09:38:45 +0200
committer	GitHub <noreply@github.com>	2016-06-14 09:38:45 +0200
commit	c9c4b6c41d9465599f4a627c09fe626cab727270 (patch)
tree	ce1068c16a5cd5a0824b24ea7392eb62c55c7392 /lib
parent	93424420d48929965929901c2d5698de189c1a34 (diff)
parent	84d9081e21783151b40c0320ca50df4845043559 (diff)
download	Nim-c9c4b6c41d9465599f4a627c09fe626cab727270.tar.gz