summary refs log tree commit diff stats
path: root/lib/pure/unicode.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/unicode.nim')
-rw-r--r--[-rwxr-xr-x]lib/pure/unicode.nim2509
1 files changed, 1422 insertions, 1087 deletions
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim
index 1edddecd9..8cbe117bb 100755..100644
--- a/lib/pure/unicode.nim
+++ b/lib/pure/unicode.nim
@@ -1,1069 +1,473 @@
 #
 #
-#            Nimrod's Runtime Library
-#        (c) Copyright 2009 Andreas Rumpf
+#            Nim's Runtime Library
+#        (c) Copyright 2012 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
 #
 
 ## This module provides support to handle the Unicode UTF-8 encoding.
+##
+## There are no specialized ``insert``, ``delete``, ``add`` and ``contains``
+## procedures for ``seq[Rune]`` in this module because the generic variants
+## of these procedures in the system module already work with it.
+##
+## The current version is compatible with Unicode v12.0.0.
+##
+## **See also:**
+## * `strutils module <strutils.html>`_
+## * `unidecode module <unidecode.html>`_
+## * `encodings module <encodings.html>`_
 
-{.deadCodeElim: on.}
+include "system/inclrtl"
+import std/strbasics
+template toOa(s: string): auto = s.toOpenArray(0, s.high)
+
+proc substr(s: openArray[char] , first, last: int): string =
+  # Copied substr from system
+  let first = max(first, 0)
+  let L = max(min(last, high(s)) - first + 1, 0)
+  result = newString(L)
+  for i in 0 .. L-1:
+    result[i] = s[i+first]
 
 type
-  irune = int # underlying type of TRune
-  TRune* = distinct irune   ## type that can hold any Unicode character
-  TRune16* = distinct int16 ## 16 bit Unicode character
-  
-proc `<=%`*(a, b: TRune): bool {.borrow.}
-proc `<%`*(a, b: TRune): bool {.borrow.}
-proc `==`*(a, b: TRune): bool {.borrow.}
-
-template ones(n: expr): expr = ((1 shl n)-1)
-
-proc runeLen*(s: string): int =
-  ## returns the number of Unicode characters of the string `s`.
+  RuneImpl = int32 # underlying type of Rune
+  Rune* = distinct RuneImpl ## \
+    ## Type that can hold a single Unicode code point.
+    ##
+    ## A Rune may be composed with other Runes to a character on the screen.
+    ## `RuneImpl` is the underlying type used to store Runes, currently `int32`.
+
+template ones(n: untyped): untyped = ((1 shl n)-1)
+
+proc runeLen*(s: openArray[char]): int {.rtl, extern: "nuc$1".} =
+  ## Returns the number of runes of the string ``s``.
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.runeLen == 6
+    ## note: a.len == 8
+
+  result = 0
   var i = 0
   while i < len(s):
-    if ord(s[i]) <=% 127: inc(i)
-    elif ord(s[i]) shr 5 == 0b110: inc(i, 2)
-    elif ord(s[i]) shr 4 == 0b1110: inc(i, 3)
-    elif ord(s[i]) shr 3 == 0b11110: inc(i, 4)
-    else: assert(false)
+    if uint(s[i]) <= 127: inc(i)
+    elif uint(s[i]) shr 5 == 0b110: inc(i, 2)
+    elif uint(s[i]) shr 4 == 0b1110: inc(i, 3)
+    elif uint(s[i]) shr 3 == 0b11110: inc(i, 4)
+    elif uint(s[i]) shr 2 == 0b111110: inc(i, 5)
+    elif uint(s[i]) shr 1 == 0b1111110: inc(i, 6)
+    else: inc i
     inc(result)
 
-proc runeLenAt*(s: string, i: int): int =
-  ## returns the number of bytes the rune starting at ``s[i]`` takes.
-  if ord(s[i]) <=% 127: result = 1
-  elif ord(s[i]) shr 5 == 0b110: result = 2
-  elif ord(s[i]) shr 4 == 0b1110: result = 3
-  elif ord(s[i]) shr 3 == 0b11110: result = 4
-  else: assert(false)
-
-template fastRuneAt*(s: string, i: int, result: expr, doInc = true) =
-  ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true``
-  ## `i` is incremented by the number of bytes that have been processed.
-  when not defined(ones):
-    template ones(n: expr): expr = ((1 shl n)-1)
-
-  if ord(s[i]) <=% 127:
-    result = TRune(ord(s[i]))
+proc runeLenAt*(s: openArray[char], i: Natural): int =
+  ## Returns the number of bytes the rune starting at ``s[i]`` takes.
+  ##
+  ## See also:
+  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.runeLenAt(0) == 1
+    doAssert a.runeLenAt(1) == 2
+
+  if uint(s[i]) <= 127: result = 1
+  elif uint(s[i]) shr 5 == 0b110: result = 2
+  elif uint(s[i]) shr 4 == 0b1110: result = 3
+  elif uint(s[i]) shr 3 == 0b11110: result = 4
+  elif uint(s[i]) shr 2 == 0b111110: result = 5
+  elif uint(s[i]) shr 1 == 0b1111110: result = 6
+  else: result = 1
+
+const replRune = Rune(0xFFFD)
+
+template fastRuneAt*(s: openArray[char] or string, i: int, result: untyped, doInc = true) =
+  ## Returns the rune ``s[i]`` in ``result``.
+  ##
+  ## If ``doInc == true`` (default), ``i`` is incremented by the number
+  ## of bytes that have been processed.
+  bind ones
+  if uint(s[i]) <= 127:
+    result = Rune(uint(s[i]))
     when doInc: inc(i)
-  elif ord(s[i]) shr 5 == 0b110:
-    assert(ord(s[i+1]) shr 6 == 0b10)
-    result = TRune((ord(s[i]) and ones(5)) shl 6 or (ord(s[i+1]) and ones(6)))
-    when doInc: inc(i, 2)
-  elif ord(s[i]) shr 4 == 0b1110:
-    assert(ord(s[i+1]) shr 6 == 0b10)
-    assert(ord(s[i+2]) shr 6 == 0b10)
-    result = TRune((ord(s[i]) and ones(4)) shl 12 or
-             (ord(s[i+1]) and ones(6)) shl 6 or
-             (ord(s[i+2]) and ones(6)))
-    when doInc: inc(i, 3)
-  elif ord(s[i]) shr 3 == 0b11110:
-    assert(ord(s[i+1]) shr 6 == 0b10)
-    assert(ord(s[i+2]) shr 6 == 0b10)
-    assert(ord(s[i+3]) shr 6 == 0b10)
-    result = TRune((ord(s[i]) and ones(3)) shl 18 or
-             (ord(s[i+1]) and ones(6)) shl 12 or
-             (ord(s[i+2]) and ones(6)) shl 6 or
-             (ord(s[i+3]) and ones(6)))
-    when doInc: inc(i, 4)
+  elif uint(s[i]) shr 5 == 0b110:
+    # assert(uint(s[i+1]) shr 6 == 0b10)
+    if i <= s.len - 2:
+      result = Rune((uint(s[i]) and (ones(5))) shl 6 or
+                    (uint(s[i+1]) and ones(6)))
+      when doInc: inc(i, 2)
+    else:
+      result = replRune
+      when doInc: inc(i)
+  elif uint(s[i]) shr 4 == 0b1110:
+    # assert(uint(s[i+1]) shr 6 == 0b10)
+    # assert(uint(s[i+2]) shr 6 == 0b10)
+    if i <= s.len - 3:
+      result = Rune((uint(s[i]) and ones(4)) shl 12 or
+                    (uint(s[i+1]) and ones(6)) shl 6 or
+                    (uint(s[i+2]) and ones(6)))
+      when doInc: inc(i, 3)
+    else:
+      result = replRune
+      when doInc: inc(i)
+  elif uint(s[i]) shr 3 == 0b11110:
+    # assert(uint(s[i+1]) shr 6 == 0b10)
+    # assert(uint(s[i+2]) shr 6 == 0b10)
+    # assert(uint(s[i+3]) shr 6 == 0b10)
+    if i <= s.len - 4:
+      result = Rune((uint(s[i]) and ones(3)) shl 18 or
+                    (uint(s[i+1]) and ones(6)) shl 12 or
+                    (uint(s[i+2]) and ones(6)) shl 6 or
+                    (uint(s[i+3]) and ones(6)))
+      when doInc: inc(i, 4)
+    else:
+      result = replRune
+      when doInc: inc(i)
+  elif uint(s[i]) shr 2 == 0b111110:
+    # assert(uint(s[i+1]) shr 6 == 0b10)
+    # assert(uint(s[i+2]) shr 6 == 0b10)
+    # assert(uint(s[i+3]) shr 6 == 0b10)
+    # assert(uint(s[i+4]) shr 6 == 0b10)
+    if i <= s.len - 5:
+      result = Rune((uint(s[i]) and ones(2)) shl 24 or
+                (uint(s[i+1]) and ones(6)) shl 18 or
+                (uint(s[i+2]) and ones(6)) shl 12 or
+                (uint(s[i+3]) and ones(6)) shl 6 or
+                (uint(s[i+4]) and ones(6)))
+      when doInc: inc(i, 5)
+    else:
+      result = replRune
+      when doInc: inc(i)
+  elif uint(s[i]) shr 1 == 0b1111110:
+    # assert(uint(s[i+1]) shr 6 == 0b10)
+    # assert(uint(s[i+2]) shr 6 == 0b10)
+    # assert(uint(s[i+3]) shr 6 == 0b10)
+    # assert(uint(s[i+4]) shr 6 == 0b10)
+    # assert(uint(s[i+5]) shr 6 == 0b10)
+    if i <= s.len - 6:
+      result = Rune((uint(s[i]) and ones(1)) shl 30 or
+                    (uint(s[i+1]) and ones(6)) shl 24 or
+                    (uint(s[i+2]) and ones(6)) shl 18 or
+                    (uint(s[i+3]) and ones(6)) shl 12 or
+                    (uint(s[i+4]) and ones(6)) shl 6 or
+                    (uint(s[i+5]) and ones(6)))
+      when doInc: inc(i, 6)
+    else:
+      result = replRune
+      when doInc: inc(i)
   else:
-    assert(false)
+    result = Rune(uint(s[i]))
+    when doInc: inc(i)
 
-proc runeAt*(s: string, i: int): TRune =
-  ## returns the unicode character in `s` at byte index `i`
+proc runeAt*(s: openArray[char], i: Natural): Rune =
+  ## Returns the rune in ``s`` at **byte index** ``i``.
+  ##
+  ## See also:
+  ## * `runeAtPos proc <#runeAtPos,string,int>`_
+  ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
+  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.runeAt(1) == "ñ".runeAt(0)
+    doAssert a.runeAt(2) == "ñ".runeAt(1)
+    doAssert a.runeAt(3) == "y".runeAt(0)
   fastRuneAt(s, i, result, false)
 
-proc toUTF8*(c: TRune): string = 
-  ## converts a rune into its UTF8 representation
-  var i = irune(c)
+proc validateUtf8*(s: openArray[char]): int =
+  ## Returns the position of the invalid byte in ``s`` if the string ``s`` does
+  ## not hold valid UTF-8 data. Otherwise ``-1`` is returned.
+  ##
+  ## See also:
+  ## * `toUTF8 proc <#toUTF8,Rune>`_
+  ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
+  ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
+  var i = 0
+  let L = s.len
+  while i < L:
+    if uint(s[i]) <= 127:
+      inc(i)
+    elif uint(s[i]) shr 5 == 0b110:
+      if uint(s[i]) < 0xc2: return i # Catch overlong ascii representations.
+      if i+1 < L and uint(s[i+1]) shr 6 == 0b10: inc(i, 2)
+      else: return i
+    elif uint(s[i]) shr 4 == 0b1110:
+      if i+2 < L and uint(s[i+1]) shr 6 == 0b10 and uint(s[i+2]) shr 6 == 0b10:
+        inc i, 3
+      else: return i
+    elif uint(s[i]) shr 3 == 0b11110:
+      if i+3 < L and uint(s[i+1]) shr 6 == 0b10 and
+                     uint(s[i+2]) shr 6 == 0b10 and
+                     uint(s[i+3]) shr 6 == 0b10:
+        inc i, 4
+      else: return i
+    else:
+      return i
+  return -1
+
+template fastToUTF8Copy*(c: Rune, s: var string, pos: int, doInc = true) =
+  ## Copies UTF-8 representation of ``c`` into the preallocated string ``s``
+  ## starting at position ``pos``.
+  ##
+  ## If ``doInc == true`` (default), ``pos`` is incremented
+  ## by the number of bytes that have been processed.
+  ##
+  ## To be the most efficient, make sure ``s`` is preallocated
+  ## with an additional amount equal to the byte length of ``c``.
+  ##
+  ## See also:
+  ## * `validateUtf8 proc <#validateUtf8,string>`_
+  ## * `toUTF8 proc <#toUTF8,Rune>`_
+  ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
+  var i = RuneImpl(c)
   if i <=% 127:
-    result = newString(1)
-    result[0] = chr(i)
+    s.setLen(pos+1)
+    s[pos+0] = chr(i)
+    when doInc: inc(pos)
   elif i <=% 0x07FF:
-    result = newString(2)
-    result[0] = chr(i shr 6 or 0b110_0000)
-    result[1] = chr(i and ones(6) or 0b10_000000)
+    s.setLen(pos+2)
+    s[pos+0] = chr((i shr 6) or 0b110_00000)
+    s[pos+1] = chr((i and ones(6)) or 0b10_0000_00)
+    when doInc: inc(pos, 2)
   elif i <=% 0xFFFF:
-    result = newString(3)
-    result[0] = chr(i shr 12 or 0b1110_0000)
-    result[1] = chr(i shr 6 and ones(6) or 0b10_0000_00)
-    result[2] = chr(i and ones(6) or 0b10_0000_00)
-  elif i <=% 0x0010FFFF:
-    result = newString(4)
-    result[0] = chr(i shr 18 or 0b1111_0000)
-    result[1] = chr(i shr 12 and ones(6) or 0b10_0000_00)
-    result[2] = chr(i shr 6 and ones(6) or 0b10_0000_00)
-    result[3] = chr(i and ones(6) or 0b10_0000_00)
+    s.setLen(pos+3)
+    s[pos+0] = chr(i shr 12 or 0b1110_0000)
+    s[pos+1] = chr(i shr 6 and ones(6) or 0b10_0000_00)
+    s[pos+2] = chr(i and ones(6) or 0b10_0000_00)
+    when doInc: inc(pos, 3)
+  elif i <=% 0x001FFFFF:
+    s.setLen(pos+4)
+    s[pos+0] = chr(i shr 18 or 0b1111_0000)
+    s[pos+1] = chr(i shr 12 and ones(6) or 0b10_0000_00)
+    s[pos+2] = chr(i shr 6 and ones(6) or 0b10_0000_00)
+    s[pos+3] = chr(i and ones(6) or 0b10_0000_00)
+    when doInc: inc(pos, 4)
+  elif i <=% 0x03FFFFFF:
+    s.setLen(pos+5)
+    s[pos+0] = chr(i shr 24 or 0b111110_00)
+    s[pos+1] = chr(i shr 18 and ones(6) or 0b10_0000_00)
+    s[pos+2] = chr(i shr 12 and ones(6) or 0b10_0000_00)
+    s[pos+3] = chr(i shr 6 and ones(6) or 0b10_0000_00)
+    s[pos+4] = chr(i and ones(6) or 0b10_0000_00)
+    when doInc: inc(pos, 5)
+  elif i <=% 0x7FFFFFFF:
+    s.setLen(pos+6)
+    s[pos+0] = chr(i shr 30 or 0b1111110_0)
+    s[pos+1] = chr(i shr 24 and ones(6) or 0b10_0000_00)
+    s[pos+2] = chr(i shr 18 and ones(6) or 0b10_0000_00)
+    s[pos+3] = chr(i shr 12 and ones(6) or 0b10_0000_00)
+    s[pos+4] = chr(i shr 6 and ones(6) or 0b10_0000_00)
+    s[pos+5] = chr(i and ones(6) or 0b10_0000_00)
+    when doInc: inc(pos, 6)
+  else:
+    discard # error, exception?
+
+proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} =
+  ## Converts a rune into its UTF-8 representation.
+  ##
+  ## See also:
+  ## * `validateUtf8 proc <#validateUtf8,string>`_
+  ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
+  ## * `utf8 iterator <#utf8.i,string>`_
+  ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.runeAt(1).toUTF8 == "ñ"
+
+  result = ""
+  fastToUTF8Copy(c, result, 0, false)
+
+proc add*(s: var string; c: Rune) =
+  ## Adds a rune ``c`` to a string ``s``.
+  runnableExamples:
+    var s = "abc"
+    let c = "ä".runeAt(0)
+    s.add(c)
+    doAssert s == "abcä"
+
+  let pos = s.len
+  fastToUTF8Copy(c, s, pos, false)
+
+proc `$`*(rune: Rune): string =
+  ## An alias for `toUTF8 <#toUTF8,Rune>`_.
+  ##
+  ## See also:
+  ## * `validateUtf8 proc <#validateUtf8,string>`_
+  ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
+  rune.toUTF8
+
+proc `$`*(runes: seq[Rune]): string =
+  ## Converts a sequence of Runes to a string.
+  ##
+  ## See also:
+  ## * `toRunes <#toRunes,string>`_ for a reverse operation
+  runnableExamples:
+    let
+      someString = "öÑ"
+      someRunes = toRunes(someString)
+    doAssert $someRunes == someString
+
+  result = ""
+  for rune in runes:
+    result.add rune
+
+proc runeOffset*(s: openArray[char], pos: Natural, start: Natural = 0): int =
+  ## Returns the byte position of rune
+  ## at position ``pos`` in ``s`` with an optional start byte position.
+  ## Returns the special value -1 if it runs out of the string.
+  ##
+  ## **Beware:** This can lead to unoptimized code and slow execution!
+  ## Most problems can be solved more efficiently by using an iterator
+  ## or conversion to a seq of Rune.
+  ##
+  ## See also:
+  ## * `runeReverseOffset proc <#runeReverseOffset,string,Positive>`_
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.runeOffset(1) == 1
+    doAssert a.runeOffset(3) == 4
+    doAssert a.runeOffset(4) == 6
+
+  var
+    i = 0
+    o = start
+  while i < pos:
+    o += runeLenAt(s, o)
+    if o >= s.len:
+      return -1
+    inc i
+  return o
+
+proc runeReverseOffset*(s: openArray[char], rev: Positive): (int, int) =
+  ## Returns a tuple with the byte offset of the
+  ## rune at position ``rev`` in ``s``, counting
+  ## from the end (starting with 1) and the total
+  ## number of runes in the string.
+  ##
+  ## Returns a negative value for offset if there are too few runes in
+  ## the string to satisfy the request.
+  ##
+  ## **Beware:** This can lead to unoptimized code and slow execution!
+  ## Most problems can be solved more efficiently by using an iterator
+  ## or conversion to a seq of Rune.
+  ##
+  ## See also:
+  ## * `runeOffset proc <#runeOffset,string,Natural,Natural>`_
+  var
+    a = rev.int
+    o = 0
+    x = 0
+  let times = 2*rev.int-s.runeLen # transformed from rev.int - a < s.runeLen - rev.int
+  while o < s.len:
+    let r = runeLenAt(s, o)
+    o += r
+    if a > times:
+      x += r
+    dec a
+  result = if a > 0: (-a, rev.int-a) else: (x, -a+rev.int)
+
+proc runeAtPos*(s: openArray[char], pos: int): Rune =
+  ## Returns the rune at position ``pos``.
+  ##
+  ## **Beware:** This can lead to unoptimized code and slow execution!
+  ## Most problems can be solved more efficiently by using an iterator
+  ## or conversion to a seq of Rune.
+  ##
+  ## See also:
+  ## * `runeAt proc <#runeAt,string,Natural>`_
+  ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
+  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
+  fastRuneAt(s, runeOffset(s, pos), result, false)
+
+proc runeStrAtPos*(s: openArray[char], pos: Natural): string =
+  ## Returns the rune at position ``pos`` as UTF8 String.
+  ##
+  ## **Beware:** This can lead to unoptimized code and slow execution!
+  ## Most problems can be solved more efficiently by using an iterator
+  ## or conversion to a seq of Rune.
+  ##
+  ## See also:
+  ## * `runeAt proc <#runeAt,string,Natural>`_
+  ## * `runeAtPos proc <#runeAtPos,string,int>`_
+  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
+  let o = runeOffset(s, pos)
+  substr(s.toOpenArray(o,  (o+runeLenAt(s, o)-1)))
+
+proc runeSubStr*(s: openArray[char], pos: int, len: int = int.high): string =
+  ## Returns the UTF-8 substring starting at code point ``pos``
+  ## with ``len`` code points.
+  ##
+  ## If ``pos`` or ``len`` is negative they count from
+  ## the end of the string. If ``len`` is not given it means the longest
+  ## possible string.
+  runnableExamples:
+    let s = "Hänsel  ««: 10,00€"
+    doAssert(runeSubStr(s, 0, 2) == "Hä")
+    doAssert(runeSubStr(s, 10, 1) == ":")
+    doAssert(runeSubStr(s, -6) == "10,00€")
+    doAssert(runeSubStr(s, 10) == ": 10,00€")
+    doAssert(runeSubStr(s, 12, 5) == "10,00")
+    doAssert(runeSubStr(s, -6, 3) == "10,")
+
+  if pos < 0:
+    let (o, rl) = runeReverseOffset(s, -pos)
+    if len >= rl:
+      result = s.substr(o, s.high)
+    elif len < 0:
+      let e = rl + len
+      if e < 0:
+        result = ""
+      else:
+        result = s.substr(o, runeOffset(s, e-(rl+pos), o)-1)
+    else:
+      result = s.substr(o, runeOffset(s, len, o)-1)
   else:
-    assert false
-
-const
-  alphaRanges = [
-    0x00d8,  0x00f6,  # Ø - ö
-    0x00f8,  0x01f5,  # ø - ǵ   
-    0x0250,  0x02a8,  # ɐ - ʨ   
-    0x038e,  0x03a1,  # Ύ - Ρ   
-    0x03a3,  0x03ce,  # Σ - ώ   
-    0x03d0,  0x03d6,  # ϐ - ϖ   
-    0x03e2,  0x03f3,  # Ϣ - ϳ   
-    0x0490,  0x04c4,  # Ґ - ӄ   
-    0x0561,  0x0587,  # ա - և   
-    0x05d0,  0x05ea,  # א - ת   
-    0x05f0,  0x05f2,  # װ - ײ   
-    0x0621,  0x063a,  # ء - غ   
-    0x0640,  0x064a,  # ـ - ي   
-    0x0671,  0x06b7,  # ٱ - ڷ   
-    0x06ba,  0x06be,  # ں - ھ   
-    0x06c0,  0x06ce,  # ۀ - ێ   
-    0x06d0,  0x06d3,  # ې - ۓ   
-    0x0905,  0x0939,  # अ - ह   
-    0x0958,  0x0961,  # क़ - ॡ   
-    0x0985,  0x098c,  # অ - ঌ   
-    0x098f,  0x0990,  # এ - ঐ   
-    0x0993,  0x09a8,  # ও - ন   
-    0x09aa,  0x09b0,  # প - র   
-    0x09b6,  0x09b9,  # শ - হ   
-    0x09dc,  0x09dd,  # ড় - ঢ়   
-    0x09df,  0x09e1,  # য় - ৡ   
-    0x09f0,  0x09f1,  # ৰ - ৱ   
-    0x0a05,  0x0a0a,  # ਅ - ਊ   
-    0x0a0f,  0x0a10,  # ਏ - ਐ   
-    0x0a13,  0x0a28,  # ਓ - ਨ   
-    0x0a2a,  0x0a30,  # ਪ - ਰ   
-    0x0a32,  0x0a33,  # ਲ - ਲ਼   
-    0x0a35,  0x0a36,  # ਵ - ਸ਼   
-    0x0a38,  0x0a39,  # ਸ - ਹ   
-    0x0a59,  0x0a5c,  # ਖ਼ - ੜ   
-    0x0a85,  0x0a8b,  # અ - ઋ   
-    0x0a8f,  0x0a91,  # એ - ઑ   
-    0x0a93,  0x0aa8,  # ઓ - ન   
-    0x0aaa,  0x0ab0,  # પ - ર   
-    0x0ab2,  0x0ab3,  # લ - ળ   
-    0x0ab5,  0x0ab9,  # વ - હ   
-    0x0b05,  0x0b0c,  # ଅ - ଌ   
-    0x0b0f,  0x0b10,  # ଏ - ଐ   
-    0x0b13,  0x0b28,  # ଓ - ନ   
-    0x0b2a,  0x0b30,  # ପ - ର   
-    0x0b32,  0x0b33,  # ଲ - ଳ   
-    0x0b36,  0x0b39,  # ଶ - ହ   
-    0x0b5c,  0x0b5d,  # ଡ଼ - ଢ଼   
-    0x0b5f,  0x0b61,  # ୟ - ୡ   
-    0x0b85,  0x0b8a,  # அ - ஊ   
-    0x0b8e,  0x0b90,  # எ - ஐ   
-    0x0b92,  0x0b95,  # ஒ - க   
-    0x0b99,  0x0b9a,  # ங - ச   
-    0x0b9e,  0x0b9f,  # ஞ - ட   
-    0x0ba3,  0x0ba4,  # ண - த   
-    0x0ba8,  0x0baa,  # ந - ப   
-    0x0bae,  0x0bb5,  # ம - வ   
-    0x0bb7,  0x0bb9,  # ஷ - ஹ   
-    0x0c05,  0x0c0c,  # అ - ఌ   
-    0x0c0e,  0x0c10,  # ఎ - ఐ   
-    0x0c12,  0x0c28,  # ఒ - న   
-    0x0c2a,  0x0c33,  # ప - ళ   
-    0x0c35,  0x0c39,  # వ - హ   
-    0x0c60,  0x0c61,  # ౠ - ౡ   
-    0x0c85,  0x0c8c,  # ಅ - ಌ   
-    0x0c8e,  0x0c90,  # ಎ - ಐ   
-    0x0c92,  0x0ca8,  # ಒ - ನ   
-    0x0caa,  0x0cb3,  # ಪ - ಳ   
-    0x0cb5,  0x0cb9,  # ವ - ಹ   
-    0x0ce0,  0x0ce1,  # ೠ - ೡ   
-    0x0d05,  0x0d0c,  # അ - ഌ   
-    0x0d0e,  0x0d10,  # എ - ഐ   
-    0x0d12,  0x0d28,  # ഒ - ന   
-    0x0d2a,  0x0d39,  # പ - ഹ   
-    0x0d60,  0x0d61,  # ൠ - ൡ   
-    0x0e01,  0x0e30,  # ก - ะ   
-    0x0e32,  0x0e33,  # า - ำ   
-    0x0e40,  0x0e46,  # เ - ๆ   
-    0x0e5a,  0x0e5b,  # ๚ - ๛   
-    0x0e81,  0x0e82,  # ກ - ຂ   
-    0x0e87,  0x0e88,  # ງ - ຈ   
-    0x0e94,  0x0e97,  # ດ - ທ   
-    0x0e99,  0x0e9f,  # ນ - ຟ   
-    0x0ea1,  0x0ea3,  # ມ - ຣ   
-    0x0eaa,  0x0eab,  # ສ - ຫ   
-    0x0ead,  0x0eae,  # ອ - ຮ   
-    0x0eb2,  0x0eb3,  # າ - ຳ   
-    0x0ec0,  0x0ec4,  # ເ - ໄ   
-    0x0edc,  0x0edd,  # ໜ - ໝ   
-    0x0f18,  0x0f19,  # ༘ - ༙   
-    0x0f40,  0x0f47,  # ཀ - ཇ   
-    0x0f49,  0x0f69,  # ཉ - ཀྵ   
-    0x10d0,  0x10f6,  # ა - ჶ   
-    0x1100,  0x1159,  # ᄀ - ᅙ   
-    0x115f,  0x11a2,  # ᅟ - ᆢ   
-    0x11a8,  0x11f9,  # ᆨ - ᇹ   
-    0x1e00,  0x1e9b,  # Ḁ - ẛ   
-    0x1f50,  0x1f57,  # ὐ - ὗ   
-    0x1f80,  0x1fb4,  # ᾀ - ᾴ   
-    0x1fb6,  0x1fbc,  # ᾶ - ᾼ   
-    0x1fc2,  0x1fc4,  # ῂ - ῄ   
-    0x1fc6,  0x1fcc,  # ῆ - ῌ   
-    0x1fd0,  0x1fd3,  # ῐ - ΐ   
-    0x1fd6,  0x1fdb,  # ῖ - Ί   
-    0x1fe0,  0x1fec,  # ῠ - Ῥ   
-    0x1ff2,  0x1ff4,  # ῲ - ῴ   
-    0x1ff6,  0x1ffc,  # ῶ - ῼ   
-    0x210a,  0x2113,  # ℊ - ℓ   
-    0x2115,  0x211d,  # ℕ - ℝ   
-    0x2120,  0x2122,  # ℠ - ™   
-    0x212a,  0x2131,  # K - ℱ   
-    0x2133,  0x2138,  # ℳ - ℸ   
-    0x3041,  0x3094,  # ぁ - ゔ   
-    0x30a1,  0x30fa,  # ァ - ヺ   
-    0x3105,  0x312c,  # ㄅ - ㄬ   
-    0x3131,  0x318e,  # ㄱ - ㆎ   
-    0x3192,  0x319f,  # ㆒ - ㆟   
-    0x3260,  0x327b,  # ㉠ - ㉻   
-    0x328a,  0x32b0,  # ㊊ - ㊰   
-    0x32d0,  0x32fe,  # ㋐ - ㋾   
-    0x3300,  0x3357,  # ㌀ - ㍗   
-    0x3371,  0x3376,  # ㍱ - ㍶   
-    0x337b,  0x3394,  # ㍻ - ㎔   
-    0x3399,  0x339e,  # ㎙ - ㎞   
-    0x33a9,  0x33ad,  # ㎩ - ㎭   
-    0x33b0,  0x33c1,  # ㎰ - ㏁   
-    0x33c3,  0x33c5,  # ㏃ - ㏅   
-    0x33c7,  0x33d7,  # ㏇ - ㏗   
-    0x33d9,  0x33dd,  # ㏙ - ㏝   
-    0x4e00,  0x9fff,  # 一 - 鿿   
-    0xac00,  0xd7a3,  # 가 - 힣   
-    0xf900,  0xfb06,  # 豈 - st   
-    0xfb13,  0xfb17,  # ﬓ - ﬗ   
-    0xfb1f,  0xfb28,  # ײַ - ﬨ   
-    0xfb2a,  0xfb36,  # שׁ - זּ   
-    0xfb38,  0xfb3c,  # טּ - לּ   
-    0xfb40,  0xfb41,  # נּ - סּ   
-    0xfb43,  0xfb44,  # ףּ - פּ   
-    0xfb46,  0xfbb1,  # צּ - ﮱ   
-    0xfbd3,  0xfd3d,  # ﯓ - ﴽ   
-    0xfd50,  0xfd8f,  # ﵐ - ﶏ   
-    0xfd92,  0xfdc7,  # ﶒ - ﷇ   
-    0xfdf0,  0xfdf9,  # ﷰ - ﷹ   
-    0xfe70,  0xfe72,  # ﹰ - ﹲ   
-    0xfe76,  0xfefc,  # ﹶ - ﻼ   
-    0xff66,  0xff6f,  # ヲ - ッ   
-    0xff71,  0xff9d,  # ア - ン   
-    0xffa0,  0xffbe,  # ᅠ - ᄒ   
-    0xffc2,  0xffc7,  # ᅡ - ᅦ   
-    0xffca,  0xffcf,  # ᅧ - ᅬ   
-    0xffd2,  0xffd7,  # ᅭ - ᅲ   
-    0xffda,  0xffdc]  # ᅳ - ᅵ   
-
-  alphaSinglets = [
-    0x00aa,  # ª   
-    0x00b5,  # µ   
-    0x00ba,  # º   
-    0x03da,  # Ϛ   
-    0x03dc,  # Ϝ   
-    0x03de,  # Ϟ   
-    0x03e0,  # Ϡ   
-    0x06d5,  # ە   
-    0x09b2,  # ল   
-    0x0a5e,  # ਫ਼   
-    0x0a8d,  # ઍ   
-    0x0ae0,  # ૠ   
-    0x0b9c,  # ஜ   
-    0x0cde,  # ೞ   
-    0x0e4f,  # ๏   
-    0x0e84,  # ຄ   
-    0x0e8a,  # ຊ   
-    0x0e8d,  # ຍ   
-    0x0ea5,  # ລ   
-    0x0ea7,  # ວ   
-    0x0eb0,  # ະ   
-    0x0ebd,  # ຽ   
-    0x1fbe,  # ι   
-    0x207f,  # ⁿ   
-    0x20a8,  # ₨   
-    0x2102,  # ℂ   
-    0x2107,  # ℇ   
-    0x2124,  # ℤ   
-    0x2126,  # Ω   
-    0x2128,  # ℨ   
-    0xfb3e,  # מּ   
-    0xfe74]  # ﹴ   
-
-  spaceRanges = [
-    0x0009,  0x000a,  # tab and newline   
-    0x0020,  0x0020,  # space   
-    0x00a0,  0x00a0,  #     
-    0x2000,  0x200b,  #   - ​   
-    0x2028,  0x2029,  #  - 
    0x3000,  0x3000,  #     
-    0xfeff,  0xfeff]  #    
-
-  toupperRanges = [
-    0x0061,  0x007a, 468,  # a-z A-Z   
-    0x00e0,  0x00f6, 468,  # à-ö À-Ö   
-    0x00f8,  0x00fe, 468,  # ø-þ Ø-Þ   
-    0x0256,  0x0257, 295,  # ɖ-ɗ Ɖ-Ɗ   
-    0x0258,  0x0259, 298,  # ɘ-ə Ǝ-Ə   
-    0x028a,  0x028b, 283,  # ʊ-ʋ Ʊ-Ʋ   
-    0x03ad,  0x03af, 463,  # έ-ί Έ-Ί   
-    0x03b1,  0x03c1, 468,  # α-ρ Α-Ρ   
-    0x03c3,  0x03cb, 468,  # σ-ϋ Σ-Ϋ   
-    0x03cd,  0x03ce, 437,  # ύ-ώ Ύ-Ώ   
-    0x0430,  0x044f, 468,  # а-я А-Я   
-    0x0451,  0x045c, 420,  # ё-ќ Ё-Ќ   
-    0x045e,  0x045f, 420,  # ў-џ Ў-Џ   
-    0x0561,  0x0586, 452,  # ա-ֆ Ա-Ֆ   
-    0x1f00,  0x1f07, 508,  # ἀ-ἇ Ἀ-Ἇ   
-    0x1f10,  0x1f15, 508,  # ἐ-ἕ Ἐ-Ἕ   
-    0x1f20,  0x1f27, 508,  # ἠ-ἧ Ἠ-Ἧ   
-    0x1f30,  0x1f37, 508,  # ἰ-ἷ Ἰ-Ἷ   
-    0x1f40,  0x1f45, 508,  # ὀ-ὅ Ὀ-Ὅ   
-    0x1f60,  0x1f67, 508,  # ὠ-ὧ Ὠ-Ὧ   
-    0x1f70,  0x1f71, 574,  # ὰ-ά Ὰ-Ά   
-    0x1f72,  0x1f75, 586,  # ὲ-ή Ὲ-Ή   
-    0x1f76,  0x1f77, 600,  # ὶ-ί Ὶ-Ί   
-    0x1f78,  0x1f79, 628,  # ὸ-ό Ὸ-Ό   
-    0x1f7a,  0x1f7b, 612,  # ὺ-ύ Ὺ-Ύ   
-    0x1f7c,  0x1f7d, 626,  # ὼ-ώ Ὼ-Ώ   
-    0x1f80,  0x1f87, 508,  # ᾀ-ᾇ ᾈ-ᾏ   
-    0x1f90,  0x1f97, 508,  # ᾐ-ᾗ ᾘ-ᾟ   
-    0x1fa0,  0x1fa7, 508,  # ᾠ-ᾧ ᾨ-ᾯ   
-    0x1fb0,  0x1fb1, 508,  # ᾰ-ᾱ Ᾰ-Ᾱ   
-    0x1fd0,  0x1fd1, 508,  # ῐ-ῑ Ῐ-Ῑ   
-    0x1fe0,  0x1fe1, 508,  # ῠ-ῡ Ῠ-Ῡ   
-    0x2170,  0x217f, 484,  # ⅰ-ⅿ Ⅰ-Ⅿ   
-    0x24d0,  0x24e9, 474,  # ⓐ-ⓩ Ⓐ-Ⓩ   
-    0xff41,  0xff5a, 468]  # a-z A-Z   
-
-  toupperSinglets = [
-    0x00ff, 621,  # ÿ Ÿ   
-    0x0101, 499,  # ā Ā   
-    0x0103, 499,  # ă Ă   
-    0x0105, 499,  # ą Ą   
-    0x0107, 499,  # ć Ć   
-    0x0109, 499,  # ĉ Ĉ   
-    0x010b, 499,  # ċ Ċ   
-    0x010d, 499,  # č Č   
-    0x010f, 499,  # ď Ď   
-    0x0111, 499,  # đ Đ   
-    0x0113, 499,  # ē Ē   
-    0x0115, 499,  # ĕ Ĕ   
-    0x0117, 499,  # ė Ė   
-    0x0119, 499,  # ę Ę   
-    0x011b, 499,  # ě Ě   
-    0x011d, 499,  # ĝ Ĝ   
-    0x011f, 499,  # ğ Ğ   
-    0x0121, 499,  # ġ Ġ   
-    0x0123, 499,  # ģ Ģ   
-    0x0125, 499,  # ĥ Ĥ   
-    0x0127, 499,  # ħ Ħ   
-    0x0129, 499,  # ĩ Ĩ   
-    0x012b, 499,  # ī Ī   
-    0x012d, 499,  # ĭ Ĭ   
-    0x012f, 499,  # į Į   
-    0x0131, 268,  # ı I   
-    0x0133, 499,  # ij IJ   
-    0x0135, 499,  # ĵ Ĵ   
-    0x0137, 499,  # ķ Ķ   
-    0x013a, 499,  # ĺ Ĺ   
-    0x013c, 499,  # ļ Ļ   
-    0x013e, 499,  # ľ Ľ   
-    0x0140, 499,  # ŀ Ŀ   
-    0x0142, 499,  # ł Ł   
-    0x0144, 499,  # ń Ń   
-    0x0146, 499,  # ņ Ņ   
-    0x0148, 499,  # ň Ň   
-    0x014b, 499,  # ŋ Ŋ   
-    0x014d, 499,  # ō Ō   
-    0x014f, 499,  # ŏ Ŏ   
-    0x0151, 499,  # ő Ő   
-    0x0153, 499,  # œ Œ   
-    0x0155, 499,  # ŕ Ŕ   
-    0x0157, 499,  # ŗ Ŗ   
-    0x0159, 499,  # ř Ř   
-    0x015b, 499,  # ś Ś   
-    0x015d, 499,  # ŝ Ŝ   
-    0x015f, 499,  # ş Ş   
-    0x0161, 499,  # š Š   
-    0x0163, 499,  # ţ Ţ   
-    0x0165, 499,  # ť Ť   
-    0x0167, 499,  # ŧ Ŧ   
-    0x0169, 499,  # ũ Ũ   
-    0x016b, 499,  # ū Ū   
-    0x016d, 499,  # ŭ Ŭ   
-    0x016f, 499,  # ů Ů   
-    0x0171, 499,  # ű Ű   
-    0x0173, 499,  # ų Ų   
-    0x0175, 499,  # ŵ Ŵ   
-    0x0177, 499,  # ŷ Ŷ   
-    0x017a, 499,  # ź Ź   
-    0x017c, 499,  # ż Ż   
-    0x017e, 499,  # ž Ž   
-    0x017f, 200,  # ſ S   
-    0x0183, 499,  # ƃ Ƃ   
-    0x0185, 499,  # ƅ Ƅ   
-    0x0188, 499,  # ƈ Ƈ   
-    0x018c, 499,  # ƌ Ƌ   
-    0x0192, 499,  # ƒ Ƒ   
-    0x0199, 499,  # ƙ Ƙ   
-    0x01a1, 499,  # ơ Ơ   
-    0x01a3, 499,  # ƣ Ƣ   
-    0x01a5, 499,  # ƥ Ƥ   
-    0x01a8, 499,  # ƨ Ƨ   
-    0x01ad, 499,  # ƭ Ƭ   
-    0x01b0, 499,  # ư Ư   
-    0x01b4, 499,  # ƴ Ƴ   
-    0x01b6, 499,  # ƶ Ƶ   
-    0x01b9, 499,  # ƹ Ƹ   
-    0x01bd, 499,  # ƽ Ƽ   
-    0x01c5, 499,  # Dž DŽ   
-    0x01c6, 498,  # dž DŽ   
-    0x01c8, 499,  # Lj LJ   
-    0x01c9, 498,  # lj LJ   
-    0x01cb, 499,  # Nj NJ   
-    0x01cc, 498,  # nj NJ   
-    0x01ce, 499,  # ǎ Ǎ   
-    0x01d0, 499,  # ǐ Ǐ   
-    0x01d2, 499,  # ǒ Ǒ   
-    0x01d4, 499,  # ǔ Ǔ   
-    0x01d6, 499,  # ǖ Ǖ   
-    0x01d8, 499,  # ǘ Ǘ   
-    0x01da, 499,  # ǚ Ǚ   
-    0x01dc, 499,  # ǜ Ǜ   
-    0x01df, 499,  # ǟ Ǟ   
-    0x01e1, 499,  # ǡ Ǡ   
-    0x01e3, 499,  # ǣ Ǣ   
-    0x01e5, 499,  # ǥ Ǥ   
-    0x01e7, 499,  # ǧ Ǧ   
-    0x01e9, 499,  # ǩ Ǩ   
-    0x01eb, 499,  # ǫ Ǫ   
-    0x01ed, 499,  # ǭ Ǭ   
-    0x01ef, 499,  # ǯ Ǯ   
-    0x01f2, 499,  # Dz DZ   
-    0x01f3, 498,  # dz DZ   
-    0x01f5, 499,  # ǵ Ǵ   
-    0x01fb, 499,  # ǻ Ǻ   
-    0x01fd, 499,  # ǽ Ǽ   
-    0x01ff, 499,  # ǿ Ǿ   
-    0x0201, 499,  # ȁ Ȁ   
-    0x0203, 499,  # ȃ Ȃ   
-    0x0205, 499,  # ȅ Ȅ   
-    0x0207, 499,  # ȇ Ȇ   
-    0x0209, 499,  # ȉ Ȉ   
-    0x020b, 499,  # ȋ Ȋ   
-    0x020d, 499,  # ȍ Ȍ   
-    0x020f, 499,  # ȏ Ȏ   
-    0x0211, 499,  # ȑ Ȑ   
-    0x0213, 499,  # ȓ Ȓ   
-    0x0215, 499,  # ȕ Ȕ   
-    0x0217, 499,  # ȗ Ȗ   
-    0x0253, 290,  # ɓ Ɓ   
-    0x0254, 294,  # ɔ Ɔ   
-    0x025b, 297,  # ɛ Ɛ   
-    0x0260, 295,  # ɠ Ɠ   
-    0x0263, 293,  # ɣ Ɣ   
-    0x0268, 291,  # ɨ Ɨ   
-    0x0269, 289,  # ɩ Ɩ   
-    0x026f, 289,  # ɯ Ɯ   
-    0x0272, 287,  # ɲ Ɲ   
-    0x0283, 282,  # ʃ Ʃ   
-    0x0288, 282,  # ʈ Ʈ   
-    0x0292, 281,  # ʒ Ʒ   
-    0x03ac, 462,  # ά Ά   
-    0x03cc, 436,  # ό Ό   
-    0x03d0, 438,  # ϐ Β   
-    0x03d1, 443,  # ϑ Θ   
-    0x03d5, 453,  # ϕ Φ   
-    0x03d6, 446,  # ϖ Π   
-    0x03e3, 499,  # ϣ Ϣ   
-    0x03e5, 499,  # ϥ Ϥ   
-    0x03e7, 499,  # ϧ Ϧ   
-    0x03e9, 499,  # ϩ Ϩ   
-    0x03eb, 499,  # ϫ Ϫ   
-    0x03ed, 499,  # ϭ Ϭ   
-    0x03ef, 499,  # ϯ Ϯ   
-    0x03f0, 414,  # ϰ Κ   
-    0x03f1, 420,  # ϱ Ρ   
-    0x0461, 499,  # ѡ Ѡ   
-    0x0463, 499,  # ѣ Ѣ   
-    0x0465, 499,  # ѥ Ѥ   
-    0x0467, 499,  # ѧ Ѧ   
-    0x0469, 499,  # ѩ Ѩ   
-    0x046b, 499,  # ѫ Ѫ   
-    0x046d, 499,  # ѭ Ѭ   
-    0x046f, 499,  # ѯ Ѯ   
-    0x0471, 499,  # ѱ Ѱ   
-    0x0473, 499,  # ѳ Ѳ   
-    0x0475, 499,  # ѵ Ѵ   
-    0x0477, 499,  # ѷ Ѷ   
-    0x0479, 499,  # ѹ Ѹ   
-    0x047b, 499,  # ѻ Ѻ   
-    0x047d, 499,  # ѽ Ѽ   
-    0x047f, 499,  # ѿ Ѿ   
-    0x0481, 499,  # ҁ Ҁ   
-    0x0491, 499,  # ґ Ґ   
-    0x0493, 499,  # ғ Ғ   
-    0x0495, 499,  # ҕ Ҕ   
-    0x0497, 499,  # җ Җ   
-    0x0499, 499,  # ҙ Ҙ   
-    0x049b, 499,  # қ Қ   
-    0x049d, 499,  # ҝ Ҝ   
-    0x049f, 499,  # ҟ Ҟ   
-    0x04a1, 499,  # ҡ Ҡ   
-    0x04a3, 499,  # ң Ң   
-    0x04a5, 499,  # ҥ Ҥ   
-    0x04a7, 499,  # ҧ Ҧ   
-    0x04a9, 499,  # ҩ Ҩ   
-    0x04ab, 499,  # ҫ Ҫ   
-    0x04ad, 499,  # ҭ Ҭ   
-    0x04af, 499,  # ү Ү   
-    0x04b1, 499,  # ұ Ұ   
-    0x04b3, 499,  # ҳ Ҳ   
-    0x04b5, 499,  # ҵ Ҵ   
-    0x04b7, 499,  # ҷ Ҷ   
-    0x04b9, 499,  # ҹ Ҹ   
-    0x04bb, 499,  # һ Һ   
-    0x04bd, 499,  # ҽ Ҽ   
-    0x04bf, 499,  # ҿ Ҿ   
-    0x04c2, 499,  # ӂ Ӂ   
-    0x04c4, 499,  # ӄ Ӄ   
-    0x04c8, 499,  # ӈ Ӈ   
-    0x04cc, 499,  # ӌ Ӌ   
-    0x04d1, 499,  # ӑ Ӑ   
-    0x04d3, 499,  # ӓ Ӓ   
-    0x04d5, 499,  # ӕ Ӕ   
-    0x04d7, 499,  # ӗ Ӗ   
-    0x04d9, 499,  # ә Ә   
-    0x04db, 499,  # ӛ Ӛ   
-    0x04dd, 499,  # ӝ Ӝ   
-    0x04df, 499,  # ӟ Ӟ   
-    0x04e1, 499,  # ӡ Ӡ   
-    0x04e3, 499,  # ӣ Ӣ   
-    0x04e5, 499,  # ӥ Ӥ   
-    0x04e7, 499,  # ӧ Ӧ   
-    0x04e9, 499,  # ө Ө   
-    0x04eb, 499,  # ӫ Ӫ   
-    0x04ef, 499,  # ӯ Ӯ   
-    0x04f1, 499,  # ӱ Ӱ   
-    0x04f3, 499,  # ӳ Ӳ   
-    0x04f5, 499,  # ӵ Ӵ   
-    0x04f9, 499,  # ӹ Ӹ   
-    0x1e01, 499,  # ḁ Ḁ   
-    0x1e03, 499,  # ḃ Ḃ   
-    0x1e05, 499,  # ḅ Ḅ   
-    0x1e07, 499,  # ḇ Ḇ   
-    0x1e09, 499,  # ḉ Ḉ   
-    0x1e0b, 499,  # ḋ Ḋ   
-    0x1e0d, 499,  # ḍ Ḍ   
-    0x1e0f, 499,  # ḏ Ḏ   
-    0x1e11, 499,  # ḑ Ḑ   
-    0x1e13, 499,  # ḓ Ḓ   
-    0x1e15, 499,  # ḕ Ḕ   
-    0x1e17, 499,  # ḗ Ḗ   
-    0x1e19, 499,  # ḙ Ḙ   
-    0x1e1b, 499,  # ḛ Ḛ   
-    0x1e1d, 499,  # ḝ Ḝ   
-    0x1e1f, 499,  # ḟ Ḟ   
-    0x1e21, 499,  # ḡ Ḡ   
-    0x1e23, 499,  # ḣ Ḣ   
-    0x1e25, 499,  # ḥ Ḥ   
-    0x1e27, 499,  # ḧ Ḧ   
-    0x1e29, 499,  # ḩ Ḩ   
-    0x1e2b, 499,  # ḫ Ḫ   
-    0x1e2d, 499,  # ḭ Ḭ   
-    0x1e2f, 499,  # ḯ Ḯ   
-    0x1e31, 499,  # ḱ Ḱ   
-    0x1e33, 499,  # ḳ Ḳ   
-    0x1e35, 499,  # ḵ Ḵ   
-    0x1e37, 499,  # ḷ Ḷ   
-    0x1e39, 499,  # ḹ Ḹ   
-    0x1e3b, 499,  # ḻ Ḻ   
-    0x1e3d, 499,  # ḽ Ḽ   
-    0x1e3f, 499,  # ḿ Ḿ   
-    0x1e41, 499,  # ṁ Ṁ   
-    0x1e43, 499,  # ṃ Ṃ   
-    0x1e45, 499,  # ṅ Ṅ   
-    0x1e47, 499,  # ṇ Ṇ   
-    0x1e49, 499,  # ṉ Ṉ   
-    0x1e4b, 499,  # ṋ Ṋ   
-    0x1e4d, 499,  # ṍ Ṍ   
-    0x1e4f, 499,  # ṏ Ṏ   
-    0x1e51, 499,  # ṑ Ṑ   
-    0x1e53, 499,  # ṓ Ṓ   
-    0x1e55, 499,  # ṕ Ṕ   
-    0x1e57, 499,  # ṗ Ṗ   
-    0x1e59, 499,  # ṙ Ṙ   
-    0x1e5b, 499,  # ṛ Ṛ   
-    0x1e5d, 499,  # ṝ Ṝ   
-    0x1e5f, 499,  # ṟ Ṟ   
-    0x1e61, 499,  # ṡ Ṡ   
-    0x1e63, 499,  # ṣ Ṣ   
-    0x1e65, 499,  # ṥ Ṥ   
-    0x1e67, 499,  # ṧ Ṧ   
-    0x1e69, 499,  # ṩ Ṩ   
-    0x1e6b, 499,  # ṫ Ṫ   
-    0x1e6d, 499,  # ṭ Ṭ   
-    0x1e6f, 499,  # ṯ Ṯ   
-    0x1e71, 499,  # ṱ Ṱ   
-    0x1e73, 499,  # ṳ Ṳ   
-    0x1e75, 499,  # ṵ Ṵ   
-    0x1e77, 499,  # ṷ Ṷ   
-    0x1e79, 499,  # ṹ Ṹ   
-    0x1e7b, 499,  # ṻ Ṻ   
-    0x1e7d, 499,  # ṽ Ṽ   
-    0x1e7f, 499,  # ṿ Ṿ   
-    0x1e81, 499,  # ẁ Ẁ   
-    0x1e83, 499,  # ẃ Ẃ   
-    0x1e85, 499,  # ẅ Ẅ   
-    0x1e87, 499,  # ẇ Ẇ   
-    0x1e89, 499,  # ẉ Ẉ   
-    0x1e8b, 499,  # ẋ Ẋ   
-    0x1e8d, 499,  # ẍ Ẍ   
-    0x1e8f, 499,  # ẏ Ẏ   
-    0x1e91, 499,  # ẑ Ẑ   
-    0x1e93, 499,  # ẓ Ẓ   
-    0x1e95, 499,  # ẕ Ẕ   
-    0x1ea1, 499,  # ạ Ạ   
-    0x1ea3, 499,  # ả Ả   
-    0x1ea5, 499,  # ấ Ấ   
-    0x1ea7, 499,  # ầ Ầ   
-    0x1ea9, 499,  # ẩ Ẩ   
-    0x1eab, 499,  # ẫ Ẫ   
-    0x1ead, 499,  # ậ Ậ   
-    0x1eaf, 499,  # ắ Ắ   
-    0x1eb1, 499,  # ằ Ằ   
-    0x1eb3, 499,  # ẳ Ẳ   
-    0x1eb5, 499,  # ẵ Ẵ   
-    0x1eb7, 499,  # ặ Ặ   
-    0x1eb9, 499,  # ẹ Ẹ   
-    0x1ebb, 499,  # ẻ Ẻ   
-    0x1ebd, 499,  # ẽ Ẽ   
-    0x1ebf, 499,  # ế Ế   
-    0x1ec1, 499,  # ề Ề   
-    0x1ec3, 499,  # ể Ể   
-    0x1ec5, 499,  # ễ Ễ   
-    0x1ec7, 499,  # ệ Ệ   
-    0x1ec9, 499,  # ỉ Ỉ   
-    0x1ecb, 499,  # ị Ị   
-    0x1ecd, 499,  # ọ Ọ   
-    0x1ecf, 499,  # ỏ Ỏ   
-    0x1ed1, 499,  # ố Ố   
-    0x1ed3, 499,  # ồ Ồ   
-    0x1ed5, 499,  # ổ Ổ   
-    0x1ed7, 499,  # ỗ Ỗ   
-    0x1ed9, 499,  # ộ Ộ   
-    0x1edb, 499,  # ớ Ớ   
-    0x1edd, 499,  # ờ Ờ   
-    0x1edf, 499,  # ở Ở   
-    0x1ee1, 499,  # ỡ Ỡ   
-    0x1ee3, 499,  # ợ Ợ   
-    0x1ee5, 499,  # ụ Ụ   
-    0x1ee7, 499,  # ủ Ủ   
-    0x1ee9, 499,  # ứ Ứ   
-    0x1eeb, 499,  # ừ Ừ   
-    0x1eed, 499,  # ử Ử   
-    0x1eef, 499,  # ữ Ữ   
-    0x1ef1, 499,  # ự Ự   
-    0x1ef3, 499,  # ỳ Ỳ   
-    0x1ef5, 499,  # ỵ Ỵ   
-    0x1ef7, 499,  # ỷ Ỷ   
-    0x1ef9, 499,  # ỹ Ỹ   
-    0x1f51, 508,  # ὑ Ὑ   
-    0x1f53, 508,  # ὓ Ὓ   
-    0x1f55, 508,  # ὕ Ὕ   
-    0x1f57, 508,  # ὗ Ὗ   
-    0x1fb3, 509,  # ᾳ ᾼ   
-    0x1fc3, 509,  # ῃ ῌ   
-    0x1fe5, 507,  # ῥ Ῥ   
-    0x1ff3, 509]  # ῳ ῼ   
-
-  tolowerRanges = [
-    0x0041,  0x005a, 532,  # A-Z a-z
-    0x00c0,  0x00d6, 532,  # À-Ö à-ö   
-    0x00d8,  0x00de, 532,  # Ø-Þ ø-þ   
-    0x0189,  0x018a, 705,  # Ɖ-Ɗ ɖ-ɗ   
-    0x018e,  0x018f, 702,  # Ǝ-Ə ɘ-ə   
-    0x01b1,  0x01b2, 717,  # Ʊ-Ʋ ʊ-ʋ   
-    0x0388,  0x038a, 537,  # Έ-Ί έ-ί   
-    0x038e,  0x038f, 563,  # Ύ-Ώ ύ-ώ   
-    0x0391,  0x03a1, 532,  # Α-Ρ α-ρ   
-    0x03a3,  0x03ab, 532,  # Σ-Ϋ σ-ϋ   
-    0x0401,  0x040c, 580,  # Ё-Ќ ё-ќ   
-    0x040e,  0x040f, 580,  # Ў-Џ ў-џ   
-    0x0410,  0x042f, 532,  # А-Я а-я   
-    0x0531,  0x0556, 548,  # Ա-Ֆ ա-ֆ   
-    0x10a0,  0x10c5, 548,  # Ⴀ-Ⴥ ა-ჵ   
-    0x1f08,  0x1f0f, 492,  # Ἀ-Ἇ ἀ-ἇ   
-    0x1f18,  0x1f1d, 492,  # Ἐ-Ἕ ἐ-ἕ   
-    0x1f28,  0x1f2f, 492,  # Ἠ-Ἧ ἠ-ἧ   
-    0x1f38,  0x1f3f, 492,  # Ἰ-Ἷ ἰ-ἷ   
-    0x1f48,  0x1f4d, 492,  # Ὀ-Ὅ ὀ-ὅ   
-    0x1f68,  0x1f6f, 492,  # Ὠ-Ὧ ὠ-ὧ   
-    0x1f88,  0x1f8f, 492,  # ᾈ-ᾏ ᾀ-ᾇ   
-    0x1f98,  0x1f9f, 492,  # ᾘ-ᾟ ᾐ-ᾗ   
-    0x1fa8,  0x1faf, 492,  # ᾨ-ᾯ ᾠ-ᾧ   
-    0x1fb8,  0x1fb9, 492,  # Ᾰ-Ᾱ ᾰ-ᾱ   
-    0x1fba,  0x1fbb, 426,  # Ὰ-Ά ὰ-ά   
-    0x1fc8,  0x1fcb, 414,  # Ὲ-Ή ὲ-ή   
-    0x1fd8,  0x1fd9, 492,  # Ῐ-Ῑ ῐ-ῑ   
-    0x1fda,  0x1fdb, 400,  # Ὶ-Ί ὶ-ί   
-    0x1fe8,  0x1fe9, 492,  # Ῠ-Ῡ ῠ-ῡ   
-    0x1fea,  0x1feb, 388,  # Ὺ-Ύ ὺ-ύ   
-    0x1ff8,  0x1ff9, 372,  # Ὸ-Ό ὸ-ό   
-    0x1ffa,  0x1ffb, 374,  # Ὼ-Ώ ὼ-ώ   
-    0x2160,  0x216f, 516,  # Ⅰ-Ⅿ ⅰ-ⅿ   
-    0x24b6,  0x24cf, 526,  # Ⓐ-Ⓩ ⓐ-ⓩ   
-    0xff21,  0xff3a, 532]  # A-Z a-z   
-
-  tolowerSinglets = [
-    0x0100, 501,  # Ā ā   
-    0x0102, 501,  # Ă ă   
-    0x0104, 501,  # Ą ą   
-    0x0106, 501,  # Ć ć   
-    0x0108, 501,  # Ĉ ĉ   
-    0x010a, 501,  # Ċ ċ   
-    0x010c, 501,  # Č č   
-    0x010e, 501,  # Ď ď   
-    0x0110, 501,  # Đ đ   
-    0x0112, 501,  # Ē ē   
-    0x0114, 501,  # Ĕ ĕ   
-    0x0116, 501,  # Ė ė   
-    0x0118, 501,  # Ę ę   
-    0x011a, 501,  # Ě ě   
-    0x011c, 501,  # Ĝ ĝ   
-    0x011e, 501,  # Ğ ğ   
-    0x0120, 501,  # Ġ ġ   
-    0x0122, 501,  # Ģ ģ   
-    0x0124, 501,  # Ĥ ĥ   
-    0x0126, 501,  # Ħ ħ   
-    0x0128, 501,  # Ĩ ĩ   
-    0x012a, 501,  # Ī ī   
-    0x012c, 501,  # Ĭ ĭ   
-    0x012e, 501,  # Į į   
-    0x0130, 301,  # İ i   
-    0x0132, 501,  # IJ ij   
-    0x0134, 501,  # Ĵ ĵ   
-    0x0136, 501,  # Ķ ķ   
-    0x0139, 501,  # Ĺ ĺ   
-    0x013b, 501,  # Ļ ļ   
-    0x013d, 501,  # Ľ ľ   
-    0x013f, 501,  # Ŀ ŀ   
-    0x0141, 501,  # Ł ł   
-    0x0143, 501,  # Ń ń   
-    0x0145, 501,  # Ņ ņ   
-    0x0147, 501,  # Ň ň   
-    0x014a, 501,  # Ŋ ŋ   
-    0x014c, 501,  # Ō ō   
-    0x014e, 501,  # Ŏ ŏ   
-    0x0150, 501,  # Ő ő   
-    0x0152, 501,  # Œ œ   
-    0x0154, 501,  # Ŕ ŕ   
-    0x0156, 501,  # Ŗ ŗ   
-    0x0158, 501,  # Ř ř   
-    0x015a, 501,  # Ś ś   
-    0x015c, 501,  # Ŝ ŝ   
-    0x015e, 501,  # Ş ş   
-    0x0160, 501,  # Š š   
-    0x0162, 501,  # Ţ ţ   
-    0x0164, 501,  # Ť ť   
-    0x0166, 501,  # Ŧ ŧ   
-    0x0168, 501,  # Ũ ũ   
-    0x016a, 501,  # Ū ū   
-    0x016c, 501,  # Ŭ ŭ   
-    0x016e, 501,  # Ů ů   
-    0x0170, 501,  # Ű ű   
-    0x0172, 501,  # Ų ų   
-    0x0174, 501,  # Ŵ ŵ   
-    0x0176, 501,  # Ŷ ŷ   
-    0x0178, 379,  # Ÿ ÿ   
-    0x0179, 501,  # Ź ź   
-    0x017b, 501,  # Ż ż   
-    0x017d, 501,  # Ž ž   
-    0x0181, 710,  # Ɓ ɓ   
-    0x0182, 501,  # Ƃ ƃ   
-    0x0184, 501,  # Ƅ ƅ   
-    0x0186, 706,  # Ɔ ɔ   
-    0x0187, 501,  # Ƈ ƈ   
-    0x018b, 501,  # Ƌ ƌ   
-    0x0190, 703,  # Ɛ ɛ   
-    0x0191, 501,  # Ƒ ƒ   
-    0x0193, 705,  # Ɠ ɠ   
-    0x0194, 707,  # Ɣ ɣ   
-    0x0196, 711,  # Ɩ ɩ   
-    0x0197, 709,  # Ɨ ɨ   
-    0x0198, 501,  # Ƙ ƙ   
-    0x019c, 711,  # Ɯ ɯ   
-    0x019d, 713,  # Ɲ ɲ   
-    0x01a0, 501,  # Ơ ơ   
-    0x01a2, 501,  # Ƣ ƣ   
-    0x01a4, 501,  # Ƥ ƥ   
-    0x01a7, 501,  # Ƨ ƨ   
-    0x01a9, 718,  # Ʃ ʃ   
-    0x01ac, 501,  # Ƭ ƭ   
-    0x01ae, 718,  # Ʈ ʈ   
-    0x01af, 501,  # Ư ư   
-    0x01b3, 501,  # Ƴ ƴ   
-    0x01b5, 501,  # Ƶ ƶ   
-    0x01b7, 719,  # Ʒ ʒ   
-    0x01b8, 501,  # Ƹ ƹ   
-    0x01bc, 501,  # Ƽ ƽ   
-    0x01c4, 502,  # DŽ dž   
-    0x01c5, 501,  # Dž dž   
-    0x01c7, 502,  # LJ lj   
-    0x01c8, 501,  # Lj lj   
-    0x01ca, 502,  # NJ nj   
-    0x01cb, 501,  # Nj nj   
-    0x01cd, 501,  # Ǎ ǎ   
-    0x01cf, 501,  # Ǐ ǐ   
-    0x01d1, 501,  # Ǒ ǒ   
-    0x01d3, 501,  # Ǔ ǔ   
-    0x01d5, 501,  # Ǖ ǖ   
-    0x01d7, 501,  # Ǘ ǘ   
-    0x01d9, 501,  # Ǚ ǚ   
-    0x01db, 501,  # Ǜ ǜ   
-    0x01de, 501,  # Ǟ ǟ   
-    0x01e0, 501,  # Ǡ ǡ   
-    0x01e2, 501,  # Ǣ ǣ   
-    0x01e4, 501,  # Ǥ ǥ   
-    0x01e6, 501,  # Ǧ ǧ   
-    0x01e8, 501,  # Ǩ ǩ   
-    0x01ea, 501,  # Ǫ ǫ   
-    0x01ec, 501,  # Ǭ ǭ   
-    0x01ee, 501,  # Ǯ ǯ   
-    0x01f1, 502,  # DZ dz   
-    0x01f2, 501,  # Dz dz   
-    0x01f4, 501,  # Ǵ ǵ   
-    0x01fa, 501,  # Ǻ ǻ   
-    0x01fc, 501,  # Ǽ ǽ   
-    0x01fe, 501,  # Ǿ ǿ   
-    0x0200, 501,  # Ȁ ȁ   
-    0x0202, 501,  # Ȃ ȃ   
-    0x0204, 501,  # Ȅ ȅ   
-    0x0206, 501,  # Ȇ ȇ   
-    0x0208, 501,  # Ȉ ȉ   
-    0x020a, 501,  # Ȋ ȋ   
-    0x020c, 501,  # Ȍ ȍ   
-    0x020e, 501,  # Ȏ ȏ   
-    0x0210, 501,  # Ȑ ȑ   
-    0x0212, 501,  # Ȓ ȓ   
-    0x0214, 501,  # Ȕ ȕ   
-    0x0216, 501,  # Ȗ ȗ   
-    0x0386, 538,  # Ά ά   
-    0x038c, 564,  # Ό ό   
-    0x03e2, 501,  # Ϣ ϣ   
-    0x03e4, 501,  # Ϥ ϥ   
-    0x03e6, 501,  # Ϧ ϧ   
-    0x03e8, 501,  # Ϩ ϩ   
-    0x03ea, 501,  # Ϫ ϫ   
-    0x03ec, 501,  # Ϭ ϭ   
-    0x03ee, 501,  # Ϯ ϯ   
-    0x0460, 501,  # Ѡ ѡ   
-    0x0462, 501,  # Ѣ ѣ   
-    0x0464, 501,  # Ѥ ѥ   
-    0x0466, 501,  # Ѧ ѧ   
-    0x0468, 501,  # Ѩ ѩ   
-    0x046a, 501,  # Ѫ ѫ   
-    0x046c, 501,  # Ѭ ѭ   
-    0x046e, 501,  # Ѯ ѯ   
-    0x0470, 501,  # Ѱ ѱ   
-    0x0472, 501,  # Ѳ ѳ   
-    0x0474, 501,  # Ѵ ѵ   
-    0x0476, 501,  # Ѷ ѷ   
-    0x0478, 501,  # Ѹ ѹ   
-    0x047a, 501,  # Ѻ ѻ   
-    0x047c, 501,  # Ѽ ѽ   
-    0x047e, 501,  # Ѿ ѿ   
-    0x0480, 501,  # Ҁ ҁ   
-    0x0490, 501,  # Ґ ґ   
-    0x0492, 501,  # Ғ ғ   
-    0x0494, 501,  # Ҕ ҕ   
-    0x0496, 501,  # Җ җ   
-    0x0498, 501,  # Ҙ ҙ   
-    0x049a, 501,  # Қ қ   
-    0x049c, 501,  # Ҝ ҝ   
-    0x049e, 501,  # Ҟ ҟ   
-    0x04a0, 501,  # Ҡ ҡ   
-    0x04a2, 501,  # Ң ң   
-    0x04a4, 501,  # Ҥ ҥ   
-    0x04a6, 501,  # Ҧ ҧ   
-    0x04a8, 501,  # Ҩ ҩ   
-    0x04aa, 501,  # Ҫ ҫ   
-    0x04ac, 501,  # Ҭ ҭ   
-    0x04ae, 501,  # Ү ү   
-    0x04b0, 501,  # Ұ ұ   
-    0x04b2, 501,  # Ҳ ҳ   
-    0x04b4, 501,  # Ҵ ҵ   
-    0x04b6, 501,  # Ҷ ҷ   
-    0x04b8, 501,  # Ҹ ҹ   
-    0x04ba, 501,  # Һ һ   
-    0x04bc, 501,  # Ҽ ҽ   
-    0x04be, 501,  # Ҿ ҿ   
-    0x04c1, 501,  # Ӂ ӂ   
-    0x04c3, 501,  # Ӄ ӄ   
-    0x04c7, 501,  # Ӈ ӈ   
-    0x04cb, 501,  # Ӌ ӌ   
-    0x04d0, 501,  # Ӑ ӑ   
-    0x04d2, 501,  # Ӓ ӓ   
-    0x04d4, 501,  # Ӕ ӕ   
-    0x04d6, 501,  # Ӗ ӗ   
-    0x04d8, 501,  # Ә ә   
-    0x04da, 501,  # Ӛ ӛ   
-    0x04dc, 501,  # Ӝ ӝ   
-    0x04de, 501,  # Ӟ ӟ   
-    0x04e0, 501,  # Ӡ ӡ   
-    0x04e2, 501,  # Ӣ ӣ   
-    0x04e4, 501,  # Ӥ ӥ   
-    0x04e6, 501,  # Ӧ ӧ   
-    0x04e8, 501,  # Ө ө   
-    0x04ea, 501,  # Ӫ ӫ   
-    0x04ee, 501,  # Ӯ ӯ   
-    0x04f0, 501,  # Ӱ ӱ   
-    0x04f2, 501,  # Ӳ ӳ   
-    0x04f4, 501,  # Ӵ ӵ   
-    0x04f8, 501,  # Ӹ ӹ   
-    0x1e00, 501,  # Ḁ ḁ   
-    0x1e02, 501,  # Ḃ ḃ   
-    0x1e04, 501,  # Ḅ ḅ   
-    0x1e06, 501,  # Ḇ ḇ   
-    0x1e08, 501,  # Ḉ ḉ   
-    0x1e0a, 501,  # Ḋ ḋ   
-    0x1e0c, 501,  # Ḍ ḍ   
-    0x1e0e, 501,  # Ḏ ḏ   
-    0x1e10, 501,  # Ḑ ḑ   
-    0x1e12, 501,  # Ḓ ḓ   
-    0x1e14, 501,  # Ḕ ḕ   
-    0x1e16, 501,  # Ḗ ḗ   
-    0x1e18, 501,  # Ḙ ḙ   
-    0x1e1a, 501,  # Ḛ ḛ   
-    0x1e1c, 501,  # Ḝ ḝ   
-    0x1e1e, 501,  # Ḟ ḟ   
-    0x1e20, 501,  # Ḡ ḡ   
-    0x1e22, 501,  # Ḣ ḣ   
-    0x1e24, 501,  # Ḥ ḥ   
-    0x1e26, 501,  # Ḧ ḧ   
-    0x1e28, 501,  # Ḩ ḩ   
-    0x1e2a, 501,  # Ḫ ḫ   
-    0x1e2c, 501,  # Ḭ ḭ   
-    0x1e2e, 501,  # Ḯ ḯ   
-    0x1e30, 501,  # Ḱ ḱ   
-    0x1e32, 501,  # Ḳ ḳ   
-    0x1e34, 501,  # Ḵ ḵ   
-    0x1e36, 501,  # Ḷ ḷ   
-    0x1e38, 501,  # Ḹ ḹ   
-    0x1e3a, 501,  # Ḻ ḻ   
-    0x1e3c, 501,  # Ḽ ḽ   
-    0x1e3e, 501,  # Ḿ ḿ   
-    0x1e40, 501,  # Ṁ ṁ   
-    0x1e42, 501,  # Ṃ ṃ   
-    0x1e44, 501,  # Ṅ ṅ   
-    0x1e46, 501,  # Ṇ ṇ   
-    0x1e48, 501,  # Ṉ ṉ   
-    0x1e4a, 501,  # Ṋ ṋ   
-    0x1e4c, 501,  # Ṍ ṍ   
-    0x1e4e, 501,  # Ṏ ṏ   
-    0x1e50, 501,  # Ṑ ṑ   
-    0x1e52, 501,  # Ṓ ṓ   
-    0x1e54, 501,  # Ṕ ṕ   
-    0x1e56, 501,  # Ṗ ṗ   
-    0x1e58, 501,  # Ṙ ṙ   
-    0x1e5a, 501,  # Ṛ ṛ   
-    0x1e5c, 501,  # Ṝ ṝ   
-    0x1e5e, 501,  # Ṟ ṟ   
-    0x1e60, 501,  # Ṡ ṡ   
-    0x1e62, 501,  # Ṣ ṣ   
-    0x1e64, 501,  # Ṥ ṥ   
-    0x1e66, 501,  # Ṧ ṧ   
-    0x1e68, 501,  # Ṩ ṩ   
-    0x1e6a, 501,  # Ṫ ṫ   
-    0x1e6c, 501,  # Ṭ ṭ   
-    0x1e6e, 501,  # Ṯ ṯ   
-    0x1e70, 501,  # Ṱ ṱ   
-    0x1e72, 501,  # Ṳ ṳ   
-    0x1e74, 501,  # Ṵ ṵ   
-    0x1e76, 501,  # Ṷ ṷ   
-    0x1e78, 501,  # Ṹ ṹ   
-    0x1e7a, 501,  # Ṻ ṻ   
-    0x1e7c, 501,  # Ṽ ṽ   
-    0x1e7e, 501,  # Ṿ ṿ   
-    0x1e80, 501,  # Ẁ ẁ   
-    0x1e82, 501,  # Ẃ ẃ   
-    0x1e84, 501,  # Ẅ ẅ   
-    0x1e86, 501,  # Ẇ ẇ   
-    0x1e88, 501,  # Ẉ ẉ   
-    0x1e8a, 501,  # Ẋ ẋ   
-    0x1e8c, 501,  # Ẍ ẍ   
-    0x1e8e, 501,  # Ẏ ẏ   
-    0x1e90, 501,  # Ẑ ẑ   
-    0x1e92, 501,  # Ẓ ẓ   
-    0x1e94, 501,  # Ẕ ẕ   
-    0x1ea0, 501,  # Ạ ạ   
-    0x1ea2, 501,  # Ả ả   
-    0x1ea4, 501,  # Ấ ấ   
-    0x1ea6, 501,  # Ầ ầ   
-    0x1ea8, 501,  # Ẩ ẩ   
-    0x1eaa, 501,  # Ẫ ẫ   
-    0x1eac, 501,  # Ậ ậ   
-    0x1eae, 501,  # Ắ ắ   
-    0x1eb0, 501,  # Ằ ằ   
-    0x1eb2, 501,  # Ẳ ẳ   
-    0x1eb4, 501,  # Ẵ ẵ   
-    0x1eb6, 501,  # Ặ ặ   
-    0x1eb8, 501,  # Ẹ ẹ   
-    0x1eba, 501,  # Ẻ ẻ   
-    0x1ebc, 501,  # Ẽ ẽ   
-    0x1ebe, 501,  # Ế ế   
-    0x1ec0, 501,  # Ề ề   
-    0x1ec2, 501,  # Ể ể   
-    0x1ec4, 501,  # Ễ ễ   
-    0x1ec6, 501,  # Ệ ệ   
-    0x1ec8, 501,  # Ỉ ỉ   
-    0x1eca, 501,  # Ị ị   
-    0x1ecc, 501,  # Ọ ọ   
-    0x1ece, 501,  # Ỏ ỏ   
-    0x1ed0, 501,  # Ố ố   
-    0x1ed2, 501,  # Ồ ồ   
-    0x1ed4, 501,  # Ổ ổ   
-    0x1ed6, 501,  # Ỗ ỗ   
-    0x1ed8, 501,  # Ộ ộ   
-    0x1eda, 501,  # Ớ ớ   
-    0x1edc, 501,  # Ờ ờ   
-    0x1ede, 501,  # Ở ở   
-    0x1ee0, 501,  # Ỡ ỡ   
-    0x1ee2, 501,  # Ợ ợ   
-    0x1ee4, 501,  # Ụ ụ   
-    0x1ee6, 501,  # Ủ ủ   
-    0x1ee8, 501,  # Ứ ứ   
-    0x1eea, 501,  # Ừ ừ   
-    0x1eec, 501,  # Ử ử   
-    0x1eee, 501,  # Ữ ữ   
-    0x1ef0, 501,  # Ự ự   
-    0x1ef2, 501,  # Ỳ ỳ   
-    0x1ef4, 501,  # Ỵ ỵ   
-    0x1ef6, 501,  # Ỷ ỷ   
-    0x1ef8, 501,  # Ỹ ỹ   
-    0x1f59, 492,  # Ὑ ὑ   
-    0x1f5b, 492,  # Ὓ ὓ   
-    0x1f5d, 492,  # Ὕ ὕ   
-    0x1f5f, 492,  # Ὗ ὗ   
-    0x1fbc, 491,  # ᾼ ᾳ   
-    0x1fcc, 491,  # ῌ ῃ   
-    0x1fec, 493,  # Ῥ ῥ   
-    0x1ffc, 491]  # ῼ ῳ   
-
-  toTitleSinglets = [
-    0x01c4, 501,  # DŽ Dž   
-    0x01c6, 499,  # dž Dž   
-    0x01c7, 501,  # LJ Lj   
-    0x01c9, 499,  # lj Lj   
-    0x01ca, 501,  # NJ Nj   
-    0x01cc, 499,  # nj Nj   
-    0x01f1, 501,  # DZ Dz   
-    0x01f3, 499]  # dz Dz   
-
-proc binarySearch(c: irune, tab: openArray[iRune], len, stride: int): int = 
+    let o = runeOffset(s, pos)
+    if o < 0:
+      result = ""
+    elif len == int.high:
+      result = s.substr(o, s.len-1)
+    elif len < 0:
+      let (e, rl) = runeReverseOffset(s, -len)
+      discard rl
+      if e <= 0:
+        result = ""
+      else:
+        result = s.substr(o, e-1)
+    else:
+      var e = runeOffset(s, len, o)
+      if e < 0:
+        e = s.len
+      result = s.substr(o, e-1)
+
+proc `<=%`*(a, b: Rune): bool =
+  ## Checks if code point of `a` is smaller or equal to code point of `b`.
+  runnableExamples:
+    let
+      a = "ú".runeAt(0)
+      b = "ü".runeAt(0)
+    doAssert a <=% b
+  return int(a) <=% int(b)
+
+proc `<%`*(a, b: Rune): bool =
+  ## Checks if code point of `a` is smaller than code point of `b`.
+  runnableExamples:
+    let
+      a = "ú".runeAt(0)
+      b = "ü".runeAt(0)
+    doAssert a <% b
+  return int(a) <% int(b)
+
+proc `==`*(a, b: Rune): bool =
+  ## Checks if two runes are equal.
+  return int(a) == int(b)
+
+
+include "includes/unicode_ranges"
+
+proc binarySearch(c: RuneImpl, tab: openArray[int32], len, stride: int): int =
   var n = len
   var t = 0
-  while n > 1: 
+  while n > 1:
     var m = n div 2
     var p = t + m*stride
     if c >= tab[p]:
@@ -1075,41 +479,65 @@ proc binarySearch(c: irune, tab: openArray[iRune], len, stride: int): int =
     return t
   return -1
 
-proc toLower*(c: TRune): TRune = 
-  ## Converts `c` into lower case. This works for any Unicode character.
-  ## If possible, prefer `toLower` over `toUpper`. 
-  var c = irune(c)
-  var p = binarySearch(c, tolowerRanges, len(toLowerRanges) div 3, 3)
-  if p >= 0 and c >= tolowerRanges[p] and c <= tolowerRanges[p+1]:
-    return TRune(c + tolowerRanges[p+2] - 500)
+proc toLower*(c: Rune): Rune {.rtl, extern: "nuc$1".} =
+  ## Converts ``c`` into lower case. This works for any rune.
+  ##
+  ## If possible, prefer ``toLower`` over ``toUpper``.
+  ##
+  ## See also:
+  ## * `toUpper proc <#toUpper,Rune>`_
+  ## * `toTitle proc <#toTitle,Rune>`_
+  ## * `isLower proc <#isLower,Rune>`_
+  var c = RuneImpl(c)
+  var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3)
+  if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]:
+    return Rune(c + toLowerRanges[p+2] - 500)
   p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2)
   if p >= 0 and c == toLowerSinglets[p]:
-    return TRune(c + toLowerSinglets[p+1] - 500)
-  return TRune(c)
+    return Rune(c + toLowerSinglets[p+1] - 500)
+  return Rune(c)
 
-proc toUpper*(c: TRune): TRune = 
-  ## Converts `c` into upper case. This works for any Unicode character.
-  ## If possible, prefer `toLower` over `toUpper`. 
-  var c = irune(c)
+proc toUpper*(c: Rune): Rune {.rtl, extern: "nuc$1".} =
+  ## Converts ``c`` into upper case. This works for any rune.
+  ##
+  ## If possible, prefer ``toLower`` over ``toUpper``.
+  ##
+  ## See also:
+  ## * `toLower proc <#toLower,Rune>`_
+  ## * `toTitle proc <#toTitle,Rune>`_
+  ## * `isUpper proc <#isUpper,Rune>`_
+  var c = RuneImpl(c)
   var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3)
   if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]:
-    return TRune(c + toUpperRanges[p+2] - 500)
+    return Rune(c + toUpperRanges[p+2] - 500)
   p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2)
   if p >= 0 and c == toUpperSinglets[p]:
-    return TRune(c + toUpperSinglets[p+1] - 500)
-  return TRune(c)
+    return Rune(c + toUpperSinglets[p+1] - 500)
+  return Rune(c)
 
-proc toTitle*(c: TRune): TRune = 
-  var c = irune(c)
+proc toTitle*(c: Rune): Rune {.rtl, extern: "nuc$1".} =
+  ## Converts ``c`` to title case.
+  ##
+  ## See also:
+  ## * `toLower proc <#toLower,Rune>`_
+  ## * `toUpper proc <#toUpper,Rune>`_
+  ## * `isTitle proc <#isTitle,Rune>`_
+  var c = RuneImpl(c)
   var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2)
   if p >= 0 and c == toTitleSinglets[p]:
-    return TRune(c + toTitleSinglets[p+1] - 500)
-  return TRune(c)
+    return Rune(c + toTitleSinglets[p+1] - 500)
+  return Rune(c)
 
-proc isLower*(c: TRune): bool = 
-  ## returns true iff `c` is a lower case Unicode character
-  ## If possible, prefer `isLower` over `isUpper`. 
-  var c = irune(c)
+proc isLower*(c: Rune): bool {.rtl, extern: "nuc$1".} =
+  ## Returns true if ``c`` is a lower case rune.
+  ##
+  ## If possible, prefer ``isLower`` over ``isUpper``.
+  ##
+  ## See also:
+  ## * `toLower proc <#toLower,Rune>`_
+  ## * `isUpper proc <#isUpper,Rune>`_
+  ## * `isTitle proc <#isTitle,Rune>`_
+  var c = RuneImpl(c)
   # Note: toUpperRanges is correct here!
   var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3)
   if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]:
@@ -1118,10 +546,18 @@ proc isLower*(c: TRune): bool =
   if p >= 0 and c == toUpperSinglets[p]:
     return true
 
-proc isUpper*(c: TRune): bool = 
-  ## returns true iff `c` is a upper case Unicode character
-  ## If possible, prefer `isLower` over `isUpper`. 
-  var c = irune(c)
+proc isUpper*(c: Rune): bool {.rtl, extern: "nuc$1".} =
+  ## Returns true if ``c`` is a upper case rune.
+  ##
+  ## If possible, prefer ``isLower`` over ``isUpper``.
+  ##
+  ## See also:
+  ## * `toUpper proc <#toUpper,Rune>`_
+  ## * `isLower proc <#isLower,Rune>`_
+  ## * `isTitle proc <#isTitle,Rune>`_
+  ## * `isAlpha proc <#isAlpha,Rune>`_
+  ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_
+  var c = RuneImpl(c)
   # Note: toLowerRanges is correct here!
   var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3)
   if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]:
@@ -1130,51 +566,950 @@ proc isUpper*(c: TRune): bool =
   if p >= 0 and c == toLowerSinglets[p]:
     return true
 
-proc isAlpha*(c: TRune): bool = 
-  ## returns true iff `c` is an *alpha* Unicode character (i.e. a letter)
-  if isUpper(c) or isLower(c): 
+proc isAlpha*(c: Rune): bool {.rtl, extern: "nuc$1".} =
+  ## Returns true if ``c`` is an *alpha* rune (i.e., a letter).
+  ##
+  ## See also:
+  ## * `isLower proc <#isLower,Rune>`_
+  ## * `isTitle proc <#isTitle,Rune>`_
+  ## * `isAlpha proc <#isAlpha,Rune>`_
+  ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_
+  ## * `isCombining proc <#isCombining,Rune>`_
+  if isUpper(c) or isLower(c):
     return true
-  var c = irune(c)
+  var c = RuneImpl(c)
   var p = binarySearch(c, alphaRanges, len(alphaRanges) div 2, 2)
   if p >= 0 and c >= alphaRanges[p] and c <= alphaRanges[p+1]:
     return true
   p = binarySearch(c, alphaSinglets, len(alphaSinglets), 1)
   if p >= 0 and c == alphaSinglets[p]:
     return true
-  
-proc isTitle*(c: TRune): bool = 
+
+proc isTitle*(c: Rune): bool {.rtl, extern: "nuc$1".} =
+  ## Returns true if ``c`` is a Unicode titlecase code point.
+  ##
+  ## See also:
+  ## * `toTitle proc <#toTitle,Rune>`_
+  ## * `isLower proc <#isLower,Rune>`_
+  ## * `isUpper proc <#isUpper,Rune>`_
+  ## * `isAlpha proc <#isAlpha,Rune>`_
+  ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_
   return isUpper(c) and isLower(c)
 
-proc isWhiteSpace*(c: TRune): bool = 
-  ## returns true iff `c` is a Unicode whitespace character
-  var c = irune(c)
+proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1".} =
+  ## Returns true if ``c`` is a Unicode whitespace code point.
+  ##
+  ## See also:
+  ## * `isLower proc <#isLower,Rune>`_
+  ## * `isUpper proc <#isUpper,Rune>`_
+  ## * `isTitle proc <#isTitle,Rune>`_
+  ## * `isAlpha proc <#isAlpha,Rune>`_
+  var c = RuneImpl(c)
   var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2)
   if p >= 0 and c >= spaceRanges[p] and c <= spaceRanges[p+1]:
     return true
 
-iterator runes*(s: string): TRune =
-  ## iterates over any unicode character of the string `s`.
+proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1".} =
+  ## Returns true if ``c`` is a Unicode combining code unit.
+  ##
+  ## See also:
+  ## * `isLower proc <#isLower,Rune>`_
+  ## * `isUpper proc <#isUpper,Rune>`_
+  ## * `isTitle proc <#isTitle,Rune>`_
+  ## * `isAlpha proc <#isAlpha,Rune>`_
+  var c = RuneImpl(c)
+
+  # Optimized to return false immediately for ASCII
+  return c >= 0x0300 and (c <= 0x036f or
+    (c >= 0x1ab0 and c <= 0x1aff) or
+    (c >= 0x1dc0 and c <= 0x1dff) or
+    (c >= 0x20d0 and c <= 0x20ff) or
+    (c >= 0xfe20 and c <= 0xfe2f))
+
+template runeCheck(s, runeProc) =
+  ## Common code for isAlpha and isSpace.
+  result = if len(s) == 0: false else: true
+  var
+    i = 0
+    rune: Rune
+  while i < len(s) and result:
+    fastRuneAt(s, i, rune, doInc = true)
+    result = runeProc(rune) and result
+
+proc isAlpha*(s: openArray[char]): bool {.noSideEffect,
+  rtl, extern: "nuc$1Str".} =
+  ## Returns true if ``s`` contains all alphabetic runes.
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.isAlpha
+  runeCheck(s, isAlpha)
+
+proc isSpace*(s: openArray[char]): bool {.noSideEffect,
+  rtl, extern: "nuc$1Str".} =
+  ## Returns true if ``s`` contains all whitespace runes.
+  runnableExamples:
+    let a = "\t\l \v\r\f"
+    doAssert a.isSpace
+  runeCheck(s, isWhiteSpace)
+
+
+template convertRune(s, runeProc) =
+  ## Convert runes in ``s`` using ``runeProc`` as the converter.
+  result = newString(len(s))
+  var
+    i = 0
+    resultIndex = 0
+    rune: Rune
+  while i < len(s):
+    fastRuneAt(s, i, rune, doInc = true)
+    rune = runeProc(rune)
+    fastToUTF8Copy(rune, result, resultIndex, doInc = true)
+
+proc toUpper*(s: openArray[char]): string {.noSideEffect,
+  rtl, extern: "nuc$1Str".} =
+  ## Converts ``s`` into upper-case runes.
+  runnableExamples:
+    doAssert toUpper("abγ") == "ABΓ"
+  convertRune(s, toUpper)
+
+proc toLower*(s: openArray[char]): string {.noSideEffect,
+  rtl, extern: "nuc$1Str".} =
+  ## Converts ``s`` into lower-case runes.
+  runnableExamples:
+    doAssert toLower("ABΓ") == "abγ"
+  convertRune(s, toLower)
+
+proc swapCase*(s: openArray[char]): string {.noSideEffect,
+  rtl, extern: "nuc$1".} =
+  ## Swaps the case of runes in ``s``.
+  ##
+  ## Returns a new string such that the cases of all runes
+  ## are swapped if possible.
+  runnableExamples:
+    doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA"
+
   var
     i = 0
-    result: TRune
+    resultIndex = 0
+    rune: Rune
+  result = newString(len(s))
+  while i < len(s):
+    fastRuneAt(s, i, rune)
+    if rune.isUpper():
+      rune = rune.toLower()
+    elif rune.isLower():
+      rune = rune.toUpper()
+    fastToUTF8Copy(rune, result, resultIndex, doInc = true)
+
+proc capitalize*(s: openArray[char]): string {.noSideEffect,
+  rtl, extern: "nuc$1".} =
+  ## Converts the first character of ``s`` into an upper-case rune.
+  runnableExamples:
+    doAssert capitalize("βeta") == "Βeta"
+
+  if len(s) == 0:
+    return ""
+  var
+    rune: Rune
+    i = 0
+  fastRuneAt(s, i, rune, doInc = true)
+  result = $toUpper(rune) & substr(s.toOpenArray(i, s.high))
+
+when not defined(nimHasEffectsOf):
+  {.pragma: effectsOf.}
+
+proc translate*(s: openArray[char], replacements: proc(key: string): string): string {.
+  rtl, extern: "nuc$1", effectsOf: replacements.} =
+  ## Translates words in a string using the ``replacements`` proc to substitute
+  ## words inside ``s`` with their replacements.
+  ##
+  ## ``replacements`` is any proc that takes a word and returns
+  ## a new word to fill it's place.
+  runnableExamples:
+    proc wordToNumber(s: string): string =
+      case s
+      of "one": "1"
+      of "two": "2"
+      else: s
+    let a = "one two three four"
+    doAssert a.translate(wordToNumber) == "1 2 three four"
+
+  # Allocate memory for the new string based on the old one.
+  # If the new string length is less than the old, no allocations
+  # will be needed. If the new string length is greater than the
+  # old, then maybe only one allocation is needed
+  result = newStringOfCap(s.len)
+  var
+    index = 0
+    lastIndex = 0
+    wordStart = 0
+    inWord = false
+    rune: Rune
+
+  while index < len(s):
+    lastIndex = index
+    fastRuneAt(s, index, rune)
+    let whiteSpace = rune.isWhiteSpace()
+
+    if whiteSpace and inWord:
+      # If we've reached the end of a word
+      let word = substr(s.toOpenArray(wordStart, lastIndex - 1))
+      result.add(replacements(word))
+      result.add($rune)
+      inWord = false
+    elif not whiteSpace and not inWord:
+      # If we've hit a non space character and
+      # are not currently in a word, track
+      # the starting index of the word
+      inWord = true
+      wordStart = lastIndex
+    elif whiteSpace:
+      result.add($rune)
+
+  if wordStart < len(s) and inWord:
+    # Get the trailing word at the end
+    let word = substr(s.toOpenArray(wordStart,  s.high))
+    result.add(replacements(word))
+
+proc title*(s: openArray[char]): string {.noSideEffect,
+  rtl, extern: "nuc$1".} =
+  ## Converts ``s`` to a unicode title.
+  ##
+  ## Returns a new string such that the first character
+  ## in each word inside ``s`` is capitalized.
+  runnableExamples:
+    doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma"
+
+  var
+    i = 0
+    resultIndex = 0
+    rune: Rune
+  result = newString(len(s))
+  var firstRune = true
+
+  while i < len(s):
+    fastRuneAt(s, i, rune)
+    if not rune.isWhiteSpace() and firstRune:
+      rune = rune.toUpper()
+      firstRune = false
+    elif rune.isWhiteSpace():
+      firstRune = true
+    fastToUTF8Copy(rune, result, resultIndex, doInc = true)
+
+
+iterator runes*(s: openArray[char]): Rune =
+  ## Iterates over any rune of the string ``s`` returning runes.
+  var
+    i = 0
+    result: Rune
   while i < len(s):
     fastRuneAt(s, i, result, true)
     yield result
 
-proc cmpRunesIgnoreCase*(a, b: string): int = 
-  ## compares two UTF8 strings and ignores the case. Returns:
+iterator utf8*(s: openArray[char]): string =
+  ## Iterates over any rune of the string ``s`` returning utf8 values.
+  ##
+  ## See also:
+  ## * `validateUtf8 proc <#validateUtf8,string>`_
+  ## * `toUTF8 proc <#toUTF8,Rune>`_
+  ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
+  ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
+  var o = 0
+  while o < s.len:
+    let n = runeLenAt(s, o)
+    yield substr(s.toOpenArray(o, (o+n-1)))
+    o += n
+
+proc toRunes*(s: openArray[char]): seq[Rune] =
+  ## Obtains a sequence containing the Runes in ``s``.
+  ##
+  ## See also:
+  ## * `$ proc <#$,Rune>`_ for a reverse operation
+  runnableExamples:
+    let a = toRunes("aáä")
+    doAssert a == @["a".runeAt(0), "á".runeAt(0), "ä".runeAt(0)]
+
+  result = newSeq[Rune]()
+  for r in s.runes:
+    result.add(r)
+
+proc cmpRunesIgnoreCase*(a, b: openArray[char]): int {.rtl, extern: "nuc$1".} =
+  ## Compares two UTF-8 strings and ignores the case. Returns:
   ##
-  ## | 0 iff a == b
-  ## | < 0 iff a < b
-  ## | > 0 iff a > b
+  ## | `0` if a == b
+  ## | `< 0` if a < b
+  ## | `> 0` if a > b
   var i = 0
   var j = 0
-  var ar, br: TRune
+  var ar, br: Rune
   while i < a.len and j < b.len:
     # slow path:
     fastRuneAt(a, i, ar)
     fastRuneAt(b, j, br)
-    result = irune(toLower(ar)) - irune(toLower(br))
+    when sizeof(int) < 4:
+      const lo = low(int).int32
+      const hi = high(int).int32
+      result = clamp(RuneImpl(toLower(ar)) - RuneImpl(toLower(br)), lo, hi).int
+    else:
+      result = RuneImpl(toLower(ar)) - RuneImpl(toLower(br))
     if result != 0: return
   result = a.len - b.len
 
+proc reversed*(s: openArray[char]): string =
+  ## Returns the reverse of ``s``, interpreting it as runes.
+  ##
+  ## Unicode combining characters are correctly interpreted as well.
+  runnableExamples:
+    assert reversed("Reverse this!") == "!siht esreveR"
+    assert reversed("先秦兩漢") == "漢兩秦先"
+    assert reversed("as⃝df̅") == "f̅ds⃝a"
+    assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
+
+  var
+    i = 0
+    lastI = 0
+    newPos = len(s) - 1
+    blockPos = 0
+    r: Rune
+
+  template reverseUntil(pos) =
+    var j = pos - 1
+    while j > blockPos:
+      result[newPos] = s[j]
+      dec j
+      dec newPos
+    blockPos = pos - 1
+
+  result = newString(len(s))
+
+  while i < len(s):
+    lastI = i
+    fastRuneAt(s, i, r, true)
+    if not isCombining(r):
+      reverseUntil(lastI)
+
+  reverseUntil(len(s))
+
+proc graphemeLen*(s: openArray[char]; i: Natural): Natural =
+  ## The number of bytes belonging to byte index ``s[i]``,
+  ## including following combining code units.
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.graphemeLen(1) == 2 ## ñ
+    doAssert a.graphemeLen(2) == 1
+    doAssert a.graphemeLen(4) == 2 ## ó
+
+  var j = i.int
+  var r, r2: Rune
+  if j < s.len:
+    fastRuneAt(s, j, r, true)
+    result = j-i
+    while j < s.len:
+      fastRuneAt(s, j, r2, true)
+      if not isCombining(r2): break
+      result = j-i
+
+proc lastRune*(s: openArray[char]; last: int): (Rune, int) =
+  ## Length of the last rune in ``s[0..last]``. Returns the rune and its length
+  ## in bytes.
+  if s[last] <= chr(127):
+    result = (Rune(s[last]), 1)
+  else:
+    var L = 0
+    while last-L >= 0 and uint(s[last-L]) shr 6 == 0b10: inc(L)
+    var r: Rune
+    fastRuneAt(s, last-L, r, false)
+    result = (r, L+1)
+
+proc size*(r: Rune): int {.noSideEffect.} =
+  ## Returns the number of bytes the rune ``r`` takes.
+  runnableExamples:
+    let a = toRunes "aá"
+    doAssert size(a[0]) == 1
+    doAssert size(a[1]) == 2
+
+  let v = r.uint32
+  if v <= 0x007F'u32: result = 1
+  elif v <= 0x07FF'u32: result = 2
+  elif v <= 0xFFFF'u32: result = 3
+  elif v <= 0x1FFFFF'u32: result = 4
+  elif v <= 0x3FFFFFF'u32: result = 5
+  elif v <= 0x7FFFFFFF'u32: result = 6
+  else: result = 1
+
+# --------- Private templates for different split separators -----------
+proc stringHasSep(s: openArray[char], index: int, seps: openArray[Rune]): bool =
+  var rune: Rune
+  fastRuneAt(s, index, rune, false)
+  return seps.contains(rune)
+
+proc stringHasSep(s: openArray[char], index: int, sep: Rune): bool =
+  var rune: Rune
+  fastRuneAt(s, index, rune, false)
+  return sep == rune
+
+template splitCommon(s, sep, maxsplit: untyped) =
+  ## Common code for split procedures.
+  let
+    sLen = len(s)
+  var
+    last = 0
+    splits = maxsplit
+  if sLen > 0:
+    while last <= sLen:
+      var first = last
+      while last < sLen and not stringHasSep(s, last, sep):
+        inc(last, runeLenAt(s, last))
+      if splits == 0: last = sLen
+      yield substr(s.toOpenArray(first, (last - 1)))
+      if splits == 0: break
+      dec(splits)
+      inc(last, if last < sLen: runeLenAt(s, last) else: 1)
+
+iterator split*(s: openArray[char], seps: openArray[Rune] = unicodeSpaces,
+  maxsplit: int = -1): string =
+  ## Splits the unicode string ``s`` into substrings using a group of separators.
+  ##
+  ## Substrings are separated by a substring containing only ``seps``.
+  runnableExamples:
+    import std/sequtils
+
+    assert toSeq("hÃllo\lthis\lis an\texample\l是".split) ==
+      @["hÃllo", "this", "is", "an", "example", "是"]
+
+    # And the following code splits the same string using a sequence of Runes.
+    assert toSeq(split("añyóng:hÃllo;是$example", ";:$".toRunes)) ==
+      @["añyóng", "hÃllo", "是", "example"]
+
+    # example with a `Rune` separator and unused one `;`:
+    assert toSeq(split("ab是de:f:", ";:是".toRunes)) == @["ab", "de", "f", ""]
+
+    # Another example that splits a string containing a date.
+    let date = "2012-11-20T22:08:08.398990"
+
+    assert toSeq(split(date, " -:T".toRunes)) ==
+      @["2012", "11", "20", "22", "08", "08.398990"]
+
+  splitCommon(s, seps, maxsplit)
+
+iterator splitWhitespace*(s: openArray[char]): string =
+  ## Splits a unicode string at whitespace runes.
+  splitCommon(s, unicodeSpaces, -1)
+
+template accResult(iter: untyped) =
+  result = @[]
+  for x in iter: add(result, x)
+
+proc splitWhitespace*(s: openArray[char]): seq[string] {.noSideEffect,
+  rtl, extern: "ncuSplitWhitespace".} =
+  ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_
+  ## iterator, but is a proc that returns a sequence of substrings.
+  accResult(splitWhitespace(s))
+
+iterator split*(s: openArray[char], sep: Rune, maxsplit: int = -1): string =
+  ## Splits the unicode string ``s`` into substrings using a single separator.
+  ## Substrings are separated by the rune ``sep``.
+  runnableExamples:
+    import std/sequtils
+
+    assert toSeq(split(";;hÃllo;this;is;an;;example;;;是", ";".runeAt(0))) ==
+      @["", "", "hÃllo", "this", "is", "an", "", "example", "", "", "是"]
+
+  splitCommon(s, sep, maxsplit)
+
+proc split*(s: openArray[char], seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1):
+    seq[string] {.noSideEffect, rtl, extern: "nucSplitRunes".} =
+  ## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_,
+  ## but is a proc that returns a sequence of substrings.
+  accResult(split(s, seps, maxsplit))
+
+proc split*(s: openArray[char], sep: Rune, maxsplit: int = -1): seq[string] {.noSideEffect,
+  rtl, extern: "nucSplitRune".} =
+  ## The same as the `split iterator <#split.i,string,Rune,int>`_, but is a proc
+  ## that returns a sequence of substrings.
+  accResult(split(s, sep, maxsplit))
+
+proc strip*(s: openArray[char], leading = true, trailing = true,
+            runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect,
+            rtl, extern: "nucStrip".} =
+  ## Strips leading or trailing ``runes`` from ``s`` and returns
+  ## the resulting string.
+  ##
+  ## If ``leading`` is true (default), leading ``runes`` are stripped.
+  ## If ``trailing`` is true (default), trailing ``runes`` are stripped.
+  ## If both are false, the string is returned unchanged.
+  runnableExamples:
+    let a = "\táñyóng   "
+    doAssert a.strip == "áñyóng"
+    doAssert a.strip(leading = false) == "\táñyóng"
+    doAssert a.strip(trailing = false) == "áñyóng   "
+
+  var
+    sI = 0          ## starting index into string ``s``
+    eI = len(s) - 1 ## ending index into ``s``, where the last ``Rune`` starts
+  if leading:
+    var
+      i = 0
+      xI: int ## value of ``sI`` at the beginning of the iteration
+      rune: Rune
+    while i < len(s):
+      xI = i
+      fastRuneAt(s, i, rune)
+      sI = i # Assume to start from next rune
+      if not runes.contains(rune):
+        sI = xI # Go back to where the current rune starts
+        break
+  if trailing:
+    var
+      i = eI
+      xI: int
+      rune: Rune
+    while i >= 0:
+      xI = i
+      fastRuneAt(s, xI, rune)
+      var yI = i - 1
+      while yI >= 0:
+        var
+          yIend = yI
+          pRune: Rune
+        fastRuneAt(s, yIend, pRune)
+        if yIend < xI: break
+        i = yI
+        rune = pRune
+        dec(yI)
+      if not runes.contains(rune):
+        eI = xI - 1
+        break
+      dec(i)
+  let newLen = eI - sI + 1
+  result = newStringOfCap(newLen)
+  if newLen > 0:
+    result.add substr(s.toOpenArray(sI, eI))
+
+proc repeat*(c: Rune, count: Natural): string {.noSideEffect,
+  rtl, extern: "nucRepeatRune".} =
+  ## Returns a string of ``count`` Runes ``c``.
+  ##
+  ## The returned string will have a rune-length of ``count``.
+  runnableExamples:
+    let a = "ñ".runeAt(0)
+    doAssert a.repeat(5) == "ñññññ"
+
+  let s = $c
+  result = newStringOfCap(count * s.len)
+  for i in 0 ..< count:
+    result.add s
+
+proc align*(s: openArray[char], count: Natural, padding = ' '.Rune): string {.
+  noSideEffect, rtl, extern: "nucAlignString".} =
+  ## Aligns a unicode string ``s`` with ``padding``, so that it has a rune-length
+  ## of ``count``.
+  ##
+  ## ``padding`` characters (by default spaces) are added before ``s`` resulting in
+  ## right alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
+  ## returned unchanged. If you need to left align a string use the `alignLeft
+  ## proc <#alignLeft,string,Natural>`_.
+  runnableExamples:
+    assert align("abc", 4) == " abc"
+    assert align("a", 0) == "a"
+    assert align("1232", 6) == "  1232"
+    assert align("1232", 6, '#'.Rune) == "##1232"
+    assert align("Åge", 5) == "  Åge"
+    assert align("×", 4, '_'.Rune) == "___×"
+
+  let sLen = s.runeLen
+  if sLen < count:
+    let padStr = $padding
+    result = newStringOfCap(padStr.len * count)
+    let spaces = count - sLen
+    for i in 0 ..< spaces: result.add padStr
+    result.add s
+  else:
+    result = s.substr
+
+proc alignLeft*(s: openArray[char], count: Natural, padding = ' '.Rune): string {.
+    noSideEffect.} =
+  ## Left-aligns a unicode string ``s`` with ``padding``, so that it has a
+  ## rune-length of ``count``.
+  ##
+  ## ``padding`` characters (by default spaces) are added after ``s`` resulting in
+  ## left alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
+  ## returned unchanged. If you need to right align a string use the `align
+  ## proc <#align,string,Natural>`_.
+  runnableExamples:
+    assert alignLeft("abc", 4) == "abc "
+    assert alignLeft("a", 0) == "a"
+    assert alignLeft("1232", 6) == "1232  "
+    assert alignLeft("1232", 6, '#'.Rune) == "1232##"
+    assert alignLeft("Åge", 5) == "Åge  "
+    assert alignLeft("×", 4, '_'.Rune) == "×___"
+  let sLen = s.runeLen
+  if sLen < count:
+    let padStr = $padding
+    result = newStringOfCap(s.len + (count - sLen) * padStr.len)
+    result.add s
+    for i in sLen ..< count:
+      result.add padStr
+  else:
+    result = s.substr
+
+
+proc runeLen*(s: string): int {.inline.} =
+  ## Returns the number of runes of the string ``s``.
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.runeLen == 6
+    ## note: a.len == 8
+  runeLen(toOa(s))
+
+proc runeLenAt*(s: string, i: Natural): int {.inline.} =
+  ## Returns the number of bytes the rune starting at ``s[i]`` takes.
+  ##
+  ## See also:
+  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.runeLenAt(0) == 1
+    doAssert a.runeLenAt(1) == 2
+  runeLenAt(toOa(s), i)
+
+proc runeAt*(s: string, i: Natural): Rune {.inline.} =
+  ## Returns the rune in ``s`` at **byte index** ``i``.
+  ##
+  ## See also:
+  ## * `runeAtPos proc <#runeAtPos,string,int>`_
+  ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
+  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.runeAt(1) == "ñ".runeAt(0)
+    doAssert a.runeAt(2) == "ñ".runeAt(1)
+    doAssert a.runeAt(3) == "y".runeAt(0)
+  fastRuneAt(s, i, result, false)
+
+proc validateUtf8*(s: string): int {.inline.} =
+  ## Returns the position of the invalid byte in ``s`` if the string ``s`` does
+  ## not hold valid UTF-8 data. Otherwise ``-1`` is returned.
+  ##
+  ## See also:
+  ## * `toUTF8 proc <#toUTF8,Rune>`_
+  ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
+  ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
+  validateUtf8(toOa(s))
+
+proc runeOffset*(s: string, pos: Natural, start: Natural = 0): int {.inline.} =
+  ## Returns the byte position of rune
+  ## at position ``pos`` in ``s`` with an optional start byte position.
+  ## Returns the special value -1 if it runs out of the string.
+  ##
+  ## **Beware:** This can lead to unoptimized code and slow execution!
+  ## Most problems can be solved more efficiently by using an iterator
+  ## or conversion to a seq of Rune.
+  ##
+  ## See also:
+  ## * `runeReverseOffset proc <#runeReverseOffset,string,Positive>`_
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.runeOffset(1) == 1
+    doAssert a.runeOffset(3) == 4
+    doAssert a.runeOffset(4) == 6
+  runeOffset(toOa(s), pos, start)
+
+proc runeReverseOffset*(s: string, rev: Positive): (int, int) {.inline.} =
+  ## Returns a tuple with the byte offset of the
+  ## rune at position ``rev`` in ``s``, counting
+  ## from the end (starting with 1) and the total
+  ## number of runes in the string.
+  ##
+  ## Returns a negative value for offset if there are too few runes in
+  ## the string to satisfy the request.
+  ##
+  ## **Beware:** This can lead to unoptimized code and slow execution!
+  ## Most problems can be solved more efficiently by using an iterator
+  ## or conversion to a seq of Rune.
+  ##
+  ## See also:
+  ## * `runeOffset proc <#runeOffset,string,Natural,Natural>`_
+  runeReverseOffset(toOa(s), rev)
+
+proc runeAtPos*(s: string, pos: int): Rune {.inline.} =
+  ## Returns the rune at position ``pos``.
+  ##
+  ## **Beware:** This can lead to unoptimized code and slow execution!
+  ## Most problems can be solved more efficiently by using an iterator
+  ## or conversion to a seq of Rune.
+  ##
+  ## See also:
+  ## * `runeAt proc <#runeAt,string,Natural>`_
+  ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
+  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
+  fastRuneAt(toOa(s), runeOffset(s, pos), result, false)
+
+proc runeStrAtPos*(s: string, pos: Natural): string {.inline.} =
+  ## Returns the rune at position ``pos`` as UTF8 String.
+  ##
+  ## **Beware:** This can lead to unoptimized code and slow execution!
+  ## Most problems can be solved more efficiently by using an iterator
+  ## or conversion to a seq of Rune.
+  ##
+  ## See also:
+  ## * `runeAt proc <#runeAt,string,Natural>`_
+  ## * `runeAtPos proc <#runeAtPos,string,int>`_
+  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
+  let o = runeOffset(s, pos)
+  substr(s.toOpenArray(o, (o+runeLenAt(s, o)-1)))
+
+proc runeSubStr*(s: string, pos: int, len: int = int.high): string {.inline.} =
+  ## Returns the UTF-8 substring starting at code point ``pos``
+  ## with ``len`` code points.
+  ##
+  ## If ``pos`` or ``len`` is negative they count from
+  ## the end of the string. If ``len`` is not given it means the longest
+  ## possible string.
+  runnableExamples:
+    let s = "Hänsel  ««: 10,00€"
+    doAssert(runeSubStr(s, 0, 2) == "Hä")
+    doAssert(runeSubStr(s, 10, 1) == ":")
+    doAssert(runeSubStr(s, -6) == "10,00€")
+    doAssert(runeSubStr(s, 10) == ": 10,00€")
+    doAssert(runeSubStr(s, 12, 5) == "10,00")
+    doAssert(runeSubStr(s, -6, 3) == "10,")
+  runeSubStr(toOa(s), pos, len)
+
+
+proc isAlpha*(s: string): bool {.noSideEffect, inline.} =
+  ## Returns true if ``s`` contains all alphabetic runes.
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.isAlpha
+  isAlpha(toOa(s))
+
+proc isSpace*(s: string): bool {.noSideEffect, inline.} =
+  ## Returns true if ``s`` contains all whitespace runes.
+  runnableExamples:
+    let a = "\t\l \v\r\f"
+    doAssert a.isSpace
+  isSpace(toOa(s))
+
+
+proc toUpper*(s: string): string {.noSideEffect, inline.} =
+  ## Converts ``s`` into upper-case runes.
+  runnableExamples:
+    doAssert toUpper("abγ") == "ABΓ"
+  toUpper(toOa(s))
+
+proc toLower*(s: string): string {.noSideEffect, inline.} =
+  ## Converts ``s`` into lower-case runes.
+  runnableExamples:
+    doAssert toLower("ABΓ") == "abγ"
+  toLower(toOa(s))
+
+proc swapCase*(s: string): string {.noSideEffect, inline.} =
+  ## Swaps the case of runes in ``s``.
+  ##
+  ## Returns a new string such that the cases of all runes
+  ## are swapped if possible.
+  runnableExamples:
+    doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA"
+  swapCase(toOa(s))
+
+proc capitalize*(s: string): string {.noSideEffect.} =
+  ## Converts the first character of ``s`` into an upper-case rune.
+  runnableExamples:
+    doAssert capitalize("βeta") == "Βeta"
+  capitalize(toOa(s))
+
+
+proc translate*(s: string, replacements: proc(key: string): string): string {.effectsOf: replacements, inline.} =
+  ## Translates words in a string using the ``replacements`` proc to substitute
+  ## words inside ``s`` with their replacements.
+  ##
+  ## ``replacements`` is any proc that takes a word and returns
+  ## a new word to fill it's place.
+  runnableExamples:
+    proc wordToNumber(s: string): string =
+      case s
+      of "one": "1"
+      of "two": "2"
+      else: s
+    let a = "one two three four"
+    doAssert a.translate(wordToNumber) == "1 2 three four"
+  translate(toOa(s), replacements)
+
+proc title*(s: string): string {.noSideEffect, inline.} =
+  ## Converts ``s`` to a unicode title.
+  ##
+  ## Returns a new string such that the first character
+  ## in each word inside ``s`` is capitalized.
+  runnableExamples:
+    doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma"
+  title(toOa(s))
+
+
+iterator runes*(s: string): Rune =
+  ## Iterates over any rune of the string ``s`` returning runes.
+  for rune in runes(toOa(s)):
+    yield rune
+
+iterator utf8*(s: string): string =
+  ## Iterates over any rune of the string ``s`` returning utf8 values.
+  ##
+  ## See also:
+  ## * `validateUtf8 proc <#validateUtf8,string>`_
+  ## * `toUTF8 proc <#toUTF8,Rune>`_
+  ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
+  ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
+  for str in utf8(toOa(s)):
+    yield str
+
+proc toRunes*(s: string): seq[Rune] {.inline.} =
+  ## Obtains a sequence containing the Runes in ``s``.
+  ##
+  ## See also:
+  ## * `$ proc <#$,Rune>`_ for a reverse operation
+  runnableExamples:
+    let a = toRunes("aáä")
+    doAssert a == @["a".runeAt(0), "á".runeAt(0), "ä".runeAt(0)]
+  toRunes(toOa(s))
+
+proc cmpRunesIgnoreCase*(a, b: string): int {.inline.} =
+  ## Compares two UTF-8 strings and ignores the case. Returns:
+  ##
+  ## | `0` if a == b
+  ## | `< 0` if a < b
+  ## | `> 0` if a > b
+  cmpRunesIgnoreCase(a.toOa(), b.toOa())
+
+proc reversed*(s: string): string {.inline.} =
+  ## Returns the reverse of ``s``, interpreting it as runes.
+  ##
+  ## Unicode combining characters are correctly interpreted as well.
+  runnableExamples:
+    assert reversed("Reverse this!") == "!siht esreveR"
+    assert reversed("先秦兩漢") == "漢兩秦先"
+    assert reversed("as⃝df̅") == "f̅ds⃝a"
+    assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
+  reversed(toOa(s))
+
+proc graphemeLen*(s: string; i: Natural): Natural {.inline.} =
+  ## The number of bytes belonging to byte index ``s[i]``,
+  ## including following combining code unit.
+  runnableExamples:
+    let a = "añyóng"
+    doAssert a.graphemeLen(1) == 2 ## ñ
+    doAssert a.graphemeLen(2) == 1
+    doAssert a.graphemeLen(4) == 2 ## ó
+  graphemeLen(toOa(s), i)
+
+proc lastRune*(s: string; last: int): (Rune, int) {.inline.} =
+  ## Length of the last rune in ``s[0..last]``. Returns the rune and its length
+  ## in bytes.
+  lastRune(toOa(s), last)
+
+iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces,
+  maxsplit: int = -1): string =
+  ## Splits the unicode string ``s`` into substrings using a group of separators.
+  ##
+  ## Substrings are separated by a substring containing only ``seps``.
+  runnableExamples:
+    import std/sequtils
+
+    assert toSeq("hÃllo\lthis\lis an\texample\l是".split) ==
+      @["hÃllo", "this", "is", "an", "example", "是"]
+
+    # And the following code splits the same string using a sequence of Runes.
+    assert toSeq(split("añyóng:hÃllo;是$example", ";:$".toRunes)) ==
+      @["añyóng", "hÃllo", "是", "example"]
+
+    # example with a `Rune` separator and unused one `;`:
+    assert toSeq(split("ab是de:f:", ";:是".toRunes)) == @["ab", "de", "f", ""]
+
+    # Another example that splits a string containing a date.
+    let date = "2012-11-20T22:08:08.398990"
+
+    assert toSeq(split(date, " -:T".toRunes)) ==
+      @["2012", "11", "20", "22", "08", "08.398990"]
+
+  splitCommon(toOa(s), seps, maxsplit)
+
+iterator splitWhitespace*(s: string): string =
+  ## Splits a unicode string at whitespace runes.
+  splitCommon(s.toOa(), unicodeSpaces, -1)
+
+
+proc splitWhitespace*(s: string): seq[string] {.noSideEffect, inline.}=
+  ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_
+  ## iterator, but is a proc that returns a sequence of substrings.
+  accResult(splitWhitespace(toOa(s)))
+
+iterator split*(s: string, sep: Rune, maxsplit: int = -1): string =
+  ## Splits the unicode string ``s`` into substrings using a single separator.
+  ## Substrings are separated by the rune ``sep``.
+  runnableExamples:
+    import std/sequtils
+
+    assert toSeq(split(";;hÃllo;this;is;an;;example;;;是", ";".runeAt(0))) ==
+      @["", "", "hÃllo", "this", "is", "an", "", "example", "", "", "是"]
+
+  splitCommon(toOa(s), sep, maxsplit)
+
+proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1):
+    seq[string] {.noSideEffect, inline.} =
+  ## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_,
+  ## but is a proc that returns a sequence of substrings.
+  accResult(split(toOa(s), seps, maxsplit))
+
+proc split*(s: string, sep: Rune, maxsplit: int = -1): seq[string] {.noSideEffect, inline.} =
+  ## The same as the `split iterator <#split.i,string,Rune,int>`_, but is a proc
+  ## that returns a sequence of substrings.
+  accResult(split(toOa(s), sep, maxsplit))
+
+proc strip*(s: string, leading = true, trailing = true,
+            runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect, inline.} =
+  ## Strips leading or trailing ``runes`` from ``s`` and returns
+  ## the resulting string.
+  ##
+  ## If ``leading`` is true (default), leading ``runes`` are stripped.
+  ## If ``trailing`` is true (default), trailing ``runes`` are stripped.
+  ## If both are false, the string is returned unchanged.
+  runnableExamples:
+    let a = "\táñyóng   "
+    doAssert a.strip == "áñyóng"
+    doAssert a.strip(leading = false) == "\táñyóng"
+    doAssert a.strip(trailing = false) == "áñyóng   "
+  strip(toOa(s), leading, trailing, runes)
+
+
+proc align*(s: string, count: Natural, padding = ' '.Rune): string {.noSideEffect, inline.} =
+  ## Aligns a unicode string ``s`` with ``padding``, so that it has a rune-length
+  ## of ``count``.
+  ##
+  ## ``padding`` characters (by default spaces) are added before ``s`` resulting in
+  ## right alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
+  ## returned unchanged. If you need to left align a string use the `alignLeft
+  ## proc <#alignLeft,string,Natural>`_.
+  runnableExamples:
+    assert align("abc", 4) == " abc"
+    assert align("a", 0) == "a"
+    assert align("1232", 6) == "  1232"
+    assert align("1232", 6, '#'.Rune) == "##1232"
+    assert align("Åge", 5) == "  Åge"
+    assert align("×", 4, '_'.Rune) == "___×"
+  align(toOa(s), count, padding)
+
+proc alignLeft*(s: string, count: Natural, padding = ' '.Rune): string {.noSideEffect, inline.} =
+  ## Left-aligns a unicode string ``s`` with ``padding``, so that it has a
+  ## rune-length of ``count``.
+  ##
+  ## ``padding`` characters (by default spaces) are added after ``s`` resulting in
+  ## left alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
+  ## returned unchanged. If you need to right align a string use the `align
+  ## proc <#align,string,Natural>`_.
+  runnableExamples:
+    assert alignLeft("abc", 4) == "abc "
+    assert alignLeft("a", 0) == "a"
+    assert alignLeft("1232", 6) == "1232  "
+    assert alignLeft("1232", 6, '#'.Rune) == "1232##"
+    assert alignLeft("Åge", 5) == "Åge  "
+    assert alignLeft("×", 4, '_'.Rune) == "×___"
+  alignLeft(toOa(s), count, padding)