summary refs log tree commit diff stats
path: root/lib/pure/unicode.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/unicode.nim')
-rw-r--r-- lib/pure/unicode.nim 1170
1 files changed, 1170 insertions, 0 deletions
diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim
new file mode 100644
index 000000000..735713cf5
--- /dev/null
+++ b/lib/pure/unicode.nim
@@ -0,0 +1,1170 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2009 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module provides support to handle the Unicode UTF-8 encoding.
+
+{.deadCodeElim: on.}
+
+type
+  TRune* = int ## alias of `int`; type that can hold any Unicode character
+  TRune16* = int16 ## alias of `int16`; 16 bit Unicode character
+  
+template ones(n: expr): expr = ((1 shl n)-1) # bit mask with the `n` lowest bits set
+
+proc runeLen*(s: string): int =
+  ## Returns the number of Unicode characters encoded in the UTF-8 string `s`.
+  ##
+  ## Only the lead byte of each sequence is inspected: its high bits decide
+  ## how many bytes are skipped. A byte that is not a valid lead byte (a stray
+  ## continuation byte, or a value of 0xF8 and above) is counted as one
+  ## character of its own, so the scan always advances and therefore always
+  ## terminates, even on malformed input and with assertions disabled.
+  var i = 0
+  while i < len(s):
+    var lead = ord(s[i])
+    if lead <=% 127: inc(i)                # 0xxxxxxx: single byte
+    elif lead shr 5 == 0b110: inc(i, 2)    # 110xxxxx: two byte sequence
+    elif lead shr 4 == 0b1110: inc(i, 3)   # 1110xxxx: three byte sequence
+    elif lead shr 3 == 0b11110: inc(i, 4)  # 11110xxx: four byte sequence
+    else: inc(i)                           # malformed: skip exactly one byte
+    inc(result)
+
+proc runeAt*(s: string, i: int): TRune =
+  ## Decodes the UTF-8 sequence that starts at byte index `i` of `s` and
+  ## returns the resulting Unicode character.
+  ##
+  ## The lead byte selects a 1, 2, 3 or 4 byte sequence. Every following byte
+  ## is asserted to be a continuation byte (``10xxxxxx``); a lead byte that
+  ## matches none of the four patterns trips ``assert(false)``.
+  var lead = ord(s[i])
+  if lead <=% 127:
+    # plain ASCII: the byte is the character
+    result = lead
+  elif lead shr 5 == 0b110:
+    var b1 = ord(s[i+1])
+    assert(b1 shr 6 == 0b10)
+    result = (lead and ones(5)) shl 6 or (b1 and ones(6))
+  elif lead shr 4 == 0b1110:
+    var b1 = ord(s[i+1])
+    assert(b1 shr 6 == 0b10)
+    var b2 = ord(s[i+2])
+    assert(b2 shr 6 == 0b10)
+    result = (lead and ones(4)) shl 12 or
+             (b1 and ones(6)) shl 6 or
+             (b2 and ones(6))
+  elif lead shr 3 == 0b11110:
+    var b1 = ord(s[i+1])
+    assert(b1 shr 6 == 0b10)
+    var b2 = ord(s[i+2])
+    assert(b2 shr 6 == 0b10)
+    var b3 = ord(s[i+3])
+    assert(b3 shr 6 == 0b10)
+    result = (lead and ones(3)) shl 18 or
+             (b1 and ones(6)) shl 12 or
+             (b2 and ones(6)) shl 6 or
+             (b3 and ones(6))
+  else:
+    assert(false)
+
+template fastRuneAt(s, i, result: expr): stmt = 
+  ## Decodes the UTF-8 sequence starting at byte index `i` of `s`, stores the
+  ## character in `result` and advances `i` past the bytes that were consumed.
+  ##
+  ## `i` and `result` must be assignable variables of the caller. Following
+  ## bytes are asserted to be continuation bytes (``10xxxxxx``). A byte that
+  ## is not a valid lead byte is returned as is and `i` is advanced by one, so
+  ## that loops built on this template always make progress.
+  if ord(s[i]) <=% 127:
+    # 0xxxxxxx: single byte
+    result = ord(s[i])
+    inc(i)
+  elif ord(s[i]) shr 5 == 0b110:
+    # 110xxxxx 10xxxxxx
+    assert(ord(s[i+1]) shr 6 == 0b10)
+    result = (ord(s[i]) and ones(5)) shl 6 or (ord(s[i+1]) and ones(6))
+    inc(i, 2)
+  elif ord(s[i]) shr 4 == 0b1110:
+    # 1110xxxx 10xxxxxx 10xxxxxx
+    assert(ord(s[i+1]) shr 6 == 0b10)
+    assert(ord(s[i+2]) shr 6 == 0b10)
+    result = (ord(s[i]) and ones(4)) shl 12 or
+             (ord(s[i+1]) and ones(6)) shl 6 or
+             (ord(s[i+2]) and ones(6))
+    inc(i, 3)
+  elif ord(s[i]) shr 3 == 0b11110:
+    # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    assert(ord(s[i+1]) shr 6 == 0b10)
+    assert(ord(s[i+2]) shr 6 == 0b10)
+    assert(ord(s[i+3]) shr 6 == 0b10)
+    result = (ord(s[i]) and ones(3)) shl 18 or
+             (ord(s[i+1]) and ones(6)) shl 12 or
+             (ord(s[i+2]) and ones(6)) shl 6 or
+             (ord(s[i+3]) and ones(6))
+    inc(i, 4)
+  else:
+    # Not a valid lead byte. Previously this branch only asserted and left
+    # `i` untouched, which made callers spin forever once assertions were
+    # compiled out. Hand back the raw byte and step over it instead.
+    result = ord(s[i])
+    inc(i)
+
+proc toUTF8*(c: TRune): string = 
+  ## Converts the character `c` into its UTF-8 representation.
+  ##
+  ## Produces 1 byte for values up to 0x7F, 2 bytes up to 0x7FF, 3 bytes up
+  ## to 0xFFFF and 4 bytes up to 0x10FFFF. Larger values (and, because the
+  ## comparisons are unsigned, negative ones) trip ``assert false``.
+  if c <=% 127:
+    result = newString(1)
+    result[0] = chr(c)
+  elif c <=% 0x07FF:
+    result = newString(2)
+    # The lead byte of a two byte sequence is 110xxxxx (0xC0). The marker used
+    # to be written as 0b110_0000 (0x60), which yielded an ASCII byte.
+    result[0] = chr(c shr 6 or 0b110_00000)
+    result[1] = chr(c and ones(6) or 0b10_000000)
+  elif c <=% 0xFFFF:
+    result = newString(3)
+    result[0] = chr(c shr 12 or 0b1110_0000)
+    result[1] = chr(c shr 6 and ones(6) or 0b10_0000_00)
+    result[2] = chr(c and ones(6) or 0b10_0000_00)
+  elif c <=% 0x0010FFFF:
+    result = newString(4)
+    result[0] = chr(c shr 18 or 0b1111_0000)
+    result[1] = chr(c shr 12 and ones(6) or 0b10_0000_00)
+    result[2] = chr(c shr 6 and ones(6) or 0b10_0000_00)
+    result[3] = chr(c and ones(6) or 0b10_0000_00)
+  else:
+    assert false
+
+const
+  alphaRanges = [  # ascending (first, last) pairs of inclusive ranges; searched by isAlpha
+    0x00d8,  0x00f6,  # Ø - ö
+    0x00f8,  0x01f5,  # ø - ǵ
+    0x0250,  0x02a8,  # ɐ - ʨ
+    0x038e,  0x03a1,  # Ύ - Ρ
+    0x03a3,  0x03ce,  # Σ - ώ
+    0x03d0,  0x03d6,  # ϐ - ϖ
+    0x03e2,  0x03f3,  # Ϣ - ϳ
+    0x0490,  0x04c4,  # Ґ - ӄ
+    0x0561,  0x0587,  # ա - և
+    0x05d0,  0x05ea,  # א - ת
+    0x05f0,  0x05f2,  # װ - ײ
+    0x0621,  0x063a,  # ء - غ
+    0x0640,  0x064a,  # ـ - ي
+    0x0671,  0x06b7,  # ٱ - ڷ
+    0x06ba,  0x06be,  # ں - ھ
+    0x06c0,  0x06ce,  # ۀ - ێ
+    0x06d0,  0x06d3,  # ې - ۓ
+    0x0905,  0x0939,  # अ - ह
+    0x0958,  0x0961,  # क़ - ॡ
+    0x0985,  0x098c,  # অ - ঌ
+    0x098f,  0x0990,  # এ - ঐ
+    0x0993,  0x09a8,  # ও - ন
+    0x09aa,  0x09b0,  # প - র
+    0x09b6,  0x09b9,  # শ - হ
+    0x09dc,  0x09dd,  # ড় - ঢ়
+    0x09df,  0x09e1,  # য় - ৡ
+    0x09f0,  0x09f1,  # ৰ - ৱ
+    0x0a05,  0x0a0a,  # ਅ - ਊ
+    0x0a0f,  0x0a10,  # ਏ - ਐ
+    0x0a13,  0x0a28,  # ਓ - ਨ
+    0x0a2a,  0x0a30,  # ਪ - ਰ
+    0x0a32,  0x0a33,  # ਲ - ਲ਼
+    0x0a35,  0x0a36,  # ਵ - ਸ਼
+    0x0a38,  0x0a39,  # ਸ - ਹ
+    0x0a59,  0x0a5c,  # ਖ਼ - ੜ
+    0x0a85,  0x0a8b,  # અ - ઋ
+    0x0a8f,  0x0a91,  # એ - ઑ
+    0x0a93,  0x0aa8,  # ઓ - ન
+    0x0aaa,  0x0ab0,  # પ - ર
+    0x0ab2,  0x0ab3,  # લ - ળ
+    0x0ab5,  0x0ab9,  # વ - હ
+    0x0b05,  0x0b0c,  # ଅ - ଌ
+    0x0b0f,  0x0b10,  # ଏ - ଐ
+    0x0b13,  0x0b28,  # ଓ - ନ
+    0x0b2a,  0x0b30,  # ପ - ର
+    0x0b32,  0x0b33,  # ଲ - ଳ
+    0x0b36,  0x0b39,  # ଶ - ହ
+    0x0b5c,  0x0b5d,  # ଡ଼ - ଢ଼
+    0x0b5f,  0x0b61,  # ୟ - ୡ
+    0x0b85,  0x0b8a,  # அ - ஊ
+    0x0b8e,  0x0b90,  # எ - ஐ
+    0x0b92,  0x0b95,  # ஒ - க
+    0x0b99,  0x0b9a,  # ங - ச
+    0x0b9e,  0x0b9f,  # ஞ - ட
+    0x0ba3,  0x0ba4,  # ண - த
+    0x0ba8,  0x0baa,  # ந - ப
+    0x0bae,  0x0bb5,  # ம - வ
+    0x0bb7,  0x0bb9,  # ஷ - ஹ
+    0x0c05,  0x0c0c,  # అ - ఌ
+    0x0c0e,  0x0c10,  # ఎ - ఐ
+    0x0c12,  0x0c28,  # ఒ - న
+    0x0c2a,  0x0c33,  # ప - ళ
+    0x0c35,  0x0c39,  # వ - హ
+    0x0c60,  0x0c61,  # ౠ - ౡ
+    0x0c85,  0x0c8c,  # ಅ - ಌ
+    0x0c8e,  0x0c90,  # ಎ - ಐ
+    0x0c92,  0x0ca8,  # ಒ - ನ
+    0x0caa,  0x0cb3,  # ಪ - ಳ
+    0x0cb5,  0x0cb9,  # ವ - ಹ
+    0x0ce0,  0x0ce1,  # ೠ - ೡ
+    0x0d05,  0x0d0c,  # അ - ഌ
+    0x0d0e,  0x0d10,  # എ - ഐ
+    0x0d12,  0x0d28,  # ഒ - ന
+    0x0d2a,  0x0d39,  # പ - ഹ
+    0x0d60,  0x0d61,  # ൠ - ൡ
+    0x0e01,  0x0e30,  # ก - ะ
+    0x0e32,  0x0e33,  # า - ำ
+    0x0e40,  0x0e46,  # เ - ๆ
+    0x0e5a,  0x0e5b,  # ๚ - ๛
+    0x0e81,  0x0e82,  # ກ - ຂ
+    0x0e87,  0x0e88,  # ງ - ຈ
+    0x0e94,  0x0e97,  # ດ - ທ
+    0x0e99,  0x0e9f,  # ນ - ຟ
+    0x0ea1,  0x0ea3,  # ມ - ຣ
+    0x0eaa,  0x0eab,  # ສ - ຫ
+    0x0ead,  0x0eae,  # ອ - ຮ
+    0x0eb2,  0x0eb3,  # າ - ຳ
+    0x0ec0,  0x0ec4,  # ເ - ໄ
+    0x0edc,  0x0edd,  # ໜ - ໝ
+    0x0f18,  0x0f19,  # ༘ - ༙
+    0x0f40,  0x0f47,  # ཀ - ཇ
+    0x0f49,  0x0f69,  # ཉ - ཀྵ
+    0x10d0,  0x10f6,  # ა - ჶ
+    0x1100,  0x1159,  # ᄀ - ᅙ
+    0x115f,  0x11a2,  # ᅟ - ᆢ
+    0x11a8,  0x11f9,  # ᆨ - ᇹ
+    0x1e00,  0x1e9b,  # Ḁ - ẛ
+    0x1f50,  0x1f57,  # ὐ - ὗ
+    0x1f80,  0x1fb4,  # ᾀ - ᾴ
+    0x1fb6,  0x1fbc,  # ᾶ - ᾼ
+    0x1fc2,  0x1fc4,  # ῂ - ῄ
+    0x1fc6,  0x1fcc,  # ῆ - ῌ
+    0x1fd0,  0x1fd3,  # ῐ - ΐ
+    0x1fd6,  0x1fdb,  # ῖ - Ί
+    0x1fe0,  0x1fec,  # ῠ - Ῥ
+    0x1ff2,  0x1ff4,  # ῲ - ῴ
+    0x1ff6,  0x1ffc,  # ῶ - ῼ
+    0x210a,  0x2113,  # ℊ - ℓ
+    0x2115,  0x211d,  # ℕ - ℝ
+    0x2120,  0x2122,  # ℠ - ™
+    0x212a,  0x2131,  # K - ℱ
+    0x2133,  0x2138,  # ℳ - ℸ
+    0x3041,  0x3094,  # ぁ - ゔ
+    0x30a1,  0x30fa,  # ァ - ヺ
+    0x3105,  0x312c,  # ㄅ - ㄬ
+    0x3131,  0x318e,  # ㄱ - ㆎ
+    0x3192,  0x319f,  # ㆒ - ㆟
+    0x3260,  0x327b,  # ㉠ - ㉻
+    0x328a,  0x32b0,  # ㊊ - ㊰
+    0x32d0,  0x32fe,  # ㋐ - ㋾
+    0x3300,  0x3357,  # ㌀ - ㍗
+    0x3371,  0x3376,  # ㍱ - ㍶
+    0x337b,  0x3394,  # ㍻ - ㎔
+    0x3399,  0x339e,  # ㎙ - ㎞
+    0x33a9,  0x33ad,  # ㎩ - ㎭
+    0x33b0,  0x33c1,  # ㎰ - ㏁
+    0x33c3,  0x33c5,  # ㏃ - ㏅
+    0x33c7,  0x33d7,  # ㏇ - ㏗
+    0x33d9,  0x33dd,  # ㏙ - ㏝
+    0x4e00,  0x9fff,  # 一 - 鿿
+    0xac00,  0xd7a3,  # 가 - 힣
+    0xf900,  0xfb06,  # 豈 - st
+    0xfb13,  0xfb17,  # ﬓ - ﬗ
+    0xfb1f,  0xfb28,  # ײַ - ﬨ
+    0xfb2a,  0xfb36,  # שׁ - זּ
+    0xfb38,  0xfb3c,  # טּ - לּ
+    0xfb40,  0xfb41,  # נּ - סּ
+    0xfb43,  0xfb44,  # ףּ - פּ
+    0xfb46,  0xfbb1,  # צּ - ﮱ
+    0xfbd3,  0xfd3d,  # ﯓ - ﴽ
+    0xfd50,  0xfd8f,  # ﵐ - ﶏ
+    0xfd92,  0xfdc7,  # ﶒ - ﷇ
+    0xfdf0,  0xfdf9,  # ﷰ - ﷹ
+    0xfe70,  0xfe72,  # ﹰ - ﹲ
+    0xfe76,  0xfefc,  # ﹶ - ﻼ
+    0xff66,  0xff6f,  # ヲ - ッ
+    0xff71,  0xff9d,  # ア - ン
+    0xffa0,  0xffbe,  # ᅠ - ᄒ
+    0xffc2,  0xffc7,  # ᅡ - ᅦ
+    0xffca,  0xffcf,  # ᅧ - ᅬ
+    0xffd2,  0xffd7,  # ᅭ - ᅲ
+    0xffda,  0xffdc]  # ᅳ - ᅵ
+
+  alphaSinglets = [  # ascending single characters; searched by isAlpha with stride 1
+    0x00aa,  # ª
+    0x00b5,  # µ
+    0x00ba,  # º
+    0x03da,  # Ϛ
+    0x03dc,  # Ϝ
+    0x03de,  # Ϟ
+    0x03e0,  # Ϡ
+    0x06d5,  # ە
+    0x09b2,  # ল
+    0x0a5e,  # ਫ਼
+    0x0a8d,  # ઍ
+    0x0ae0,  # ૠ
+    0x0b9c,  # ஜ
+    0x0cde,  # ೞ
+    0x0e4f,  # ๏
+    0x0e84,  # ຄ
+    0x0e8a,  # ຊ
+    0x0e8d,  # ຍ
+    0x0ea5,  # ລ
+    0x0ea7,  # ວ
+    0x0eb0,  # ະ
+    0x0ebd,  # ຽ
+    0x1fbe,  # ι
+    0x207f,  # ⁿ
+    0x20a8,  # ₨
+    0x2102,  # ℂ
+    0x2107,  # ℇ
+    0x2124,  # ℤ
+    0x2126,  # Ω
+    0x2128,  # ℨ
+    0xfb3e,  # מּ
+    0xfe74]  # ﹴ
+
+  spaceRanges = [  # ascending (first, last) pairs of inclusive ranges; searched by isWhiteSpace
+    0x0009,  0x000a,  # tab and newline
+    0x0020,  0x0020,  # space
+    0x00a0,  0x00a0,  # no-break space
+    0x2000,  0x200b,  # en quad - zero width space
+    0x2028,  0x2029,  # line separator - paragraph separator
+    0x3000,  0x3000,  # ideographic space
+    0xfeff,  0xfeff]  # zero width no-break space (byte order mark)
+
+  toupperRanges = [  # ascending (first, last, offset+500) triples; toUpper adds the entry minus 500
+    0x0061,  0x007a, 468,  # a-z A-Z
+    0x00e0,  0x00f6, 468,  # à-ö À-Ö
+    0x00f8,  0x00fe, 468,  # ø-þ Ø-Þ
+    0x0256,  0x0257, 295,  # ɖ-ɗ Ɖ-Ɗ
+    0x0258,  0x0259, 298,  # ɘ-ə Ǝ-Ə
+    0x028a,  0x028b, 283,  # ʊ-ʋ Ʊ-Ʋ
+    0x03ad,  0x03af, 463,  # έ-ί Έ-Ί
+    0x03b1,  0x03c1, 468,  # α-ρ Α-Ρ
+    0x03c3,  0x03cb, 468,  # σ-ϋ Σ-Ϋ
+    0x03cd,  0x03ce, 437,  # ύ-ώ Ύ-Ώ
+    0x0430,  0x044f, 468,  # а-я А-Я
+    0x0451,  0x045c, 420,  # ё-ќ Ё-Ќ
+    0x045e,  0x045f, 420,  # ў-џ Ў-Џ
+    0x0561,  0x0586, 452,  # ա-ֆ Ա-Ֆ
+    0x1f00,  0x1f07, 508,  # ἀ-ἇ Ἀ-Ἇ
+    0x1f10,  0x1f15, 508,  # ἐ-ἕ Ἐ-Ἕ
+    0x1f20,  0x1f27, 508,  # ἠ-ἧ Ἠ-Ἧ
+    0x1f30,  0x1f37, 508,  # ἰ-ἷ Ἰ-Ἷ
+    0x1f40,  0x1f45, 508,  # ὀ-ὅ Ὀ-Ὅ
+    0x1f60,  0x1f67, 508,  # ὠ-ὧ Ὠ-Ὧ
+    0x1f70,  0x1f71, 574,  # ὰ-ά Ὰ-Ά
+    0x1f72,  0x1f75, 586,  # ὲ-ή Ὲ-Ή
+    0x1f76,  0x1f77, 600,  # ὶ-ί Ὶ-Ί
+    0x1f78,  0x1f79, 628,  # ὸ-ό Ὸ-Ό
+    0x1f7a,  0x1f7b, 612,  # ὺ-ύ Ὺ-Ύ
+    0x1f7c,  0x1f7d, 626,  # ὼ-ώ Ὼ-Ώ
+    0x1f80,  0x1f87, 508,  # ᾀ-ᾇ ᾈ-ᾏ
+    0x1f90,  0x1f97, 508,  # ᾐ-ᾗ ᾘ-ᾟ
+    0x1fa0,  0x1fa7, 508,  # ᾠ-ᾧ ᾨ-ᾯ
+    0x1fb0,  0x1fb1, 508,  # ᾰ-ᾱ Ᾰ-Ᾱ
+    0x1fd0,  0x1fd1, 508,  # ῐ-ῑ Ῐ-Ῑ
+    0x1fe0,  0x1fe1, 508,  # ῠ-ῡ Ῠ-Ῡ
+    0x2170,  0x217f, 484,  # ⅰ-ⅿ Ⅰ-Ⅿ
+    0x24d0,  0x24e9, 474,  # ⓐ-ⓩ Ⓐ-Ⓩ
+    0xff41,  0xff5a, 468]  # a-z A-Z
+
+  toupperSinglets = [  # ascending (character, offset+500) pairs; toUpper adds the entry minus 500
+    0x00ff, 621,  # ÿ Ÿ
+    0x0101, 499,  # ā Ā
+    0x0103, 499,  # ă Ă
+    0x0105, 499,  # ą Ą
+    0x0107, 499,  # ć Ć
+    0x0109, 499,  # ĉ Ĉ
+    0x010b, 499,  # ċ Ċ
+    0x010d, 499,  # č Č
+    0x010f, 499,  # ď Ď
+    0x0111, 499,  # đ Đ
+    0x0113, 499,  # ē Ē
+    0x0115, 499,  # ĕ Ĕ
+    0x0117, 499,  # ė Ė
+    0x0119, 499,  # ę Ę
+    0x011b, 499,  # ě Ě
+    0x011d, 499,  # ĝ Ĝ
+    0x011f, 499,  # ğ Ğ
+    0x0121, 499,  # ġ Ġ
+    0x0123, 499,  # ģ Ģ
+    0x0125, 499,  # ĥ Ĥ
+    0x0127, 499,  # ħ Ħ
+    0x0129, 499,  # ĩ Ĩ
+    0x012b, 499,  # ī Ī
+    0x012d, 499,  # ĭ Ĭ
+    0x012f, 499,  # į Į
+    0x0131, 268,  # ı I
+    0x0133, 499,  # ij IJ
+    0x0135, 499,  # ĵ Ĵ
+    0x0137, 499,  # ķ Ķ
+    0x013a, 499,  # ĺ Ĺ
+    0x013c, 499,  # ļ Ļ
+    0x013e, 499,  # ľ Ľ
+    0x0140, 499,  # ŀ Ŀ
+    0x0142, 499,  # ł Ł
+    0x0144, 499,  # ń Ń
+    0x0146, 499,  # ņ Ņ
+    0x0148, 499,  # ň Ň
+    0x014b, 499,  # ŋ Ŋ
+    0x014d, 499,  # ō Ō
+    0x014f, 499,  # ŏ Ŏ
+    0x0151, 499,  # ő Ő
+    0x0153, 499,  # œ Œ
+    0x0155, 499,  # ŕ Ŕ
+    0x0157, 499,  # ŗ Ŗ
+    0x0159, 499,  # ř Ř
+    0x015b, 499,  # ś Ś
+    0x015d, 499,  # ŝ Ŝ
+    0x015f, 499,  # ş Ş
+    0x0161, 499,  # š Š
+    0x0163, 499,  # ţ Ţ
+    0x0165, 499,  # ť Ť
+    0x0167, 499,  # ŧ Ŧ
+    0x0169, 499,  # ũ Ũ
+    0x016b, 499,  # ū Ū
+    0x016d, 499,  # ŭ Ŭ
+    0x016f, 499,  # ů Ů
+    0x0171, 499,  # ű Ű
+    0x0173, 499,  # ų Ų
+    0x0175, 499,  # ŵ Ŵ
+    0x0177, 499,  # ŷ Ŷ
+    0x017a, 499,  # ź Ź
+    0x017c, 499,  # ż Ż
+    0x017e, 499,  # ž Ž
+    0x017f, 200,  # ſ S
+    0x0183, 499,  # ƃ Ƃ
+    0x0185, 499,  # ƅ Ƅ
+    0x0188, 499,  # ƈ Ƈ
+    0x018c, 499,  # ƌ Ƌ
+    0x0192, 499,  # ƒ Ƒ
+    0x0199, 499,  # ƙ Ƙ
+    0x01a1, 499,  # ơ Ơ
+    0x01a3, 499,  # ƣ Ƣ
+    0x01a5, 499,  # ƥ Ƥ
+    0x01a8, 499,  # ƨ Ƨ
+    0x01ad, 499,  # ƭ Ƭ
+    0x01b0, 499,  # ư Ư
+    0x01b4, 499,  # ƴ Ƴ
+    0x01b6, 499,  # ƶ Ƶ
+    0x01b9, 499,  # ƹ Ƹ
+    0x01bd, 499,  # ƽ Ƽ
+    0x01c5, 499,  # Dž DŽ
+    0x01c6, 498,  # dž DŽ
+    0x01c8, 499,  # Lj LJ
+    0x01c9, 498,  # lj LJ
+    0x01cb, 499,  # Nj NJ
+    0x01cc, 498,  # nj NJ
+    0x01ce, 499,  # ǎ Ǎ
+    0x01d0, 499,  # ǐ Ǐ
+    0x01d2, 499,  # ǒ Ǒ
+    0x01d4, 499,  # ǔ Ǔ
+    0x01d6, 499,  # ǖ Ǖ
+    0x01d8, 499,  # ǘ Ǘ
+    0x01da, 499,  # ǚ Ǚ
+    0x01dc, 499,  # ǜ Ǜ
+    0x01df, 499,  # ǟ Ǟ
+    0x01e1, 499,  # ǡ Ǡ
+    0x01e3, 499,  # ǣ Ǣ
+    0x01e5, 499,  # ǥ Ǥ
+    0x01e7, 499,  # ǧ Ǧ
+    0x01e9, 499,  # ǩ Ǩ
+    0x01eb, 499,  # ǫ Ǫ
+    0x01ed, 499,  # ǭ Ǭ
+    0x01ef, 499,  # ǯ Ǯ
+    0x01f2, 499,  # Dz DZ
+    0x01f3, 498,  # dz DZ
+    0x01f5, 499,  # ǵ Ǵ
+    0x01fb, 499,  # ǻ Ǻ
+    0x01fd, 499,  # ǽ Ǽ
+    0x01ff, 499,  # ǿ Ǿ
+    0x0201, 499,  # ȁ Ȁ
+    0x0203, 499,  # ȃ Ȃ
+    0x0205, 499,  # ȅ Ȅ
+    0x0207, 499,  # ȇ Ȇ
+    0x0209, 499,  # ȉ Ȉ
+    0x020b, 499,  # ȋ Ȋ
+    0x020d, 499,  # ȍ Ȍ
+    0x020f, 499,  # ȏ Ȏ
+    0x0211, 499,  # ȑ Ȑ
+    0x0213, 499,  # ȓ Ȓ
+    0x0215, 499,  # ȕ Ȕ
+    0x0217, 499,  # ȗ Ȗ
+    0x0253, 290,  # ɓ Ɓ
+    0x0254, 294,  # ɔ Ɔ
+    0x025b, 297,  # ɛ Ɛ
+    0x0260, 295,  # ɠ Ɠ
+    0x0263, 293,  # ɣ Ɣ
+    0x0268, 291,  # ɨ Ɨ
+    0x0269, 289,  # ɩ Ɩ
+    0x026f, 289,  # ɯ Ɯ
+    0x0272, 287,  # ɲ Ɲ
+    0x0283, 282,  # ʃ Ʃ
+    0x0288, 282,  # ʈ Ʈ
+    0x0292, 281,  # ʒ Ʒ
+    0x03ac, 462,  # ά Ά
+    0x03cc, 436,  # ό Ό
+    0x03d0, 438,  # ϐ Β
+    0x03d1, 443,  # ϑ Θ
+    0x03d5, 453,  # ϕ Φ
+    0x03d6, 446,  # ϖ Π
+    0x03e3, 499,  # ϣ Ϣ
+    0x03e5, 499,  # ϥ Ϥ
+    0x03e7, 499,  # ϧ Ϧ
+    0x03e9, 499,  # ϩ Ϩ
+    0x03eb, 499,  # ϫ Ϫ
+    0x03ed, 499,  # ϭ Ϭ
+    0x03ef, 499,  # ϯ Ϯ
+    0x03f0, 414,  # ϰ Κ
+    0x03f1, 420,  # ϱ Ρ
+    0x0461, 499,  # ѡ Ѡ
+    0x0463, 499,  # ѣ Ѣ
+    0x0465, 499,  # ѥ Ѥ
+    0x0467, 499,  # ѧ Ѧ
+    0x0469, 499,  # ѩ Ѩ
+    0x046b, 499,  # ѫ Ѫ
+    0x046d, 499,  # ѭ Ѭ
+    0x046f, 499,  # ѯ Ѯ
+    0x0471, 499,  # ѱ Ѱ
+    0x0473, 499,  # ѳ Ѳ
+    0x0475, 499,  # ѵ Ѵ
+    0x0477, 499,  # ѷ Ѷ
+    0x0479, 499,  # ѹ Ѹ
+    0x047b, 499,  # ѻ Ѻ
+    0x047d, 499,  # ѽ Ѽ
+    0x047f, 499,  # ѿ Ѿ
+    0x0481, 499,  # ҁ Ҁ
+    0x0491, 499,  # ґ Ґ
+    0x0493, 499,  # ғ Ғ
+    0x0495, 499,  # ҕ Ҕ
+    0x0497, 499,  # җ Җ
+    0x0499, 499,  # ҙ Ҙ
+    0x049b, 499,  # қ Қ
+    0x049d, 499,  # ҝ Ҝ
+    0x049f, 499,  # ҟ Ҟ
+    0x04a1, 499,  # ҡ Ҡ
+    0x04a3, 499,  # ң Ң
+    0x04a5, 499,  # ҥ Ҥ
+    0x04a7, 499,  # ҧ Ҧ
+    0x04a9, 499,  # ҩ Ҩ
+    0x04ab, 499,  # ҫ Ҫ
+    0x04ad, 499,  # ҭ Ҭ
+    0x04af, 499,  # ү Ү
+    0x04b1, 499,  # ұ Ұ
+    0x04b3, 499,  # ҳ Ҳ
+    0x04b5, 499,  # ҵ Ҵ
+    0x04b7, 499,  # ҷ Ҷ
+    0x04b9, 499,  # ҹ Ҹ
+    0x04bb, 499,  # һ Һ
+    0x04bd, 499,  # ҽ Ҽ
+    0x04bf, 499,  # ҿ Ҿ
+    0x04c2, 499,  # ӂ Ӂ
+    0x04c4, 499,  # ӄ Ӄ
+    0x04c8, 499,  # ӈ Ӈ
+    0x04cc, 499,  # ӌ Ӌ
+    0x04d1, 499,  # ӑ Ӑ
+    0x04d3, 499,  # ӓ Ӓ
+    0x04d5, 499,  # ӕ Ӕ
+    0x04d7, 499,  # ӗ Ӗ
+    0x04d9, 499,  # ә Ә
+    0x04db, 499,  # ӛ Ӛ
+    0x04dd, 499,  # ӝ Ӝ
+    0x04df, 499,  # ӟ Ӟ
+    0x04e1, 499,  # ӡ Ӡ
+    0x04e3, 499,  # ӣ Ӣ
+    0x04e5, 499,  # ӥ Ӥ
+    0x04e7, 499,  # ӧ Ӧ
+    0x04e9, 499,  # ө Ө
+    0x04eb, 499,  # ӫ Ӫ
+    0x04ef, 499,  # ӯ Ӯ
+    0x04f1, 499,  # ӱ Ӱ
+    0x04f3, 499,  # ӳ Ӳ
+    0x04f5, 499,  # ӵ Ӵ
+    0x04f9, 499,  # ӹ Ӹ
+    0x1e01, 499,  # ḁ Ḁ
+    0x1e03, 499,  # ḃ Ḃ
+    0x1e05, 499,  # ḅ Ḅ
+    0x1e07, 499,  # ḇ Ḇ
+    0x1e09, 499,  # ḉ Ḉ
+    0x1e0b, 499,  # ḋ Ḋ
+    0x1e0d, 499,  # ḍ Ḍ
+    0x1e0f, 499,  # ḏ Ḏ
+    0x1e11, 499,  # ḑ Ḑ
+    0x1e13, 499,  # ḓ Ḓ
+    0x1e15, 499,  # ḕ Ḕ
+    0x1e17, 499,  # ḗ Ḗ
+    0x1e19, 499,  # ḙ Ḙ
+    0x1e1b, 499,  # ḛ Ḛ
+    0x1e1d, 499,  # ḝ Ḝ
+    0x1e1f, 499,  # ḟ Ḟ
+    0x1e21, 499,  # ḡ Ḡ
+    0x1e23, 499,  # ḣ Ḣ
+    0x1e25, 499,  # ḥ Ḥ
+    0x1e27, 499,  # ḧ Ḧ
+    0x1e29, 499,  # ḩ Ḩ
+    0x1e2b, 499,  # ḫ Ḫ
+    0x1e2d, 499,  # ḭ Ḭ
+    0x1e2f, 499,  # ḯ Ḯ
+    0x1e31, 499,  # ḱ Ḱ
+    0x1e33, 499,  # ḳ Ḳ
+    0x1e35, 499,  # ḵ Ḵ
+    0x1e37, 499,  # ḷ Ḷ
+    0x1e39, 499,  # ḹ Ḹ
+    0x1e3b, 499,  # ḻ Ḻ
+    0x1e3d, 499,  # ḽ Ḽ
+    0x1e3f, 499,  # ḿ Ḿ
+    0x1e41, 499,  # ṁ Ṁ
+    0x1e43, 499,  # ṃ Ṃ
+    0x1e45, 499,  # ṅ Ṅ
+    0x1e47, 499,  # ṇ Ṇ
+    0x1e49, 499,  # ṉ Ṉ
+    0x1e4b, 499,  # ṋ Ṋ
+    0x1e4d, 499,  # ṍ Ṍ
+    0x1e4f, 499,  # ṏ Ṏ
+    0x1e51, 499,  # ṑ Ṑ
+    0x1e53, 499,  # ṓ Ṓ
+    0x1e55, 499,  # ṕ Ṕ
+    0x1e57, 499,  # ṗ Ṗ
+    0x1e59, 499,  # ṙ Ṙ
+    0x1e5b, 499,  # ṛ Ṛ
+    0x1e5d, 499,  # ṝ Ṝ
+    0x1e5f, 499,  # ṟ Ṟ
+    0x1e61, 499,  # ṡ Ṡ
+    0x1e63, 499,  # ṣ Ṣ
+    0x1e65, 499,  # ṥ Ṥ
+    0x1e67, 499,  # ṧ Ṧ
+    0x1e69, 499,  # ṩ Ṩ
+    0x1e6b, 499,  # ṫ Ṫ
+    0x1e6d, 499,  # ṭ Ṭ
+    0x1e6f, 499,  # ṯ Ṯ
+    0x1e71, 499,  # ṱ Ṱ
+    0x1e73, 499,  # ṳ Ṳ
+    0x1e75, 499,  # ṵ Ṵ
+    0x1e77, 499,  # ṷ Ṷ
+    0x1e79, 499,  # ṹ Ṹ
+    0x1e7b, 499,  # ṻ Ṻ
+    0x1e7d, 499,  # ṽ Ṽ
+    0x1e7f, 499,  # ṿ Ṿ
+    0x1e81, 499,  # ẁ Ẁ
+    0x1e83, 499,  # ẃ Ẃ
+    0x1e85, 499,  # ẅ Ẅ
+    0x1e87, 499,  # ẇ Ẇ
+    0x1e89, 499,  # ẉ Ẉ
+    0x1e8b, 499,  # ẋ Ẋ
+    0x1e8d, 499,  # ẍ Ẍ
+    0x1e8f, 499,  # ẏ Ẏ
+    0x1e91, 499,  # ẑ Ẑ
+    0x1e93, 499,  # ẓ Ẓ
+    0x1e95, 499,  # ẕ Ẕ
+    0x1ea1, 499,  # ạ Ạ
+    0x1ea3, 499,  # ả Ả
+    0x1ea5, 499,  # ấ Ấ
+    0x1ea7, 499,  # ầ Ầ
+    0x1ea9, 499,  # ẩ Ẩ
+    0x1eab, 499,  # ẫ Ẫ
+    0x1ead, 499,  # ậ Ậ
+    0x1eaf, 499,  # ắ Ắ
+    0x1eb1, 499,  # ằ Ằ
+    0x1eb3, 499,  # ẳ Ẳ
+    0x1eb5, 499,  # ẵ Ẵ
+    0x1eb7, 499,  # ặ Ặ
+    0x1eb9, 499,  # ẹ Ẹ
+    0x1ebb, 499,  # ẻ Ẻ
+    0x1ebd, 499,  # ẽ Ẽ
+    0x1ebf, 499,  # ế Ế
+    0x1ec1, 499,  # ề Ề
+    0x1ec3, 499,  # ể Ể
+    0x1ec5, 499,  # ễ Ễ
+    0x1ec7, 499,  # ệ Ệ
+    0x1ec9, 499,  # ỉ Ỉ
+    0x1ecb, 499,  # ị Ị
+    0x1ecd, 499,  # ọ Ọ
+    0x1ecf, 499,  # ỏ Ỏ
+    0x1ed1, 499,  # ố Ố
+    0x1ed3, 499,  # ồ Ồ
+    0x1ed5, 499,  # ổ Ổ
+    0x1ed7, 499,  # ỗ Ỗ
+    0x1ed9, 499,  # ộ Ộ
+    0x1edb, 499,  # ớ Ớ
+    0x1edd, 499,  # ờ Ờ
+    0x1edf, 499,  # ở Ở
+    0x1ee1, 499,  # ỡ Ỡ
+    0x1ee3, 499,  # ợ Ợ
+    0x1ee5, 499,  # ụ Ụ
+    0x1ee7, 499,  # ủ Ủ
+    0x1ee9, 499,  # ứ Ứ
+    0x1eeb, 499,  # ừ Ừ
+    0x1eed, 499,  # ử Ử
+    0x1eef, 499,  # ữ Ữ
+    0x1ef1, 499,  # ự Ự
+    0x1ef3, 499,  # ỳ Ỳ
+    0x1ef5, 499,  # ỵ Ỵ
+    0x1ef7, 499,  # ỷ Ỷ
+    0x1ef9, 499,  # ỹ Ỹ
+    0x1f51, 508,  # ὑ Ὑ
+    0x1f53, 508,  # ὓ Ὓ
+    0x1f55, 508,  # ὕ Ὕ
+    0x1f57, 508,  # ὗ Ὗ
+    0x1fb3, 509,  # ᾳ ᾼ
+    0x1fc3, 509,  # ῃ ῌ
+    0x1fe5, 507,  # ῥ Ῥ
+    0x1ff3, 509]  # ῳ ῼ
+
+  tolowerRanges = [  # ascending (first, last, offset+500) triples; toLower adds the entry minus 500
+    0x0041,  0x005a, 532,  # A-Z a-z
+    0x00c0,  0x00d6, 532,  # À-Ö à-ö
+    0x00d8,  0x00de, 532,  # Ø-Þ ø-þ
+    0x0189,  0x018a, 705,  # Ɖ-Ɗ ɖ-ɗ
+    0x018e,  0x018f, 702,  # Ǝ-Ə ɘ-ə
+    0x01b1,  0x01b2, 717,  # Ʊ-Ʋ ʊ-ʋ
+    0x0388,  0x038a, 537,  # Έ-Ί έ-ί
+    0x038e,  0x038f, 563,  # Ύ-Ώ ύ-ώ
+    0x0391,  0x03a1, 532,  # Α-Ρ α-ρ
+    0x03a3,  0x03ab, 532,  # Σ-Ϋ σ-ϋ
+    0x0401,  0x040c, 580,  # Ё-Ќ ё-ќ
+    0x040e,  0x040f, 580,  # Ў-Џ ў-џ
+    0x0410,  0x042f, 532,  # А-Я а-я
+    0x0531,  0x0556, 548,  # Ա-Ֆ ա-ֆ
+    0x10a0,  0x10c5, 548,  # Ⴀ-Ⴥ ა-ჵ
+    0x1f08,  0x1f0f, 492,  # Ἀ-Ἇ ἀ-ἇ
+    0x1f18,  0x1f1d, 492,  # Ἐ-Ἕ ἐ-ἕ
+    0x1f28,  0x1f2f, 492,  # Ἠ-Ἧ ἠ-ἧ
+    0x1f38,  0x1f3f, 492,  # Ἰ-Ἷ ἰ-ἷ
+    0x1f48,  0x1f4d, 492,  # Ὀ-Ὅ ὀ-ὅ
+    0x1f68,  0x1f6f, 492,  # Ὠ-Ὧ ὠ-ὧ
+    0x1f88,  0x1f8f, 492,  # ᾈ-ᾏ ᾀ-ᾇ
+    0x1f98,  0x1f9f, 492,  # ᾘ-ᾟ ᾐ-ᾗ
+    0x1fa8,  0x1faf, 492,  # ᾨ-ᾯ ᾠ-ᾧ
+    0x1fb8,  0x1fb9, 492,  # Ᾰ-Ᾱ ᾰ-ᾱ
+    0x1fba,  0x1fbb, 426,  # Ὰ-Ά ὰ-ά
+    0x1fc8,  0x1fcb, 414,  # Ὲ-Ή ὲ-ή
+    0x1fd8,  0x1fd9, 492,  # Ῐ-Ῑ ῐ-ῑ
+    0x1fda,  0x1fdb, 400,  # Ὶ-Ί ὶ-ί
+    0x1fe8,  0x1fe9, 492,  # Ῠ-Ῡ ῠ-ῡ
+    0x1fea,  0x1feb, 388,  # Ὺ-Ύ ὺ-ύ
+    0x1ff8,  0x1ff9, 372,  # Ὸ-Ό ὸ-ό
+    0x1ffa,  0x1ffb, 374,  # Ὼ-Ώ ὼ-ώ
+    0x2160,  0x216f, 516,  # Ⅰ-Ⅿ ⅰ-ⅿ
+    0x24b6,  0x24cf, 526,  # Ⓐ-Ⓩ ⓐ-ⓩ
+    0xff21,  0xff3a, 532]  # A-Z a-z
+
+  tolowerSinglets = [  # ascending (character, offset+500) pairs; toLower adds the entry minus 500
+    0x0100, 501,  # Ā ā
+    0x0102, 501,  # Ă ă
+    0x0104, 501,  # Ą ą
+    0x0106, 501,  # Ć ć
+    0x0108, 501,  # Ĉ ĉ
+    0x010a, 501,  # Ċ ċ
+    0x010c, 501,  # Č č
+    0x010e, 501,  # Ď ď
+    0x0110, 501,  # Đ đ
+    0x0112, 501,  # Ē ē
+    0x0114, 501,  # Ĕ ĕ
+    0x0116, 501,  # Ė ė
+    0x0118, 501,  # Ę ę
+    0x011a, 501,  # Ě ě
+    0x011c, 501,  # Ĝ ĝ
+    0x011e, 501,  # Ğ ğ
+    0x0120, 501,  # Ġ ġ
+    0x0122, 501,  # Ģ ģ
+    0x0124, 501,  # Ĥ ĥ
+    0x0126, 501,  # Ħ ħ
+    0x0128, 501,  # Ĩ ĩ
+    0x012a, 501,  # Ī ī
+    0x012c, 501,  # Ĭ ĭ
+    0x012e, 501,  # Į į
+    0x0130, 301,  # İ i
+    0x0132, 501,  # IJ ij
+    0x0134, 501,  # Ĵ ĵ
+    0x0136, 501,  # Ķ ķ
+    0x0139, 501,  # Ĺ ĺ
+    0x013b, 501,  # Ļ ļ
+    0x013d, 501,  # Ľ ľ
+    0x013f, 501,  # Ŀ ŀ
+    0x0141, 501,  # Ł ł
+    0x0143, 501,  # Ń ń
+    0x0145, 501,  # Ņ ņ
+    0x0147, 501,  # Ň ň
+    0x014a, 501,  # Ŋ ŋ
+    0x014c, 501,  # Ō ō
+    0x014e, 501,  # Ŏ ŏ
+    0x0150, 501,  # Ő ő
+    0x0152, 501,  # Œ œ
+    0x0154, 501,  # Ŕ ŕ
+    0x0156, 501,  # Ŗ ŗ
+    0x0158, 501,  # Ř ř
+    0x015a, 501,  # Ś ś
+    0x015c, 501,  # Ŝ ŝ
+    0x015e, 501,  # Ş ş
+    0x0160, 501,  # Š š
+    0x0162, 501,  # Ţ ţ
+    0x0164, 501,  # Ť ť
+    0x0166, 501,  # Ŧ ŧ
+    0x0168, 501,  # Ũ ũ
+    0x016a, 501,  # Ū ū
+    0x016c, 501,  # Ŭ ŭ
+    0x016e, 501,  # Ů ů
+    0x0170, 501,  # Ű ű
+    0x0172, 501,  # Ų ų
+    0x0174, 501,  # Ŵ ŵ
+    0x0176, 501,  # Ŷ ŷ
+    0x0178, 379,  # Ÿ ÿ
+    0x0179, 501,  # Ź ź
+    0x017b, 501,  # Ż ż
+    0x017d, 501,  # Ž ž
+    0x0181, 710,  # Ɓ ɓ
+    0x0182, 501,  # Ƃ ƃ
+    0x0184, 501,  # Ƅ ƅ
+    0x0186, 706,  # Ɔ ɔ
+    0x0187, 501,  # Ƈ ƈ
+    0x018b, 501,  # Ƌ ƌ
+    0x0190, 703,  # Ɛ ɛ
+    0x0191, 501,  # Ƒ ƒ
+    0x0193, 705,  # Ɠ ɠ
+    0x0194, 707,  # Ɣ ɣ
+    0x0196, 711,  # Ɩ ɩ
+    0x0197, 709,  # Ɨ ɨ
+    0x0198, 501,  # Ƙ ƙ
+    0x019c, 711,  # Ɯ ɯ
+    0x019d, 713,  # Ɲ ɲ
+    0x01a0, 501,  # Ơ ơ
+    0x01a2, 501,  # Ƣ ƣ
+    0x01a4, 501,  # Ƥ ƥ
+    0x01a7, 501,  # Ƨ ƨ
+    0x01a9, 718,  # Ʃ ʃ
+    0x01ac, 501,  # Ƭ ƭ
+    0x01ae, 718,  # Ʈ ʈ
+    0x01af, 501,  # Ư ư
+    0x01b3, 501,  # Ƴ ƴ
+    0x01b5, 501,  # Ƶ ƶ
+    0x01b7, 719,  # Ʒ ʒ
+    0x01b8, 501,  # Ƹ ƹ
+    0x01bc, 501,  # Ƽ ƽ
+    0x01c4, 502,  # DŽ dž
+    0x01c5, 501,  # Dž dž
+    0x01c7, 502,  # LJ lj
+    0x01c8, 501,  # Lj lj
+    0x01ca, 502,  # NJ nj
+    0x01cb, 501,  # Nj nj
+    0x01cd, 501,  # Ǎ ǎ
+    0x01cf, 501,  # Ǐ ǐ
+    0x01d1, 501,  # Ǒ ǒ
+    0x01d3, 501,  # Ǔ ǔ
+    0x01d5, 501,  # Ǖ ǖ
+    0x01d7, 501,  # Ǘ ǘ
+    0x01d9, 501,  # Ǚ ǚ
+    0x01db, 501,  # Ǜ ǜ
+    0x01de, 501,  # Ǟ ǟ
+    0x01e0, 501,  # Ǡ ǡ
+    0x01e2, 501,  # Ǣ ǣ
+    0x01e4, 501,  # Ǥ ǥ
+    0x01e6, 501,  # Ǧ ǧ
+    0x01e8, 501,  # Ǩ ǩ
+    0x01ea, 501,  # Ǫ ǫ
+    0x01ec, 501,  # Ǭ ǭ
+    0x01ee, 501,  # Ǯ ǯ
+    0x01f1, 502,  # DZ dz
+    0x01f2, 501,  # Dz dz
+    0x01f4, 501,  # Ǵ ǵ
+    0x01fa, 501,  # Ǻ ǻ
+    0x01fc, 501,  # Ǽ ǽ
+    0x01fe, 501,  # Ǿ ǿ
+    0x0200, 501,  # Ȁ ȁ
+    0x0202, 501,  # Ȃ ȃ
+    0x0204, 501,  # Ȅ ȅ
+    0x0206, 501,  # Ȇ ȇ
+    0x0208, 501,  # Ȉ ȉ
+    0x020a, 501,  # Ȋ ȋ
+    0x020c, 501,  # Ȍ ȍ
+    0x020e, 501,  # Ȏ ȏ
+    0x0210, 501,  # Ȑ ȑ
+    0x0212, 501,  # Ȓ ȓ
+    0x0214, 501,  # Ȕ ȕ
+    0x0216, 501,  # Ȗ ȗ
+    0x0386, 538,  # Ά ά
+    0x038c, 564,  # Ό ό
+    0x03e2, 501,  # Ϣ ϣ
+    0x03e4, 501,  # Ϥ ϥ
+    0x03e6, 501,  # Ϧ ϧ
+    0x03e8, 501,  # Ϩ ϩ
+    0x03ea, 501,  # Ϫ ϫ
+    0x03ec, 501,  # Ϭ ϭ
+    0x03ee, 501,  # Ϯ ϯ
+    0x0460, 501,  # Ѡ ѡ
+    0x0462, 501,  # Ѣ ѣ
+    0x0464, 501,  # Ѥ ѥ
+    0x0466, 501,  # Ѧ ѧ
+    0x0468, 501,  # Ѩ ѩ
+    0x046a, 501,  # Ѫ ѫ
+    0x046c, 501,  # Ѭ ѭ
+    0x046e, 501,  # Ѯ ѯ
+    0x0470, 501,  # Ѱ ѱ
+    0x0472, 501,  # Ѳ ѳ
+    0x0474, 501,  # Ѵ ѵ
+    0x0476, 501,  # Ѷ ѷ
+    0x0478, 501,  # Ѹ ѹ
+    0x047a, 501,  # Ѻ ѻ
+    0x047c, 501,  # Ѽ ѽ
+    0x047e, 501,  # Ѿ ѿ
+    0x0480, 501,  # Ҁ ҁ
+    0x0490, 501,  # Ґ ґ
+    0x0492, 501,  # Ғ ғ
+    0x0494, 501,  # Ҕ ҕ
+    0x0496, 501,  # Җ җ
+    0x0498, 501,  # Ҙ ҙ
+    0x049a, 501,  # Қ қ
+    0x049c, 501,  # Ҝ ҝ
+    0x049e, 501,  # Ҟ ҟ
+    0x04a0, 501,  # Ҡ ҡ
+    0x04a2, 501,  # Ң ң
+    0x04a4, 501,  # Ҥ ҥ
+    0x04a6, 501,  # Ҧ ҧ
+    0x04a8, 501,  # Ҩ ҩ
+    0x04aa, 501,  # Ҫ ҫ
+    0x04ac, 501,  # Ҭ ҭ
+    0x04ae, 501,  # Ү ү
+    0x04b0, 501,  # Ұ ұ
+    0x04b2, 501,  # Ҳ ҳ
+    0x04b4, 501,  # Ҵ ҵ
+    0x04b6, 501,  # Ҷ ҷ
+    0x04b8, 501,  # Ҹ ҹ
+    0x04ba, 501,  # Һ һ
+    0x04bc, 501,  # Ҽ ҽ
+    0x04be, 501,  # Ҿ ҿ
+    0x04c1, 501,  # Ӂ ӂ
+    0x04c3, 501,  # Ӄ ӄ
+    0x04c7, 501,  # Ӈ ӈ
+    0x04cb, 501,  # Ӌ ӌ
+    0x04d0, 501,  # Ӑ ӑ
+    0x04d2, 501,  # Ӓ ӓ
+    0x04d4, 501,  # Ӕ ӕ
+    0x04d6, 501,  # Ӗ ӗ
+    0x04d8, 501,  # Ә ә
+    0x04da, 501,  # Ӛ ӛ
+    0x04dc, 501,  # Ӝ ӝ
+    0x04de, 501,  # Ӟ ӟ
+    0x04e0, 501,  # Ӡ ӡ
+    0x04e2, 501,  # Ӣ ӣ
+    0x04e4, 501,  # Ӥ ӥ
+    0x04e6, 501,  # Ӧ ӧ
+    0x04e8, 501,  # Ө ө
+    0x04ea, 501,  # Ӫ ӫ
+    0x04ee, 501,  # Ӯ ӯ
+    0x04f0, 501,  # Ӱ ӱ
+    0x04f2, 501,  # Ӳ ӳ
+    0x04f4, 501,  # Ӵ ӵ
+    0x04f8, 501,  # Ӹ ӹ
+    0x1e00, 501,  # Ḁ ḁ
+    0x1e02, 501,  # Ḃ ḃ
+    0x1e04, 501,  # Ḅ ḅ
+    0x1e06, 501,  # Ḇ ḇ
+    0x1e08, 501,  # Ḉ ḉ
+    0x1e0a, 501,  # Ḋ ḋ
+    0x1e0c, 501,  # Ḍ ḍ
+    0x1e0e, 501,  # Ḏ ḏ
+    0x1e10, 501,  # Ḑ ḑ
+    0x1e12, 501,  # Ḓ ḓ
+    0x1e14, 501,  # Ḕ ḕ
+    0x1e16, 501,  # Ḗ ḗ
+    0x1e18, 501,  # Ḙ ḙ
+    0x1e1a, 501,  # Ḛ ḛ
+    0x1e1c, 501,  # Ḝ ḝ
+    0x1e1e, 501,  # Ḟ ḟ
+    0x1e20, 501,  # Ḡ ḡ
+    0x1e22, 501,  # Ḣ ḣ
+    0x1e24, 501,  # Ḥ ḥ
+    0x1e26, 501,  # Ḧ ḧ
+    0x1e28, 501,  # Ḩ ḩ
+    0x1e2a, 501,  # Ḫ ḫ
+    0x1e2c, 501,  # Ḭ ḭ
+    0x1e2e, 501,  # Ḯ ḯ
+    0x1e30, 501,  # Ḱ ḱ
+    0x1e32, 501,  # Ḳ ḳ
+    0x1e34, 501,  # Ḵ ḵ
+    0x1e36, 501,  # Ḷ ḷ
+    0x1e38, 501,  # Ḹ ḹ
+    0x1e3a, 501,  # Ḻ ḻ
+    0x1e3c, 501,  # Ḽ ḽ
+    0x1e3e, 501,  # Ḿ ḿ
+    0x1e40, 501,  # Ṁ ṁ
+    0x1e42, 501,  # Ṃ ṃ
+    0x1e44, 501,  # Ṅ ṅ
+    0x1e46, 501,  # Ṇ ṇ
+    0x1e48, 501,  # Ṉ ṉ
+    0x1e4a, 501,  # Ṋ ṋ
+    0x1e4c, 501,  # Ṍ ṍ
+    0x1e4e, 501,  # Ṏ ṏ
+    0x1e50, 501,  # Ṑ ṑ
+    0x1e52, 501,  # Ṓ ṓ
+    0x1e54, 501,  # Ṕ ṕ
+    0x1e56, 501,  # Ṗ ṗ
+    0x1e58, 501,  # Ṙ ṙ
+    0x1e5a, 501,  # Ṛ ṛ
+    0x1e5c, 501,  # Ṝ ṝ
+    0x1e5e, 501,  # Ṟ ṟ
+    0x1e60, 501,  # Ṡ ṡ
+    0x1e62, 501,  # Ṣ ṣ
+    0x1e64, 501,  # Ṥ ṥ
+    0x1e66, 501,  # Ṧ ṧ
+    0x1e68, 501,  # Ṩ ṩ
+    0x1e6a, 501,  # Ṫ ṫ
+    0x1e6c, 501,  # Ṭ ṭ
+    0x1e6e, 501,  # Ṯ ṯ
+    0x1e70, 501,  # Ṱ ṱ
+    0x1e72, 501,  # Ṳ ṳ
+    0x1e74, 501,  # Ṵ ṵ
+    0x1e76, 501,  # Ṷ ṷ
+    0x1e78, 501,  # Ṹ ṹ
+    0x1e7a, 501,  # Ṻ ṻ
+    0x1e7c, 501,  # Ṽ ṽ
+    0x1e7e, 501,  # Ṿ ṿ
+    0x1e80, 501,  # Ẁ ẁ
+    0x1e82, 501,  # Ẃ ẃ
+    0x1e84, 501,  # Ẅ ẅ
+    0x1e86, 501,  # Ẇ ẇ
+    0x1e88, 501,  # Ẉ ẉ
+    0x1e8a, 501,  # Ẋ ẋ
+    0x1e8c, 501,  # Ẍ ẍ
+    0x1e8e, 501,  # Ẏ ẏ
+    0x1e90, 501,  # Ẑ ẑ
+    0x1e92, 501,  # Ẓ ẓ
+    0x1e94, 501,  # Ẕ ẕ
+    0x1ea0, 501,  # Ạ ạ
+    0x1ea2, 501,  # Ả ả
+    0x1ea4, 501,  # Ấ ấ
+    0x1ea6, 501,  # Ầ ầ
+    0x1ea8, 501,  # Ẩ ẩ
+    0x1eaa, 501,  # Ẫ ẫ
+    0x1eac, 501,  # Ậ ậ
+    0x1eae, 501,  # Ắ ắ
+    0x1eb0, 501,  # Ằ ằ
+    0x1eb2, 501,  # Ẳ ẳ
+    0x1eb4, 501,  # Ẵ ẵ
+    0x1eb6, 501,  # Ặ ặ
+    0x1eb8, 501,  # Ẹ ẹ
+    0x1eba, 501,  # Ẻ ẻ
+    0x1ebc, 501,  # Ẽ ẽ
+    0x1ebe, 501,  # Ế ế
+    0x1ec0, 501,  # Ề ề
+    0x1ec2, 501,  # Ể ể
+    0x1ec4, 501,  # Ễ ễ
+    0x1ec6, 501,  # Ệ ệ
+    0x1ec8, 501,  # Ỉ ỉ
+    0x1eca, 501,  # Ị ị
+    0x1ecc, 501,  # Ọ ọ
+    0x1ece, 501,  # Ỏ ỏ
+    0x1ed0, 501,  # Ố ố
+    0x1ed2, 501,  # Ồ ồ
+    0x1ed4, 501,  # Ổ ổ
+    0x1ed6, 501,  # Ỗ ỗ
+    0x1ed8, 501,  # Ộ ộ
+    0x1eda, 501,  # Ớ ớ
+    0x1edc, 501,  # Ờ ờ
+    0x1ede, 501,  # Ở ở
+    0x1ee0, 501,  # Ỡ ỡ
+    0x1ee2, 501,  # Ợ ợ
+    0x1ee4, 501,  # Ụ ụ
+    0x1ee6, 501,  # Ủ ủ
+    0x1ee8, 501,  # Ứ ứ
+    0x1eea, 501,  # Ừ ừ
+    0x1eec, 501,  # Ử ử
+    0x1eee, 501,  # Ữ ữ
+    0x1ef0, 501,  # Ự ự
+    0x1ef2, 501,  # Ỳ ỳ
+    0x1ef4, 501,  # Ỵ ỵ
+    0x1ef6, 501,  # Ỷ ỷ
+    0x1ef8, 501,  # Ỹ ỹ
+    0x1f59, 492,  # Ὑ ὑ
+    0x1f5b, 492,  # Ὓ ὓ
+    0x1f5d, 492,  # Ὕ ὕ
+    0x1f5f, 492,  # Ὗ ὗ
+    0x1fbc, 491,  # ᾼ ᾳ
+    0x1fcc, 491,  # ῌ ῃ
+    0x1fec, 493,  # Ῥ ῥ
+    0x1ffc, 491]  # ῼ ῳ
+
+  toTitleSinglets = [  # ascending (character, offset+500) pairs; toTitle adds the entry minus 500
+    0x01c4, 501,  # DŽ Dž
+    0x01c6, 499,  # dž Dž
+    0x01c7, 501,  # LJ Lj
+    0x01c9, 499,  # lj Lj
+    0x01ca, 501,  # NJ Nj
+    0x01cc, 499,  # nj Nj
+    0x01f1, 501,  # DZ Dz
+    0x01f3, 499]  # dz Dz
+
+proc binarySearch(c: TRune, tab: openArray[TRune], len, stride: int): int = 
+  ## Searches `tab`, viewed as `len` records of `stride` elements each whose
+  ## first elements are in ascending order, for the last record whose first
+  ## element is not greater than `c`.
+  ##
+  ## Returns the index into `tab` of that record's first element, or -1 when
+  ## `len` is 0 or `c` is smaller than the first element of the first record.
+  ## Callers still have to check that `c` really belongs to the record.
+  var remaining = len   # number of records still under consideration
+  var base = 0          # index of the first element of the candidate window
+  while remaining > 1:
+    var half = remaining div 2
+    var probe = base + half*stride
+    if c < tab[probe]:
+      # the answer lies strictly before the probed record
+      remaining = half
+    else:
+      base = probe
+      remaining = remaining - half
+  if remaining == 0 or c < tab[base]:
+    result = -1
+  else:
+    result = base
+
+proc toLower*(c: TRune): TRune = 
+  ## Converts `c` into lower case. Any Unicode character may be passed; one
+  ## that has no entry in the lower case tables is returned unchanged.
+  result = c
+  var idx = binarySearch(c, tolowerRanges, len(tolowerRanges) div 3, 3)
+  if idx >= 0 and tolowerRanges[idx] <= c and c <= tolowerRanges[idx+1]:
+    # the third element of a range record is the offset biased by 500
+    result = c + tolowerRanges[idx+2] - 500
+  else:
+    idx = binarySearch(c, tolowerSinglets, len(tolowerSinglets) div 2, 2)
+    if idx >= 0 and tolowerSinglets[idx] == c:
+      result = c + tolowerSinglets[idx+1] - 500
+
+proc toUpper*(c: TRune): TRune = 
+  ## Converts `c` into upper case. Any Unicode character may be passed; one
+  ## that has no entry in the upper case tables is returned unchanged.
+  result = c
+  var idx = binarySearch(c, toupperRanges, len(toupperRanges) div 3, 3)
+  if idx >= 0 and toupperRanges[idx] <= c and c <= toupperRanges[idx+1]:
+    # the third element of a range record is the offset biased by 500
+    result = c + toupperRanges[idx+2] - 500
+  else:
+    idx = binarySearch(c, toupperSinglets, len(toupperSinglets) div 2, 2)
+    if idx >= 0 and toupperSinglets[idx] == c:
+      result = c + toupperSinglets[idx+1] - 500
+
+proc toTitle*(c: TRune): TRune = 
+  ## Converts `c` into title case. Only the characters listed in
+  ## `toTitleSinglets` are mapped; every other character is returned
+  ## unchanged.
+  result = c
+  var idx = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2)
+  if idx >= 0 and toTitleSinglets[idx] == c:
+    # the second element of a record is the offset biased by 500
+    result = c + toTitleSinglets[idx+1] - 500
+
+proc isLower*(c: TRune): bool = 
+  ## returns true iff `c` is a lower case Unicode character
+  # The to-upper tables are consulted on purpose: a character counts as lower
+  # case exactly when it has an entry that maps it to upper case.
+  var idx = binarySearch(c, toupperRanges, len(toupperRanges) div 3, 3)
+  result = idx >= 0 and toupperRanges[idx] <= c and c <= toupperRanges[idx+1]
+  if not result:
+    idx = binarySearch(c, toupperSinglets, len(toupperSinglets) div 2, 2)
+    result = idx >= 0 and toupperSinglets[idx] == c
+
+proc isUpper*(c: TRune): bool = 
+  ## returns true iff `c` is an upper case Unicode character
+  # The to-lower tables are consulted on purpose: a character counts as upper
+  # case exactly when it has an entry that maps it to lower case.
+  var idx = binarySearch(c, tolowerRanges, len(tolowerRanges) div 3, 3)
+  result = idx >= 0 and tolowerRanges[idx] <= c and c <= tolowerRanges[idx+1]
+  if not result:
+    idx = binarySearch(c, tolowerSinglets, len(tolowerSinglets) div 2, 2)
+    result = idx >= 0 and tolowerSinglets[idx] == c
+
+proc isAlpha*(c: TRune): bool = 
+  ## returns true iff `c` is an *alpha* Unicode character (i.e. a letter)
+  if isUpper(c) or isLower(c): 
+    # every character with a case mapping is a letter
+    result = true
+  else:
+    # letters without case: first the ranges, then the isolated characters
+    var idx = binarySearch(c, alphaRanges, len(alphaRanges) div 2, 2)
+    if idx >= 0 and alphaRanges[idx] <= c and c <= alphaRanges[idx+1]:
+      result = true
+    else:
+      idx = binarySearch(c, alphaSinglets, len(alphaSinglets), 1)
+      result = idx >= 0 and alphaSinglets[idx] == c
+  
+proc isTitle*(c: TRune): bool = 
+  ## returns true iff both `isUpper(c)` and `isLower(c)` hold, i.e. `c` has an
+  ## entry in the to-lower tables as well as in the to-upper tables
+  result = isUpper(c) and isLower(c)
+
+proc isWhiteSpace*(c: TRune): bool = 
+  ## returns true iff `c` is a Unicode whitespace character, i.e. it lies in
+  ## one of the inclusive ranges of `spaceRanges`
+  var idx = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2)
+  result = idx >= 0 and spaceRanges[idx] <= c and c <= spaceRanges[idx+1]
+
+iterator runes*(s: string): TRune =
+  ## Yields the Unicode characters of the UTF-8 string `s` one after another,
+  ## from the first byte to the end of the string.
+  var pos = 0        # byte index of the next sequence to decode
+  var rune: TRune
+  while pos < len(s):
+    # decodes one sequence into `rune` and moves `pos` past it
+    fastRuneAt(s, pos, rune)
+    yield rune
+
+proc cmpRunesIgnoreCase*(a, b: string): int = 
+  ## compares two UTF8 strings and ignores the case. Returns:
+  ##
+  ## | 0 iff a == b
+  ## | < 0 iff a < b
+  ## | > 0 iff a > b
+  ##
+  ## The strings are decoded character by character and compared after
+  ## `toLower`. When one string is a prefix of the other (ignoring case) the
+  ## shorter one is the smaller.
+  var i = 0
+  var j = 0
+  var ar, br: TRune
+  while i < a.len and j < b.len:
+    # slow path:
+    fastRuneAt(a, i, ar)
+    fastRuneAt(b, j, br)
+    result = toLower(ar) - toLower(br)
+    if result != 0: return
+  # All compared characters were equal, so the order is decided by which
+  # string still has undecoded bytes. Total byte lengths must not be used:
+  # characters that are equal ignoring case can have encodings of different
+  # size (U+0130 takes two bytes and lower-cases to the one byte "i").
+  result = (a.len - i) - (b.len - j)
+