about summary refs log tree commit diff stats
path: root/src/data
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2023-08-14 21:38:49 +0200
committer bptato <nincsnevem662@gmail.com> 2023-08-14 21:38:49 +0200
commit 6b0b7ccfc571b1df8bfbce26703f64e89861f779 (patch)
tree 68e87d16b5285b29ab8ecdbbf887821cc1e0b67e /src/data
parent 5cca932e90387781cfa14ae77d587c25c2e0dcf4 (diff)
download chawan-6b0b7ccfc571b1df8bfbce26703f64e89861f779.tar.gz
Move charsets into chakasu
Operation "modularize Chawan somewhat" part 1
Diffstat (limited to 'src/data')
-rw-r--r-- src/data/charset.nim 458
1 files changed, 0 insertions, 458 deletions
diff --git a/src/data/charset.nim b/src/data/charset.nim
deleted file mode 100644
index f8f833d5..00000000
--- a/src/data/charset.nim
+++ /dev/null
@@ -1,458 +0,0 @@
-import algorithm
-import os
-import strutils
-import tables
-
-import utils/twtstr
-
-type Charset* = enum
-  CHARSET_UNKNOWN
-  CHARSET_UTF_8 = "UTF-8"
-  CHARSET_IBM866 = "IBM866"
-  CHARSET_ISO_8859_2 = "ISO-8859-2"
-  CHARSET_ISO_8859_3 = "ISO-8859-3"
-  CHARSET_ISO_8859_4 = "ISO-8859-4"
-  CHARSET_ISO_8859_5 = "ISO-8859-5"
-  CHARSET_ISO_8859_6 = "ISO-8859-6"
-  CHARSET_ISO_8859_7 = "ISO-8859-7"
-  CHARSET_ISO_8859_8 = "ISO-8859-8"
-  CHARSET_ISO_8859_8_I = "ISO-8859-8-I"
-  CHARSET_ISO_8859_10 = "ISO-8859-10"
-  CHARSET_ISO_8859_13 = "ISO-8859-13"
-  CHARSET_ISO_8859_14 = "ISO-8859-14"
-  CHARSET_ISO_8859_15 = "ISO-8859-15"
-  CHARSET_ISO_8859_16 = "ISO-8859-16"
-  CHARSET_KOI8_R = "KOI8-R"
-  CHARSET_KOI8_U = "KOI8-U"
-  CHARSET_MACINTOSH = "macintosh"
-  CHARSET_WINDOWS_874 = "windows-874"
-  CHARSET_WINDOWS_1250 = "windows-1250"
-  CHARSET_WINDOWS_1251 = "windows-1251"
-  CHARSET_WINDOWS_1252 = "windows-1252"
-  CHARSET_WINDOWS_1253 = "windows-1253"
-  CHARSET_WINDOWS_1254 = "windows-1254"
-  CHARSET_WINDOWS_1255 = "windows-1255"
-  CHARSET_WINDOWS_1256 = "windows-1256"
-  CHARSET_WINDOWS_1257 = "windows-1257"
-  CHARSET_WINDOWS_1258 = "windows-1258"
-  CHARSET_X_MAC_CYRILLIC = "x-mac-cyrillic"
-  CHARSET_GBK = "GBK"
-  CHARSET_GB18030 = "gb18030"
-  CHARSET_BIG5 = "Big5"
-  CHARSET_EUC_JP = "EUC-JP"
-  CHARSET_ISO_2022_JP = "ISO-2022-JP"
-  CHARSET_SHIFT_JIS = "Shift_JIS"
-  CHARSET_EUC_KR = "EUC-KR"
-  CHARSET_REPLACEMENT = "replacement"
-  CHARSET_UTF_16_BE = "UTF-16BE"
-  CHARSET_UTF_16_LE = "UTF-16LE"
-  CHARSET_X_USER_DEFINED = "x-user-defined"
-
-const CharsetMap = {
-  # UTF-8 (The Encoding)
-  "unicode-1-1-utf-8": CHARSET_UTF_8,
-  "unicode11utf-8": CHARSET_UTF_8,
-  "unicode20utf-8": CHARSET_UTF_8,
-  "utf-8": CHARSET_UTF_8,
-  "utf8": CHARSET_UTF_8,
-  "x-unicode20utf8": CHARSET_UTF_8,
-  # IBM866
-  "866": CHARSET_IBM_866,
-  "cp866": CHARSET_IBM_866,
-  "csibm866": CHARSET_IBM_866,
-  "ibm866": CHARSET_IBM_866,
-  # ISO-8859-2
-  "csisolatin2": CHARSET_ISO_8859_2,
-  "iso-8859-2": CHARSET_ISO_8859_2,
-  "iso-ir-101": CHARSET_ISO_8859_2,
-  "iso8859-2": CHARSET_ISO_8859_2,
-  "iso88592": CHARSET_ISO_8859_2,
-  "iso_8859-2": CHARSET_ISO_8859_2,
-  "iso_8859-2:1987": CHARSET_ISO_8859_2,
-  "l2": CHARSET_ISO_8859_2,
-  "latin2": CHARSET_ISO_8859_2,
-  # ISO-8859-3
-  "csisolatin3": CHARSET_ISO_8859_3,
-  "iso-8859-3": CHARSET_ISO_8859_3,
-  "iso-ir-109": CHARSET_ISO_8859_3,
-  "iso8859-3": CHARSET_ISO_8859_3,
-  "iso88593": CHARSET_ISO_8859_3,
-  "iso_8859-3": CHARSET_ISO_8859_3,
-  "iso_8859-3:1988": CHARSET_ISO_8859_3,
-  "l3": CHARSET_ISO_8859_3,
-  "latin3": CHARSET_ISO_8859_3,
-  # ISO-8859-4
-  "csisolatin4": CHARSET_ISO_8859_4,
-  "iso-8859-4": CHARSET_ISO_8859_4,
-  "iso-ir-110": CHARSET_ISO_8859_4,
-  "iso8859-4": CHARSET_ISO_8859_4,
-  "iso88594": CHARSET_ISO_8859_4,
-  "iso_8859-4": CHARSET_ISO_8859_4,
-  "iso_8859-4:1988": CHARSET_ISO_8859_4,
-  "l4": CHARSET_ISO_8859_4,
-  "latin4": CHARSET_ISO_8859_4,
-  # ISO-8859-5
-  "csisolatincyrillic": CHARSET_ISO_8859_5,
-  "cyrillic": CHARSET_ISO_8859_5,
-  "iso-8859-5": CHARSET_ISO_8859_5,
-  "iso-ir-144": CHARSET_ISO_8859_5,
-  "iso8859-5": CHARSET_ISO_8859_5,
-  "iso88595": CHARSET_ISO_8859_5,
-  "iso_8859-5": CHARSET_ISO_8859_5,
-  "iso_8859-5:1988": CHARSET_ISO_8859_5,
-  # ISO-8859-6
-  "arabic": CHARSET_ISO_8859_6,
-  "asmo-708": CHARSET_ISO_8859_6,
-  "csiso88596e": CHARSET_ISO_8859_6,
-  "csiso88596i": CHARSET_ISO_8859_6,
-  "csisolatinarabic": CHARSET_ISO_8859_6,
-  "ecma-114": CHARSET_ISO_8859_6,
-  "iso-8859-6": CHARSET_ISO_8859_6,
-  "iso-8859-6-e": CHARSET_ISO_8859_6,
-  "iso-8859-6-i": CHARSET_ISO_8859_6,
-  "iso-ir-127": CHARSET_ISO_8859_6,
-  "iso8859-6": CHARSET_ISO_8859_6,
-  "iso88596": CHARSET_ISO_8859_6,
-  "iso_8859-6": CHARSET_ISO_8859_6,
-  "iso_8859-6:1987": CHARSET_ISO_8859_6,
-  # ISO-8859-7
-  "csisolatingreek": CHARSET_ISO_8859_7,
-  "ecma-118": CHARSET_ISO_8859_7,
-  "elot_928": CHARSET_ISO_8859_7,
-  "greek": CHARSET_ISO_8859_7,
-  "greek8": CHARSET_ISO_8859_7,
-  "iso-8859-7": CHARSET_ISO_8859_7,
-  "iso-ir-126": CHARSET_ISO_8859_7,
-  "iso8859-7": CHARSET_ISO_8859_7,
-  "iso88597": CHARSET_ISO_8859_7,
-  "iso_8859-7": CHARSET_ISO_8859_7,
-  "iso_8859-7:1987": CHARSET_ISO_8859_7,
-  "sun_eu_greek": CHARSET_ISO_8859_7,
-  # ISO-8859-8
-  "csiso88598e": CHARSET_ISO_8859_8,
-  "csisolatinhebrew": CHARSET_ISO_8859_8,
-  "hebrew": CHARSET_ISO_8859_8,
-  "iso-8859-8": CHARSET_ISO_8859_8,
-  "iso-8859-8-e": CHARSET_ISO_8859_8,
-  "iso-ir-138": CHARSET_ISO_8859_8,
-  "iso8859-8": CHARSET_ISO_8859_8,
-  "iso88598": CHARSET_ISO_8859_8,
-  "iso_8859-8": CHARSET_ISO_8859_8,
-  "iso_8859-8:1988": CHARSET_ISO_8859_8,
-  "visual": CHARSET_ISO_8859_8,
-  # ISO-8859-8-I
-  "csiso88598i": CHARSET_ISO_8859_8_I,
-  "iso-8859-8-i": CHARSET_ISO_8859_8_I,
-  "logical": CHARSET_ISO_8859_8_I,
-  # ISO-8859-10
-  "csisolatin6": CHARSET_ISO_8859_10,
-  "iso-8859-10": CHARSET_ISO_8859_10,
-  "iso-ir-157": CHARSET_ISO_8859_10,
-  "iso8859-10": CHARSET_ISO_8859_10,
-  "iso885910": CHARSET_ISO_8859_10,
-  "l6": CHARSET_ISO_8859_10,
-  "latin6": CHARSET_ISO_8859_10,
-  # ISO-8859-13
-  "iso-8859-13": CHARSET_ISO_8859_13,
-  "iso8859-13": CHARSET_ISO_8859_13,
-  "iso885913": CHARSET_ISO_8859_13,
-  # ISO-8859-14
-  "iso-8859-14": CHARSET_ISO_8859_14,
-  "iso8859-14": CHARSET_ISO_8859_14,
-  "iso885914": CHARSET_ISO_8859_14,
-  # ISO-8859-15
-  "csisolatin9": CHARSET_ISO_8859_15,
-  "iso-8859-15": CHARSET_ISO_8859_15,
-  "iso8859-15": CHARSET_ISO_8859_15,
-  "iso885915": CHARSET_ISO_8859_15,
-  "iso_8859-15": CHARSET_ISO_8859_15,
-  "l9": CHARSET_ISO_8859_15,
-  # ISO-8859-16
-  "iso-8859-16": CHARSET_ISO_8859_16,
-  # KOI8-R
-  "cskoi8r": CHARSET_KOI8_R,
-  "koi": CHARSET_KOI8_R,
-  "koi8": CHARSET_KOI8_R,
-  "koi8-r": CHARSET_KOI8_R,
-  "koi8_r": CHARSET_KOI8_R,
-  # KOI8-U
-  "koi8-ru": CHARSET_KOI8_U,
-  "koi8-u": CHARSET_KOI8_U,
-  # macintosh
-  "csmacintosh": CHARSET_MACINTOSH,
-  "mac": CHARSET_MACINTOSH,
-  "macintosh": CHARSET_MACINTOSH,
-  "x-mac-roman": CHARSET_MACINTOSH,
-  # windows-874
-  "dos-874": CHARSET_WINDOWS_874,
-  "iso-8859-11": CHARSET_WINDOWS_874,
-  "iso8859-11": CHARSET_WINDOWS_874,
-  "iso885911": CHARSET_WINDOWS_874,
-  "tis-620": CHARSET_WINDOWS_874,
-  "windows-874": CHARSET_WINDOWS_874,
-  # windows-1250
-  "cp1250": CHARSET_WINDOWS_1250,
-  "windows-1250": CHARSET_WINDOWS_1250,
-  "x-cp1250" : CHARSET_WINDOWS_1250,
-  # windows-1251
-  "cp1251": CHARSET_WINDOWS_1251,
-  "windows-1251": CHARSET_WINDOWS_1251,
-  "x-cp1251": CHARSET_WINDOWS_1251,
-  # windows-1252
-  "ansi_x3.4-1968": CHARSET_WINDOWS_1252,
-  "ascii": CHARSET_WINDOWS_1252, # lol
-  "cp1252": CHARSET_WINDOWS_1252,
-  "cp819": CHARSET_WINDOWS_1252,
-  "csisolatin1": CHARSET_WINDOWS_1252,
-  "ibm819": CHARSET_WINDOWS_1252,
-  "iso-8859-1": CHARSET_WINDOWS_1252,
-  "iso88591": CHARSET_WINDOWS_1252,
-  "iso_8859-1:1987": CHARSET_WINDOWS_1252,
-  "l1": CHARSET_WINDOWS_1252,
-  "latin1": CHARSET_WINDOWS_1252,
-  "us-ascii": CHARSET_WINDOWS_1252,
-  "windows-1252": CHARSET_WINDOWS_1252,
-  "x-cp1252": CHARSET_WINDOWS_1252,
-  # windows-1253
-  "cp1253": CHARSET_WINDOWS_1253,
-  "windows-1253": CHARSET_WINDOWS_1253,
-  "x-cp1253": CHARSET_WINDOWS_1253,
-  # windows-1254
-  "cp1254": CHARSET_WINDOWS_1254,
-  "csisolatin5": CHARSET_WINDOWS_1254,
-  "iso-8859-9": CHARSET_WINDOWS_1254,
-  "iso-ir-148": CHARSET_WINDOWS_1254,
-  "iso8859-9": CHARSET_WINDOWS_1254,
-  "iso88599": CHARSET_WINDOWS_1254,
-  "iso_8859-9": CHARSET_WINDOWS_1254,
-  "iso_8859-9:1989": CHARSET_WINDOWS_1254,
-  "l5": CHARSET_WINDOWS_1254,
-  "latin5": CHARSET_WINDOWS_1254,
-  "windows-1254": CHARSET_WINDOWS_1254,
-  "x-cp1254": CHARSET_WINDOWS_1254,
-  # windows-1255
-  "cp1255": CHARSET_WINDOWS_1255,
-  "windows-1255": CHARSET_WINDOWS_1255,
-  "x-cp1255": CHARSET_WINDOWS_1255,
-  # windows-1256
-  "cp1256": CHARSET_WINDOWS_1256,
-  "windows-1256": CHARSET_WINDOWS_1256,
-  "x-cp1256": CHARSET_WINDOWS_1256,
-  # windows-1257
-  "cp1257": CHARSET_WINDOWS_1257,
-  "windows-1257": CHARSET_WINDOWS_1257,
-  "x-cp1257": CHARSET_WINDOWS_1257,
-  # windows-1258
-  "cp1258": CHARSET_WINDOWS_1258,
-  "windows-1258": CHARSET_WINDOWS_1258,
-  "x-cp1258": CHARSET_WINDOWS_1258,
-  # x-mac-cyrillic
-  "x-mac-cyrillic": CHARSET_X_MAC_CYRILLIC,
-  "x-mac-ukrainian": CHARSET_X_MAC_CYRILLIC,
-  # GBK
-  "chinese": CHARSET_GBK,
-  "csgb2312": CHARSET_GBK,
-  "csiso58gb231280": CHARSET_GBK,
-  "gb2312": CHARSET_GBK,
-  "gb_2312": CHARSET_GBK,
-  "gb_2312-80": CHARSET_GBK,
-  "gbk": CHARSET_GBK,
-  "iso-ir-58": CHARSET_GBK,
-  "x-gbk": CHARSET_GBK,
-  # gb18030
-  "gb18030": CHARSET_GB18030,
-  # Big5
-  "big5": CHARSET_BIG5,
-  "big5-hkscs": CHARSET_BIG5,
-  "cn-big5": CHARSET_BIG5,
-  "csbig5": CHARSET_BIG5,
-  "x-x-big5": CHARSET_BIG5,
-  # EUC-JP
-  "cseucpkdfmtjapanese": CHARSET_EUC_JP,
-  "euc-jp": CHARSET_EUC_JP,
-  "x-euc-jp": CHARSET_EUC_JP,
-  # ISO-2022-JP (ugh)
-  "csiso2022jp": CHARSET_ISO_2022_JP,
-  "iso-2022-jp": CHARSET_ISO_2022_JP,
-  # Shift_JIS
-  "csshiftjis": CHARSET_SHIFT_JIS,
-  "ms932": CHARSET_SHIFT_JIS,
-  "ms_kanji": CHARSET_SHIFT_JIS,
-  "shift-jis": CHARSET_SHIFT_JIS,
-  "shift_jis": CHARSET_SHIFT_JIS,
-  "sjis": CHARSET_SHIFT_JIS,
-  "windows-31j": CHARSET_SHIFT_JIS,
-  "x-sjis": CHARSET_SHIFT_JIS,
-  # EUC-KR
-  "cseuckr": CHARSET_EUC_KR,
-  "csksc56011987": CHARSET_EUC_KR,
-  "euc-kr": CHARSET_EUC_KR,
-  "iso-ir-149": CHARSET_EUC_KR,
-  "korean": CHARSET_EUC_KR,
-  "ks_c_5601-1987": CHARSET_EUC_KR,
-  "ks_c_5601-1989": CHARSET_EUC_KR,
-  "ksc5601": CHARSET_EUC_KR,
-  "ksc_5601": CHARSET_EUC_KR,
-  "windows-949": CHARSET_EUC_KR,
-  # replacement
-  "csiso2022kr": CHARSET_REPLACEMENT,
-  "hz-gb-2312": CHARSET_REPLACEMENT,
-  "iso-2022-cn": CHARSET_REPLACEMENT,
-  "iso-2022-cn-ext": CHARSET_REPLACEMENT,
-  "iso-2022-kr": CHARSET_REPLACEMENT,
-  "replacement": CHARSET_REPLACEMENT,
-  # UTF-16BE
-  "unicodefffe": CHARSET_UTF_16_BE,
-  "utf-16be": CHARSET_UTF_16_BE,
-  # UTF-16LE
-  "csunicode": CHARSET_UTF_16_LE,
-  "iso-10646-ucs-2": CHARSET_UTF_16_LE,
-  "ucs-2": CHARSET_UTF_16_LE,
-  "unicode": CHARSET_UTF_16_LE,
-  "unicodefeff": CHARSET_UTF_16_LE,
-  "utf-16": CHARSET_UTF_16_LE,
-  "utf-16le": CHARSET_UTF_16_LE,
-  # x-user-defined
-  "x-user-defined": CHARSET_X_USER_DEFINED
-}.toTable()
-
-const NormalizedCharsetMap = (func(): Table[string, Charset] =
-  for k, v in CharsetMap:
-    result[k.normalizeLocale()] = v)()
-
-const DefaultCharset* = CHARSET_UTF_8
-
-proc getCharset*(s: string): Charset =
-  return CharsetMap.getOrDefault(s.strip().toLower(), CHARSET_UNKNOWN)
-
-proc getLocaleCharset*(s: string): Charset =
-  let ss = s.after('.')
-  if ss != "":
-    return NormalizedCharsetMap.getOrDefault(ss.normalizeLocale(),
-      CHARSET_UNKNOWN)
-  # We could try to guess the charset based on the language here, like w3m
-  # does.
-  # However, these days it is more likely for any system to be using UTF-8
-  # than any other charset, irrespective of the language. So we just assume
-  # UTF-8.
-  return DefaultCharset
-
-iterator mappairs(path: string): tuple[a, b: int] =
-  let s = staticRead(path)
-  for line in s.split('\n'):
-    if line.len == 0 or line[0] == '#': continue
-    var i = 0
-    while line[i] == ' ': inc i
-    var j = i
-    while i < line.len and line[i] in '0'..'9': inc i
-    let index = parseInt(line.substr(j, i - 1))
-    inc i # tab
-    j = i
-    while i < line.len and line[i] in {'0'..'9', 'A'..'F', 'x'}: inc i
-    let n = parseHexInt(line.substr(j, i - 1))
-    yield (index, n)
-
-# I'm pretty sure single-byte encodings map to ucs-2.
-func loadCharsetMap8(path: string): tuple[
-      decode: array[char, uint16],
-      encode: seq[
-        tuple[
-          ucs: uint16,
-          val: char
-        ]
-      ],
-    ] =
-  var m: int
-  for index, n in mappairs("res/map" / path):
-    result.decode[char(index)] = uint16(n)
-    if index > m: m = index
-  for index in low(char) .. char(m):
-    let val = result.decode[index] 
-    if val != 0u16:
-      result.encode.add((val, index))
-  result.encode.sort()
-
-func loadCharsetMap8Encode(path: string): seq[tuple[ucs: uint16, val: char]] =
-  for index, n in mappairs("res/map" / path):
-    result.add((uint16(n), char(index)))
-  result.sort()
-
-func loadGb18030Ranges(path: string): tuple[
-        decode: seq[
-          tuple[
-            p: uint16,
-            ucs: uint16 ]],
-        encode: seq[
-          tuple[
-            ucs: uint16,
-            p: uint16 ]]] =
-  for index, n in mappairs("res/map" / path):
-    if uint32(index) > uint32(high(uint16)): break
-    result.decode.add((uint16(index), uint16(n)))
-    result.encode.add((uint16(n), uint16(index)))
-  result.encode.sort()
-
-type UCS16x16* = tuple[ucs, p: uint16]
-
-func loadCharsetMap16(path: string, len: static uint16): tuple[
-        decode: array[len, uint16],
-        encode: seq[UCS16x16]] =
-  for index, n in mappairs("res/map" / path):
-    result.decode[uint16(index)] = uint16(n)
-    result.encode.add((uint16(n), uint16(index)))
-  result.encode.sort()
-
-func loadCharsetMapSJIS(path: string): seq[UCS16x16] =
-  for index, n in mappairs("res/map" / path):
-    if n notin 8272..8835:
-      result.add((uint16(n), uint16(index)))
-  result.sort()
-
-type UCS32x16* = tuple[ucs: uint32, p: uint16]
-
-func loadBig5Map(path: string, offset: static uint16): tuple[
-        decode: array[19782u16 - offset, uint32], # ouch (+75KB...)
-        encode: seq[UCS32x16]] =
-  for index, n in mappairs("res/map" / path):
-    result.decode[uint16(index) - offset] = uint32(n)
-    result.encode.add((uint32(n), uint16(index)))
-  #for i in result.decode: assert x != 0 # fail
-  result.encode.sort()
-
-const (IBM866Decode*, IBM866Encode*) = loadCharsetMap8("index-ibm866.txt")
-const (ISO88592Decode*, ISO88592Encode*) = loadCharsetMap8("index-iso-8859-2.txt")
-const (ISO88593Decode*, ISO88593Encode*) = loadCharsetMap8("index-iso-8859-3.txt")
-const (ISO88594Decode*, ISO88594Encode*) = loadCharsetMap8("index-iso-8859-4.txt")
-const (ISO88595Decode*, ISO88595Encode*) = loadCharsetMap8("index-iso-8859-5.txt")
-const (ISO88596Decode*, ISO88596Encode*) = loadCharsetMap8("index-iso-8859-6.txt")
-const (ISO88597Decode*, ISO88597Encode*) = loadCharsetMap8("index-iso-8859-7.txt")
-const (ISO88598Decode*, ISO88598Encode*) = loadCharsetMap8("index-iso-8859-8.txt")
-const (ISO885910Decode*, ISO885910Encode*) = loadCharsetMap8("index-iso-8859-10.txt")
-const (ISO885913Decode*, ISO885913Encode*) = loadCharsetMap8("index-iso-8859-13.txt")
-const (ISO885914Decode*, ISO885914Encode*) = loadCharsetMap8("index-iso-8859-14.txt")
-const (ISO885915Decode*, ISO885915Encode*) = loadCharsetMap8("index-iso-8859-15.txt")
-const (ISO885916Decode*, ISO885916Encode*) = loadCharsetMap8("index-iso-8859-16.txt")
-const (KOI8RDecode*, KOI8REncode*) = loadCharsetMap8("index-koi8-r.txt")
-const (KOI8UDecode*, KOI8UEncode*) = loadCharsetMap8("index-koi8-u.txt")
-const (MacintoshDecode*, MacintoshEncode*) = loadCharsetMap8("index-macintosh.txt")
-const (Windows874Decode*, Windows874Encode*) = loadCharsetMap8("index-windows-874.txt")
-const (Windows1250Decode*, Windows1250Encode*) = loadCharsetMap8("index-windows-1250.txt")
-const (Windows1251Decode*, Windows1251Encode*) = loadCharsetMap8("index-windows-1251.txt")
-const (Windows1252Decode*, Windows1252Encode*) = loadCharsetMap8("index-windows-1252.txt")
-const (Windows1253Decode*, Windows1253Encode*) = loadCharsetMap8("index-windows-1253.txt")
-const (Windows1254Decode*, Windows1254Encode*) = loadCharsetMap8("index-windows-1254.txt")
-const (Windows1255Decode*, Windows1255Encode*) = loadCharsetMap8("index-windows-1255.txt")
-const (Windows1256Decode*, Windows1256Encode*) = loadCharsetMap8("index-windows-1256.txt")
-const (Windows1257Decode*, Windows1257Encode*) = loadCharsetMap8("index-windows-1257.txt")
-const (Windows1258Decode*, Windows1258Encode*) = loadCharsetMap8("index-windows-1258.txt")
-const (XMacCyrillicDecode*, XMacCyrillicEncode*) = loadCharsetMap8("index-x-mac-cyrillic.txt")
-const (Gb18030RangesDecode*, Gb18030RangesEncode*) = loadGb18030Ranges("index-gb18030-ranges.txt")
-const (Gb18030Decode*, Gb18030Encode*) = loadCharsetMap16("index-gb18030.txt", len = 23940)
-#for x in Gb18030Decode: assert x != 0 # success
-const Big5DecodeOffset* = 942
-const (Big5Decode*, Big5Encode*) = loadBig5Map("index-big5.txt", offset = Big5DecodeOffset)
-const (Jis0208Decode*, Jis0208Encode*) = loadCharsetMap16("index-jis0208.txt", len = 11104)
-const ShiftJISEncode* = loadCharsetMapSJIS("index-jis0208.txt")
-const (Jis0212Decode*, Jis0212Encode*) = loadCharsetMap16("index-jis0212.txt", len = 7211)
-const ISO2022JPKatakanaEncode* = loadCharsetMap8Encode("index-iso-2022-jp-katakana.txt")
-const (EUCKRDecode*, EUCKREncode*) = loadCharsetMap16("index-euc-kr.txt", len = 23750)