about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2024-01-04 18:01:35 +0100
committer bptato <nincsnevem662@gmail.com> 2024-01-04 18:01:35 +0100
commit c9337674db699951918225cddc56fcf75f313a15 (patch)
tree 1273d92050a20c8b093c27aca748ba654a4d1733 /src
parent c7b46f6f08d38b48af8583adbb3ae288a1f2135d (diff)
download chawan-c9337674db699951918225cddc56fcf75f313a15.tar.gz
idna: use pre-generated map file, misc changes
Speeds up compilation somewhat. The generated map file is included in
the repository because it's not that huge.

misc changes:
* use seq, not set, for the 16-bit code point sets (it takes up less space)
* remove unnecessary noSideEffect casts
Diffstat (limited to 'src')
-rw-r--r-- src/data/idna.nim 212
1 files changed, 38 insertions, 174 deletions
diff --git a/src/data/idna.nim b/src/data/idna.nim
index a067a66f..e78bb7d8 100644
--- a/src/data/idna.nim
+++ b/src/data/idna.nim
@@ -1,187 +1,51 @@
-import algorithm
-import unicode
-import sets
-import tables
-import strutils
+import std/unicode
 
 import utils/map
 
+include res/map/idna_gen
+
 type
   IDNATableStatus* = enum
     IDNA_VALID, IDNA_IGNORED, IDNA_MAPPED, IDNA_DEVIATION, IDNA_DISALLOWED
 
-type
-  LowMap[T] = seq[(uint16, T)]
-  FullMap[T] = (LowMap[T], seq[(uint32, T)])
-  FullRangeList = (seq[(uint16, uint16)], seq[(uint32, uint32)])
-  FullSet = (set[uint16], HashSet[uint32])
-
-const IdnaMappingTable = staticRead"res/map/IdnaMappingTable.txt"
-
-func loadStuff(s: string): (FullMap[cstring], # Map
-                            FullRangeList, # Disallowed Ranges
-                            FullSet, # Disallowed
-                            FullSet, # Ignored
-                            LowMap[cstring]) = # Deviation
-  template add_map(i: uint32, str: string) =
-    if i <= high(uint16):
-      result[0][0].add((uint16(i), cstring(str)))
-    else:
-      result[0][1].add((i, cstring(str)))
-  template add_disallow(i, j: uint32) =
-    if i <= high(uint16):
-      result[1][0].add((uint16(i), uint16(j)))
-    else:
-      result[1][1].add((i, j))
-  template add_disallow(i: uint32) =
-    if i <= high(uint16):
-      result[2][0].incl(uint16(i))
-    else:
-      result[2][1].incl(i)
-  template add_ignore(rstart, rend: uint32) =
-    for i in rstart..rend:
-      if i <= high(uint16):
-        result[3][0].incl(uint16(i))
-      else:
-        result[3][1].incl(i)
-  template add_ignore(i: uint32) =
-    if i <= high(uint16):
-      result[3][0].incl(uint16(i))
-    else:
-      result[3][1].incl(i)
-  template add_deviation(i: uint32, str: string) =
-    if i <= high(uint16):
-      result[4].add((uint16(i), cstring(str)))
-    else:
-      assert false
-  template add(firstcol: string, str: string, temp: untyped) =
-    if firstcol.contains(".."):
-      let fcs = firstcol.split("..")
-      let rstart = uint32(parseHexInt(fcs[0]))
-      let rend = uint32(parseHexInt(fcs[1]))
-      for i in rstart..rend:
-        temp(i, str)
-    else:
-      temp(uint32(parseHexInt(firstcol)), str)
-  template add(firstcol: string, temp: untyped) =
-    if firstcol.contains(".."):
-      let fcs = firstcol.split("..")
-      let rstart = uint32(parseHexInt(fcs[0]))
-      let rend = uint32(parseHexInt(fcs[1]))
-      temp(rstart, rend)
-    else:
-      temp(uint32(parseHexInt(firstcol)))
-
-  for line in s.split('\n'):
-    if line.len == 0 or line[0] == '#':
-      continue
-    var i = 0
-    var firstcol = ""
-    var status = ""
-    var thirdcol: seq[string]
-    var fourthcol = ""
-
-    while i < line.len and line[i] notin {'#', ';'}:
-      if line[i] != ' ':
-        firstcol &= line[i]
-      inc i
-    if line[i] != '#':
-      inc i
-
-    while i < line.len and line[i] notin {'#', ';'}:
-      if line[i] != ' ':
-        status &= line[i]
-      inc i
-    if line[i] != '#':
-      inc i
-
-    var nw = true
-    while i < line.len and line[i] notin {'#', ';'}:
-      if line[i] == ' ':
-        nw = true
-      else:
-        if nw:
-          thirdcol.add("")
-          nw = false
-        thirdcol[^1] &= line[i]
-      inc i
-    if line[i] != '#':
-      inc i
-
-    while i < line.len and line[i] notin {'#', ';'}:
-      if line[i] != ' ':
-        fourthcol &= line[i]
-      inc i
-
-    case status
-    of "mapped", "disallowed_STD3_mapped":
-      let codepoints = thirdcol
-      var str = ""
-      for code in codepoints:
-        str &= Rune(parseHexInt(code))
-
-      add(firstcol, str, add_map)
-    of "deviation":
-      let codepoints = thirdcol
-      var str = ""
-      for code in codepoints:
-        str &= Rune(parseHexInt(code))
-
-      add(firstcol, str, add_deviation)
-    of "valid":
-      if fourthcol == "NV8" or fourthcol == "XV8":
-        add(firstcol, add_disallow)
-    of "disallowed":
-      add(firstcol, add_disallow)
-    of "ignored":
-      add(firstcol, add_ignore)
-
-const (MappedMap,
-       DisallowedRanges,
-       Disallowed,
-       Ignored,
-       Deviation) = loadStuff(IdnaMappingTable)
-
 func getIdnaTableStatus*(r: Rune): IDNATableStatus =
   let i = uint32(r)
-  {.cast(noSideEffect).}:
-    if i <= high(uint16):
-      let u = uint16(i)
-      if u in Ignored[0]:
-        return IDNA_IGNORED
-      if u in Disallowed[0]:
-        return IDNA_DISALLOWED
-      for item in Deviation:
-        if item[0] == u:
-          return IDNA_DEVIATION
-      if DisallowedRanges[0].isInRange(u):
-        return IDNA_DISALLOWED
-      if MappedMap[0].isInMap(u):
-        return IDNA_MAPPED
-    else:
-      if i in Ignored[1]:
-        return IDNA_IGNORED
-      if i in Disallowed[1]:
-        return IDNA_DISALLOWED
-      if DisallowedRanges[1].isInRange(i):
-        return IDNA_DISALLOWED
-      if MappedMap[1].isInMap(uint32(i)):
-        return IDNA_MAPPED
-    return IDNA_VALID
+  if i <= high(uint16):
+    let u = uint16(i)
+    if u in IgnoredLow:
+      return IDNA_IGNORED
+    if u in DisallowedLow:
+      return IDNA_DISALLOWED
+    for item in Deviation:
+      if item[0] == u:
+        return IDNA_DEVIATION
+    if DisallowedRangesLow.isInRange(u):
+      return IDNA_DISALLOWED
+    if MappedMapLow.isInMap(u):
+      return IDNA_MAPPED
+  else:
+    if i in IgnoredHigh:
+      return IDNA_IGNORED
+    if i in DisallowedHigh:
+      return IDNA_DISALLOWED
+    if DisallowedRangesHigh.isInRange(i):
+      return IDNA_DISALLOWED
+    if MappedMapHigh.isInMap(uint32(i)):
+      return IDNA_MAPPED
+  return IDNA_VALID
 
 func getIdnaMapped*(r: Rune): string =
-  {.cast(noSideEffect).}:
-    let i = uint32(r)
-    if i <= high(uint16):
-      let u = uint16(i)
-      let n = MappedMap[0].searchInMap(u)
-      if n != -1:
-        return $MappedMap[0][n][1]
-    let n = MappedMap[1].searchInMap(i)
-    return $MappedMap[1][n][1]
+  let i = uint32(r)
+  if i <= high(uint16):
+    let u = uint16(i)
+    let n = MappedMapLow.searchInMap(u)
+    if n != -1:
+      return $MappedMapLow[n].mapped
+  let n = MappedMapHigh.searchInMap(i)
+  return $MappedMapHigh[n].mapped
 
 func getDeviationMapped*(r: Rune): string =
-  {.cast(noSideEffect).}:
-    for item in Deviation:
-      if item[0] == uint16(r):
-        return $item[1]
+  for item in Deviation:
+    if item[0] == uint16(r):
+      return $item[1]
+  return ""