about summary refs log tree commit diff stats
path: root/res/genidna.nim
diff options
context:
space:
mode:
authorbptato <nincsnevem662@gmail.com>2024-09-01 16:27:39 +0200
committerbptato <nincsnevem662@gmail.com>2024-09-01 16:27:39 +0200
commitd1ff49b0fc39171859cfa781f459a2e0242c2e79 (patch)
tree0d0853eda744e0a8c00ba255a48603642dad0788 /res/genidna.nim
parente0a9cec50023bc21c443b51433b486cbe8e52e53 (diff)
downloadchawan-d1ff49b0fc39171859cfa781f459a2e0242c2e79.tar.gz
url: reduce idna size
Instead of storing pointers to cstrings, put all of them in a single
huge string and store indices of that.
Diffstat (limited to 'res/genidna.nim')
-rw-r--r--res/genidna.nim34
1 files changed, 26 insertions, 8 deletions
diff --git a/res/genidna.nim b/res/genidna.nim
index 8aed63b2..4ae9d34c 100644
--- a/res/genidna.nim
+++ b/res/genidna.nim
@@ -1,6 +1,8 @@
+import std/algorithm
 import std/sets
 import std/streams
 import std/strutils
+import std/tables
 import std/unicode
 
 type
@@ -9,8 +11,9 @@ type
   FullRangeList = tuple[lm: seq[(uint16, uint16)], hm: seq[(uint32, uint32)]]
   FullSet = tuple[lm: seq[uint16], hm: seq[uint32]]
 
-var MappedMapLow: LowMap
-var MappedMapHigh: HighMap
+var MappedMapLow: LowMap = @[]
+var MappedMapHigh: HighMap = @[]
+var MappedMapStrings: seq[string] = @[]
 var DisallowedRanges: FullRangeList
 var Disallowed: FullSet
 var Ignored: FullSet
@@ -21,6 +24,7 @@ proc loadIdnaData() =
       MappedMapLow.add((uint16(i), str))
     else:
       MappedMapHigh.add((i, str))
+    MappedMapStrings.add(str)
   template add_disallow(i, j: uint32) =
     if i <= high(uint16):
       DisallowedRanges.lm.add((uint16(i), uint16(j)))
@@ -140,22 +144,36 @@ proc flush(writer: var LineWriter) =
 proc main() =
   loadIdnaData()
   var writer = LineWriter(s: newFileStream(stdout))
-  echo "type Z = cstring"
-
   echo "const MappedMapLow: array[" & $MappedMapLow.len &
-    ", tuple[ucs: uint16, mapped: Z]] = ["
+    ", tuple[ucs, idx: uint16]] = ["
+  MappedMapStrings.sort(proc(a, b: string): int = cmp(a.len, b.len),
+    order = Descending)
+  var mdata = ""
+  var idxMap = initTable[string, int]()
+  for s in MappedMapStrings:
+    let s0 = s & '\0'
+    let i = mdata.find(s0)
+    if i != -1:
+      idxMap[s] = i
+    else:
+      idxMap[s] = mdata.len
+      mdata &= s0
   for (ucs, s) in MappedMapLow:
-    writer.write("(" & $ucs & "," & s.escape() & ".Z),")
+    writer.write("(" & $ucs & "," & $idxMap[s] & "),")
   writer.flush()
   echo "]"
   echo ""
   echo "const MappedMapHigh: array[" & $MappedMapHigh.len &
-    ", tuple[ucs: uint32, mapped: Z]] = ["
+    ", tuple[ucs: uint32; idx: uint16]] = ["
   for (ucs, s) in MappedMapHigh:
-    writer.write("(" & $ucs & "," & s.escape() & ".Z),")
+    writer.write("(" & $ucs & "," & $idxMap[s] & "),")
   writer.flush()
   echo "]"
   echo ""
+  stdout.write("const MappedMapData = ")
+  stdout.write(mdata.escape())
+  echo ""
+  echo ""
 
   echo "const DisallowedRangesLow: array[" & $DisallowedRanges.lm.len &
     ", tuple[ucs, mapped: uint16]] = ["