diff options
author | bptato <nincsnevem662@gmail.com> | 2024-01-04 18:01:35 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-01-04 18:01:35 +0100 |
commit | c9337674db699951918225cddc56fcf75f313a15 (patch) | |
tree | 1273d92050a20c8b093c27aca748ba654a4d1733 /src | |
parent | c7b46f6f08d38b48af8583adbb3ae288a1f2135d (diff) | |
download | chawan-c9337674db699951918225cddc56fcf75f313a15.tar.gz |
idna: use pre-generated map file, misc changes
Speeds up compilation somewhat. The generated map file is included in the repository because it's not that large.
Misc changes:
* use seq instead of set for the 16-bit (uint16) code point sets (it takes up less space)
* remove unnecessary `{.cast(noSideEffect).}` blocks
Diffstat (limited to 'src')
-rw-r--r-- | src/data/idna.nim | 212 |
1 files changed, 38 insertions, 174 deletions
diff --git a/src/data/idna.nim b/src/data/idna.nim index a067a66f..e78bb7d8 100644 --- a/src/data/idna.nim +++ b/src/data/idna.nim @@ -1,187 +1,51 @@ -import algorithm -import unicode -import sets -import tables -import strutils +import std/unicode import utils/map +include res/map/idna_gen + type IDNATableStatus* = enum IDNA_VALID, IDNA_IGNORED, IDNA_MAPPED, IDNA_DEVIATION, IDNA_DISALLOWED -type - LowMap[T] = seq[(uint16, T)] - FullMap[T] = (LowMap[T], seq[(uint32, T)]) - FullRangeList = (seq[(uint16, uint16)], seq[(uint32, uint32)]) - FullSet = (set[uint16], HashSet[uint32]) - -const IdnaMappingTable = staticRead"res/map/IdnaMappingTable.txt" - -func loadStuff(s: string): (FullMap[cstring], # Map - FullRangeList, # Disallowed Ranges - FullSet, # Disallowed - FullSet, # Ignored - LowMap[cstring]) = # Deviation - template add_map(i: uint32, str: string) = - if i <= high(uint16): - result[0][0].add((uint16(i), cstring(str))) - else: - result[0][1].add((i, cstring(str))) - template add_disallow(i, j: uint32) = - if i <= high(uint16): - result[1][0].add((uint16(i), uint16(j))) - else: - result[1][1].add((i, j)) - template add_disallow(i: uint32) = - if i <= high(uint16): - result[2][0].incl(uint16(i)) - else: - result[2][1].incl(i) - template add_ignore(rstart, rend: uint32) = - for i in rstart..rend: - if i <= high(uint16): - result[3][0].incl(uint16(i)) - else: - result[3][1].incl(i) - template add_ignore(i: uint32) = - if i <= high(uint16): - result[3][0].incl(uint16(i)) - else: - result[3][1].incl(i) - template add_deviation(i: uint32, str: string) = - if i <= high(uint16): - result[4].add((uint16(i), cstring(str))) - else: - assert false - template add(firstcol: string, str: string, temp: untyped) = - if firstcol.contains(".."): - let fcs = firstcol.split("..") - let rstart = uint32(parseHexInt(fcs[0])) - let rend = uint32(parseHexInt(fcs[1])) - for i in rstart..rend: - temp(i, str) - else: - temp(uint32(parseHexInt(firstcol)), str) - template add(firstcol: 
string, temp: untyped) = - if firstcol.contains(".."): - let fcs = firstcol.split("..") - let rstart = uint32(parseHexInt(fcs[0])) - let rend = uint32(parseHexInt(fcs[1])) - temp(rstart, rend) - else: - temp(uint32(parseHexInt(firstcol))) - - for line in s.split('\n'): - if line.len == 0 or line[0] == '#': - continue - var i = 0 - var firstcol = "" - var status = "" - var thirdcol: seq[string] - var fourthcol = "" - - while i < line.len and line[i] notin {'#', ';'}: - if line[i] != ' ': - firstcol &= line[i] - inc i - if line[i] != '#': - inc i - - while i < line.len and line[i] notin {'#', ';'}: - if line[i] != ' ': - status &= line[i] - inc i - if line[i] != '#': - inc i - - var nw = true - while i < line.len and line[i] notin {'#', ';'}: - if line[i] == ' ': - nw = true - else: - if nw: - thirdcol.add("") - nw = false - thirdcol[^1] &= line[i] - inc i - if line[i] != '#': - inc i - - while i < line.len and line[i] notin {'#', ';'}: - if line[i] != ' ': - fourthcol &= line[i] - inc i - - case status - of "mapped", "disallowed_STD3_mapped": - let codepoints = thirdcol - var str = "" - for code in codepoints: - str &= Rune(parseHexInt(code)) - - add(firstcol, str, add_map) - of "deviation": - let codepoints = thirdcol - var str = "" - for code in codepoints: - str &= Rune(parseHexInt(code)) - - add(firstcol, str, add_deviation) - of "valid": - if fourthcol == "NV8" or fourthcol == "XV8": - add(firstcol, add_disallow) - of "disallowed": - add(firstcol, add_disallow) - of "ignored": - add(firstcol, add_ignore) - -const (MappedMap, - DisallowedRanges, - Disallowed, - Ignored, - Deviation) = loadStuff(IdnaMappingTable) - func getIdnaTableStatus*(r: Rune): IDNATableStatus = let i = uint32(r) - {.cast(noSideEffect).}: - if i <= high(uint16): - let u = uint16(i) - if u in Ignored[0]: - return IDNA_IGNORED - if u in Disallowed[0]: - return IDNA_DISALLOWED - for item in Deviation: - if item[0] == u: - return IDNA_DEVIATION - if DisallowedRanges[0].isInRange(u): - return 
IDNA_DISALLOWED - if MappedMap[0].isInMap(u): - return IDNA_MAPPED - else: - if i in Ignored[1]: - return IDNA_IGNORED - if i in Disallowed[1]: - return IDNA_DISALLOWED - if DisallowedRanges[1].isInRange(i): - return IDNA_DISALLOWED - if MappedMap[1].isInMap(uint32(i)): - return IDNA_MAPPED - return IDNA_VALID + if i <= high(uint16): + let u = uint16(i) + if u in IgnoredLow: + return IDNA_IGNORED + if u in DisallowedLow: + return IDNA_DISALLOWED + for item in Deviation: + if item[0] == u: + return IDNA_DEVIATION + if DisallowedRangesLow.isInRange(u): + return IDNA_DISALLOWED + if MappedMapLow.isInMap(u): + return IDNA_MAPPED + else: + if i in IgnoredHigh: + return IDNA_IGNORED + if i in DisallowedHigh: + return IDNA_DISALLOWED + if DisallowedRangesHigh.isInRange(i): + return IDNA_DISALLOWED + if MappedMapHigh.isInMap(uint32(i)): + return IDNA_MAPPED + return IDNA_VALID func getIdnaMapped*(r: Rune): string = - {.cast(noSideEffect).}: - let i = uint32(r) - if i <= high(uint16): - let u = uint16(i) - let n = MappedMap[0].searchInMap(u) - if n != -1: - return $MappedMap[0][n][1] - let n = MappedMap[1].searchInMap(i) - return $MappedMap[1][n][1] + let i = uint32(r) + if i <= high(uint16): + let u = uint16(i) + let n = MappedMapLow.searchInMap(u) + if n != -1: + return $MappedMapLow[n].mapped + let n = MappedMapHigh.searchInMap(i) + return $MappedMapHigh[n].mapped func getDeviationMapped*(r: Rune): string = - {.cast(noSideEffect).}: - for item in Deviation: - if item[0] == uint16(r): - return $item[1] + for item in Deviation: + if item[0] == uint16(r): + return $item[1] + return "" |