diff options
author | bptato <nincsnevem662@gmail.com> | 2021-12-29 02:25:40 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2021-12-29 02:25:40 +0100 |
commit | 53f9d91dd6377c4f689e2b237d4a25c33c1d9e16 (patch) | |
tree | 0bd48aeea9c8b0f4ddf0531a8f3a7df02f6124e5 /src/data | |
parent | b7f19942e3d0a4cf25981c1686ffdb200ec4fd6f (diff) | |
download | chawan-53f9d91dd6377c4f689e2b237d4a25c33c1d9e16.tar.gz |
Add punycode support
Diffstat (limited to 'src/data')
-rw-r--r-- | src/data/idna.nim | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/src/data/idna.nim b/src/data/idna.nim new file mode 100644 index 00000000..5bd38ee4 --- /dev/null +++ b/src/data/idna.nim @@ -0,0 +1,212 @@ +import algorithm +import unicode +import sets +import tables +import sugar +import strutils + +type IDNATableStatus* = enum + IDNA_VALID, IDNA_IGNORED, IDNA_MAPPED, IDNA_DEVIATION, IDNA_DISALLOWED + +const IdnaMappingTable = staticRead"res/IdnaMappingTable.txt" + +func loadStuff(s: string): (seq[(uint16, cstring)], seq[(int, cstring)], + seq[(uint16, uint16)], seq[(int, int)], + set[uint16], HashSet[int], + set[uint16], HashSet[int], + seq[(uint16, cstring)]) = + template add_map(i: int, str: string) = + if cast[uint](i) <= high(uint16): + result[0].add((cast[uint16](i), cstring(str))) + else: + result[1].add((i, cstring(str))) + template add_disallow(i, j: int) = + if cast[uint](i) <= high(uint16): + result[2].add((cast[uint16](i), cast[uint16](j))) + else: + result[3].add((i, j)) + template add_disallow(i: int) = + if cast[uint](i) <= high(uint16): + result[4].incl(cast[uint16](i)) + else: + result[5].incl(i) + template add_ignore(i: int) = + if cast[uint](i) <= high(uint16): + result[6].incl(cast[uint16](i)) + else: + result[7].incl(i) + template add_deviation(i: int, str: string) = + if cast[uint](i) <= high(uint16): + result[8].add((cast[uint16](i), cstring(str))) + else: + assert false + + for line in s.split('\n'): + if line.len == 0 or line[0] == '#': + continue + var i = 0 + var firstcol = "" + var status = "" + var thirdcol: seq[string] + var fourthcol = "" + + while i < line.len and line[i] notin {'#', ';'}: + if line[i] != ' ': + firstcol &= line[i] + inc i + if line[i] != '#': + inc i + + while i < line.len and line[i] notin {'#', ';'}: + if line[i] != ' ': + status &= line[i] + inc i + if line[i] != '#': + inc i + + var nw = true + while i < line.len and line[i] notin {'#', ';'}: + if line[i] == ' ': + nw = true + else: + if nw: + thirdcol.add("") + nw = false + thirdcol[^1] &= line[i] + inc i + if line[i] != '#': + inc i + + while i < line.len and line[i] notin {'#', ';'}: + if line[i] != ' ': + fourthcol &= line[i] + inc i + + case status + of "mapped", "disallowed_STD3_mapped": + let codepoints = thirdcol + var str = "" + for code in codepoints: + str &= Rune(parseHexInt(code)) + + if firstcol.contains(".."): + let fcs = firstcol.split("..") + let rstart = parseHexInt(fcs[0]) + let rend = parseHexInt(fcs[1]) + for i in rstart..rend: + add_map(i, str) + else: + add_map(parseHexInt(firstcol), str) + of "deviation": + let codepoints = thirdcol + var str = "" + for code in codepoints: + str &= Rune(parseHexInt(code)) + if firstcol.contains(".."): + let fcs = firstcol.split("..") + let rstart = parseHexInt(fcs[0]) + let rend = parseHexInt(fcs[1]) + for i in rstart..rend: + add_deviation(i, str) + else: + add_deviation(parseHexInt(firstcol), str) + of "valid": + if fourthcol == "NV8" or fourthcol == "XV8": + if firstcol.contains(".."): + let fcs = firstcol.split("..") + let rstart = parseHexInt(fcs[0]) + let rend = parseHexInt(fcs[1]) + add_disallow(rstart, rend) + else: + add_disallow(parseHexInt(firstcol)) + of "disallowed": + if firstcol.contains(".."): + let fcs = firstcol.split("..") + let rstart = parseHexInt(fcs[0]) + let rend = parseHexInt(fcs[1]) + add_disallow(rstart, rend) + else: + add_disallow(parseHexInt(firstcol)) + of "ignored": + if firstcol.contains(".."): + let fcs = firstcol.split("..") + let rstart = parseHexInt(fcs[0]) + let rend = parseHexInt(fcs[1]) + for i in rstart..rend: + add_ignore(i) + else: + add_ignore(parseHexInt(firstcol)) + +when defined(release): + const (MappedMap1, + MappedMap2, + DisallowedRanges1, + DisallowedRanges2, + Disallowed1, + Disallowed2, + Ignored1, + Ignored2, + Deviation) = loadStuff(IdnaMappingTable) +else: + let (MappedMap1, + MappedMap2, + DisallowedRanges1, + DisallowedRanges2, + Disallowed1, + Disallowed2, + Ignored1, + Ignored2, + Deviation) = loadStuff(IdnaMappingTable) + +func searchInMap[U, T](a: openarray[(U, T)], u: U): int = + binarySearch(a, u, (x, y) => cmp(x[0], y)) + +func isInMap[U, T](a: openarray[(U, T)], u: U): bool = + a.searchInMap(u) != -1 + +func isInRange[U](a: openarray[(U, U)], u: U): bool = + binarySearch(a, u, (x, y) => (if x[0] < y: -1 elif x[1] > y: 1 else: 0)) != -1 + +func getIdnaTableStatus*(r: Rune): IDNATableStatus = + let i = int(r) + {.cast(noSideEffect).}: + if cast[uint](i) <= high(uint16): + let u = cast[uint16](i) + if u in Ignored1: + return IDNA_IGNORED + if u in Disallowed1: + return IDNA_DISALLOWED + for item in Deviation: + if item[0] == u: + return IDNA_DEVIATION + if DisallowedRanges1.isInRange(u): + return IDNA_DISALLOWED + if MappedMap1.isInMap(u): + return IDNA_MAPPED + else: + if i in Ignored2: + return IDNA_IGNORED + if i in Disallowed2: + return IDNA_DISALLOWED + if DisallowedRanges2.isInRange(i): + return IDNA_DISALLOWED + if MappedMap2.isInMap(i): + return IDNA_MAPPED + return IDNA_VALID + +func getIdnaMapped*(r: Rune): string = + {.cast(noSideEffect).}: + let i = int(r) + if cast[uint](i) <= high(uint16): + let u = cast[uint16](i) + let n = MappedMap1.searchInMap(u) + if n != -1: + return $MappedMap1[n][1] + let n = MappedMap2.searchInMap(i) + return $MappedMap2[n][1] + +func getDeviationMapped*(r: Rune): string = + {.cast(noSideEffect).}: + for item in Deviation: + if item[0] == cast[uint16](r): + return $item[1] |