## Generator script: reads `res/map/IdnaMappingTable.txt` and prints Nim
## `const` declarations (mapping tables, disallowed ranges/code points and
## ignored code points) to stdout.

import std/algorithm
import std/sets
import std/streams
import std/strutils
import std/tables
import std/unicode

type
  # Code points that fit into 16 bits are kept apart from the larger ones so
  # that each emitted table can use the narrowest integer type.
  LowMap = seq[tuple[ucs: uint16, s: string]]
  HighMap = seq[tuple[ucs: uint32, s: string]]
  FullRangeList = tuple[lm: seq[(uint16, uint16)], hm: seq[(uint32, uint32)]]
  FullSet = tuple[lm: seq[uint16], hm: seq[uint32]]

var MappedMapLow: LowMap = @[]
var MappedMapHigh: HighMap = @[]
var MappedMapStrings: seq[string] = @[]
var DisallowedRanges: FullRangeList
var Disallowed: FullSet
var Ignored: FullSet

const ColumnEnd = {'#', ';'}
  ## Characters that terminate a column: `;` separates columns, `#` starts
  ## the trailing comment.

proc scanColumn(line: string, i: var int): string =
  ## Reads one column starting at `line[i]`, dropping every space, and leaves
  ## `i` on the terminating `;`/`#` (or at `line.len`).
  result = ""
  while i < line.len and line[i] notin ColumnEnd:
    if line[i] != ' ':
      result.add(line[i])
    inc i

proc scanWords(line: string, i: var int): seq[string] =
  ## Reads one column starting at `line[i]` as a list of space-separated
  ## words and leaves `i` on the terminating `;`/`#` (or at `line.len`).
  result = @[]
  var newWord = true
  while i < line.len and line[i] notin ColumnEnd:
    if line[i] == ' ':
      newWord = true
    else:
      if newWord:
        result.add("")
        newWord = false
      result[^1].add(line[i])
    inc i

proc skipSeparator(line: string, i: var int) =
  ## Steps over a `;` column separator. A `#` is left in place so that all
  ## remaining columns come out empty. The bounds check matters: a line that
  ## ends without a `#` comment would otherwise be indexed past its end.
  if i < line.len and line[i] != '#':
    inc i

proc loadIdnaData() =
  ## Parses `res/map/IdnaMappingTable.txt` and fills the module-level tables.
  ## Writes a message to stderr and quits with status 1 if the file cannot be
  ## opened. `parseHexInt` raises `ValueError` on a malformed code point.
  template add_map(i: uint32, str: string) =
    if i <= high(uint16):
      MappedMapLow.add((uint16(i), str))
    else:
      MappedMapHigh.add((i, str))
    MappedMapStrings.add(str)

  template add_disallow(i, j: uint32) =
    if j <= high(uint16):
      DisallowedRanges.lm.add((uint16(i), uint16(j)))
    elif i > high(uint16):
      DisallowedRanges.hm.add((i, j))
    else:
      # The range straddles U+FFFF: split it so that the upper bound is not
      # truncated to 16 bits.
      DisallowedRanges.lm.add((uint16(i), 0xFFFF'u16))
      DisallowedRanges.hm.add((0x10000'u32, j))

  template add_disallow(i: uint32) =
    if i <= high(uint16):
      Disallowed.lm.add(uint16(i))
    else:
      Disallowed.hm.add(i)

  template add_ignore(rstart, rend: uint32) =
    for i in rstart..rend:
      if i <= high(uint16):
        Ignored.lm.add(uint16(i))
      else:
        Ignored.hm.add(i)

  template add_ignore(i: uint32) =
    if i <= high(uint16):
      Ignored.lm.add(uint16(i))
    else:
      Ignored.hm.add(i)

  # Calls `temp(cp, str)` once for every code point named by `firstcol`,
  # which is either a single hex value or a `start..end` range.
  template add(firstcol: string, str: string, temp: untyped) =
    if firstcol.contains(".."):
      let fcs = firstcol.split("..")
      let rstart = uint32(parseHexInt(fcs[0]))
      let rend = uint32(parseHexInt(fcs[1]))
      for i in rstart..rend:
        temp(i, str)
    else:
      temp(uint32(parseHexInt(firstcol)), str)

  # Calls `temp(start, end)` for a range and `temp(cp)` for a single value.
  template add(firstcol: string, temp: untyped) =
    if firstcol.contains(".."):
      let fcs = firstcol.split("..")
      let rstart = uint32(parseHexInt(fcs[0]))
      let rend = uint32(parseHexInt(fcs[1]))
      temp(rstart, rend)
    else:
      temp(uint32(parseHexInt(firstcol)))

  const path = "res/map/IdnaMappingTable.txt"
  var f: File
  if not open(f, path):
    stderr.write(path & " not found\n")
    quit(1)
  let s = f.readAll()
  f.close()

  # splitLines also accepts CRLF line endings.
  for line in s.splitLines():
    if line.len == 0 or line[0] == '#':
      continue
    # Line layout: firstcol ; status ; thirdcol ; fourthcol # comment
    var i = 0
    let firstcol = scanColumn(line, i)
    skipSeparator(line, i)
    let status = scanColumn(line, i)
    skipSeparator(line, i)
    let thirdcol = scanWords(line, i)
    skipSeparator(line, i)
    let fourthcol = scanColumn(line, i)

    case status
    of "mapped", "disallowed_STD3_mapped":
      # The third column lists the replacement code points in hex; encode
      # them as one UTF-8 string.
      var mapped = ""
      for code in thirdcol:
        mapped &= Rune(parseHexInt(code))
      add(firstcol, mapped, add_map)
    of "valid":
      # Valid entries flagged NV8/XV8 are recorded as disallowed.
      if fourthcol == "NV8" or fourthcol == "XV8":
        add(firstcol, add_disallow)
    of "disallowed":
      add(firstcol, add_disallow)
    of "ignored":
      add(firstcol, add_ignore)
    else:
      # Every other status produces no table entry.
      discard

type LineWriter = object
  ## Buffers short fragments and writes them to `s` as lines of bounded
  ## length.
  s: Stream
  line: string

proc write(writer: var LineWriter, s: string) =
  ## Appends `s` to the pending line, first emitting the pending line if
  ## adding `s` would make it longer than 80 characters.
  if s.len + writer.line.len > 80:
    writer.s.writeLine(writer.line)
    writer.line = ""
  writer.line &= s

proc flush(writer: var LineWriter) =
  ## Emits the pending line (even when it is empty) and clears it.
  writer.s.writeLine(writer.line)
  writer.line = ""

func arrayHeader(name: string, n: int, elem: string): string =
  ## Builds the opening line of an emitted array constant.
  "const " & name & ": array[" & $n & ", " & elem & "] = ["

proc main() =
  ## Loads the mapping table and prints the generated constants to stdout.
  loadIdnaData()
  var writer = LineWriter(s: newFileStream(stdout))
  echo arrayHeader("MappedMapLow", MappedMapLow.len,
                   "tuple[ucs, idx: uint16]")

  # Longest strings go first so that a shorter string which is a suffix of
  # an already stored one can reuse its bytes instead of being stored again.
  proc byLen(a, b: string): int = cmp(a.len, b.len)
  MappedMapStrings.sort(byLen, order = Descending)

  # mdata holds every mapped string NUL-terminated; idxMap records the
  # offset of each string inside mdata.
  # NOTE(review): offsets are emitted as uint16, which presumes mdata stays
  # below 64 KiB - confirm against the current data file.
  var mdata = ""
  var idxMap = initTable[string, int]()
  for s in MappedMapStrings:
    if s in idxMap:
      continue # duplicate entry; a new search would yield the same offset
    let s0 = s & '\0'
    let i = mdata.find(s0)
    if i != -1:
      idxMap[s] = i
    else:
      idxMap[s] = mdata.len
      mdata &= s0

  for (ucs, s) in MappedMapLow:
    writer.write("(" & $ucs & "," & $idxMap[s] & "),")
  writer.flush()
  echo "]"
  echo ""

  echo arrayHeader("MappedMapHigh", MappedMapHigh.len,
                   "tuple[ucs: uint32; idx: uint16]")
  for (ucs, s) in MappedMapHigh:
    writer.write("(" & $ucs & "," & $idxMap[s] & "),")
  writer.flush()
  echo "]"
  echo ""

  stdout.write("const MappedMapData = ")
  stdout.write(mdata.escape())
  echo ""
  echo ""

  echo arrayHeader("DisallowedRangesLow", DisallowedRanges.lm.len,
                   "tuple[ucs, mapped: uint16]")
  for (ucs, mapped) in DisallowedRanges.lm:
    writer.write("(" & $ucs & "," & $mapped & "),")
  writer.flush()
  echo "]"
  echo ""

  echo arrayHeader("DisallowedRangesHigh", DisallowedRanges.hm.len,
                   "tuple[ucs, mapped: uint32]")
  for (ucs, mapped) in DisallowedRanges.hm:
    writer.write("(" & $ucs & "," & $mapped & "),")
  writer.flush()
  echo "]"
  echo ""

  echo arrayHeader("DisallowedLow", Disallowed.lm.len, "uint16")
  for ucs in Disallowed.lm:
    writer.write($ucs & ",")
  writer.flush()
  echo "]"
  echo ""

  echo arrayHeader("DisallowedHigh", Disallowed.hm.len, "uint32")
  for ucs in Disallowed.hm:
    writer.write($ucs & ",")
  writer.flush()
  echo "]"
  echo ""

  echo arrayHeader("IgnoredLow", Ignored.lm.len, "uint16")
  for ucs in Ignored.lm:
    writer.write($ucs & ",")
  writer.flush()
  echo "]"
  echo ""

  echo arrayHeader("IgnoredHigh", Ignored.hm.len, "uint32")
  for ucs in Ignored.hm:
    writer.write($ucs & ",")
  writer.flush()
  echo "]"

main()