about summary refs log blame commit diff stats
path: root/res/genidna.nim
blob: 4ae9d34caca6f6ebe20558fcf9c14ce8b9a0979d (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
                    


                   
                 







                                                                             


                                       


                                   






                                            
                             




















                                                     






















                                                              
           
















































                                                    
























                                                   
                                                         












                                                                    
                               
                                                      



                                                           
                                            
                                
                                                      


                



                                        








































                                                                            
      
import std/algorithm
import std/sets
import std/streams
import std/strutils
import std/tables
import std/unicode

type
  # (code point, mapped string) pairs for code points that fit in a uint16.
  LowMap = seq[tuple[ucs: uint16, s: string]]
  # (code point, mapped string) pairs for code points above high(uint16).
  HighMap = seq[tuple[ucs: uint32, s: string]]
  # Code point ranges, split into a uint16 list (lm) and a uint32 list (hm).
  FullRangeList = tuple[lm: seq[(uint16, uint16)], hm: seq[(uint32, uint32)]]
  # Individual code points, split into a uint16 list (lm) and a uint32 list
  # (hm).
  FullSet = tuple[lm: seq[uint16], hm: seq[uint32]]

# Tables filled by loadIdnaData() and printed by main().

# Mapped code points <= high(uint16), each with its replacement string.
var MappedMapLow: LowMap = @[]
# Mapped code points > high(uint16), each with its replacement string.
var MappedMapHigh: HighMap = @[]
# Every replacement string added to the two maps above, one entry per mapped
# code point (so it may contain duplicates); main() sorts it in place.
var MappedMapStrings: seq[string] = @[]
# Rows whose first column is a `start..end` range and that are recorded as
# disallowed.
var DisallowedRanges: FullRangeList
# Rows whose first column is a single code point and that are recorded as
# disallowed.
var Disallowed: FullSet
# Code points from "ignored" rows; ranges are expanded to individual entries.
var Ignored: FullSet

proc loadIdnaData() =
  ## Reads `res/map/IdnaMappingTable.txt` and fills the module-level tables
  ## (`MappedMapLow`, `MappedMapHigh`, `MappedMapStrings`, `DisallowedRanges`,
  ## `Disallowed` and `Ignored`) from its `;`-separated columns.
  ##
  ## Writes a message to stderr and quits with status 1 when the file cannot
  ## be opened. `parseHexInt` raises `ValueError` on a malformed code point.

  # The scanning helpers are declared before the local `add` templates so
  # that string appends inside them cannot be affected by those overloads.
  proc readField(line: string, i: var int): string =
    ## Returns the non-space characters from `line[i]` up to (but excluding)
    ## the next ';' or '#'; leaves `i` on that delimiter or at `line.len`.
    while i < line.len and line[i] notin {'#', ';'}:
      if line[i] != ' ':
        result &= line[i]
      inc i

  proc readWords(line: string, i: var int): seq[string] =
    ## Like `readField`, but returns the space-separated words individually.
    var newWord = true
    while i < line.len and line[i] notin {'#', ';'}:
      if line[i] == ' ':
        newWord = true
      else:
        if newWord:
          result.add("")
          newWord = false
        result[^1] &= line[i]
      inc i

  proc skipSeparator(line: string, i: var int) =
    ## Steps over a ';' column separator. Does nothing at the end of the line
    ## or on '#', so that a comment stops all further column parsing.
    if i < line.len and line[i] != '#':
      inc i

  # Records a single mapped code point and remembers its replacement string.
  template add_map(i: uint32, str: string) =
    if i <= high(uint16):
      MappedMapLow.add((uint16(i), str))
    else:
      MappedMapHigh.add((i, str))
    MappedMapStrings.add(str)
  # Records a disallowed range in the low or high list.
  # NOTE(review): only the range start selects the list; a range starting at
  # or below high(uint16) and ending above it would not fit the uint16 pair.
  # Confirm the data file never contains such a range.
  template add_disallow(i, j: uint32) =
    if i <= high(uint16):
      DisallowedRanges.lm.add((uint16(i), uint16(j)))
    else:
      DisallowedRanges.hm.add((i, j))
  # Records a single disallowed code point.
  template add_disallow(i: uint32) =
    if i <= high(uint16):
      Disallowed.lm.add(uint16(i))
    else:
      Disallowed.hm.add(i)
  # Records every code point of an ignored range individually.
  template add_ignore(rstart, rend: uint32) =
    for i in rstart..rend:
      if i <= high(uint16):
        Ignored.lm.add(uint16(i))
      else:
        Ignored.hm.add(i)
  # Records a single ignored code point.
  template add_ignore(i: uint32) =
    if i <= high(uint16):
      Ignored.lm.add(uint16(i))
    else:
      Ignored.hm.add(i)
  # Applies `temp(codepoint, str)` to every code point named by `firstcol`,
  # which is either one hex code point or a `start..end` hex range.
  template add(firstcol: string, str: string, temp: untyped) =
    if firstcol.contains(".."):
      let fcs = firstcol.split("..")
      let rstart = uint32(parseHexInt(fcs[0]))
      let rend = uint32(parseHexInt(fcs[1]))
      for i in rstart..rend:
        temp(i, str)
    else:
      temp(uint32(parseHexInt(firstcol)), str)
  # Applies `temp(start, end)` for a range or `temp(codepoint)` for a single
  # code point; `temp` must therefore provide both overloads.
  template add(firstcol: string, temp: untyped) =
    if firstcol.contains(".."):
      let fcs = firstcol.split("..")
      let rstart = uint32(parseHexInt(fcs[0]))
      let rend = uint32(parseHexInt(fcs[1]))
      temp(rstart, rend)
    else:
      temp(uint32(parseHexInt(firstcol)))

  var f: File
  if not open(f, "res/map/IdnaMappingTable.txt"):
    stderr.write("res/map/IdnaMappingTable.txt not found\n")
    quit(1)
  let s = f.readAll()
  f.close()
  # splitLines also accepts CRLF line endings, so blank lines in such files
  # are really empty instead of containing a lone '\r'.
  for line in s.splitLines():
    # Skip blank lines and whole-line comments.
    if line.len == 0 or line[0] == '#':
      continue
    var i = 0
    let firstcol = readField(line, i)
    skipSeparator(line, i)
    let status = readField(line, i)
    skipSeparator(line, i)
    let thirdcol = readWords(line, i)
    skipSeparator(line, i)
    let fourthcol = readField(line, i)

    case status
    of "mapped", "disallowed_STD3_mapped":
      # The third column lists the replacement as hex code points.
      var str = ""
      for code in thirdcol:
        str &= Rune(parseHexInt(code))

      add(firstcol, str, add_map)
    of "valid":
      # Valid rows flagged NV8/XV8 in the fourth column are recorded as
      # disallowed.
      if fourthcol == "NV8" or fourthcol == "XV8":
        add(firstcol, add_disallow)
    of "disallowed":
      add(firstcol, add_disallow)
    of "ignored":
      add(firstcol, add_ignore)
    else:
      # Any other status (or a row without one) produces no table entry.
      discard

type LineWriter = object
  ## Packs short tokens into output lines: tokens are appended to a pending
  ## line, which is written out once the next token would make it longer than
  ## 80 characters.
  s: Stream     ## Destination stream.
  line: string  ## Pending, not yet written, line content.

proc write(writer: var LineWriter, s: string) =
  ## Appends `s` to the pending line, first writing the pending line out if
  ## adding `s` would push it past 80 characters.
  ##
  ## An empty pending line is never written out here, so a token that is
  ## itself longer than 80 characters does not produce a blank line; it is
  ## kept whole on a line of its own.
  if writer.line.len > 0 and s.len + writer.line.len > 80:
    writer.s.writeLine(writer.line)
    writer.line = ""
  writer.line &= s

proc flush(writer: var LineWriter) =
  ## Writes the pending line (even when empty) followed by a newline and
  ## clears it.
  writer.s.writeLine(writer.line)
  writer.line = ""

proc main() =
  ## Loads the IDNA tables and prints them to stdout as Nim `const`
  ## declarations: the two mapped tables, the shared string data they index
  ## into, and the disallowed and ignored tables.
  loadIdnaData()
  var writer = LineWriter(s: newFileStream(stdout))

  # Build the shared string pool. Strings are visited longest first; each one
  # is stored NUL-terminated, and a string whose NUL-terminated form already
  # occurs in the pool reuses that offset instead of being appended again.
  MappedMapStrings.sort(proc(a, b: string): int = cmp(a.len, b.len),
    order = Descending)
  var mdata = ""
  var idxMap = initTable[string, int]()
  for s in MappedMapStrings:
    let entry = s & '\0'
    var offset = mdata.find(entry)
    if offset == -1:
      offset = mdata.len
      mdata &= entry
    idxMap[s] = offset

  # Prints one array literal: the header line, the wrapped elements written
  # by `body`, and the closing bracket.
  template emitArray(header: string, body: untyped) =
    echo header
    body
    writer.flush()
    echo "]"

  emitArray("const MappedMapLow: array[" & $MappedMapLow.len &
      ", tuple[ucs, idx: uint16]] = ["):
    for (ucs, s) in MappedMapLow:
      writer.write("(" & $ucs & "," & $idxMap[s] & "),")
  echo ""
  emitArray("const MappedMapHigh: array[" & $MappedMapHigh.len &
      ", tuple[ucs: uint32; idx: uint16]] = ["):
    for (ucs, s) in MappedMapHigh:
      writer.write("(" & $ucs & "," & $idxMap[s] & "),")
  echo ""
  echo "const MappedMapData = " & mdata.escape()
  echo ""

  emitArray("const DisallowedRangesLow: array[" & $DisallowedRanges.lm.len &
      ", tuple[ucs, mapped: uint16]] = ["):
    for (first, last) in DisallowedRanges.lm:
      writer.write("(" & $first & "," & $last & "),")
  echo ""
  emitArray("const DisallowedRangesHigh: array[" & $DisallowedRanges.hm.len &
      ", tuple[ucs, mapped: uint32]] = ["):
    for (first, last) in DisallowedRanges.hm:
      writer.write("(" & $first & "," & $last & "),")
  echo ""

  emitArray("const DisallowedLow: array[" & $Disallowed.lm.len &
      ", uint16] = ["):
    for cp in Disallowed.lm:
      writer.write($cp & ",")
  echo ""
  emitArray("const DisallowedHigh: array[" & $Disallowed.hm.len &
      ", uint32] = ["):
    for cp in Disallowed.hm:
      writer.write($cp & ",")

  echo ""
  emitArray("const IgnoredLow: array[" & $Ignored.lm.len & ", uint16] = ["):
    for cp in Ignored.lm:
      writer.write($cp & ",")
  echo ""
  emitArray("const IgnoredHigh: array[" & $Ignored.hm.len & ", uint32] = ["):
    for cp in Ignored.hm:
      writer.write($cp & ",")

main()