about summary refs log tree commit diff stats
path: root/src/utils/widthconv.nim
blob: 32a904f6b5eb696fbb7f82ea67860d5180f8c84b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import std/strutils

import utils/map
import utils/twtuni

# Lookup tables of kana code points, used when composing/decomposing
# voiced-sound marks during width conversion. Each string is converted
# with toPoints() so that membership can be tested against uint32 code
# points.

# Base hiragana and katakana that can take a dakuten.
const CanHaveDakuten = ("かきくけこさしすせそたちつてとはひふへほカキクケコ" &
  "サシスセソタチツテトハヒフヘホ").toPoints()

# Base hiragana and katakana (the h-row) that can take a handakuten.
const CanHaveHanDakuten = "はひふへほハヒフヘホ".toPoints()

# Precomposed kana carrying a dakuten; the entries are listed in the same
# order as the base kana in CanHaveDakuten.
const HasDakuten = ("がぎぐげござじずぜぞだぢづでどばびぶべぼガギグゲゴ" &
  "ザジズゼゾダヂヅデドバビブベボ").toPoints()

# Precomposed kana carrying a handakuten; the entries are listed in the
# same order as the base kana in CanHaveHanDakuten.
const HasHanDakuten = "ぱぴぷぺぽパピプペポ".toPoints()

# Halfwidth to fullwidth & vice versa.
#
# Built at compile time from res/widthconvmap.tab. Every non-empty line has
# the form
#   half '\t' full1 [ '\t' full2 ]
# where each field is a single code point. full2 is left at 0 when the
# optional third column is absent. Parsing stops at the first empty line.
const halfFullMap = (func(): seq[tuple[half, full1, full2: uint32]] =
  result = @[]
  const map = staticRead"res/widthconvmap.tab"
  for line in map.split('\n'):
    if line == "":
      break
    var i = 0
    # nextUTF8 decodes one code point and leaves i on the byte after it.
    let half = line.nextUTF8(i)
    assert line[i] == '\t'
    inc i
    let full1 = line.nextUTF8(i)
    var full2 = 0u32
    if i < line.len:
      assert line[i] == '\t'
      # Skip the separator, exactly as for the first column; without this
      # the tab itself would be decoded and stored as full2.
      inc i
      full2 = line.nextUTF8(i)
    result.add((half, full1, full2))
)()

func halfwidth(u: uint32): uint32 =
  ## Returns the halfwidth counterpart of the code point `u`, i.e. the
  ## `half` field of the first halfFullMap entry whose `full1` or `full2`
  ## equals `u`. Code points without an entry are returned unchanged.
  result = u
  if u == 0:
    # Entries without a second fullwidth form carry 0 in `full2`, so a
    # zero input must never be looked up.
    return
  for entry in halfFullMap:
    if u == entry.full1 or u == entry.full2:
      return entry.half

# Standalone half-width voiced-sound marks. In half-width text they follow
# the kana they modify as a separate code point.
const HalfDakuten = 0xFF9Eu32 # half-width dakuten
const HalfHanDakuten = 0xFF9Fu32 # half-width handakuten

# Note: in Unicode, for the kana handled here, the precomposed dakuten form
# is at (base code point + 1) and the precomposed handakuten form is at
# (base code point + 2). The conversions below rely on this arithmetic.

func halfwidth*(s: string): string =
  ## Converts every code point of `s` to its halfwidth form where one is
  ## known. Kana that carry a dakuten or handakuten are decomposed: the
  ## base kana (code point - 1 resp. - 2) is converted on its own and the
  ## matching half-width mark is appended after it.
  result = ""
  for cp in s.points:
    if cp in HasDakuten:
      # voiced kana: base is one code point below
      result.addUTF8(halfwidth(cp - 1))
      result.addUTF8(HalfDakuten)
    elif cp in HasHanDakuten:
      # semi-voiced kana: base is two code points below
      result.addUTF8(halfwidth(cp - 2))
      result.addUTF8(HalfHanDakuten)
    else:
      result.addUTF8(halfwidth(cp))

func fullwidth(r: uint32): uint32 =
  ## Returns the primary fullwidth form of the code point `r`, i.e. the
  ## `full1` field of the first halfFullMap entry whose `half` equals `r`.
  ## Code points without an entry are returned unchanged.
  result = r
  if r == 0:
    # zero is never looked up; it is passed through as-is
    return
  for entry in halfFullMap:
    if entry.half == r:
      return entry.full1

func fullwidth*(s: string): string =
  ## Converts every code point of `s` to its fullwidth form where one is
  ## known. A kana that can take a dakuten is held back for one step so
  ## that a directly following half-width dakuten or handakuten can be
  ## merged into the precomposed kana (base + 1 resp. base + 2).
  result = ""
  var pending = 0u32 # kana waiting for a possible mark; 0 means none
  for cp in s.points:
    if pending != 0:
      let held = pending
      pending = 0
      if cp == HalfDakuten:
        # merge with dakuten
        result.addUTF8(held + 1)
        continue
      if cp == HalfHanDakuten and held in CanHaveHanDakuten:
        # merge with handakuten
        result.addUTF8(held + 2)
        continue
      # no applicable mark followed: emit the held kana unchanged
      result.addUTF8(held)
    let wide = fullwidth(cp)
    if wide in CanHaveDakuten:
      pending = wide
    else:
      result.addUTF8(wide)
  if pending != 0:
    # input ended while a kana was still held back
    result.addUTF8(pending)

# Raw contents of the kana table, embedded at compile time.
const kanamap = staticRead"res/kanamap.tab"

func genFullSizeMap(): seq[(uint32, uint32)] =
  ## Parses `kanamap` into (source, replacement) code point pairs. Each
  ## line is expected to be: one code point, a tab, one code point.
  ## Parsing stops at the first empty line.
  result = @[]
  for row in kanamap.split('\n'):
    if row == "":
      break
    let cps = row.toPoints()
    assert cps[1] == uint32('\t')
    result.add((cps[0], cps[2]))

# The parsed table, evaluated at compile time.
const fullSizeMap = genFullSizeMap()

proc fullsize*(s: string): string =
  ## Returns `s` with every code point that has an entry in fullSizeMap
  ## replaced by the mapped code point; all others are copied unchanged.
  ## NOTE(review): presumably the table maps small kana to their
  ## full-size forms - confirm against res/kanamap.tab.
  result = ""
  for cp in s.points:
    let idx = searchInMap(fullSizeMap, cp)
    # searchInMap signals "not found" with -1
    let mapped =
      if idx != -1: fullSizeMap[idx][1]
      else: cp
    result.addUTF8(mapped)