1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
import std/strutils
import utils/map
import utils/twtuni
const CanHaveDakuten = ("かきくけこさしすせそたちつてとはひふへほカキクケコ" &
"サシスセソタチツテトハヒフヘホ").toPoints()
const CanHaveHanDakuten = "はひふへほハヒフヘホ".toPoints()
const HasDakuten = ("がぎぐげござじずぜぞだぢづでどばびぶべぼガギグゲゴ" &
"ザジズゼゾダヂヅデドバビブベボ").toPoints()
const HasHanDakuten = "ぱぴぷぺぽパピプペポ".toPoints()
# Halfwidth to fullwidth & vice versa
const halfFullMap = (func(): seq[tuple[half, full1, full2: uint32]] =
result = @[]
const map = staticRead"res/widthconvmap.tab"
for line in map.split('\n'):
if line == "":
break
var i = 0
let half = line.nextUTF8(i)
assert line[i] == '\t'
inc i
let full1 = line.nextUTF8(i)
var full2 = 0u32
if i < line.len:
assert line[i] == '\t'
full2 = line.nextUTF8(i)
result.add((half, full1, full2))
)()
func halfwidth(u: uint32): uint32 =
if u != 0: # special case to avoid comparison with f2
for (h, f1, f2) in halfFullMap:
if f1 == u or f2 == u:
return h
return u
const HalfDakuten = 0xFF9Eu32 # half-width dakuten
const HalfHanDakuten = 0xFF9Fu32 # half-width handakuten
# Note: in unicode, char + 1 is dakuten and char + 2 handakuten
func halfwidth*(s: string): string =
result = ""
for u in s.points:
case u
of HasDakuten:
result.addUTF8(halfwidth(u - 1))
result.addUTF8(HalfDakuten)
of HasHanDakuten:
result.addUTF8(halfwidth(u - 2))
result.addUTF8(HalfHanDakuten)
else:
result.addUTF8(halfwidth(u))
func fullwidth(r: uint32): uint32 =
if r != 0: # special case to avoid comparison with f2
for (h, f1, f2) in halfFullMap:
if h == r:
return f1
return r
func fullwidth*(s: string): string =
result = ""
var lastu = 0u32
for u in s.points:
if lastu != 0:
if u == HalfDakuten:
# flush with dakuten
result.addUTF8(lastu + 1)
lastu = 0
continue
elif u == HalfHanDakuten and lastu in CanHaveHanDakuten:
# flush with handakuten
result.addUTF8(lastu + 2)
lastu = 0
continue
result.addUTF8(lastu)
lastu = 0
let u = fullwidth(u)
if u in CanHaveDakuten:
lastu = u
else:
result.addUTF8(u)
if lastu != 0:
# flush
result.addUTF8(lastu)
const kanamap = staticRead"res/kanamap.tab"
func genFullSizeMap(): seq[(uint32, uint32)] =
result = @[]
for line in kanamap.split('\n'):
if line.len == 0: break
let rs = line.toPoints()
assert rs[1] == uint32('\t')
result.add((rs[0], rs[2]))
const fullSizeMap = genFullSizeMap()
proc fullsize*(s: string): string =
result = ""
for u in s.points:
let i = searchInMap(fullSizeMap, u)
if i == -1:
result.addUTF8(u)
else:
result.addUTF8(fullSizeMap[i][1])
|