https://github.com/akkartik/mu/blob/master/403code-point.mu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Convert a code point into a grapheme: its utf-8 encoding packed into a
# single 32-bit value, with the lead byte in the least significant byte and
# each following byte one position higher.
#
#   in <= 0x7f      returned unchanged (1 byte)
#   in <= 0x7ff     2 bytes, lead-byte marker 0xc0
#   in <= 0xffff    3 bytes, lead-byte marker 0xe0
#   in <= 0x1fffff  4 bytes, lead-byte marker 0xf0
#   anything larger prints "unsupported code point <hex>" to the real screen,
#                   sets ebx to 1 and calls syscall_exit
fn to-grapheme in: code-point -> out/eax: grapheme {
$to-grapheme:body: {
  var c/eax: int <- copy in
  var num-trailers/ecx: int <- copy 0
  var first/edx: int <- copy 0
  $to-grapheme:compute-length: {
    # single byte: the code point is its own encoding
    compare c, 0x7f
    {
      break-if->
      out <- copy c
      break $to-grapheme:body
    }
    # 2 bytes
    compare c, 0x7ff
    {
      break-if->
      num-trailers <- copy 1
      first <- copy 0xc0
      break $to-grapheme:compute-length
    }
    # 3 bytes
    compare c, 0xffff
    {
      break-if->
      num-trailers <- copy 2
      first <- copy 0xe0
      break $to-grapheme:compute-length
    }
    # 4 bytes
    compare c, 0x1fffff
    {
      break-if->
      num-trailers <- copy 3
      first <- copy 0xf0
      break $to-grapheme:compute-length
    }
    # more than 4 bytes: unsupported
    # Every value <= 0x1fffff has already left this block above, so this
    # guard must skip the error only for those values. (It used to be
    # 'break-if->', which skipped the error for exactly the inputs it was
    # meant to catch and let them through unencoded.)
    compare c, 0x1fffff
    {
      break-if-<=
      print-string-to-real-screen "unsupported code point "
      print-int32-hex-to-real-screen c
      print-string-to-real-screen "\n"
      var exit-status/ebx: int <- copy 1
      syscall_exit
    }
  }
  # emit trailer bytes, last trailer first: each takes the low 6 bits of
  # what's left of c, tagged with 0x80. Earlier-emitted bytes get pushed
  # toward the high end of result as later ones are added.
  var result/edi: int <- copy 0
  {
    compare num-trailers, 0
    break-if-<=
    var tmp/esi: int <- copy c
    tmp <- and 0x3f
    tmp <- or 0x80
    result <- shift-left 8
    result <- or tmp
    # drop the 6 bits just emitted
    c <- shift-right 6
    num-trailers <- decrement
    loop
  }
  # lead byte goes in the lowest position: remaining bits of c plus the
  # length marker
  result <- shift-left 8
  result <- or c
  result <- or first
  #
  out <- copy result
}
}
91
92
# Every code point from 0 through 0x7f should come back from to-grapheme
# with the same numeric value.
fn test-to-grapheme-single-byte {
  var expected/ecx: int <- copy 0
  {
    # stop after 0x7f, the last value checked
    compare expected, 0x80
    break-if->=
    var cp/eax: code-point <- copy expected
    var g/eax: grapheme <- to-grapheme cp
    var actual/eax: int <- copy g
    check-ints-equal actual, expected, "F - test-to-grapheme-single-byte"
    expected <- increment
    loop
  }
}
106
107
108
# Smallest code point outside the single-byte range: 0x80 should encode as
# the bytes c2 80 (c2 in the low byte of the result).
fn test-to-grapheme-two-bytes-min {
  var cp/eax: code-point <- copy 0x80
  var g/eax: grapheme <- to-grapheme cp
  var actual/eax: int <- copy g
  check-ints-equal actual, 0x80c2, "F - to-grapheme/2a"
}
115
116
# Largest code point that takes two bytes: 0x7ff should encode as the bytes
# df bf (df in the low byte of the result).
fn test-to-grapheme-two-bytes-max {
  var cp/eax: code-point <- copy 0x7ff
  var g/eax: grapheme <- to-grapheme cp
  var actual/eax: int <- copy g
  check-ints-equal actual, 0xbfdf, "F - to-grapheme/2b"
}
123
124
# Smallest code point that takes three bytes: 0x800 should encode as the
# bytes e0 a0 80 (e0 in the low byte of the result).
fn test-to-grapheme-three-bytes-min {
  var cp/eax: code-point <- copy 0x800
  var g/eax: grapheme <- to-grapheme cp
  var actual/eax: int <- copy g
  check-ints-equal actual, 0x80a0e0, "F - to-grapheme/3a"
}
131
132
# Largest code point that takes three bytes: 0xffff should encode as the
# bytes ef bf bf (ef in the low byte of the result).
fn test-to-grapheme-three-bytes-max {
  var cp/eax: code-point <- copy 0xffff
  var g/eax: grapheme <- to-grapheme cp
  var actual/eax: int <- copy g
  check-ints-equal actual, 0xbfbfef, "F - to-grapheme/3b"
}
139
140
# Smallest code point that takes four bytes: 0x10000 should encode as the
# bytes f0 90 80 80 (f0 in the low byte of the result).
fn test-to-grapheme-four-bytes-min {
  var cp/eax: code-point <- copy 0x10000
  var g/eax: grapheme <- to-grapheme cp
  var actual/eax: int <- copy g
  check-ints-equal actual, 0x808090f0, "F - to-grapheme/4a"
}
147
148
# Largest code point to-grapheme accepts: 0x1fffff should encode as the
# bytes f7 bf bf bf (f7 in the low byte of the result).
fn test-to-grapheme-four-bytes-max {
  var cp/eax: code-point <- copy 0x1fffff
  var g/eax: grapheme <- to-grapheme cp
  var actual/eax: int <- copy g
  check-ints-equal actual, 0xbfbfbff7, "F - to-grapheme/4b"
}
155
156
157
158
159
160
161