https://github.com/akkartik/mu/blob/main/baremetal/403unicode.mu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Encode a code point as a grapheme: its utf-8 bytes packed into a single
# word, with the lead byte in the lowest-order byte and each following
# trailer byte one byte higher (the order write-grapheme emits them in).
#
# Code points up to 0x7f are returned unchanged (single byte).
# Code points up to 0x7ff, 0xffff and 0x1fffff get 1, 2 and 3 trailer bytes
# respectively.
# Code points above 0x1fffff cannot be encoded in 4 bytes; 0 is returned for
# them. (0 is also the encoding of code point 0, so callers can't tell the
# two apart from the result alone.)
fn to-grapheme in: code-point -> _/eax: grapheme {
  var c/eax: int <- copy in
  var num-trailers/ecx: int <- copy 0
  var first/edx: int <- copy 0
  $to-grapheme:compute-length: {
    # single byte: the code point is its own encoding
    compare c, 0x7f
    {
      break-if->
      var g/eax: grapheme <- copy c
      return g
    }
    # 2 bytes: lead byte 110xxxxx
    compare c, 0x7ff
    {
      break-if->
      num-trailers <- copy 1
      first <- copy 0xc0
      break $to-grapheme:compute-length
    }
    # 3 bytes: lead byte 1110xxxx
    compare c, 0xffff
    {
      break-if->
      num-trailers <- copy 2
      first <- copy 0xe0
      break $to-grapheme:compute-length
    }
    # 4 bytes: lead byte 11110xxx
    compare c, 0x1fffff
    {
      break-if->
      num-trailers <- copy 3
      first <- copy 0xf0
      break $to-grapheme:compute-length
    }
    # Every supported range has been ruled out, so the code point needs more
    # than 4 bytes. Bail out here rather than falling through with
    # num-trailers = 0, which would return the raw code point as a grapheme.
    return 0
  }
  # emit trailer bytes, last one first: low 6 bits of 'c' under a '10' prefix
  var result/edi: grapheme <- copy 0
  {
    compare num-trailers, 0
    break-if-<=
    var tmp/esi: int <- copy c
    tmp <- and 0x3f
    tmp <- or 0x80
    # earlier-emitted (later-in-sequence) trailers move up one byte
    result <- shift-left 8
    result <- or tmp
    # update loop state
    c <- shift-right 6
    num-trailers <- decrement
    loop
  }
  # emit the lead byte into the lowest-order byte: length marker plus the
  # bits of 'c' left over after peeling off the trailers
  result <- shift-left 8
  result <- or c
  result <- or first
  return result
}
88
89
90
91
# Read one grapheme from 'in': a lead byte followed by however many trailer
# bytes the lead byte calls for, packed with the lead byte in the
# lowest-order byte and each trailer one byte higher.
#
# Returns 0xffffffff if 'in' is empty on entry.
# Lead bytes below 0xc0, and lead bytes 0xfe and above, are returned as-is.
# Lead bytes from 0xf8 up to 0xfd are unsupported: 0 is returned and no
# further bytes are consumed.
fn read-grapheme in: (addr stream byte) -> _/eax: grapheme {
  # nothing to read: report end of input
  {
    var exhausted?/eax: boolean <- stream-empty? in
    compare exhausted?, 0
    break-if-=
    return 0xffffffff
  }
  var lead/eax: byte <- read-byte in
  # bytes that stand on their own
  {
    compare lead, 0xc0
    break-if->=
    var single/eax: grapheme <- copy lead
    return single
  }
  {
    compare lead, 0xfe
    break-if-<
    var single/eax: grapheme <- copy lead
    return single
  }
  # lead bytes announcing more than 3 trailers: unsupported
  {
    compare lead, 0xf8
    break-if-<
    return 0
  }
  # what's left is 0xc0 up to 0xf7: assume the longest form, then shorten
  var num-trailers/ecx: int <- copy 3
  {
    compare lead, 0xf0
    break-if->=
    num-trailers <- copy 2
  }
  {
    compare lead, 0xe0
    break-if->=
    num-trailers <- copy 1
  }
  # fold each trailer in at its byte position: 1, 2, .. num-trailers
  var result/edi: grapheme <- copy lead
  var position/edx: int <- copy 1
  {
    compare position, num-trailers
    break-if->
    var trailer/eax: byte <- read-byte in
    var shifted/eax: int <- copy trailer
    shifted <- shift-left-bytes shifted, position
    result <- or shifted
    position <- increment
    loop
  }
  return result
}
157
158
# Shift 'n' left by 'k' whole bytes and return the result.
# At most 4 byte-shifts are performed no matter how large 'k' is; a 'k' of
# zero or less returns 'n' unchanged.
fn shift-left-bytes n: int, k: int -> _/eax: int {
  # clamp the amount of work up front
  var remaining/ecx: int <- copy k
  {
    compare remaining, 4
    break-if-<=
    remaining <- copy 4
  }
  var shifted/eax: int <- copy n
  {
    compare remaining, 0
    break-if-<=
    shifted <- shift-left 8
    remaining <- decrement
    loop
  }
  return shifted
}
173
174
175
# Append the bytes of grapheme 'g' to 'out', lowest-order byte first.
# The lowest byte is always written; writing then continues one byte at a
# time for as long as anything non-zero remains above the bytes already
# written.
fn write-grapheme out: (addr stream byte), g: grapheme {
  var pending/eax: int <- copy g
  {
    append-byte out, pending
    # shift-right fills with zeroes, so 'pending' is 0 after at most 4
    # rounds and the loop can't emit more than 4 bytes
    pending <- shift-right 8
    compare pending, 0
    loop-if-!=
  }
}