https://github.com/akkartik/mu/blob/main/shell/tokenize.mu
1
2
# A single lexical token produced by the tokenizer.
type token {
  # discriminator; values written in this file: 1 = stream, 3 = indent
  # NOTE(review): other values are assigned by helpers outside this view -- confirm
  type: int

  # text payload (filled in for symbol, stream and bracket tokens)
  text-data: (handle stream byte)

  # numeric payload; indent tokens count their leading spaces here
  number-data: int
}
12
# Scan gap buffer 'in' from its beginning and append each non-skip token to 'out'.
# If 'trace' reports an error after any token, return immediately
# (the final trace-higher is not reached on that path).
fn tokenize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
  trace-text trace, "tokenize", "tokenize"
  trace-lower trace
  rewind-gap-buffer in
  # scanning begins at the start of a line
  var line-start?/edi: boolean <- copy 1/true
  {
    # stop once the whole buffer has been scanned
    var finished?/eax: boolean <- gap-buffer-scan-done? in
    compare finished?, 0/false
    break-if-!=
    # stack space for the next token
    var scratch: token
    var curr/edx: (addr token) <- address scratch
    line-start? <- next-token in, curr, line-start?, trace
    # bail out on the first error
    var failed?/eax: boolean <- has-errors? trace
    compare failed?, 0/false
    {
      break-if-=
      return
    }
    # skip tokens are never emitted
    var ignore?/eax: boolean <- skip-token? curr
    compare ignore?, 0/false
    loop-if-!=
    write-to-stream out, curr
    loop
  }
  trace-higher trace
}
40
# "123 a" should yield an indent token of 0 spaces followed by the number token "123".
fn test-tokenize-number {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "123 a"
  # output tokens
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # first token: indent
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-number/before-indent-type"
  var tok-num/eax: (addr int) <- get tok, number-data
  check-ints-equal *tok-num, 0/spaces, "F - test-tokenize-number/before-indent"
  # second token: the number
  read-from-stream tokens, tok
  var is-number?/eax: boolean <- number-token? tok
  check is-number?, "F - test-tokenize-number"
  var tok-text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var tok-text/eax: (addr stream byte) <- lookup *tok-text-ah
  check-stream-equal tok-text, "123", "F - test-tokenize-number: value"
}
68
# "-123 a" should yield an indent token of 0 spaces followed by the number token "-123".
fn test-tokenize-negative-number {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "-123 a"
  # output tokens
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # first token: indent
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-negative-number/before-indent-type"
  var tok-num/eax: (addr int) <- get tok, number-data
  check-ints-equal *tok-num, 0/spaces, "F - test-tokenize-negative-number/before-indent"
  # second token: the number, sign included
  read-from-stream tokens, tok
  var is-number?/eax: boolean <- number-token? tok
  check is-number?, "F - test-tokenize-negative-number"
  var tok-text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var tok-text/eax: (addr stream byte) <- lookup *tok-text-ah
  check-stream-equal tok-text, "-123", "F - test-tokenize-negative-number: value"
}
96
# "'(a)" should yield: indent, quote, open paren, (one unchecked token), close paren.
fn test-tokenize-quote {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "'(a)"
  # output tokens
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-quote/before-indent-type"
  var tok-num/eax: (addr int) <- get tok, number-data
  check-ints-equal *tok-num, 0/spaces, "F - test-tokenize-quote/before-indent"
  # quote
  read-from-stream tokens, tok
  var is-quote?/eax: boolean <- quote-token? tok
  check is-quote?, "F - test-tokenize-quote: quote"
  # open paren
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-quote: open paren"
  # the token between the parens is read but not checked
  read-from-stream tokens, tok
  # close paren
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-quote: close paren"
}
128
# "`(a)" should yield: indent, backquote, open paren, (one unchecked token), close paren.
fn test-tokenize-backquote {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "`(a)"
  # output tokens
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-backquote/before-indent-type"
  var tok-num/eax: (addr int) <- get tok, number-data
  check-ints-equal *tok-num, 0/spaces, "F - test-tokenize-backquote/before-indent"
  # backquote
  read-from-stream tokens, tok
  var is-backquote?/eax: boolean <- backquote-token? tok
  check is-backquote?, "F - test-tokenize-backquote: backquote"
  # open paren
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-backquote: open paren"
  # the token between the parens is read but not checked
  read-from-stream tokens, tok
  # close paren
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-backquote: close paren"
}
160
# ",(a)" should yield: indent, unquote, open paren, (one unchecked token), close paren.
fn test-tokenize-unquote {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, ",(a)"
  # output tokens
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-unquote/before-indent-type"
  var tok-num/eax: (addr int) <- get tok, number-data
  check-ints-equal *tok-num, 0/spaces, "F - test-tokenize-unquote/before-indent"
  # unquote
  read-from-stream tokens, tok
  var is-unquote?/eax: boolean <- unquote-token? tok
  check is-unquote?, "F - test-tokenize-unquote: unquote"
  # open paren
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-unquote: open paren"
  # the token between the parens is read but not checked
  read-from-stream tokens, tok
  # close paren
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-unquote: close paren"
}
192
# ",@a" should yield an indent token followed by an unquote-splice token.
fn test-tokenize-unquote-splice {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, ",@a"
  # output tokens
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-unquote-splice/before-indent-type"
  var tok-num/eax: (addr int) <- get tok, number-data
  check-ints-equal *tok-num, 0/spaces, "F - test-tokenize-unquote-splice/before-indent"
  # unquote-splice
  read-from-stream tokens, tok
  var is-splice?/eax: boolean <- unquote-splice-token? tok
  check is-splice?, "F - test-tokenize-unquote-splice: unquote-splice"
}
217
# "(a . b)" should yield: indent, open paren, (unchecked), dot, (unchecked), close paren.
fn test-tokenize-dotted-list {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "(a . b)"
  # output tokens
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-dotted-list/before-indent-type"
  var tok-num/eax: (addr int) <- get tok, number-data
  check-ints-equal *tok-num, 0/spaces, "F - test-tokenize-dotted-list/before-indent"
  # open paren
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-dotted-list: open paren"
  # token before the dot is read but not checked
  read-from-stream tokens, tok
  # dot
  read-from-stream tokens, tok
  var is-dot?/eax: boolean <- dot-token? tok
  check is-dot?, "F - test-tokenize-dotted-list: dot"
  # token after the dot is read but not checked
  read-from-stream tokens, tok
  # close paren
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-dotted-list: close paren"
}
250
251
# A double-quoted literal should yield an indent token and one stream token
# holding the text between the quotes, with nothing left over.
fn test-tokenize-stream-literal {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "\"abc def\""
  # output tokens
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-stream-literal/before-indent-type"
  var tok-num/eax: (addr int) <- get tok, number-data
  check-ints-equal *tok-num, 0/spaces, "F - test-tokenize-stream-literal/before-indent"
  # stream token and its contents
  read-from-stream tokens, tok
  var is-stream?/eax: boolean <- stream-token? tok
  check is-stream?, "F - test-tokenize-stream-literal: type"
  var tok-text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var tok-text/eax: (addr stream byte) <- lookup *tok-text-ah
  var same?/eax: boolean <- stream-data-equal? tok-text, "abc def"
  check same?, "F - test-tokenize-stream-literal"
  # no further tokens
  var drained?/eax: boolean <- stream-empty? tokens
  check drained?, "F - test-tokenize-stream-literal: empty?"
}
282
283
# A square-bracketed literal should yield an indent token and one stream token
# holding the text between the brackets, with nothing left over.
fn test-tokenize-balanced-stream-literal {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "[abc def]"
  # output tokens
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # indent
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-balanced-stream-literal/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-balanced-stream-literal/before-indent"
  # stream token and its contents
  read-from-stream stream, curr-token
  var stream?/eax: boolean <- stream-token? curr-token
  # failure message names this test (it previously named test-tokenize-stream-literal)
  check stream?, "F - test-tokenize-balanced-stream-literal: type"
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
  check data-equal?, "F - test-tokenize-balanced-stream-literal"
  # no further tokens
  var empty?/eax: boolean <- stream-empty? stream
  check empty?, "F - test-tokenize-balanced-stream-literal: empty?"
}
314
# Nested square brackets stay inside the literal: "[abc [def]]" should yield
# an indent token and one stream token containing "abc [def]".
fn test-tokenize-nested-stream-literal {
  # input
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "[abc [def]]"
  # output tokens
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # indent
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-nested-stream-literal/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-nested-stream-literal/before-indent"
  # stream token and its contents
  read-from-stream stream, curr-token
  var stream?/eax: boolean <- stream-token? curr-token
  # failure message names this test (it previously named test-tokenize-stream-literal)
  check stream?, "F - test-tokenize-nested-stream-literal: type"
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc [def]"
  check data-equal?, "F - test-tokenize-nested-stream-literal"
  # no further tokens
  var empty?/eax: boolean <- stream-empty? stream
  check empty?, "F - test-tokenize-nested-stream-literal: empty?"
}
345
# "([abc def])" should yield: indent, bracket, stream token "abc def", bracket,
# with nothing left over.
fn test-tokenize-stream-literal-in-tree {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "([abc def])"
  # output tokens
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-stream-literal-in-tree/before-indent-type"
  var tok-num/eax: (addr int) <- get tok, number-data
  check-ints-equal *tok-num, 0/spaces, "F - test-tokenize-stream-literal-in-tree/before-indent"
  # opening bracket
  read-from-stream tokens, tok
  var is-bracket?/eax: boolean <- bracket-token? tok
  check is-bracket?, "F - test-tokenize-stream-literal-in-tree: open paren"
  # stream token and its contents
  read-from-stream tokens, tok
  var is-stream?/eax: boolean <- stream-token? tok
  check is-stream?, "F - test-tokenize-stream-literal-in-tree: type"
  var tok-text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var tok-text/eax: (addr stream byte) <- lookup *tok-text-ah
  var same?/eax: boolean <- stream-data-equal? tok-text, "abc def"
  check same?, "F - test-tokenize-stream-literal-in-tree"
  # closing bracket
  read-from-stream tokens, tok
  var is-bracket?/eax: boolean <- bracket-token? tok
  check is-bracket?, "F - test-tokenize-stream-literal-in-tree: close paren"
  # no further tokens
  var drained?/eax: boolean <- stream-empty? tokens
  check drained?, "F - test-tokenize-stream-literal-in-tree: empty?"
}
382
# Two lines, the second indented by two spaces, should yield:
# indent(0), "abc", indent(2), "def".
fn test-tokenize-indent {
  # input; the second line needs exactly two leading spaces to match the
  # 2/spaces assertion below
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "abc\n  def"
  # output tokens
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  # trace
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # line 1: indent of 0 spaces
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-indent/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-indent/before-indent"
  # line 1: "abc"
  read-from-stream stream, curr-token
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  check-stream-equal curr-token-data, "abc", "F - test-tokenize-indent/before"
  # line 2: indent of 2 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-indent/type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 2/spaces, "F - test-tokenize-indent"
  # line 2: "def"
  read-from-stream stream, curr-token
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  check-stream-equal curr-token-data, "def", "F - test-tokenize-indent/after"
}
419
420
421
# Read the next token from 'in' into 'out'.
#
# 'start-of-line?' says whether scanning is positioned at the start of a line.
# Result: 1 after consuming a newline or reaching the end of input,
# 0 after yielding an indent token, otherwise the incoming 'start-of-line?'.
# An unbalanced ']' records an error in 'trace' and returns right away.
fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean {
  trace-text trace, "tokenize", "next-token"
  trace-lower trace
  # at the start of a line, provisionally fill 'out' with an indent token
  {
    compare start-of-line?, 0/false
    break-if-=
    next-indent-token in, out, trace
  }
  skip-spaces-from-gap-buffer in
  # a '#' discards the rest of the line (skip-rest-of-line leaves the newline unread)
  var peeked/eax: code-point-utf8 <- peek-from-gap-buffer in
  {
    compare peeked, 0x23/comment
    break-if-!=
    skip-rest-of-line in
  }
  # newline: consume it and turn 'out' into a skip token
  var peeked/eax: code-point-utf8 <- peek-from-gap-buffer in
  {
    compare peeked, 0xa/newline
    break-if-!=
    trace-text trace, "tokenize", "newline"
    peeked <- read-from-gap-buffer in
    initialize-skip-token out
    trace-higher trace
    return 1/at-start-of-line
  }
  # line has content: hand back the indent token prepared above
  {
    compare start-of-line?, 0/false
    break-if-=

    trace-higher trace
    return 0/not-at-start-of-line
  }
  # nothing left to scan: skip token
  {
    var finished?/eax: boolean <- gap-buffer-scan-done? in
    compare finished?, 0/false
    break-if-=
    trace-text trace, "tokenize", "end"
    initialize-skip-token out
    trace-higher trace
    return 1/at-start-of-line
  }
  # keep the upcoming code point in ecx; eax is reused by the calls below
  var _c/eax: code-point-utf8 <- peek-from-gap-buffer in
  var c/ecx: code-point-utf8 <- copy _c
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var line-storage: (stream byte 0x40)
    var line/esi: (addr stream byte) <- address line-storage
    write line, "next: "
    var c-val/eax: int <- copy c
    write-int32-hex line, c-val
    trace trace, "tokenize", line
  }
  # dispatch on the upcoming code point
  $next-token:case: {
    # '"' opens a stream literal
    {
      compare c, 0x22/double-quote
      break-if-!=
      var ignored/eax: code-point-utf8 <- read-from-gap-buffer in
      next-stream-token in, out, trace
      break $next-token:case
    }
    # '[' opens a balanced stream literal
    {
      compare c, 0x5b/open-square-bracket
      break-if-!=
      var ignored/eax: code-point-utf8 <- read-from-gap-buffer in
      next-balanced-stream-token in, out, trace
      break $next-token:case
    }
    # symbol characters
    {
      var is-symbol?/eax: boolean <- symbol-code-point-utf8? c
      compare is-symbol?, 0/false
      break-if-=
      next-symbol-token in, out, trace
      break $next-token:case
    }
    # a ']' reaching this point has no matching '['
    {
      compare c, 0x5d/close-square-bracket
      break-if-!=
      error trace, "unbalanced ']'"
      return start-of-line?
    }
    # any other bracket is a token by itself
    {
      var is-bracket?/eax: boolean <- bracket-code-point-utf8? c
      compare is-bracket?, 0/false
      break-if-=
      var ch/eax: code-point-utf8 <- read-from-gap-buffer in
      next-bracket-token ch, out, trace
      break $next-token:case
    }
    # quote
    {
      compare c, 0x27/single-quote
      break-if-!=
      var ch/eax: code-point-utf8 <- read-from-gap-buffer in
      initialize-token out, "'"
      break $next-token:case
    }
    # backquote
    {
      compare c, 0x60/backquote
      break-if-!=
      var ch/eax: code-point-utf8 <- read-from-gap-buffer in
      initialize-token out, "`"
      break $next-token:case
    }
    # unquote, or unquote-splice when an '@' follows the comma
    {
      compare c, 0x2c/comma
      break-if-!=
      var ch/eax: code-point-utf8 <- read-from-gap-buffer in

      {
        ch <- peek-from-gap-buffer in
        compare ch, 0x40/at-sign
        break-if-!=
        ch <- read-from-gap-buffer in
        initialize-token out, ",@"
        break $next-token:case
      }
      initialize-token out, ","
      break $next-token:case
    }
    # no case matched: draw the offending code point and abort
    set-cursor-position 0/screen, 0x40 0x20
    {
      var c-val/eax: int <- copy c
      draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, c-val, 7/fg 0/bg
    }
    abort "unknown token type"
  }
  trace-higher trace
  # trace the text of the token just produced
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var line-storage: (stream byte 0x400)
    var line/eax: (addr stream byte) <- address line-storage
    write line, "=> "
    write-token-text-data line, out
    trace trace, "tokenize", line
  }
  return start-of-line?
}
571
# Copy code points from 'in' into the text-data of '_out' for as long as
# symbol-code-point-utf8? accepts them (or until the input is exhausted).
fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a symbol"
  trace-lower trace
  # allocate the token's text buffer
  var self/eax: (addr token) <- copy _out
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  populate-stream text-ah, 0x40/max-symbol-size
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/edi: (addr stream byte) <- copy _text
  $next-symbol-token:loop: {
    var finished?/eax: boolean <- gap-buffer-scan-done? in
    compare finished?, 0/false
    break-if-!=
    var c/eax: code-point-utf8 <- peek-from-gap-buffer in
    {
      # the check sits in its own block because 'tracing?' shares eax with 'c'
      {
        var tracing?/eax: boolean <- should-trace? trace
        compare tracing?, 0/false
      }
      break-if-=
      var line-storage: (stream byte 0x40)
      var line/esi: (addr stream byte) <- address line-storage
      write line, "next: "
      var c-val/eax: int <- copy c
      write-int32-hex line, c-val
      trace trace, "tokenize", line
    }

    # first non-symbol code point ends the token; it is left unread
    {
      var is-symbol?/eax: boolean <- symbol-code-point-utf8? c
      compare is-symbol?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-symbol-token:loop
    }
    var c/eax: code-point-utf8 <- read-from-gap-buffer in
    write-code-point-utf8 text, c
    loop
  }
  trace-higher trace
  # trace the collected symbol
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var line-storage: (stream byte 0x40)
    var line/esi: (addr stream byte) <- address line-storage
    write line, "=> "
    rewind-stream text
    write-stream line, text
    trace trace, "tokenize", line
  }
}
623
# Copy a decimal number from 'in' into the text-data of '_out':
# an optional leading '-', then digits until the first code point that
# symbol-code-point-utf8? rejects (or the end of input).
# A symbol code point that is not a decimal digit records the error
# "invalid number" in 'trace' and returns immediately.
fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a number"
  trace-lower trace
  # allocate the token's text buffer
  var out/eax: (addr token) <- copy _out
  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
  populate-stream out-data-ah, 0x40
  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
  var out-data/edi: (addr stream byte) <- copy _out-data
  # optional sign: only consume the first code point when it is a '-'.
  # Anything else is left for the loop below, so it goes through the digit check.
  $next-number-token:check-minus: {
    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
    compare g, 0x2d/minus
    break-if-!=
    g <- read-from-gap-buffer in
    write-code-point-utf8 out-data, g
  }
  $next-number-token:loop: {
    var done?/eax: boolean <- gap-buffer-scan-done? in
    compare done?, 0/false
    break-if-!=
    var g/eax: code-point-utf8 <- peek-from-gap-buffer in
    {
      # the check sits in its own block because 'should-trace?' shares eax with 'g'
      {
        var should-trace?/eax: boolean <- should-trace? trace
        compare should-trace?, 0/false
      }
      break-if-=
      var stream-storage: (stream byte 0x40)
      var stream/esi: (addr stream byte) <- address stream-storage
      write stream, "next: "
      var gval/eax: int <- copy g
      write-int32-hex stream, gval
      trace trace, "tokenize", stream
    }

    # a non-symbol code point ends the number; it is left unread
    {
      var symbol-code-point-utf8?/eax: boolean <- symbol-code-point-utf8? g
      compare symbol-code-point-utf8?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-number-token:loop
    }

    # a symbol code point that isn't a digit makes the number invalid
    {
      var digit?/eax: boolean <- decimal-digit? g
      compare digit?, 0/false
      break-if-!=
      error trace, "invalid number"
      return
    }
    trace-text trace, "tokenize", "append"
    var g/eax: code-point-utf8 <- read-from-gap-buffer in
    write-code-point-utf8 out-data, g
    loop
  }
  trace-higher trace
}
679
# Fill '_out' with a stream token (type 1) holding everything up to the next '"'.
# The caller has already consumed the opening quote (see next-token).
# Running out of input first records the error "unbalanced '\"'" and returns.
fn next-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "stream"
  var self/edi: (addr token) <- copy _out
  var kind/eax: (addr int) <- get self, type
  copy-to *kind, 1/stream
  var text-ah/eax: (addr handle stream byte) <- get self, text-data


  populate-stream text-ah, 0x400/max-definition-size=1KB
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/edi: (addr stream byte) <- copy _text
  {
    # input exhausted before the closing quote
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    {
      break-if-=
      error trace, "unbalanced '\"'"
      return
    }
    # the closing quote is consumed but not stored
    var c/eax: code-point-utf8 <- read-from-gap-buffer in
    compare c, 0x22/double-quote
    break-if-=
    write-code-point-utf8 text, c
    loop
  }
  # trace the collected text
  # NOTE(review): this scratch buffer (0x400) is the same size as the token
  # buffer, before the "=> " prefix is added -- confirm behavior for a full token
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var line-storage: (stream byte 0x400)
    var line/esi: (addr stream byte) <- address line-storage
    write line, "=> "
    rewind-stream text
    write-stream-immutable line, text
    trace trace, "tokenize", line
  }
}
717
# Fill '_out' with a stream token (type 1) holding everything up to the ']'
# that matches an already-consumed '[' (see next-token). Nested '[' ... ']'
# pairs are kept as part of the text.
# Running out of input first records the error "unbalanced '['" and returns.
fn next-balanced-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "balanced stream"
  var self/edi: (addr token) <- copy _out
  var kind/eax: (addr int) <- get self, type
  copy-to *kind, 1/stream
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  # count of '[' seen inside the literal and not yet closed;
  # never assigned explicitly, so the code relies on it starting at 0
  var depth: int


  populate-stream text-ah, 0x40000/max-definition-size=256KB
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/edi: (addr stream byte) <- copy _text
  $next-balanced-stream-token:loop: {
    # input exhausted before the closing bracket
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    {
      break-if-=
      error trace, "unbalanced '['"
      return
    }
    var c/eax: code-point-utf8 <- read-from-gap-buffer in
    # nested open bracket
    {
      compare c, 0x5b/open-square-bracket
      break-if-!=
      increment depth
    }
    # close bracket: at depth 0 it ends the token (and is not stored)
    {
      compare c, 0x5d/close-square-bracket
      break-if-!=
      compare depth, 0
      break-if-= $next-balanced-stream-token:loop
      decrement depth
    }
    write-code-point-utf8 text, c
    loop
  }
  # trace the collected text
  # NOTE(review): this scratch buffer (0x400) is much smaller than the token
  # buffer (0x40000) -- confirm behavior when tracing a large literal
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var line-storage: (stream byte 0x400)
    var line/esi: (addr stream byte) <- address line-storage
    write line, "=> "
    rewind-stream text
    write-stream-immutable line, text
    trace trace, "tokenize", line
  }
}
766
# Fill the text-data of '_out' with the single code point 'g'.
fn next-bracket-token g: code-point-utf8, _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "bracket"
  # allocate the token's text buffer and store the bracket in it
  var self/eax: (addr token) <- copy _out
  var text-ah/eax: (addr handle stream byte) <- get self, text-data
  populate-stream text-ah, 0x40
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/edi: (addr stream byte) <- copy _text
  write-code-point-utf8 text, g
  # trace the result
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var line-storage: (stream byte 0x40)
    var line/esi: (addr stream byte) <- address line-storage
    write line, "=> "
    rewind-stream text
    write-stream line, text
    trace trace, "tokenize", line
  }
}
787
# Consume code points from 'in' until the next one is a newline or the input
# is exhausted. The newline itself is left unread.
fn skip-rest-of-line in: (addr gap-buffer) {
  {
    # stop at end of input
    var finished?/eax: boolean <- gap-buffer-scan-done? in
    compare finished?, 0/false
    break-if-!=
    # stop in front of a newline
    var c/eax: code-point-utf8 <- peek-from-gap-buffer in
    compare c, 0xa/newline
    break-if-=
    # otherwise discard and continue
    c <- read-from-gap-buffer in
    loop
  }
}
800
# Turn '_out' into an indent token (type 3) whose number-data is the count of
# consecutive spaces consumed from the current position of 'in'.
fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "indent"
  trace-lower trace
  var self/edi: (addr token) <- copy _out
  var kind/eax: (addr int) <- get self, type
  copy-to *kind, 3/indent
  # the space counter lives directly in the token
  var count/edi: (addr int) <- get self, number-data
  copy-to *count, 0
  {
    var finished?/eax: boolean <- gap-buffer-scan-done? in
    compare finished?, 0/false
    break-if-!=
    var c/eax: code-point-utf8 <- peek-from-gap-buffer in
    {
      # the check sits in its own block because 'tracing?' shares eax with 'c'
      {
        var tracing?/eax: boolean <- should-trace? trace
        compare tracing?, 0/false
      }
      break-if-=
      var line-storage: (stream byte 0x40)
      var line/esi: (addr stream byte) <- address line-storage
      write line, "next: "
      var c-val/eax: int <- copy c
      write-int32-hex line, c-val
      trace trace, "tokenize", line
    }

    # anything other than a space ends the indent; it is left unread
    compare c, 0x20/space
    break-if-!=
    c <- read-from-gap-buffer in
    increment *count
    loop
  }
  trace-higher trace
  # trace the final count
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var line-storage: (stream byte 0x40)
    var line/esi: (addr stream byte) <- address line-storage
    write line, "=> indent "
    write-int32-hex line, *count
    trace trace, "tokenize", line
  }
}
846
847
848
849
850
851
852
853
854
855
856
857
858
# True unless 'g' is whitespace, a quote/unquote character, a bracket,
# '#' or '"'.
fn symbol-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
  # whitespace
  var space?/eax: boolean <- whitespace-code-point-utf8? g
  compare space?, 0/false
  {
    break-if-=
    return 0/false
  }
  # quote, backquote, comma, at-sign
  var quoting?/eax: boolean <- quote-or-unquote-code-point-utf8? g
  compare quoting?, 0/false
  {
    break-if-=
    return 0/false
  }
  # any bracket
  var is-bracket?/eax: boolean <- bracket-code-point-utf8? g
  compare is-bracket?, 0/false
  {
    break-if-=
    return 0/false
  }
  # '#'
  {
    compare g, 0x23/hash
    break-if-!=
    return 0/false
  }
  # '"'
  {
    compare g, 0x22/double-quote
    break-if-!=
    return 0/false
  }
  # everything else may appear in a symbol
  return 1/true
}
890
# Return true if 'g' is a tab, a newline or a space.
fn whitespace-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
  {
    # leave the block on the first match
    compare g, 9/tab
    break-if-=
    compare g, 0xa/newline
    break-if-=
    compare g, 0x20/space
    break-if-=
    # no match
    return 0/false
  }
  return 1/true
}
909
# Return true if 'g' is a single quote, a backquote, a comma or an at-sign.
fn quote-or-unquote-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
  {
    # leave the block on the first match
    compare g, 0x27/single-quote
    break-if-=
    compare g, 0x60/backquote
    break-if-=
    compare g, 0x2c/comma
    break-if-=
    compare g, 0x40/at-sign
    break-if-=
    # no match
    return 0/false
  }
  return 1/true
}
933
# Return true if 'g' is one of ( ) [ ] { }.
fn bracket-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
  {
    # leave the block on the first match
    compare g, 0x28/open-paren
    break-if-=
    compare g, 0x29/close-paren
    break-if-=
    compare g, 0x5b/open-square-bracket
    break-if-=
    compare g, 0x5d/close-square-bracket
    break-if-=
    compare g, 0x7b/open-curly-bracket
    break-if-=
    compare g, 0x7d/close-curly-bracket
    break-if-=
    # no match
    return 0/false
  }
  return 1/true
}
967
# Return true if the text-data of '_self' is an optional leading '-' followed
# by code points that all satisfy decimal-digit?.
# Rewinds the token's text-data stream and reads from it.
# The token's type field is not consulted.
fn number-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  # keep the stream in ecx; eax is needed for code points and call results
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var c/eax: code-point-utf8 <- read-code-point-utf8 text
  # step past a leading minus sign
  compare c, 0x2d/minus
  {
    break-if-!=
    c <- read-code-point-utf8 text
  }
  # each iteration checks one code point
  {
    # a single non-digit disqualifies the token
    {
      var digit?/eax: boolean <- decimal-digit? c
      compare digit?, 0/false
      break-if-!=
      return 0/false
    }
    # stop once the stream has been fully read
    {
      var exhausted?/eax: boolean <- stream-empty? text
      compare exhausted?, 0/false
    }
    break-if-!=
    c <- read-code-point-utf8 text
    loop
  }
  return 1/true
}
998
# Return false for tokens of type 1/stream. For any other token, return
# whether the first code point of its text-data is a bracket
# (see bracket-code-point-utf8?).
# Rewinds the token's text-data stream and reads one code point from it.
fn bracket-token? _self: (addr token) -> _/eax: boolean {
  var tok/esi: (addr token) <- copy _self
  # stream tokens never count as brackets, whatever their text
  var kind/eax: (addr int) <- get tok, type
  compare *kind, 1/stream
  {
    break-if-!=
    return 0/false
  }
  # classify by the first code point
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: code-point-utf8 <- read-code-point-utf8 text
  var bracket?/eax: boolean <- bracket-code-point-utf8? first
  return bracket?
}
1015
# Return the result of comparing the text-data of '_self' against "'"
# using stream-data-equal?. Rewinds the text-data stream first.
# The token's type field is not consulted.
fn quote-token? _self: (addr token) -> _/eax: boolean {
  var tok/esi: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, "'"
  return match?
}
1024
# Return the result of comparing the text-data of '_self' against "`"
# using stream-data-equal?. Rewinds the text-data stream first.
# The token's type field is not consulted.
fn backquote-token? _self: (addr token) -> _/eax: boolean {
  var tok/esi: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, "`"
  return match?
}
1033
# Return the result of comparing the text-data of '_self' against ","
# using stream-data-equal?. Rewinds the text-data stream first.
# The token's type field is not consulted.
fn unquote-token? _self: (addr token) -> _/eax: boolean {
  var tok/esi: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, ","
  return match?
}
1042
# Return the result of comparing the text-data of '_self' against ",@"
# using stream-data-equal?. Rewinds the text-data stream first.
# The token's type field is not consulted.
fn unquote-splice-token? _self: (addr token) -> _/eax: boolean {
  var tok/esi: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, ",@"
  return match?
}
1051
# Return true if the text-data of '_self' starts with '(' and the stream is
# empty after reading that one code point.
# Rewinds the text-data stream and reads from it.
# The token's type field is not consulted.
fn open-paren-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  # keep the stream in ecx; eax is about to hold the first code point
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: code-point-utf8 <- read-code-point-utf8 text
  # wrong first code point? bail out
  {
    compare first, 0x28/open-paren
    break-if-=
    return 0/false
  }
  # right first code point: the answer is whether anything follows it
  var alone?/eax: boolean <- stream-empty? text
  return alone?
}
1067
# Return true if the text-data of '_self' starts with ')' and the stream is
# empty after reading that one code point.
# Rewinds the text-data stream and reads from it.
# The token's type field is not consulted.
fn close-paren-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  # keep the stream in ecx; eax is about to hold the first code point
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: code-point-utf8 <- read-code-point-utf8 text
  # wrong first code point? bail out
  {
    compare first, 0x29/close-paren
    break-if-=
    return 0/false
  }
  # right first code point: the answer is whether anything follows it
  var alone?/eax: boolean <- stream-empty? text
  return alone?
}
1083
# Return true if the text-data of '_self' starts with '.' and the stream is
# empty after reading that one code point.
# Rewinds the text-data stream and reads from it.
# The token's type field is not consulted.
fn dot-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  # keep the stream in ecx; eax is about to hold the first code point
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: code-point-utf8 <- read-code-point-utf8 text
  # wrong first code point? bail out
  {
    compare first, 0x2e/dot
    break-if-=
    return 0/false
  }
  # right first code point: the answer is whether anything follows it
  var alone?/eax: boolean <- stream-empty? text
  return alone?
}
1099
# dot-token? should accept a token whose text is exactly "." and reject one
# whose text merely starts with ".".
fn test-dot-token {
  # Plain 'allocate' rather than allocate-token: initialize-token populates
  # text-data itself, so a stream populated by allocate-token would just be
  # discarded.
  var dot-storage: (handle token)
  var dot-ah/eax: (addr handle token) <- address dot-storage
  allocate dot-ah
  var dot/eax: (addr token) <- lookup *dot-ah
  initialize-token dot, "."
  var dot?/eax: boolean <- dot-token? dot
  check dot?, "F - test-dot-token"
  # negative case: more text after the dot
  var dots-storage: (handle token)
  var dots-ah/eax: (addr handle token) <- address dots-storage
  allocate dots-ah
  var dots/eax: (addr token) <- lookup *dots-ah
  initialize-token dots, ".."
  var dots?/eax: boolean <- dot-token? dots
  check-not dots?, "F - test-dot-token/two-dots"
}
1109
# Return true if the type field of '_self' is 1/stream.
fn stream-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var kind/eax: (addr int) <- get tok, type
  {
    compare *kind, 1/stream
    break-if-!=
    return 1/true
  }
  return 0/false
}
1120
# Return true if the type field of '_self' is 2/skip.
fn skip-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var kind/eax: (addr int) <- get tok, type
  {
    compare *kind, 2/skip
    break-if-!=
    return 1/true
  }
  return 0/false
}
1131
# Return true if the type field of '_self' is 3/indent.
fn indent-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var kind/eax: (addr int) <- get tok, type
  {
    compare *kind, 3/indent
    break-if-!=
    return 1/true
  }
  return 0/false
}
1142
# Allocate a token into the handle at '_self-ah', then populate its text-data
# with a stream of capacity 0x40.
fn allocate-token _self-ah: (addr handle token) {
  var tok-ah/eax: (addr handle token) <- copy _self-ah
  allocate tok-ah
  # reach into the fresh token to set up its text stream
  var tok/eax: (addr token) <- lookup *tok-ah
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  populate-stream text-ah, 0x40/max-symbol-size
}
1150
# Populate the text-data of '_self' with a stream of capacity 0x40 and write
# 'val' into it. No other field of the token is touched.
fn initialize-token _self: (addr token), val: (addr array byte) {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  # same capacity as in allocate-token
  populate-stream text-ah, 0x40/max-symbol-size
  var text/eax: (addr stream byte) <- lookup *text-ah
  write text, val
}
1158
# Set the type field of '_self' to 2/skip. No other field is touched.
fn initialize-skip-token _self: (addr token) {
  var tok/eax: (addr token) <- copy _self
  var kind/eax: (addr int) <- get tok, type
  copy-to *kind, 2/skip
}
1164
# Rewind the text-data stream of '_self' and write it to 'out' using
# write-stream.
fn write-token-text-data out: (addr stream byte), _self: (addr token) {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  # start from the beginning of the text
  rewind-stream text
  write-stream out, text
}
1172
# Compare two tokens:
#   - different type fields => false
#   - both 2/skip => true
#   - both 3/indent => true iff their number-data match
#   - anything else => result of streams-data-equal? on their text-data
fn tokens-equal? _a: (addr token), _b: (addr token) -> _/eax: boolean {
  var lhs/edx: (addr token) <- copy _a
  var rhs/ebx: (addr token) <- copy _b
  # types must agree
  var lhs-type-addr/eax: (addr int) <- get lhs, type
  var kind/eax: int <- copy *lhs-type-addr
  var rhs-type-addr/ecx: (addr int) <- get rhs, type
  {
    compare kind, *rhs-type-addr
    break-if-=
    return 0/false
  }
  # skip tokens have nothing further to compare
  {
    compare kind, 2/skip
    break-if-!=
    return 1/true
  }
  # indent tokens are compared by number-data alone
  {
    compare kind, 3/indent
    break-if-!=
    var lhs-num-addr/eax: (addr int) <- get lhs, number-data
    var lhs-num/eax: int <- copy *lhs-num-addr
    var rhs-num-addr/ecx: (addr int) <- get rhs, number-data
    {
      compare lhs-num, *rhs-num-addr
      break-if-!=
      return 1/true
    }
    return 0/false
  }
  # every other type is compared by text-data
  var rhs-text-ah/eax: (addr handle stream byte) <- get rhs, text-data
  var _rhs-text/eax: (addr stream byte) <- lookup *rhs-text-ah
  # park the right-hand stream in ebx; 'rhs' itself is no longer needed
  var rhs-text/ebx: (addr stream byte) <- copy _rhs-text
  var lhs-text-ah/eax: (addr handle stream byte) <- get lhs, text-data
  var lhs-text/eax: (addr stream byte) <- lookup *lhs-text-ah
  var same-text?/eax: boolean <- streams-data-equal? lhs-text, rhs-text
  return same-text?
}
1212
# Draw the three fields of '_t' to screen 0 at the cursor, separated by
# spaces and followed by a newline: type (decimal), text-data, number-data
# (decimal). Rewinds the text-data stream before drawing it.
fn dump-token-from-cursor _t: (addr token) {
  var tok/esi: (addr token) <- copy _t
  # type
  var kind/eax: (addr int) <- get tok, type
  draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *kind, 7/fg 0/bg
  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
  # text-data
  var contents-ah/eax: (addr handle stream byte) <- get tok, text-data
  var contents/eax: (addr stream byte) <- lookup *contents-ah
  rewind-stream contents
  draw-stream-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, contents, 7/fg 0/bg
  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
  # number-data
  var number/eax: (addr int) <- get tok, number-data
  draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *number, 7/fg 0/bg
  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "\n", 7/fg 0/bg
}