https://github.com/akkartik/mu/blob/main/shell/tokenize.mu
1
2
# A single lexical token produced by the tokenizer.
type token {
  # kind of token; values used in this file: 1 = stream, 2 = skip, 3 = indent
  # (other tokens never set it here -- presumably it stays 0; TODO confirm)
  type: int
  # text of the token, for tokens that carry text (symbols, brackets,
  # quotes, stream literals)
  text-data: (handle stream byte)
  # numeric payload; next-indent-token stores the count of leading spaces here
  number-data: int
}
12
# Scan 'in' from the beginning and append each non-skip token to 'out'.
# Returns early, without writing further tokens, as soon as 'trace' reports
# an error.
fn tokenize in: (addr gap-buffer), out: (addr stream token), trace: (addr trace) {
  trace-text trace, "tokenize", "tokenize"
  trace-lower trace
  rewind-gap-buffer in
  # threaded through successive calls to next-token
  var line-start?/edi: boolean <- copy 1/true
  {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    # a fresh token on every iteration
    var scratch: token
    var curr/edx: (addr token) <- address scratch
    line-start? <- next-token in, curr, line-start?, trace
    # give up on the first error
    var failed?/eax: boolean <- has-errors? trace
    compare failed?, 0/false
    {
      break-if-=
      return
    }
    # emit everything except skip tokens
    {
      var ignorable?/eax: boolean <- skip-token? curr
      compare ignorable?, 0/false
      break-if-!=
      write-to-stream out, curr
    }
    loop
  }
  trace-higher trace
}
40
# "123 a" should yield an indent token with 0 spaces, followed by a number
# token whose text is "123".
fn test-tokenize-number {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "123 a"
  # output
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # first token: indent, 0 spaces
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-number/before-indent-type"
  var indent/eax: (addr int) <- get tok, number-data
  check-ints-equal *indent, 0/spaces, "F - test-tokenize-number/before-indent"
  # second token: the number
  read-from-stream tokens, tok
  var is-number?/eax: boolean <- number-token? tok
  check is-number?, "F - test-tokenize-number"
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  check-stream-equal text, "123", "F - test-tokenize-number: value"
}
68
# "-123 a" should yield an indent token with 0 spaces, followed by a number
# token whose text is "-123" (sign included).
fn test-tokenize-negative-number {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "-123 a"
  # output
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # first token: indent, 0 spaces
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-negative-number/before-indent-type"
  var indent/eax: (addr int) <- get tok, number-data
  check-ints-equal *indent, 0/spaces, "F - test-tokenize-negative-number/before-indent"
  # second token: the negative number
  read-from-stream tokens, tok
  var is-number?/eax: boolean <- number-token? tok
  check is-number?, "F - test-tokenize-negative-number"
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  check-stream-equal text, "-123", "F - test-tokenize-negative-number: value"
}
96
# "'(a)" should yield: indent, quote, open paren, a, close paren.
fn test-tokenize-quote {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "'(a)"
  # output
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent, 0 spaces
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-quote/before-indent-type"
  var indent/eax: (addr int) <- get tok, number-data
  check-ints-equal *indent, 0/spaces, "F - test-tokenize-quote/before-indent"
  # quote
  read-from-stream tokens, tok
  var is-quote?/eax: boolean <- quote-token? tok
  check is-quote?, "F - test-tokenize-quote: quote"
  # open paren
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-quote: open paren"
  # 'a' is read but not checked
  read-from-stream tokens, tok
  # close paren
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-quote: close paren"
}
128
# "`(a)" should yield: indent, backquote, open paren, a, close paren.
fn test-tokenize-backquote {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "`(a)"
  # output
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent, 0 spaces
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-backquote/before-indent-type"
  var indent/eax: (addr int) <- get tok, number-data
  check-ints-equal *indent, 0/spaces, "F - test-tokenize-backquote/before-indent"
  # backquote
  read-from-stream tokens, tok
  var is-backquote?/eax: boolean <- backquote-token? tok
  check is-backquote?, "F - test-tokenize-backquote: backquote"
  # open paren
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-backquote: open paren"
  # 'a' is read but not checked
  read-from-stream tokens, tok
  # close paren
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-backquote: close paren"
}
160
# ",(a)" should yield: indent, unquote, open paren, a, close paren.
fn test-tokenize-unquote {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, ",(a)"
  # output
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent, 0 spaces
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-unquote/before-indent-type"
  var indent/eax: (addr int) <- get tok, number-data
  check-ints-equal *indent, 0/spaces, "F - test-tokenize-unquote/before-indent"
  # unquote
  read-from-stream tokens, tok
  var is-unquote?/eax: boolean <- unquote-token? tok
  check is-unquote?, "F - test-tokenize-unquote: unquote"
  # open paren
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-unquote: open paren"
  # 'a' is read but not checked
  read-from-stream tokens, tok
  # close paren
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-unquote: close paren"
}
192
# ",@a" should yield an indent token followed by a single unquote-splice
# token (not a separate unquote).
fn test-tokenize-unquote-splice {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, ",@a"
  # output
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent, 0 spaces
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-unquote-splice/before-indent-type"
  var indent/eax: (addr int) <- get tok, number-data
  check-ints-equal *indent, 0/spaces, "F - test-tokenize-unquote-splice/before-indent"
  # unquote-splice
  read-from-stream tokens, tok
  var is-splice?/eax: boolean <- unquote-splice-token? tok
  check is-splice?, "F - test-tokenize-unquote-splice: unquote-splice"
}
217
# "(a . b)" should yield: indent, open paren, a, dot, b, close paren.
fn test-tokenize-dotted-list {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "(a . b)"
  # output
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent, 0 spaces
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-dotted-list/before-indent-type"
  var indent/eax: (addr int) <- get tok, number-data
  check-ints-equal *indent, 0/spaces, "F - test-tokenize-dotted-list/before-indent"
  # open paren
  read-from-stream tokens, tok
  var is-open?/eax: boolean <- open-paren-token? tok
  check is-open?, "F - test-tokenize-dotted-list: open paren"
  # 'a' is read but not checked
  read-from-stream tokens, tok
  # dot
  read-from-stream tokens, tok
  var is-dot?/eax: boolean <- dot-token? tok
  check is-dot?, "F - test-tokenize-dotted-list: dot"
  # 'b' is read but not checked
  read-from-stream tokens, tok
  # close paren
  read-from-stream tokens, tok
  var is-close?/eax: boolean <- close-paren-token? tok
  check is-close?, "F - test-tokenize-dotted-list: close paren"
}
250
# "[abc def]" should yield an indent token followed by exactly one stream
# token whose text is "abc def" (brackets excluded, inner space kept).
fn test-tokenize-stream-literal {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "[abc def]"
  # output
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent, 0 spaces
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-stream-literal/before-indent-type"
  var indent/eax: (addr int) <- get tok, number-data
  check-ints-equal *indent, 0/spaces, "F - test-tokenize-stream-literal/before-indent"
  # the stream literal
  read-from-stream tokens, tok
  var is-stream?/eax: boolean <- stream-token? tok
  check is-stream?, "F - test-tokenize-stream-literal: type"
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  var same?/eax: boolean <- stream-data-equal? text, "abc def"
  check same?, "F - test-tokenize-stream-literal"
  # nothing else was emitted
  var drained?/eax: boolean <- stream-empty? tokens
  check drained?, "F - test-tokenize-stream-literal: empty?"
}
281
# "([abc def])" should yield: indent, bracket, stream token "abc def",
# bracket, and then nothing more.
fn test-tokenize-stream-literal-in-tree {
  # input
  var src-storage: gap-buffer
  var src/esi: (addr gap-buffer) <- address src-storage
  initialize-gap-buffer-with src, "([abc def])"
  # output
  var tokens-storage: (stream token 0x10)
  var tokens/edi: (addr stream token) <- address tokens-storage
  # trace
  var t-storage: trace
  var t/edx: (addr trace) <- address t-storage
  initialize-trace t, 1/only-errors, 0x10/capacity, 0/visible
  tokenize src, tokens, t
  # indent, 0 spaces
  var tok-storage: token
  var tok/ebx: (addr token) <- address tok-storage
  read-from-stream tokens, tok
  var tok-type/eax: (addr int) <- get tok, type
  check-ints-equal *tok-type, 3/indent, "F - test-tokenize-stream-literal-in-tree/before-indent-type"
  var indent/eax: (addr int) <- get tok, number-data
  check-ints-equal *indent, 0/spaces, "F - test-tokenize-stream-literal-in-tree/before-indent"
  # opening bracket
  read-from-stream tokens, tok
  var is-bracket?/eax: boolean <- bracket-token? tok
  check is-bracket?, "F - test-tokenize-stream-literal-in-tree: open paren"
  # the stream literal
  read-from-stream tokens, tok
  var is-stream?/eax: boolean <- stream-token? tok
  check is-stream?, "F - test-tokenize-stream-literal-in-tree: type"
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  var same?/eax: boolean <- stream-data-equal? text, "abc def"
  check same?, "F - test-tokenize-stream-literal-in-tree"
  # closing bracket
  read-from-stream tokens, tok
  var is-bracket?/eax: boolean <- bracket-token? tok
  check is-bracket?, "F - test-tokenize-stream-literal-in-tree: close paren"
  # nothing else was emitted
  var drained?/eax: boolean <- stream-empty? tokens
  check drained?, "F - test-tokenize-stream-literal-in-tree: empty?"
}
318
# Two lines, the second indented by two spaces, should yield:
#   indent(0), "abc", indent(2), "def"
#
# The input must contain exactly two spaces after the newline: the second
# indent token is asserted to hold 2, and next-indent-token counts leading
# spaces one at a time.
fn test-tokenize-indent {
  var in-storage: gap-buffer
  var in/esi: (addr gap-buffer) <- address in-storage
  initialize-gap-buffer-with in, "abc\n  def"
  #
  var stream-storage: (stream token 0x10)
  var stream/edi: (addr stream token) <- address stream-storage
  #
  var trace-storage: trace
  var trace/edx: (addr trace) <- address trace-storage
  initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
  tokenize in, stream, trace
  # first line: indent with 0 spaces
  var curr-token-storage: token
  var curr-token/ebx: (addr token) <- address curr-token-storage
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-indent/before-indent-type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 0/spaces, "F - test-tokenize-indent/before-indent"
  # first line: abc
  read-from-stream stream, curr-token
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  check-stream-equal curr-token-data, "abc", "F - test-tokenize-indent/before"
  # second line: indent with 2 spaces
  read-from-stream stream, curr-token
  var curr-token-type/eax: (addr int) <- get curr-token, type
  check-ints-equal *curr-token-type, 3/indent, "F - test-tokenize-indent/type"
  var curr-token-data/eax: (addr int) <- get curr-token, number-data
  check-ints-equal *curr-token-data, 2/spaces, "F - test-tokenize-indent"
  # second line: def
  read-from-stream stream, curr-token
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  check-stream-equal curr-token-data, "def", "F - test-tokenize-indent/after"
}
355
356
357
# Read one token from 'in' into 'out'.
#
# 'start-of-line?' says whether 'in' is positioned at the start of a line.
# The result is the value the caller should pass as 'start-of-line?' on its
# next call.
#
# At the start of a line the token produced is an indent token, unless the
# rest of the line is empty or a comment, in which case 'out' is handed to
# initialize-skip-token instead.
fn next-token in: (addr gap-buffer), out: (addr token), start-of-line?: boolean, trace: (addr trace) -> _/edi: boolean {
  trace-text trace, "tokenize", "next-token"
  trace-lower trace
  # at the start of a line, first let next-indent-token fill in 'out'
  {
    compare start-of-line?, 0/false
    break-if-=
    next-indent-token in, out, trace
  }
  skip-spaces-from-gap-buffer in
  # '#' starts a comment: drop everything before the next newline
  var first/eax: grapheme <- peek-from-gap-buffer in
  {
    compare first, 0x23/comment
    break-if-!=
    skip-rest-of-line in
  }
  # peek again, since a comment may just have been skipped
  var upcoming/eax: grapheme <- peek-from-gap-buffer in
  # newline: consume it and tell the caller a new line begins
  {
    compare upcoming, 0xa/newline
    break-if-!=
    trace-text trace, "tokenize", "newline"
    upcoming <- read-from-gap-buffer in
    initialize-skip-token out
    trace-higher trace
    return 1/at-start-of-line
  }
  # no newline: if we came in at the start of a line, return now with
  # whatever next-indent-token put in 'out'
  {
    compare start-of-line?, 0/false
    break-if-=
    trace-higher trace
    return 0/not-at-start-of-line
  }
  # nothing left to scan
  {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-=
    trace-text trace, "tokenize", "end"
    initialize-skip-token out
    trace-higher trace
    return 1/at-start-of-line
  }
  # keep the next grapheme in ecx so eax stays free for helper results
  var peeked/eax: grapheme <- peek-from-gap-buffer in
  var curr/ecx: grapheme <- copy peeked
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var msg-storage: (stream byte 0x40)
    var msg/esi: (addr stream byte) <- address msg-storage
    write msg, "next: "
    var code/eax: int <- copy curr
    write-int32-hex msg, code
    trace trace, "tokenize", msg
  }
  # dispatch on the first grapheme of the token
  $next-token:case: {
    # '[' opens a stream literal; the bracket itself is dropped
    {
      compare curr, 0x5b/open-square-bracket
      break-if-!=
      var opener/eax: grapheme <- read-from-gap-buffer in
      next-stream-token in, out, trace
      break $next-token:case
    }
    # any symbol grapheme starts a symbol
    {
      var symbolic?/eax: boolean <- symbol-grapheme? curr
      compare symbolic?, 0/false
      break-if-=
      next-symbol-token in, out, trace
      break $next-token:case
    }
    # ']' with no '[' before it is an error
    {
      compare curr, 0x5d/close-square-bracket
      break-if-!=
      error trace, "unbalanced ']'"
      return start-of-line?
    }
    # remaining brackets become single-grapheme tokens
    {
      var bracketish?/eax: boolean <- bracket-grapheme? curr
      compare bracketish?, 0/false
      break-if-=
      var bracket/eax: grapheme <- read-from-gap-buffer in
      next-bracket-token bracket, out, trace
      break $next-token:case
    }
    # quote
    {
      compare curr, 0x27/single-quote
      break-if-!=
      var consumed/eax: grapheme <- read-from-gap-buffer in
      initialize-token out, "'"
      break $next-token:case
    }
    # backquote
    {
      compare curr, 0x60/backquote
      break-if-!=
      var consumed/eax: grapheme <- read-from-gap-buffer in
      initialize-token out, "`"
      break $next-token:case
    }
    # unquote, or unquote-splice when '@' follows immediately
    {
      compare curr, 0x2c/comma
      break-if-!=
      var after-comma/eax: grapheme <- read-from-gap-buffer in
      {
        after-comma <- peek-from-gap-buffer in
        compare after-comma, 0x40/at-sign
        break-if-!=
        after-comma <- read-from-gap-buffer in
        initialize-token out, ",@"
        break $next-token:case
      }
      initialize-token out, ","
      break $next-token:case
    }
    # nothing matched: show the offending value on screen and abort
    set-cursor-position 0/screen, 0x40 0x20
    {
      var code/eax: int <- copy curr
      draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, code, 7/fg 0/bg
    }
    abort "unknown token type"
  }
  trace-higher trace
  # when tracing, log the text of the token just read
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var msg-storage: (stream byte 0x400)
    var msg/eax: (addr stream byte) <- address msg-storage
    write msg, "=> "
    write-token-text-data msg, out
    trace trace, "tokenize", msg
  }
  return start-of-line?
}
499
# Read a run of symbol graphemes from 'in' into the text-data of '_out'.
# Stops (without consuming) at the first non-symbol grapheme, or when 'in'
# has been fully scanned.
fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a symbol"
  trace-lower trace
  # set up the token's text stream
  var tok/eax: (addr token) <- copy _out
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  populate-stream text-ah, 0x40/max-symbol-size
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/edi: (addr stream byte) <- copy _text
  $next-symbol-token:loop: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    var peeked/eax: grapheme <- peek-from-gap-buffer in
    # when tracing, log the grapheme under consideration
    {
      # inner block so eax holds 'peeked' again before the branch
      {
        var tracing?/eax: boolean <- should-trace? trace
        compare tracing?, 0/false
      }
      break-if-=
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var code/eax: int <- copy peeked
      write-int32-hex msg, code
      trace trace, "tokenize", msg
    }
    # a non-symbol grapheme ends the token
    {
      var symbolic?/eax: boolean <- symbol-grapheme? peeked
      compare symbolic?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-symbol-token:loop
    }
    # consume and append
    var consumed/eax: grapheme <- read-from-gap-buffer in
    write-grapheme text, consumed
    loop
  }
  trace-higher trace
  # when tracing, log the symbol that was read
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var msg-storage: (stream byte 0x40)
    var msg/esi: (addr stream byte) <- address msg-storage
    write msg, "=> "
    rewind-stream text
    write-stream msg, text
    trace trace, "tokenize", msg
  }
}
551
# Read a decimal number, with an optional leading '-', from 'in' into the
# text-data of '_out'.
#
# Stops (without consuming) at the first non-symbol grapheme. A symbol
# grapheme that is not a decimal digit raises the error "invalid number".
fn next-number-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "looking for a number"
  trace-lower trace
  var out/eax: (addr token) <- copy _out
  var out-data-ah/eax: (addr handle stream byte) <- get out, text-data
  populate-stream out-data-ah, 0x40
  var _out-data/eax: (addr stream byte) <- lookup *out-data-ah
  var out-data/edi: (addr stream byte) <- copy _out-data
  # optional sign: consume the first grapheme here only if it is a '-'.
  # Anything else is left for the digit loop below, which validates it.
  $next-number-token:check-minus: {
    var g/eax: grapheme <- peek-from-gap-buffer in
    compare g, 0x2d/minus
    break-if-!=
    g <- read-from-gap-buffer in  # consume
    write-grapheme out-data, g
  }
  $next-number-token:loop: {
    var done?/eax: boolean <- gap-buffer-scan-done? in
    compare done?, 0/false
    break-if-!=
    var g/eax: grapheme <- peek-from-gap-buffer in
    # when tracing, log the grapheme under consideration
    {
      # inner block so eax holds 'g' again before the branch
      {
        var should-trace?/eax: boolean <- should-trace? trace
        compare should-trace?, 0/false
      }
      break-if-=
      var stream-storage: (stream byte 0x40)
      var stream/esi: (addr stream byte) <- address stream-storage
      write stream, "next: "
      var gval/eax: int <- copy g
      write-int32-hex stream, gval
      trace trace, "tokenize", stream
    }
    # a non-symbol grapheme ends the number
    {
      var symbol-grapheme?/eax: boolean <- symbol-grapheme? g
      compare symbol-grapheme?, 0/false
      break-if-!=
      trace-text trace, "tokenize", "stop"
      break $next-number-token:loop
    }
    # a symbol grapheme that isn't a digit is an error
    {
      var digit?/eax: boolean <- decimal-digit? g
      compare digit?, 0/false
      break-if-!=
      error trace, "invalid number"
      return
    }
    trace-text trace, "tokenize", "append"
    var g/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out-data, g
    loop
  }
  trace-higher trace
}
607
# Read the body of a stream literal from 'in' into '_out', up to the closing
# ']'. The ']' is consumed but not stored.
# Marks the token as type 1/stream.
# Raises the error "unbalanced '['" if 'in' runs out before a ']' is seen.
fn next-stream-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "stream"
  var tok/edi: (addr token) <- copy _out
  var kind/eax: (addr int) <- get tok, type
  copy-to *kind, 1/stream
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  # much larger than the 0x40 used for symbols
  populate-stream text-ah, 0x400/max-definition-size=1KB
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/edi: (addr stream byte) <- copy _text
  {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    {
      break-if-=
      error trace, "unbalanced '['"
      return
    }
    var next/eax: grapheme <- read-from-gap-buffer in
    compare next, 0x5d/close-square-bracket
    break-if-=
    write-grapheme text, next
    loop
  }
  # when tracing, log the literal that was read
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var msg-storage: (stream byte 0x400)
    var msg/esi: (addr stream byte) <- address msg-storage
    write msg, "=> "
    rewind-stream text
    write-stream-immutable msg, text
    trace trace, "tokenize", msg
  }
}
645
# Store the single grapheme 'g' as the text of '_out'.
# The caller has already consumed 'g' from its input.
fn next-bracket-token g: grapheme, _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "bracket"
  var tok/eax: (addr token) <- copy _out
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  populate-stream text-ah, 0x40
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/edi: (addr stream byte) <- copy _text
  write-grapheme text, g
  # when tracing, log the bracket
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var msg-storage: (stream byte 0x40)
    var msg/esi: (addr stream byte) <- address msg-storage
    write msg, "=> "
    rewind-stream text
    write-stream msg, text
    trace trace, "tokenize", msg
  }
}
666
# Consume graphemes from 'in' until the next one is a newline (which is left
# unread) or 'in' has been fully scanned.
fn skip-rest-of-line in: (addr gap-buffer) {
  {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    var next/eax: grapheme <- peek-from-gap-buffer in
    compare next, 0xa/newline
    break-if-=
    next <- read-from-gap-buffer in
    loop
  }
}
679
# Turn '_out' into an indent token (type 3) whose number-data is the count of
# consecutive spaces at the current position in 'in'. The spaces are consumed.
fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr trace) {
  trace-text trace, "tokenize", "indent"
  trace-lower trace
  var tok/edi: (addr token) <- copy _out
  var kind/eax: (addr int) <- get tok, type
  copy-to *kind, 3/indent
  # start counting from zero
  var spaces/edi: (addr int) <- get tok, number-data
  copy-to *spaces, 0
  {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    var peeked/eax: grapheme <- peek-from-gap-buffer in
    # when tracing, log the grapheme under consideration
    {
      # inner block so eax holds 'peeked' again before the branch
      {
        var tracing?/eax: boolean <- should-trace? trace
        compare tracing?, 0/false
      }
      break-if-=
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var code/eax: int <- copy peeked
      write-int32-hex msg, code
      trace trace, "tokenize", msg
    }
    # anything other than a space ends the indent
    compare peeked, 0x20/space
    break-if-!=
    peeked <- read-from-gap-buffer in
    increment *spaces
    loop
  }
  trace-higher trace
  # when tracing, log the indent that was measured
  {
    var tracing?/eax: boolean <- should-trace? trace
    compare tracing?, 0/false
    break-if-=
    var msg-storage: (stream byte 0x40)
    var msg/esi: (addr stream byte) <- address msg-storage
    write msg, "=> indent "
    write-int32-hex msg, *spaces
    trace trace, "tokenize", msg
  }
}
725
726
727
728
729
730
731
732
733
734
735
736
737
# Can 'g' be part of a symbol?
# True for everything except whitespace, quote/unquote graphemes, brackets,
# '#' and '"'.
fn symbol-grapheme? g: grapheme -> _/eax: boolean {
  # single-grapheme exclusions first
  compare g, 0x22/double-quote
  {
    break-if-!=
    return 0/false
  }
  compare g, 0x23/hash
  {
    break-if-!=
    return 0/false
  }
  # then the grapheme classes
  var bracketish?/eax: boolean <- bracket-grapheme? g
  compare bracketish?, 0/false
  {
    break-if-=
    return 0/false
  }
  var quoting?/eax: boolean <- quote-or-unquote-grapheme? g
  compare quoting?, 0/false
  {
    break-if-=
    return 0/false
  }
  var blank?/eax: boolean <- whitespace-grapheme? g
  compare blank?, 0/false
  {
    break-if-=
    return 0/false
  }
  return 1/true
}
769
# Is 'g' a space, a newline or a tab?
fn whitespace-grapheme? g: grapheme -> _/eax: boolean {
  compare g, 0x20/space
  {
    break-if-!=
    return 1/true
  }
  compare g, 0xa/newline
  {
    break-if-!=
    return 1/true
  }
  compare g, 9/tab
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
788
# Is 'g' one of the graphemes used by quote/unquote syntax: ' ` , @
fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean {
  compare g, 0x40/at-sign
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2c/comma
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x60/backquote
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x27/single-quote
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
812
# Is 'g' one of the six bracket graphemes: ( ) [ ] { }
fn bracket-grapheme? g: grapheme -> _/eax: boolean {
  # curly brackets
  compare g, 0x7b/open-curly-bracket
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x7d/close-curly-bracket
  {
    break-if-!=
    return 1/true
  }
  # square brackets
  compare g, 0x5b/open-square-bracket
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x5d/close-square-bracket
  {
    break-if-!=
    return 1/true
  }
  # parens
  compare g, 0x28/open-paren
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x29/close-paren
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
846
# Does the text of '_self' look like a decimal integer: an optional leading
# '-' followed only by decimal digits?
# Rewinds the token's text stream and reads from it.
fn number-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var curr/eax: grapheme <- read-grapheme text
  # step past a leading minus sign
  {
    compare curr, 0x2d/minus
    break-if-!=
    curr <- read-grapheme text
  }
  # every grapheme from here on must be a digit
  {
    {
      var digit?/eax: boolean <- decimal-digit? curr
      compare digit?, 0/false
      break-if-!=
      return 0/false
    }
    # inner block so eax holds 'curr' again before the branch
    {
      var drained?/eax: boolean <- stream-empty? text
      compare drained?, 0/false
    }
    break-if-!=
    curr <- read-grapheme text
    loop
  }
  return 1/true
}
877
# Is '_self' a non-stream token whose text starts with a bracket grapheme?
# Rewinds the token's text stream and reads one grapheme from it.
fn bracket-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  # stream tokens never count as brackets, whatever their text
  {
    var kind/eax: (addr int) <- get tok, type
    compare *kind, 1/stream
    break-if-!=
    return 0/false
  }
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  var bracketish?/eax: boolean <- bracket-grapheme? first
  return bracketish?
}
894
# Is the text of '_self' exactly "'"?
# Rewinds the token's text stream before comparing.
fn quote-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, "'"
  return match?
}
903
# Is the text of '_self' exactly "`"?
# Rewinds the token's text stream before comparing.
fn backquote-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, "`"
  return match?
}
912
# Is the text of '_self' exactly ","?
# Rewinds the token's text stream before comparing.
fn unquote-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, ","
  return match?
}
921
# Is the text of '_self' exactly ",@"?
# Rewinds the token's text stream before comparing.
fn unquote-splice-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var match?/eax: boolean <- stream-data-equal? text, ",@"
  return match?
}
930
# Is the text of '_self' exactly "("?
# Rewinds the token's text stream and reads one grapheme from it.
# NOTE(review): unlike bracket-token?, this does not look at the token's
# type, so a stream token whose text is "(" also passes -- confirm intended.
fn open-paren-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  # wrong first grapheme: no match
  compare first, 0x28/open-paren
  {
    break-if-=
    return 0/false
  }
  # right first grapheme: match only if nothing follows it
  var nothing-left?/eax: boolean <- stream-empty? text
  return nothing-left?
}
946
# Is the text of '_self' exactly ")"?
# Rewinds the token's text stream and reads one grapheme from it.
# NOTE(review): unlike bracket-token?, this does not look at the token's
# type, so a stream token whose text is ")" also passes -- confirm intended.
fn close-paren-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  # wrong first grapheme: no match
  compare first, 0x29/close-paren
  {
    break-if-=
    return 0/false
  }
  # right first grapheme: match only if nothing follows it
  var nothing-left?/eax: boolean <- stream-empty? text
  return nothing-left?
}
962
# Predicate: is this token's text-data a lone dot (0x2e)?
# Reads the first grapheme of text-data; if it isn't 0x2e the answer is false.
# Otherwise the answer is whatever stream-empty? reports for the rest of the
# stream.
# Note: rewinds the token's text-data stream and then reads one grapheme from it.
fn dot-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  # move the stream out of eax, which later calls write their results to
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  compare first, 0x2e/dot
  {
    break-if-=
    # first grapheme is something else
    return 0/false
  }
  var nothing-left?/eax: boolean <- stream-empty? text
  return nothing-left?
}
978
# Test: a token initialized with the text "." satisfies dot-token?.
fn test-dot-token {
  # set up a token holding "."
  var tok-storage: (handle token)
  var tok-ah/eax: (addr handle token) <- address tok-storage
  allocate-token tok-ah
  var tok/eax: (addr token) <- lookup *tok-ah
  initialize-token tok, "."
  # the predicate should accept it
  var is-dot?/eax: boolean <- dot-token? tok
  check is-dot?, "F - test-dot-token"
}
988
# Predicate: true iff the token's `type` field equals 1/stream.
fn stream-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var type-addr/eax: (addr int) <- get tok, type
  compare *type-addr, 1/stream
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
999
# Predicate: true iff the token's `type` field equals 2/skip.
fn skip-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var type-addr/eax: (addr int) <- get tok, type
  compare *type-addr, 2/skip
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
1010
# Predicate: true iff the token's `type` field equals 3/indent.
fn indent-token? _self: (addr token) -> _/eax: boolean {
  var tok/eax: (addr token) <- copy _self
  var type-addr/eax: (addr int) <- get tok, type
  compare *type-addr, 3/indent
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
1021
# Allocate a token into the handle at `_self-ah`, then populate the new
# token's text-data stream with size 0x40.
fn allocate-token _self-ah: (addr handle token) {
  var tok-ah/eax: (addr handle token) <- copy _self-ah
  allocate tok-ah
  var tok/eax: (addr token) <- lookup *tok-ah
  # give the fresh token a text buffer
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  populate-stream text-ah, 0x40/max-symbol-size
}
1029
# Populate the token's text-data stream (size 0x40) and write `val` into it.
# Note: this always calls populate-stream on text-data, whether or not the
# token already had a text-data stream.
fn initialize-token _self: (addr token), val: (addr array byte) {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  populate-stream text-ah, 0x40/max-symbol-size
  var text/eax: (addr stream byte) <- lookup *text-ah
  write text, val
}
1037
# Mark the token as a skip token by storing 2/skip in its `type` field.
# No other field is touched.
fn initialize-skip-token _self: (addr token) {
  var tok/eax: (addr token) <- copy _self
  var type-addr/eax: (addr int) <- get tok, type
  copy-to *type-addr, 2/skip
}
1043
# Write the token's text-data stream to `out` via write-stream.
# Note: rewinds the token's text-data stream first.
fn write-token-text-data out: (addr stream byte), _self: (addr token) {
  var tok/eax: (addr token) <- copy _self
  var text-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  # rewind before writing out
  rewind-stream text
  write-stream out, text
}
1051
# Compare two tokens field by field.
#   - `type` fields differ: false
#   - both are 2/skip: true (nothing else is compared)
#   - both are 3/indent: true iff their `number-data` fields match
#   - anything else: result of streams-data-equal? on the two text-data streams
fn tokens-equal? _a: (addr token), _b: (addr token) -> _/eax: boolean {
  var lhs/edx: (addr token) <- copy _a
  var rhs/ebx: (addr token) <- copy _b
  # types must agree before anything else is compared
  var lhs-type-addr/eax: (addr int) <- get lhs, type
  var lhs-type/eax: int <- copy *lhs-type-addr
  var rhs-type-addr/ecx: (addr int) <- get rhs, type
  compare lhs-type, *rhs-type-addr
  {
    break-if-=
    return 0/false
  }
  compare lhs-type, 2/skip
  {
    break-if-!=
    # skip tokens: no other data is compared
    return 1/true
  }
  compare lhs-type, 3/indent
  {
    break-if-!=
    # indent tokens: only number-data is compared
    var lhs-number-addr/eax: (addr int) <- get lhs, number-data
    var lhs-number/eax: int <- copy *lhs-number-addr
    var rhs-number-addr/ecx: (addr int) <- get rhs, number-data
    compare lhs-number, *rhs-number-addr
    {
      break-if-!=
      return 1/true
    }
    return 0/false
  }
  # all other types: compare text-data
  # look up rhs first and park it in ebx, since lookup returns in eax
  var rhs-text-ah/eax: (addr handle stream byte) <- get rhs, text-data
  var _rhs-text/eax: (addr stream byte) <- lookup *rhs-text-ah
  var rhs-text/ebx: (addr stream byte) <- copy _rhs-text
  var lhs-text-ah/eax: (addr handle stream byte) <- get lhs, text-data
  var lhs-text/eax: (addr stream byte) <- lookup *lhs-text-ah
  var same-text?/eax: boolean <- streams-data-equal? lhs-text, rhs-text
  return same-text?
}
1091
# Draw a token's fields to screen 0 starting at the cursor, in this order:
#   type, " ", text-data, " ", number-data, "\n"
# All output uses 7/fg on 0/bg.
# Note: looks up and rewinds text-data unconditionally, whatever the token's type.
fn dump-token-from-cursor _t: (addr token) {
  var tok/esi: (addr token) <- copy _t
  # type
  var type-addr/eax: (addr int) <- get tok, type
  draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *type-addr, 7/fg 0/bg
  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
  # text-data
  var text-data-ah/eax: (addr handle stream byte) <- get tok, text-data
  var text-data/eax: (addr stream byte) <- lookup *text-data-ah
  rewind-stream text-data
  draw-stream-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, text-data, 7/fg 0/bg
  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, " ", 7/fg 0/bg
  # number-data
  var number-addr/eax: (addr int) <- get tok, number-data
  draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *number-addr, 7/fg 0/bg
  draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "\n", 7/fg 0/bg
}
1105 }