https://github.com/akkartik/mu/blob/main/shell/tokenize.mu
# We reuse the cell data structure for tokenization.
# Token cells are special, though. They're always atoms and they always have
# text-data. Their type is left at 0/uninitialized, except for stream literals
# ([...]), which are tagged 3/stream.
  4 
  5 fn tokenize in: (addr gap-buffer), out: (addr stream cell), trace: (addr trace) {
  6   trace-text trace, "read", "tokenize"
  7   trace-lower trace
  8   rewind-gap-buffer in
  9   var token-storage: cell
 10   var token/edx: (addr cell) <- address token-storage
 11   {
 12     skip-whitespace-from-gap-buffer in
 13     var done?/eax: boolean <- gap-buffer-scan-done? in
 14     compare done?, 0/false
 15     break-if-!=
 16     # initialize token data each iteration to avoid aliasing
 17     var dest-ah/eax: (addr handle stream byte) <- get token, text-data
 18     populate-stream dest-ah, 0x100/max-definition-size
 19     #
 20     next-token in, token, trace
 21     var skip?/eax: boolean <- comment-token? token
 22     compare skip?, 0/false
 23     loop-if-!=
 24     var error?/eax: boolean <- has-errors? trace
 25     compare error?, 0/false
 26     {
 27       break-if-=
 28       return
 29     }
 30     write-to-stream out, token  # shallow-copy text-data
 31     loop
 32   }
 33   trace-higher trace
 34 }
 35 
 36 fn test-tokenize-dotted-list {
 37   var in-storage: gap-buffer
 38   var in/esi: (addr gap-buffer) <- address in-storage
 39   initialize-gap-buffer-with in, "(a . b)"
 40   #
 41   var stream-storage: (stream cell 0x10)
 42   var stream/edi: (addr stream cell) <- address stream-storage
 43   #
 44   tokenize in, stream, 0/no-trace
 45   #
 46   var curr-token-storage: cell
 47   var curr-token/ebx: (addr cell) <- address curr-token-storage
 48   read-from-stream stream, curr-token
 49   var open-paren?/eax: boolean <- open-paren-token? curr-token
 50   check open-paren?, "F - test-tokenize-dotted-list: open paren"
 51   read-from-stream stream, curr-token  # skip a
 52   read-from-stream stream, curr-token
 53   var dot?/eax: boolean <- dot-token? curr-token
 54   check dot?, "F - test-tokenize-dotted-list: dot"
 55   read-from-stream stream, curr-token  # skip b
 56   read-from-stream stream, curr-token
 57   var close-paren?/eax: boolean <- close-paren-token? curr-token
 58   check close-paren?, "F - test-tokenize-dotted-list: close paren"
 59 }
 60 
 61 fn test-tokenize-stream-literal {
 62   var in-storage: gap-buffer
 63   var in/esi: (addr gap-buffer) <- address in-storage
 64   initialize-gap-buffer-with in, "[abc def]"
 65   #
 66   var stream-storage: (stream cell 0x10)
 67   var stream/edi: (addr stream cell) <- address stream-storage
 68   #
 69   tokenize in, stream, 0/no-trace
 70   #
 71   var curr-token-storage: cell
 72   var curr-token/ebx: (addr cell) <- address curr-token-storage
 73   read-from-stream stream, curr-token
 74   var stream?/eax: boolean <- stream-token? curr-token
 75   check stream?, "F - test-tokenize-stream-literal: type"
 76   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
 77   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
 78   var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
 79   check data-equal?, "F - test-tokenize-stream-literal"
 80   var empty?/eax: boolean <- stream-empty? stream
 81   check empty?, "F - test-tokenize-stream-literal: empty?"
 82 }
 83 
 84 fn test-tokenize-stream-literal-in-tree {
 85   var in-storage: gap-buffer
 86   var in/esi: (addr gap-buffer) <- address in-storage
 87   initialize-gap-buffer-with in, "([abc def])"
 88   #
 89   var stream-storage: (stream cell 0x10)
 90   var stream/edi: (addr stream cell) <- address stream-storage
 91   #
 92   tokenize in, stream, 0/no-trace
 93   #
 94   var curr-token-storage: cell
 95   var curr-token/ebx: (addr cell) <- address curr-token-storage
 96   read-from-stream stream, curr-token
 97   var bracket?/eax: boolean <- bracket-token? curr-token
 98   check bracket?, "F - test-tokenize-stream-literal-in-tree: open paren"
 99   read-from-stream stream, curr-token
100   var stream?/eax: boolean <- stream-token? curr-token
101   check stream?, "F - test-tokenize-stream-literal-in-tree: type"
102   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
103   var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
104   var data-equal?/eax: boolean <- stream-data-equal? curr-token-data, "abc def"
105   check data-equal?, "F - test-tokenize-stream-literal-in-tree"
106   read-from-stream stream, curr-token
107   var bracket?/eax: boolean <- bracket-token? curr-token
108   check bracket?, "F - test-tokenize-stream-literal-in-tree: close paren"
109   var empty?/eax: boolean <- stream-empty? stream
110   check empty?, "F - test-tokenize-stream-literal-in-tree: empty?"
111 }
112 
# Scan a single token out of 'in' into the text-data stream of '_out-cell'.
# The cell's type is reset to 0/uninitialized, and set to 3/stream only when
# the token is a [...] literal.
#
# Dispatch is on the first grapheme, which is peeked rather than consumed:
#   '#'               comment, up to end of line
#   decimal digit     number
#   symbol grapheme   symbol
#   '['               stream literal (brackets not included in the text)
#   ']'               error
#   other bracket     single-grapheme token
#   operator grapheme operator
#   anything else     error
# Errors are reported to 'trace' and cause an immediate return.
fn next-token in: (addr gap-buffer), _out-cell: (addr cell), trace: (addr trace) {
  trace-text trace, "read", "next-token"
  trace-lower trace
  var out-cell/eax: (addr cell) <- copy _out-cell
  {
    var out-cell-type/eax: (addr int) <- get out-cell, type
    copy-to *out-cell-type, 0/uninitialized
  }
  var out-ah/eax: (addr handle stream byte) <- get out-cell, text-data
  var _out/eax: (addr stream byte) <- lookup *out-ah
  var out/edi: (addr stream byte) <- copy _out
  $next-token:body: {
    clear-stream out
    var g/eax: grapheme <- peek-from-gap-buffer in
    # trace the grapheme we're dispatching on
    {
      var stream-storage: (stream byte 0x40)
      var stream/esi: (addr stream byte) <- address stream-storage
      write stream, "next: "
      var gval/eax: int <- copy g
      write-int32-hex stream, gval
      trace trace, "read", stream
    }
    # comment
    {
      compare g, 0x23/comment
      break-if-!=
      rest-of-line in, out, trace
      break $next-token:body
    }
    # digit
    # (must come before the symbol check; symbol-grapheme? also accepts digits)
    {
      var digit?/eax: boolean <- decimal-digit? g
      compare digit?, 0/false
      break-if-=
      next-number-token in, out, trace
      break $next-token:body
    }
    # other symbol char
    {
      var symbol?/eax: boolean <- symbol-grapheme? g
      compare symbol?, 0/false
      break-if-=
      next-symbol-token in, out, trace
      break $next-token:body
    }
    # open square brackets begin streams
    {
      compare g, 0x5b/open-square-bracket
      break-if-!=
      g <- read-from-gap-buffer in  # skip open bracket
      next-stream-token in, out, trace
      var out-cell/eax: (addr cell) <- copy _out-cell
      var out-cell-type/eax: (addr int) <- get out-cell, type
      copy-to *out-cell-type, 3/stream
      break $next-token:body
    }
    # unbalanced close square brackets are errors
    {
      compare g, 0x5d/close-square-bracket
      break-if-!=
      error trace, "unbalanced ']'"
      return
    }
    # other brackets are always single-char tokens
    {
      var bracket?/eax: boolean <- bracket-grapheme? g
      compare bracket?, 0/false
      break-if-=
      var g/eax: grapheme <- read-from-gap-buffer in
      next-bracket-token g, out, trace
      break $next-token:body
    }
    # non-symbol operators
    {
      var operator?/eax: boolean <- operator-grapheme? g
      compare operator?, 0/false
      break-if-=
      next-operator-token in, out, trace
      break $next-token:body
    }
    # Anything else has no token type. Going by the predicates above, that's
    # '"' and '`'. Without this case the grapheme is never consumed, so a
    # caller that loops until 'in' is exhausted sees the same grapheme forever.
    {
      var done?/eax: boolean <- gap-buffer-scan-done? in
      compare done?, 0/false
      break-if-!=  # nothing left to read: fall through with an empty token
      var unknown/eax: grapheme <- read-from-gap-buffer in  # skip it
      error trace, "unrecognized character"
      return
    }
  }
  trace-higher trace
  # trace the token we ended up with
  var stream-storage: (stream byte 0x40)
  var stream/eax: (addr stream byte) <- address stream-storage
  write stream, "=> "
  rewind-stream out
  write-stream stream, out
  trace trace, "read", stream
}
202 
# Append graphemes from 'in' to 'out' for as long as they can be part of a
# symbol. Stops at the first non-symbol grapheme (leaving it unread) or when
# 'in' is exhausted. Traces each grapheme it looks at, and the final token.
fn next-symbol-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "looking for a symbol"
  trace-lower trace
  $next-symbol-token:loop: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    var g/eax: grapheme <- peek-from-gap-buffer in
    # trace the grapheme under consideration
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var code/eax: int <- copy g
      write-int32-hex msg, code
      trace trace, "read", msg
    }
    # a non-symbol grapheme ends the token
    {
      var in-symbol?/eax: boolean <- symbol-grapheme? g
      compare in-symbol?, 0/false
      break-if-!=
      trace-text trace, "read", "stop"
      break $next-symbol-token:loop
    }
    # otherwise consume it
    var next/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, next
    loop
  }
  trace-higher trace
  # trace the token
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "read", result
}
239 
# Append graphemes from 'in' to 'out' for as long as they are operator
# graphemes. Stops at the first non-operator grapheme (leaving it unread) or
# when 'in' is exhausted. Traces each grapheme it looks at, and the final token.
fn next-operator-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "looking for a operator"
  trace-lower trace
  $next-operator-token:loop: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    var g/eax: grapheme <- peek-from-gap-buffer in
    # trace the grapheme under consideration
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var code/eax: int <- copy g
      write-int32-hex msg, code
      trace trace, "read", msg
    }
    # a non-operator grapheme ends the token
    {
      var in-operator?/eax: boolean <- operator-grapheme? g
      compare in-operator?, 0/false
      break-if-!=
      trace-text trace, "read", "stop"
      break $next-operator-token:loop
    }
    # otherwise consume it
    var next/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, next
    loop
  }
  trace-higher trace
  # trace the token
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "read", result
}
276 
# Append decimal digits from 'in' to 'out'.
# The number ends at the first non-symbol grapheme (left unread) or when 'in'
# is exhausted. A symbol grapheme that isn't a digit is reported to 'trace' as
# "invalid number" and causes an immediate return.
fn next-number-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "looking for a number"
  trace-lower trace
  $next-number-token:loop: {
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    compare exhausted?, 0/false
    break-if-!=
    var g/eax: grapheme <- peek-from-gap-buffer in
    # trace the grapheme under consideration
    {
      var msg-storage: (stream byte 0x40)
      var msg/esi: (addr stream byte) <- address msg-storage
      write msg, "next: "
      var code/eax: int <- copy g
      write-int32-hex msg, code
      trace trace, "read", msg
    }
    # a non-symbol grapheme ends the number
    {
      var in-symbol?/eax: boolean <- symbol-grapheme? g
      compare in-symbol?, 0/false
      break-if-!=
      trace-text trace, "read", "stop"
      break $next-number-token:loop
    }
    # a symbol grapheme that isn't a digit can't follow a digit
    {
      var numeral?/eax: boolean <- decimal-digit? g
      compare numeral?, 0/false
      break-if-!=
      error trace, "invalid number"
      return
    }
    # it's a digit; consume it
    trace-text trace, "read", "append"
    var next/eax: grapheme <- read-from-gap-buffer in
    write-grapheme out, next
    loop
  }
  trace-higher trace
}
316 
# Copy graphemes from 'in' to 'out' up to the next ']'.
# The ']' is consumed but not written. Running out of input first is reported
# to 'trace' as an error and causes an immediate return.
fn next-stream-token in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "stream"
  {
    # running out of input before ']' is an error
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    {
      compare exhausted?, 0/false
      break-if-=
      error trace, "unbalanced '['"
      return
    }
    var next/eax: grapheme <- read-from-gap-buffer in
    compare next, 0x5d/close-square-bracket
    break-if-=
    write-grapheme out, next
    loop
  }
  # trace the token
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "read", result
}
340 
# Emit the already-read bracket 'g' to 'out' as a token of its own, and trace
# the result.
fn next-bracket-token g: grapheme, out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "bracket"
  write-grapheme out, g
  # trace the token
  var result-storage: (stream byte 0x40)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "read", result
}
351 
# Copy graphemes from 'in' to 'out' up to the next newline.
# The newline is consumed but not written. If 'in' runs out first, returns
# immediately without tracing the result.
fn rest-of-line in: (addr gap-buffer), out: (addr stream byte), trace: (addr trace) {
  trace-text trace, "read", "comment"
  {
    # end of input also ends the line
    var exhausted?/eax: boolean <- gap-buffer-scan-done? in
    {
      compare exhausted?, 0/false
      break-if-=
      return
    }
    var next/eax: grapheme <- read-from-gap-buffer in
    compare next, 0xa/newline
    break-if-=
    write-grapheme out, next
    loop
  }
  # trace the line
  var result-storage: (stream byte 0x80)
  var result/esi: (addr stream byte) <- address result-storage
  write result, "=> "
  rewind-stream out
  write-stream result, out
  trace trace, "read", result
}
374 
# Can 'g' be part of a symbol?
# Everything is allowed except the graphemes listed below. Each match breaks
# out of the block to the shared 'return 0/false' at the bottom.
fn symbol-grapheme? g: grapheme -> _/eax: boolean {
  {
    ## whitespace
    compare g, 9/tab
    break-if-=
    compare g, 0xa/newline
    break-if-=
    compare g, 0x20/space
    break-if-=
    ## quotes
    compare g, 0x22/double-quote
    break-if-=
    compare g, 0x60/backquote
    break-if-=
    ## brackets
    compare g, 0x28/open-paren
    break-if-=
    compare g, 0x29/close-paren
    break-if-=
    compare g, 0x5b/open-square-bracket
    break-if-=
    compare g, 0x5d/close-square-bracket
    break-if-=
    compare g, 0x7b/open-curly-bracket
    break-if-=
    compare g, 0x7d/close-curly-bracket
    break-if-=
    ## other punctuation
    # '!' is a symbol char
    compare g, 0x23/hash
    break-if-=
    # '$' is a symbol char
    compare g, 0x25/percent
    break-if-=
    compare g, 0x26/ampersand
    break-if-=
    compare g, 0x27/single-quote
    break-if-=
    compare g, 0x2a/asterisk
    break-if-=
    compare g, 0x2b/plus
    break-if-=
    compare g, 0x2c/comma
    break-if-=
    compare g, 0x2d/dash  # '-' not allowed in symbols
    break-if-=
    compare g, 0x2e/period
    break-if-=
    compare g, 0x2f/slash
    break-if-=
    compare g, 0x3a/colon
    break-if-=
    compare g, 0x3b/semi-colon
    break-if-=
    compare g, 0x3c/less-than
    break-if-=
    compare g, 0x3d/equal
    break-if-=
    compare g, 0x3e/greater-than
    break-if-=
    # '?' is a symbol char
    compare g, 0x40/at-sign
    break-if-=
    compare g, 0x5c/backslash
    break-if-=
    compare g, 0x5e/caret
    break-if-=
    # '_' is a symbol char
    compare g, 0x7c/vertical-line
    break-if-=
    compare g, 0x7e/tilde
    break-if-=
    # nothing ruled it out
    return 1/true
  }
  return 0/false
}
541 
# Is 'g' one of ( ) [ ] { } ?
# Each match breaks out of the block to the shared 'return 1/true' at the
# bottom.
fn bracket-grapheme? g: grapheme -> _/eax: boolean {
  {
    compare g, 0x28/open-paren
    break-if-=
    compare g, 0x29/close-paren
    break-if-=
    compare g, 0x5b/open-square-bracket
    break-if-=
    compare g, 0x5d/close-square-bracket
    break-if-=
    compare g, 0x7b/open-curly-bracket
    break-if-=
    compare g, 0x7d/close-curly-bracket
    break-if-=
    # not a bracket
    return 0/false
  }
  return 1/true
}
575 
# Is 'g' a punctuation grapheme that can be part of an operator token?
# Returns true for exactly the graphemes listed below, false for everything
# else.
fn operator-grapheme? g: grapheme -> _/eax: boolean {
  # '$' is a symbol char
  compare g, 0x25/percent
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x26/ampersand
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x27/single-quote
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2a/asterisk
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2b/plus
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2c/comma
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2d/dash  # '-' not allowed in symbols
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2e/period
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x2f/slash
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3a/colon
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3b/semi-colon
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3c/less-than
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3d/equal
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x3e/greater-than
  {
    break-if-!=
    return 1/true
  }
  # '?' is a symbol char
  compare g, 0x40/at-sign
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x5c/backslash
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x5e/caret
  {
    break-if-!=
    return 1/true
  }
  # '_' is a symbol char
  compare g, 0x7c/vertical-line
  {
    break-if-!=
    return 1/true
  }
  compare g, 0x7e/tilde
  {
    break-if-!=
    return 1/true
  }
  return 0/false
}
677 
# Does the token's text start with a decimal digit?
# Rewinds the token's text stream and reads its first grapheme.
fn number-token? _in: (addr cell) -> _/eax: boolean {
  var token/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  var numeral?/eax: boolean <- decimal-digit? first
  return numeral?
}
687 
# Does the token's text start with a bracket grapheme?
# Tokens tagged 3/stream never count. Otherwise rewinds the token's text
# stream and reads its first grapheme.
fn bracket-token? _in: (addr cell) -> _/eax: boolean {
  var token/eax: (addr cell) <- copy _in
  # rule out stream literals first
  {
    var type-a/eax: (addr int) <- get token, type
    compare *type-a, 3/stream
    break-if-!=
    # streams are never paren tokens
    return 0/false
  }
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  var bracket?/eax: boolean <- bracket-grapheme? first
  return bracket?
}
704 
# Does the token's text start with a single-quote?
# Rewinds the token's text stream and reads its first grapheme.
fn quote-token? _in: (addr cell) -> _/eax: boolean {
  var token/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  {
    compare first, 0x27/single-quote
    break-if-=
    return 0/false
  }
  return 1/true
}
718 
# Is the token's text exactly "("?
# Rewinds the token's text stream, reads the first grapheme, and checks that
# nothing follows it.
fn open-paren-token? _in: (addr cell) -> _/eax: boolean {
  var token/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  # wrong first grapheme? done
  {
    compare first, 0x28/open-paren
    break-if-=
    return 0/false
  }
  # right first grapheme; it must also be the last
  var alone?/eax: boolean <- stream-empty? text
  return alone?
}
734 
# Is the token's text exactly ")"?
# Rewinds the token's text stream, reads the first grapheme, and checks that
# nothing follows it.
fn close-paren-token? _in: (addr cell) -> _/eax: boolean {
  var token/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  # wrong first grapheme? done
  {
    compare first, 0x29/close-paren
    break-if-=
    return 0/false
  }
  # right first grapheme; it must also be the last
  var alone?/eax: boolean <- stream-empty? text
  return alone?
}
750 
# Is the token's text exactly "."?
# Rewinds the token's text stream, reads the first grapheme, and checks that
# nothing follows it.
fn dot-token? _in: (addr cell) -> _/eax: boolean {
  var token/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var _text/eax: (addr stream byte) <- lookup *text-ah
  var text/ecx: (addr stream byte) <- copy _text
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  # wrong first grapheme? done
  {
    compare first, 0x2e/dot
    break-if-=
    return 0/false
  }
  # right first grapheme; it must also be the last
  var alone?/eax: boolean <- stream-empty? text
  return alone?
}
766 
# a symbol cell containing just "." should be recognized as a dot token
fn test-dot-token {
  var sym-storage: (handle cell)
  var sym-ah/eax: (addr handle cell) <- address sym-storage
  new-symbol sym-ah, "."
  var sym/eax: (addr cell) <- lookup *sym-ah
  var dot?/eax: boolean <- dot-token? sym
  check dot?, "F - test-dot-token"
}
775 
# Is the token tagged with type 3/stream?
fn stream-token? _in: (addr cell) -> _/eax: boolean {
  var token/eax: (addr cell) <- copy _in
  var type-a/eax: (addr int) <- get token, type
  {
    compare *type-a, 3/stream
    break-if-!=
    return 1/true
  }
  return 0/false
}
786 
# Does the token's text start with '#'?
# Rewinds the token's text stream and reads its first grapheme.
fn comment-token? _in: (addr cell) -> _/eax: boolean {
  var token/eax: (addr cell) <- copy _in
  var text-ah/eax: (addr handle stream byte) <- get token, text-data
  var text/eax: (addr stream byte) <- lookup *text-ah
  rewind-stream text
  var first/eax: grapheme <- read-grapheme text
  {
    compare first, 0x23/hash
    break-if-!=
    return 1/true
  }
  return 0/false
}
799 }