From aebcfd1bfbf1d071ffbff7603c89c9746cb81017 Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Tue, 22 Jun 2021 22:30:21 -0700 Subject: beginnings of tokenization within symbols We're now down to 4 failing tests. But these will require surgery. --- shell/infix.mu | 83 ++++++++++++++++++++++++++++++++++++++++++++----------- shell/tokenize.mu | 2 +- 2 files changed, 68 insertions(+), 17 deletions(-) (limited to 'shell') diff --git a/shell/infix.mu b/shell/infix.mu index e7982d0e..b0b13fb6 100644 --- a/shell/infix.mu +++ b/shell/infix.mu @@ -55,7 +55,7 @@ fn transform-infix-2 _x-ah: (addr handle cell), trace: (addr trace) { # symbol? maybe break it down into a pair { compare *x-type, 2/symbol - break-if-= + break-if-!= tokenize-infix x-ah, trace } # not a pair? return @@ -233,14 +233,65 @@ fn swap-cells a-ah: (addr handle cell), b-ah: (addr handle cell) { } fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) { -#? # non-symbol operators + var sym-ah/eax: (addr handle cell) <- copy _sym-ah + var sym/eax: (addr cell) <- lookup *sym-ah + var sym-data-ah/eax: (addr handle stream byte) <- get sym, text-data + var _sym-data/eax: (addr stream byte) <- lookup *sym-data-ah + var sym-data/esi: (addr stream byte) <- copy _sym-data + rewind-stream sym-data + # read sym into a gap buffer and insert spaces in a few places + var buffer-storage: gap-buffer + var buffer/edi: (addr gap-buffer) <- address buffer-storage + initialize-gap-buffer buffer, 0x40/max-symbol-size + # scan for first non-$ + var g/eax: grapheme <- read-grapheme sym-data + add-grapheme-at-gap buffer, g + { + compare g, 0x24/dollar + break-if-!= + { + var done?/eax: boolean <- stream-empty? sym-data + compare done?, 0/false + break-if-= + return # symbol is all '$'s; do nothing + } + g <- read-grapheme sym-data + add-grapheme-at-gap buffer, g + loop + } + var tokenization-needed?: boolean + var _operator-so-far?/eax: boolean <- operator-grapheme? g + var operator-so-far?/ecx: boolean <- copy _operator-so-far? + { + var done?/eax: boolean <- stream-empty? sym-data + compare done?, 0/false + break-if-!= + var g/eax: grapheme <- read-grapheme sym-data + { + var curr-operator?/eax: boolean <- operator-grapheme? g + compare curr-operator?, operator-so-far? + break-if-= + # if grapheme switches state, insert a space + add-grapheme-at-gap buffer, 0x20/space + operator-so-far? <- copy curr-operator? + copy-to tokenization-needed?, 1/true + } + add-grapheme-at-gap buffer, g + loop + } + compare tokenization-needed?, 0/false + break-if-= +#? { +#? var dummy1/eax: int <- copy 0 +#? var dummy2/ecx: int <- copy 0 +#? dummy1, dummy2 <- render-gap-buffer-wrapping-right-then-down 0/screen, buffer, 0x20/xmin 5/ymin, 0x80/xmax 0x30/ymax, 0/no-cursor, 3/fg 0/bg #? { -#? var operator?/eax: boolean <- operator-grapheme? g -#? compare operator?, 0/false -#? break-if-= -#? next-operator-token in, out, trace -#? break $next-token:case +#? loop #? } +#? } + # recursively process buffer + # this time we're guaranteed we won't enter tokenize-infix + read-cell buffer, _sym-ah, trace } fn test-infix { @@ -269,17 +320,17 @@ fn test-infix { check-infix "(a + b + c)", "(+ (+ a b) c)", "F - test-infix/left-associative" #? check-infix "(f a + b)", "(f (+ a b))", "F - test-infix/higher-precedence-than-call" #? check-infix "(f a + b c + d)", "(f (+ a b) (+ c d))", "F - test-infix/multiple" -#? check-infix "+a", "(+ a)", "F - test-infix/unary-operator-2" -#? check-infix "-a", "(- a)", "F - test-infix/unary-operator-3" -#? check-infix "a+b", "(+ a b)", "F - test-infix/no-spaces" -#? check-infix "',a+b", "',(+ a b)", "F - test-infix/no-spaces-with-nested-quotes" -#? check-infix "$a+b", "(+ $a b)", "F - test-infix/no-spaces-2" + check-infix "+a", "(+ a)", "F - test-infix/unary-operator-2" + check-infix "-a", "(- a)", "F - test-infix/unary-operator-3" + check-infix "a+b", "(+ a b)", "F - test-infix/no-spaces" + check-infix "',a+b", "',(+ a b)", "F - test-infix/no-spaces-with-nested-quotes" + check-infix "$a+b", "(+ $a b)", "F - test-infix/no-spaces-2" #? check-infix "-a+b", "(+ (- a) b)", "F - test-infix/unary-over-binary" #? check-infix "~a+b", "(+ (~ a) b)", "F - test-infix/unary-complement" -#? check-infix "(n * n-1)", "(* n (- n 1))", "F - test-infix/no-spaces-over-spaces" -#? check-infix "`(a + b)", "`(+ a b)", "F - test-infix/backquote" -#? check-infix ",@a+b", ",@(+ a b)", "F - test-infix/unquote-splice" -#? check-infix ",@(a + b)", ",@(+ a b)", "F - test-infix/unquote-splice-2" + check-infix "(n * n-1)", "(* n (- n 1))", "F - test-infix/no-spaces-over-spaces" + check-infix "`(a + b)", "`(+ a b)", "F - test-infix/backquote" + check-infix ",@a+b", ",@(+ a b)", "F - test-infix/unquote-splice" + check-infix ",@(a + b)", ",@(+ a b)", "F - test-infix/unquote-splice-2" } # helpers diff --git a/shell/tokenize.mu b/shell/tokenize.mu index 3a080135..2d7ea041 100644 --- a/shell/tokenize.mu +++ b/shell/tokenize.mu @@ -523,7 +523,7 @@ fn next-symbol-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra trace-lower trace var out/eax: (addr token) <- copy _out var out-data-ah/eax: (addr handle stream byte) <- get out, text-data - populate-stream out-data-ah, 0x40 + populate-stream out-data-ah, 0x40/max-symbol-size var _out-data/eax: (addr stream byte) <- lookup *out-data-ah var out-data/edi: (addr stream byte) <- copy _out-data $next-symbol-token:loop: { -- cgit 1.4.1-2-gfad0