diff options
| field | value | date |
|---|---|---|
| author | Kartik K. Agaram <vc@akkartik.com> | 2021-06-22 21:43:44 -0700 |
| committer | Kartik K. Agaram <vc@akkartik.com> | 2021-06-22 21:43:44 -0700 |
| commit | 0436ab71eab8768d643d9c8568bdfef1ecc7079b (patch) | |
| tree | bf473648079471c126d87edb0862e5ca6a0fd8c1 /shell | |
| parent | 59d904b4df82e96c56e6f06358ac9de278ea7d6a (diff) | |
| download | mu-0436ab71eab8768d643d9c8568bdfef1ecc7079b.tar.gz | |
clean up lexical categories
Diffstat (limited to 'shell')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | shell/infix.mu | 104 |
| -rw-r--r-- | shell/tokenize.mu | 86 |
2 files changed, 48 insertions, 142 deletions
```diff
diff --git a/shell/infix.mu b/shell/infix.mu
index f1d9d5d6..a0e81ff2 100644
--- a/shell/infix.mu
+++ b/shell/infix.mu
@@ -1,6 +1,7 @@
 fn transform-infix x-ah: (addr handle cell), trace: (addr trace) {
   trace-text trace, "infix", "transform infix"
   trace-lower trace
+#? trace-text trace, "infix", "todo"
 #? draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "a:", 2/fg 0/bg
 #? dump-cell-from-cursor-over-full-screen x-ah, 7/fg 0/bg
   transform-infix-2 x-ah, trace
@@ -316,91 +317,18 @@ fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
 }
 
 fn non-operator-grapheme? g: grapheme -> _/eax: boolean {
-  ## whitespace
-  compare g, 9/tab
+  var operator?/eax: boolean <- operator-grapheme? g
+  compare operator?, 0/false
   {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0xa/newline
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x20/space
-  {
-    break-if-!=
-    return 0/false
-  }
-  ## we don't really use double quotes
-  compare g, 0x22/double-quote
-  {
-    break-if-!=
-    return 1/true
-  }
-  ## brackets
-  compare g, 0x28/open-paren
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x29/close-paren
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x5b/open-square-bracket
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x5d/close-square-bracket
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x7b/open-curly-bracket
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x7d/close-curly-bracket
-  {
-    break-if-!=
-    return 0/false
-  }
-  # quotes and unquotes are like symbols for this purpose
-  compare g, 0x27/single-quote
-  {
-    break-if-!=
-    return 1/true
-  }
-  compare g, 0x60/backquote
-  {
-    break-if-!=
-    return 1/true
-  }
-  compare g, 0x2c/comma
-  {
-    break-if-!=
-    return 1/true
-  }
-  compare g, 0x40/at-sign
-  {
-    break-if-!=
-    return 1/true
-  }
-  # - other punctuation
-  compare g, 0x23/hash
-  {
-    break-if-!=
+    break-if-=
     return 0/false
   }
   return 1/true
 }
 
+# just a short list of operator graphemes for now
 fn operator-grapheme? g: grapheme -> _/eax: boolean {
-  # '$' is a symbol char
+  # '$' is special and can be in either a symbol or operator
   compare g, 0x25/percent
   {
     break-if-!=
@@ -411,26 +339,6 @@ fn operator-grapheme? g: grapheme -> _/eax: boolean {
     break-if-!=
     return 1/true
   }
-  compare g, 0x27/single-quote
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x60/backquote
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x2c/comma
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x40/at-sign
-  {
-    break-if-!=
-    return 0/false
-  }
   compare g, 0x2a/asterisk
   {
     break-if-!=
diff --git a/shell/tokenize.mu b/shell/tokenize.mu
index b75e57e1..3a080135 100644
--- a/shell/tokenize.mu
+++ b/shell/tokenize.mu
@@ -744,93 +744,91 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra
   }
 }
 
+# Mu carves up the space of graphemes into 4 categories:
+#   whitespace
+#   quotes and unquotes (from a Lisp perspective; doesn't include double
+#                        quotes or other Unicode quotes)
+#   operators
+#   symbols
+# (Numbers have their own parsing rules that don't fit cleanly in this
+# partition.)
+#
+# During tokenization operators and symbols are treated identically.
+# A later phase digs into that nuance.
+
 fn symbol-grapheme? g: grapheme -> _/eax: boolean {
-  ## whitespace
-  compare g, 9/tab
-  {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0xa/newline
+  var whitespace?/eax: boolean <- whitespace-grapheme? g
+  compare whitespace?, 0/false
   {
-    break-if-!=
-    return 0/false
-  }
-  compare g, 0x20/space
-  {
-    break-if-!=
-    return 0/false
-  }
-  ## quotes
-  compare g, 0x22/double-quote
-  {
-    break-if-!=
+    break-if-=
     return 0/false
   }
-  compare g, 0x60/backquote
+  var quote-or-unquote?/eax: boolean <- quote-or-unquote-grapheme? g
+  compare quote-or-unquote?, 0/false
   {
-    break-if-!=
+    break-if-=
     return 0/false
   }
-  ## brackets
-  compare g, 0x28/open-paren
+  var bracket?/eax: boolean <- bracket-grapheme? g
+  compare bracket?, 0/false
   {
-    break-if-!=
+    break-if-=
     return 0/false
   }
-  compare g, 0x29/close-paren
+  compare g, 0x23/hash  # comments get filtered out
   {
     break-if-!=
     return 0/false
   }
-  compare g, 0x5b/open-square-bracket
+  compare g, 0x22/double-quote  # double quotes reserved for now
   {
     break-if-!=
     return 0/false
   }
-  compare g, 0x5d/close-square-bracket
+  return 1/true
+}
+
+fn whitespace-grapheme? g: grapheme -> _/eax: boolean {
+  compare g, 9/tab
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
-  compare g, 0x7b/open-curly-bracket
+  compare g, 0xa/newline
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
-  compare g, 0x7d/close-curly-bracket
+  compare g, 0x20/space
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
-  # quotes and unquotes
+  return 0/false
+}
+
+fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean {
   compare g, 0x27/single-quote
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
   compare g, 0x60/backquote
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
   compare g, 0x2c/comma
   {
     break-if-!=
-    return 0/false
+    return 1/true
   }
   compare g, 0x40/at-sign
   {
     break-if-!=
-    return 0/false
-  }
-  # - other punctuation
-  compare g, 0x23/hash
-  {
-    break-if-!=
-    return 0/false
+    return 1/true
   }
-  return 1/true
+  return 0/false
 }
 
 fn bracket-grapheme? g: grapheme -> _/eax: boolean {
```