diff options
Diffstat (limited to 'shell/tokenize.mu')
-rw-r--r-- | shell/tokenize.mu | 86 |
1 files changed, 42 insertions, 44 deletions
diff --git a/shell/tokenize.mu b/shell/tokenize.mu index b75e57e1..3a080135 100644 --- a/shell/tokenize.mu +++ b/shell/tokenize.mu @@ -744,93 +744,91 @@ fn next-indent-token in: (addr gap-buffer), _out: (addr token), trace: (addr tra } } +# Mu carves up the space of graphemes into 4 categories: +# whitespace +# quotes and unquotes (from a Lisp perspective; doesn't include double +# quotes or other Unicode quotes) +# operators +# symbols +# (Numbers have their own parsing rules that don't fit cleanly in this +# partition.) +# +# During tokenization operators and symbols are treated identically. +# A later phase digs into that nuance. + fn symbol-grapheme? g: grapheme -> _/eax: boolean { - ## whitespace - compare g, 9/tab - { - break-if-!= - return 0/false - } - compare g, 0xa/newline + var whitespace?/eax: boolean <- whitespace-grapheme? g + compare whitespace?, 0/false { - break-if-!= - return 0/false - } - compare g, 0x20/space - { - break-if-!= - return 0/false - } - ## quotes - compare g, 0x22/double-quote - { - break-if-!= + break-if-= return 0/false } - compare g, 0x60/backquote + var quote-or-unquote?/eax: boolean <- quote-or-unquote-grapheme? g + compare quote-or-unquote?, 0/false { - break-if-!= + break-if-= return 0/false } - ## brackets - compare g, 0x28/open-paren + var bracket?/eax: boolean <- bracket-grapheme? g + compare bracket?, 0/false { - break-if-!= + break-if-= return 0/false } - compare g, 0x29/close-paren + compare g, 0x23/hash # comments get filtered out { break-if-!= return 0/false } - compare g, 0x5b/open-square-bracket + compare g, 0x22/double-quote # double quotes reserved for now { break-if-!= return 0/false } - compare g, 0x5d/close-square-bracket + return 1/true +} + +fn whitespace-grapheme? g: grapheme -> _/eax: boolean { + compare g, 9/tab { break-if-!= - return 0/false + return 1/true } - compare g, 0x7b/open-curly-bracket + compare g, 0xa/newline { break-if-!= - return 0/false + return 1/true } - compare g, 0x7d/close-curly-bracket + compare g, 0x20/space { break-if-!= - return 0/false + return 1/true } - # quotes and unquotes + return 0/false +} + +fn quote-or-unquote-grapheme? g: grapheme -> _/eax: boolean { compare g, 0x27/single-quote { break-if-!= - return 0/false + return 1/true } compare g, 0x60/backquote { break-if-!= - return 0/false + return 1/true } compare g, 0x2c/comma { break-if-!= - return 0/false + return 1/true } compare g, 0x40/at-sign { break-if-!= - return 0/false - } - # - other punctuation - compare g, 0x23/hash - { - break-if-!= - return 0/false + return 1/true } - return 1/true + return 0/false } fn bracket-grapheme? g: grapheme -> _/eax: boolean { |