diff options
Diffstat (limited to 'js/scripting-lang/lexer.js')
-rw-r--r-- | js/scripting-lang/lexer.js | 64 |
1 file changed, 58 insertions, 6 deletions
diff --git a/js/scripting-lang/lexer.js b/js/scripting-lang/lexer.js index c7c026f..4c50b6e 100644 --- a/js/scripting-lang/lexer.js +++ b/js/scripting-lang/lexer.js @@ -3,6 +3,21 @@ /** * Token types for the language + * + * @description Defines all token types used by the lexer and parser. + * Each token type represents a distinct syntactic element in the language. + * + * The token types are organized into categories: + * - Literals: NUMBER, STRING, TRUE, FALSE + * - Operators: PLUS, MINUS, MULTIPLY, DIVIDE, MODULO, POWER, etc. + * - Keywords: WHEN, IS, THEN, FUNCTION, etc. + * - Punctuation: LEFT_PAREN, RIGHT_PAREN, SEMICOLON, COMMA, etc. + * - Special: IO_IN, IO_OUT, IO_ASSERT, FUNCTION_REF, FUNCTION_ARG + * + * This enumeration provides a centralized definition of all possible + * token types, ensuring consistency between lexer and parser. The token + * types are designed to support the combinator-based architecture where + * all operations are translated to function calls. */ export const TokenType = { NUMBER: 'NUMBER', @@ -47,13 +62,39 @@ export const TokenType = { IO_IN: 'IO_IN', IO_OUT: 'IO_OUT', IO_ASSERT: 'IO_ASSERT', - FUNCTION_REF: 'FUNCTION_REF' + FUNCTION_REF: 'FUNCTION_REF', + FUNCTION_ARG: 'FUNCTION_ARG', + COMPOSE: 'COMPOSE' }; /** * Converts source code into tokens + * * @param {string} input - The source code to tokenize - * @returns {Array} Array of tokens + * @returns {Array.<Object>} Array of token objects with type, value, line, and column + * @throws {Error} For unexpected characters or malformed tokens + * + * @description The lexer performs lexical analysis by converting source code + * into a stream of tokens. Each token represents a meaningful unit of the + * language syntax, such as identifiers, literals, operators, and keywords. + * + * The lexer implements a character-by-character scanning approach with + * lookahead for multi-character tokens. It maintains line and column + * information for accurate error reporting and debugging. 
+ * + * Key features: + * - Handles whitespace and comments (single-line and multi-line) + * - Recognizes all language constructs including operators, keywords, and literals + * - Supports string literals with escape sequences + * - Provides detailed position information for error reporting + * - Cross-platform compatibility (Node.js, Bun, browser) + * - Supports function composition with 'via' keyword + * - Handles function references with '@' operator + * + * The lexer is designed to be robust and provide clear error messages + * for malformed input, making it easier to debug syntax errors in user code. + * It supports the combinator-based architecture by recognizing all operators + * and special tokens needed for function composition and application. */ export function lexer(input) { const tokens = []; @@ -135,11 +176,18 @@ export function lexer(input) { continue; } - // Function references (@function) + // Function references (@function) and function arguments (@(expression)) if (char === '@') { current++; // Skip '@' column++; + // Check if this is @(expression) for function arguments + if (current < input.length && input[current] === '(') { + // This is @(expression) - mark as function argument + tokens.push({ type: TokenType.FUNCTION_ARG, line, column: column - 1 }); + continue; + } + // Read the function name let functionName = ''; while (current < input.length && /[a-zA-Z0-9_]/.test(input[current])) { @@ -216,6 +264,12 @@ export function lexer(input) { case 'function': tokens.push({ type: TokenType.FUNCTION, line, column: startColumn }); break; + case 'via': + tokens.push({ type: TokenType.COMPOSE, line, column: startColumn }); + break; + case '_': + tokens.push({ type: TokenType.WILDCARD, line, column: startColumn }); + break; default: tokens.push({ type: TokenType.IDENTIFIER, value, line, column: startColumn }); } @@ -311,9 +365,7 @@ export function lexer(input) { case ':': tokens.push({ type: TokenType.ASSIGNMENT, line, column }); break; - case '_': - 
tokens.push({ type: TokenType.WILDCARD, line, column }); - break; + case '=': if (input[current + 1] === '=') { tokens.push({ type: TokenType.EQUALS, line, column }); |