diff options
author | elioat <elioat@tilde.institute> | 2025-07-27 12:28:44 -0400 |
---|---|---|
committer | elioat <elioat@tilde.institute> | 2025-07-27 12:28:44 -0400 |
commit | 59a4a649ad3038857dd091dcc69ca86bf993b3f6 (patch) | |
tree | 7db1cbbbbda88fb268ce91175bad2a31014c1d27 /js/scripting-lang/parser.js | |
parent | 83ece970842db4ca0a619a0392a98b2b7445e482 (diff) | |
download | tour-master.tar.gz |
Diffstat (limited to 'js/scripting-lang/parser.js')
-rw-r--r-- | js/scripting-lang/parser.js | 257 |
1 files changed, 188 insertions, 69 deletions
diff --git a/js/scripting-lang/parser.js b/js/scripting-lang/parser.js index b1aa77f..73079c0 100644 --- a/js/scripting-lang/parser.js +++ b/js/scripting-lang/parser.js @@ -26,10 +26,14 @@ import { TokenType } from './lexer.js'; * - Function calls are detected by looking for identifiers followed by expressions * - When expressions and case patterns are parsed with special handling * - Table literals and access are parsed as structured data + * - Function composition uses 'via' keyword with right-associative precedence + * - Function application uses juxtaposition with left-associative precedence * * The parser maintains a current token index and advances through the token * stream, building the AST bottom-up from primary expressions to complex - * logical expressions. + * logical expressions. This approach ensures that all operations are consistently + * represented as function calls, enabling the interpreter to use the combinator + * foundation for execution. */ export function parser(tokens) { let current = 0; @@ -40,6 +44,11 @@ export function parser(tokens) { * @returns {Object} Complete AST with program body * @description Iterates through all tokens, parsing each statement or expression * and building the program body. Handles empty programs gracefully. + * + * This function orchestrates the parsing process by repeatedly calling walk() + * until all tokens are consumed. It ensures that the final AST contains all + * statements and expressions in the correct order, ready for interpretation + * by the combinator-based interpreter. */ function parse() { const body = []; @@ -68,6 +77,12 @@ export function parser(tokens) { * 3. When expressions (pattern matching) * 4. Function definitions (explicit function declarations) * 5. Logical expressions (default case for all other expressions) + * + * This function implements the top-level parsing strategy by checking for + * specific token patterns that indicate different language constructs. + * The order of checks is crucial for correct parsing precedence and + * ensures that complex expressions are properly decomposed into their + * constituent parts for combinator translation. */ function walk() { const token = tokens[current]; @@ -233,23 +248,9 @@ export function parser(tokens) { // but not treat them as function calls let value; if (tokens[current].type === TokenType.IDENTIFIER) { - // Check if this is followed by another identifier (multi-value case) - if (current + 1 < tokens.length && - tokens[current + 1].type === TokenType.IDENTIFIER && - tokens[current + 2].type === TokenType.IS) { - // This is a multi-value case like "when x y is" - value = { type: 'Identifier', value: tokens[current].value }; - current++; - values.push(value); - value = { type: 'Identifier', value: tokens[current].value }; - current++; - values.push(value); - break; // We've consumed both values and will hit IS next - } else { - // Single identifier value - value = { type: 'Identifier', value: tokens[current].value }; - current++; - } + // Single identifier value + value = { type: 'Identifier', value: tokens[current].value }; + current++; } else { // For other types, use normal expression parsing value = parseLogicalExpression(); @@ -274,7 +275,19 @@ export function parser(tokens) { if (process.env.DEBUG) { console.log(`[DEBUG] parseWhenExpression: parsing pattern, current token = ${tokens[current].type}, value = ${tokens[current].value || 'N/A'}`); } - if (tokens[current].type === TokenType.IDENTIFIER) { + + // Check if this is a comparison expression (starts with identifier followed by comparison operator) + if (tokens[current].type === TokenType.IDENTIFIER && + current + 1 < tokens.length && + (tokens[current + 1].type === TokenType.LESS_THAN || + tokens[current + 1].type === TokenType.GREATER_THAN || + tokens[current + 1].type === TokenType.LESS_EQUAL || + tokens[current + 1].type === TokenType.GREATER_EQUAL || + tokens[current + 1].type === TokenType.EQUALS || + tokens[current + 1].type === TokenType.NOT_EQUAL)) { + // Parse as a comparison expression + pattern = parseExpression(); + } else if (tokens[current].type === TokenType.IDENTIFIER) { pattern = { type: 'Identifier', value: tokens[current].value }; current++; } else if (tokens[current].type === TokenType.NUMBER) { @@ -289,10 +302,25 @@ export function parser(tokens) { } else if (tokens[current].type === TokenType.FUNCTION_REF) { pattern = { type: 'FunctionReference', name: tokens[current].name }; current++; + } else if (tokens[current].type === TokenType.TRUE) { + pattern = { type: 'BooleanLiteral', value: true }; + current++; + } else if (tokens[current].type === TokenType.FALSE) { + pattern = { type: 'BooleanLiteral', value: false }; + current++; } else { - throw new Error(`Expected pattern (identifier, number, string, wildcard, or function reference) in when expression, got ${tokens[current].type}`); + throw new Error(`Expected pattern (identifier, number, string, wildcard, function reference, boolean, or comparison) in when expression, got ${tokens[current].type}`); } patterns.push(pattern); + + // If we have multiple patterns, we need to handle them correctly + // Check if the next token is a valid pattern start (not THEN) + if (current < tokens.length && + tokens[current].type !== TokenType.THEN && + tokens[current].type !== TokenType.SEMICOLON) { + // Continue parsing more patterns + continue; + } } if (current >= tokens.length || tokens[current].type !== TokenType.THEN) { @@ -506,6 +534,17 @@ export function parser(tokens) { * executed by the interpreter using standard library combinators. */ function parseExpression() { + // Handle unary minus at the beginning of expressions + if (current < tokens.length && tokens[current].type === TokenType.MINUS) { + current++; + const operand = parseTerm(); + return { + type: 'FunctionCall', + name: 'negate', + args: [operand] + }; + } + let left = parseTerm(); while (current < tokens.length) { @@ -515,12 +554,20 @@ export function parser(tokens) { console.log(`[DEBUG] parseExpression: current token = ${token.type}, value = ${token.value || 'N/A'}`); } - if (token.type === TokenType.PLUS || token.type === TokenType.MINUS) { + if (token.type === TokenType.PLUS) { current++; const right = parseTerm(); left = { type: 'FunctionCall', - name: token.type === TokenType.PLUS ? 'add' : 'subtract', + name: 'add', + args: [left, right] + }; + } else if (token.type === TokenType.MINUS) { + current++; + const right = parseTerm(); + left = { + type: 'FunctionCall', + name: 'subtract', args: [left, right] }; } else if (token.type === TokenType.EQUALS || @@ -557,7 +604,7 @@ export function parser(tokens) { * FunctionCall nodes using the corresponding combinator functions. */ function parseTerm() { - let left = parseFactor(); + let left = parseApplication(); while (current < tokens.length) { const token = tokens[current]; @@ -592,6 +639,7 @@ export function parser(tokens) { function parseFactor() { let left = parsePrimary(); + // Parse power expressions (existing logic) while (current < tokens.length) { const token = tokens[current]; @@ -612,6 +660,94 @@ export function parser(tokens) { } /** + * Parse function composition expressions + * + * @returns {Object} AST node representing the composition expression + * @description Parses function composition using the 'via' keyword + * with right-associative precedence: f via g via h = compose(f, compose(g, h)) + * + * Function composition is a fundamental feature that allows functions to be + * combined naturally. The right-associative precedence means that composition + * chains are built from right to left, which matches mathematical function + * composition notation. This enables powerful functional programming patterns + * where complex transformations can be built from simple, composable functions. + */ + function parseComposition() { + let left = parseFactor(); + + // Parse right-associative composition: f via g via h = compose(f, compose(g, h)) + while (current < tokens.length && tokens[current].type === TokenType.COMPOSE) { + current++; // Skip 'via' + const right = parseFactor(); + + left = { + type: 'FunctionCall', + name: 'compose', + args: [left, right] + }; + } + + return left; + } + + /** + * Parse function application (juxtaposition) + * + * @returns {Object} AST node representing the function application + * @description Parses function application using juxtaposition (f x) + * with left-associative precedence: f g x = apply(apply(f, g), x) + * + * Function application using juxtaposition is the primary mechanism for + * calling functions in the language. The left-associative precedence means + * that application chains are built from left to right, which is intuitive + * for most programmers. This approach eliminates the need for parentheses + * in many cases while maintaining clear precedence rules. + */ + function parseApplication() { + let left = parseComposition(); + + // Parse left-associative function application: f g x = apply(apply(f, g), x) + while (current < tokens.length && isValidArgumentStart(tokens[current])) { + const arg = parseComposition(); // Parse the argument as a composition expression + left = { + type: 'FunctionCall', + name: 'apply', + args: [left, arg] + }; + } + + return left; + } + + /** + * Check if a token is a valid start of a function argument + * + * @param {Object} token - Token to check + * @returns {boolean} True if the token can start a function argument + * @description Determines if a token can be the start of a function argument. + * This is used to detect function application (juxtaposition) where function + * application binds tighter than infix operators. + * + * This function is crucial for the juxtaposition-based function application + * system. It determines when the parser should treat an expression as a + * function argument rather than as part of an infix operator expression. + * The tokens that can start arguments are carefully chosen to ensure that + * function application has the correct precedence relative to operators. + */ + function isValidArgumentStart(token) { + return token.type === TokenType.IDENTIFIER || + token.type === TokenType.NUMBER || + token.type === TokenType.STRING || + token.type === TokenType.LEFT_PAREN || + token.type === TokenType.LEFT_BRACE || + token.type === TokenType.TRUE || + token.type === TokenType.FALSE || + token.type === TokenType.FUNCTION_REF || + token.type === TokenType.MINUS || + token.type === TokenType.NOT; + } + + /** * Parse table literals: {key: value, key2: value2} or {value1, value2, value3} * * @returns {Object} TableLiteral AST node @@ -794,45 +930,23 @@ export function parser(tokens) { }; } - // Parse function call arguments (including parenthesized expressions) - const args = []; - while ( - current < tokens.length && - ( - tokens[current].type === TokenType.IDENTIFIER || - tokens[current].type === TokenType.NUMBER || - tokens[current].type === TokenType.STRING || - tokens[current].type === TokenType.LEFT_PAREN || - tokens[current].type === TokenType.LEFT_BRACE || - tokens[current].type === TokenType.TRUE || - tokens[current].type === TokenType.FALSE || - tokens[current].type === TokenType.FUNCTION_REF || - (tokens[current].type === TokenType.MINUS && - current + 1 < tokens.length && - tokens[current + 1].type === TokenType.NUMBER) - ) - ) { - // Special case: if we see FUNCTION_REF followed by MINUS followed by NUMBER, - // parse them as separate arguments - if (tokens[current].type === TokenType.FUNCTION_REF && - current + 1 < tokens.length && tokens[current + 1].type === TokenType.MINUS && - current + 2 < tokens.length && tokens[current + 2].type === TokenType.NUMBER) { - // Parse the function reference - args.push(parsePrimary()); - // Parse the unary minus as a separate argument - args.push(parsePrimary()); - } else { - // Parse each argument as a complete expression - args.push(parseExpression()); + // Parenthesized expressions are handled as simple grouping, not function calls + // This maintains consistency with the juxtaposition pattern + if (current < tokens.length && tokens[current].type === TokenType.LEFT_PAREN) { + current++; // consume '(' + + // Parse the expression inside parentheses + const expression = parseLogicalExpression(); + + if (current >= tokens.length || tokens[current].type !== TokenType.RIGHT_PAREN) { + throw new Error('Expected ")" after expression'); } + current++; // consume ')' + + return expression; } - if (args.length > 0) { - return { - type: 'FunctionCall', - name: identifierValue, - args - }; - } + + // Juxtaposition function calls are now handled in parseFactor() with proper precedence return { type: 'Identifier', value: identifierValue }; case TokenType.LEFT_PAREN: @@ -845,6 +959,16 @@ export function parser(tokens) { throw new Error('Expected ")" after expression'); } current++; + + // Check if this is just a simple identifier in parentheses + if (expression.type === 'Identifier') { + return { + type: 'FunctionCall', + name: 'identity', + args: [expression] + }; + } + return expression; case TokenType.WILDCARD: @@ -856,7 +980,7 @@ export function parser(tokens) { - case TokenType.NOT: + case TokenType.NOT: current++; const operand = parsePrimary(); return { @@ -866,13 +990,8 @@ export function parser(tokens) { }; case TokenType.MINUS: - current++; - const unaryOperand = parsePrimary(); - return { - type: 'FunctionCall', - name: 'negate', - args: [unaryOperand] - }; + // Delegate unary minus to parseExpression for proper precedence + return parseExpression(); case TokenType.ARROW: current++; @@ -880,7 +999,7 @@ export function parser(tokens) { return { type: 'ArrowExpression', body: arrowBody }; case TokenType.FUNCTION_REF: - const functionRef = { type: 'FunctionReference', name: token.name }; + const functionRef = { type: 'FunctionReference', name: tokens[current].name }; current++; return functionRef; |