* HEAD master

author: elioat <elioat@tilde.institute> 2025-07-27 12:28:44 -0400
committer: elioat <elioat@tilde.institute> 2025-07-27 12:28:44 -0400
commit: 59a4a649ad3038857dd091dcc69ca86bf993b3f6 (patch)
tree: 7db1cbbbbda88fb268ce91175bad2a31014c1d27 /js/scripting-lang/parser.js
parent: 83ece970842db4ca0a619a0392a98b2b7445e482 (diff)
download: tour-master.tar.gz
1 files changed, 188 insertions, 69 deletions
diff --git a/js/scripting-lang/parser.js b/js/scripting-lang/parser.js
index b1aa77f..73079c0 100644
--- a/js/scripting-lang/parser.js
+++ b/js/scripting-lang/parser.js
@@ -26,10 +26,14 @@ import { TokenType } from './lexer.js';
  * - Function calls are detected by looking for identifiers followed by expressions
  * - When expressions and case patterns are parsed with special handling
  * - Table literals and access are parsed as structured data
+ * - Function composition uses 'via' keyword with right-associative precedence
+ * - Function application uses juxtaposition with left-associative precedence
  * 
  * The parser maintains a current token index and advances through the token
  * stream, building the AST bottom-up from primary expressions to complex
- * logical expressions.
+ * logical expressions. This approach ensures that all operations are consistently
+ * represented as function calls, enabling the interpreter to use the combinator
+ * foundation for execution.
  */
 export function parser(tokens) {
     let current = 0;
@@ -40,6 +44,11 @@ export function parser(tokens) {
      * @returns {Object} Complete AST with program body
      * @description Iterates through all tokens, parsing each statement or expression
      * and building the program body. Handles empty programs gracefully.
+     * 
+     * This function orchestrates the parsing process by repeatedly calling walk()
+     * until all tokens are consumed. It ensures that the final AST contains all
+     * statements and expressions in the correct order, ready for interpretation
+     * by the combinator-based interpreter.
      */
     function parse() {
         const body = [];
@@ -68,6 +77,12 @@ export function parser(tokens) {
      * 3. When expressions (pattern matching)
      * 4. Function definitions (explicit function declarations)
      * 5. Logical expressions (default case for all other expressions)
+     * 
+     * This function implements the top-level parsing strategy by checking for
+     * specific token patterns that indicate different language constructs.
+     * The order of checks is crucial for correct parsing precedence and
+     * ensures that complex expressions are properly decomposed into their
+     * constituent parts for combinator translation.
      */
     function walk() {
         const token = tokens[current];
@@ -233,23 +248,9 @@ export function parser(tokens) {
             // but not treat them as function calls
             let value;
             if (tokens[current].type === TokenType.IDENTIFIER) {
-                // Check if this is followed by another identifier (multi-value case)
-                if (current + 1 < tokens.length && 
-                    tokens[current + 1].type === TokenType.IDENTIFIER && 
-                    tokens[current + 2].type === TokenType.IS) {
-                    // This is a multi-value case like "when x y is"
-                    value = { type: 'Identifier', value: tokens[current].value };
-                    current++;
-                    values.push(value);
-                    value = { type: 'Identifier', value: tokens[current].value };
-                    current++;
-                    values.push(value);
-                    break; // We've consumed both values and will hit IS next
-                } else {
-                    // Single identifier value
-                    value = { type: 'Identifier', value: tokens[current].value };
-                    current++;
-                }
+                // Single identifier value
+                value = { type: 'Identifier', value: tokens[current].value };
+                current++;
             } else {
                 // For other types, use normal expression parsing
                 value = parseLogicalExpression();
@@ -274,7 +275,19 @@ export function parser(tokens) {
                 if (process.env.DEBUG) {
                     console.log(`[DEBUG] parseWhenExpression: parsing pattern, current token = ${tokens[current].type}, value = ${tokens[current].value || 'N/A'}`);
                 }
-                if (tokens[current].type === TokenType.IDENTIFIER) {
+                
+                // Check if this is a comparison expression (starts with identifier followed by comparison operator)
+                if (tokens[current].type === TokenType.IDENTIFIER && 
+                    current + 1 < tokens.length &&
+                    (tokens[current + 1].type === TokenType.LESS_THAN ||
+                     tokens[current + 1].type === TokenType.GREATER_THAN ||
+                     tokens[current + 1].type === TokenType.LESS_EQUAL ||
+                     tokens[current + 1].type === TokenType.GREATER_EQUAL ||
+                     tokens[current + 1].type === TokenType.EQUALS ||
+                     tokens[current + 1].type === TokenType.NOT_EQUAL)) {
+                    // Parse as a comparison expression
+                    pattern = parseExpression();
+                } else if (tokens[current].type === TokenType.IDENTIFIER) {
                     pattern = { type: 'Identifier', value: tokens[current].value };
                     current++;
                 } else if (tokens[current].type === TokenType.NUMBER) {
@@ -289,10 +302,25 @@ export function parser(tokens) {
                 } else if (tokens[current].type === TokenType.FUNCTION_REF) {
                     pattern = { type: 'FunctionReference', name: tokens[current].name };
                     current++;
+                } else if (tokens[current].type === TokenType.TRUE) {
+                    pattern = { type: 'BooleanLiteral', value: true };
+                    current++;
+                } else if (tokens[current].type === TokenType.FALSE) {
+                    pattern = { type: 'BooleanLiteral', value: false };
+                    current++;
                 } else {
-                    throw new Error(`Expected pattern (identifier, number, string, wildcard, or function reference) in when expression, got ${tokens[current].type}`);
+                    throw new Error(`Expected pattern (identifier, number, string, wildcard, function reference, boolean, or comparison) in when expression, got ${tokens[current].type}`);
                 }
                 patterns.push(pattern);
+                
+                // If we have multiple patterns, we need to handle them correctly
+                // Check if the next token is a valid pattern start (not THEN)
+                if (current < tokens.length && 
+                    tokens[current].type !== TokenType.THEN &&
+                    tokens[current].type !== TokenType.SEMICOLON) {
+                    // Continue parsing more patterns
+                    continue;
+                }
             }
             
             if (current >= tokens.length || tokens[current].type !== TokenType.THEN) {
@@ -506,6 +534,17 @@ export function parser(tokens) {
      * executed by the interpreter using standard library combinators.
      */
     function parseExpression() {
+        // Handle unary minus at the beginning of expressions
+        if (current < tokens.length && tokens[current].type === TokenType.MINUS) {
+            current++;
+            const operand = parseTerm();
+            return {
+                type: 'FunctionCall',
+                name: 'negate',
+                args: [operand]
+            };
+        }
+        
         let left = parseTerm();
         
         while (current < tokens.length) {
@@ -515,12 +554,20 @@ export function parser(tokens) {
                 console.log(`[DEBUG] parseExpression: current token = ${token.type}, value = ${token.value || 'N/A'}`);
             }
             
-            if (token.type === TokenType.PLUS || token.type === TokenType.MINUS) {
+            if (token.type === TokenType.PLUS) {
                 current++;
                 const right = parseTerm();
                 left = {
                     type: 'FunctionCall',
-                    name: token.type === TokenType.PLUS ? 'add' : 'subtract',
+                    name: 'add',
+                    args: [left, right]
+                };
+            } else if (token.type === TokenType.MINUS) {
+                current++;
+                const right = parseTerm();
+                left = {
+                    type: 'FunctionCall',
+                    name: 'subtract',
                     args: [left, right]
                 };
             } else if (token.type === TokenType.EQUALS || 
@@ -557,7 +604,7 @@ export function parser(tokens) {
      * FunctionCall nodes using the corresponding combinator functions.
      */
     function parseTerm() {
-        let left = parseFactor();
+        let left = parseApplication();
         
         while (current < tokens.length) {
             const token = tokens[current];
@@ -592,6 +639,7 @@ export function parser(tokens) {
     function parseFactor() {
         let left = parsePrimary();
         
+        // Parse power expressions (existing logic)
         while (current < tokens.length) {
             const token = tokens[current];
             
@@ -612,6 +660,94 @@ export function parser(tokens) {
     }
     
     /**
+     * Parse function composition expressions
+     * 
+     * @returns {Object} AST node representing the composition expression
+     * @description Parses function composition using the 'via' keyword
+     * with right-associative precedence: f via g via h = compose(f, compose(g, h))
+     * 
+     * Function composition is a fundamental feature that allows functions to be
+     * combined naturally. The right-associative precedence means that composition
+     * chains are built from right to left, which matches mathematical function
+     * composition notation. This enables powerful functional programming patterns
+     * where complex transformations can be built from simple, composable functions.
+     */
+    function parseComposition() {
+        let left = parseFactor();
+        
+        // Parse right-associative composition: f via g via h = compose(f, compose(g, h))
+        while (current < tokens.length && tokens[current].type === TokenType.COMPOSE) {
+            current++; // Skip 'via'
+            const right = parseFactor();
+            
+            left = {
+                type: 'FunctionCall',
+                name: 'compose',
+                args: [left, right]
+            };
+        }
+        
+        return left;
+    }
+    
+    /**
+     * Parse function application (juxtaposition)
+     * 
+     * @returns {Object} AST node representing the function application
+     * @description Parses function application using juxtaposition (f x)
+     * with left-associative precedence: f g x = apply(apply(f, g), x)
+     * 
+     * Function application using juxtaposition is the primary mechanism for
+     * calling functions in the language. The left-associative precedence means
+     * that application chains are built from left to right, which is intuitive
+     * for most programmers. This approach eliminates the need for parentheses
+     * in many cases while maintaining clear precedence rules.
+     */
+    function parseApplication() {
+        let left = parseComposition();
+        
+        // Parse left-associative function application: f g x = apply(apply(f, g), x)
+        while (current < tokens.length && isValidArgumentStart(tokens[current])) {
+            const arg = parseComposition(); // Parse the argument as a composition expression
+            left = {
+                type: 'FunctionCall',
+                name: 'apply',
+                args: [left, arg]
+            };
+        }
+        
+        return left;
+    }
+    
+    /**
+     * Check if a token is a valid start of a function argument
+     * 
+     * @param {Object} token - Token to check
+     * @returns {boolean} True if the token can start a function argument
+     * @description Determines if a token can be the start of a function argument.
+     * This is used to detect function application (juxtaposition) where function
+     * application binds tighter than infix operators.
+     * 
+     * This function is crucial for the juxtaposition-based function application
+     * system. It determines when the parser should treat an expression as a
+     * function argument rather than as part of an infix operator expression.
+     * The tokens that can start arguments are carefully chosen to ensure that
+     * function application has the correct precedence relative to operators.
+     */
+    function isValidArgumentStart(token) {
+        return token.type === TokenType.IDENTIFIER ||
+               token.type === TokenType.NUMBER ||
+               token.type === TokenType.STRING ||
+               token.type === TokenType.LEFT_PAREN ||
+               token.type === TokenType.LEFT_BRACE ||
+               token.type === TokenType.TRUE ||
+               token.type === TokenType.FALSE ||
+               token.type === TokenType.FUNCTION_REF ||
+               token.type === TokenType.MINUS ||
+               token.type === TokenType.NOT;
+    }
+    
+    /**
      * Parse table literals: {key: value, key2: value2} or {value1, value2, value3}
      * 
      * @returns {Object} TableLiteral AST node
@@ -794,45 +930,23 @@ export function parser(tokens) {
                     };
                 }
                 
-                // Parse function call arguments (including parenthesized expressions)
-                const args = [];
-                while (
-                    current < tokens.length &&
-                    (
-                        tokens[current].type === TokenType.IDENTIFIER ||
-                        tokens[current].type === TokenType.NUMBER ||
-                        tokens[current].type === TokenType.STRING ||
-                        tokens[current].type === TokenType.LEFT_PAREN ||
-                        tokens[current].type === TokenType.LEFT_BRACE ||
-                        tokens[current].type === TokenType.TRUE ||
-                        tokens[current].type === TokenType.FALSE ||
-                        tokens[current].type === TokenType.FUNCTION_REF ||
-                        (tokens[current].type === TokenType.MINUS && 
-                         current + 1 < tokens.length && 
-                         tokens[current + 1].type === TokenType.NUMBER)
-                    )
-                ) {
-                    // Special case: if we see FUNCTION_REF followed by MINUS followed by NUMBER,
-                    // parse them as separate arguments
-                    if (tokens[current].type === TokenType.FUNCTION_REF &&
-                        current + 1 < tokens.length && tokens[current + 1].type === TokenType.MINUS &&
-                        current + 2 < tokens.length && tokens[current + 2].type === TokenType.NUMBER) {
-                        // Parse the function reference
-                        args.push(parsePrimary());
-                        // Parse the unary minus as a separate argument
-                        args.push(parsePrimary());
-                    } else {
-                        // Parse each argument as a complete expression
-                        args.push(parseExpression());
+                // Parenthesized expressions are handled as simple grouping, not function calls
+                // This maintains consistency with the juxtaposition pattern
+                if (current < tokens.length && tokens[current].type === TokenType.LEFT_PAREN) {
+                    current++; // consume '('
+                    
+                    // Parse the expression inside parentheses
+                    const expression = parseLogicalExpression();
+                    
+                    if (current >= tokens.length || tokens[current].type !== TokenType.RIGHT_PAREN) {
+                        throw new Error('Expected ")" after expression');
                     }
+                    current++; // consume ')'
+                    
+                    return expression;
                 }
-                if (args.length > 0) {
-                    return {
-                        type: 'FunctionCall',
-                        name: identifierValue,
-                        args
-                    };
-                }
+                
+                // Juxtaposition function calls are now handled in parseFactor() with proper precedence
                 return { type: 'Identifier', value: identifierValue };
 
             case TokenType.LEFT_PAREN:
@@ -845,6 +959,16 @@ export function parser(tokens) {
                     throw new Error('Expected ")" after expression');
                 }
                 current++;
+                
+                // Check if this is just a simple identifier in parentheses
+                if (expression.type === 'Identifier') {
+                    return { 
+                        type: 'FunctionCall', 
+                        name: 'identity', 
+                        args: [expression] 
+                    };
+                }
+                
                 return expression;
 
             case TokenType.WILDCARD:
@@ -856,7 +980,7 @@ export function parser(tokens) {
                 
 
                 
-            case TokenType.NOT:
+                                                case TokenType.NOT:
                 current++;
                 const operand = parsePrimary();
                 return { 
@@ -866,13 +990,8 @@ export function parser(tokens) {
                 };
                 
             case TokenType.MINUS:
-                current++;
-                const unaryOperand = parsePrimary();
-                return { 
-                    type: 'FunctionCall',
-                    name: 'negate',
-                    args: [unaryOperand]
-                };
+                // Delegate unary minus to parseExpression for proper precedence
+                return parseExpression();
                 
             case TokenType.ARROW:
                 current++;
@@ -880,7 +999,7 @@ export function parser(tokens) {
                 return { type: 'ArrowExpression', body: arrowBody };
                 
             case TokenType.FUNCTION_REF:
-                const functionRef = { type: 'FunctionReference', name: token.name };
+                const functionRef = { type: 'FunctionReference', name: tokens[current].name };
                 current++;
                 return functionRef;
author	elioat <elioat@tilde.institute>	2025-07-27 12:28:44 -0400
committer	elioat <elioat@tilde.institute>	2025-07-27 12:28:44 -0400
commit	59a4a649ad3038857dd091dcc69ca86bf993b3f6 (patch)
tree	7db1cbbbbda88fb268ce91175bad2a31014c1d27 /js/scripting-lang/parser.js
parent	83ece970842db4ca0a619a0392a98b2b7445e482 (diff)
download	tour-master.tar.gz