// parser.js
import { tokenTypes } from './lexer.js';
import { ParseError, ErrorHelpers } from './error.js';

/**
 * Builds a recursive-descent parser over an already-lexed token array.
 *
 * Tokens are read through their `type` and `value` fields; `line` and
 * `column` are only used to build error locations. Reading past the end of
 * the array yields a synthetic EOF token, so a truncated stream surfaces as a
 * parse error instead of a TypeError on `undefined`.
 *
 * @param {Array<object>} tokens - Token stream to parse.
 * @param {boolean} [debugMode=false] - When true, diagnostics go to console.log.
 * @param {string} [source=''] - Source text forwarded to ParseError / ErrorHelpers.
 * @returns {{ parse: function(): object }} Object exposing `parse()`, which
 *   returns a `{ type: 'Program', body: [...] }` node.
 * @throws {ParseError} From `parse()` when the token stream is not valid.
 */
function createParser(tokens, debugMode = false, source = '') {
  let position = 0;

  // Token types that end a whitespace-separated argument list.
  const ARGUMENT_TERMINATORS = [
    tokenTypes.SEMICOLON,
    tokenTypes.EOF,
    tokenTypes.RPAREN,
    tokenTypes.RBRACE,
    tokenTypes.RBRACKET,
    tokenTypes.COMMA,
  ];
  // Keywords that act as binary operators.
  const LOGICAL_KEYWORDS = ['and', 'or', 'xor'];
  // Keywords that are values and may therefore appear as call arguments.
  const LITERAL_KEYWORDS = ['true', 'false', 'PI', 'INFINITY'];

  function log(...args) {
    if (debugMode) {
      console.log(...args);
    }
  }

  // ---------------------------------------------------------------------
  // Token access helpers
  // ---------------------------------------------------------------------

  /** Returns the token at `index`, or a synthetic EOF token when out of range. */
  function tokenAt(index) {
    return tokens[index] || { type: tokenTypes.EOF };
  }

  /** Returns the current token without consuming it. */
  function peek() {
    return tokenAt(position);
  }

  /** Returns the token after the current one without consuming anything. */
  function peek2() {
    return tokenAt(position + 1);
  }

  /** Returns the index of the first non-IDENTIFIER token at or after `start`. */
  function skipIdentifiers(start) {
    let index = start;
    while (index < tokens.length && tokens[index].type === tokenTypes.IDENTIFIER) {
      index++;
    }
    return index;
  }

  /** Builds the location object passed to ParseError for `token`. */
  function locationOf(token) {
    return { line: token.line, column: token.column, length: token.value?.length || 1 };
  }

  /** Builds (does not throw) the generic "Unexpected token" ParseError. */
  function unexpectedTokenError(token, suggestions = []) {
    return new ParseError(
      `Unexpected token: ${token.type} (${token.value})`,
      locationOf(token),
      source,
      suggestions
    );
  }

  function isLogicalKeyword(token) {
    return token.type === tokenTypes.KEYWORD && LOGICAL_KEYWORDS.includes(token.value);
  }

  function isLiteralKeyword(token) {
    return token.type === tokenTypes.KEYWORD && LITERAL_KEYWORDS.includes(token.value);
  }

  function isBinaryOperatorToken(token) {
    return token.type === tokenTypes.OPERATOR || isLogicalKeyword(token);
  }

  /**
   * Consumes the current token and returns it.
   *
   * @param {*} [type] - If given, the current token must have this type.
   * @param {string} [value] - If given, the current token must have this value.
   * @throws {ParseError} When the value does not match; a type mismatch throws
   *   whatever ErrorHelpers.unexpectedToken returns.
   */
  function consume(type, value) {
    const token = peek();
    if (type && token.type !== type) {
      throw ErrorHelpers.unexpectedToken(type, token.type, token, source);
    }
    if (value && token.value !== value) {
      const suggestions = [];
      if (value === 'then' && token.value === 'than') {
        suggestions.push('Use "then" not "than" in when expressions');
      } else if (value === 'is' && token.value === 'in') {
        suggestions.push('Use "is" not "in" for pattern matching');
      }
      throw new ParseError(
        `Expected "${value}" but got "${token.value}"`,
        locationOf(token),
        source,
        suggestions
      );
    }
    position++;
    return token;
  }

  /** Consumes the current token if it is a semicolon; otherwise does nothing. */
  function skipOptionalSemicolon() {
    if (peek().type === tokenTypes.SEMICOLON) {
      consume(tokenTypes.SEMICOLON);
    }
  }

  // ---------------------------------------------------------------------
  // Statements and declarations
  // ---------------------------------------------------------------------

  /**
   * With the parser positioned at `IDENTIFIER COLON`, looks ahead to decide
   * whether a function declaration (rather than a variable declaration) follows.
   */
  function isFunctionDeclarationAhead() {
    const afterColon = position + 2; // After IDENTIFIER and COLON

    if (tokenAt(afterColon).type === tokenTypes.LPAREN) {
      const first = afterColon + 1;
      // Case 1: typed parameters present, e.g. (x: Type ...
      const hasTypedParams =
        tokenAt(first).type === tokenTypes.IDENTIFIER &&
        tokenAt(first + 1).type === tokenTypes.COLON &&
        tokenAt(first + 2).type === tokenTypes.TYPE;
      // Case 2: empty parameter list followed by return annotation/body,
      // e.g. () -> Type -> ...
      const emptyParamsThenArrow =
        tokenAt(first).type === tokenTypes.RPAREN &&
        tokenAt(first + 1).type === tokenTypes.ARROW;
      return hasTypedParams || emptyParamsThenArrow;
    }

    // Untyped form: zero or more identifiers followed by an arrow.
    return tokenAt(skipIdentifiers(afterColon)).type === tokenTypes.ARROW;
  }

  function parseStatement() {
    const token = peek();
    let result;

    if (token.type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.TYPE) {
      result = parseTypeDeclaration();
    } else if (token.type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.COLON) {
      result = isFunctionDeclarationAhead()
        ? parseFunctionDeclaration()
        : parseVariableDeclaration();
    } else {
      result = parseExpression();
    }

    // Consume a trailing semicolon if present. Do not force it.
    skipOptionalSemicolon();
    return result;
  }

  function parseTypeDeclaration() {
    const name = consume(tokenTypes.IDENTIFIER).value;
    const type = consume(tokenTypes.TYPE).value;
    return { type: 'TypeDeclaration', name, typeAnnotation: type };
  }

  function parseVariableDeclaration() {
    const name = consume(tokenTypes.IDENTIFIER).value;
    consume(tokenTypes.COLON);
    const value = parseExpression();
    return { type: 'VariableDeclaration', name, value };
  }

  function parseFunctionDeclaration() {
    const name = consume(tokenTypes.IDENTIFIER).value;
    consume(tokenTypes.COLON);

    let params = [];
    let returnType = null;

    if (peek().type === tokenTypes.LPAREN) {
      // Look ahead to determine if this is curried syntax:
      //   (x: Type) -> (Type -> Type) -> body
      // vs multi-param syntax:
      //   (x: Type, y: Type) -> ReturnType -> body
      const startPos = position;
      consume(tokenTypes.LPAREN);

      // Parse first parameter to check for single-param curried syntax
      if (peek().type === tokenTypes.IDENTIFIER) {
        const paramName = consume(tokenTypes.IDENTIFIER).value;
        if (peek().type === tokenTypes.COLON) {
          consume(tokenTypes.COLON);
          const paramType = parseType();

          // Single-param curried: (x: Type) -> (Type -> Type)
          const isCurried =
            peek().type === tokenTypes.RPAREN &&
            tokenAt(position + 1).type === tokenTypes.ARROW &&
            tokenAt(position + 2).type === tokenTypes.LPAREN;
          if (isCurried) {
            consume(tokenTypes.RPAREN);
            consume(tokenTypes.ARROW);
            // Parse function return type: (Type -> Type)
            const functionReturnType = parseType();
            consume(tokenTypes.ARROW);
            // Extract the final return type from nested function types
            const finalReturnType = extractFinalReturnType(functionReturnType);
            const body = parseCurriedFunctionBody(finalReturnType);
            return {
              type: 'CurriedFunctionDeclaration',
              name,
              param: { name: paramName, type: paramType },
              returnType: functionReturnType,
              body,
            };
          }
        }
      }

      // Not curried: rewind and parse as a multi-parameter function.
      position = startPos;
      consume(tokenTypes.LPAREN);
      params = parseTypedParameters();
      consume(tokenTypes.RPAREN);

      // Parse return type if present
      if (peek().type === tokenTypes.ARROW) {
        consume(tokenTypes.ARROW);
        if (peek().type === tokenTypes.TYPE) {
          returnType = consume(tokenTypes.TYPE).value;
        }
      }
    } else {
      // Untyped function: x y -> body (backward compatibility)
      while (peek().type === tokenTypes.IDENTIFIER) {
        params.push(parseIdentifier());
      }
    }

    // Parse the arrow and body
    if (peek().type !== tokenTypes.ARROW) {
      throw ErrorHelpers.unexpectedToken('ARROW', peek().type, peek(), source);
    }
    consume(tokenTypes.ARROW);

    let body;
    if (peek().type === tokenTypes.IDENTIFIER && peek().value === 'with') {
      // Optional header with-clause
      body = parseWithHeader();
    } else if (peek().type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.ARROW) {
      // Currying: another arrow right after an identifier means a nested function
      body = parseFunctionDeclarationBody(returnType);
    } else {
      body = parseExpression();
    }
    return { type: 'FunctionDeclaration', name, params, body, returnType };
  }

  /** Parses type expressions, including parenthesized function types. */
  function parseType() {
    if (peek().type === tokenTypes.LPAREN) {
      // Function type: (Type1, Type2) -> ReturnType or (Type1 -> Type2)
      consume(tokenTypes.LPAREN);

      if (peek().type !== tokenTypes.TYPE) {
        throw ErrorHelpers.unexpectedToken('TYPE', peek().type, peek(), source);
      }
      const firstType = parseType();

      if (peek().type === tokenTypes.ARROW) {
        // Single parameter function type: (Type -> Type)
        consume(tokenTypes.ARROW);
        const returnType = parseType();
        consume(tokenTypes.RPAREN);
        return { type: 'FunctionType', paramTypes: [firstType], returnType };
      }

      // Multi-parameter function type: (Type1, Type2) -> ReturnType
      const paramTypes = [firstType];
      while (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
        paramTypes.push(parseType());
      }
      consume(tokenTypes.RPAREN);
      consume(tokenTypes.ARROW);
      const returnType = parseType();
      return { type: 'FunctionType', paramTypes, returnType };
    }

    if (peek().type === tokenTypes.TYPE) {
      return { type: 'PrimitiveType', name: consume(tokenTypes.TYPE).value };
    }

    throw ErrorHelpers.unexpectedToken('TYPE', peek().type, peek(), source);
  }

  /** Follows `returnType` links through nested FunctionType nodes to the innermost type. */
  function extractFinalReturnType(type) {
    if (type && type.type === 'FunctionType') {
      return extractFinalReturnType(type.returnType);
    }
    return type;
  }

  /** Parses typed parameters: x: Int, y: String */
  function parseTypedParameters() {
    const params = [];
    while (peek().type === tokenTypes.IDENTIFIER) {
      const paramName = consume(tokenTypes.IDENTIFIER).value;
      if (peek().type === tokenTypes.COLON) {
        consume(tokenTypes.COLON);
        const paramType = parseType();
        params.push({ name: paramName, type: paramType });
      } else {
        // Untyped parameter (for backward compatibility)
        params.push({ name: paramName, type: null });
      }

      // Check for comma separator (tolerate legacy OPERATOR ',')
      const separator = peek();
      if (separator.type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      } else if (separator.type === tokenTypes.OPERATOR && separator.value === ',') {
        consume(tokenTypes.OPERATOR);
      } else if (separator.type !== tokenTypes.RPAREN) {
        break; // No comma and not closing paren, so end of parameters
      }
    }
    return params;
  }

  /** Consumes consecutive identifiers and returns their names as strings. */
  function parseParameterNames() {
    const names = [];
    while (peek().type === tokenTypes.IDENTIFIER) {
      names.push(consume(tokenTypes.IDENTIFIER).value);
    }
    return names;
  }

  /** Parses the `x -> y -> body` tail of the typed curried declaration syntax. */
  function parseCurriedFunctionBody(finalReturnType = null) {
    const params = [];

    // Untyped parameters in the curried chain, each followed by its own arrow
    while (peek().type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.ARROW) {
      params.push(parseIdentifier());
      consume(tokenTypes.ARROW);
    }

    // Final expression or with-header
    let body;
    if (peek().type === tokenTypes.IDENTIFIER && peek().value === 'with') {
      body = parseWithHeader();
    } else {
      body = parseExpression();
    }

    return { type: 'CurriedFunctionBody', params, body, returnType: finalReturnType };
  }

  /** Parses the body of a nested function produced by currying. */
  function parseFunctionDeclarationBody(parentReturnType = null) {
    let params = [];
    let returnType = parentReturnType;

    if (peek().type === tokenTypes.LPAREN) {
      // Typed parameters
      consume(tokenTypes.LPAREN);
      params = parseTypedParameters();
      consume(tokenTypes.RPAREN);

      // Parse return type if present
      if (peek().type === tokenTypes.ARROW) {
        consume(tokenTypes.ARROW);
        if (peek().type === tokenTypes.TYPE) {
          returnType = consume(tokenTypes.TYPE).value;
        }
      }
    } else {
      // Untyped parameters (backward compatibility)
      while (peek().type === tokenTypes.IDENTIFIER) {
        params.push(parseIdentifier());
      }
    }

    consume(tokenTypes.ARROW);

    let body;
    if (peek().type === tokenTypes.IDENTIFIER && peek().value === 'with') {
      // Optional header with-clause
      body = parseWithHeader();
    } else if (peek().type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.ARROW) {
      body = parseFunctionDeclarationBody(returnType);
    } else {
      body = parseExpression();
    }
    return { type: 'FunctionDeclarationBody', params, body, returnType };
  }

  /** Parses a with-header: with (entries...) -> body */
  function parseWithHeader() {
    const withToken = consume(tokenTypes.IDENTIFIER);
    if (withToken.value !== 'with') {
      throw new ParseError(
        `Expected 'with' but got '${withToken.value}'`,
        locationOf(withToken),
        source,
        ['Use "with" to define local bindings', 'Check syntax for local variable declarations']
      );
    }

    let recursive = false;
    if (
      peek().type === tokenTypes.IDENTIFIER &&
      (peek().value === 'rec' || peek().value === 'recursion')
    ) {
      consume(tokenTypes.IDENTIFIER);
      recursive = true;
    }

    consume(tokenTypes.LPAREN);
    const entries = [];
    const entryEndTokens = [tokenTypes.SEMICOLON, tokenTypes.RPAREN];

    while (peek().type !== tokenTypes.RPAREN) {
      if (peek().type === tokenTypes.SEMICOLON) {
        consume(tokenTypes.SEMICOLON);
        continue;
      }

      const name = consume(tokenTypes.IDENTIFIER).value;

      if (peek().type === tokenTypes.COLON) {
        // Assignment: name : expr; (supports arrow-literal: params -> body)
        consume(tokenTypes.COLON);

        // An arrow literal here needs at least one parameter: x y -> body
        const paramsEnd = skipIdentifiers(position);
        const sawParams = paramsEnd > position;
        const isArrow = tokenAt(paramsEnd).type === tokenTypes.ARROW;

        let value;
        if (sawParams && isArrow) {
          const params = parseParameterNames();
          consume(tokenTypes.ARROW);
          const body = parseExpression(true, entryEndTokens);
          value = { type: 'AnonymousFunction', params, body };
        } else if (peek().type === tokenTypes.KEYWORD && peek().value === 'when') {
          // When expressions have their own termination logic
          value = parseWhenExpression();
        } else {
          value = parseExpression(true, entryEndTokens);
        }
        entries.push({ type: 'WithAssign', name, value });
        skipOptionalSemicolon();
      } else {
        // Type decl: name Type; (Type can be primitive or function type)
        const typeAnnotation = parseType();
        entries.push({ type: 'WithTypeDecl', name, typeAnnotation });
        skipOptionalSemicolon();
      }
    }

    consume(tokenTypes.RPAREN);
    consume(tokenTypes.ARROW);
    const body = parseExpression();
    return { type: 'WithHeader', recursive, entries, body };
  }

  // ---------------------------------------------------------------------
  // Expressions
  // ---------------------------------------------------------------------

  /** Operator precedence (higher number = binds tighter; 0 = unknown operator). */
  function getOperatorPrecedence(operator) {
    switch (operator) {
      case '..':
        return 1; // String concatenation (lowest)
      case 'or':
        return 2; // Logical OR
      case 'and':
        return 3; // Logical AND
      case 'xor':
        return 4; // XOR
      case '=':
      case '!=':
      case '>':
      case '<':
      case '>=':
      case '<=':
        return 5; // Comparison
      case '+':
      case '-':
        return 6; // Addition/Subtraction
      case '*':
      case '/':
      case '%':
        return 7; // Multiplication/Division (highest)
      default:
        return 0;
    }
  }

  /**
   * Parses an expression up to one of `endTokens`.
   *
   * NOTE: when the next tokens look like the start of a `when` case pattern,
   * this returns a placeholder Identifier named 'undefined' WITHOUT consuming
   * anything. Loops that call this must check that the position advanced.
   */
  function parseExpression(
    allowFunctionCalls = true,
    endTokens = [tokenTypes.EOF, tokenTypes.SEMICOLON]
  ) {
    if (isNextPattern()) {
      return { type: 'Identifier', name: 'undefined' };
    }
    return parseExpressionWithPrecedence(allowFunctionCalls, endTokens, 0);
  }

  /** A consequent ends at a semicolon, EOF, or any keyword. */
  function parseConsequentExpression() {
    return parseExpression(true, [tokenTypes.SEMICOLON, tokenTypes.EOF, tokenTypes.KEYWORD]);
  }

  /**
   * Parses a `when` discriminant. Logical keywords (and/or/xor) are allowed;
   * parsing stops at the `is` keyword, a semicolon, EOF or a closing paren.
   */
  function parseExpressionForDiscriminant() {
    const operandEndTokens = [tokenTypes.SEMICOLON, tokenTypes.EOF, tokenTypes.RPAREN];
    let expr = parsePrimary(true);

    while (true) {
      const nextToken = peek();

      if (nextToken.type === tokenTypes.KEYWORD && nextToken.value === 'is') {
        break;
      }
      if (operandEndTokens.includes(nextToken.type)) {
        break;
      }
      if (!isBinaryOperatorToken(nextToken)) {
        break;
      }

      const operator = nextToken.value;
      const precedence = getOperatorPrecedence(operator);
      consume(nextToken.type);
      const right = parseExpressionWithPrecedence(true, operandEndTokens, precedence + 1);
      expr = { type: 'BinaryExpression', operator, left: expr, right };
    }

    return expr;
  }

  /** True when the upcoming tokens look like the pattern of the next `when` case. */
  function isNextPattern() {
    const token = peek();

    // Wildcard pattern
    if (token.type === tokenTypes.IDENTIFIER && token.value === '_') {
      return true;
    }

    // Number, string or type pattern followed by 'then'
    const isLiteralPatternStart =
      token.type === tokenTypes.NUMBER ||
      token.type === tokenTypes.STRING ||
      token.type === tokenTypes.TYPE;
    if (isLiteralPatternStart) {
      const nextToken = peek2();
      return nextToken.type === tokenTypes.KEYWORD && nextToken.value === 'then';
    }

    return false;
  }

  /** True when the current token is the wildcard `_`, which stops argument parsing. */
  function isPatternMarker() {
    const token = peek();
    return token.type === tokenTypes.IDENTIFIER && token.value === '_';
  }

  /** Precedence-climbing loop for left-associative binary operators. */
  function parseExpressionWithPrecedence(allowFunctionCalls, endTokens, minPrecedence) {
    let left = parsePrimary(allowFunctionCalls);

    while (true) {
      const nextToken = peek();

      if (endTokens.includes(nextToken.type) || nextToken.type === tokenTypes.EOF) {
        break;
      }
      if (!isBinaryOperatorToken(nextToken)) {
        break; // No operator, so end of expression
      }

      const operator = nextToken.value;
      const precedence = getOperatorPrecedence(operator);
      // If this operator has lower precedence than minimum, stop
      if (precedence < minPrecedence) {
        break;
      }

      consume(nextToken.type);
      // Parse right side with higher precedence (left associative)
      const right = parseExpressionWithPrecedence(allowFunctionCalls, endTokens, precedence + 1);
      left = { type: 'BinaryExpression', operator, left, right };
    }

    return left;
  }

  /** Wraps `object` in MemberExpression nodes for each following `.property`. */
  function parseMemberChain(object) {
    let expression = object;
    while (peek().type === tokenTypes.DOT) {
      consume(tokenTypes.DOT);
      const property = parsePrimary(false); // Allow number or string literals as properties
      expression = { type: 'MemberExpression', object: expression, property };
    }
    return expression;
  }

  /** True when the current token can begin a whitespace-separated argument list. */
  function canStartCallArguments() {
    const { type } = peek();
    return type !== tokenTypes.OPERATOR && !ARGUMENT_TERMINATORS.includes(type);
  }

  /**
   * Parses whitespace-separated call arguments.
   *
   * @param {boolean} stopAtPatternMarker - When true, a wildcard `_` ends the list.
   */
  function parseCallArguments(stopAtPatternMarker) {
    const args = [];
    while (!ARGUMENT_TERMINATORS.includes(peek().type)) {
      if (stopAtPatternMarker && isPatternMarker()) {
        break;
      }

      const next = peek();
      if (next.type !== tokenTypes.KEYWORD) {
        args.push(parsePrimary(false));
      } else if (next.value === 'true' || next.value === 'false') {
        args.push(parseBooleanLiteral());
      } else if (next.value === 'PI' || next.value === 'INFINITY') {
        args.push(parseConstant());
      } else {
        break; // Stop at other keywords
      }
    }
    return args;
  }

  /** Parses the remainder of a parenthesized form after `(` has been consumed. */
  function parseParenthesized(allowFunctionCalls) {
    const innerEndTokens = [tokenTypes.RPAREN, tokenTypes.SEMICOLON];

    // It's an anonymous function if we see identifiers (possibly none)
    // followed by an ARROW
    const isAnonymousFunction = tokenAt(skipIdentifiers(position)).type === tokenTypes.ARROW;

    if (!isAnonymousFunction) {
      const expression = parseExpression(true, innerEndTokens);
      consume(tokenTypes.RPAREN);
      return expression;
    }

    const params = [];
    while (peek().type === tokenTypes.IDENTIFIER) {
      params.push(parseIdentifier());
    }
    consume(tokenTypes.ARROW);
    // Allow an optional semicolon to terminate the function body before ')'
    const body = parseExpression(true, innerEndTokens);
    skipOptionalSemicolon();
    consume(tokenTypes.RPAREN);
    const anonymousFunc = { type: 'AnonymousFunction', params, body };

    // Immediately followed by arguments: treat as a call
    if (allowFunctionCalls && canStartCallArguments()) {
      const args = parseCallArguments(false);
      return { type: 'FunctionCall', callee: anonymousFunc, arguments: args };
    }
    return anonymousFunc;
  }

  function parsePrimary(allowFunctionCalls = true) {
    const token = peek();

    if (token.type === tokenTypes.NUMBER) {
      return parseNumber();
    }
    if (token.type === tokenTypes.STRING) {
      return parseString();
    }

    if (token.type === tokenTypes.IDENTIFIER) {
      const identifier = parseMemberChain(parseIdentifier());

      // If the next token is a semicolon, a pattern marker, or a non-literal
      // keyword, this is a variable reference, not a function call.
      // Boolean/constant keywords are NOT blocked: they are valid arguments.
      const next = peek();
      const isNonLiteralKeyword = next.type === tokenTypes.KEYWORD && !isLiteralKeyword(next);
      if (next.type === tokenTypes.SEMICOLON || isPatternMarker() || isNonLiteralKeyword) {
        return identifier;
      }

      if (allowFunctionCalls && canStartCallArguments()) {
        const args = parseCallArguments(true);
        return { type: 'FunctionCall', callee: identifier, arguments: args };
      }
      return identifier;
    }

    if (token.type === tokenTypes.KEYWORD) {
      if (token.value === 'Ok' || token.value === 'Err') {
        return parseResultExpression();
      }
      if (token.value === 'when') {
        return parseWhenExpression();
      }
      if (token.value === 'true' || token.value === 'false') {
        return parseBooleanLiteral();
      }
      if (token.value === 'PI' || token.value === 'INFINITY') {
        return parseConstant();
      }
    }

    if (token.type === tokenTypes.OPERATOR && token.value === '-') {
      // Unary minus
      consume(tokenTypes.OPERATOR);
      const operand = parsePrimary(allowFunctionCalls);
      return { type: 'UnaryExpression', operator: '-', operand };
    }

    if (token.type === tokenTypes.LPAREN) {
      consume(tokenTypes.LPAREN);
      return parseParenthesized(allowFunctionCalls);
    }

    if (token.type === tokenTypes.LBRACKET) {
      return parseMemberChain(parseListLiteral());
    }

    if (token.type === tokenTypes.LBRACE) {
      return parseMemberChain(parseTableLiteral());
    }

    const suggestions = [];
    if (token.type === tokenTypes.EOF) {
      suggestions.push('Check for missing closing parentheses, braces, or brackets');
    }
    throw unexpectedTokenError(token, suggestions);
  }

  function parseListLiteral() {
    consume(tokenTypes.LBRACKET);
    const elements = [];
    while (peek().type !== tokenTypes.RBRACKET) {
      const before = position;
      // Parse each element, stopping at comma or closing bracket
      elements.push(parseExpression(true, [tokenTypes.COMMA, tokenTypes.RBRACKET]));

      // Check for comma separator
      if (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      }

      // parseExpression can return without consuming (e.g. at a `_`); report
      // that as an error rather than looping forever on the same token.
      if (position === before) {
        throw unexpectedTokenError(peek());
      }
    }
    consume(tokenTypes.RBRACKET);
    return { type: 'ListLiteral', elements };
  }

  /** Parses `params... -> body` where the parameters are bare identifiers. */
  function parseArrowFunction() {
    const params = parseParameterNames();
    consume(tokenTypes.ARROW);
    const body = parseExpression(true);
    return { type: 'AnonymousFunction', params, body };
  }

  function parseTableLiteral() {
    consume(tokenTypes.LBRACE);
    const properties = [];
    const valueEndTokens = [tokenTypes.SEMICOLON, tokenTypes.COMMA, tokenTypes.RBRACE];

    while (peek().type !== tokenTypes.RBRACE) {
      // Check if we've hit a pattern marker (for when expressions)
      if (isPatternMarker()) {
        break;
      }

      const key = consume(tokenTypes.IDENTIFIER).value;
      consume(tokenTypes.COLON);

      // After the colon, IDENTIFIER* ARROW (zero parameters allowed) marks an
      // arrow function value.
      const isArrow = tokenAt(skipIdentifiers(position)).type === tokenTypes.ARROW;

      let value;
      if (isArrow) {
        const params = parseParameterNames();
        consume(tokenTypes.ARROW);
        // Stop at semicolon, comma, or closing brace to avoid parsing too much
        const body = parseExpression(true, valueEndTokens);
        value = { type: 'AnonymousFunction', params, body };
      } else {
        value = parseExpression(true, valueEndTokens);
      }
      // A semicolon after the value is optional in table literals
      skipOptionalSemicolon();

      properties.push({ key, value });

      // Check for comma separator
      if (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      }
    }

    consume(tokenTypes.RBRACE);
    return { type: 'TableLiteral', properties };
  }

  function parseResultExpression() {
    const variant = consume(tokenTypes.KEYWORD).value;
    const value = parsePrimary(true);
    return { type: 'ResultExpression', variant, value };
  }

  function parseWhenExpression() {
    consume(tokenTypes.KEYWORD, 'when');

    const discriminants = [];
    while (peek().type !== tokenTypes.KEYWORD || peek().value !== 'is') {
      // Discriminants may contain logical operators; only 'is' ends the list
      discriminants.push(parseExpressionForDiscriminant());
    }
    consume(tokenTypes.KEYWORD, 'is');

    const cases = [];
    while (
      peek().type !== tokenTypes.SEMICOLON &&
      peek().type !== tokenTypes.EOF &&
      peek().type !== tokenTypes.RPAREN
    ) {
      const patterns = [];
      while (peek().type !== tokenTypes.KEYWORD || peek().value !== 'then') {
        patterns.push(parsePattern());
      }
      consume(tokenTypes.KEYWORD, 'then');

      let consequent;
      if (peek().type === tokenTypes.KEYWORD && peek().value === 'when') {
        // Nested when expression - parse it completely
        consequent = parseWhenExpression();
      } else {
        // Regular expression - parse until the consequent boundary
        consequent = parseConsequentExpression();
      }
      cases.push({ type: 'WhenCase', patterns, consequent });
    }

    return { type: 'WhenExpression', discriminants, cases };
  }

  function parsePattern() {
    const token = peek();
    let pattern;

    if (token.type === tokenTypes.TYPE) {
      const typeToken = consume(tokenTypes.TYPE);
      pattern = { type: 'TypePattern', name: typeToken.value };
    } else if (
      token.type === tokenTypes.KEYWORD &&
      (token.value === 'Ok' || token.value === 'Err')
    ) {
      const variant = consume(tokenTypes.KEYWORD).value;
      const identifier = parseIdentifier();
      pattern = { type: 'ResultPattern', variant, identifier };
    } else if (token.type === tokenTypes.IDENTIFIER && token.value === '_') {
      // Wildcard pattern
      consume(tokenTypes.IDENTIFIER);
      pattern = { type: 'WildcardPattern' };
    } else if (token.type === tokenTypes.LBRACKET) {
      pattern = parseListPattern();
    } else if (token.type === tokenTypes.LBRACE) {
      pattern = parseTablePattern();
    } else {
      pattern = parsePrimary(false);
    }

    // Check for guard clause
    if (peek().type === tokenTypes.KEYWORD && peek().value === 'if') {
      consume(tokenTypes.KEYWORD); // consume 'if'
      const guard = parseExpression(true, [
        tokenTypes.KEYWORD,
        tokenTypes.SEMICOLON,
        tokenTypes.EOF,
        tokenTypes.RPAREN,
      ]);
      return { type: 'GuardPattern', pattern, guard };
    }

    return pattern;
  }

  function parseListPattern() {
    consume(tokenTypes.LBRACKET);
    const elements = [];
    while (peek().type !== tokenTypes.RBRACKET) {
      elements.push(parsePattern());
      // Check for comma separator
      if (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      }
    }
    consume(tokenTypes.RBRACKET);
    return { type: 'ListPattern', elements };
  }

  function parseTablePattern() {
    consume(tokenTypes.LBRACE);
    const properties = [];
    while (peek().type !== tokenTypes.RBRACE) {
      const key = consume(tokenTypes.IDENTIFIER).value;
      consume(tokenTypes.COLON);
      const value = parsePattern();
      properties.push({ key, value });
      // Check for comma separator
      if (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      }
    }
    consume(tokenTypes.RBRACE);
    return { type: 'TablePattern', properties };
  }

  // ---------------------------------------------------------------------
  // Leaves
  // ---------------------------------------------------------------------

  function parseNumber() {
    const token = consume(tokenTypes.NUMBER);
    return { type: 'NumberLiteral', value: token.value, isFloat: token.isFloat };
  }

  function parseString() {
    const token = consume(tokenTypes.STRING);
    return { type: 'StringLiteral', value: token.value };
  }

  function parseBooleanLiteral() {
    const token = consume(tokenTypes.KEYWORD);
    return { type: 'BooleanLiteral', value: token.value === 'true' };
  }

  function parseConstant() {
    const token = consume(tokenTypes.KEYWORD);
    if (token.value === 'PI') {
      return { type: 'NumberLiteral', value: Math.PI, isFloat: true };
    }
    if (token.value === 'INFINITY') {
      return { type: 'NumberLiteral', value: Infinity, isFloat: true };
    }
    throw new ParseError(
      `Unknown constant: ${token.value}`,
      locationOf(token),
      source,
      ['Use PI or INFINITY for mathematical constants', 'Check spelling of constant names']
    );
  }

  function parseIdentifier() {
    const token = consume(tokenTypes.IDENTIFIER);
    return { type: 'Identifier', name: token.value };
  }

  /**
   * Parses the whole token stream into a Program node.
   *
   * @returns {{ type: 'Program', body: Array<object> }}
   * @throws {ParseError} On invalid input, including a statement that cannot
   *   consume any token (which would otherwise repeat forever).
   */
  function parse() {
    log('Parser received tokens:', tokens);
    const program = { type: 'Program', body: [] };
    while (peek().type !== tokenTypes.EOF) {
      const before = position;
      program.body.push(parseStatement());
      if (position === before) {
        throw unexpectedTokenError(peek());
      }
    }
    return program;
  }

  return {
    parse,
  };
}

export { createParser };