diff options
Diffstat (limited to 'js/baba-yaga/src/core/parser.js')
-rw-r--r-- | js/baba-yaga/src/core/parser.js | 1045 |
1 files changed, 1045 insertions, 0 deletions
// parser.js
// Path in repository: js/baba-yaga/src/core/parser.js

import { tokenTypes } from './lexer.js';
import { ParseError, ErrorHelpers } from './error.js';

/**
 * Creates a recursive-descent parser over a token array.
 *
 * @param {Array<object>} tokens - Tokens to parse; each token is read through
 *   its `type`, `value`, `line` and `column` properties (numbers also `isFloat`).
 * @param {boolean} [debugMode=false] - When true, `parse()` logs the token array.
 * @param {string} [source=''] - Original source text, forwarded to error objects.
 * @returns {{ parse: function(): object }} Object exposing `parse()`, which
 *   returns a `{ type: 'Program', body }` AST node.
 * @throws {ParseError} From `parse()` when the token stream is not valid.
 */
function createParser(tokens, debugMode = false, source = '') {
  let position = 0;

  // Sentinel returned by every lookahead that runs past the end of `tokens`,
  // so callers can read `.type` without a null check.
  const EOF_TOKEN = { type: tokenTypes.EOF };

  // Keywords that act as literals and are therefore valid call arguments.
  const LITERAL_KEYWORDS = ['true', 'false', 'PI', 'INFINITY'];

  // Keywords that act as binary (logical) operators.
  const LOGICAL_KEYWORDS = ['and', 'or', 'xor'];

  // Token types that end a whitespace-separated argument list.
  const ARGUMENT_TERMINATORS = [
    tokenTypes.SEMICOLON,
    tokenTypes.EOF,
    tokenTypes.RPAREN,
    tokenTypes.RBRACE,
    tokenTypes.RBRACKET,
    tokenTypes.COMMA,
  ];

  /** Logs its arguments to the console when debug mode is enabled. */
  function log(...args) {
    if (debugMode) {
      console.log(...args);
    }
  }

  /** Returns the current token, or the EOF sentinel past the end of input. */
  function peek() {
    return tokens[position] ?? EOF_TOKEN;
  }

  /** Returns the token after the current one, or the EOF sentinel. */
  function peek2() {
    return tokens[position + 1] ?? EOF_TOKEN;
  }

  /** Returns the type of the token at an absolute index (EOF when out of range). */
  function typeAt(index) {
    return (tokens[index] ?? EOF_TOKEN).type;
  }

  /** Builds the location object that ParseError expects for a token. */
  function locationOf(token) {
    return { line: token.line, column: token.column, length: token.value?.length || 1 };
  }

  /**
   * Consumes the current token, optionally checking its type and value.
   *
   * @param {*} [type] - Expected token type; skipped when falsy.
   * @param {string} [value] - Expected token value; skipped when falsy.
   * @returns {object} The consumed token.
   * @throws {ParseError} When the value does not match. A type mismatch throws
   *   whatever `ErrorHelpers.unexpectedToken` returns.
   */
  function consume(type, value) {
    const token = peek();
    if (type && token.type !== type) {
      throw ErrorHelpers.unexpectedToken(type, token.type, token, source);
    }
    if (value && token.value !== value) {
      const suggestions = [];
      if (value === 'then' && token.value === 'than') {
        suggestions.push('Use "then" not "than" in when expressions');
      } else if (value === 'is' && token.value === 'in') {
        suggestions.push('Use "is" not "in" for pattern matching');
      }

      throw new ParseError(
        `Expected "${value}" but got "${token.value}"`,
        locationOf(token),
        source,
        suggestions
      );
    }
    position++;
    return token;
  }

  /** Consumes a semicolon if one is next; semicolons are never required here. */
  function skipOptionalSemicolon() {
    if (peek().type === tokenTypes.SEMICOLON) {
      consume(tokenTypes.SEMICOLON);
    }
  }

  /**
   * Throws when no token was consumed since `startPosition`.
   *
   * parseExpression() returns a placeholder without advancing when it sees a
   * pattern marker; a caller that loops on it would otherwise never terminate.
   */
  function ensureProgress(startPosition) {
    if (position !== startPosition) {
      return;
    }
    const token = peek();
    throw new ParseError(
      `Unexpected token: ${token.type} (${token.value})`,
      locationOf(token),
      source,
      ['Pattern markers such as "_" are only valid inside when expressions']
    );
  }

  /** True when the token is an OPERATOR or one of the logical keywords. */
  function isBinaryOperatorToken(token) {
    return token.type === tokenTypes.OPERATOR ||
      (token.type === tokenTypes.KEYWORD && LOGICAL_KEYWORDS.includes(token.value));
  }

  /** True when the current token is a keyword with the given value. */
  function isKeyword(value) {
    const token = peek();
    return token.type === tokenTypes.KEYWORD && token.value === value;
  }

  /** True when the current token is the identifier `with`. */
  function isWithIdentifier() {
    const token = peek();
    return token.type === tokenTypes.IDENTIFIER && token.value === 'with';
  }

  /**
   * Parses one statement: a type declaration, a function or variable
   * declaration, or a bare expression. A trailing semicolon is consumed
   * when present but is not required.
   */
  function parseStatement() {
    const token = peek();
    let result;

    if (token.type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.TYPE) {
      result = parseTypeDeclaration();
    } else if (token.type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.COLON) {
      result = looksLikeFunctionDeclaration()
        ? parseFunctionDeclaration()
        : parseVariableDeclaration();
    } else {
      result = parseExpression();
    }

    skipOptionalSemicolon();
    return result;
  }

  /**
   * Looks ahead from `IDENTIFIER COLON` to decide between a function and a
   * variable declaration. Does not consume any tokens.
   */
  function looksLikeFunctionDeclaration() {
    let lookAheadPos = position + 2; // After IDENTIFIER and COLON

    if (typeAt(lookAheadPos) === tokenTypes.LPAREN) {
      const parenPos = lookAheadPos + 1;
      // Case 1: typed parameters present, e.g. (x: Int ...
      const hasTypedParams =
        typeAt(parenPos) === tokenTypes.IDENTIFIER &&
        typeAt(parenPos + 1) === tokenTypes.COLON &&
        typeAt(parenPos + 2) === tokenTypes.TYPE;
      // Case 2: empty parameter list followed by an arrow, e.g. () -> Type -> ...
      const emptyParamsThenArrow =
        typeAt(parenPos) === tokenTypes.RPAREN &&
        typeAt(parenPos + 1) === tokenTypes.ARROW;
      return hasTypedParams || emptyParamsThenArrow;
    }

    // Untyped form: zero or more identifiers followed by an arrow.
    while (lookAheadPos < tokens.length && tokens[lookAheadPos].type === tokenTypes.IDENTIFIER) {
      lookAheadPos++;
    }
    return lookAheadPos < tokens.length && tokens[lookAheadPos].type === tokenTypes.ARROW;
  }

  /** Parses `name Type` into a TypeDeclaration node. */
  function parseTypeDeclaration() {
    const name = consume(tokenTypes.IDENTIFIER).value;
    const type = consume(tokenTypes.TYPE).value;
    return { type: 'TypeDeclaration', name, typeAnnotation: type };
  }

  /** Parses `name : expression` into a VariableDeclaration node. */
  function parseVariableDeclaration() {
    const name = consume(tokenTypes.IDENTIFIER).value;
    consume(tokenTypes.COLON);
    const value = parseExpression();
    return { type: 'VariableDeclaration', name, value };
  }

  /**
   * Parses a function declaration in one of three forms:
   *   - typed curried:     name : (x: Type) -> (Type -> Type) -> body
   *   - typed multi-param: name : (x: Type, y: Type) -> ReturnType -> body
   *   - untyped:           name : x y -> body
   *
   * @returns {object} A CurriedFunctionDeclaration or FunctionDeclaration node.
   */
  function parseFunctionDeclaration() {
    const name = consume(tokenTypes.IDENTIFIER).value;
    consume(tokenTypes.COLON);

    let params = [];
    let returnType = null;

    if (peek().type === tokenTypes.LPAREN) {
      // Speculatively parse the first parameter to detect the curried form;
      // `startPos` lets us rewind if it turns out to be the multi-param form.
      const startPos = position;
      consume(tokenTypes.LPAREN);

      if (peek().type === tokenTypes.IDENTIFIER) {
        const paramName = consume(tokenTypes.IDENTIFIER).value;
        if (peek().type === tokenTypes.COLON) {
          consume(tokenTypes.COLON);
          const paramType = parseType();

          // Single-param curried: (x: Type) -> (Type -> Type)
          if (peek().type === tokenTypes.RPAREN &&
              typeAt(position + 1) === tokenTypes.ARROW &&
              typeAt(position + 2) === tokenTypes.LPAREN) {
            consume(tokenTypes.RPAREN);
            consume(tokenTypes.ARROW);

            // Function return type: (Type -> Type)
            const functionReturnType = parseType();
            consume(tokenTypes.ARROW);

            const finalReturnType = extractFinalReturnType(functionReturnType);
            const body = parseCurriedFunctionBody(finalReturnType);

            return {
              type: 'CurriedFunctionDeclaration',
              name,
              param: { name: paramName, type: paramType },
              returnType: functionReturnType,
              body
            };
          }
        }
      }

      // Rewind and parse as a multi-parameter function.
      position = startPos;
      consume(tokenTypes.LPAREN);
      params = parseTypedParameters();
      consume(tokenTypes.RPAREN);

      // NOTE(review): when this arrow is consumed but no TYPE follows, the
      // ARROW check below fails, so a typed parameter list without a return
      // type is rejected. Confirm whether that is intended.
      if (peek().type === tokenTypes.ARROW) {
        consume(tokenTypes.ARROW);
        if (peek().type === tokenTypes.TYPE) {
          returnType = consume(tokenTypes.TYPE).value;
        }
      }
    } else {
      // Untyped function: x y -> body
      while (peek().type === tokenTypes.IDENTIFIER) {
        params.push(parseIdentifier());
      }
    }

    if (peek().type !== tokenTypes.ARROW) {
      throw ErrorHelpers.unexpectedToken('ARROW', peek().type, peek(), source);
    }
    consume(tokenTypes.ARROW);

    let body;
    if (isWithIdentifier()) {
      // Optional header with-clause
      body = parseWithHeader();
    } else if (peek().type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.ARROW) {
      // Currying: another arrow means a nested function
      body = parseFunctionDeclarationBody(returnType);
    } else {
      body = parseExpression();
    }
    return { type: 'FunctionDeclaration', name, params, body, returnType };
  }

  /**
   * Parses a type expression: a primitive `Type`, a single-parameter function
   * type `(Type -> Type)`, or a multi-parameter one `(Type1, Type2) -> Type`.
   *
   * @returns {object} A PrimitiveType or FunctionType node.
   */
  function parseType() {
    if (peek().type === tokenTypes.TYPE) {
      return { type: 'PrimitiveType', name: consume(tokenTypes.TYPE).value };
    }
    if (peek().type !== tokenTypes.LPAREN) {
      throw ErrorHelpers.unexpectedToken('TYPE', peek().type, peek(), source);
    }

    consume(tokenTypes.LPAREN);
    if (peek().type !== tokenTypes.TYPE) {
      throw ErrorHelpers.unexpectedToken('TYPE', peek().type, peek(), source);
    }

    const firstType = parseType();
    if (peek().type === tokenTypes.ARROW) {
      // (Type -> Type)
      consume(tokenTypes.ARROW);
      const returnType = parseType();
      consume(tokenTypes.RPAREN);
      return { type: 'FunctionType', paramTypes: [firstType], returnType };
    }

    // (Type1, Type2) -> ReturnType
    const paramTypes = [firstType];
    while (peek().type === tokenTypes.COMMA) {
      consume(tokenTypes.COMMA);
      paramTypes.push(parseType());
    }
    consume(tokenTypes.RPAREN);
    consume(tokenTypes.ARROW);
    const returnType = parseType();
    return { type: 'FunctionType', paramTypes, returnType };
  }

  /** Follows `returnType` through nested FunctionType nodes to the innermost one. */
  function extractFinalReturnType(type) {
    if (type && type.type === 'FunctionType') {
      return extractFinalReturnType(type.returnType);
    }
    return type;
  }

  /**
   * Parses typed parameters such as `x: Int, y: String`. A parameter without
   * a `: Type` suffix is recorded with `type: null`.
   *
   * @returns {Array<{name: string, type: (object|null)}>}
   */
  function parseTypedParameters() {
    const params = [];

    while (peek().type === tokenTypes.IDENTIFIER) {
      const paramName = consume(tokenTypes.IDENTIFIER).value;

      if (peek().type === tokenTypes.COLON) {
        consume(tokenTypes.COLON);
        params.push({ name: paramName, type: parseType() });
      } else {
        params.push({ name: paramName, type: null });
      }

      // Comma separator; a legacy OPERATOR ',' token is tolerated as well.
      if (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      } else if (peek().type === tokenTypes.OPERATOR && peek().value === ',') {
        consume(tokenTypes.OPERATOR);
      } else if (peek().type !== tokenTypes.RPAREN) {
        break; // No comma and not a closing paren: end of parameters
      }
    }

    return params;
  }

  /**
   * Parses the remainder of a typed curried function: zero or more
   * `identifier ->` links followed by the final expression or with-header.
   */
  function parseCurriedFunctionBody(finalReturnType = null) {
    const params = [];

    while (peek().type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.ARROW) {
      params.push(parseIdentifier());
      consume(tokenTypes.ARROW);
    }

    const body = isWithIdentifier() ? parseWithHeader() : parseExpression();
    return { type: 'CurriedFunctionBody', params, body, returnType: finalReturnType };
  }

  /**
   * Parses the body of a nested (curried) function. The parent's return type
   * is inherited unless this level declares its own.
   */
  function parseFunctionDeclarationBody(parentReturnType = null) {
    let params = [];
    let returnType = parentReturnType;

    if (peek().type === tokenTypes.LPAREN) {
      consume(tokenTypes.LPAREN);
      params = parseTypedParameters();
      consume(tokenTypes.RPAREN);

      if (peek().type === tokenTypes.ARROW) {
        consume(tokenTypes.ARROW);
        if (peek().type === tokenTypes.TYPE) {
          returnType = consume(tokenTypes.TYPE).value;
        }
      }
    } else {
      while (peek().type === tokenTypes.IDENTIFIER) {
        params.push(parseIdentifier());
      }
    }

    consume(tokenTypes.ARROW);

    let body;
    if (isWithIdentifier()) {
      body = parseWithHeader();
    } else if (peek().type === tokenTypes.IDENTIFIER && peek2().type === tokenTypes.ARROW) {
      body = parseFunctionDeclarationBody(returnType);
    } else {
      body = parseExpression();
    }
    return { type: 'FunctionDeclarationBody', params, body, returnType };
  }

  /**
   * Consumes consecutive identifiers followed by an arrow and returns the
   * identifier names (strings, not Identifier nodes).
   */
  function parseParamNamesThroughArrow() {
    const names = [];
    while (peek().type === tokenTypes.IDENTIFIER) {
      names.push(consume(tokenTypes.IDENTIFIER).value);
    }
    consume(tokenTypes.ARROW);
    return names;
  }

  /**
   * Reports whether the tokens starting at `position` are
   * `IDENTIFIER{minParams,} ARROW`. Does not consume any tokens.
   */
  function startsArrowLiteral(minParams) {
    let lookAheadPos = position;
    while (lookAheadPos < tokens.length && tokens[lookAheadPos].type === tokenTypes.IDENTIFIER) {
      lookAheadPos++;
    }
    const paramCount = lookAheadPos - position;
    return paramCount >= minParams &&
      lookAheadPos < tokens.length &&
      tokens[lookAheadPos].type === tokenTypes.ARROW;
  }

  /**
   * Parses a with-header: `with [rec|recursion] (entries...) -> body`.
   * Each entry is either an assignment `name : expr;` or a type declaration
   * `name Type;`.
   *
   * @returns {object} A WithHeader node.
   */
  function parseWithHeader() {
    const withToken = consume(tokenTypes.IDENTIFIER);
    if (withToken.value !== 'with') {
      throw new ParseError(
        `Expected 'with' but got '${withToken.value}'`,
        locationOf(withToken),
        source,
        ['Use "with" to define local bindings', 'Check syntax for local variable declarations']
      );
    }

    let recursive = false;
    if (peek().type === tokenTypes.IDENTIFIER && (peek().value === 'rec' || peek().value === 'recursion')) {
      consume(tokenTypes.IDENTIFIER);
      recursive = true;
    }

    consume(tokenTypes.LPAREN);
    const entries = [];
    const entryEndTokens = [tokenTypes.SEMICOLON, tokenTypes.RPAREN];

    while (peek().type !== tokenTypes.RPAREN) {
      if (peek().type === tokenTypes.SEMICOLON) {
        consume(tokenTypes.SEMICOLON);
        continue;
      }

      const name = consume(tokenTypes.IDENTIFIER).value;

      if (peek().type !== tokenTypes.COLON) {
        // Type decl: name Type; (Type can be primitive or function type)
        const typeAnnotation = parseType();
        entries.push({ type: 'WithTypeDecl', name, typeAnnotation });
        skipOptionalSemicolon();
        continue;
      }

      // Assignment: name : expr;
      consume(tokenTypes.COLON);
      let value;
      if (startsArrowLiteral(1)) {
        // Inline arrow function literal: x y -> body
        const params = parseParamNamesThroughArrow();
        const body = parseExpression(true, entryEndTokens);
        value = { type: 'AnonymousFunction', params, body };
      } else if (isKeyword('when')) {
        // when expressions have their own termination logic
        value = parseWhenExpression();
      } else {
        value = parseExpression(true, entryEndTokens);
      }
      skipOptionalSemicolon();
      entries.push({ type: 'WithAssign', name, value });
    }

    consume(tokenTypes.RPAREN);
    consume(tokenTypes.ARROW);
    const body = parseExpression();
    return { type: 'WithHeader', recursive, entries, body };
  }

  /** Returns the binding strength of an operator (higher binds tighter; 0 if unknown). */
  function getOperatorPrecedence(operator) {
    switch (operator) {
      case '..': return 1; // String concatenation (lowest)
      case 'or': return 2; // Logical OR
      case 'and': return 3; // Logical AND
      case 'xor': return 4; // XOR
      case '=': case '!=': case '>': case '<': case '>=': case '<=': return 5; // Comparison
      case '+': case '-': return 6; // Addition/Subtraction
      case '*': case '/': case '%': return 7; // Multiplication/Division (highest)
      default: return 0;
    }
  }

  /**
   * Parses an expression up to one of `endTokens`.
   *
   * When the upcoming tokens start a when-case pattern, nothing is consumed
   * and a placeholder `Identifier` named 'undefined' is returned, so callers
   * that loop must check for progress themselves.
   */
  function parseExpression(allowFunctionCalls = true, endTokens = [tokenTypes.EOF, tokenTypes.SEMICOLON]) {
    if (isNextPattern()) {
      return { type: 'Identifier', name: 'undefined' };
    }
    return parseExpressionWithPrecedence(allowFunctionCalls, endTokens, 0);
  }

  /** Parses a when-case consequent; it ends at a semicolon, EOF, or any keyword. */
  function parseConsequentExpression() {
    return parseExpression(true, [tokenTypes.SEMICOLON, tokenTypes.EOF, tokenTypes.KEYWORD]);
  }

  /**
   * Parses one when-discriminant. Unlike a consequent, logical keywords
   * (and/or/xor) are allowed; parsing stops at `is`, a semicolon, EOF, `)`,
   * or any token that is not a binary operator.
   */
  function parseExpressionForDiscriminant() {
    const rightEndTokens = [tokenTypes.SEMICOLON, tokenTypes.EOF, tokenTypes.RPAREN];
    let expr = parsePrimary(true);

    while (true) {
      const nextToken = peek();

      if (nextToken.type === tokenTypes.KEYWORD && nextToken.value === 'is') {
        break;
      }
      if (rightEndTokens.includes(nextToken.type)) {
        break;
      }
      if (!isBinaryOperatorToken(nextToken)) {
        break;
      }

      const operator = nextToken.value;
      const precedence = getOperatorPrecedence(operator);
      consume(nextToken.type);
      const right = parseExpressionWithPrecedence(true, rightEndTokens, precedence + 1);
      expr = { type: 'BinaryExpression', operator, left: expr, right };
    }

    return expr;
  }

  /**
   * Reports whether the upcoming tokens start the next when-case pattern:
   * a `_` wildcard, or a NUMBER/STRING/TYPE token directly followed by `then`.
   */
  function isNextPattern() {
    const token = peek();

    if (token.type === tokenTypes.IDENTIFIER && token.value === '_') {
      return true;
    }

    const isLiteralPatternStart =
      token.type === tokenTypes.NUMBER ||
      token.type === tokenTypes.STRING ||
      token.type === tokenTypes.TYPE;
    if (!isLiteralPatternStart) {
      return false;
    }

    const nextToken = peek2();
    return nextToken.type === tokenTypes.KEYWORD && nextToken.value === 'then';
  }

  /** Reports whether the current token is the `_` wildcard, which stops argument parsing. */
  function isPatternMarker() {
    const token = peek();
    return token.type === tokenTypes.IDENTIFIER && token.value === '_';
  }

  /**
   * Precedence-climbing loop for left-associative binary operators.
   *
   * @param {boolean} allowFunctionCalls - Forwarded to parsePrimary.
   * @param {Array} endTokens - Token types that terminate the expression.
   * @param {number} minPrecedence - Operators below this precedence end the loop.
   */
  function parseExpressionWithPrecedence(allowFunctionCalls, endTokens, minPrecedence) {
    let left = parsePrimary(allowFunctionCalls);

    while (true) {
      const nextToken = peek();
      if (endTokens.includes(nextToken.type) || nextToken.type === tokenTypes.EOF) {
        break;
      }
      if (!isBinaryOperatorToken(nextToken)) {
        break; // No operator, so end of expression
      }

      const operator = nextToken.value;
      const precedence = getOperatorPrecedence(operator);
      if (precedence < minPrecedence) {
        break;
      }

      consume(nextToken.type);

      // Right side must bind tighter, which makes operators left associative.
      const right = parseExpressionWithPrecedence(allowFunctionCalls, endTokens, precedence + 1);
      left = { type: 'BinaryExpression', operator, left, right };
    }

    return left;
  }

  /** Wraps `object` in MemberExpression nodes for each following `.property`. */
  function parseMemberChain(object) {
    let expression = object;
    while (peek().type === tokenTypes.DOT) {
      consume(tokenTypes.DOT);
      const property = parsePrimary(false); // Allow number or string literals as properties
      expression = { type: 'MemberExpression', object: expression, property };
    }
    return expression;
  }

  /** True when the current token can begin a whitespace-separated argument list. */
  function canStartCallArguments() {
    const type = peek().type;
    return type !== tokenTypes.OPERATOR && !ARGUMENT_TERMINATORS.includes(type);
  }

  /**
   * Parses whitespace-separated call arguments until a terminator token or a
   * non-literal keyword.
   *
   * @param {boolean} stopAtPatternMarker - Also stop at the `_` wildcard.
   * @returns {Array<object>} Argument nodes.
   */
  function parseCallArguments(stopAtPatternMarker) {
    const args = [];
    while (!ARGUMENT_TERMINATORS.includes(peek().type)) {
      if (stopAtPatternMarker && isPatternMarker()) {
        break;
      }

      const token = peek();
      if (token.type !== tokenTypes.KEYWORD) {
        args.push(parsePrimary(false));
      } else if (token.value === 'true' || token.value === 'false') {
        args.push(parseBooleanLiteral());
      } else if (token.value === 'PI' || token.value === 'INFINITY') {
        args.push(parseConstant());
      } else {
        break; // Stop at other keywords
      }
    }
    return args;
  }

  /**
   * Parses a primary expression: literal, identifier (with member access and
   * optional juxtaposed call arguments), Ok/Err, when, unary minus,
   * parenthesised expression or anonymous function, list, or table.
   *
   * @param {boolean} [allowFunctionCalls=true] - When false, an identifier or
   *   anonymous function is never treated as a call.
   * @throws {ParseError} On a token that cannot start an expression.
   */
  function parsePrimary(allowFunctionCalls = true) {
    const token = peek();

    if (token.type === tokenTypes.NUMBER) {
      return parseNumber();
    }
    if (token.type === tokenTypes.STRING) {
      return parseString();
    }

    if (token.type === tokenTypes.IDENTIFIER) {
      const identifier = parseMemberChain(parseIdentifier());

      // A following semicolon, pattern marker, or non-literal keyword means
      // this is a plain reference, not a call. Literal keywords are not
      // blocked here because they are valid call arguments.
      const next = peek();
      if (next.type === tokenTypes.SEMICOLON || isPatternMarker() ||
          (next.type === tokenTypes.KEYWORD && !LITERAL_KEYWORDS.includes(next.value))) {
        return identifier;
      }

      if (allowFunctionCalls && canStartCallArguments()) {
        const args = parseCallArguments(true);
        return { type: 'FunctionCall', callee: identifier, arguments: args };
      }
      return identifier;
    }

    if (token.type === tokenTypes.KEYWORD) {
      if (token.value === 'Ok' || token.value === 'Err') {
        return parseResultExpression();
      }
      if (token.value === 'when') {
        return parseWhenExpression();
      }
      if (token.value === 'true' || token.value === 'false') {
        return parseBooleanLiteral();
      }
      if (token.value === 'PI' || token.value === 'INFINITY') {
        return parseConstant();
      }
    }

    if (token.type === tokenTypes.OPERATOR && token.value === '-') {
      // Unary minus
      consume(tokenTypes.OPERATOR);
      const operand = parsePrimary(allowFunctionCalls);
      return { type: 'UnaryExpression', operator: '-', operand };
    }

    if (token.type === tokenTypes.LPAREN) {
      return parseParenthesized(allowFunctionCalls);
    }
    if (token.type === tokenTypes.LBRACKET) {
      return parseMemberChain(parseListLiteral());
    }
    if (token.type === tokenTypes.LBRACE) {
      return parseMemberChain(parseTableLiteral());
    }

    const suggestions = [];
    if (token.type === tokenTypes.IDENTIFIER) {
      // NOTE(review): identifiers are handled above, so this branch looks
      // unreachable; kept in case the branch order changes.
      const keywords = ['when', 'is', 'then', 'with', 'rec', 'Ok', 'Err'];
      suggestions.push(...ErrorHelpers.generateSuggestions(token.value, keywords));
    } else if (token.type === tokenTypes.EOF) {
      suggestions.push('Check for missing closing parentheses, braces, or brackets');
    }

    throw new ParseError(
      `Unexpected token: ${token.type} (${token.value})`,
      locationOf(token),
      source,
      suggestions
    );
  }

  /**
   * Parses what follows `(`: an anonymous function literal `(x y -> body)`,
   * optionally applied to arguments, or a parenthesised expression.
   */
  function parseParenthesized(allowFunctionCalls) {
    consume(tokenTypes.LPAREN);
    const innerEndTokens = [tokenTypes.RPAREN, tokenTypes.SEMICOLON];

    // Anonymous function if we see zero or more identifiers followed by an arrow.
    if (!startsArrowLiteral(0)) {
      const expression = parseExpression(true, innerEndTokens);
      consume(tokenTypes.RPAREN);
      return expression;
    }

    const params = [];
    while (peek().type === tokenTypes.IDENTIFIER) {
      params.push(parseIdentifier());
    }
    consume(tokenTypes.ARROW);
    // An optional semicolon may terminate the body before ')'.
    const body = parseExpression(true, innerEndTokens);
    skipOptionalSemicolon();
    consume(tokenTypes.RPAREN);
    const anonymousFunc = { type: 'AnonymousFunction', params, body };

    // Immediately applied: (x -> x) 5
    if (allowFunctionCalls && canStartCallArguments()) {
      const args = parseCallArguments(false);
      return { type: 'FunctionCall', callee: anonymousFunc, arguments: args };
    }
    return anonymousFunc;
  }

  /**
   * Parses `[a, b, ...]` into a ListLiteral node; commas are optional.
   *
   * @throws {ParseError} When an element consumes no tokens (e.g. `[_]`).
   */
  function parseListLiteral() {
    consume(tokenTypes.LBRACKET);
    const elements = [];
    while (peek().type !== tokenTypes.RBRACKET) {
      const startPosition = position;
      elements.push(parseExpression(true, [tokenTypes.COMMA, tokenTypes.RBRACKET]));
      if (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      }
      // parseExpression may return a placeholder without advancing.
      ensureProgress(startPosition);
    }
    consume(tokenTypes.RBRACKET);
    return { type: 'ListLiteral', elements };
  }

  /**
   * Parses `params -> body` into an AnonymousFunction node whose params are
   * names. Not called anywhere within this factory.
   */
  function parseArrowFunction() {
    const params = parseParamNamesThroughArrow();
    const body = parseExpression(true);
    return { type: 'AnonymousFunction', params, body };
  }

  /**
   * Parses `{ key: value, ... }` into a TableLiteral node. A value may be an
   * arrow function literal (`key: x y -> body`, zero parameters allowed).
   * Entries may be separated by semicolons and/or commas.
   */
  function parseTableLiteral() {
    consume(tokenTypes.LBRACE);
    const properties = [];
    const valueEndTokens = [tokenTypes.SEMICOLON, tokenTypes.COMMA, tokenTypes.RBRACE];

    while (peek().type !== tokenTypes.RBRACE) {
      // A pattern marker ends the table (for when expressions).
      if (isPatternMarker()) {
        break;
      }

      const key = consume(tokenTypes.IDENTIFIER).value;
      consume(tokenTypes.COLON);

      let value;
      if (startsArrowLiteral(0)) {
        const params = parseParamNamesThroughArrow();
        const body = parseExpression(true, valueEndTokens);
        value = { type: 'AnonymousFunction', params, body };
      } else {
        value = parseExpression(true, valueEndTokens);
      }
      skipOptionalSemicolon();

      properties.push({ key, value });

      if (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      }
    }
    consume(tokenTypes.RBRACE);
    return { type: 'TableLiteral', properties };
  }

  /** Parses `Ok value` / `Err value` into a ResultExpression node. */
  function parseResultExpression() {
    const variant = consume(tokenTypes.KEYWORD).value;
    const value = parsePrimary(true);
    return { type: 'ResultExpression', variant, value };
  }

  /**
   * Parses `when discriminants... is (patterns... then consequent)*`.
   * Cases are read until a semicolon, EOF, or `)`.
   *
   * @returns {object} A WhenExpression node.
   */
  function parseWhenExpression() {
    consume(tokenTypes.KEYWORD, 'when');

    const discriminants = [];
    while (!isKeyword('is')) {
      discriminants.push(parseExpressionForDiscriminant());
    }
    consume(tokenTypes.KEYWORD, 'is');

    const cases = [];
    while (peek().type !== tokenTypes.SEMICOLON &&
           peek().type !== tokenTypes.EOF &&
           peek().type !== tokenTypes.RPAREN) {
      const patterns = [];
      while (!isKeyword('then')) {
        patterns.push(parsePattern());
      }
      consume(tokenTypes.KEYWORD, 'then');

      // A nested when is parsed completely; anything else stops at the next
      // pattern boundary.
      const consequent = isKeyword('when')
        ? parseWhenExpression()
        : parseConsequentExpression();
      cases.push({ type: 'WhenCase', patterns, consequent });
    }
    return { type: 'WhenExpression', discriminants, cases };
  }

  /**
   * Parses one when-case pattern (type, Ok/Err, wildcard, list, table, or a
   * literal/identifier) and wraps it in a GuardPattern when `if` follows.
   */
  function parsePattern() {
    const token = peek();
    let pattern;

    if (token.type === tokenTypes.TYPE) {
      pattern = { type: 'TypePattern', name: consume(tokenTypes.TYPE).value };
    } else if (token.type === tokenTypes.KEYWORD && (token.value === 'Ok' || token.value === 'Err')) {
      const variant = consume(tokenTypes.KEYWORD).value;
      const identifier = parseIdentifier();
      pattern = { type: 'ResultPattern', variant, identifier };
    } else if (token.type === tokenTypes.IDENTIFIER && token.value === '_') {
      consume(tokenTypes.IDENTIFIER);
      pattern = { type: 'WildcardPattern' };
    } else if (token.type === tokenTypes.LBRACKET) {
      pattern = parseListPattern();
    } else if (token.type === tokenTypes.LBRACE) {
      pattern = parseTablePattern();
    } else {
      pattern = parsePrimary(false);
    }

    // Guard clause
    if (isKeyword('if')) {
      consume(tokenTypes.KEYWORD);
      const guard = parseExpression(
        true,
        [tokenTypes.KEYWORD, tokenTypes.SEMICOLON, tokenTypes.EOF, tokenTypes.RPAREN]
      );
      return { type: 'GuardPattern', pattern, guard };
    }

    return pattern;
  }

  /** Parses `[p1, p2, ...]` into a ListPattern node; commas are optional. */
  function parseListPattern() {
    consume(tokenTypes.LBRACKET);
    const elements = [];
    while (peek().type !== tokenTypes.RBRACKET) {
      elements.push(parsePattern());
      if (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      }
    }
    consume(tokenTypes.RBRACKET);
    return { type: 'ListPattern', elements };
  }

  /** Parses `{ key: pattern, ... }` into a TablePattern node; commas are optional. */
  function parseTablePattern() {
    consume(tokenTypes.LBRACE);
    const properties = [];
    while (peek().type !== tokenTypes.RBRACE) {
      const key = consume(tokenTypes.IDENTIFIER).value;
      consume(tokenTypes.COLON);
      const value = parsePattern();
      properties.push({ key, value });

      if (peek().type === tokenTypes.COMMA) {
        consume(tokenTypes.COMMA);
      }
    }
    consume(tokenTypes.RBRACE);
    return { type: 'TablePattern', properties };
  }

  /** Consumes a NUMBER token into a NumberLiteral node. */
  function parseNumber() {
    const token = consume(tokenTypes.NUMBER);
    return {
      type: 'NumberLiteral',
      value: token.value,
      isFloat: token.isFloat
    };
  }

  /** Consumes a STRING token into a StringLiteral node. */
  function parseString() {
    const token = consume(tokenTypes.STRING);
    return { type: 'StringLiteral', value: token.value };
  }

  /** Consumes a KEYWORD token into a BooleanLiteral (true only for `true`). */
  function parseBooleanLiteral() {
    const token = consume(tokenTypes.KEYWORD);
    return { type: 'BooleanLiteral', value: token.value === 'true' };
  }

  /**
   * Consumes a `PI` or `INFINITY` keyword into a float NumberLiteral.
   *
   * @throws {ParseError} For any other keyword.
   */
  function parseConstant() {
    const token = consume(tokenTypes.KEYWORD);
    if (token.value === 'PI') {
      return { type: 'NumberLiteral', value: Math.PI, isFloat: true };
    }
    if (token.value === 'INFINITY') {
      return { type: 'NumberLiteral', value: Infinity, isFloat: true };
    }
    throw new ParseError(
      `Unknown constant: ${token.value}`,
      locationOf(token),
      source,
      ['Use PI or INFINITY for mathematical constants', 'Check spelling of constant names']
    );
  }

  /** Consumes an IDENTIFIER token into an Identifier node. */
  function parseIdentifier() {
    const token = consume(tokenTypes.IDENTIFIER);
    return { type: 'Identifier', name: token.value };
  }

  /**
   * Parses the whole token stream.
   *
   * @returns {{type: 'Program', body: Array<object>}} The program AST.
   * @throws {ParseError} On invalid input, including a statement that
   *   consumes no tokens (e.g. a bare `_`).
   */
  function parse() {
    log('Parser received tokens:', tokens);
    const program = { type: 'Program', body: [] };
    while (peek().type !== tokenTypes.EOF) {
      const startPosition = position;
      program.body.push(parseStatement());
      // Without this guard a statement that consumes nothing loops forever.
      ensureProgress(startPosition);
    }
    return program;
  }

  return {
    parse,
  };
}

export { createParser };