diff options
Diffstat (limited to 'js/scripting-lang/baba-yaga-c/src/parser.c')
-rw-r--r-- | js/scripting-lang/baba-yaga-c/src/parser.c | 2973 |
1 files changed, 2973 insertions, 0 deletions
/**
 * @brief Kinds of token the parser receives.
 *
 * NOTE(review): the section header says these come from lexer.c, so this enum
 * presumably has to stay in step with the lexer's own definition -- confirm
 * before adding or reordering members.
 */
typedef enum {
    TOKEN_EOF,

    /* Literals and names; parser_parse_primary turns each of these into a
     * literal or identifier node. */
    TOKEN_NUMBER,
    TOKEN_STRING,
    TOKEN_BOOLEAN,
    TOKEN_IDENTIFIER,

    /* Keywords. */
    TOKEN_KEYWORD_WHEN,
    TOKEN_KEYWORD_IS,
    TOKEN_KEYWORD_THEN,
    TOKEN_KEYWORD_AND,
    TOKEN_KEYWORD_OR,
    TOKEN_KEYWORD_XOR,
    TOKEN_KEYWORD_NOT,
    TOKEN_KEYWORD_VIA,

    /* Operators. TOKEN_OP_UNARY_MINUS is distinct from TOKEN_OP_MINUS;
     * parser_parse_primary rewrites it as a call to "negate". */
    TOKEN_OP_PLUS,
    TOKEN_OP_MINUS,
    TOKEN_OP_UNARY_MINUS,
    TOKEN_OP_MULTIPLY,
    TOKEN_OP_DIVIDE,
    TOKEN_OP_MODULO,
    TOKEN_OP_POWER,
    TOKEN_OP_EQUALS,
    TOKEN_OP_NOT_EQUALS,
    TOKEN_OP_LESS,
    TOKEN_OP_LESS_EQUAL,
    TOKEN_OP_GREATER,
    TOKEN_OP_GREATER_EQUAL,

    /* Delimiters and punctuation. */
    TOKEN_LPAREN,
    TOKEN_RPAREN,
    TOKEN_LBRACE,
    TOKEN_RBRACE,
    TOKEN_LBRACKET,
    TOKEN_RBRACKET,
    TOKEN_COMMA,
    TOKEN_COLON,
    TOKEN_SEMICOLON,
    TOKEN_ARROW,
    TOKEN_DOT,

    /* Function reference; parser_parse_primary accepts it either directly
     * followed by a parenthesised expression or on its own as a name. */
    TOKEN_FUNCTION_REF,

    /* IO operations; parser_parse_primary skips the first two characters of
     * the lexeme (the ".." prefix) to obtain the operation name. */
    TOKEN_IO_IN,
    TOKEN_IO_OUT,
    TOKEN_IO_ASSERT,
    TOKEN_IO_EMIT,
    TOKEN_IO_LISTEN
} TokenType;
/**
 * @brief One node of the abstract syntax tree.
 *
 * `type` selects which member of `data` is meaningful: every constructor in
 * this file sets `type` together with exactly one union member, and
 * ast_destroy_node() switches on `type` to decide what to release.
 * `line` and `column` are copied from the token that produced the node.
 */
struct ASTNode {
    NodeType type;
    int line;
    int column;
    union {
        /* NODE_LITERAL: the value handed to ast_literal_node(). */
        Value literal;
        /* NODE_IDENTIFIER: heap copy made with strdup() by
         * ast_identifier_node(); freed by ast_destroy_node(). */
        char* identifier;
        /* NODE_BINARY_OP: operands plus the operator name, which
         * ast_binary_op_node() copies with strdup(). */
        struct {
            struct ASTNode* left;
            struct ASTNode* right;
            char* operator;
        } binary;
        /* No constructor in the visible part of this file fills this member:
         * ast_unary_op_node() builds a NODE_FUNCTION_CALL instead. */
        struct {
            struct ASTNode* operand;
            char* operator;
        } unary;
        /* NODE_FUNCTION_CALL: callee expression and argument array.
         * `arguments` may be NULL when `arg_count` is 0. ast_destroy_node()
         * destroys every argument, frees the array, then destroys the callee. */
        struct {
            struct ASTNode* function;
            struct ASTNode** arguments;
            int arg_count;
        } function_call;
        /* NODE_FUNCTION_DEF: ast_destroy_node() destroys each parameter node,
         * frees the parameter array and `name`, and destroys `body`. */
        struct {
            char* name;
            struct ASTNode** parameters;
            int param_count;
            struct ASTNode* body;
        } function_def;
        /* NODE_VARIABLE_DECL: `name` is freed and `value` destroyed by
         * ast_destroy_node(). */
        struct {
            char* name;
            struct ASTNode* value;
        } variable_decl;
        /* NODE_WHEN_EXPR: the tested expression and its pattern nodes. */
        struct {
            struct ASTNode* test;
            struct ASTNode** patterns;
            int pattern_count;
        } when_expr;
        /* NODE_WHEN_PATTERN: one pattern test and the result it selects. */
        struct {
            struct ASTNode* test;
            struct ASTNode* result;
        } when_pattern;
        /* NODE_TABLE: table literal elements. parser_parse_primary() stores a
         * key/value entry as a call to "table_entry" with (key, value) as its
         * two arguments; a plain entry is stored as the value expression. */
        struct {
            struct ASTNode** elements;
            int element_count;
        } table;
        /* NODE_TABLE_ACCESS: both children are destroyed with the node. */
        struct {
            struct ASTNode* object;
            struct ASTNode* key;
        } table_access;
        /* NODE_IO_OPERATION: `operation` is freed and `argument` destroyed by
         * ast_destroy_node(). */
        struct {
            char* operation;
            struct ASTNode* argument;
        } io_operation;
        /* NODE_SEQUENCE: statement nodes and the array holding them are
         * released by ast_destroy_node(). */
        struct {
            struct ASTNode** statements;
            int statement_count;
        } sequence;
    } data;
};
= malloc(sizeof(ASTNode)); + if (node == NULL) { + return NULL; + } + + node->type = NODE_LITERAL; + node->line = line; + node->column = column; + node->data.literal = value; + + return node; +} + +/** + * @brief Create an identifier node + * + * @param identifier Identifier name + * @param line Line number + * @param column Column number + * @return New identifier node + */ +static ASTNode* ast_identifier_node(const char* identifier, int line, int column) { + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + return NULL; + } + + node->type = NODE_IDENTIFIER; + node->line = line; + node->column = column; + node->data.identifier = strdup(identifier); + + return node; +} + +/** + * @brief Create a function call node + * + * @param function Function expression + * @param arguments Array of argument expressions + * @param arg_count Number of arguments + * @param line Line number + * @param column Column number + * @return New function call node + */ +static ASTNode* ast_function_call_node(ASTNode* function, ASTNode** arguments, + int arg_count, int line, int column) { + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + return NULL; + } + + node->type = NODE_FUNCTION_CALL; + node->line = line; + node->column = column; + node->data.function_call.function = function; + node->data.function_call.arguments = arguments; + node->data.function_call.arg_count = arg_count; + + return node; +} + +/** + * @brief Create a binary operator node + * + * @param left Left operand + * @param right Right operand + * @param operator Operator name + * @param line Line number + * @param column Column number + * @return New binary operator node + */ +static ASTNode* ast_binary_op_node(ASTNode* left, ASTNode* right, + const char* operator, int line, int column) { + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + return NULL; + } + + node->type = NODE_BINARY_OP; + node->line = line; + node->column = column; + node->data.binary.left = left; + 
node->data.binary.right = right; + node->data.binary.operator = strdup(operator); + + return node; +} + +/** + * @brief Create a unary operator node (translated to function call) + * + * @param operand Operand expression + * @param operator Operator name + * @param line Line number + * @param column Column number + * @return New function call node representing the operator + */ +static ASTNode* ast_unary_op_node(ASTNode* operand, const char* operator, + int line, int column) { + /* Create simple function call: operator(operand) */ + ASTNode* operator_node = ast_identifier_node(operator, line, column); + if (operator_node == NULL) { + return NULL; + } + + ASTNode** args = malloc(1 * sizeof(ASTNode*)); + if (args == NULL) { + free(operator_node); + return NULL; + } + args[0] = operand; + + return ast_function_call_node(operator_node, args, 1, line, column); +} + +/** + * @brief Create a sequence node + * + * @param statements Array of statement nodes + * @param statement_count Number of statements + * @param line Line number + * @param column Column number + * @return New sequence node + */ +static ASTNode* ast_sequence_node(ASTNode** statements, int statement_count, + int line, int column) { + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + return NULL; + } + + node->type = NODE_SEQUENCE; + node->line = line; + node->column = column; + node->data.sequence.statements = statements; + node->data.sequence.statement_count = statement_count; + + return node; +} + +/** + * @brief Create a when expression node + * + * @param test Test expression + * @param patterns Array of pattern nodes + * @param pattern_count Number of patterns + * @param line Line number + * @param column Column number + * @return New when expression node + */ +static ASTNode* ast_when_expr_node(ASTNode* test, ASTNode** patterns, + int pattern_count, int line, int column) { + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + return NULL; + } + + node->type = 
NODE_WHEN_EXPR; + node->line = line; + node->column = column; + node->data.when_expr.test = test; + node->data.when_expr.patterns = patterns; + node->data.when_expr.pattern_count = pattern_count; + + + return node; +} + +/** + * @brief Create a when pattern node + * + * @param test Pattern test expression + * @param result Result expression + * @param line Line number + * @param column Column number + * @return New when pattern node + */ +static ASTNode* ast_when_pattern_node(ASTNode* test, ASTNode* result, + int line, int column) { + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + return NULL; + } + + node->type = NODE_WHEN_PATTERN; + node->line = line; + node->column = column; + node->data.when_pattern.test = test; + node->data.when_pattern.result = result; + + return node; +} + +/** + * @brief Destroy an AST node + * + * @param node Node to destroy + */ +static void ast_destroy_node(ASTNode* node) { + if (node == NULL) { + return; + } + + switch (node->type) { + case NODE_IDENTIFIER: + free(node->data.identifier); + break; + case NODE_FUNCTION_CALL: + for (int i = 0; i < node->data.function_call.arg_count; i++) { + ast_destroy_node(node->data.function_call.arguments[i]); + } + free(node->data.function_call.arguments); + ast_destroy_node(node->data.function_call.function); + break; + case NODE_FUNCTION_DEF: + for (int i = 0; i < node->data.function_def.param_count; i++) { + ast_destroy_node(node->data.function_def.parameters[i]); + } + free(node->data.function_def.parameters); + free(node->data.function_def.name); + ast_destroy_node(node->data.function_def.body); + break; + case NODE_VARIABLE_DECL: + free(node->data.variable_decl.name); + ast_destroy_node(node->data.variable_decl.value); + break; + case NODE_WHEN_EXPR: + ast_destroy_node(node->data.when_expr.test); + for (int i = 0; i < node->data.when_expr.pattern_count; i++) { + ast_destroy_node(node->data.when_expr.patterns[i]); + } + free(node->data.when_expr.patterns); + break; + case 
NODE_WHEN_PATTERN: + ast_destroy_node(node->data.when_pattern.test); + ast_destroy_node(node->data.when_pattern.result); + break; + case NODE_TABLE: + for (int i = 0; i < node->data.table.element_count; i++) { + ast_destroy_node(node->data.table.elements[i]); + } + free(node->data.table.elements); + break; + case NODE_TABLE_ACCESS: + ast_destroy_node(node->data.table_access.object); + ast_destroy_node(node->data.table_access.key); + break; + case NODE_IO_OPERATION: + free(node->data.io_operation.operation); + ast_destroy_node(node->data.io_operation.argument); + break; + case NODE_SEQUENCE: + for (int i = 0; i < node->data.sequence.statement_count; i++) { + ast_destroy_node(node->data.sequence.statements[i]); + } + free(node->data.sequence.statements); + break; + default: + /* No cleanup needed for other types */ + break; + } + + free(node); +} + +/* ============================================================================ + * Parser Functions + * ============================================================================ */ + +/** + * @brief Create a new parser + * + * @param tokens Array of tokens + * @param token_count Number of tokens + * @return New parser instance, or NULL on failure + */ +static Parser* parser_create(Token** tokens, int token_count) { + Parser* parser = malloc(sizeof(Parser)); + if (parser == NULL) { + return NULL; + } + + parser->tokens = tokens; + parser->token_count = token_count; + parser->current = 0; + parser->has_error = false; + parser->error_message = NULL; + + return parser; +} + +/** + * @brief Destroy a parser + * + * @param parser Parser to destroy + */ +static void parser_destroy(Parser* parser) { + if (parser == NULL) { + return; + } + + if (parser->error_message != NULL) { + free(parser->error_message); + } + + free(parser); +} + +/** + * @brief Set parser error + * + * @param parser Parser instance + * @param message Error message + */ +static void parser_set_error(Parser* parser, const char* message) { + if (parser == 
NULL) { + return; + } + + parser->has_error = true; + if (parser->error_message != NULL) { + free(parser->error_message); + } + parser->error_message = strdup(message); +} + +/** + * @brief Check if we're at the end of tokens + * + * @param parser Parser instance + * @return true if at end, false otherwise + */ +static bool parser_is_at_end(const Parser* parser) { + return parser->current >= parser->token_count; +} + +/** + * @brief Peek at current token + * + * @param parser Parser instance + * @return Current token, or NULL if at end + */ +static Token* parser_peek(const Parser* parser) { + if (parser_is_at_end(parser)) { + return NULL; + } + return parser->tokens[parser->current]; +} + +/** + * @brief Peek at next token + * + * @param parser Parser instance + * @return Next token, or NULL if at end + */ +static Token* parser_peek_next(const Parser* parser) { + if (parser->current + 1 >= parser->token_count) { + return NULL; + } + return parser->tokens[parser->current + 1]; +} + +/** + * @brief Advance to next token + * + * @param parser Parser instance + * @return Token that was advanced over + */ +static Token* parser_advance(Parser* parser) { + if (parser_is_at_end(parser)) { + return NULL; + } + return parser->tokens[parser->current++]; +} + +/** + * @brief Check if current token matches expected type + * + * @param parser Parser instance + * @param type Expected token type + * @return true if matches, false otherwise + */ +static bool parser_check(const Parser* parser, TokenType type) { + if (parser_is_at_end(parser)) { + return false; + } + return parser->tokens[parser->current]->type == type; +} + +/** + * @brief Consume token of expected type + * + * @param parser Parser instance + * @param type Expected token type + * @param error_message Error message if type doesn't match + * @return Consumed token, or NULL on error + */ +static Token* parser_consume(Parser* parser, TokenType type, const char* error_message) { + if (parser_check(parser, type)) { + 
return parser_advance(parser); + } + + parser_set_error(parser, error_message); + return NULL; +} + +/* ============================================================================ + * Expression Parsing (Operator Precedence) + * ============================================================================ */ + +/* Forward declarations */ +static ASTNode* parser_parse_expression(Parser* parser); +static ASTNode* parser_parse_logical(Parser* parser); +/* static ASTNode* parser_parse_composition(Parser* parser); */ +/* static ASTNode* parser_parse_application(Parser* parser); */ +static ASTNode* parser_parse_statement(Parser* parser); +static ASTNode* parser_parse_when_expression(Parser* parser); +static ASTNode* parser_parse_when_pattern(Parser* parser); +static ASTNode* parser_parse_when_result_expression(Parser* parser); +static ASTNode* parser_parse_postfix(Parser* parser); +static const char* node_type_name(NodeType type); +static ASTNode* parser_parse_function_def(Parser* parser); +static ASTNode* parser_parse_embedded_arrow_function(Parser* parser); + +/** + * @brief Parse primary expression (literals, identifiers, parentheses) + * + * @param parser Parser instance + * @return Parsed expression node + */ +static ASTNode* parser_parse_primary(Parser* parser) { + Token* token = parser_peek(parser); + if (token == NULL) { + parser_set_error(parser, "Unexpected end of input"); + return NULL; + } + + switch (token->type) { + case TOKEN_NUMBER: { + DEBUG_TRACE("parser_parse_primary consuming number: %g", token->literal.number); + parser_advance(parser); + return ast_literal_node(baba_yaga_value_number(token->literal.number), + token->line, token->column); + } + case TOKEN_STRING: { + DEBUG_TRACE("parser_parse_primary consuming string: %s", token->lexeme); + parser_advance(parser); + return ast_literal_node(baba_yaga_value_string(token->lexeme), + token->line, token->column); + } + case TOKEN_BOOLEAN: { + DEBUG_TRACE("parser_parse_primary consuming boolean: %s", 
token->literal.boolean ? "true" : "false"); + parser_advance(parser); + return ast_literal_node(baba_yaga_value_boolean(token->literal.boolean), + token->line, token->column); + } + case TOKEN_IDENTIFIER: { + DEBUG_TRACE("parser_parse_primary consuming identifier: %s", token->lexeme); + parser_advance(parser); + /* Special handling for wildcard pattern */ + if (strcmp(token->lexeme, "_") == 0) { + /* Create a special wildcard literal */ + return ast_literal_node(baba_yaga_value_string("_"), token->line, token->column); + } + return ast_identifier_node(token->lexeme, token->line, token->column); + } + case TOKEN_IO_IN: + case TOKEN_IO_OUT: + case TOKEN_IO_ASSERT: + case TOKEN_IO_EMIT: + case TOKEN_IO_LISTEN: { + DEBUG_TRACE("parser_parse_primary consuming io operation: %s", token->lexeme); + parser_advance(parser); + /* IO operations are treated as function calls - strip the ".." prefix */ + const char* func_name = token->lexeme + 2; /* Skip ".." */ + + /* For ..assert, parse the entire expression as a single argument */ + if (strcmp(func_name, "assert") == 0) { + /* Parse the assertion expression */ + ASTNode* assertion_expr = parser_parse_expression(parser); + if (assertion_expr == NULL) { + return NULL; + } + + /* Create function call with the assertion expression as argument */ + ASTNode** args = malloc(1 * sizeof(ASTNode*)); + if (args == NULL) { + ast_destroy_node(assertion_expr); + return NULL; + } + args[0] = assertion_expr; + + ASTNode* func_node = ast_identifier_node(func_name, token->line, token->column); + if (func_node == NULL) { + free(args); + ast_destroy_node(assertion_expr); + return NULL; + } + + return ast_function_call_node(func_node, args, 1, token->line, token->column); + } + + /* For ..emit, parse the entire expression as a single argument */ + if (strcmp(func_name, "emit") == 0) { + /* Parse the expression */ + ASTNode* expr = parser_parse_expression(parser); + if (expr == NULL) { + return NULL; + } + + /* Create function call with the 
expression as argument */ + ASTNode** args = malloc(1 * sizeof(ASTNode*)); + if (args == NULL) { + ast_destroy_node(expr); + return NULL; + } + args[0] = expr; + + ASTNode* func_node = ast_identifier_node(func_name, token->line, token->column); + if (func_node == NULL) { + free(args); + ast_destroy_node(expr); + return NULL; + } + + return ast_function_call_node(func_node, args, 1, token->line, token->column); + } + + /* For ..listen, create a function call with no arguments */ + if (strcmp(func_name, "listen") == 0) { + ASTNode* func_node = ast_identifier_node(func_name, token->line, token->column); + if (func_node == NULL) { + return NULL; + } + + return ast_function_call_node(func_node, NULL, 0, token->line, token->column); + } + + return ast_identifier_node(func_name, token->line, token->column); + } + case TOKEN_KEYWORD_WHEN: { + + return parser_parse_when_expression(parser); + } + case TOKEN_FUNCTION_REF: { + DEBUG_TRACE("parser_parse_primary consuming function ref: %s", token->lexeme); + parser_advance(parser); + + /* Check if this is @(expression) syntax */ + if (!parser_is_at_end(parser) && parser_peek(parser)->type == TOKEN_LPAREN) { + DEBUG_TRACE("parser_parse_primary consuming '('"); + parser_advance(parser); /* consume '(' */ + + /* Parse the expression inside parentheses */ + ASTNode* expr = parser_parse_expression(parser); + if (expr == NULL) { + return NULL; + } + + /* Expect closing parenthesis */ + if (!parser_consume(parser, TOKEN_RPAREN, "Expected ')' after expression")) { + ast_destroy_node(expr); + return NULL; + } + + /* Return the expression as-is (it will be evaluated when used as an argument) */ + return expr; + } + + /* Handle @function_name syntax */ + ASTNode* func_node = ast_identifier_node(token->lexeme, token->line, token->column); + if (func_node == NULL) { + return NULL; + } + + /* Check if this function reference is followed by arguments */ + /* Only treat as function call if it's at the top level (not in an argument position) */ 
+ if (!parser_is_at_end(parser)) { + Token* next_token = parser_peek(parser); + if (next_token != NULL && + next_token->type != TOKEN_OP_PLUS && + next_token->type != TOKEN_OP_MINUS && + next_token->type != TOKEN_OP_MULTIPLY && + next_token->type != TOKEN_OP_DIVIDE && + next_token->type != TOKEN_OP_MODULO && + next_token->type != TOKEN_OP_POWER && + next_token->type != TOKEN_OP_EQUALS && + next_token->type != TOKEN_OP_NOT_EQUALS && + next_token->type != TOKEN_OP_LESS && + next_token->type != TOKEN_OP_LESS_EQUAL && + next_token->type != TOKEN_OP_GREATER && + next_token->type != TOKEN_OP_GREATER_EQUAL && + next_token->type != TOKEN_RPAREN && + next_token->type != TOKEN_RBRACE && + next_token->type != TOKEN_RBRACKET && + next_token->type != TOKEN_SEMICOLON && + next_token->type != TOKEN_COMMA && + next_token->type != TOKEN_EOF) { + + /* For now, always treat function references as values, not function calls */ + /* This allows them to be passed as arguments to other functions */ + DEBUG_TRACE("parser_parse_primary: treating function reference as value"); + return func_node; + + /* Parse arguments for this function call */ + ASTNode** args = NULL; + int arg_count = 0; + + while (!parser_is_at_end(parser)) { + Token* arg_token = parser_peek(parser); + if (arg_token == NULL) { + break; + } + + /* Stop if we hit an operator or delimiter */ + if (arg_token->type == TOKEN_OP_PLUS || + arg_token->type == TOKEN_OP_MINUS || + arg_token->type == TOKEN_OP_MULTIPLY || + arg_token->type == TOKEN_OP_DIVIDE || + arg_token->type == TOKEN_OP_MODULO || + arg_token->type == TOKEN_OP_POWER || + arg_token->type == TOKEN_OP_EQUALS || + arg_token->type == TOKEN_OP_NOT_EQUALS || + arg_token->type == TOKEN_OP_LESS || + arg_token->type == TOKEN_OP_LESS_EQUAL || + arg_token->type == TOKEN_OP_GREATER || + arg_token->type == TOKEN_OP_GREATER_EQUAL || + arg_token->type == TOKEN_RPAREN || + arg_token->type == TOKEN_RBRACE || + arg_token->type == TOKEN_RBRACKET || + arg_token->type == 
TOKEN_SEMICOLON || + arg_token->type == TOKEN_COMMA || + arg_token->type == TOKEN_EOF) { + break; + } + + /* Parse argument */ + ASTNode* arg = parser_parse_postfix(parser); + if (arg == NULL) { + /* Cleanup on error */ + for (int i = 0; i < arg_count; i++) { + ast_destroy_node(args[i]); + } + free(args); + ast_destroy_node(func_node); + return NULL; + } + + /* Add to arguments array */ + ASTNode** new_args = realloc(args, (arg_count + 1) * sizeof(ASTNode*)); + if (new_args == NULL) { + /* Cleanup on error */ + for (int i = 0; i < arg_count; i++) { + ast_destroy_node(args[i]); + } + free(args); + ast_destroy_node(arg); + ast_destroy_node(func_node); + return NULL; + } + args = new_args; + args[arg_count] = arg; + arg_count++; + } + + /* Create function call with the arguments */ + if (arg_count > 0) { + ASTNode* func_call = ast_function_call_node(func_node, args, arg_count, func_node->line, func_node->column); + if (func_call == NULL) { + /* Cleanup on error */ + for (int i = 0; i < arg_count; i++) { + ast_destroy_node(args[i]); + } + free(args); + ast_destroy_node(func_node); + return NULL; + } + return func_call; + } + } + } + + return func_node; + } + case TOKEN_LPAREN: { + DEBUG_TRACE("parser_parse_primary consuming '('"); + parser_advance(parser); /* consume '(' */ + ASTNode* expr = parser_parse_expression(parser); + if (expr == NULL) { + return NULL; + } + + if (!parser_consume(parser, TOKEN_RPAREN, "Expected ')' after expression")) { + ast_destroy_node(expr); + return NULL; + } + + return expr; + } + case TOKEN_LBRACE: { + DEBUG_TRACE("parser_parse_primary consuming table literal '{'"); + parser_advance(parser); /* consume '{' */ + + ASTNode** elements = NULL; + int element_count = 0; + int capacity = 10; + + /* Allocate initial space for elements */ + elements = malloc(capacity * sizeof(ASTNode*)); + if (elements == NULL) { + return NULL; + } + + /* Parse table entries */ + while (!parser_is_at_end(parser) && parser_peek(parser)->type != TOKEN_RBRACE) { + 
ASTNode* value = NULL; + + /* Check if this is a key-value pair (any token: value) */ + + /* Check if this is a key-value pair */ + bool is_key_value_pair = false; + + if (parser_peek(parser)->type == TOKEN_LPAREN) { + /* For expression keys, we need to look ahead to find the colon */ + int look_ahead = parser->current; + int paren_count = 0; + bool found_colon = false; + + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_LPAREN) { + paren_count++; + } else if (token->type == TOKEN_RPAREN) { + paren_count--; + if (paren_count == 0) { + /* We've found the closing parenthesis, check if next is colon */ + if (look_ahead + 1 < parser->token_count && + parser->tokens[look_ahead + 1]->type == TOKEN_COLON) { + found_colon = true; + } + break; + } + } else if (token->type == TOKEN_COMMA || token->type == TOKEN_RBRACE) { + /* Stop looking if we hit table boundaries */ + break; + } + look_ahead++; + } + is_key_value_pair = found_colon; + } else { + /* For literal keys, check if next token is colon */ + is_key_value_pair = (parser_peek(parser)->type == TOKEN_IDENTIFIER || + parser_peek(parser)->type == TOKEN_NUMBER || + parser_peek(parser)->type == TOKEN_BOOLEAN || + parser_peek(parser)->type == TOKEN_STRING) && + !parser_is_at_end(parser) && + parser_peek_next(parser)->type == TOKEN_COLON; + } + + if (is_key_value_pair) { + + /* Parse key-value pair */ + ASTNode* key_node = NULL; + Token* key_token = NULL; + + if (parser_peek(parser)->type == TOKEN_LPAREN) { + /* Parse expression key */ + key_node = parser_parse_expression(parser); + if (key_node == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + /* Create a dummy token for line/column info */ + key_token = parser_peek(parser); + if (key_token == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + 
} + free(elements); + ast_destroy_node(key_node); + return NULL; + } + } else { + /* Parse literal key */ + key_token = parser_advance(parser); /* Consume the key token */ + if (key_token == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + } + + /* Consume colon */ + if (!parser_consume(parser, TOKEN_COLON, "Expected ':' after table key")) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + + /* Check if this is an arrow function by looking ahead */ + bool is_arrow_function = false; + int look_ahead = parser->current; + int identifier_count = 0; + + /* Look ahead to see if we have identifiers followed by '->' */ + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_ARROW) { + /* If we have at least one identifier before '->', it's an arrow function */ + if (identifier_count > 0) { + is_arrow_function = true; + } + break; + } + if (token->type == TOKEN_IDENTIFIER) { + identifier_count++; + } else if (token->type == TOKEN_COMMA || token->type == TOKEN_RBRACE) { + /* Stop looking if we hit table boundaries */ + break; + } else { + /* If we hit anything else, it's not an arrow function */ + identifier_count = 0; + break; + } + look_ahead++; + } + + /* Parse the value */ + if (is_arrow_function) { + /* Parse as embedded arrow function */ + value = parser_parse_embedded_arrow_function(parser); + } else { + /* Parse as general expression */ + value = parser_parse_expression(parser); + } + if (value == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + + /* For now, we'll store key-value pairs as function calls to a special "table_entry" function */ + /* This allows us to represent both key-value pairs and 
array-like entries uniformly */ + ASTNode** entry_args = malloc(2 * sizeof(ASTNode*)); + if (entry_args == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + ast_destroy_node(value); + return NULL; + } + + /* Create key value based on token type or expression */ + ASTNode* key_arg = NULL; + if (key_node != NULL) { + /* Expression key - use the parsed AST node */ + key_arg = key_node; + } else { + /* Literal key - create literal value from token */ + Value key_value; + if (key_token->type == TOKEN_IDENTIFIER) { + key_value = baba_yaga_value_string(key_token->lexeme); + } else if (key_token->type == TOKEN_NUMBER) { + key_value = baba_yaga_value_number(key_token->literal.number); + } else if (key_token->type == TOKEN_BOOLEAN) { + key_value = baba_yaga_value_boolean(key_token->literal.boolean); + } else if (key_token->type == TOKEN_STRING) { + key_value = baba_yaga_value_string(key_token->lexeme); + } else { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + free(entry_args); + ast_destroy_node(value); + return NULL; + } + key_arg = ast_literal_node(key_value, key_token->line, key_token->column); + } + + entry_args[0] = key_arg; + entry_args[1] = value; + + ASTNode* table_entry_node = ast_identifier_node("table_entry", key_token->line, key_token->column); + if (table_entry_node == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + free(entry_args); + ast_destroy_node(value); + if (key_node != NULL) { + ast_destroy_node(key_node); + } + return NULL; + } + + ASTNode* entry_node = ast_function_call_node(table_entry_node, entry_args, 2, key_token->line, key_token->column); + if (entry_node == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + 
free(entry_args); + ast_destroy_node(table_entry_node); + ast_destroy_node(value); + if (key_node != NULL) { + ast_destroy_node(key_node); + } + return NULL; + } + + value = entry_node; + } else { + /* Parse array-like entry (just a value) */ + value = parser_parse_expression(parser); + if (value == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + } + + /* Check if we need more space */ + if (element_count >= capacity) { + capacity *= 2; + ASTNode** new_elements = realloc(elements, capacity * sizeof(ASTNode*)); + if (new_elements == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + ast_destroy_node(value); + return NULL; + } + elements = new_elements; + } + + elements[element_count++] = value; + + /* Check for comma separator */ + if (!parser_is_at_end(parser) && parser_peek(parser)->type == TOKEN_COMMA) { + parser_advance(parser); /* consume ',' */ + } else if (!parser_is_at_end(parser) && parser_peek(parser)->type != TOKEN_RBRACE) { + /* No comma but not end of table - this is an error */ + parser_set_error(parser, "Expected ',' or '}' in table literal"); + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + } + + /* Expect closing brace */ + if (!parser_consume(parser, TOKEN_RBRACE, "Expected '}' after table literal")) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + + /* Create table node */ + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + + node->type = NODE_TABLE; + node->line = token->line; + node->column = token->column; + 
node->data.table.elements = elements; + node->data.table.element_count = element_count; + + return node; + } + case TOKEN_OP_UNARY_MINUS: { + DEBUG_TRACE("parser_parse_primary consuming unary minus"); + parser_advance(parser); /* consume '-' */ + ASTNode* operand = parser_parse_postfix(parser); + if (operand == NULL) { + return NULL; + } + return ast_unary_op_node(operand, "negate", token->line, token->column); + } + case TOKEN_KEYWORD_NOT: { + DEBUG_TRACE("parser_parse_primary consuming 'not'"); + parser_advance(parser); /* consume 'not' */ + ASTNode* operand = parser_parse_postfix(parser); + if (operand == NULL) { + return NULL; + } + return ast_unary_op_node(operand, "not", token->line, token->column); + } + default: + parser_set_error(parser, "Unexpected token in expression"); + return NULL; + } +} + +/** + * @brief Parse function call expression + * + * @param parser Parser instance + * @return Parsed expression node + */ +/* TODO: Re-implement function call parsing at application level */ +/* TODO: Re-implement function call parsing at application level */ + +/** + * @brief Parse power expression (^) + * + * @param parser Parser instance + * @return Parsed expression node + */ +static ASTNode* parser_parse_power(Parser* parser) { + ASTNode* left = parser_parse_postfix(parser); + if (left == NULL) { + return NULL; + } + + while (parser_check(parser, TOKEN_OP_POWER)) { + Token* op = parser_advance(parser); + ASTNode* right = parser_parse_postfix(parser); + if (right == NULL) { + ast_destroy_node(left); + return NULL; + } + + ASTNode* new_left = ast_binary_op_node(left, right, "pow", op->line, op->column); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(right); + return NULL; + } + + left = new_left; + } + + return left; +} + +/** + * @brief Parse multiplicative expression (*, /, %) + * + * @param parser Parser instance + * @return Parsed expression node + */ +static ASTNode* parser_parse_multiplicative(Parser* parser) { + ASTNode* left = 
parser_parse_power(parser); + if (left == NULL) { + return NULL; + } + + while (parser_check(parser, TOKEN_OP_MULTIPLY) || + parser_check(parser, TOKEN_OP_DIVIDE) || + parser_check(parser, TOKEN_OP_MODULO)) { + Token* op = parser_advance(parser); + ASTNode* right = parser_parse_power(parser); + if (right == NULL) { + ast_destroy_node(left); + return NULL; + } + + const char* operator_name; + switch (op->type) { + case TOKEN_OP_MULTIPLY: operator_name = "multiply"; break; + case TOKEN_OP_DIVIDE: operator_name = "divide"; break; + case TOKEN_OP_MODULO: operator_name = "modulo"; break; + default: operator_name = "unknown"; break; + } + + ASTNode* new_left = ast_binary_op_node(left, right, operator_name, op->line, op->column); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(right); + return NULL; + } + + left = new_left; + } + + return left; +} + +/** + * @brief Parse additive expression (+, -) + * + * @param parser Parser instance + * @return Parsed expression node + */ +static ASTNode* parser_parse_additive(Parser* parser) { + ASTNode* left = parser_parse_multiplicative(parser); + if (left == NULL) { + return NULL; + } + + while (parser_check(parser, TOKEN_OP_PLUS) || parser_check(parser, TOKEN_OP_MINUS)) { + Token* op = parser_advance(parser); + ASTNode* right = parser_parse_multiplicative(parser); + if (right == NULL) { + ast_destroy_node(left); + return NULL; + } + + const char* operator_name = (op->type == TOKEN_OP_PLUS) ? 
"add" : "subtract"; + + ASTNode* new_left = ast_binary_op_node(left, right, operator_name, op->line, op->column); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(right); + return NULL; + } + + left = new_left; + } + + return left; +} + +/** + * @brief Parse comparison expression (=, !=, <, <=, >, >=) + * + * @param parser Parser instance + * @return Parsed expression node + */ +static ASTNode* parser_parse_comparison(Parser* parser) { + ASTNode* left = parser_parse_additive(parser); + if (left == NULL) { + return NULL; + } + + while (parser_check(parser, TOKEN_OP_EQUALS) || + parser_check(parser, TOKEN_OP_NOT_EQUALS) || + parser_check(parser, TOKEN_OP_LESS) || + parser_check(parser, TOKEN_OP_LESS_EQUAL) || + parser_check(parser, TOKEN_OP_GREATER) || + parser_check(parser, TOKEN_OP_GREATER_EQUAL)) { + Token* op = parser_advance(parser); + ASTNode* right = parser_parse_additive(parser); + if (right == NULL) { + ast_destroy_node(left); + return NULL; + } + + const char* operator_name; + switch (op->type) { + case TOKEN_OP_EQUALS: operator_name = "equals"; break; + case TOKEN_OP_NOT_EQUALS: operator_name = "not_equals"; break; + case TOKEN_OP_LESS: operator_name = "less"; break; + case TOKEN_OP_LESS_EQUAL: operator_name = "less_equal"; break; + case TOKEN_OP_GREATER: operator_name = "greater"; break; + case TOKEN_OP_GREATER_EQUAL: operator_name = "greater_equal"; break; + default: operator_name = "unknown"; break; + } + + ASTNode* new_left = ast_binary_op_node(left, right, operator_name, op->line, op->column); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(right); + return NULL; + } + + left = new_left; + } + + return left; +} + +/** + * @brief Parse logical expression (and, or, xor) + * + * @param parser Parser instance + * @return Parsed expression node + */ +static ASTNode* parser_parse_logical(Parser* parser) { + ASTNode* left = parser_parse_comparison(parser); + if (left == NULL) { + return NULL; + } + + /* Handle 
logical operators */ + while ((parser_check(parser, TOKEN_KEYWORD_AND) || + parser_check(parser, TOKEN_KEYWORD_OR) || + parser_check(parser, TOKEN_KEYWORD_XOR)) || + (parser_check(parser, TOKEN_IDENTIFIER) && + (strcmp(parser_peek(parser)->lexeme, "and") == 0 || + strcmp(parser_peek(parser)->lexeme, "or") == 0 || + strcmp(parser_peek(parser)->lexeme, "xor") == 0))) { + Token* op = parser_advance(parser); + ASTNode* right = parser_parse_comparison(parser); + if (right == NULL) { + ast_destroy_node(left); + return NULL; + } + + const char* operator_name; + if (op->type == TOKEN_KEYWORD_AND || + (op->type == TOKEN_IDENTIFIER && strcmp(op->lexeme, "and") == 0)) { + operator_name = "and"; + } else if (op->type == TOKEN_KEYWORD_OR || + (op->type == TOKEN_IDENTIFIER && strcmp(op->lexeme, "or") == 0)) { + operator_name = "or"; + } else if (op->type == TOKEN_KEYWORD_XOR || + (op->type == TOKEN_IDENTIFIER && strcmp(op->lexeme, "xor") == 0)) { + operator_name = "xor"; + } else { + operator_name = "unknown"; + } + + ASTNode* new_left = ast_binary_op_node(left, right, operator_name, op->line, op->column); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(right); + return NULL; + } + + left = new_left; + } + + /* Handle via operator (function composition) - right-associative */ + while (parser_check(parser, TOKEN_KEYWORD_VIA)) { + Token* op = parser_advance(parser); + ASTNode* right = parser_parse_logical(parser); /* Right-associative: recurse */ + if (right == NULL) { + ast_destroy_node(left); + return NULL; + } + + ASTNode* new_left = ast_binary_op_node(left, right, "via", op->line, op->column); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(right); + return NULL; + } + + left = new_left; + } + + /* Handle function application */ + /* Skip function application if the left node is a when expression */ + if (left->type == NODE_WHEN_EXPR) { + return left; + } + + while (!parser_is_at_end(parser)) { + Token* next_token = 
parser_peek(parser); + if (next_token == NULL) break; + + + + /* Check if this token can be a function argument */ + bool can_be_arg = (next_token->type == TOKEN_IDENTIFIER || + next_token->type == TOKEN_FUNCTION_REF || + next_token->type == TOKEN_NUMBER || + next_token->type == TOKEN_STRING || + next_token->type == TOKEN_BOOLEAN || + next_token->type == TOKEN_LPAREN || + next_token->type == TOKEN_LBRACE || + next_token->type == TOKEN_OP_UNARY_MINUS || + next_token->type == TOKEN_KEYWORD_NOT); + + /* Check if this token should not trigger function application */ + bool should_not_trigger = (next_token->type == TOKEN_OP_PLUS || + next_token->type == TOKEN_OP_MINUS || + next_token->type == TOKEN_OP_MULTIPLY || + next_token->type == TOKEN_OP_DIVIDE || + next_token->type == TOKEN_OP_MODULO || + next_token->type == TOKEN_OP_POWER || + next_token->type == TOKEN_OP_EQUALS || + next_token->type == TOKEN_OP_NOT_EQUALS || + next_token->type == TOKEN_OP_LESS || + next_token->type == TOKEN_OP_LESS_EQUAL || + next_token->type == TOKEN_OP_GREATER || + next_token->type == TOKEN_OP_GREATER_EQUAL || + next_token->type == TOKEN_KEYWORD_AND || + next_token->type == TOKEN_KEYWORD_OR || + next_token->type == TOKEN_KEYWORD_XOR || + (next_token->type == TOKEN_IDENTIFIER && + (strcmp(next_token->lexeme, "and") == 0 || + strcmp(next_token->lexeme, "or") == 0 || + strcmp(next_token->lexeme, "xor") == 0)) || + next_token->type == TOKEN_KEYWORD_WHEN || + next_token->type == TOKEN_KEYWORD_IS || + next_token->type == TOKEN_KEYWORD_THEN || + next_token->type == TOKEN_KEYWORD_VIA || + next_token->type == TOKEN_RPAREN || + next_token->type == TOKEN_RBRACE || + next_token->type == TOKEN_RBRACKET || + next_token->type == TOKEN_SEMICOLON || + next_token->type == TOKEN_COMMA || + next_token->type == TOKEN_EOF); + + /* Check if this is a pattern boundary (identifier followed by 'then') */ + bool is_pattern_boundary = false; + if (next_token->type == TOKEN_IDENTIFIER) { + /* Look ahead to see if the 
next token is 'then' */ + if (parser->current + 1 < parser->token_count) { + Token* next_next_token = parser->tokens[parser->current + 1]; + if (next_next_token && next_next_token->type == TOKEN_KEYWORD_THEN) { + is_pattern_boundary = true; + DEBUG_TRACE("Found pattern boundary: %s followed by 'then'", next_token->lexeme); + } + } + } + + DEBUG_TRACE("Function application check: can_be_arg=%d, should_not_trigger=%d, is_pattern_boundary=%d", + can_be_arg, should_not_trigger, is_pattern_boundary); + + /* Only proceed with function application if it can be an arg and shouldn't trigger */ + if (!can_be_arg || should_not_trigger || is_pattern_boundary) { + + break; + } + + /* Collect all arguments for this function call */ + ASTNode** args = NULL; + int arg_count = 0; + + while (!parser_is_at_end(parser)) { + Token* arg_token = parser_peek(parser); + if (arg_token == NULL) break; + + /* Check if this token can be a function argument */ + bool can_be_arg = (arg_token->type == TOKEN_IDENTIFIER || + arg_token->type == TOKEN_FUNCTION_REF || + arg_token->type == TOKEN_NUMBER || + arg_token->type == TOKEN_STRING || + arg_token->type == TOKEN_BOOLEAN || + arg_token->type == TOKEN_LPAREN || + arg_token->type == TOKEN_LBRACE || + arg_token->type == TOKEN_OP_UNARY_MINUS || + arg_token->type == TOKEN_KEYWORD_NOT); + + /* Check if this token should not trigger function application */ + bool should_not_trigger = (arg_token->type == TOKEN_OP_PLUS || + arg_token->type == TOKEN_OP_MINUS || + arg_token->type == TOKEN_OP_MULTIPLY || + arg_token->type == TOKEN_OP_DIVIDE || + arg_token->type == TOKEN_OP_MODULO || + arg_token->type == TOKEN_OP_POWER || + arg_token->type == TOKEN_OP_EQUALS || + arg_token->type == TOKEN_OP_NOT_EQUALS || + arg_token->type == TOKEN_OP_LESS || + arg_token->type == TOKEN_OP_LESS_EQUAL || + arg_token->type == TOKEN_OP_GREATER || + arg_token->type == TOKEN_OP_GREATER_EQUAL || + arg_token->type == TOKEN_KEYWORD_AND || + arg_token->type == TOKEN_KEYWORD_OR || + 
arg_token->type == TOKEN_KEYWORD_XOR || + arg_token->type == TOKEN_KEYWORD_WHEN || + arg_token->type == TOKEN_KEYWORD_IS || + arg_token->type == TOKEN_KEYWORD_THEN || + arg_token->type == TOKEN_RPAREN || + arg_token->type == TOKEN_RBRACE || + arg_token->type == TOKEN_RBRACKET || + arg_token->type == TOKEN_SEMICOLON || + arg_token->type == TOKEN_COMMA || + arg_token->type == TOKEN_EOF); + + /* Check if this is a pattern boundary (identifier followed by 'then') */ + bool is_pattern_boundary = false; + if (arg_token->type == TOKEN_IDENTIFIER) { + /* Look ahead to see if the next token is 'then' */ + if (parser->current + 1 < parser->token_count) { + Token* next_next_token = parser->tokens[parser->current + 1]; + if (next_next_token && next_next_token->type == TOKEN_KEYWORD_THEN) { + is_pattern_boundary = true; + DEBUG_TRACE("Inner loop found pattern boundary: %s followed by 'then'", arg_token->lexeme); + } + } + } + + /* Stop if it can't be an arg, should not trigger, or is a pattern boundary */ + if (!can_be_arg || should_not_trigger || is_pattern_boundary) { + break; + } + + ASTNode* arg = parser_parse_comparison(parser); + if (arg == NULL) { + /* Cleanup on error */ + for (int i = 0; i < arg_count; i++) { + ast_destroy_node(args[i]); + } + free(args); + ast_destroy_node(left); + return NULL; + } + + /* Add to arguments array */ + ASTNode** new_args = realloc(args, (arg_count + 1) * sizeof(ASTNode*)); + if (new_args == NULL) { + /* Cleanup on error */ + for (int i = 0; i < arg_count; i++) { + ast_destroy_node(args[i]); + } + free(args); + ast_destroy_node(arg); + ast_destroy_node(left); + return NULL; + } + args = new_args; + args[arg_count++] = arg; + } + + /* Create function call with all arguments */ + ASTNode* new_left = ast_function_call_node(left, args, arg_count, left->line, left->column); + if (new_left == NULL) { + /* Cleanup on error */ + for (int i = 0; i < arg_count; i++) { + ast_destroy_node(args[i]); + } + free(args); + ast_destroy_node(left); + return 
NULL; + } + + left = new_left; + } + + return left; +} + +/** + * @brief Parse function composition (via) + * + * @param parser Parser instance + * @return Parsed expression node + */ +/* TODO: Re-implement composition parsing */ +/* +static ASTNode* parser_parse_composition(Parser* parser) { + ASTNode* left = parser_parse_application(parser); + if (left == NULL) { + return NULL; + } + + while (parser_check(parser, TOKEN_KEYWORD_VIA)) { + Token* op = parser_advance(parser); + ASTNode* right = parser_parse_logical(parser); + if (right == NULL) { + ast_destroy_node(left); + return NULL; + } + + ASTNode* new_left = ast_binary_op_node(left, right, "compose", op->line, op->column); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(right); + return NULL; + } + + left = new_left; + } + + return left; +} +*/ + + + +/** + * @brief Parse postfix operations (table access, function calls, etc.) + * + * @param parser Parser instance + * @return Parsed expression node + */ +static ASTNode* parser_parse_postfix(Parser* parser) { + ASTNode* left = parser_parse_primary(parser); + if (left == NULL) { + return NULL; + } + + while (!parser_is_at_end(parser)) { + Token* token = parser_peek(parser); + if (token == NULL) { + break; + } + + switch (token->type) { + case TOKEN_DOT: { + /* Table property access: table.property */ + parser_advance(parser); /* consume '.' 
*/ + + Token* property = parser_consume(parser, TOKEN_IDENTIFIER, "Expected property name after '.'"); + if (property == NULL) { + ast_destroy_node(left); + return NULL; + } + + ASTNode* key = ast_literal_node(baba_yaga_value_string(property->lexeme), property->line, property->column); + if (key == NULL) { + ast_destroy_node(left); + return NULL; + } + + ASTNode* new_left = malloc(sizeof(ASTNode)); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(key); + return NULL; + } + + new_left->type = NODE_TABLE_ACCESS; + new_left->line = left->line; + new_left->column = left->column; + new_left->data.table_access.object = left; + new_left->data.table_access.key = key; + + left = new_left; + break; + } + case TOKEN_LBRACKET: { + /* Table bracket access: table[key] */ + parser_advance(parser); /* consume '[' */ + + ASTNode* key = parser_parse_expression(parser); + if (key == NULL) { + ast_destroy_node(left); + return NULL; + } + + if (!parser_consume(parser, TOKEN_RBRACKET, "Expected ']' after table key")) { + ast_destroy_node(left); + ast_destroy_node(key); + return NULL; + } + + ASTNode* new_left = malloc(sizeof(ASTNode)); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(key); + return NULL; + } + + new_left->type = NODE_TABLE_ACCESS; + new_left->line = left->line; + new_left->column = left->column; + new_left->data.table_access.object = left; + new_left->data.table_access.key = key; + + left = new_left; + break; + } + default: + /* No more postfix operations */ + return left; + } + } + + return left; +} + +/** + * @brief Parse expression (entry point) + * + * @param parser Parser instance + * @return Parsed expression node + */ +static ASTNode* parser_parse_expression(Parser* parser) { + return parser_parse_logical(parser); +} + +/* ============================================================================ + * Statement Parsing + * ============================================================================ */ + +/** + * @brief 
Parse variable declaration + * + * @param parser Parser instance + * @return Parsed variable declaration node + */ +static ASTNode* parser_parse_variable_decl(Parser* parser) { + Token* name = parser_consume(parser, TOKEN_IDENTIFIER, "Expected variable name"); + if (name == NULL) { + return NULL; + } + + if (!parser_consume(parser, TOKEN_COLON, "Expected ':' after variable name")) { + return NULL; + } + + ASTNode* value = parser_parse_expression(parser); + if (value == NULL) { + return NULL; + } + + + + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + ast_destroy_node(value); + return NULL; + } + + node->type = NODE_VARIABLE_DECL; + node->line = name->line; + node->column = name->column; + node->data.variable_decl.name = strdup(name->lexeme); + node->data.variable_decl.value = value; + + + return node; +} + +/** + * @brief Parse function definition + * + * @param parser Parser instance + * @return Parsed function definition node + */ +static ASTNode* parser_parse_function_def(Parser* parser) { + Token* name = parser_consume(parser, TOKEN_IDENTIFIER, "Expected function name"); + if (name == NULL) { + return NULL; + } + + if (!parser_consume(parser, TOKEN_COLON, "Expected ':' after function name")) { + return NULL; + } + + /* Parse parameters */ + ASTNode** parameters = NULL; + int param_count = 0; + + while (!parser_is_at_end(parser) && + parser_peek(parser)->type == TOKEN_IDENTIFIER) { + Token* param = parser_advance(parser); + + ASTNode** new_params = realloc(parameters, (param_count + 1) * sizeof(ASTNode*)); + if (new_params == NULL) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + return NULL; + } + parameters = new_params; + + parameters[param_count] = ast_identifier_node(param->lexeme, param->line, param->column); + param_count++; + } + + if (!parser_consume(parser, TOKEN_ARROW, "Expected '->' after parameters")) { + for (int i = 0; i < param_count; i++) { + 
ast_destroy_node(parameters[i]); + } + free(parameters); + return NULL; + } + + ASTNode* body = parser_parse_expression(parser); + if (body == NULL) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + return NULL; + } + + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + ast_destroy_node(body); + return NULL; + } + + node->type = NODE_FUNCTION_DEF; + node->line = name->line; + node->column = name->column; + node->data.function_def.name = strdup(name->lexeme); + node->data.function_def.parameters = parameters; + node->data.function_def.param_count = param_count; + node->data.function_def.body = body; + + return node; +} + +/** + * @brief Parse embedded arrow function (params -> body) without function name + * + * @param parser Parser instance + * @return Parsed function definition node + */ +static ASTNode* parser_parse_embedded_arrow_function(Parser* parser) { + /* Parse parameters */ + ASTNode** parameters = NULL; + int param_count = 0; + + while (!parser_is_at_end(parser) && + parser_peek(parser)->type == TOKEN_IDENTIFIER) { + Token* param = parser_advance(parser); + + ASTNode** new_params = realloc(parameters, (param_count + 1) * sizeof(ASTNode*)); + if (new_params == NULL) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + return NULL; + } + parameters = new_params; + + parameters[param_count] = ast_identifier_node(param->lexeme, param->line, param->column); + param_count++; + } + + if (!parser_consume(parser, TOKEN_ARROW, "Expected '->' after parameters")) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + return NULL; + } + + ASTNode* body = parser_parse_expression(parser); + if (body == NULL) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + 
free(parameters); + return NULL; + } + + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + ast_destroy_node(body); + return NULL; + } + + node->type = NODE_FUNCTION_DEF; + node->line = parser_peek(parser)->line; + node->column = parser_peek(parser)->column; + node->data.function_def.name = strdup(""); /* Empty name for embedded functions */ + node->data.function_def.parameters = parameters; + node->data.function_def.param_count = param_count; + node->data.function_def.body = body; + + return node; +} + +/** + * @brief Parse multiple statements separated by semicolons + * + * @param parser Parser instance + * @return Parsed sequence node or single statement node + */ +static ASTNode* parser_parse_statements(Parser* parser) { + if (parser_is_at_end(parser)) { + return NULL; + } + + /* Parse first statement */ + ASTNode* first_statement = parser_parse_statement(parser); + if (first_statement == NULL) { + return NULL; + } + + /* Check if there are more statements (semicolon-separated) */ + if (parser_is_at_end(parser)) { + return first_statement; /* Single statement */ + } + + Token* next_token = parser_peek(parser); + if (next_token->type != TOKEN_SEMICOLON) { + return first_statement; /* Single statement */ + } + + /* We have multiple statements, collect them */ + ASTNode** statements = malloc(10 * sizeof(ASTNode*)); /* Start with space for 10 */ + if (statements == NULL) { + ast_destroy_node(first_statement); + return NULL; + } + + int statement_count = 0; + int capacity = 10; + + /* Add first statement */ + statements[statement_count++] = first_statement; + + /* Parse remaining statements */ + while (!parser_is_at_end(parser) && + parser_peek(parser)->type == TOKEN_SEMICOLON) { + + /* Consume semicolon */ + parser_consume(parser, TOKEN_SEMICOLON, "Expected semicolon"); + + /* Skip any whitespace after semicolon */ + /* Comments are already skipped by 
the lexer */ + + if (parser_is_at_end(parser)) { + break; /* Trailing semicolon */ + } + + /* Parse next statement */ + ASTNode* next_statement = parser_parse_statement(parser); + if (next_statement == NULL) { + /* Error parsing statement, but continue with what we have */ + break; + } + + /* Expand array if needed */ + if (statement_count >= capacity) { + capacity *= 2; + ASTNode** new_statements = realloc(statements, capacity * sizeof(ASTNode*)); + if (new_statements == NULL) { + /* Cleanup and return what we have */ + for (int i = 0; i < statement_count; i++) { + ast_destroy_node(statements[i]); + } + free(statements); + return NULL; + } + statements = new_statements; + } + + statements[statement_count++] = next_statement; + } + + /* If we only have one statement, return it directly */ + if (statement_count == 1) { + ASTNode* result = statements[0]; + free(statements); + return result; + } + + /* Create sequence node */ + return ast_sequence_node(statements, statement_count, + first_statement->line, first_statement->column); +} + +/** + * @brief Parse statement + * + * @param parser Parser instance + * @return Parsed statement node + */ +static ASTNode* parser_parse_statement(Parser* parser) { + if (parser_is_at_end(parser)) { + return NULL; + } + + Token* token = parser_peek(parser); + + /* Check for variable declaration */ + if (token->type == TOKEN_IDENTIFIER && + parser_peek_next(parser) != NULL && + parser_peek_next(parser)->type == TOKEN_COLON) { + + /* Look ahead to see if it's a function definition */ + int save_current = parser->current; + parser->current += 2; /* skip identifier and colon */ + + bool is_function = false; + while (!parser_is_at_end(parser) && + parser_peek(parser)->type == TOKEN_IDENTIFIER) { + parser->current++; + } + + if (!parser_is_at_end(parser) && + parser_peek(parser)->type == TOKEN_ARROW) { + is_function = true; + } + + parser->current = save_current; + + if (is_function) { + return parser_parse_function_def(parser); + } else { 
+ return parser_parse_variable_decl(parser); + } + } + + + + /* Default to expression */ + return parser_parse_expression(parser); +} + +/* ============================================================================ + * Public Parser API + * ============================================================================ */ + +/** + * @brief Parse source code into AST + * + * @param tokens Array of tokens + * @param token_count Number of tokens + * @return Root AST node, or NULL on error + */ +void* baba_yaga_parse(void** tokens, size_t token_count) { + if (tokens == NULL || token_count == 0) { + return NULL; + } + + Parser* parser = parser_create((Token**)tokens, (int)token_count); + if (parser == NULL) { + return NULL; + } + + ASTNode* result = parser_parse_statements(parser); + + if (parser->has_error) { + fprintf(stderr, "Parse error: %s\n", parser->error_message); + if (result != NULL) { + ast_destroy_node(result); + result = NULL; + } + } + + parser_destroy(parser); + return (void*)result; +} + +/** + * @brief Destroy AST + * + * @param node Root AST node + */ +void baba_yaga_destroy_ast(void* node) { + ast_destroy_node((ASTNode*)node); +} + +/** + * @brief Print AST for debugging + * + * @param node Root AST node + * @param indent Initial indentation level + */ +/* ============================================================================ + * AST Accessor Functions + * ============================================================================ */ + +NodeType baba_yaga_ast_get_type(void* node) { + if (node == NULL) { + return NODE_LITERAL; /* Default fallback */ + } + ASTNode* ast_node = (ASTNode*)node; + return ast_node->type; +} + +Value baba_yaga_ast_get_literal(void* node) { + if (node == NULL) { + return baba_yaga_value_nil(); + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_LITERAL) { + return baba_yaga_value_copy(&ast_node->data.literal); + } + return baba_yaga_value_nil(); +} + +const char* baba_yaga_ast_get_identifier(void* 
node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_IDENTIFIER) { + return ast_node->data.identifier; + } + return NULL; +} + +void* baba_yaga_ast_get_function_call_func(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_FUNCTION_CALL) { + return ast_node->data.function_call.function; + } + return NULL; +} + +int baba_yaga_ast_get_function_call_arg_count(void* node) { + if (node == NULL) { + return 0; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_FUNCTION_CALL) { + return ast_node->data.function_call.arg_count; + } + return 0; +} + +void* baba_yaga_ast_get_function_call_arg(void* node, int index) { + if (node == NULL || index < 0) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_FUNCTION_CALL && + index < ast_node->data.function_call.arg_count) { + return ast_node->data.function_call.arguments[index]; + } + return NULL; +} + +void* baba_yaga_ast_get_binary_op_left(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_BINARY_OP) { + return ast_node->data.binary.left; + } + return NULL; +} + +void* baba_yaga_ast_get_binary_op_right(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_BINARY_OP) { + return ast_node->data.binary.right; + } + return NULL; +} + +const char* baba_yaga_ast_get_binary_op_operator(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_BINARY_OP) { + return ast_node->data.binary.operator; + } + return NULL; +} + +void* baba_yaga_ast_get_unary_op_operand(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_UNARY_OP) { + return ast_node->data.unary.operand; + } + return NULL; +} + 
+const char* baba_yaga_ast_get_unary_op_operator(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_UNARY_OP) { + return ast_node->data.unary.operator; + } + return NULL; +} + +const char* baba_yaga_ast_get_function_def_name(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_FUNCTION_DEF) { + return ast_node->data.function_def.name; + } + return NULL; +} + +int baba_yaga_ast_get_function_def_param_count(void* node) { + if (node == NULL) { + return 0; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_FUNCTION_DEF) { + return ast_node->data.function_def.param_count; + } + return 0; +} + +void* baba_yaga_ast_get_function_def_param(void* node, int index) { + if (node == NULL || index < 0) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_FUNCTION_DEF) { + if (index < ast_node->data.function_def.param_count) { + return ast_node->data.function_def.parameters[index]; + } + } + return NULL; +} + +void* baba_yaga_ast_get_function_def_body(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_FUNCTION_DEF) { + return ast_node->data.function_def.body; + } + return NULL; +} + +const char* baba_yaga_ast_get_variable_decl_name(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_VARIABLE_DECL) { + return ast_node->data.variable_decl.name; + } + return NULL; +} + +void* baba_yaga_ast_get_variable_decl_value(void* node) { + if (node == NULL) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_VARIABLE_DECL) { + return ast_node->data.variable_decl.value; + } + return NULL; +} + +int baba_yaga_ast_get_sequence_statement_count(void* node) { + if (node == NULL) { + return 0; + } + ASTNode* ast_node = (ASTNode*)node; + 
if (ast_node->type == NODE_SEQUENCE) { + return ast_node->data.sequence.statement_count; + } + return 0; +} + +void* baba_yaga_ast_get_sequence_statement(void* node, int index) { + if (node == NULL || index < 0) { + return NULL; + } + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type == NODE_SEQUENCE) { + if (index < ast_node->data.sequence.statement_count) { + return ast_node->data.sequence.statements[index]; + } + } + return NULL; +} + +void* baba_yaga_ast_get_when_expr_test(void* node) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_WHEN_EXPR) { + return NULL; + } + + return ast_node->data.when_expr.test; +} + +int baba_yaga_ast_get_when_expr_pattern_count(void* node) { + if (node == NULL) { + return 0; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_WHEN_EXPR) { + return 0; + } + + return ast_node->data.when_expr.pattern_count; +} + +void* baba_yaga_ast_get_when_expr_pattern(void* node, int index) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_WHEN_EXPR) { + return NULL; + } + + if (index >= 0 && index < ast_node->data.when_expr.pattern_count) { + return ast_node->data.when_expr.patterns[index]; + } + return NULL; +} + +void* baba_yaga_ast_get_when_pattern_test(void* node) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_WHEN_PATTERN) { + return NULL; + } + + return ast_node->data.when_pattern.test; +} + +void* baba_yaga_ast_get_when_pattern_result(void* node) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_WHEN_PATTERN) { + return NULL; + } + + return ast_node->data.when_pattern.result; +} + +int baba_yaga_ast_get_table_element_count(void* node) { + if (node == NULL) { + return 0; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_TABLE) { + return 0; + } 
+ + return ast_node->data.table.element_count; +} + +void* baba_yaga_ast_get_table_element(void* node, int index) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_TABLE) { + return NULL; + } + + if (index >= 0 && index < ast_node->data.table.element_count) { + return ast_node->data.table.elements[index]; + } + return NULL; +} + +void* baba_yaga_ast_get_table_access_object(void* node) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_TABLE_ACCESS) { + return NULL; + } + + return ast_node->data.table_access.object; +} + +void* baba_yaga_ast_get_table_access_key(void* node) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_TABLE_ACCESS) { + return NULL; + } + + return ast_node->data.table_access.key; +} + +void baba_yaga_print_ast(void* node, int indent) { + if (node == NULL) { + return; + } + + ASTNode* ast_node = (ASTNode*)node; + + /* Print indentation */ + for (int i = 0; i < indent; i++) { + printf(" "); + } + + /* Print node type */ + printf("%s", node_type_name(ast_node->type)); + + /* Print node-specific information */ + switch (ast_node->type) { + case NODE_LITERAL: + if (ast_node->data.literal.type == VAL_NUMBER) { + printf(": %g", ast_node->data.literal.data.number); + } else if (ast_node->data.literal.type == VAL_STRING) { + printf(": \"%s\"", ast_node->data.literal.data.string); + } else if (ast_node->data.literal.type == VAL_BOOLEAN) { + printf(": %s", ast_node->data.literal.data.boolean ? 
"true" : "false"); + } + break; + case NODE_IDENTIFIER: + printf(": %s", ast_node->data.identifier); + break; + case NODE_FUNCTION_CALL: + printf(" (args: %d)", ast_node->data.function_call.arg_count); + break; + case NODE_FUNCTION_DEF: + printf(": %s (params: %d)", ast_node->data.function_def.name, ast_node->data.function_def.param_count); + break; + case NODE_VARIABLE_DECL: + printf(": %s", ast_node->data.variable_decl.name); + break; + case NODE_SEQUENCE: + printf(" (statements: %d)", ast_node->data.sequence.statement_count); + break; + default: + break; + } + + printf(" (line %d, col %d)\n", ast_node->line, ast_node->column); + + /* Print children */ + switch (ast_node->type) { + case NODE_FUNCTION_CALL: + baba_yaga_print_ast(ast_node->data.function_call.function, indent + 1); + for (int i = 0; i < ast_node->data.function_call.arg_count; i++) { + baba_yaga_print_ast(ast_node->data.function_call.arguments[i], indent + 1); + } + break; + case NODE_FUNCTION_DEF: + for (int i = 0; i < ast_node->data.function_def.param_count; i++) { + baba_yaga_print_ast(ast_node->data.function_def.parameters[i], indent + 1); + } + baba_yaga_print_ast(ast_node->data.function_def.body, indent + 1); + break; + case NODE_VARIABLE_DECL: + baba_yaga_print_ast(ast_node->data.variable_decl.value, indent + 1); + break; + case NODE_SEQUENCE: + for (int i = 0; i < ast_node->data.sequence.statement_count; i++) { + baba_yaga_print_ast(ast_node->data.sequence.statements[i], indent + 1); + } + break; + default: + break; + } +} + +/** + * @brief Parse when expression + * + * @param parser Parser instance + * @return Parsed when expression node + */ +static ASTNode* parser_parse_when_expression(Parser* parser) { + DEBUG_DEBUG("Parsing WHEN expression at token %d", parser->current); + Token* when_token = parser_consume(parser, TOKEN_KEYWORD_WHEN, "Expected 'when'"); + if (!when_token) return NULL; + + + + /* Check if this is a multi-parameter pattern by looking ahead for multiple identifiers */ + 
bool is_multi_param = false; + int look_ahead = parser->current; + int identifier_count = 0; + + /* Count consecutive identifiers or expressions before 'is' */ + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_KEYWORD_IS) { + break; + } + if (token->type == TOKEN_IDENTIFIER) { + identifier_count++; + } else if (token->type == TOKEN_LPAREN) { + /* Expression in parentheses - count as one parameter */ + identifier_count++; + /* Skip to closing parenthesis */ + int paren_count = 1; + look_ahead++; + while (look_ahead < parser->token_count && paren_count > 0) { + Token* next_token = parser->tokens[look_ahead]; + if (next_token->type == TOKEN_LPAREN) { + paren_count++; + } else if (next_token->type == TOKEN_RPAREN) { + paren_count--; + } + look_ahead++; + } + /* Continue from the position after the closing parenthesis */ + continue; + } else { + /* If we hit anything other than an identifier or expression, it's not multi-parameter */ + identifier_count = 0; + break; + } + look_ahead++; + } + + /* If we have multiple identifiers followed by 'is', it's multi-parameter */ + if (identifier_count > 1) { + is_multi_param = true; + } + + ASTNode* test; + if (is_multi_param) { + /* Parse as sequence of identifiers or expressions */ + ASTNode** identifiers = malloc(identifier_count * sizeof(ASTNode*)); + if (!identifiers) return NULL; + + for (int i = 0; i < identifier_count; i++) { + Token* current_token = parser_peek(parser); + if (current_token->type == TOKEN_LPAREN) { + /* Expression in parentheses - parse the expression */ + identifiers[i] = parser_parse_expression(parser); + if (identifiers[i] == NULL) { + /* Cleanup on error */ + for (int j = 0; j < i; j++) { + ast_destroy_node(identifiers[j]); + } + free(identifiers); + return NULL; + } + } else { + /* Identifier - parse as identifier */ + Token* id_token = parser_advance(parser); + identifiers[i] = ast_identifier_node(id_token->lexeme, id_token->line, 
id_token->column); + } + } + + /* Create a sequence node for the identifiers */ + test = ast_sequence_node(identifiers, identifier_count, when_token->line, when_token->column); + + /* Ensure we're positioned at the 'is' token */ + if (parser->current < parser->token_count && + parser->tokens[parser->current]->type != TOKEN_KEYWORD_IS) { + /* We're not at the 'is' token - find it */ + for (int j = parser->current; j < parser->token_count; j++) { + if (parser->tokens[j]->type == TOKEN_KEYWORD_IS) { + parser->current = j; + break; + } + } + } + } else { + /* Parse as single expression */ + test = parser_parse_expression(parser); + } + + if (!test) return NULL; + Token* is_token = parser_consume(parser, TOKEN_KEYWORD_IS, "Expected 'is' after test expression"); + if (!is_token) { ast_destroy_node(test); return NULL; } + + // Prepare flat array of NODE_WHEN_PATTERN nodes + ASTNode** patterns = NULL; + int pattern_count = 0, pattern_cap = 4; + patterns = malloc(pattern_cap * sizeof(ASTNode*)); + + while (!parser_is_at_end(parser) && parser_peek(parser)->type != TOKEN_SEMICOLON) { + // Parse pattern + ASTNode* pattern = parser_parse_when_pattern(parser); + if (!pattern) break; + // Expect 'then' + Token* then_token = parser_consume(parser, TOKEN_KEYWORD_THEN, "Expected 'then' after pattern in when case"); + if (!then_token) { ast_destroy_node(pattern); break; } + // Parse result (single expression) + ASTNode* result = parser_parse_when_result_expression(parser); + if (!result) { ast_destroy_node(pattern); break; } + // Create NODE_WHEN_PATTERN node + ASTNode* case_node = ast_when_pattern_node(pattern, result, when_token->line, when_token->column); + if (pattern_count >= pattern_cap) { + pattern_cap *= 2; + patterns = realloc(patterns, pattern_cap * sizeof(ASTNode*)); + } + patterns[pattern_count++] = case_node; + // If next token is a valid pattern start, continue loop; else break + Token* next = parser_peek(parser); + if (!next || next->type == TOKEN_SEMICOLON) break; + 
int is_wildcard = (next->type == TOKEN_IDENTIFIER && next->lexeme && strcmp(next->lexeme, "_") == 0); + if (!(is_wildcard || next->type == TOKEN_IDENTIFIER || next->type == TOKEN_NUMBER || next->type == TOKEN_STRING)) break; + } + // Build AST node for when expression + ASTNode* when_node = ast_when_expr_node(test, patterns, pattern_count, when_token->line, when_token->column); + + return when_node; +} + +/** + * @brief Parse when pattern + * + * @param parser Parser instance + * @return Parsed when pattern node + */ +// Helper: look ahead to see if the next two tokens are a pattern start followed by 'then' +static bool parser_is_next_pattern(Parser* parser) { + if (parser_is_at_end(parser)) return false; + Token* t1 = parser_peek(parser); + if (!t1) return false; + if (t1->type != TOKEN_IDENTIFIER && t1->type != TOKEN_NUMBER && t1->type != TOKEN_STRING) return false; + // Look ahead one more + if (parser->current + 1 >= parser->token_count) return false; + Token* t2 = parser->tokens[parser->current + 1]; + return t2 && t2->type == TOKEN_KEYWORD_THEN; +} + +// Parse a result expression for a when pattern, stopping at pattern boundaries +static ASTNode* parser_parse_when_result_expression(Parser* parser) { + DEBUG_TRACE("parser_parse_when_result_expression start at token %d", parser->current); + + // Show current token before parsing + Token* before_token = parser_peek(parser); + if (before_token) { + DEBUG_TRACE("Before parsing result, token type=%d, lexeme='%s'", + before_token->type, before_token->lexeme ? 
before_token->lexeme : "NULL"); + } + + // Check if the next token is a pattern start followed by 'then' + // If so, return an empty result expression + if (parser_is_next_pattern(parser)) { + DEBUG_TRACE("Detected next pattern, returning empty result"); + return ast_literal_node(baba_yaga_value_string(""), parser_peek(parser)->line, parser_peek(parser)->column); + } + + // Parse a single expression using a bounded parser + // Stop when we hit a pattern boundary or statement terminator + ASTNode* result = parser_parse_primary(parser); + if (result == NULL) { + return NULL; + } + + // Show current token after parsing + Token* after_token = parser_peek(parser); + if (after_token) { + DEBUG_TRACE("After parsing result, token type=%d, lexeme='%s'", + after_token->type, after_token->lexeme ? after_token->lexeme : "NULL"); + } + + DEBUG_TRACE("parser_parse_when_result_expression end at token %d", parser->current); + return result; +} + +static ASTNode* parser_parse_when_pattern(Parser* parser) { + DEBUG_DEBUG("Parsing WHEN pattern at token %d", parser->current); + DEBUG_TRACE("parser_parse_when_pattern start"); + + /* Show current token */ + Token* current_token = parser_peek(parser); + if (current_token != NULL) { + DEBUG_TRACE("Current token type=%d, lexeme='%s'", current_token->type, current_token->lexeme ? 
current_token->lexeme : "NULL"); + } + + /* Check if this is a multi-parameter pattern by looking ahead for multiple literals */ + bool is_multi_param = false; + int look_ahead = parser->current; + int literal_count = 0; + + /* Count consecutive literals or expressions before 'then' */ + DEBUG_DEBUG("Multi-parameter detection: starting at token %d", look_ahead); + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_KEYWORD_THEN) { + break; + } + if (token->type == TOKEN_IDENTIFIER || + token->type == TOKEN_NUMBER || + token->type == TOKEN_STRING || + (token->type == TOKEN_IDENTIFIER && token->lexeme && strcmp(token->lexeme, "_") == 0)) { + literal_count++; + } else if (token->type == TOKEN_LPAREN) { + /* Expression in parentheses - count as one pattern */ + DEBUG_DEBUG("Multi-parameter detection: found TOKEN_LPAREN at token %d", look_ahead); + literal_count++; + /* Skip to closing parenthesis */ + int paren_count = 1; + look_ahead++; + while (look_ahead < parser->token_count && paren_count > 0) { + Token* next_token = parser->tokens[look_ahead]; + if (next_token->type == TOKEN_LPAREN) { + paren_count++; + } else if (next_token->type == TOKEN_RPAREN) { + paren_count--; + } + look_ahead++; + } + DEBUG_DEBUG("Multi-parameter detection: finished expression, literal_count=%d, look_ahead=%d", literal_count, look_ahead); + /* Continue from the position after the closing parenthesis */ + continue; + } else if (token->type == TOKEN_OP_EQUALS || + token->type == TOKEN_OP_NOT_EQUALS || + token->type == TOKEN_OP_LESS || + token->type == TOKEN_OP_LESS_EQUAL || + token->type == TOKEN_OP_GREATER || + token->type == TOKEN_OP_GREATER_EQUAL) { + /* If we hit a comparison operator, it's not multi-parameter */ + literal_count = 0; + break; + } else if (token->type == TOKEN_SEMICOLON) { + /* If we hit a semicolon, stop looking */ + break; + } else { + /* If we hit anything other than a literal or expression, it's not 
multi-parameter */ + literal_count = 0; + break; + } + look_ahead++; + } + + /* If we have multiple literals followed by 'then', it's multi-parameter */ + DEBUG_DEBUG("Multi-parameter detection: final literal_count=%d, is_multi_param=%s", literal_count, literal_count > 1 ? "true" : "false"); + if (literal_count > 1) { + is_multi_param = true; + } + + ASTNode* pattern_test; + if (is_multi_param) { + /* Parse as sequence of literals */ + ASTNode** literals = malloc(literal_count * sizeof(ASTNode*)); + if (!literals) return NULL; + + for (int i = 0; i < literal_count; i++) { + Token* current_token = parser_peek(parser); + if (current_token->type == TOKEN_LPAREN) { + /* Expression pattern - parse the expression */ + literals[i] = parser_parse_expression(parser); + if (literals[i] == NULL) { + /* Cleanup on error */ + for (int j = 0; j < i; j++) { + ast_destroy_node(literals[j]); + } + free(literals); + return NULL; + } + } else { + /* Literal pattern */ + Token* lit_token = parser_advance(parser); + if (lit_token->type == TOKEN_IDENTIFIER && lit_token->lexeme && strcmp(lit_token->lexeme, "_") == 0) { + /* Wildcard pattern - treat as literal in multi-parameter context */ + literals[i] = ast_literal_node(baba_yaga_value_string("_"), lit_token->line, lit_token->column); + } else if (lit_token->type == TOKEN_IDENTIFIER) { + /* Identifier pattern */ + literals[i] = ast_identifier_node(lit_token->lexeme, lit_token->line, lit_token->column); + } else if (lit_token->type == TOKEN_NUMBER) { + /* Number pattern */ + literals[i] = ast_literal_node(baba_yaga_value_number(lit_token->literal.number), lit_token->line, lit_token->column); + } else if (lit_token->type == TOKEN_STRING) { + /* String pattern */ + literals[i] = ast_literal_node(baba_yaga_value_string(lit_token->lexeme), lit_token->line, lit_token->column); + } else { + /* Cleanup on error */ + for (int j = 0; j < i; j++) { + ast_destroy_node(literals[j]); + } + free(literals); + return NULL; + } + } + } + + /* Create a 
sequence node for the literals */ + pattern_test = ast_sequence_node(literals, literal_count, parser_peek(parser)->line, parser_peek(parser)->column); + } else if (current_token && current_token->type == TOKEN_LBRACE) { + /* Table pattern: { status: "placeholder" } */ + DEBUG_TRACE("Found table pattern"); + /* Parse as table literal */ + pattern_test = parser_parse_primary(parser); + if (pattern_test == NULL) { + DEBUG_TRACE("Failed to parse table pattern"); + return NULL; + } + DEBUG_TRACE("Successfully parsed table pattern"); + } else if (current_token && current_token->type == TOKEN_IDENTIFIER && + current_token->lexeme && strcmp(current_token->lexeme, "_") == 0) { + /* Special handling for single wildcard pattern */ + DEBUG_TRACE("Found wildcard pattern"); + /* Create a special wildcard literal */ + pattern_test = ast_literal_node(baba_yaga_value_string("_"), + current_token->line, current_token->column); + /* Consume the _ token */ + parser_advance(parser); + DEBUG_TRACE("Consumed _ token, current token type=%d, lexeme='%s'", + parser_peek(parser)->type, parser_peek(parser)->lexeme ? 
parser_peek(parser)->lexeme : "NULL"); + } else { + /* Parse pattern test expression - stop at 'then' */ + /* Check if this is a comparison expression by looking ahead */ + bool is_comparison = false; + int look_ahead = parser->current; + + /* Look ahead to see if there's a comparison operator */ + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_KEYWORD_THEN) { + break; /* Found 'then', stop looking */ + } + if (token->type == TOKEN_OP_EQUALS || + token->type == TOKEN_OP_NOT_EQUALS || + token->type == TOKEN_OP_LESS || + token->type == TOKEN_OP_LESS_EQUAL || + token->type == TOKEN_OP_GREATER || + token->type == TOKEN_OP_GREATER_EQUAL) { + is_comparison = true; + break; + } + look_ahead++; + } + + if (is_comparison) { + /* Parse as comparison expression but stop at 'then' */ + /* Find the 'then' token position */ + int then_pos = -1; + for (int i = parser->current; i < parser->token_count; i++) { + if (parser->tokens[i]->type == TOKEN_KEYWORD_THEN) { + then_pos = i; + break; + } + } + + if (then_pos == -1) { + DEBUG_TRACE("No 'then' token found after comparison pattern"); + return NULL; + } + + /* Temporarily limit parsing to stop at 'then' */ + int original_token_count = parser->token_count; + parser->token_count = then_pos; + + /* Parse the comparison expression */ + pattern_test = parser_parse_comparison(parser); + + /* Restore parser state */ + parser->token_count = original_token_count; + } else { + /* Parse as simple expression */ + pattern_test = parser_parse_primary(parser); + } + + if (pattern_test == NULL) { + DEBUG_TRACE("Failed to parse pattern test expression"); + return NULL; + } + DEBUG_TRACE("Parsed pattern test expression"); + } + + DEBUG_TRACE("parser_parse_when_pattern success"); + + /* Create when pattern node - only the pattern test, result will be added by caller */ + return pattern_test; +} + +/* Helper function to get node type name */ +static const char* 
node_type_name(NodeType type) { + switch (type) { + case NODE_LITERAL: return "LITERAL"; + case NODE_IDENTIFIER: return "IDENTIFIER"; + case NODE_BINARY_OP: return "BINARY_OP"; + case NODE_UNARY_OP: return "UNARY_OP"; + case NODE_FUNCTION_CALL: return "FUNCTION_CALL"; + case NODE_FUNCTION_DEF: return "FUNCTION_DEF"; + case NODE_VARIABLE_DECL: return "VARIABLE_DECL"; + case NODE_WHEN_EXPR: return "WHEN_EXPR"; + case NODE_WHEN_PATTERN: return "WHEN_PATTERN"; + case NODE_TABLE: return "TABLE"; + case NODE_TABLE_ACCESS: return "TABLE_ACCESS"; + case NODE_IO_OPERATION: return "IO_OPERATION"; + case NODE_SEQUENCE: return "SEQUENCE"; + default: return "UNKNOWN"; + } +} |