diff options
Diffstat (limited to 'js/scripting-lang/baba-yaga-c/src/parser.c')
-rw-r--r-- | js/scripting-lang/baba-yaga-c/src/parser.c | 1447 |
1 files changed, 1193 insertions, 254 deletions
diff --git a/js/scripting-lang/baba-yaga-c/src/parser.c b/js/scripting-lang/baba-yaga-c/src/parser.c index 3d60ac1..896c24f 100644 --- a/js/scripting-lang/baba-yaga-c/src/parser.c +++ b/js/scripting-lang/baba-yaga-c/src/parser.c @@ -61,7 +61,8 @@ typedef enum { TOKEN_IO_IN, TOKEN_IO_OUT, TOKEN_IO_ASSERT, - TOKEN_COMMENT + TOKEN_IO_EMIT, + TOKEN_IO_LISTEN } TokenType; typedef struct { @@ -232,32 +233,30 @@ static ASTNode* ast_function_call_node(ASTNode* function, ASTNode** arguments, } /** - * @brief Create a binary operator node (translated to function call) + * @brief Create a binary operator node * * @param left Left operand * @param right Right operand * @param operator Operator name * @param line Line number * @param column Column number - * @return New function call node representing the operator + * @return New binary operator node */ static ASTNode* ast_binary_op_node(ASTNode* left, ASTNode* right, const char* operator, int line, int column) { - /* Create simple function call: operator(left, right) */ - ASTNode* operator_node = ast_identifier_node(operator, line, column); - if (operator_node == NULL) { + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { return NULL; } - ASTNode** args = malloc(2 * sizeof(ASTNode*)); - if (args == NULL) { - free(operator_node); - return NULL; - } - args[0] = left; - args[1] = right; + node->type = NODE_BINARY_OP; + node->line = line; + node->column = column; + node->data.binary.left = left; + node->data.binary.right = right; + node->data.binary.operator = strdup(operator); - return ast_function_call_node(operator_node, args, 2, line, column); + return node; } /** @@ -336,6 +335,7 @@ static ASTNode* ast_when_expr_node(ASTNode* test, ASTNode** patterns, node->data.when_expr.patterns = patterns; node->data.when_expr.pattern_count = pattern_count; + return node; } @@ -584,12 +584,16 @@ static Token* parser_consume(Parser* parser, TokenType type, const char* error_m /* Forward declarations */ static ASTNode* parser_parse_expression(Parser* parser); static ASTNode* parser_parse_logical(Parser* parser); -static ASTNode* parser_parse_composition(Parser* parser); -static ASTNode* parser_parse_application(Parser* parser); +/* static ASTNode* parser_parse_composition(Parser* parser); */ +/* static ASTNode* parser_parse_application(Parser* parser); */ static ASTNode* parser_parse_statement(Parser* parser); static ASTNode* parser_parse_when_expression(Parser* parser); static ASTNode* parser_parse_when_pattern(Parser* parser); +static ASTNode* parser_parse_when_result_expression(Parser* parser); +static ASTNode* parser_parse_postfix(Parser* parser); static const char* node_type_name(NodeType type); +static ASTNode* parser_parse_function_def(Parser* parser); +static ASTNode* parser_parse_embedded_arrow_function(Parser* parser); /** * @brief Parse primary expression (literals, identifiers, parentheses) @@ -606,21 +610,25 @@ static ASTNode* parser_parse_primary(Parser* parser) { switch (token->type) { case TOKEN_NUMBER: { + DEBUG_TRACE("parser_parse_primary consuming number: %g", token->literal.number); parser_advance(parser); return ast_literal_node(baba_yaga_value_number(token->literal.number), token->line, token->column); } case TOKEN_STRING: { + DEBUG_TRACE("parser_parse_primary consuming string: %s", token->lexeme); parser_advance(parser); return ast_literal_node(baba_yaga_value_string(token->lexeme), token->line, token->column); } case TOKEN_BOOLEAN: { + DEBUG_TRACE("parser_parse_primary consuming boolean: %s", token->literal.boolean ? "true" : "false"); parser_advance(parser); return ast_literal_node(baba_yaga_value_boolean(token->literal.boolean), token->line, token->column); } case TOKEN_IDENTIFIER: { + DEBUG_TRACE("parser_parse_primary consuming identifier: %s", token->lexeme); parser_advance(parser); /* Special handling for wildcard pattern */ if (strcmp(token->lexeme, "_") == 0) { @@ -631,7 +639,10 @@ static ASTNode* parser_parse_primary(Parser* parser) { } case TOKEN_IO_IN: case TOKEN_IO_OUT: - case TOKEN_IO_ASSERT: { + case TOKEN_IO_ASSERT: + case TOKEN_IO_EMIT: + case TOKEN_IO_LISTEN: { + DEBUG_TRACE("parser_parse_primary consuming io operation: %s", token->lexeme); parser_advance(parser); /* IO operations are treated as function calls - strip the ".." prefix */ const char* func_name = token->lexeme + 2; /* Skip ".." */ @@ -662,16 +673,55 @@ static ASTNode* parser_parse_primary(Parser* parser) { return ast_function_call_node(func_node, args, 1, token->line, token->column); } + /* For ..emit, parse the entire expression as a single argument */ + if (strcmp(func_name, "emit") == 0) { + /* Parse the expression */ + ASTNode* expr = parser_parse_expression(parser); + if (expr == NULL) { + return NULL; + } + + /* Create function call with the expression as argument */ + ASTNode** args = malloc(1 * sizeof(ASTNode*)); + if (args == NULL) { + ast_destroy_node(expr); + return NULL; + } + args[0] = expr; + + ASTNode* func_node = ast_identifier_node(func_name, token->line, token->column); + if (func_node == NULL) { + free(args); + ast_destroy_node(expr); + return NULL; + } + + return ast_function_call_node(func_node, args, 1, token->line, token->column); + } + + /* For ..listen, create a function call with no arguments */ + if (strcmp(func_name, "listen") == 0) { + ASTNode* func_node = ast_identifier_node(func_name, token->line, token->column); + if (func_node == NULL) { + return NULL; + } + + return ast_function_call_node(func_node, NULL, 0, token->line, token->column); + } + return ast_identifier_node(func_name, token->line, token->column); } case TOKEN_KEYWORD_WHEN: { + return parser_parse_when_expression(parser); } case TOKEN_FUNCTION_REF: { + DEBUG_TRACE("parser_parse_primary consuming function ref: %s", token->lexeme); parser_advance(parser); /* Check if this is @(expression) syntax */ if (!parser_is_at_end(parser) && parser_peek(parser)->type == TOKEN_LPAREN) { + DEBUG_TRACE("parser_parse_primary consuming '('"); parser_advance(parser); /* consume '(' */ /* Parse the expression inside parentheses */ @@ -697,6 +747,7 @@ static ASTNode* parser_parse_primary(Parser* parser) { } /* Check if this function reference is followed by arguments */ + /* Only treat as function call if it's at the top level (not in an argument position) */ if (!parser_is_at_end(parser)) { Token* next_token = parser_peek(parser); if (next_token != NULL && @@ -719,6 +770,11 @@ static ASTNode* parser_parse_primary(Parser* parser) { next_token->type != TOKEN_COMMA && next_token->type != TOKEN_EOF) { + /* For now, always treat function references as values, not function calls */ + /* This allows them to be passed as arguments to other functions */ + DEBUG_TRACE("parser_parse_primary: treating function reference as value"); + return func_node; + /* Parse arguments for this function call */ ASTNode** args = NULL; int arg_count = 0; @@ -752,7 +808,7 @@ static ASTNode* parser_parse_primary(Parser* parser) { } /* Parse argument */ - ASTNode* arg = parser_parse_primary(parser); + ASTNode* arg = parser_parse_postfix(parser); if (arg == NULL) { /* Cleanup on error */ for (int i = 0; i < arg_count; i++) { @@ -800,6 +856,7 @@ static ASTNode* parser_parse_primary(Parser* parser) { return func_node; } case TOKEN_LPAREN: { + DEBUG_TRACE("parser_parse_primary consuming '('"); parser_advance(parser); /* consume '(' */ ASTNode* expr = parser_parse_expression(parser); if (expr == NULL) { @@ -813,171 +870,362 @@ static ASTNode* parser_parse_primary(Parser* parser) { return expr; } - case TOKEN_OP_UNARY_MINUS: { - parser_advance(parser); /* consume '-' */ - ASTNode* operand = parser_parse_primary(parser); - if (operand == NULL) { - return NULL; - } - return ast_unary_op_node(operand, "negate", token->line, token->column); - } - case TOKEN_KEYWORD_NOT: { - parser_advance(parser); /* consume 'not' */ - ASTNode* operand = parser_parse_primary(parser); - if (operand == NULL) { + case TOKEN_LBRACE: { + DEBUG_TRACE("parser_parse_primary consuming table literal '{'"); + parser_advance(parser); /* consume '{' */ + + ASTNode** elements = NULL; + int element_count = 0; + int capacity = 10; + + /* Allocate initial space for elements */ + elements = malloc(capacity * sizeof(ASTNode*)); + if (elements == NULL) { return NULL; } - return ast_unary_op_node(operand, "not", token->line, token->column); - } - default: - parser_set_error(parser, "Unexpected token in expression"); - return NULL; - } -} - -/** - * @brief Parse function call expression - * - * @param parser Parser instance - * @return Parsed expression node - */ -static ASTNode* parser_parse_function_call(Parser* parser) { - ASTNode* left = parser_parse_primary(parser); - if (left == NULL) { - return NULL; - } - - /* Check if this is a function call (identifier followed by arguments) */ - while (left->type == NODE_IDENTIFIER && !parser_is_at_end(parser)) { - Token* next_token = parser_peek(parser); - if (next_token == NULL) { - break; - } - /* If next token is not an operator or closing delimiter, treat as function call */ - if (next_token->type != TOKEN_OP_PLUS && - next_token->type != TOKEN_OP_MINUS && - next_token->type != TOKEN_OP_MULTIPLY && - next_token->type != TOKEN_OP_DIVIDE && - next_token->type != TOKEN_OP_MODULO && - next_token->type != TOKEN_OP_POWER && - next_token->type != TOKEN_OP_EQUALS && - next_token->type != TOKEN_OP_NOT_EQUALS && - next_token->type != TOKEN_OP_LESS && - next_token->type != TOKEN_OP_LESS_EQUAL && - next_token->type != TOKEN_OP_GREATER && - next_token->type != TOKEN_OP_GREATER_EQUAL && - next_token->type != TOKEN_KEYWORD_WHEN && - next_token->type != TOKEN_KEYWORD_IS && - next_token->type != TOKEN_KEYWORD_THEN && - next_token->type != TOKEN_RPAREN && - next_token->type != TOKEN_RBRACE && - next_token->type != TOKEN_RBRACKET && - next_token->type != TOKEN_SEMICOLON && - next_token->type != TOKEN_COMMA && - next_token->type != TOKEN_EOF) { + /* Parse table entries */ + while (!parser_is_at_end(parser) && parser_peek(parser)->type != TOKEN_RBRACE) { + ASTNode* value = NULL; - /* Collect all arguments for this function call */ - ASTNode** args = NULL; - int arg_count = 0; + /* Check if this is a key-value pair (any token: value) */ - while (!parser_is_at_end(parser)) { - Token* arg_token = parser_peek(parser); - if (arg_token == NULL) { - break; + /* Check if this is a key-value pair */ + bool is_key_value_pair = false; + + if (parser_peek(parser)->type == TOKEN_LPAREN) { + /* For expression keys, we need to look ahead to find the colon */ + int look_ahead = parser->current; + int paren_count = 0; + bool found_colon = false; + + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_LPAREN) { + paren_count++; + } else if (token->type == TOKEN_RPAREN) { + paren_count--; + if (paren_count == 0) { + /* We've found the closing parenthesis, check if next is colon */ + if (look_ahead + 1 < parser->token_count && + parser->tokens[look_ahead + 1]->type == TOKEN_COLON) { + found_colon = true; + } + break; + } + } else if (token->type == TOKEN_COMMA || token->type == TOKEN_RBRACE) { + /* Stop looking if we hit table boundaries */ + break; + } + look_ahead++; } + is_key_value_pair = found_colon; + } else { + /* For literal keys, check if next token is colon */ + is_key_value_pair = (parser_peek(parser)->type == TOKEN_IDENTIFIER || + parser_peek(parser)->type == TOKEN_NUMBER || + parser_peek(parser)->type == TOKEN_BOOLEAN || + parser_peek(parser)->type == TOKEN_STRING) && + !parser_is_at_end(parser) && + parser_peek_next(parser)->type == TOKEN_COLON; + } + + if (is_key_value_pair) { - /* Stop if we hit an operator, delimiter, or function name */ - if (arg_token->type == TOKEN_OP_PLUS || - arg_token->type == TOKEN_OP_MINUS || - arg_token->type == TOKEN_OP_MULTIPLY || - arg_token->type == TOKEN_OP_DIVIDE || - arg_token->type == TOKEN_OP_MODULO || - arg_token->type == TOKEN_OP_POWER || - arg_token->type == TOKEN_OP_EQUALS || - arg_token->type == TOKEN_OP_NOT_EQUALS || - arg_token->type == TOKEN_OP_LESS || - arg_token->type == TOKEN_OP_LESS_EQUAL || - arg_token->type == TOKEN_OP_GREATER || - arg_token->type == TOKEN_OP_GREATER_EQUAL || - arg_token->type == TOKEN_KEYWORD_WHEN || - arg_token->type == TOKEN_KEYWORD_IS || - arg_token->type == TOKEN_KEYWORD_THEN || - arg_token->type == TOKEN_RPAREN || - arg_token->type == TOKEN_RBRACE || - arg_token->type == TOKEN_RBRACKET || - arg_token->type == TOKEN_SEMICOLON || - arg_token->type == TOKEN_COMMA || - arg_token->type == TOKEN_EOF) { - break; + /* Parse key-value pair */ + ASTNode* key_node = NULL; + Token* key_token = NULL; + + if (parser_peek(parser)->type == TOKEN_LPAREN) { + /* Parse expression key */ + key_node = parser_parse_expression(parser); + if (key_node == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + /* Create a dummy token for line/column info */ + key_token = parser_peek(parser); + if (key_token == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + ast_destroy_node(key_node); + return NULL; + } + } else { + /* Parse literal key */ + key_token = parser_advance(parser); /* Consume the key token */ + if (key_token == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + } + + /* Consume colon */ + if (!parser_consume(parser, TOKEN_COLON, "Expected ':' after table key")) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + + /* Check if this is an arrow function by looking ahead */ + bool is_arrow_function = false; + int look_ahead = parser->current; + int identifier_count = 0; + + /* Look ahead to see if we have identifiers followed by '->' */ + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_ARROW) { + /* If we have at least one identifier before '->', it's an arrow function */ + if (identifier_count > 0) { + is_arrow_function = true; + } + break; + } + if (token->type == TOKEN_IDENTIFIER) { + identifier_count++; + } else if (token->type == TOKEN_COMMA || token->type == TOKEN_RBRACE) { + /* Stop looking if we hit table boundaries */ + break; + } else { + /* If we hit anything else, it's not an arrow function */ + identifier_count = 0; + break; + } + look_ahead++; + } + + /* Parse the value */ + if (is_arrow_function) { + /* Parse as embedded arrow function */ + value = parser_parse_embedded_arrow_function(parser); + } else { + /* Parse as general expression */ + value = parser_parse_expression(parser); + } + if (value == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + + /* For now, we'll store key-value pairs as function calls to a special "table_entry" function */ + /* This allows us to represent both key-value pairs and array-like entries uniformly */ + ASTNode** entry_args = malloc(2 * sizeof(ASTNode*)); + if (entry_args == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + ast_destroy_node(value); + return NULL; } - /* Parse argument */ - ASTNode* arg = parser_parse_primary(parser); + /* Create key value based on token type or expression */ + ASTNode* key_arg = NULL; + if (key_node != NULL) { + /* Expression key - use the parsed AST node */ + key_arg = key_node; + } else { + /* Literal key - create literal value from token */ + Value key_value; + if (key_token->type == TOKEN_IDENTIFIER) { + key_value = baba_yaga_value_string(key_token->lexeme); + } else if (key_token->type == TOKEN_NUMBER) { + key_value = baba_yaga_value_number(key_token->literal.number); + } else if (key_token->type == TOKEN_BOOLEAN) { + key_value = baba_yaga_value_boolean(key_token->literal.boolean); + } else if (key_token->type == TOKEN_STRING) { + key_value = baba_yaga_value_string(key_token->lexeme); + } else { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + free(entry_args); + ast_destroy_node(value); + return NULL; + } + key_arg = ast_literal_node(key_value, key_token->line, key_token->column); + } - if (arg == NULL) { + entry_args[0] = key_arg; + entry_args[1] = value; + + ASTNode* table_entry_node = ast_identifier_node("table_entry", key_token->line, key_token->column); + if (table_entry_node == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + free(entry_args); + ast_destroy_node(value); + if (key_node != NULL) { + ast_destroy_node(key_node); + } + return NULL; + } + + ASTNode* entry_node = ast_function_call_node(table_entry_node, entry_args, 2, key_token->line, key_token->column); + if (entry_node == NULL) { /* Cleanup on error */ - for (int i = 0; i < arg_count; i++) { - ast_destroy_node(args[i]); + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + free(entry_args); + ast_destroy_node(table_entry_node); + ast_destroy_node(value); + if (key_node != NULL) { + ast_destroy_node(key_node); } - free(args); - ast_destroy_node(left); return NULL; } - /* Add to arguments array */ - ASTNode** new_args = realloc(args, (arg_count + 1) * sizeof(ASTNode*)); - if (new_args == NULL) { + value = entry_node; + } else { + /* Parse array-like entry (just a value) */ + value = parser_parse_expression(parser); + if (value == NULL) { /* Cleanup on error */ - for (int i = 0; i < arg_count; i++) { - ast_destroy_node(args[i]); + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); } - free(args); - ast_destroy_node(arg); - ast_destroy_node(left); + free(elements); return NULL; } - args = new_args; - args[arg_count] = arg; - arg_count++; } - /* Create function call with all arguments */ - ASTNode* func_call = ast_function_call_node(left, args, arg_count, left->line, left->column); - if (func_call == NULL) { + /* Check if we need more space */ + if (element_count >= capacity) { + capacity *= 2; + ASTNode** new_elements = realloc(elements, capacity * sizeof(ASTNode*)); + if (new_elements == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + ast_destroy_node(value); + return NULL; + } + elements = new_elements; + } + + elements[element_count++] = value; + + /* Check for comma separator */ + if (!parser_is_at_end(parser) && parser_peek(parser)->type == TOKEN_COMMA) { + parser_advance(parser); /* consume ',' */ + } else if (!parser_is_at_end(parser) && parser_peek(parser)->type != TOKEN_RBRACE) { + /* No comma but not end of table - this is an error */ + parser_set_error(parser, "Expected ',' or '}' in table literal"); /* Cleanup on error */ - for (int i = 0; i < arg_count; i++) { - ast_destroy_node(args[i]); + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); } - free(args); - ast_destroy_node(left); + free(elements); return NULL; } - - left = func_call; - } else { - break; } + + /* Expect closing brace */ + if (!parser_consume(parser, TOKEN_RBRACE, "Expected '}' after table literal")) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + + /* Create table node */ + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + /* Cleanup on error */ + for (int i = 0; i < element_count; i++) { + ast_destroy_node(elements[i]); + } + free(elements); + return NULL; + } + + node->type = NODE_TABLE; + node->line = token->line; + node->column = token->column; + node->data.table.elements = elements; + node->data.table.element_count = element_count; + + return node; + } + case TOKEN_OP_UNARY_MINUS: { + DEBUG_TRACE("parser_parse_primary consuming unary minus"); + parser_advance(parser); /* consume '-' */ + ASTNode* operand = parser_parse_postfix(parser); + if (operand == NULL) { + return NULL; + } + return ast_unary_op_node(operand, "negate", token->line, token->column); + } + case TOKEN_KEYWORD_NOT: { + DEBUG_TRACE("parser_parse_primary consuming 'not'"); + parser_advance(parser); /* consume 'not' */ + ASTNode* operand = parser_parse_postfix(parser); + if (operand == NULL) { + return NULL; + } + return ast_unary_op_node(operand, "not", token->line, token->column); + } + default: + parser_set_error(parser, "Unexpected token in expression"); + return NULL; } - - return left; } /** + * @brief Parse function call expression + * + * @param parser Parser instance + * @return Parsed expression node + */ +/* TODO: Re-implement function call parsing at application level */ +/* TODO: Re-implement function call parsing at application level */ + +/** * @brief Parse power expression (^) * * @param parser Parser instance * @return Parsed expression node */ static ASTNode* parser_parse_power(Parser* parser) { - ASTNode* left = parser_parse_function_call(parser); + ASTNode* left = parser_parse_postfix(parser); if (left == NULL) { return NULL; } while (parser_check(parser, TOKEN_OP_POWER)) { Token* op = parser_advance(parser); - ASTNode* right = parser_parse_function_call(parser); + ASTNode* right = parser_parse_postfix(parser); if (right == NULL) { ast_destroy_node(left); return NULL; @@ -1130,8 +1378,251 @@ static ASTNode* parser_parse_comparison(Parser* parser) { * @return Parsed expression node */ static ASTNode* parser_parse_logical(Parser* parser) { - /* Logical operators are handled as function calls, not binary operators */ - return parser_parse_comparison(parser); + ASTNode* left = parser_parse_comparison(parser); + if (left == NULL) { + return NULL; + } + + /* Handle logical operators */ + while ((parser_check(parser, TOKEN_KEYWORD_AND) || + parser_check(parser, TOKEN_KEYWORD_OR) || + parser_check(parser, TOKEN_KEYWORD_XOR)) || + (parser_check(parser, TOKEN_IDENTIFIER) && + (strcmp(parser_peek(parser)->lexeme, "and") == 0 || + strcmp(parser_peek(parser)->lexeme, "or") == 0 || + strcmp(parser_peek(parser)->lexeme, "xor") == 0))) { + Token* op = parser_advance(parser); + ASTNode* right = parser_parse_comparison(parser); + if (right == NULL) { + ast_destroy_node(left); + return NULL; + } + + const char* operator_name; + if (op->type == TOKEN_KEYWORD_AND || + (op->type == TOKEN_IDENTIFIER && strcmp(op->lexeme, "and") == 0)) { + operator_name = "and"; + } else if (op->type == TOKEN_KEYWORD_OR || + (op->type == TOKEN_IDENTIFIER && strcmp(op->lexeme, "or") == 0)) { + operator_name = "or"; + } else if (op->type == TOKEN_KEYWORD_XOR || + (op->type == TOKEN_IDENTIFIER && strcmp(op->lexeme, "xor") == 0)) { + operator_name = "xor"; + } else { + operator_name = "unknown"; + } + + ASTNode* new_left = ast_binary_op_node(left, right, operator_name, op->line, op->column); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(right); + return NULL; + } + + left = new_left; + } + + /* Handle via operator (function composition) - right-associative */ + while (parser_check(parser, TOKEN_KEYWORD_VIA)) { + Token* op = parser_advance(parser); + ASTNode* right = parser_parse_logical(parser); /* Right-associative: recurse */ + if (right == NULL) { + ast_destroy_node(left); + return NULL; + } + + ASTNode* new_left = ast_binary_op_node(left, right, "via", op->line, op->column); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(right); + return NULL; + } + + left = new_left; + } + + /* Handle function application */ + /* Skip function application if the left node is a when expression */ + if (left->type == NODE_WHEN_EXPR) { + return left; + } + + while (!parser_is_at_end(parser)) { + Token* next_token = parser_peek(parser); + if (next_token == NULL) break; + + + + /* Check if this token can be a function argument */ + bool can_be_arg = (next_token->type == TOKEN_IDENTIFIER || + next_token->type == TOKEN_FUNCTION_REF || + next_token->type == TOKEN_NUMBER || + next_token->type == TOKEN_STRING || + next_token->type == TOKEN_BOOLEAN || + next_token->type == TOKEN_LPAREN || + next_token->type == TOKEN_LBRACE || + next_token->type == TOKEN_OP_UNARY_MINUS || + next_token->type == TOKEN_KEYWORD_NOT); + + /* Check if this token should not trigger function application */ + bool should_not_trigger = (next_token->type == TOKEN_OP_PLUS || + next_token->type == TOKEN_OP_MINUS || + next_token->type == TOKEN_OP_MULTIPLY || + next_token->type == TOKEN_OP_DIVIDE || + next_token->type == TOKEN_OP_MODULO || + next_token->type == TOKEN_OP_POWER || + next_token->type == TOKEN_OP_EQUALS || + next_token->type == TOKEN_OP_NOT_EQUALS || + next_token->type == TOKEN_OP_LESS || + next_token->type == TOKEN_OP_LESS_EQUAL || + next_token->type == TOKEN_OP_GREATER || + next_token->type == TOKEN_OP_GREATER_EQUAL || + next_token->type == TOKEN_KEYWORD_AND || + next_token->type == TOKEN_KEYWORD_OR || + next_token->type == TOKEN_KEYWORD_XOR || + (next_token->type == TOKEN_IDENTIFIER && + (strcmp(next_token->lexeme, "and") == 0 || + strcmp(next_token->lexeme, "or") == 0 || + strcmp(next_token->lexeme, "xor") == 0)) || + next_token->type == TOKEN_KEYWORD_WHEN || + next_token->type == TOKEN_KEYWORD_IS || + next_token->type == TOKEN_KEYWORD_THEN || + next_token->type == TOKEN_KEYWORD_VIA || + next_token->type == TOKEN_RPAREN || + next_token->type == TOKEN_RBRACE || + next_token->type == TOKEN_RBRACKET || + next_token->type == TOKEN_SEMICOLON || + next_token->type == TOKEN_COMMA || + next_token->type == TOKEN_EOF); + + /* Check if this is a pattern boundary (identifier followed by 'then') */ + bool is_pattern_boundary = false; + if (next_token->type == TOKEN_IDENTIFIER) { + /* Look ahead to see if the next token is 'then' */ + if (parser->current + 1 < parser->token_count) { + Token* next_next_token = parser->tokens[parser->current + 1]; + if (next_next_token && next_next_token->type == TOKEN_KEYWORD_THEN) { + is_pattern_boundary = true; + DEBUG_TRACE("Found pattern boundary: %s followed by 'then'", next_token->lexeme); + } + } + } + + DEBUG_TRACE("Function application check: can_be_arg=%d, should_not_trigger=%d, is_pattern_boundary=%d", + can_be_arg, should_not_trigger, is_pattern_boundary); + + /* Only proceed with function application if it can be an arg and shouldn't trigger */ + if (!can_be_arg || should_not_trigger || is_pattern_boundary) { + + break; + } + + /* Collect all arguments for this function call */ + ASTNode** args = NULL; + int arg_count = 0; + + while (!parser_is_at_end(parser)) { + Token* arg_token = parser_peek(parser); + if (arg_token == NULL) break; + + /* Check if this token can be a function argument */ + bool can_be_arg = (arg_token->type == TOKEN_IDENTIFIER || + arg_token->type == TOKEN_FUNCTION_REF || + arg_token->type == TOKEN_NUMBER || + arg_token->type == TOKEN_STRING || + arg_token->type == TOKEN_BOOLEAN || + arg_token->type == TOKEN_LPAREN || + arg_token->type == TOKEN_LBRACE || + arg_token->type == TOKEN_OP_UNARY_MINUS || + arg_token->type == TOKEN_KEYWORD_NOT); + + /* Check if this token should not trigger function application */ + bool should_not_trigger = (arg_token->type == TOKEN_OP_PLUS || + arg_token->type == TOKEN_OP_MINUS || + arg_token->type == TOKEN_OP_MULTIPLY || + arg_token->type == TOKEN_OP_DIVIDE || + arg_token->type == TOKEN_OP_MODULO || + arg_token->type == TOKEN_OP_POWER || + arg_token->type == TOKEN_OP_EQUALS || + arg_token->type == TOKEN_OP_NOT_EQUALS || + arg_token->type == TOKEN_OP_LESS || + arg_token->type == TOKEN_OP_LESS_EQUAL || + arg_token->type == TOKEN_OP_GREATER || + arg_token->type == TOKEN_OP_GREATER_EQUAL || + arg_token->type == TOKEN_KEYWORD_AND || + arg_token->type == TOKEN_KEYWORD_OR || + arg_token->type == TOKEN_KEYWORD_XOR || + arg_token->type == TOKEN_KEYWORD_WHEN || + arg_token->type == TOKEN_KEYWORD_IS || + arg_token->type == TOKEN_KEYWORD_THEN || + arg_token->type == TOKEN_RPAREN || + arg_token->type == TOKEN_RBRACE || + arg_token->type == TOKEN_RBRACKET || + arg_token->type == TOKEN_SEMICOLON || + arg_token->type == TOKEN_COMMA || + arg_token->type == TOKEN_EOF); + + /* Check if this is a pattern boundary (identifier followed by 'then') */ + bool is_pattern_boundary = false; + if (arg_token->type == TOKEN_IDENTIFIER) { + /* Look ahead to see if the next token is 'then' */ + if (parser->current + 1 < parser->token_count) { + Token* next_next_token = parser->tokens[parser->current + 1]; + if (next_next_token && next_next_token->type == TOKEN_KEYWORD_THEN) { + is_pattern_boundary = true; + DEBUG_TRACE("Inner loop found pattern boundary: %s followed by 'then'", arg_token->lexeme); + } + } + } + + /* Stop if it can't be an arg, should not trigger, or is a pattern boundary */ + if (!can_be_arg || should_not_trigger || is_pattern_boundary) { + break; + } + + ASTNode* arg = parser_parse_comparison(parser); + if (arg == NULL) { + /* Cleanup on error */ + for (int i = 0; i < arg_count; i++) { + ast_destroy_node(args[i]); + } + free(args); + ast_destroy_node(left); + return NULL; + } + + /* Add to arguments array */ + ASTNode** new_args = realloc(args, (arg_count + 1) * sizeof(ASTNode*)); + if (new_args == NULL) { + /* Cleanup on error */ + for (int i = 0; i < arg_count; i++) { + ast_destroy_node(args[i]); + } + free(args); + ast_destroy_node(arg); + ast_destroy_node(left); + return NULL; + } + args = new_args; + args[arg_count++] = arg; + } + + /* Create function call with all arguments */ + ASTNode* new_left = ast_function_call_node(left, args, arg_count, left->line, left->column); + if (new_left == NULL) { + /* Cleanup on error */ + for (int i = 0; i < arg_count; i++) { + ast_destroy_node(args[i]); + } + free(args); + ast_destroy_node(left); + return NULL; + } + + left = new_left; + } + + return left; } /** @@ -1140,6 +1631,8 @@ static ASTNode* parser_parse_logical(Parser* parser) { * @param parser Parser instance * @return Parsed expression node */ +/* TODO: Re-implement composition parsing */ +/* static ASTNode* parser_parse_composition(Parser* parser) { ASTNode* left = parser_parse_application(parser); if (left == NULL) { @@ -1166,55 +1659,97 @@ static ASTNode* parser_parse_composition(Parser* parser) { return left; } +*/ + + /** - * @brief Parse function application (juxtaposition) + * @brief Parse postfix operations (table access, function calls, etc.) * * @param parser Parser instance * @return Parsed expression node */ -static ASTNode* parser_parse_application(Parser* parser) { - ASTNode* left = parser_parse_composition(parser); +static ASTNode* parser_parse_postfix(Parser* parser) { + ASTNode* left = parser_parse_primary(parser); if (left == NULL) { return NULL; } - /* Function application is left-associative */ - while (!parser_is_at_end(parser) && - (parser_peek(parser)->type == TOKEN_IDENTIFIER || - parser_peek(parser)->type == TOKEN_FUNCTION_REF || - parser_peek(parser)->type == TOKEN_NUMBER || - parser_peek(parser)->type == TOKEN_STRING || - parser_peek(parser)->type == TOKEN_BOOLEAN || - parser_peek(parser)->type == TOKEN_LPAREN || - parser_peek(parser)->type == TOKEN_LBRACE || - parser_peek(parser)->type == TOKEN_OP_UNARY_MINUS || - parser_peek(parser)->type == TOKEN_KEYWORD_NOT)) { - - ASTNode* right = parser_parse_composition(parser); - if (right == NULL) { - ast_destroy_node(left); - return NULL; + while (!parser_is_at_end(parser)) { + Token* token = parser_peek(parser); + if (token == NULL) { + break; } - /* Create function application: left(right) */ - ASTNode** args = malloc(1 * sizeof(ASTNode*)); - if (args == NULL) { - ast_destroy_node(left); - ast_destroy_node(right); - return NULL; + switch (token->type) { + case TOKEN_DOT: { + /* Table property access: table.property */ + parser_advance(parser); /* consume '.' */ + + Token* property = parser_consume(parser, TOKEN_IDENTIFIER, "Expected property name after '.'"); + if (property == NULL) { + ast_destroy_node(left); + return NULL; + } + + ASTNode* key = ast_literal_node(baba_yaga_value_string(property->lexeme), property->line, property->column); + if (key == NULL) { + ast_destroy_node(left); + return NULL; + } + + ASTNode* new_left = malloc(sizeof(ASTNode)); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(key); + return NULL; + } + + new_left->type = NODE_TABLE_ACCESS; + new_left->line = left->line; + new_left->column = left->column; + new_left->data.table_access.object = left; + new_left->data.table_access.key = key; + + left = new_left; + break; } - args[0] = right; - - ASTNode* new_left = ast_function_call_node(left, args, 1, left->line, left->column); - if (new_left == NULL) { - free(args); - ast_destroy_node(left); - ast_destroy_node(right); - return NULL; + case TOKEN_LBRACKET: { + /* Table bracket access: table[key] */ + parser_advance(parser); /* consume '[' */ + + ASTNode* key = parser_parse_expression(parser); + if (key == NULL) { + ast_destroy_node(left); + return NULL; + } + + if (!parser_consume(parser, TOKEN_RBRACKET, "Expected ']' after table key")) { + ast_destroy_node(left); + ast_destroy_node(key); + return NULL; + } + + ASTNode* new_left = malloc(sizeof(ASTNode)); + if (new_left == NULL) { + ast_destroy_node(left); + ast_destroy_node(key); + return NULL; + } + + new_left->type = NODE_TABLE_ACCESS; + new_left->line = left->line; + new_left->column = left->column; + new_left->data.table_access.object = left; + new_left->data.table_access.key = key; + + left = new_left; + break; + } + default: + /* No more postfix operations */ + return left; } - - left = new_left; } return left; @@ -1255,6 +1790,8 @@ static ASTNode* parser_parse_variable_decl(Parser* parser) { return NULL; } + + ASTNode* node = malloc(sizeof(ASTNode)); if (node == NULL) { ast_destroy_node(value); @@ -1267,6 +1804,7 @@ static ASTNode* parser_parse_variable_decl(Parser* parser) { node->data.variable_decl.name = strdup(name->lexeme); node->data.variable_decl.value = value; + return node; } @@ -1347,6 +1885,73 @@ static ASTNode* parser_parse_function_def(Parser* parser) { } /** + * @brief Parse embedded arrow function (params -> body) without function name + * + * @param parser Parser instance + * @return Parsed function definition node + */ +static ASTNode* parser_parse_embedded_arrow_function(Parser* parser) { + /* Parse parameters */ + ASTNode** parameters = NULL; + int param_count = 0; + + while (!parser_is_at_end(parser) && + parser_peek(parser)->type == TOKEN_IDENTIFIER) { + Token* param = parser_advance(parser); + + ASTNode** new_params = realloc(parameters, (param_count + 1) * sizeof(ASTNode*)); + if (new_params == NULL) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + return NULL; + } + parameters = new_params; + + parameters[param_count] = ast_identifier_node(param->lexeme, param->line, param->column); + param_count++; + } + + if (!parser_consume(parser, TOKEN_ARROW, "Expected '->' after parameters")) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + return NULL; + } + + ASTNode* body = parser_parse_expression(parser); + if (body == NULL) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + return NULL; + } + + ASTNode* node = malloc(sizeof(ASTNode)); + if (node == NULL) { + for (int i = 0; i < param_count; i++) { + ast_destroy_node(parameters[i]); + } + free(parameters); + ast_destroy_node(body); + return NULL; + } + + node->type = NODE_FUNCTION_DEF; + node->line = parser_peek(parser)->line; + node->column = parser_peek(parser)->column; + node->data.function_def.name = strdup(""); /* Empty name for embedded functions */ + node->data.function_def.parameters = parameters; + node->data.function_def.param_count = param_count; + node->data.function_def.body = body; + + return node; +} + +/** * @brief Parse multiple statements separated by semicolons * * @param parser Parser instance @@ -1393,11 +1998,8 @@ static ASTNode* parser_parse_statements(Parser* parser) { /* Consume semicolon */ parser_consume(parser, TOKEN_SEMICOLON, "Expected semicolon"); - /* Skip any whitespace/comments after semicolon */ - while (!parser_is_at_end(parser) && - (parser_peek(parser)->type == TOKEN_COMMENT)) { - parser->current++; /* Skip comment */ - } + /* Skip any whitespace after semicolon */ + /* Comments are already skipped by the lexer */ if (parser_is_at_end(parser)) { break; /* Trailing semicolon */ @@ -1821,6 +2423,61 @@ void* baba_yaga_ast_get_when_pattern_result(void* node) { return ast_node->data.when_pattern.result; } +int baba_yaga_ast_get_table_element_count(void* node) { + if (node == NULL) { + return 0; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_TABLE) { + return 0; + } + + return ast_node->data.table.element_count; +} + +void* baba_yaga_ast_get_table_element(void* node, int index) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_TABLE) { + return NULL; + } + + if (index >= 0 && index < ast_node->data.table.element_count) { + return ast_node->data.table.elements[index]; + } + return NULL; +} + +void* baba_yaga_ast_get_table_access_object(void* node) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_TABLE_ACCESS) { + return NULL; + } + + return ast_node->data.table_access.object; +} + +void* baba_yaga_ast_get_table_access_key(void* node) { + if (node == NULL) { + return NULL; + } + + ASTNode* ast_node = (ASTNode*)node; + if (ast_node->type != NODE_TABLE_ACCESS) { + return NULL; + } + + return ast_node->data.table_access.key; +} + void baba_yaga_print_ast(void* node, int indent) { if (node == NULL) { return; @@ -1902,76 +2559,132 @@ void baba_yaga_print_ast(void* node, int indent) { * @return Parsed when expression node */ static ASTNode* parser_parse_when_expression(Parser* parser) { - /* Consume 'when' keyword */ + DEBUG_DEBUG("Parsing WHEN expression at token %d", parser->current); Token* when_token = parser_consume(parser, TOKEN_KEYWORD_WHEN, "Expected 'when'"); - if (when_token == NULL) { - return NULL; - } + if (!when_token) return NULL; - /* Parse test expression */ - ASTNode* test = parser_parse_expression(parser); - if (test == NULL) { - return NULL; - } - - /* Consume 'is' keyword */ - Token* is_token = parser_consume(parser, TOKEN_KEYWORD_IS, "Expected 'is' after test expression"); - if (is_token == NULL) { - ast_destroy_node(test); - return NULL; - } + - /* Parse patterns */ - ASTNode** patterns = NULL; - int pattern_count = 0; - int capacity = 5; /* Start with space for 5 patterns */ + /* Check if this is a multi-parameter pattern by looking ahead for multiple identifiers */ + bool is_multi_param = false; + int look_ahead = parser->current; + int identifier_count = 0; - patterns = malloc(capacity * sizeof(ASTNode*)); - if (patterns == NULL) { - ast_destroy_node(test); - return NULL; + /* Count consecutive identifiers or expressions before 'is' */ + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_KEYWORD_IS) { + break; + } + if (token->type == TOKEN_IDENTIFIER) { + identifier_count++; + } else if (token->type == TOKEN_LPAREN) { + /* Expression in parentheses - count as one parameter */ + identifier_count++; + /* Skip to closing parenthesis */ + int paren_count = 1; + look_ahead++; + while (look_ahead < parser->token_count && paren_count > 0) { + Token* next_token = parser->tokens[look_ahead]; + if (next_token->type == TOKEN_LPAREN) { + paren_count++; + } else if (next_token->type == TOKEN_RPAREN) { + paren_count--; + } + look_ahead++; + } + /* Continue from the position after the closing parenthesis */ + continue; + } else { + /* If we hit anything other than an identifier or expression, it's not multi-parameter */ + identifier_count = 0; + break; + } + look_ahead++; } - /* Parse first pattern */ - ASTNode* pattern = parser_parse_when_pattern(parser); - if (pattern == NULL) { - free(patterns); - ast_destroy_node(test); - return NULL; + /* If we have multiple identifiers followed by 'is', it's multi-parameter */ + if (identifier_count > 1) { + is_multi_param = true; } - patterns[pattern_count++] = pattern; - - /* Parse additional patterns */ - while (!parser_is_at_end(parser)) { - /* Parse next pattern */ - ASTNode* next_pattern = parser_parse_when_pattern(parser); - if (next_pattern == NULL) { - break; /* Error parsing pattern, but continue with what we have */ - } + ASTNode* test; + if (is_multi_param) { + /* Parse as sequence of identifiers or expressions */ + ASTNode** identifiers = malloc(identifier_count * sizeof(ASTNode*)); + if (!identifiers) return NULL; - /* Expand array if needed */ - if (pattern_count >= capacity) { - capacity *= 2; - ASTNode** new_patterns = realloc(patterns, capacity * sizeof(ASTNode*)); - if (new_patterns == NULL) { - /* Cleanup and return what we have */ - for (int i = 0; i < pattern_count; i++) { - ast_destroy_node(patterns[i]); + for (int i = 0; i < identifier_count; i++) { + Token* current_token = parser_peek(parser); + if (current_token->type == TOKEN_LPAREN) { + /* Expression in parentheses - parse the expression */ + /* Parse expression but stop at 'is' token */ + identifiers[i] = parser_parse_expression(parser); + if (identifiers[i] == NULL) { + /* Cleanup on error */ + for (int j = 0; j < i; j++) { + ast_destroy_node(identifiers[j]); + } + free(identifiers); + return NULL; } - free(patterns); - ast_destroy_node(test); - return NULL; + + /* Check if we consumed the 'is' token and back up if needed */ + if (parser->current < parser->token_count && + parser->tokens[parser->current]->type == TOKEN_KEYWORD_IS) { + /* We consumed the 'is' token, need to back up */ + parser->current--; + } + } else { + /* Identifier - parse as identifier */ + Token* id_token = parser_advance(parser); + identifiers[i] = ast_identifier_node(id_token->lexeme, id_token->line, id_token->column); } - patterns = new_patterns; } - patterns[pattern_count++] = next_pattern; + /* Create a sequence node for the identifiers */ + test = ast_sequence_node(identifiers, identifier_count, when_token->line, when_token->column); + } else { + /* Parse as single expression */ + test = parser_parse_expression(parser); } - /* Create when expression node */ - return ast_when_expr_node(test, patterns, pattern_count, - when_token->line, when_token->column); + if (!test) return NULL; + Token* is_token = parser_consume(parser, TOKEN_KEYWORD_IS, "Expected 'is' after test expression"); + if (!is_token) { ast_destroy_node(test); return NULL; } + + // Prepare flat array of NODE_WHEN_PATTERN nodes + ASTNode** patterns = NULL; + int pattern_count = 0, pattern_cap = 4; + patterns = malloc(pattern_cap * sizeof(ASTNode*)); + + while (!parser_is_at_end(parser) && parser_peek(parser)->type != TOKEN_SEMICOLON) { + // Parse pattern + ASTNode* pattern = parser_parse_when_pattern(parser); + if (!pattern) break; + // Expect 'then' + Token* then_token = parser_consume(parser, TOKEN_KEYWORD_THEN, "Expected 'then' after pattern in when case"); + if (!then_token) { ast_destroy_node(pattern); break; } + // Parse result (single expression) + ASTNode* result = parser_parse_when_result_expression(parser); + if (!result) { ast_destroy_node(pattern); break; } + // Create NODE_WHEN_PATTERN node + ASTNode* case_node = ast_when_pattern_node(pattern, result, when_token->line, when_token->column); + if (pattern_count >= pattern_cap) { + pattern_cap *= 2; + patterns = realloc(patterns, pattern_cap * sizeof(ASTNode*)); + } + patterns[pattern_count++] = case_node; + // If next token is a valid pattern start, continue loop; else break + Token* next = parser_peek(parser); + if (!next || next->type == TOKEN_SEMICOLON) break; + int is_wildcard = (next->type == TOKEN_IDENTIFIER && next->lexeme && strcmp(next->lexeme, "_") == 0); + if (!(is_wildcard || next->type == TOKEN_IDENTIFIER || next->type == TOKEN_NUMBER || next->type == TOKEN_STRING)) break; + } + // Build AST node for when expression + ASTNode* when_node = ast_when_expr_node(test, patterns, pattern_count, when_token->line, when_token->column); + + return when_node; } /** @@ -1980,30 +2693,256 @@ static ASTNode* parser_parse_when_expression(Parser* parser) { * @param parser Parser instance * @return Parsed when pattern node */ -static ASTNode* parser_parse_when_pattern(Parser* parser) { - /* Parse pattern test expression */ - ASTNode* pattern_test = parser_parse_expression(parser); - if (pattern_test == NULL) { - return NULL; +// Helper: look ahead to see if the next two tokens are a pattern start followed by 'then' +static bool parser_is_next_pattern(Parser* parser) { + if (parser_is_at_end(parser)) return false; + Token* t1 = parser_peek(parser); + if (!t1) return false; + if (t1->type != TOKEN_IDENTIFIER && t1->type != TOKEN_NUMBER && t1->type != TOKEN_STRING) return false; + // Look ahead one more + if (parser->current + 1 >= parser->token_count) return false; + Token* t2 = parser->tokens[parser->current + 1]; + return t2 && t2->type == TOKEN_KEYWORD_THEN; +} + +// Parse a result expression for a when pattern, stopping at pattern boundaries +static ASTNode* parser_parse_when_result_expression(Parser* parser) { + DEBUG_TRACE("parser_parse_when_result_expression start at token %d", parser->current); + + // Show current token before parsing + Token* before_token = parser_peek(parser); + if (before_token) { + DEBUG_TRACE("Before parsing result, token type=%d, lexeme='%s'", + before_token->type, before_token->lexeme ? before_token->lexeme : "NULL"); } - /* Consume 'then' keyword */ - Token* then_token = parser_consume(parser, TOKEN_KEYWORD_THEN, "Expected 'then' after pattern"); - if (then_token == NULL) { - ast_destroy_node(pattern_test); - return NULL; + // Check if the next token is a pattern start followed by 'then' + // If so, return an empty result expression + if (parser_is_next_pattern(parser)) { + DEBUG_TRACE("Detected next pattern, returning empty result"); + return ast_literal_node(baba_yaga_value_string(""), parser_peek(parser)->line, parser_peek(parser)->column); } - /* Parse result expression */ - ASTNode* result = parser_parse_expression(parser); + // Parse a single expression using a bounded parser + // Stop when we hit a pattern boundary or statement terminator + ASTNode* result = parser_parse_primary(parser); if (result == NULL) { - ast_destroy_node(pattern_test); return NULL; } - /* Create when pattern node */ - return ast_when_pattern_node(pattern_test, result, - then_token->line, then_token->column); + // Show current token after parsing + Token* after_token = parser_peek(parser); + if (after_token) { + DEBUG_TRACE("After parsing result, token type=%d, lexeme='%s'", + after_token->type, after_token->lexeme ? after_token->lexeme : "NULL"); + } + + DEBUG_TRACE("parser_parse_when_result_expression end at token %d", parser->current); + return result; +} + +static ASTNode* parser_parse_when_pattern(Parser* parser) { + DEBUG_DEBUG("Parsing WHEN pattern at token %d", parser->current); + DEBUG_TRACE("parser_parse_when_pattern start"); + + /* Show current token */ + Token* current_token = parser_peek(parser); + if (current_token != NULL) { + DEBUG_TRACE("Current token type=%d, lexeme='%s'", current_token->type, current_token->lexeme ? current_token->lexeme : "NULL"); + } + + /* Check if this is a multi-parameter pattern by looking ahead for multiple literals */ + bool is_multi_param = false; + int look_ahead = parser->current; + int literal_count = 0; + + /* Count consecutive literals or expressions before 'then' */ + DEBUG_DEBUG("Multi-parameter detection: starting at token %d", look_ahead); + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_KEYWORD_THEN) { + break; + } + if (token->type == TOKEN_IDENTIFIER || + token->type == TOKEN_NUMBER || + token->type == TOKEN_STRING || + (token->type == TOKEN_IDENTIFIER && token->lexeme && strcmp(token->lexeme, "_") == 0)) { + literal_count++; + } else if (token->type == TOKEN_LPAREN) { + /* Expression in parentheses - count as one pattern */ + DEBUG_DEBUG("Multi-parameter detection: found TOKEN_LPAREN at token %d", look_ahead); + literal_count++; + /* Skip to closing parenthesis */ + int paren_count = 1; + look_ahead++; + while (look_ahead < parser->token_count && paren_count > 0) { + Token* next_token = parser->tokens[look_ahead]; + if (next_token->type == TOKEN_LPAREN) { + paren_count++; + } else if (next_token->type == TOKEN_RPAREN) { + paren_count--; + } + look_ahead++; + } + DEBUG_DEBUG("Multi-parameter detection: finished expression, literal_count=%d, look_ahead=%d", literal_count, look_ahead); + /* Continue from the position after the closing parenthesis */ + continue; + } else if (token->type == TOKEN_OP_EQUALS || + token->type == TOKEN_OP_NOT_EQUALS || + token->type == TOKEN_OP_LESS || + token->type == TOKEN_OP_LESS_EQUAL || + token->type == TOKEN_OP_GREATER || + token->type == TOKEN_OP_GREATER_EQUAL) { + /* If we hit a comparison operator, it's not multi-parameter */ + literal_count = 0; + break; + } else { + /* If we hit anything other than a literal or expression, it's not multi-parameter */ + literal_count = 0; + break; + } + look_ahead++; + } + + /* If we have multiple literals followed by 'then', it's multi-parameter */ + DEBUG_DEBUG("Multi-parameter detection: final literal_count=%d, is_multi_param=%s", literal_count, literal_count > 1 ? "true" : "false"); + if (literal_count > 1) { + is_multi_param = true; + } + + ASTNode* pattern_test; + if (is_multi_param) { + /* Parse as sequence of literals */ + ASTNode** literals = malloc(literal_count * sizeof(ASTNode*)); + if (!literals) return NULL; + + for (int i = 0; i < literal_count; i++) { + Token* current_token = parser_peek(parser); + if (current_token->type == TOKEN_LPAREN) { + /* Expression pattern - parse the expression */ + literals[i] = parser_parse_expression(parser); + if (literals[i] == NULL) { + /* Cleanup on error */ + for (int j = 0; j < i; j++) { + ast_destroy_node(literals[j]); + } + free(literals); + return NULL; + } + } else { + /* Literal pattern */ + Token* lit_token = parser_advance(parser); + if (lit_token->type == TOKEN_IDENTIFIER && lit_token->lexeme && strcmp(lit_token->lexeme, "_") == 0) { + /* Wildcard pattern - treat as literal in multi-parameter context */ + literals[i] = ast_literal_node(baba_yaga_value_string("_"), lit_token->line, lit_token->column); + } else if (lit_token->type == TOKEN_IDENTIFIER) { + /* Identifier pattern */ + literals[i] = ast_identifier_node(lit_token->lexeme, lit_token->line, lit_token->column); + } else if (lit_token->type == TOKEN_NUMBER) { + /* Number pattern */ + literals[i] = ast_literal_node(baba_yaga_value_number(lit_token->literal.number), lit_token->line, lit_token->column); + } else if (lit_token->type == TOKEN_STRING) { + /* String pattern */ + literals[i] = ast_literal_node(baba_yaga_value_string(lit_token->lexeme), lit_token->line, lit_token->column); + } else { + /* Cleanup on error */ + for (int j = 0; j < i; j++) { + ast_destroy_node(literals[j]); + } + free(literals); + return NULL; + } + } + } + + /* Create a sequence node for the literals */ + pattern_test = ast_sequence_node(literals, literal_count, parser_peek(parser)->line, parser_peek(parser)->column); + } else if (current_token && current_token->type == TOKEN_LBRACE) { + /* Table pattern: { status: "placeholder" } */ + DEBUG_TRACE("Found table pattern"); + /* Parse as table literal */ + pattern_test = parser_parse_primary(parser); + if (pattern_test == NULL) { + DEBUG_TRACE("Failed to parse table pattern"); + return NULL; + } + DEBUG_TRACE("Successfully parsed table pattern"); + } else if (current_token && current_token->type == TOKEN_IDENTIFIER && + current_token->lexeme && strcmp(current_token->lexeme, "_") == 0) { + /* Special handling for single wildcard pattern */ + DEBUG_TRACE("Found wildcard pattern"); + /* Create a special wildcard literal */ + pattern_test = ast_literal_node(baba_yaga_value_string("_"), + current_token->line, current_token->column); + /* Consume the _ token */ + parser_advance(parser); + DEBUG_TRACE("Consumed _ token, current token type=%d, lexeme='%s'", + parser_peek(parser)->type, parser_peek(parser)->lexeme ? parser_peek(parser)->lexeme : "NULL"); + } else { + /* Parse pattern test expression - stop at 'then' */ + /* Check if this is a comparison expression by looking ahead */ + bool is_comparison = false; + int look_ahead = parser->current; + + /* Look ahead to see if there's a comparison operator */ + while (look_ahead < parser->token_count) { + Token* token = parser->tokens[look_ahead]; + if (token->type == TOKEN_KEYWORD_THEN) { + break; /* Found 'then', stop looking */ + } + if (token->type == TOKEN_OP_EQUALS || + token->type == TOKEN_OP_NOT_EQUALS || + token->type == TOKEN_OP_LESS || + token->type == TOKEN_OP_LESS_EQUAL || + token->type == TOKEN_OP_GREATER || + token->type == TOKEN_OP_GREATER_EQUAL) { + is_comparison = true; + break; + } + look_ahead++; + } + + if (is_comparison) { + /* Parse as comparison expression but stop at 'then' */ + /* Find the 'then' token position */ + int then_pos = -1; + for (int i = parser->current; i < parser->token_count; i++) { + if (parser->tokens[i]->type == TOKEN_KEYWORD_THEN) { + then_pos = i; + break; + } + } + + if (then_pos == -1) { + DEBUG_TRACE("No 'then' token found after comparison pattern"); + return NULL; + } + + /* Temporarily limit parsing to stop at 'then' */ + int original_token_count = parser->token_count; + parser->token_count = then_pos; + + /* Parse the comparison expression */ + pattern_test = parser_parse_comparison(parser); + + /* Restore parser state */ + parser->token_count = original_token_count; + } else { + /* Parse as simple expression */ + pattern_test = parser_parse_primary(parser); + } + + if (pattern_test == NULL) { + DEBUG_TRACE("Failed to parse pattern test expression"); + return NULL; + } + DEBUG_TRACE("Parsed pattern test expression"); + } + + DEBUG_TRACE("parser_parse_when_pattern success"); + + /* Create when pattern node - only the pattern test, result will be added by caller */ + return pattern_test; } /* Helper function to get node type name */ |