Diffstat (limited to 'js/scripting-lang/baba-yaga-c/src/lexer.c')
 js/scripting-lang/baba-yaga-c/src/lexer.c (new file, -rw-r--r--) | 820
 1 file changed, 820 insertions(+), 0 deletions(-)
diff --git a/js/scripting-lang/baba-yaga-c/src/lexer.c b/js/scripting-lang/baba-yaga-c/src/lexer.c
new file mode 100644
index 0000000..cc15047
--- /dev/null
+++ b/js/scripting-lang/baba-yaga-c/src/lexer.c
@@ -0,0 +1,820 @@
+/**
+ * @file lexer.c
+ * @brief Lexer implementation for Baba Yaga
+ * @author eli_oat
+ * @version 0.0.1
+ * @date 2025
+ * 
+ * This file implements the lexical analyzer for the Baba Yaga language.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+
+#include "baba_yaga.h"
+
+/* ============================================================================
+ * Token Types
+ * ============================================================================ */
+
+typedef enum {
+    /* End of file */
+    TOKEN_EOF,
+    
+    /* Literals */
+    TOKEN_NUMBER,
+    TOKEN_STRING,
+    TOKEN_BOOLEAN,
+    
+    /* Identifiers and keywords */
+    TOKEN_IDENTIFIER,
+    TOKEN_KEYWORD_WHEN,
+    TOKEN_KEYWORD_IS,
+    TOKEN_KEYWORD_THEN,
+    TOKEN_KEYWORD_AND,
+    TOKEN_KEYWORD_OR,
+    TOKEN_KEYWORD_XOR,
+    TOKEN_KEYWORD_NOT,
+    TOKEN_KEYWORD_VIA,
+    
+    /* Operators */
+    TOKEN_OP_PLUS,
+    TOKEN_OP_MINUS,
+    TOKEN_OP_UNARY_MINUS,
+    TOKEN_OP_MULTIPLY,
+    TOKEN_OP_DIVIDE,
+    TOKEN_OP_MODULO,
+    TOKEN_OP_POWER,
+    TOKEN_OP_EQUALS,
+    TOKEN_OP_NOT_EQUALS,
+    TOKEN_OP_LESS,
+    TOKEN_OP_LESS_EQUAL,
+    TOKEN_OP_GREATER,
+    TOKEN_OP_GREATER_EQUAL,
+    
+    /* Punctuation */
+    TOKEN_LPAREN,
+    TOKEN_RPAREN,
+    TOKEN_LBRACE,
+    TOKEN_RBRACE,
+    TOKEN_LBRACKET,
+    TOKEN_RBRACKET,
+    TOKEN_COMMA,
+    TOKEN_COLON,
+    TOKEN_SEMICOLON,
+    TOKEN_ARROW,
+    TOKEN_DOT,
+    
+    /* Special tokens */
+    TOKEN_FUNCTION_REF,  /* @function */
+    TOKEN_IO_IN,         /* ..in */
+    TOKEN_IO_OUT,        /* ..out */
+    TOKEN_IO_ASSERT,     /* ..assert */
+    
+    /* Comments (reserved; comments are currently skipped, not emitted as tokens) */
+    TOKEN_COMMENT
+} TokenType;
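+
+/*
+ * Example (an illustrative sketch, not used by the lexer itself): a helper
+ * that names token types for debug output. Only a few representative cases
+ * are spelled out, and the BABA_YAGA_LEXER_DEBUG guard macro is hypothetical.
+ */
+#ifdef BABA_YAGA_LEXER_DEBUG
+static const char* token_type_name(TokenType type) {
+    switch (type) {
+    case TOKEN_EOF:          return "EOF";
+    case TOKEN_NUMBER:       return "NUMBER";
+    case TOKEN_STRING:       return "STRING";
+    case TOKEN_BOOLEAN:      return "BOOLEAN";
+    case TOKEN_IDENTIFIER:   return "IDENTIFIER";
+    case TOKEN_FUNCTION_REF: return "FUNCTION_REF";
+    default:                 return "OTHER";
+    }
+}
+#endif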
+
+/* ============================================================================
+ * Token Structure
+ * ============================================================================ */
+
+typedef struct {
+    TokenType type;
+    char* lexeme;
+    int line;
+    int column;
+    union {
+        double number;
+        bool boolean;
+    } literal;
+} Token;
+
+/* ============================================================================
+ * Lexer Structure
+ * ============================================================================ */
+
+typedef struct {
+    const char* source;
+    size_t source_len;
+    size_t position;
+    int line;
+    int column;
+    Token current_token;
+    bool has_error;
+    char* error_message;
+} Lexer;
+
+/* ============================================================================
+ * Token Helper Functions
+ * ============================================================================ */
+
+/**
+ * @brief Create a simple token
+ * 
+ * @param type Token type
+ * @param lexeme Token lexeme
+ * @param line Line number
+ * @param column Column number
+ * @return New token
+ */
+static Token token_create(TokenType type, const char* lexeme, int line, int column) {
+    Token token;
+    token.type = type;
+    token.lexeme = lexeme != NULL ? strdup(lexeme) : NULL;
+    token.line = line;
+    token.column = column;
+    token.literal.number = 0.0; /* Initialize union */
+    return token;
+}
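+
+/*
+ * Usage sketch (illustrative): fixed tokens are built from static lexemes,
+ * and the strdup'd copy belongs to the caller:
+ *
+ *     Token t = token_create(TOKEN_LPAREN, "(", 1, 1);
+ *     ...
+ *     free(t.lexeme);
+ */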
+
+/* ============================================================================
+ * Lexer Functions
+ * ============================================================================ */
+
+/**
+ * @brief Create a new lexer
+ * 
+ * @param source Source code to tokenize
+ * @param source_len Length of source code
+ * @return New lexer instance, or NULL on failure
+ */
+static Lexer* lexer_create(const char* source, size_t source_len) {
+    Lexer* lexer = malloc(sizeof(Lexer));
+    if (lexer == NULL) {
+        return NULL;
+    }
+    
+    lexer->source = source;
+    lexer->source_len = source_len;
+    lexer->position = 0;
+    lexer->line = 1;
+    lexer->column = 1;
+    lexer->has_error = false;
+    lexer->error_message = NULL;
+    
+    /* Initialize current token */
+    lexer->current_token.type = TOKEN_EOF;
+    lexer->current_token.lexeme = NULL;
+    lexer->current_token.line = 1;
+    lexer->current_token.column = 1;
+    
+    return lexer;
+}
+
+/**
+ * @brief Destroy a lexer
+ * 
+ * @param lexer Lexer to destroy
+ */
+static void lexer_destroy(Lexer* lexer) {
+    if (lexer == NULL) {
+        return;
+    }
+    
+    if (lexer->current_token.lexeme != NULL) {
+        free(lexer->current_token.lexeme);
+    }
+    
+    if (lexer->error_message != NULL) {
+        free(lexer->error_message);
+    }
+    
+    free(lexer);
+}
+
+/**
+ * @brief Set lexer error
+ * 
+ * @param lexer Lexer instance
+ * @param message Error message
+ */
+static void lexer_set_error(Lexer* lexer, const char* message) {
+    if (lexer == NULL) {
+        return;
+    }
+    
+    lexer->has_error = true;
+    if (lexer->error_message != NULL) {
+        free(lexer->error_message);
+    }
+    lexer->error_message = strdup(message);
+}
+
+/**
+ * @brief Check if we're at the end of input
+ * 
+ * @param lexer Lexer instance
+ * @return true if at end, false otherwise
+ */
+static bool lexer_is_at_end(const Lexer* lexer) {
+    return lexer->position >= lexer->source_len;
+}
+
+/**
+ * @brief Peek at current character
+ * 
+ * @param lexer Lexer instance
+ * @return Current character, or '\0' if at end
+ */
+static char lexer_peek(const Lexer* lexer) {
+    if (lexer_is_at_end(lexer)) {
+        return '\0';
+    }
+    return lexer->source[lexer->position];
+}
+
+/**
+ * @brief Peek at next character
+ * 
+ * @param lexer Lexer instance
+ * @return Next character, or '\0' if at end
+ */
+static char lexer_peek_next(const Lexer* lexer) {
+    if (lexer->position + 1 >= lexer->source_len) {
+        return '\0';
+    }
+    return lexer->source[lexer->position + 1];
+}
+
+/**
+ * @brief Advance to next character
+ * 
+ * @param lexer Lexer instance
+ * @return Character that was advanced over
+ */
+static char lexer_advance(Lexer* lexer) {
+    if (lexer_is_at_end(lexer)) {
+        return '\0';
+    }
+    
+    char c = lexer->source[lexer->position];
+    lexer->position++;
+    lexer->column++;
+    
+    if (c == '\n') {
+        lexer->line++;
+        lexer->column = 1;
+    }
+    
+    return c;
+}
+
+/**
+ * @brief Match current character and advance if it matches
+ * 
+ * @param lexer Lexer instance
+ * @param expected Expected character
+ * @return true if matched, false otherwise
+ */
+static bool lexer_match(Lexer* lexer, char expected) {
+    if (lexer_is_at_end(lexer)) {
+        return false;
+    }
+    
+    if (lexer->source[lexer->position] != expected) {
+        return false;
+    }
+    
+    lexer_advance(lexer);
+    return true;
+}
+
+/**
+ * @brief Skip whitespace
+ * 
+ * @param lexer Lexer instance
+ */
+static void lexer_skip_whitespace(Lexer* lexer) {
+    while (!lexer_is_at_end(lexer) && isspace((unsigned char)lexer_peek(lexer))) {
+        lexer_advance(lexer);
+    }
+}
+
+/**
+ * @brief Skip comments
+ * 
+ * @param lexer Lexer instance
+ */
+static void lexer_skip_comments(Lexer* lexer) {
+    if (lexer_peek(lexer) == '/' && lexer_peek_next(lexer) == '/') {
+        /* Single line comment */
+        while (!lexer_is_at_end(lexer) && lexer_peek(lexer) != '\n') {
+            lexer_advance(lexer);
+        }
+    } else if (lexer_peek(lexer) == '/' && lexer_peek_next(lexer) == '*') {
+        /* Multi-line comment */
+        lexer_advance(lexer); /* consume '/' */
+        lexer_advance(lexer); /* consume '*' */
+        
+        while (!lexer_is_at_end(lexer)) {
+            if (lexer_peek(lexer) == '*' && lexer_peek_next(lexer) == '/') {
+                lexer_advance(lexer); /* consume '*' */
+                lexer_advance(lexer); /* consume '/' */
+                break;
+            }
+            lexer_advance(lexer);
+        }
+    }
+}
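+
+/*
+ * Note: an unterminated block comment is consumed silently up to end of
+ * input rather than reported as an error; the caller simply sees EOF after.
+ */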
+
+/**
+ * @brief Read a number literal
+ * 
+ * @param lexer Lexer instance
+ * @return Token with number literal
+ */
+static Token lexer_read_number(Lexer* lexer) {
+    Token token;
+    token.type = TOKEN_NUMBER;
+    token.line = lexer->line;
+    token.column = lexer->column;
+    
+    /* Record the start offset directly instead of reconstructing it from
+     * column arithmetic, which would break if this ever spanned a newline */
+    size_t start = lexer->position;
+    
+    /* Read integer part */
+    while (!lexer_is_at_end(lexer) && isdigit((unsigned char)lexer_peek(lexer))) {
+        lexer_advance(lexer);
+    }
+    
+    /* Read decimal part */
+    if (!lexer_is_at_end(lexer) && lexer_peek(lexer) == '.' && 
+        isdigit((unsigned char)lexer_peek_next(lexer))) {
+        lexer_advance(lexer); /* consume '.' */
+        
+        while (!lexer_is_at_end(lexer) && isdigit((unsigned char)lexer_peek(lexer))) {
+            lexer_advance(lexer);
+        }
+    }
+    
+    /* Read exponent part */
+    if (!lexer_is_at_end(lexer) && (lexer_peek(lexer) == 'e' || lexer_peek(lexer) == 'E')) {
+        lexer_advance(lexer); /* consume 'e' or 'E' */
+        
+        if (!lexer_is_at_end(lexer) && (lexer_peek(lexer) == '+' || lexer_peek(lexer) == '-')) {
+            lexer_advance(lexer); /* consume sign */
+        }
+        
+        while (!lexer_is_at_end(lexer) && isdigit((unsigned char)lexer_peek(lexer))) {
+            lexer_advance(lexer);
+        }
+    }
+    
+    /* Extract lexeme and convert to number */
+    size_t length = lexer->position - start;
+    
+    token.lexeme = malloc(length + 1);
+    if (token.lexeme == NULL) {
+        lexer_set_error(lexer, "Memory allocation failed");
+        token.type = TOKEN_EOF;
+        return token;
+    }
+    
+    memcpy(token.lexeme, lexer->source + start, length);
+    token.lexeme[length] = '\0';
+    
+    token.literal.number = strtod(token.lexeme, NULL);
+    
+    return token;
+}
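+
+/*
+ * Illustrative inputs for lexer_read_number (a sketch of the grammar above):
+ * "42", "3.14", and "2.5e-10" each lex as one TOKEN_NUMBER; "1." lexes as
+ * "1" followed by TOKEN_DOT, since the dot is only consumed when a digit
+ * follows it. A leading '-' is handled by the operator path in
+ * lexer_next_token, not here.
+ */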
+
+/**
+ * @brief Read a string literal
+ * 
+ * @param lexer Lexer instance
+ * @return Token with string literal
+ */
+static Token lexer_read_string(Lexer* lexer) {
+    Token token;
+    token.type = TOKEN_STRING;
+    token.line = lexer->line;
+    token.column = lexer->column;
+    
+    lexer_advance(lexer); /* consume opening quote */
+    
+    size_t start = lexer->position;
+    
+    while (!lexer_is_at_end(lexer) && lexer_peek(lexer) != '"') {
+        if (lexer_peek(lexer) == '\\') {
+            lexer_advance(lexer); /* consume backslash */
+            if (!lexer_is_at_end(lexer)) {
+                lexer_advance(lexer); /* consume escaped character */
+            }
+        } else {
+            lexer_advance(lexer);
+        }
+    }
+    
+    if (lexer_is_at_end(lexer)) {
+        lexer_set_error(lexer, "Unterminated string literal");
+        token.type = TOKEN_EOF;
+        token.lexeme = NULL;
+        return token;
+    }
+    
+    /* Measure the length from source positions so that escape sequences
+     * (two characters each) are copied in full */
+    size_t length = lexer->position - start;
+    
+    lexer_advance(lexer); /* consume closing quote */
+    
+    /* Extract lexeme */
+    token.lexeme = malloc(length + 1);
+    if (token.lexeme == NULL) {
+        lexer_set_error(lexer, "Memory allocation failed");
+        token.type = TOKEN_EOF;
+        return token;
+    }
+    
+    memcpy(token.lexeme, lexer->source + start, length);
+    token.lexeme[length] = '\0';
+    
+    return token;
+}
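+
+/*
+ * Note: escape sequences are preserved verbatim in the lexeme (e.g. the two
+ * characters '\' and 'n' for "\n"); decoding them is left to a later stage.
+ * The surrounding quotes are consumed but not stored.
+ */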
+
+/**
+ * @brief Read an identifier or keyword
+ * 
+ * @param lexer Lexer instance
+ * @return Token with identifier or keyword
+ */
+static Token lexer_read_identifier(Lexer* lexer) {
+    Token token;
+    token.line = lexer->line;
+    token.column = lexer->column;
+    
+    size_t start = lexer->position;
+    
+    while (!lexer_is_at_end(lexer) && 
+           (isalnum((unsigned char)lexer_peek(lexer)) || lexer_peek(lexer) == '_')) {
+        lexer_advance(lexer);
+    }
+    
+    size_t length = lexer->position - start;
+    
+    /* Extract lexeme */
+    token.lexeme = malloc(length + 1);
+    if (token.lexeme == NULL) {
+        lexer_set_error(lexer, "Memory allocation failed");
+        token.type = TOKEN_EOF;
+        return token;
+    }
+    
+    memcpy(token.lexeme, lexer->source + start, length);
+    token.lexeme[length] = '\0';
+    
+    /* Check if it's a keyword */
+    if (strcmp(token.lexeme, "when") == 0) {
+        token.type = TOKEN_KEYWORD_WHEN;
+    } else if (strcmp(token.lexeme, "is") == 0) {
+        token.type = TOKEN_KEYWORD_IS;
+    } else if (strcmp(token.lexeme, "and") == 0) {
+        token.type = TOKEN_KEYWORD_AND;
+    } else if (strcmp(token.lexeme, "or") == 0) {
+        token.type = TOKEN_KEYWORD_OR;
+    } else if (strcmp(token.lexeme, "xor") == 0) {
+        token.type = TOKEN_KEYWORD_XOR;
+    } else if (strcmp(token.lexeme, "then") == 0) {
+        token.type = TOKEN_KEYWORD_THEN;
+    } else if (strcmp(token.lexeme, "not") == 0) {
+        token.type = TOKEN_KEYWORD_NOT;
+    } else if (strcmp(token.lexeme, "via") == 0) {
+        token.type = TOKEN_KEYWORD_VIA;
+    } else if (strcmp(token.lexeme, "true") == 0) {
+        token.type = TOKEN_BOOLEAN;
+        token.literal.boolean = true;
+    } else if (strcmp(token.lexeme, "false") == 0) {
+        token.type = TOKEN_BOOLEAN;
+        token.literal.boolean = false;
+    } else {
+        token.type = TOKEN_IDENTIFIER;
+    }
+    
+    return token;
+}
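+
+/*
+ * Alternative sketch (disabled): the keyword chain above could be driven by
+ * a lookup table instead, one line per keyword. true/false are omitted here
+ * because they also set the boolean literal. This is an equivalent design
+ * option, not what lexer_read_identifier currently does.
+ */
+#if 0
+static const struct { const char* word; TokenType type; } keywords[] = {
+    {"when", TOKEN_KEYWORD_WHEN}, {"is",  TOKEN_KEYWORD_IS},
+    {"then", TOKEN_KEYWORD_THEN}, {"and", TOKEN_KEYWORD_AND},
+    {"or",   TOKEN_KEYWORD_OR},   {"xor", TOKEN_KEYWORD_XOR},
+    {"not",  TOKEN_KEYWORD_NOT},  {"via", TOKEN_KEYWORD_VIA},
+};
+
+static TokenType keyword_lookup(const char* lexeme) {
+    for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
+        if (strcmp(lexeme, keywords[i].word) == 0) {
+            return keywords[i].type;
+        }
+    }
+    return TOKEN_IDENTIFIER;
+}
+#endif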
+
+/**
+ * @brief Read a special token (function reference, IO operations)
+ * 
+ * @param lexer Lexer instance
+ * @return Token with special type
+ */
+static Token lexer_read_special(Lexer* lexer) {
+    Token token;
+    token.line = lexer->line;
+    token.column = lexer->column;
+    
+    if (lexer_peek(lexer) == '@') {
+        /* Function reference */
+        lexer_advance(lexer); /* consume '@' */
+        
+        /* Check if this is @(expression) syntax */
+        if (!lexer_is_at_end(lexer) && lexer_peek(lexer) == '(') {
+            /* Just return the @ token for @(expression) syntax */
+            token.type = TOKEN_FUNCTION_REF;
+            token.lexeme = malloc(2); /* room for '@' and '\0' */
+            if (token.lexeme == NULL) {
+                lexer_set_error(lexer, "Memory allocation failed");
+                token.type = TOKEN_EOF;
+                return token;
+            }
+            token.lexeme[0] = '@';
+            token.lexeme[1] = '\0';
+        } else {
+            /* Handle @function_name syntax */
+            size_t start = lexer->position;
+            size_t length = 0;
+            
+            while (!lexer_is_at_end(lexer) && 
+                   (isalnum((unsigned char)lexer_peek(lexer)) || lexer_peek(lexer) == '_')) {
+                lexer_advance(lexer);
+                length++;
+            }
+            
+            if (length == 0) {
+                lexer_set_error(lexer, "Invalid function reference");
+                token.type = TOKEN_EOF;
+                token.lexeme = NULL;
+                return token;
+            }
+            
+            token.type = TOKEN_FUNCTION_REF;
+            token.lexeme = malloc(length + 2); /* +2 for '@' and '\0' */
+            if (token.lexeme == NULL) {
+                lexer_set_error(lexer, "Memory allocation failed");
+                token.type = TOKEN_EOF;
+                return token;
+            }
+            
+            token.lexeme[0] = '@';
+            strncpy(token.lexeme + 1, lexer->source + start, length);
+            token.lexeme[length + 1] = '\0';
+        }
+        
+    } else if (lexer_peek(lexer) == '.' && lexer_peek_next(lexer) == '.') {
+        /* IO operation */
+        lexer_advance(lexer); /* consume first '.' */
+        lexer_advance(lexer); /* consume second '.' */
+        
+        size_t start = lexer->position;
+        size_t length = 0;
+        
+        while (!lexer_is_at_end(lexer) && 
+               (isalpha((unsigned char)lexer_peek(lexer)) || lexer_peek(lexer) == '_')) {
+            lexer_advance(lexer);
+            length++;
+        }
+        
+        if (length == 0) {
+            lexer_set_error(lexer, "Invalid IO operation");
+            token.type = TOKEN_EOF;
+            token.lexeme = NULL;
+            return token;
+        }
+        
+        token.lexeme = malloc(length + 3); /* two dots + operation + '\0' */
+        if (token.lexeme == NULL) {
+            lexer_set_error(lexer, "Memory allocation failed");
+            token.type = TOKEN_EOF;
+            return token;
+        }
+        
+        token.lexeme[0] = '.';
+        token.lexeme[1] = '.';
+        strncpy(token.lexeme + 2, lexer->source + start, length);
+        token.lexeme[length + 2] = '\0';
+        
+        /* Determine IO operation type */
+        if (strcmp(token.lexeme, "..in") == 0) {
+            token.type = TOKEN_IO_IN;
+        } else if (strcmp(token.lexeme, "..out") == 0) {
+            token.type = TOKEN_IO_OUT;
+        } else if (strcmp(token.lexeme, "..assert") == 0) {
+            token.type = TOKEN_IO_ASSERT;
+        } else {
+            lexer_set_error(lexer, "Unknown IO operation");
+            token.type = TOKEN_EOF;
+            free(token.lexeme);
+            token.lexeme = NULL; /* avoid returning a dangling pointer */
+            return token;
+        }
+    } else {
+        /* Callers only dispatch '@' or '..' here; fail loudly otherwise
+         * instead of returning an uninitialized token */
+        lexer_set_error(lexer, "Invalid special token");
+        token.type = TOKEN_EOF;
+        token.lexeme = NULL;
+    }
+    
+    return token;
+}
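+
+/*
+ * Illustrative inputs for lexer_read_special (a sketch):
+ *   @add      -> TOKEN_FUNCTION_REF, lexeme "@add"
+ *   @(f x)    -> TOKEN_FUNCTION_REF, lexeme "@" ('(' is lexed separately)
+ *   ..out     -> TOKEN_IO_OUT, lexeme "..out"
+ *   ..launch  -> error: "Unknown IO operation"
+ */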
+
+/**
+ * @brief Read the next token
+ * 
+ * @param lexer Lexer instance
+ * @return Next token
+ */
+static Token lexer_next_token(Lexer* lexer) {
+    /* Skip whitespace and comments until neither applies; this also handles
+     * back-to-back comments with no whitespace between them */
+    for (;;) {
+        lexer_skip_whitespace(lexer);
+        if (lexer_peek(lexer) == '/' &&
+            (lexer_peek_next(lexer) == '/' || lexer_peek_next(lexer) == '*')) {
+            lexer_skip_comments(lexer);
+        } else {
+            break;
+        }
+    }
+    
+    if (lexer_is_at_end(lexer)) {
+        Token token;
+        token.type = TOKEN_EOF;
+        token.lexeme = NULL;
+        token.line = lexer->line;
+        token.column = lexer->column;
+        return token;
+    }
+    
+    char c = lexer_peek(lexer);
+    
+    /* Numbers */
+    if (isdigit((unsigned char)c)) {
+        return lexer_read_number(lexer);
+    }
+    
+    /* Strings */
+    if (c == '"') {
+        return lexer_read_string(lexer);
+    }
+    
+    /* Special tokens */
+    if (c == '@' || (c == '.' && lexer_peek_next(lexer) == '.')) {
+        return lexer_read_special(lexer);
+    }
+    
+    /* Identifiers and keywords */
+    if (isalpha((unsigned char)c) || c == '_') {
+        return lexer_read_identifier(lexer);
+    }
+    
+    /* Single character tokens */
+    switch (c) {
+    case '(':
+        lexer_advance(lexer);
+        return token_create(TOKEN_LPAREN, "(", lexer->line, lexer->column - 1);
+    case ')':
+        lexer_advance(lexer);
+        return token_create(TOKEN_RPAREN, ")", lexer->line, lexer->column - 1);
+    case '{':
+        lexer_advance(lexer);
+        return token_create(TOKEN_LBRACE, "{", lexer->line, lexer->column - 1);
+    case '}':
+        lexer_advance(lexer);
+        return token_create(TOKEN_RBRACE, "}", lexer->line, lexer->column - 1);
+    case '[':
+        lexer_advance(lexer);
+        return token_create(TOKEN_LBRACKET, "[", lexer->line, lexer->column - 1);
+    case ']':
+        lexer_advance(lexer);
+        return token_create(TOKEN_RBRACKET, "]", lexer->line, lexer->column - 1);
+    case ',':
+        lexer_advance(lexer);
+        return token_create(TOKEN_COMMA, ",", lexer->line, lexer->column - 1);
+    case ':':
+        lexer_advance(lexer);
+        return token_create(TOKEN_COLON, ":", lexer->line, lexer->column - 1);
+    case ';':
+        lexer_advance(lexer);
+        return token_create(TOKEN_SEMICOLON, ";", lexer->line, lexer->column - 1);
+    case '.':
+        lexer_advance(lexer);
+        return token_create(TOKEN_DOT, ".", lexer->line, lexer->column - 1);
+    case '-':
+        lexer_advance(lexer);
+        if (lexer_match(lexer, '>')) {
+            return token_create(TOKEN_ARROW, "->", lexer->line, lexer->column - 2);
+        }
+        /* For now, always treat minus as binary operator */
+        /* TODO: Implement proper unary vs binary minus detection */
+        return token_create(TOKEN_OP_MINUS, "-", lexer->line, lexer->column - 1);
+    case '+':
+        lexer_advance(lexer);
+        return token_create(TOKEN_OP_PLUS, "+", lexer->line, lexer->column - 1);
+    case '*':
+        lexer_advance(lexer);
+        return token_create(TOKEN_OP_MULTIPLY, "*", lexer->line, lexer->column - 1);
+    case '/':
+        lexer_advance(lexer);
+        return token_create(TOKEN_OP_DIVIDE, "/", lexer->line, lexer->column - 1);
+    case '%':
+        lexer_advance(lexer);
+        return token_create(TOKEN_OP_MODULO, "%", lexer->line, lexer->column - 1);
+    case '^':
+        lexer_advance(lexer);
+        return token_create(TOKEN_OP_POWER, "^", lexer->line, lexer->column - 1);
+    case '=':
+        lexer_advance(lexer);
+        if (lexer_match(lexer, '=')) {
+            return token_create(TOKEN_OP_EQUALS, "==", lexer->line, lexer->column - 2);
+        }
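+        /* Note: a single '=' currently lexes as TOKEN_OP_EQUALS as well */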
+        return token_create(TOKEN_OP_EQUALS, "=", lexer->line, lexer->column - 1);
+    case '!':
+        lexer_advance(lexer);
+        if (lexer_match(lexer, '=')) {
+            return token_create(TOKEN_OP_NOT_EQUALS, "!=", lexer->line, lexer->column - 2);
+        }
+        break;
+    case '<':
+        lexer_advance(lexer);
+        if (lexer_match(lexer, '=')) {
+            return token_create(TOKEN_OP_LESS_EQUAL, "<=", lexer->line, lexer->column - 2);
+        }
+        return token_create(TOKEN_OP_LESS, "<", lexer->line, lexer->column - 1);
+    case '>':
+        lexer_advance(lexer);
+        if (lexer_match(lexer, '=')) {
+            return token_create(TOKEN_OP_GREATER_EQUAL, ">=", lexer->line, lexer->column - 2);
+        }
+        return token_create(TOKEN_OP_GREATER, ">", lexer->line, lexer->column - 1);
+    }
+    
+    /* Unknown character */
+    char error_msg[64];
+    snprintf(error_msg, sizeof(error_msg), "Unexpected character: '%c'", c);
+    lexer_set_error(lexer, error_msg);
+    
+    Token token;
+    token.type = TOKEN_EOF;
+    token.lexeme = NULL;
+    token.line = lexer->line;
+    token.column = lexer->column;
+    return token;
+}
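+
+/*
+ * Sketch of a scan loop over lexer_next_token for debugging; the function
+ * is disabled and the printf format is illustrative only.
+ */
+#if 0
+static void debug_scan(const char* src) {
+    Lexer* lexer = lexer_create(src, strlen(src));
+    if (lexer == NULL) {
+        return;
+    }
+    for (;;) {
+        Token token = lexer_next_token(lexer);
+        if (lexer->has_error || token.type == TOKEN_EOF) {
+            break;
+        }
+        printf("%d:%d '%s'\n", token.line, token.column,
+               token.lexeme != NULL ? token.lexeme : "");
+        free(token.lexeme);
+    }
+    lexer_destroy(lexer);
+}
+#endif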
+
+/* ============================================================================
+ * Public Lexer API
+ * ============================================================================ */
+
+/**
+ * @brief Tokenize source code
+ * 
+ * @param source Source code to tokenize
+ * @param source_len Length of source code
+ * @param tokens Output array for tokens
+ * @param max_tokens Maximum number of tokens to read
+ * @return Number of tokens read, or -1 on error
+ */
+int baba_yaga_tokenize(const char* source, size_t source_len, 
+                      void** tokens, size_t max_tokens) {
+    if (source == NULL || tokens == NULL) {
+        return -1;
+    }
+    
+    Lexer* lexer = lexer_create(source, source_len);
+    if (lexer == NULL) {
+        return -1;
+    }
+    
+    size_t token_count = 0;
+    
+    while (token_count < max_tokens) {
+        Token token = lexer_next_token(lexer);
+        
+        if (lexer->has_error) {
+            free(token.lexeme); /* NULL on error paths; free(NULL) is a no-op */
+            baba_yaga_free_tokens(tokens, token_count);
+            lexer_destroy(lexer);
+            return -1;
+        }
+        
+        if (token.type == TOKEN_EOF) {
+            break;
+        }
+        
+        /* Allocate token and copy data; the lexeme pointer moves with it */
+        Token* token_ptr = malloc(sizeof(Token));
+        if (token_ptr == NULL) {
+            free(token.lexeme);
+            baba_yaga_free_tokens(tokens, token_count);
+            lexer_destroy(lexer);
+            return -1;
+        }
+        
+        *token_ptr = token;
+        tokens[token_count] = token_ptr;
+        token_count++;
+    }
+    
+    lexer_destroy(lexer);
+    return (int)token_count;
+}
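+
+/*
+ * Usage sketch for the public API; the buffer size and source string are
+ * illustrative:
+ *
+ *     void* tokens[256];
+ *     int n = baba_yaga_tokenize(src, strlen(src), tokens, 256);
+ *     if (n >= 0) {
+ *         ... use tokens[0..n-1] as Token* ...
+ *         baba_yaga_free_tokens(tokens, (size_t)n);
+ *     }
+ */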
+
+/**
+ * @brief Free tokens
+ * 
+ * @param tokens Array of tokens
+ * @param count Number of tokens
+ */
+void baba_yaga_free_tokens(void** tokens, size_t count) {
+    if (tokens == NULL) {
+        return;
+    }
+    
+    for (size_t i = 0; i < count; i++) {
+        if (tokens[i] != NULL) {
+            Token* token = (Token*)tokens[i];
+            if (token->lexeme != NULL) {
+                free(token->lexeme);
+            }
+            free(token);
+        }
+    }
+}