diff options
Diffstat (limited to 'js/scripting-lang/docs/baba-yaga/0.0.1/lexer.js.html')
-rw-r--r-- | js/scripting-lang/docs/baba-yaga/0.0.1/lexer.js.html | 591 |
1 files changed, 0 insertions, 591 deletions
diff --git a/js/scripting-lang/docs/baba-yaga/0.0.1/lexer.js.html b/js/scripting-lang/docs/baba-yaga/0.0.1/lexer.js.html deleted file mode 100644 index 1ebd7a1..0000000 --- a/js/scripting-lang/docs/baba-yaga/0.0.1/lexer.js.html +++ /dev/null @@ -1,591 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="utf-8"> - <meta name="viewport" content="width=device-width,initial-scale=1"> - <title>lexer.js - Documentation</title> - - <script src="scripts/prettify/prettify.js"></script> - <script src="scripts/prettify/lang-css.js"></script> - <!--[if lt IE 9]> - <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script> - <![endif]--> - <link type="text/css" rel="stylesheet" href="https://code.ionicframework.com/ionicons/2.0.1/css/ionicons.min.css"> - <link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css"> - <link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css"> -</head> -<body> - -<input type="checkbox" id="nav-trigger" class="nav-trigger" /> -<label for="nav-trigger" class="navicon-button x"> - <div class="navicon"></div> -</label> - -<label for="nav-trigger" class="overlay"></label> - -<nav> - <li class="nav-link nav-home-link"><a href="index.html">Home</a></li><li class="nav-heading">Tutorials</li><li class="nav-item"><a href="tutorial-00_Introduction.html">00_Introduction</a></li><li class="nav-item"><a href="tutorial-01_Function_Calls.html">01_Function_Calls</a></li><li class="nav-item"><a href="tutorial-02_Function_Composition.html">02_Function_Composition</a></li><li class="nav-item"><a href="tutorial-03_Table_Operations.html">03_Table_Operations</a></li><li class="nav-item"><a href="tutorial-04_Currying.html">04_Currying</a></li><li class="nav-item"><a href="tutorial-05_Pattern_Matching.html">05_Pattern_Matching</a></li><li class="nav-item"><a href="tutorial-06_Immutable_Tables.html">06_Immutable_Tables</a></li><li class="nav-item"><a 
href="tutorial-07_Function_References.html">07_Function_References</a></li><li class="nav-item"><a href="tutorial-08_Combinators.html">08_Combinators</a></li><li class="nav-item"><a href="tutorial-09_Expression_Based.html">09_Expression_Based</a></li><li class="nav-item"><a href="tutorial-10_Tables_Deep_Dive.html">10_Tables_Deep_Dive</a></li><li class="nav-item"><a href="tutorial-11_Standard_Library.html">11_Standard_Library</a></li><li class="nav-item"><a href="tutorial-12_IO_Operations.html">12_IO_Operations</a></li><li class="nav-item"><a href="tutorial-13_Error_Handling.html">13_Error_Handling</a></li><li class="nav-item"><a href="tutorial-14_Advanced_Combinators.html">14_Advanced_Combinators</a></li><li class="nav-item"><a href="tutorial-15_Integration_Patterns.html">15_Integration_Patterns</a></li><li class="nav-item"><a href="tutorial-16_Best_Practices.html">16_Best_Practices</a></li><li class="nav-item"><a href="tutorial-README.html">README</a></li><li class="nav-heading"><a href="global.html">Globals</a></li><li class="nav-item"><span class="nav-item-type type-member">M</span><span class="nav-item-name"><a href="global.html#callStackTracker">callStackTracker</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a href="global.html#debugError">debugError</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a href="global.html#debugLog">debugLog</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a href="global.html#executeFile">executeFile</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a href="global.html#initializeStandardLibrary">initializeStandardLibrary</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a 
href="global.html#interpreter">interpreter</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a href="global.html#lexer">lexer</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a href="global.html#main">main</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a href="global.html#parser">parser</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a href="global.html#readFile">readFile</a></span></li><li class="nav-item"><span class="nav-item-type type-function">F</span><span class="nav-item-name"><a href="global.html#run">run</a></span></li> -</nav> - -<div id="main"> - - <h1 class="page-title">lexer.js</h1> - - - - - - - - <section> - <article> - <pre class="prettyprint source linenums"><code>// Lexer for the scripting language -// Supports both Node.js and browser environments - -/** - * Token types for the language - * - * @description Defines all token types used by the lexer and parser. - * Each token type represents a distinct syntactic element in the language. - * - * The token types are organized into categories: - * - Literals: NUMBER, STRING, TRUE, FALSE - * - Operators: PLUS, MINUS, MULTIPLY, DIVIDE, MODULO, POWER, etc. - * - Keywords: WHEN, IS, THEN, FUNCTION, etc. - * - Punctuation: LEFT_PAREN, RIGHT_PAREN, SEMICOLON, COMMA, etc. - * - Special: IO_IN, IO_OUT, IO_ASSERT, IO_LISTEN, IO_EMIT, FUNCTION_REF, FUNCTION_ARG - * - * This enumeration provides a centralized definition of all possible - * token types, ensuring consistency between lexer and parser. The token - * types are designed to support the combinator-based architecture where - * all operations are translated to function calls. 
- * - * @typedef {Object} TokenType - * @property {string} NUMBER - Numeric literals (integers and floats) - * @property {string} PLUS - Addition operator (+) - * @property {string} MINUS - Subtraction operator (-) - * @property {string} MULTIPLY - Multiplication operator (*) - * @property {string} DIVIDE - Division operator (/) - * @property {string} IDENTIFIER - Variable names and function names - * @property {string} ASSIGNMENT - Assignment operator (:) - * @property {string} ARROW - Function arrow (->) - * @property {string} CASE - Case keyword - * @property {string} OF - Of keyword - * @property {string} WHEN - When keyword for pattern matching - * @property {string} IS - Is keyword for pattern matching - * @property {string} THEN - Then keyword for pattern matching - * @property {string} WILDCARD - Wildcard pattern (_) - * @property {string} FUNCTION - Function keyword - * @property {string} LEFT_PAREN - Left parenthesis (() - * @property {string} RIGHT_PAREN - Right parenthesis ()) - * @property {string} LEFT_BRACE - Left brace ({) - * @property {string} RIGHT_BRACE - Right brace (}) - * @property {string} LEFT_BRACKET - Left bracket ([) - * @property {string} RIGHT_BRACKET - Right bracket (]) - * @property {string} SEMICOLON - Semicolon (;) - * @property {string} COMMA - Comma (,) - * @property {string} DOT - Dot (.) 
/**
 * Token types for the language.
 *
 * Every token produced by {@link lexer} carries one of these string tags in
 * its `type` field. The values are grouped as follows:
 * - Literals: NUMBER, STRING, TRUE, FALSE
 * - Arithmetic: PLUS, MINUS, UNARY_MINUS, BINARY_MINUS, MULTIPLY, DIVIDE,
 *   MODULO, POWER
 * - Comparison: EQUALS, NOT_EQUAL, LESS_THAN, GREATER_THAN, LESS_EQUAL,
 *   GREATER_EQUAL
 * - Logical: AND, OR, XOR, NOT
 * - Keywords: CASE, OF, WHEN, IS, THEN, FUNCTION, COMPOSE (`via`)
 * - Punctuation: LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE,
 *   LEFT_BRACKET, RIGHT_BRACKET, SEMICOLON, COMMA, DOT, ASSIGNMENT (`:`),
 *   ARROW (`->`), WILDCARD (`_`)
 * - Special: IO_IN, IO_OUT, IO_ASSERT, IO_LISTEN, IO_EMIT, FUNCTION_REF
 *   (`@name`), FUNCTION_ARG (`@(`), IDENTIFIER
 *
 * A `-` is emitted as one of three types depending on its surroundings; see
 * {@link lexer} for the exact rules.
 */
export const TokenType = {
  NUMBER: 'NUMBER',
  PLUS: 'PLUS',
  MINUS: 'MINUS',
  UNARY_MINUS: 'UNARY_MINUS',
  BINARY_MINUS: 'BINARY_MINUS',
  MULTIPLY: 'MULTIPLY',
  DIVIDE: 'DIVIDE',
  IDENTIFIER: 'IDENTIFIER',
  ASSIGNMENT: 'ASSIGNMENT',
  ARROW: 'ARROW',
  CASE: 'CASE',
  OF: 'OF',
  WHEN: 'WHEN',
  IS: 'IS',
  THEN: 'THEN',
  WILDCARD: 'WILDCARD',
  FUNCTION: 'FUNCTION',
  LEFT_PAREN: 'LEFT_PAREN',
  RIGHT_PAREN: 'RIGHT_PAREN',
  LEFT_BRACE: 'LEFT_BRACE',
  RIGHT_BRACE: 'RIGHT_BRACE',
  LEFT_BRACKET: 'LEFT_BRACKET',
  RIGHT_BRACKET: 'RIGHT_BRACKET',
  SEMICOLON: 'SEMICOLON',
  COMMA: 'COMMA',
  DOT: 'DOT',
  STRING: 'STRING',
  TRUE: 'TRUE',
  FALSE: 'FALSE',
  AND: 'AND',
  OR: 'OR',
  XOR: 'XOR',
  NOT: 'NOT',
  EQUALS: 'EQUALS',
  LESS_THAN: 'LESS_THAN',
  GREATER_THAN: 'GREATER_THAN',
  LESS_EQUAL: 'LESS_EQUAL',
  GREATER_EQUAL: 'GREATER_EQUAL',
  NOT_EQUAL: 'NOT_EQUAL',
  MODULO: 'MODULO',
  POWER: 'POWER',
  IO_IN: 'IO_IN',
  IO_OUT: 'IO_OUT',
  IO_ASSERT: 'IO_ASSERT',
  IO_LISTEN: 'IO_LISTEN',
  IO_EMIT: 'IO_EMIT',
  FUNCTION_REF: 'FUNCTION_REF',
  FUNCTION_ARG: 'FUNCTION_ARG',
  COMPOSE: 'COMPOSE'
};

/**
 * Token object structure.
 *
 * @typedef {Object} Token
 * @property {string} type - One of the TokenType values
 * @property {*} [value] - Present on NUMBER, STRING, TRUE, FALSE and IDENTIFIER tokens
 * @property {string} [name] - Function name (FUNCTION_REF tokens only)
 * @property {number} line - 1-indexed line of the token's first character
 * @property {number} column - 1-indexed column of the token's first character
 */

// Character classes used by the scanner. None of them carries the `g` flag,
// so `.test()` is stateless and the literals can be shared safely.
const WHITESPACE = /\s/;
const DIGIT = /[0-9]/;
const LETTER = /[a-zA-Z]/;
const IDENTIFIER_START = /[a-zA-Z_]/;
const IDENTIFIER_PART = /[a-zA-Z0-9_]/;

// Reserved words that map to a value-less token. `true` / `false` are handled
// separately because their tokens also carry a boolean `value`.
// A Map (not a plain object) so identifiers such as `constructor` never hit
// an inherited property.
const KEYWORD_TOKENS = new Map([
  ['and', TokenType.AND],
  ['or', TokenType.OR],
  ['xor', TokenType.XOR],
  ['not', TokenType.NOT],
  ['case', TokenType.CASE],
  ['of', TokenType.OF],
  ['when', TokenType.WHEN],
  ['is', TokenType.IS],
  ['then', TokenType.THEN],
  ['function', TokenType.FUNCTION],
  ['via', TokenType.COMPOSE], // f via g = compose(f, g)
  ['_', TokenType.WILDCARD]
]);

// Names accepted after the `..` prefix.
const IO_TOKENS = new Map([
  ['in', TokenType.IO_IN],
  ['out', TokenType.IO_OUT],
  ['assert', TokenType.IO_ASSERT],
  ['listen', TokenType.IO_LISTEN],
  ['emit', TokenType.IO_EMIT]
]);

// Operators and punctuation that are always exactly one character long.
const PUNCTUATION_TOKENS = new Map([
  ['+', TokenType.PLUS],
  ['*', TokenType.MULTIPLY],
  ['/', TokenType.DIVIDE],
  ['%', TokenType.MODULO],
  ['^', TokenType.POWER],
  ['(', TokenType.LEFT_PAREN],
  [')', TokenType.RIGHT_PAREN],
  ['{', TokenType.LEFT_BRACE],
  ['}', TokenType.RIGHT_BRACE],
  ['[', TokenType.LEFT_BRACKET],
  [']', TokenType.RIGHT_BRACKET],
  [';', TokenType.SEMICOLON],
  [',', TokenType.COMMA],
  ['.', TokenType.DOT],
  [':', TokenType.ASSIGNMENT]
]);

// Escape letters with a special meaning inside string literals. Any other
// escaped character (including `\\` and `\"`) stands for itself.
const STRING_ESCAPES = new Map([
  ['n', '\n'],
  ['t', '\t'],
  ['r', '\r']
]);

/**
 * Converts source code into a flat array of tokens.
 *
 * The scanner walks the input one character at a time with single-character
 * lookahead, tracking a 1-indexed line and column. Whitespace, `// ...`
 * comments and `/* ... *​/` comments are skipped.
 *
 * Minus handling (`-` not followed by `>`):
 * - UNARY_MINUS when the nearest preceding non-whitespace character is `;`
 *   or does not exist, and the next character is a digit (`-5`).
 * - BINARY_MINUS when there is whitespace immediately on both sides (`5 - 3`).
 * - MINUS in every other case (`5-3`, `x : -5`).
 *
 * Number handling: digits, optionally followed by a single `.` and more
 * digits. A dot that is not followed by a digit is left for the next token,
 * so `t.1.name` yields IDENTIFIER DOT NUMBER DOT IDENTIFIER.
 *
 * A single `=` and `==` both produce EQUALS.
 *
 * @param {string} input - The source code to tokenize
 * @returns {Array.<Token>} Tokens in source order
 * @throws {Error} On an unexpected character, an unknown `..` operation, an
 *   `@` not followed by a name or `(`, an unterminated string literal, or an
 *   unterminated multi-line comment. Messages include line and column.
 */
export function lexer(input) {
  const tokens = [];
  let current = 0;
  let line = 1;
  let column = 1;

  // Move forward on the current line.
  const advance = (count = 1) => {
    current += count;
    column += count;
  };

  // Move forward one character that may be a newline.
  const advanceTrackingLines = () => {
    if (input[current] === '\n') {
      line++;
      column = 1;
    } else {
      column++;
    }
    current++;
  };

  // Consume the longest run of characters matching `pattern` and return it.
  const readWhile = (pattern) => {
    const start = current;
    while (current < input.length && pattern.test(input[current])) {
      advance();
    }
    return input.slice(start, current);
  };

  // True when nothing but whitespace separates the current character from
  // the start of the input or from a preceding `;`.
  const isAtStatementStart = () => {
    let pos = current - 1;
    while (pos >= 0 && WHITESPACE.test(input[pos])) {
      pos--;
    }
    return pos < 0 || input[pos] === ';';
  };

  // True when the current character has whitespace directly before and after.
  const isSurroundedBySpaces = () =>
    current > 0 &&
    WHITESPACE.test(input[current - 1]) &&
    current + 1 < input.length &&
    WHITESPACE.test(input[current + 1]);

  // Decide which of the three minus token types applies at `current`.
  const classifyMinus = () => {
    const isFollowedByNumber = current + 1 < input.length && DIGIT.test(input[current + 1]);
    if (isAtStatementStart() && isFollowedByNumber) {
      return TokenType.UNARY_MINUS;
    }
    if (isSurroundedBySpaces()) {
      return TokenType.BINARY_MINUS;
    }
    return TokenType.MINUS;
  };

  while (current < input.length) {
    const char = input[current];
    const next = input[current + 1];

    if (WHITESPACE.test(char)) {
      advanceTrackingLines();
      continue;
    }

    // Single-line comment: skip up to (not including) the newline.
    if (char === '/' && next === '/') {
      while (current < input.length && input[current] !== '\n') {
        advance();
      }
      continue;
    }

    // Multi-line comment.
    if (char === '/' && next === '*') {
      const startLine = line;
      const startColumn = column;
      advance(2);
      while (current < input.length && !(input[current] === '*' && input[current + 1] === '/')) {
        advanceTrackingLines();
      }
      if (current >= input.length) {
        // Previously the scan stopped one character early and the last
        // character of the comment leaked out as a token.
        throw new Error(`Unterminated multi-line comment starting at line ${startLine}, column ${startColumn}`);
      }
      advance(2);
      continue;
    }

    // IO operations: ..in, ..out, ..assert, ..listen, ..emit
    if (char === '.' && next === '.') {
      const startColumn = column;
      advance(2);
      const operation = readWhile(LETTER);
      const ioType = IO_TOKENS.get(operation);
      if (ioType === undefined) {
        throw new Error(`Unknown IO operation: ..${operation} at line ${line}, column ${startColumn}`);
      }
      tokens.push({ type: ioType, line, column: startColumn });
      continue;
    }

    // Function references (@name) and function arguments (@(expression)).
    if (char === '@') {
      const startColumn = column;
      advance();
      if (input[current] === '(') {
        // The parenthesis itself is left in the input and lexed as LEFT_PAREN.
        tokens.push({ type: TokenType.FUNCTION_ARG, line, column: startColumn });
        continue;
      }
      const functionName = readWhile(IDENTIFIER_PART);
      if (functionName === '') {
        throw new Error(`Invalid function reference at line ${line}, column ${startColumn}`);
      }
      tokens.push({ type: TokenType.FUNCTION_REF, name: functionName, line, column: startColumn });
      continue;
    }

    // Numbers: integer part plus at most one fractional part.
    if (DIGIT.test(char)) {
      const startColumn = column;
      let text = readWhile(DIGIT);
      const hasFraction =
        input[current] === '.' && current + 1 < input.length && DIGIT.test(input[current + 1]);
      if (hasFraction) {
        advance(); // the decimal point
        text += `.${readWhile(DIGIT)}`;
      }
      tokens.push({ type: TokenType.NUMBER, value: Number.parseFloat(text), line, column: startColumn });
      continue;
    }

    // Identifiers and keywords.
    if (IDENTIFIER_START.test(char)) {
      const startColumn = column;
      const word = readWhile(IDENTIFIER_PART);
      if (word === 'true') {
        tokens.push({ type: TokenType.TRUE, value: true, line, column: startColumn });
      } else if (word === 'false') {
        tokens.push({ type: TokenType.FALSE, value: false, line, column: startColumn });
      } else if (KEYWORD_TOKENS.has(word)) {
        tokens.push({ type: KEYWORD_TOKENS.get(word), line, column: startColumn });
      } else {
        tokens.push({ type: TokenType.IDENTIFIER, value: word, line, column: startColumn });
      }
      continue;
    }

    // String literals. The token is positioned at its opening quote.
    if (char === '"') {
      const startLine = line;
      const startColumn = column;
      advance(); // opening quote
      let value = '';
      while (current < input.length && input[current] !== '"') {
        if (input[current] === '\\') {
          advance(); // the backslash
          if (current >= input.length) {
            break;
          }
          const escaped = input[current];
          value += STRING_ESCAPES.has(escaped) ? STRING_ESCAPES.get(escaped) : escaped;
        } else {
          value += input[current];
        }
        advanceTrackingLines();
      }
      if (current >= input.length) {
        throw new Error(`Unterminated string literal starting at line ${startLine}, column ${startColumn}`);
      }
      advance(); // closing quote
      tokens.push({ type: TokenType.STRING, value, line: startLine, column: startColumn });
      continue;
    }

    // Operators and punctuation.
    let type;
    let length = 1;
    switch (char) {
      case '-':
        if (next === '>') {
          type = TokenType.ARROW;
          length = 2;
        } else {
          type = classifyMinus();
        }
        break;
      case '=':
        // Single = is used for equality comparison in assertions, so both
        // spellings produce the same token.
        type = TokenType.EQUALS;
        if (next === '=') {
          length = 2;
        }
        break;
      case '<':
        if (next === '=') {
          type = TokenType.LESS_EQUAL;
          length = 2;
        } else {
          type = TokenType.LESS_THAN;
        }
        break;
      case '>':
        if (next === '=') {
          type = TokenType.GREATER_EQUAL;
          length = 2;
        } else {
          type = TokenType.GREATER_THAN;
        }
        break;
      case '!':
        // A bare `!` is not an operator; only `!=` is.
        if (next === '=') {
          type = TokenType.NOT_EQUAL;
          length = 2;
        }
        break;
      default:
        type = PUNCTUATION_TOKENS.get(char);
    }

    if (type === undefined) {
      throw new Error(`Unexpected character: ${char} at line ${line}, column ${column}`);
    }

    tokens.push({ type, line, column });
    advance(length);
  }

  return tokens;
}
</code></pre> - </article> - </section> - - - - -</div> - -<br class="clear"> - -<footer> - Generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 4.0.4</a> on Tue Jul 29 2025 23:15:00 GMT-0400 (Eastern Daylight Time) using the Minami theme. -</footer> - -<script>prettyPrint();</script> -<script src="scripts/linenumber.js"></script> -</body> -</html> |