diff options
Diffstat (limited to 'js/baba-yaga/src/legacy')
-rw-r--r-- | js/baba-yaga/src/legacy/engine-optimized.js | 526 | ||||
-rw-r--r-- | js/baba-yaga/src/legacy/engine.js | 289 | ||||
-rw-r--r-- | js/baba-yaga/src/legacy/lexer-optimized.js | 357 | ||||
-rw-r--r-- | js/baba-yaga/src/legacy/lexer.js | 425 |
4 files changed, 1597 insertions, 0 deletions
// engine-optimized.js - High-performance Baba Yaga engine with all optimizations

/**
 * Build a fresh, zeroed statistics record.
 * Shared by the constructor, warm-up and resetStats() so the shape is defined once.
 *
 * @returns {object} Counters for executions, per-phase timings and optimization hits.
 */
function createEmptyStats() {
  return {
    totalExecutions: 0,
    totalTime: 0,
    averageTime: 0,
    errors: 0,
    lexingTime: 0,
    parsingTime: 0,
    interpretingTime: 0,
    optimizationStats: {
      lexerOptimizations: 0,
      scopeOptimizations: 0,
      builtinOptimizations: 0,
      astPoolHits: 0
    }
  };
}

/**
 * High-performance Baba Yaga engine with all optimizations enabled.
 *
 * Runs the lex -> parse -> validate -> interpret pipeline and records timing
 * statistics for each phase. `execute()` and `executeSync()` both resolve to a
 * result object (`success: true|false`) instead of throwing for program errors.
 */
export class OptimizedBabaYagaEngine {
  /**
   * @param {BabaYagaConfig} [config] Engine configuration. When
   *   `config.enableOptimizations` is set the engine warms itself up immediately.
   */
  constructor(config = new BabaYagaConfig()) {
    this.config = config;
    this.validator = config.sandboxMode
      ? new SecurityValidator(config)
      : new InputValidator(config);

    // Initialize optimization components
    this.optimizedBuiltins = new OptimizedBuiltins();
    this.astPool = globalASTPool;

    // Performance tracking with per-phase detail
    this.stats = createEmptyStats();

    // Warm up optimization components
    if (config.enableOptimizations) {
      this.warmUp();
    }
  }

  /**
   * Warm up optimization components for better initial performance.
   *
   * The warm-up program is executed synchronously and its timings are kept
   * out of the engine statistics: those describe the caller's workload only.
   */
  warmUp() {
    // Warm up AST pools
    this.astPool.warmUp('BinaryExpression', 50);
    this.astPool.warmUp('FunctionCall', 30);
    this.astPool.warmUp('Identifier', 100);
    this.astPool.warmUp('NumberLiteral', 50);

    // Warm up with a simple program
    const warmupCode = 'x : 1 + 2; y : x * 3;';
    const callerStats = this.stats;
    this.stats = createEmptyStats();
    try {
      this.executeSync(warmupCode, { silent: true });
    } catch (error) {
      // Warm-up is best effort: a failure here must not prevent construction.
    } finally {
      this.stats = callerStats;
    }
  }

  /**
   * Execute Baba Yaga source code with all optimizations.
   *
   * @param {string} source Program text.
   * @param {object} [options] `filename`, `scope`, `silent` and the
   *   `onOutput` / `onInput` / `onEvent` / `onAddListener` host callbacks.
   * @returns {Promise<object>} On success `{ result, executionTime, success: true, breakdown }`;
   *   on failure `{ error, errorType, executionTime, success: false, suggestions }`.
   */
  async execute(source, options = {}) {
    const startTime = performance.now();

    try {
      // Validate input
      this.validator.validateSourceCode(source, options.filename || '<input>');

      // Optimized lexical analysis (the legacy lexer is loaded lazily, hence the await)
      const lexStart = performance.now();
      const lexer = this.config.enableOptimizations
        ? createOptimizedLexer(source)
        : await createLexerWithFallback(source, false);
      const frontEnd = this.runFrontEnd(source, lexer, lexStart);

      // Optimized interpretation
      const interpretStart = performance.now();
      const host = this.createOptimizedHostInterface(source, options);
      const interpreter = this.createOptimizedInterpreter(frontEnd.ast, host);
      const result = await this.executeWithTimeout(interpreter, host);
      const interpretTime = performance.now() - interpretStart;

      return this.completeExecution(result, startTime, frontEnd, interpretTime);
    } catch (error) {
      return this.formatFailure(error, startTime);
    }
  }

  /**
   * Synchronous execution for simple cases.
   *
   * Runs the whole pipeline on the calling stack and returns the same result
   * object shape as `execute()`. Differences from `execute()`:
   *   - the optimized lexer is always used, because the legacy lexer is only
   *     reachable through an asynchronous dynamic import;
   *   - `maxExecutionTime` is not enforced (nothing can interrupt a synchronous call);
   *   - a program whose interpretation yields a promise is reported as an error.
   *
   * @param {string} source Program text.
   * @param {object} [options] Same options as `execute()`.
   * @returns {object} Success or failure result object.
   */
  executeSync(source, options = {}) {
    const startTime = performance.now();

    try {
      this.validator.validateSourceCode(source, options.filename || '<input>');

      const lexStart = performance.now();
      const frontEnd = this.runFrontEnd(source, createOptimizedLexer(source), lexStart);

      const interpretStart = performance.now();
      const host = this.createOptimizedHostInterface(source, options);
      const interpreter = this.createOptimizedInterpreter(frontEnd.ast, host);
      const result = interpreter.interpret();
      if (typeof result?.then === 'function') {
        throw new BabaError(
          'Program requires asynchronous execution',
          null,
          source,
          ['Use execute() instead of executeSync()']
        );
      }
      const interpretTime = performance.now() - interpretStart;

      return this.completeExecution(result, startTime, frontEnd, interpretTime);
    } catch (error) {
      return this.formatFailure(error, startTime);
    }
  }

  /**
   * Tokenize, parse and validate a program.
   *
   * @param {string} source Program text (used for diagnostics).
   * @param {{allTokens: Function}} lexer Lexer already bound to `source`.
   * @param {number} lexStart `performance.now()` timestamp taken before the lexer was created.
   * @returns {{ast: object, lexTime: number, parseTime: number}}
   */
  runFrontEnd(source, lexer, lexStart) {
    const tokens = lexer.allTokens();
    const lexTime = performance.now() - lexStart;

    if (this.config.enableDebugMode) {
      console.log(`[DEBUG] Lexing: ${lexTime.toFixed(2)}ms, Tokens: ${tokens.length}`);
    }

    // Parsing with AST pooling
    const parseStart = performance.now();
    const parser = this.createOptimizedParser(tokens, source);
    const ast = parser.parse();
    const parseTime = performance.now() - parseStart;

    // Validate AST
    this.validator.validateAST(ast, source);

    if (this.config.enableDebugMode) {
      console.log(`[DEBUG] Parsing: ${parseTime.toFixed(2)}ms, AST depth: ${this.getASTDepth(ast)}`);
    }

    return { ast, lexTime, parseTime };
  }

  /**
   * Record a successful run and build its result object.
   *
   * @param {*} result Value produced by the interpreter.
   * @param {number} startTime Timestamp taken when execution began.
   * @param {{ast: object, lexTime: number, parseTime: number}} frontEnd Output of `runFrontEnd()`.
   * @param {number} interpretTime Milliseconds spent interpreting.
   * @returns {object} Success result object.
   */
  completeExecution(result, startTime, frontEnd, interpretTime) {
    const { ast, lexTime, parseTime } = frontEnd;
    const executionTime = performance.now() - startTime;
    this.updateStats(executionTime, false, lexTime, parseTime, interpretTime);

    if (this.config.showTimings) {
      console.log(`[TIMING] Total: ${executionTime.toFixed(2)}ms (Lex: ${lexTime.toFixed(2)}ms, Parse: ${parseTime.toFixed(2)}ms, Interpret: ${interpretTime.toFixed(2)}ms)`);
    }

    // Clean up AST if pooling is enabled
    if (this.config.enableOptimizations) {
      this.astPool.releaseTree(ast);
    }

    return {
      result,
      executionTime,
      success: true,
      breakdown: {
        lexingTime: lexTime,
        parsingTime: parseTime,
        interpretingTime: interpretTime
      }
    };
  }

  /**
   * Record a failed run and convert the error into a result object.
   *
   * @param {Error} error Whatever the pipeline threw.
   * @param {number} startTime Timestamp taken when execution began.
   * @returns {object} Failure result object.
   */
  formatFailure(error, startTime) {
    const executionTime = performance.now() - startTime;
    this.updateStats(executionTime, true);

    if (error instanceof BabaError) {
      const formattedError = this.config.verboseErrors ? error.formatError() : error.message;

      return {
        error: formattedError,
        errorType: error.name,
        executionTime,
        success: false,
        suggestions: error.suggestions
      };
    }

    // Unexpected error: details are only printed in debug mode
    if (this.config.enableDebugMode) {
      console.error('[INTERNAL ERROR]', error);
    }
    return {
      error: 'Internal error occurred',
      errorType: 'InternalError',
      executionTime,
      success: false,
      suggestions: ['Report this as a bug', 'Check for malformed input']
    };
  }

  /**
   * Create a parser and, when optimizations are on, wrap `parse()` so AST pool
   * hits are attributed to this engine.
   *
   * @param {Array} tokens Token list from the lexer.
   * @param {string} source Program text (forwarded to the parser for diagnostics).
   * @returns {object} Parser exposing `parse()`.
   */
  createOptimizedParser(tokens, source) {
    const parser = createParser(tokens, this.config.enableDebugMode, source);

    if (this.config.enableOptimizations) {
      const originalParse = parser.parse.bind(parser);
      parser.parse = () => {
        // Only the hits produced by this parse are counted; adding the pool's
        // running total on every parse would inflate the figure.
        // NOTE(review): assumes getStats().poolHits is a cumulative counter - confirm in ast-pool.js.
        const hitsBefore = this.astPool.getStats().poolHits;
        const ast = originalParse();
        const gained = this.astPool.getStats().poolHits - hitsBefore;
        if (Number.isFinite(gained) && gained > 0) {
          this.stats.optimizationStats.astPoolHits += gained;
        }
        return ast;
      };
    }

    return parser;
  }

  /**
   * Create an interpreter and, when optimizations are on, swap in the
   * optimized scope stack and built-ins.
   *
   * @param {object} ast Parsed program.
   * @param {object} host Host interface from `createOptimizedHostInterface()`.
   * @returns {object} Interpreter exposing `interpret()`.
   */
  createOptimizedInterpreter(ast, host) {
    const interpreter = createInterpreter(ast, host);

    if (this.config.enableOptimizations) {
      // Replace scope with optimized scope stack, carrying over existing bindings
      const originalScope = interpreter.scope;
      const optimizedScope = new CompatibleScopeStack();

      if (typeof originalScope?.entries === 'function') {
        for (const [key, value] of originalScope.entries()) {
          optimizedScope.set(key, value);
        }
      }

      interpreter.scope = optimizedScope;

      // Inject optimized built-ins
      this.injectOptimizedBuiltins(interpreter);
    }

    return interpreter;
  }

  /**
   * Route function calls through the optimized built-ins when possible.
   *
   * Does nothing when the interpreter does not expose `visit` and
   * `visitFunctionCall` as methods, since there is then nothing to wrap.
   *
   * @param {object} interpreter Interpreter to patch in place.
   */
  injectOptimizedBuiltins(interpreter) {
    const originalVisitFunctionCall = interpreter.visitFunctionCall;
    if (typeof originalVisitFunctionCall !== 'function' || typeof interpreter.visit !== 'function') {
      return;
    }

    interpreter.visitFunctionCall = (node) => {
      // Try optimized path first
      if (node.callee && node.callee.type === 'Identifier') {
        const functionName = node.callee.name;
        // NOTE(review): arguments are evaluated here and again by the fallback
        // below, so argument expressions with side effects run twice whenever
        // the call is not optimized. Fixing this needs an interpreter API that
        // accepts pre-evaluated arguments.
        const args = node.arguments.map(arg => interpreter.visit(arg));

        if (this.optimizedBuiltins.canOptimize(functionName, args)) {
          const result = this.optimizedBuiltins.execute(functionName, args, interpreter);
          // `null` is the "not handled" signal from the optimized built-ins
          if (result !== null) {
            this.stats.optimizationStats.builtinOptimizations++;
            return result;
          }
        }
      }

      // Fall back to standard implementation
      return originalVisitFunctionCall.call(interpreter, node);
    };
  }

  /**
   * Create the host interface handed to the interpreter.
   *
   * @param {string} source Program text.
   * @param {object} options Execution options (see `execute()`).
   * @returns {object} `{ source, scope, io, optimizations? }`; `config.ioHandlers`
   *   entries override the default `io` functions.
   */
  createOptimizedHostInterface(source, options) {
    const host = {
      source,
      scope: options.scope || new Map(),
      io: {
        out: (...args) => {
          if (options.silent) return; // Skip output in silent mode

          if (options.onOutput) {
            options.onOutput(...args);
          } else {
            console.log(...args);
          }
        },
        in: () => {
          if (options.onInput) {
            return options.onInput();
          } else {
            throw new BabaError('Input not available in this context');
          }
        },
        emit: (event) => {
          if (options.onEvent) {
            options.onEvent(event);
          }
        },
        addListener: (topic, handler) => {
          if (options.onAddListener) {
            return options.onAddListener(topic, handler);
          }
          return () => {}; // No-op unsubscribe
        },
        debug: this.config.enableDebugMode ? console.log : () => {},
        ...this.config.ioHandlers
      }
    };

    // Add optimization-specific extensions
    if (this.config.enableOptimizations) {
      host.optimizations = {
        builtins: this.optimizedBuiltins,
        astPool: this.astPool
      };
    }

    return host;
  }

  /**
   * Run the interpreter, racing it against `config.maxExecutionTime`.
   *
   * The timer can only win when `interpret()` returns a promise; a purely
   * synchronous `interpret()` finishes before the timer is even armed.
   *
   * @param {object} interpreter Interpreter exposing `interpret()`.
   * @param {object} host Host interface (its `source` is attached to the timeout error).
   * @returns {Promise<*>} Interpreter result.
   * @throws {BabaError} When the timeout elapses first; interpreter errors are re-thrown as-is.
   */
  async executeWithTimeout(interpreter, host) {
    let timeoutId;

    const executionPromise = new Promise((resolve, reject) => {
      try {
        resolve(interpreter.interpret());
      } catch (error) {
        reject(error);
      }
    });

    const timeoutPromise = new Promise((_, reject) => {
      timeoutId = setTimeout(() => {
        reject(new BabaError(
          `Execution timeout after ${this.config.maxExecutionTime}ms`,
          null,
          host.source,
          ['Reduce recursion depth', 'Optimize algorithm complexity', 'Increase maxExecutionTime']
        ));
      }, this.config.maxExecutionTime);
    });

    try {
      return await Promise.race([executionPromise, timeoutPromise]);
    } finally {
      // Always disarm the timer so it cannot keep the process alive
      clearTimeout(timeoutId);
    }
  }

  /**
   * Get AST depth for validation and debugging.
   *
   * @param {object} node AST node (non-objects count as leaves).
   * @param {number} [depth=0] Depth of `node` itself.
   * @returns {number} Deepest level reached through the known child fields.
   */
  getASTDepth(node, depth = 0) {
    if (!node || typeof node !== 'object') {
      return depth;
    }

    let maxDepth = depth;

    // Check common AST node children
    const childFields = ['body', 'left', 'right', 'operand', 'callee', 'arguments', 'elements', 'discriminants', 'cases'];

    for (const field of childFields) {
      const child = node[field];
      if (child) {
        if (Array.isArray(child)) {
          for (const item of child) {
            maxDepth = Math.max(maxDepth, this.getASTDepth(item, depth + 1));
          }
        } else {
          maxDepth = Math.max(maxDepth, this.getASTDepth(child, depth + 1));
        }
      }
    }

    return maxDepth;
  }

  /**
   * Update execution statistics with detailed breakdown.
   *
   * @param {number} executionTime Total milliseconds for the run.
   * @param {boolean} isError Whether the run failed.
   * @param {number} [lexTime=0]
   * @param {number} [parseTime=0]
   * @param {number} [interpretTime=0]
   */
  updateStats(executionTime, isError, lexTime = 0, parseTime = 0, interpretTime = 0) {
    this.stats.totalExecutions++;
    this.stats.totalTime += executionTime;
    this.stats.averageTime = this.stats.totalTime / this.stats.totalExecutions;
    this.stats.lexingTime += lexTime;
    this.stats.parsingTime += parseTime;
    this.stats.interpretingTime += interpretTime;

    if (isError) {
      this.stats.errors++;
    }
  }

  /**
   * Get comprehensive engine statistics.
   *
   * @returns {object} Raw counters plus derived averages, error rate and optimization rates.
   */
  getStats() {
    const builtinStats = this.optimizedBuiltins.getStats();
    const astPoolStats = this.astPool.getStats();
    const runs = this.stats.totalExecutions;
    const perRun = (total) => (runs > 0 ? total / runs : 0);

    return {
      ...this.stats,
      errorRate: perRun(this.stats.errors),
      averageLexTime: perRun(this.stats.lexingTime),
      averageParseTime: perRun(this.stats.parsingTime),
      averageInterpretTime: perRun(this.stats.interpretingTime),
      optimizations: {
        builtinOptimizationRate: builtinStats.optimizationRate,
        astPoolHitRate: astPoolStats.hitRate,
        astPoolReuseRate: astPoolStats.reuseRate,
        totalOptimizations: this.stats.optimizationStats.builtinOptimizations + this.stats.optimizationStats.astPoolHits
      }
    };
  }

  /**
   * Reset all statistics, including those of the built-ins and the AST pool.
   */
  resetStats() {
    this.stats = createEmptyStats();

    this.optimizedBuiltins.resetStats();
    this.astPool.resetStats();
  }

  /**
   * Get optimization recommendations derived from the current statistics.
   *
   * @returns {string[]} Human-readable suggestions (possibly empty).
   */
  getOptimizationRecommendations() {
    const stats = this.getStats();
    const recommendations = [];

    if (stats.optimizations.builtinOptimizationRate < 0.7) {
      recommendations.push('Consider using more built-in functions (map, filter, reduce) for better performance');
    }

    if (stats.optimizations.astPoolHitRate < 0.5) {
      recommendations.push('Enable AST pooling for better memory efficiency');
    }

    if (stats.averageLexTime > stats.averageParseTime) {
      recommendations.push('Lexing is taking longer than parsing - consider optimizing token patterns');
    }

    if (stats.errorRate > 0.1) {
      recommendations.push('High error rate detected - consider input validation improvements');
    }

    return recommendations;
  }

  /**
   * Create a performance profile for the current workload.
   *
   * @returns {object} Timestamped snapshot of config summary, averages and recommendations.
   */
  createPerformanceProfile() {
    const stats = this.getStats();

    return {
      timestamp: new Date().toISOString(),
      config: this.config.summary(),
      performance: {
        totalExecutions: stats.totalExecutions,
        averageExecutionTime: stats.averageTime,
        breakdown: {
          lexing: stats.averageLexTime,
          parsing: stats.averageParseTime,
          interpreting: stats.averageInterpretTime
        },
        optimizations: stats.optimizations
      },
      recommendations: this.getOptimizationRecommendations()
    };
  }
}

/**
 * Convenience function for optimized execution.
 *
 * @param {string} source Program text.
 * @param {BabaYagaConfig} [config] Defaults to a config with optimizations enabled.
 * @returns {Promise<object>} Result object from `OptimizedBabaYagaEngine#execute`.
 */
export async function executeOptimized(source, config = new BabaYagaConfig({ enableOptimizations: true })) {
  const engine = new OptimizedBabaYagaEngine(config);
  return engine.execute(source);
}

/**
 * Create optimized engine with preset configurations.
 *
 * @param {string} [preset='performance'] 'performance', 'development' or 'production';
 *   any other value yields a default config with optimizations enabled.
 * @returns {OptimizedBabaYagaEngine}
 */
export function createOptimizedEngine(preset = 'performance') {
  let config;

  switch (preset) {
    case 'performance':
      config = new BabaYagaConfig({
        enableOptimizations: true,
        enableDebugMode: false,
        strictMode: false,
        maxRecursionDepth: 2000,
        maxExecutionTime: 10000
      });
      break;
    case 'development':
      config = BabaYagaConfig.development();
      config.enableOptimizations = true;
      break;
    case 'production':
      config = BabaYagaConfig.production();
      config.enableOptimizations = true;
      break;
    default:
      config = new BabaYagaConfig({ enableOptimizations: true });
  }

  return new OptimizedBabaYagaEngine(config);
}
// engine.js - Main Baba Yaga engine with improved error handling and configuration

/**
 * Main Baba Yaga engine class.
 *
 * Runs the lex -> parse -> validate -> interpret pipeline and keeps simple
 * execution statistics. `execute()` resolves to a result object
 * (`success: true|false`) instead of rejecting for program errors.
 */
export class BabaYagaEngine {
  /**
   * @param {BabaYagaConfig} [config] Engine configuration; `sandboxMode`
   *   selects the security validator.
   */
  constructor(config = new BabaYagaConfig()) {
    this.config = config;
    this.validator = config.sandboxMode
      ? new SecurityValidator(config)
      : new InputValidator(config);

    // Performance tracking
    this.stats = {
      totalExecutions: 0,
      totalTime: 0,
      averageTime: 0,
      errors: 0
    };
  }

  /**
   * Execute Baba Yaga source code.
   *
   * @param {string} source Program text.
   * @param {object} [options] `filename`, `scope` and the
   *   `onOutput` / `onInput` / `onEvent` / `onAddListener` host callbacks.
   * @returns {Promise<object>} On success `{ result, executionTime, success: true }`;
   *   on failure `{ error, errorType, executionTime, success: false, suggestions }`.
   */
  async execute(source, options = {}) {
    const startTime = performance.now();

    try {
      // Validate input
      this.validator.validateSourceCode(source, options.filename || '<input>');

      // Lexical analysis
      const lexer = createLexer(source);
      const tokens = lexer.allTokens();

      if (this.config.enableDebugMode) {
        console.log('[DEBUG] Tokens:', tokens.length);
      }

      // Parsing
      const parser = createParser(tokens, this.config.enableDebugMode, source);
      const ast = parser.parse();

      // Validate AST
      this.validator.validateAST(ast, source);

      if (this.config.enableDebugMode) {
        console.log('[DEBUG] AST depth:', this.getASTDepth(ast));
      }

      // Interpretation
      const host = this.createHostInterface(source, options);
      const interpreter = createInterpreter(ast, host);
      const result = await this.executeWithTimeout(interpreter, host);

      // Update statistics
      const executionTime = performance.now() - startTime;
      this.updateStats(executionTime, false);

      if (this.config.showTimings) {
        console.log(`[TIMING] Execution completed in ${executionTime.toFixed(2)}ms`);
      }

      return {
        result,
        executionTime,
        success: true
      };

    } catch (error) {
      const executionTime = performance.now() - startTime;
      this.updateStats(executionTime, true);

      // Format error for display
      if (error instanceof BabaError) {
        const formattedError = this.config.verboseErrors ? error.formatError() : error.message;

        return {
          error: formattedError,
          errorType: error.name,
          executionTime,
          success: false,
          suggestions: error.suggestions
        };
      } else {
        // Unexpected error
        console.error('[INTERNAL ERROR]', error);
        return {
          error: 'Internal error occurred',
          errorType: 'InternalError',
          executionTime,
          success: false,
          suggestions: ['Report this as a bug', 'Check for malformed input']
        };
      }
    }
  }

  /**
   * Run the interpreter, racing it against `config.maxExecutionTime`.
   *
   * The timer is disarmed on every exit path. Previously a failing program
   * left it armed, which kept the event loop alive for the full timeout.
   * The timer can only win when `interpret()` returns a promise; a purely
   * synchronous `interpret()` finishes before the timer is armed.
   *
   * @param {object} interpreter Interpreter exposing `interpret()`.
   * @param {object} host Host interface (its `source` is attached to the timeout error).
   * @returns {Promise<*>} Interpreter result.
   * @throws {BabaError} When the timeout elapses first; interpreter errors are re-thrown as-is.
   */
  async executeWithTimeout(interpreter, host) {
    let timeoutId;

    const executionPromise = new Promise((resolve, reject) => {
      try {
        resolve(interpreter.interpret());
      } catch (error) {
        reject(error);
      }
    });

    const timeoutPromise = new Promise((_, reject) => {
      timeoutId = setTimeout(() => {
        reject(new BabaError(
          `Execution timeout after ${this.config.maxExecutionTime}ms`,
          null,
          host.source,
          ['Reduce recursion depth', 'Optimize algorithm complexity', 'Increase maxExecutionTime']
        ));
      }, this.config.maxExecutionTime);
    });

    try {
      return await Promise.race([executionPromise, timeoutPromise]);
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Create host interface for interpreter.
   *
   * @param {string} source Program text.
   * @param {object} options Execution options (see `execute()`).
   * @returns {object} `{ source, scope, io }`; `config.ioHandlers` entries
   *   override the default `io` functions.
   */
  createHostInterface(source, options) {
    return {
      source,
      scope: options.scope || new Map(),
      io: {
        out: (...args) => {
          if (options.onOutput) {
            options.onOutput(...args);
          } else {
            console.log(...args);
          }
        },
        in: () => {
          if (options.onInput) {
            return options.onInput();
          } else {
            throw new BabaError('Input not available in this context');
          }
        },
        emit: (event) => {
          if (options.onEvent) {
            options.onEvent(event);
          }
        },
        addListener: (topic, handler) => {
          if (options.onAddListener) {
            return options.onAddListener(topic, handler);
          }
          return () => {}; // No-op unsubscribe
        },
        debug: this.config.enableDebugMode ? console.log : () => {},
        ...this.config.ioHandlers
      }
    };
  }

  /**
   * Get AST depth for validation.
   *
   * @param {object} node AST node (non-objects count as leaves).
   * @param {number} [depth=0] Depth of `node` itself.
   * @returns {number} Deepest level reached through the known child fields.
   */
  getASTDepth(node, depth = 0) {
    if (!node || typeof node !== 'object') {
      return depth;
    }

    let maxDepth = depth;

    // Check common AST node children
    const childFields = ['body', 'left', 'right', 'operand', 'callee', 'arguments', 'elements', 'discriminants', 'cases'];

    for (const field of childFields) {
      const child = node[field];
      if (child) {
        if (Array.isArray(child)) {
          for (const item of child) {
            maxDepth = Math.max(maxDepth, this.getASTDepth(item, depth + 1));
          }
        } else {
          maxDepth = Math.max(maxDepth, this.getASTDepth(child, depth + 1));
        }
      }
    }

    return maxDepth;
  }

  /**
   * Update execution statistics.
   *
   * @param {number} executionTime Total milliseconds for the run.
   * @param {boolean} isError Whether the run failed.
   */
  updateStats(executionTime, isError) {
    this.stats.totalExecutions++;
    this.stats.totalTime += executionTime;
    this.stats.averageTime = this.stats.totalTime / this.stats.totalExecutions;

    if (isError) {
      this.stats.errors++;
    }
  }

  /**
   * Get engine statistics.
   *
   * @returns {object} Raw counters plus `errorRate` (0 when nothing has run yet).
   */
  getStats() {
    return {
      ...this.stats,
      errorRate: this.stats.totalExecutions > 0 ? this.stats.errors / this.stats.totalExecutions : 0
    };
  }

  /**
   * Reset statistics.
   */
  resetStats() {
    this.stats = {
      totalExecutions: 0,
      totalTime: 0,
      averageTime: 0,
      errors: 0
    };
  }

  /**
   * Validate configuration.
   *
   * @returns {*} Whatever `config.validate()` returns.
   */
  validateConfig() {
    return this.config.validate();
  }

  /**
   * Update configuration.
   *
   * @param {BabaYagaConfig|object} newConfig A full config replaces the current
   *   one; a plain object is merged into it via `config.merge()`.
   */
  updateConfig(newConfig) {
    if (newConfig instanceof BabaYagaConfig) {
      this.config = newConfig;
    } else {
      this.config = this.config.merge(newConfig);
    }

    // Rebuild the validator in case sandbox mode changed
    this.validator = this.config.sandboxMode
      ? new SecurityValidator(this.config)
      : new InputValidator(this.config);
  }
}

/**
 * Convenience function for quick execution.
 *
 * @param {string} source Program text.
 * @param {BabaYagaConfig} [config]
 * @returns {Promise<object>} Result object from `BabaYagaEngine#execute`.
 */
export async function execute(source, config = new BabaYagaConfig()) {
  const engine = new BabaYagaEngine(config);
  return engine.execute(source);
}

/**
 * Create engine with preset configurations.
 *
 * @param {string} [preset='default'] 'development', 'production', 'testing' or
 *   'sandbox'; any other value yields a default config.
 * @returns {BabaYagaEngine}
 */
export function createEngine(preset = 'default') {
  let config;

  switch (preset) {
    case 'development':
      config = BabaYagaConfig.development();
      break;
    case 'production':
      config = BabaYagaConfig.production();
      break;
    case 'testing':
      config = BabaYagaConfig.testing();
      break;
    case 'sandbox':
      config = BabaYagaConfig.sandbox();
      break;
    default:
      config = new BabaYagaConfig();
  }

  return new BabaYagaEngine(config);
}
// lexer-optimized.js - High-performance regex-based lexer

const tokenTypes = {
  IDENTIFIER: 'IDENTIFIER',
  TYPE: 'TYPE',
  NUMBER: 'NUMBER',
  STRING: 'STRING',
  ARROW: 'ARROW',
  COLON: 'COLON',
  SEMICOLON: 'SEMICOLON',
  COMMA: 'COMMA',
  KEYWORD: 'KEYWORD',
  OPERATOR: 'OPERATOR',
  LPAREN: 'LPAREN',
  RPAREN: 'RPAREN',
  DOT: 'DOT',
  LBRACKET: 'LBRACKET',
  RBRACKET: 'RBRACKET',
  LBRACE: 'LBRACE',
  RBRACE: 'RBRACE',
  EOF: 'EOF',
};

const keywords = new Set(['when', 'is', 'then', 'if', 'Ok', 'Err', 'true', 'false', 'PI', 'INFINITY', 'and', 'or', 'xor']);
const types = new Set(['Int', 'String', 'Result', 'Float', 'Number', 'List', 'Table', 'Bool']);

// Escape sequences recognised inside string literals, keyed by the character
// that follows the backslash. Unknown escapes are kept verbatim.
const STRING_ESCAPES = new Map([
  ['n', '\n'],
  ['t', '\t'],
  ['r', '\r'],
  ['\\', '\\'],
  ['"', '"']
]);

// Token type for each punctuation character.
const PUNCTUATION_TYPES = new Map([
  ['(', tokenTypes.LPAREN],
  [')', tokenTypes.RPAREN],
  ['[', tokenTypes.LBRACKET],
  [']', tokenTypes.RBRACKET],
  ['{', tokenTypes.LBRACE],
  ['}', tokenTypes.RBRACE],
  [':', tokenTypes.COLON],
  [';', tokenTypes.SEMICOLON],
  [',', tokenTypes.COMMA],
  ['.', tokenTypes.DOT]
]);

// After one of these tokens a '-' is a binary minus, never the sign of a literal.
// NOTE(review): the legacy lexer decides this from the previous character and has
// extra look-ahead cases; only its "after a digit" and "after ')'" rules are mirrored here.
const OPERAND_END_TYPES = new Set([tokenTypes.NUMBER, tokenTypes.RPAREN]);

/**
 * Token pattern definitions, tried in order at the current position.
 *
 * Every regex is sticky (`y`): the lexer sets `lastIndex` and the pattern must
 * match exactly there. This replaces `^`-anchored matching against a fresh
 * slice of the input, which copied the remaining text for every token.
 */
const TOKEN_PATTERNS = [
  // Whitespace (skip)
  {
    name: 'WHITESPACE',
    regex: /[ \t\r]+/y,
    skip: true
  },

  // Newlines (skip) - line numbers are tracked by the advance function
  {
    name: 'NEWLINE',
    regex: /\n/y,
    skip: true
  },

  // Comments (skip) - `.` stops at the line terminator, so no multiline flag is
  // needed; with one, the pattern could match a comment on a *later* line.
  {
    name: 'COMMENT',
    regex: /\/\/.*/y,
    skip: true
  },

  // Multi-character operators (order matters - longest first)
  {
    name: 'ARROW',
    regex: /->/y,
    type: tokenTypes.ARROW
  },

  {
    name: 'STRING_CONCAT',
    regex: /\.\./y,
    type: tokenTypes.OPERATOR,
    value: '..'
  },

  {
    name: 'COMPARISON_OPS',
    regex: /(>=|<=|!=)/y,
    type: tokenTypes.OPERATOR
  },

  // Numbers (a leading '-' is only accepted where a literal can start, see nextToken)
  {
    name: 'NUMBER',
    regex: /-?\d+(\.\d+)?/y,
    type: tokenTypes.NUMBER,
    process: (match, lexer) => {
      const value = parseFloat(match[0]);
      const isFloat = match[0].includes('.');
      return {
        type: tokenTypes.NUMBER,
        value,
        isFloat,
        originalString: match[0]
      };
    }
  },

  // Strings with escape sequence handling
  {
    name: 'STRING',
    regex: /"((?:[^"\\]|\\[\s\S])*)"/y,
    type: tokenTypes.STRING,
    process: (match, lexer) => {
      // Decode in a single pass so that an escaped backslash followed by 'n'
      // (\\n) yields a backslash and an 'n', not a newline.
      const processedString = match[1].replace(
        /\\([\s\S])/g,
        (whole, escaped) => (STRING_ESCAPES.has(escaped) ? STRING_ESCAPES.get(escaped) : whole)
      );

      return {
        type: tokenTypes.STRING,
        value: processedString
      };
    }
  },

  // Identifiers, keywords, and types
  {
    name: 'IDENTIFIER',
    regex: /[a-zA-Z_][a-zA-Z0-9_]*/y,
    process: (match, lexer) => {
      const value = match[0];

      if (keywords.has(value)) {
        return { type: tokenTypes.KEYWORD, value };
      }
      if (types.has(value)) {
        return { type: tokenTypes.TYPE, value };
      }
      return { type: tokenTypes.IDENTIFIER, value };
    }
  },

  // Single character operators
  {
    name: 'SINGLE_CHAR_OPS',
    regex: /[+\-*/%=><]/y,
    type: tokenTypes.OPERATOR
  },

  // Punctuation
  {
    name: 'PUNCTUATION',
    regex: /[()[\]{}:;,.]/y,
    process: (match, lexer) => {
      const char = match[0];
      return {
        type: PUNCTUATION_TYPES.get(char),
        value: char
      };
    }
  }
];

/**
 * High-performance regex-based lexer.
 *
 * @param {string} input Program text.
 * @returns {{allTokens: function(): object[], nextToken: function(): object}}
 *   `nextToken()` returns the next token (`{ type, value, line, column, ... }`),
 *   ending with an EOF token; `allTokens()` collects the remaining tokens,
 *   EOF included. Both throw `LexError` on input that matches no pattern.
 */
function createOptimizedLexer(input) {
  let position = 0;
  let line = 1;
  let column = 1;
  // Type of the last non-skipped token; decides whether '-' may start a literal.
  let previousType = null;

  function getCurrentLocation() {
    return { line, column };
  }

  // Consume `length` characters, keeping line/column in sync.
  function advance(length) {
    for (let i = 0; i < length; i++) {
      if (input[position + i] === '\n') {
        line++;
        column = 1;
      } else {
        column++;
      }
    }
    position += length;
  }

  // Find the first pattern matching exactly at `position`, or null.
  function matchAtPosition() {
    for (const pattern of TOKEN_PATTERNS) {
      pattern.regex.lastIndex = position;
      const match = pattern.regex.exec(input);
      if (match === null) {
        continue;
      }

      // "5-3" and "(a)-1" are subtractions: leave the '-' for the operator pattern.
      if (pattern.name === 'NUMBER' && match[0][0] === '-' && OPERAND_END_TYPES.has(previousType)) {
        continue;
      }

      return { pattern, match };
    }
    return null;
  }

  // Build the error for input that no pattern accepts at `position`.
  function createUnexpectedCharacterError() {
    const char = String.fromCodePoint(input.codePointAt(position));

    // A straight quote can only get here when no closing quote follows.
    if (char === '"') {
      return new LexError(
        'Unterminated string literal',
        { line, column, length: 1 },
        input,
        [
          'Add closing quote " at the end of the string',
          'Check for unescaped quotes inside the string',
          'Use \\" to include quotes in strings'
        ]
      );
    }

    const suggestions = [];

    // Common character mistakes
    if (char === '\u201C' || char === '\u201D') {
      suggestions.push('Use straight quotes " instead of curly quotes');
    } else if (char === '\u2013' || char === '\u2014') {
      suggestions.push('Use regular minus - or arrow -> instead of em/en dash');
    } else if (/[^\x00-\x7F]/.test(char)) {
      suggestions.push('Use only ASCII characters in Baba Yaga code');
    } else {
      suggestions.push(`Character "${char}" is not valid in Baba Yaga syntax`);
    }

    return new LexError(
      `Unexpected character: ${JSON.stringify(char)}`,
      { line, column, length: 1 },
      input,
      suggestions
    );
  }

  function nextToken() {
    // Loop (rather than recurse) over skipped tokens so long runs of blank
    // lines or comments cannot exhaust the call stack.
    while (position < input.length) {
      const startLocation = getCurrentLocation();
      const found = matchAtPosition();

      if (found === null) {
        throw createUnexpectedCharacterError();
      }

      const { pattern, match } = found;

      // Hook for patterns that need to observe lexer state
      if (pattern.onMatch) {
        pattern.onMatch({ line, column });
      }

      advance(match[0].length);

      // Skip tokens that should be ignored
      if (pattern.skip) {
        continue;
      }

      const token = pattern.process
        ? pattern.process(match, lexer)
        : { type: pattern.type, value: pattern.value ?? match[0] };

      // Add location information
      token.line = startLocation.line;
      token.column = startLocation.column;

      previousType = token.type;
      return token;
    }

    return {
      type: tokenTypes.EOF,
      value: '',
      line,
      column
    };
  }

  function allTokens() {
    const tokens = [];
    let token;

    do {
      token = nextToken();
      tokens.push(token);
    } while (token.type !== tokenTypes.EOF);

    return tokens;
  }

  const lexer = {
    allTokens,
    nextToken
  };

  return lexer;
}

/**
 * Create either the optimized lexer or the original one.
 *
 * NOTE(review): constructing the optimized lexer does no lexing, so the
 * fallback branch only triggers if construction itself throws; lexing errors
 * surface later from `allTokens()` / `nextToken()`.
 *
 * @param {string} input Program text.
 * @param {boolean} [useOptimized=true] `false` loads the original lexer from './lexer.js'.
 * @returns {Promise<object>} Lexer exposing `allTokens()`.
 */
async function createLexerWithFallback(input, useOptimized = true) {
  if (useOptimized) {
    try {
      return createOptimizedLexer(input);
    } catch (error) {
      // If optimized lexer fails, fall back to original
      console.warn('Falling back to original lexer:', error.message);
      const { createLexer } = await import('./lexer.js');
      return createLexer(input);
    }
  } else {
    const { createLexer } = await import('./lexer.js');
    return createLexer(input);
  }
}

/**
 * Benchmark function to compare lexer performance.
 *
 * @param {string} input Program text lexed on every iteration.
 * @param {number} [iterations=1000] Timed runs per lexer.
 * @returns {Promise<{originalTime: number, optimizedTime: number, speedup: number}>}
 *   Times are in milliseconds.
 */
async function benchmarkLexers(input, iterations = 1000) {
  console.log(`Benchmarking lexers with ${iterations} iterations...`);

  // Warm up
  for (let i = 0; i < 10; i++) {
    createOptimizedLexer(input).allTokens();
  }

  // Benchmark optimized lexer
  const optimizedStart = performance.now();
  for (let i = 0; i < iterations; i++) {
    createOptimizedLexer(input).allTokens();
  }
  const optimizedTime = performance.now() - optimizedStart;

  // Benchmark original lexer
  const { createLexer } = await import('./lexer.js');
  const originalStart = performance.now();
  for (let i = 0; i < iterations; i++) {
    createLexer(input).allTokens();
  }
  const originalTime = performance.now() - originalStart;

  console.log(`Original lexer: ${originalTime.toFixed(2)}ms`);
  console.log(`Optimized lexer: ${optimizedTime.toFixed(2)}ms`);
  console.log(`Speedup: ${(originalTime / optimizedTime).toFixed(2)}x`);

  return {
    originalTime,
    optimizedTime,
    speedup: originalTime / optimizedTime
  };
}
// lexer.js
//
// Legacy hand-written lexer for Baba Yaga source text.
// `LexError` is provided by this module's `../core/error.js` import.

/** Token categories emitted by the lexer. */
const tokenTypes = {
  IDENTIFIER: 'IDENTIFIER',
  TYPE: 'TYPE',
  NUMBER: 'NUMBER',
  STRING: 'STRING',
  ARROW: 'ARROW',
  COLON: 'COLON',
  SEMICOLON: 'SEMICOLON',
  COMMA: 'COMMA',
  KEYWORD: 'KEYWORD',
  OPERATOR: 'OPERATOR',
  LPAREN: 'LPAREN',
  RPAREN: 'RPAREN',
  DOT: 'DOT',
  LBRACKET: 'LBRACKET',
  RBRACKET: 'RBRACKET',
  LBRACE: 'LBRACE',
  RBRACE: 'RBRACE',
  EOF: 'EOF',
};

const keywords = ['when', 'is', 'then', 'if', 'Ok', 'Err', 'true', 'false', 'PI', 'INFINITY', 'and', 'or', 'xor'];

// Lookup tables are built once at module load instead of once per token.
const KEYWORD_SET = new Set(keywords);
const TYPE_NAMES = new Set(['Int', 'String', 'Result', 'Float', 'Number', 'List', 'Table', 'Bool']);

const WHITESPACE_RE = /\s/;
const DIGIT_RE = /\d/;
// Characters allowed inside an identifier (digits included; a leading digit
// never reaches the identifier branch because numbers are matched first).
const IDENTIFIER_CHAR_RE = /[a-zA-Z_0-9]/;

// Escape sequences recognised inside string literals (the character after `\`).
const STRING_ESCAPES = new Map([
  ['n', '\n'],
  ['t', '\t'],
  ['r', '\r'],
  ['\\', '\\'],
  ['"', '"'],
]);

// Two-character tokens; always tried before any single-character token.
const TWO_CHAR_TOKENS = new Map([
  ['..', tokenTypes.OPERATOR], // string concatenation
  ['->', tokenTypes.ARROW],
  ['>=', tokenTypes.OPERATOR],
  ['<=', tokenTypes.OPERATOR],
  ['!=', tokenTypes.OPERATOR],
]);

const ONE_CHAR_TOKENS = new Map([
  ['(', tokenTypes.LPAREN],
  [')', tokenTypes.RPAREN],
  ['[', tokenTypes.LBRACKET],
  [']', tokenTypes.RBRACKET],
  ['{', tokenTypes.LBRACE],
  ['}', tokenTypes.RBRACE],
  ['.', tokenTypes.DOT],
  [':', tokenTypes.COLON],
  [';', tokenTypes.SEMICOLON],
  [',', tokenTypes.COMMA],
]);

const ONE_CHAR_OPERATORS = new Set(['+', '-', '*', '/', '=', '>', '<', '%']);

// A `-<digit>` directly after one of these characters is a negative literal.
const NEGATIVE_LITERAL_PREFIXES = new Set(['(', ',', '+', '*', '/', '%', '=', '>', '<', ':', ';']);

/**
 * Create a lexer over `input`.
 *
 * Every token carries `type`, `line` and `column`; all tokens except EOF also
 * carry `value`. `line`/`column` are 1-based and point at the FIRST character
 * of the token. NUMBER tokens additionally carry `isFloat` and
 * `originalString`.
 *
 * @param {string} input - Baba Yaga source text.
 * @returns {{ allTokens: () => object[] }} Object whose `allTokens()` lexes the
 *   whole input and returns the token list, terminated by an EOF token.
 * @throws {LexError} (from `allTokens`) on an unterminated string literal or
 *   an unexpected character.
 */
function createLexer(input) {
  let position = 0;
  let line = 1;
  let column = 1;

  const isWhitespace = (char) => WHITESPACE_RE.test(char);
  const isDigit = (char) => DIGIT_RE.test(char);
  const isIdentifierChar = (char) => IDENTIFIER_CHAR_RE.test(char);

  /** Move forward `count` characters on the current line. */
  function advance(count = 1) {
    position += count;
    column += count;
  }

  /** Consume characters while `predicate` holds and return the consumed text. */
  function readWhile(predicate) {
    const start = position;
    while (position < input.length && predicate(input[position])) {
      advance();
    }
    return input.slice(start, position);
  }

  /**
   * Skip whitespace and `//` line comments.
   * Iterative on purpose: recursing once per skipped character overflows the
   * call stack on long runs of blank space.
   */
  function skipTrivia() {
    while (position < input.length) {
      const char = input[position];
      if (isWhitespace(char)) {
        if (char === '\n') {
          line++;
          column = 1;
        } else {
          column++;
        }
        position++;
      } else if (char === '/' && input[position + 1] === '/') {
        // The newline itself is left for the whitespace branch so that the
        // line counter is updated in exactly one place.
        while (position < input.length && input[position] !== '\n') {
          advance();
        }
      } else {
        return;
      }
    }
  }

  /** Read a double-quoted string literal starting at the opening quote. */
  function readString() {
    const startLine = line;
    const startColumn = column;
    let str = '';

    advance(); // opening quote

    while (position < input.length && input[position] !== '"') {
      const char = input[position];
      const escaped = char === '\\' && position + 1 < input.length
        ? input[position + 1]
        : undefined;

      if (escaped !== undefined && STRING_ESCAPES.has(escaped)) {
        str += STRING_ESCAPES.get(escaped);
        advance(2);
      } else if (char === '\n') {
        // Strings may span lines; keep the position bookkeeping in sync.
        str += char;
        position++;
        line++;
        column = 1;
      } else {
        // Ordinary character, or a backslash that does not start a known
        // escape (kept literally).
        str += char;
        advance();
      }
    }

    if (position >= input.length) {
      throw new LexError(
        'Unterminated string literal',
        { line: startLine, column: startColumn, length: str.length + 1 },
        input,
        [
          'Add closing quote " at the end of the string',
          'Check for unescaped quotes inside the string',
          'Use \\" to include quotes in strings'
        ]
      );
    }

    advance(); // closing quote
    return { type: tokenTypes.STRING, value: str, line: startLine, column: startColumn };
  }

  /**
   * Read an integer or float literal, including a leading `-` when the caller
   * has already decided the minus sign belongs to the literal.
   */
  function readNumber() {
    const startColumn = column;
    let text = '';

    if (input[position] === '-') {
      text = '-';
      advance();
    }
    text += readWhile(isDigit);

    let isFloat = false;
    // A dot followed by another dot is the `..` operator, not a decimal point.
    if (input[position] === '.' && input[position + 1] !== '.') {
      advance();
      text += '.' + readWhile(isDigit);
      isFloat = true;
    }

    return {
      type: tokenTypes.NUMBER,
      value: isFloat ? Number.parseFloat(text) : Number.parseInt(text, 10),
      isFloat,
      originalString: text,
      line,
      column: startColumn
    };
  }

  /**
   * Decide whether the `-` at the current position (known to be directly
   * followed by a digit) starts a negative literal or is a binary minus,
   * based on the previous non-whitespace character.
   */
  function shouldBeNegativeLiteral() {
    let prevPos = position - 1;
    while (prevPos >= 0 && isWhitespace(input[prevPos])) {
      prevPos--;
    }

    if (prevPos < 0) {
      return true; // start of input
    }

    const prevChar = input[prevPos];

    if (NEGATIVE_LITERAL_PREFIXES.has(prevChar)) {
      return true;
    }

    // After a closing parenthesis or a digit the minus is a binary operator.
    // (The previous implementation had a look-ahead after digits, but it
    // inspected the digit that follows the minus and so could never match.)
    if (prevChar === ')' || isDigit(prevChar)) {
      return false;
    }

    // After identifiers and anything else: negative literal
    // (function calls, variable declarations, etc.).
    return true;
  }

  /** Build the diagnostic for a character that starts no token. */
  function unexpectedCharacter(char) {
    const suggestions = [];

    if (char === '\u201C' || char === '\u201D') {
      suggestions.push('Use straight quotes " instead of curly quotes');
    } else if (char === '–' || char === '—') {
      suggestions.push('Use regular minus - or arrow -> instead of em/en dash');
    } else if (/[^\x00-\x7F]/.test(char)) {
      suggestions.push('Use only ASCII characters in Baba Yaga code');
    } else {
      suggestions.push(`Character "${char}" is not valid in Baba Yaga syntax`);
    }

    return new LexError(
      `Unexpected character: ${JSON.stringify(char)}`,
      { line, column, length: 1 },
      input,
      suggestions
    );
  }

  /** Produce the next token, or an EOF token at the end of input. */
  function nextToken() {
    skipTrivia();

    if (position >= input.length) {
      return { type: tokenTypes.EOF, line, column };
    }

    const char = input[position];
    const startColumn = column;
    const emit = (type, value) => {
      advance(value.length);
      return { type, value, line, column: startColumn };
    };

    const pair = input.slice(position, position + 2);
    if (TWO_CHAR_TOKENS.has(pair)) {
      return emit(TWO_CHAR_TOKENS.get(pair), pair);
    }

    if (
      char === '-' &&
      position + 1 < input.length &&
      isDigit(input[position + 1]) &&
      shouldBeNegativeLiteral()
    ) {
      return readNumber();
    }

    if (isDigit(char)) {
      return readNumber();
    }

    if (isIdentifierChar(char)) {
      const value = readWhile(isIdentifierChar);
      let type = tokenTypes.IDENTIFIER;
      if (TYPE_NAMES.has(value)) {
        type = tokenTypes.TYPE;
      } else if (KEYWORD_SET.has(value)) {
        type = tokenTypes.KEYWORD;
      }
      return { type, value, line, column: startColumn };
    }

    if (char === '"') {
      return readString();
    }

    if (ONE_CHAR_TOKENS.has(char)) {
      return emit(ONE_CHAR_TOKENS.get(char), char);
    }

    if (ONE_CHAR_OPERATORS.has(char)) {
      return emit(tokenTypes.OPERATOR, char);
    }

    throw unexpectedCharacter(char);
  }

  /** Lex the whole input; the returned list always ends with an EOF token. */
  function allTokens() {
    const tokens = [];
    let token;
    do {
      token = nextToken();
      tokens.push(token);
    } while (token.type !== tokenTypes.EOF);
    return tokens;
  }

  return {
    allTokens,
  };
}

export { createLexer, tokenTypes };