// validation.js - Input validation and sanitization for Baba Yaga

import { ValidationError, ErrorHelpers } from './error.js';

/**
 * Identifier names rejected by `InputValidator.validateIdentifier`.
 * Hoisted to module scope so the collection is built once, not per call.
 */
const RESERVED_IDENTIFIERS = new Set([
  'undefined',
  'null',
  'NaN',
  'Infinity',
  'constructor',
  'prototype',
]);

/**
 * Textual patterns rejected by `SecurityValidator.validateUntrustedCode`,
 * checked in this order. None of the regexes carries the `g`/`y` flag, so
 * `exec` is stateless and always reports the first occurrence.
 */
const DANGEROUS_PATTERNS = [
  { pattern: /eval\s*\(/, message: 'eval() is not allowed' },
  { pattern: /Function\s*\(/, message: 'Function constructor is not allowed' },
  { pattern: /import\s+/, message: 'import statements are not allowed' },
  { pattern: /require\s*\(/, message: 'require() is not allowed' },
  { pattern: /process\s*\./, message: 'process object access is not allowed' },
  { pattern: /global\s*\./, message: 'global object access is not allowed' },
  { pattern: /__proto__/, message: '__proto__ access is not allowed' },
  { pattern: /constructor\s*\./, message: 'constructor access is not allowed' },
];

/**
 * Input validation for source code and runtime values
 */
export class InputValidator {
  /**
   * @param {object} [config] - Optional limit overrides; any key left
   *   `null`/`undefined` falls back to the default shown below.
   */
  constructor(config = {}) {
    this.maxSourceLength = config.maxSourceLength ?? 10_000_000; // 10M characters
    this.maxASTDepth = config.maxASTDepth ?? 1000;
    this.maxIdentifierLength = config.maxIdentifierLength ?? 255;
    this.maxStringLength = config.maxStringLength ?? 1_000_000; // 1M characters
    this.maxListLength = config.maxListLength ?? 100_000;
    this.maxTableSize = config.maxTableSize ?? 10_000;
    // Longest accepted single source line (previously a hard-coded 10000).
    this.maxLineLength = config.maxLineLength ?? 10_000;
    this.allowedCharacters = config.allowedCharacters ?? /^[\x20-\x7E\s\n\r\t]*$/; // Printable ASCII + whitespace
  }

  /**
   * Validate source code before lexing
   *
   * Checks, in order: the type of `source`, its total length, its character
   * set, and the length of each `\n`-separated line.
   *
   * @param {string} source - Source text to validate.
   * @param {string} [filename] - Accepted for API compatibility; not used here.
   * @returns {true} When every check passes.
   * @throws {ValidationError} On the first failing check.
   */
  validateSourceCode(source, filename = '') {
    if (typeof source !== 'string') {
      throw new ValidationError(
        'Source code must be a string',
        null,
        '',
        ['Ensure you are passing a string to the interpreter']
      );
    }

    // Check source length
    if (source.length > this.maxSourceLength) {
      throw new ValidationError(
        `Source code too large: ${source.length} characters (max: ${this.maxSourceLength})`,
        null,
        source.substring(0, 100) + '...',
        [
          'Break your code into smaller modules',
          'Consider using external data files',
          `Increase maxSourceLength in configuration to ${source.length + 1000}`
        ]
      );
    }

    // Check for null bytes and other problematic characters
    if (!this.allowedCharacters.test(source)) {
      const problematicChars = this.findProblematicCharacters(source);
      const suggestions = [
        'Use only printable ASCII characters',
        'Check for hidden Unicode characters'
      ];
      // The per-character scan can come back empty when a custom
      // `allowedCharacters` regex rejects the text as a whole but accepts
      // each character on its own; only report a character when one exists.
      if (problematicChars) {
        suggestions.unshift(
          `Found invalid character: ${JSON.stringify(problematicChars.char)}`
        );
      }
      throw new ValidationError(
        'Source code contains invalid characters',
        problematicChars?.location ?? null,
        source,
        suggestions
      );
    }

    // Check for extremely long lines (potential minified code)
    const lines = source.split('\n');
    for (let i = 0; i < lines.length; i++) {
      if (lines[i].length > this.maxLineLength) {
        throw new ValidationError(
          `Line ${i + 1} is extremely long (${lines[i].length} characters)`,
          { line: i + 1, column: 1 },
          source,
          [
            'Break long lines into multiple lines',
            'Check if this is minified code that should be formatted',
            'Consider if this is actually data that should be in a separate file'
          ]
        );
      }
    }

    return true;
  }

  /**
   * Find the first problematic character in source code
   *
   * @param {string} source - Source text to scan.
   * @returns {{char: string, location: {line: number, column: number, length: number}} | null}
   *   The first UTF-16 code unit rejected by `allowedCharacters` together
   *   with its 1-based line/column, or `null` when every unit is accepted.
   */
  findProblematicCharacters(source) {
    for (let i = 0; i < source.length; i++) {
      const char = source[i];
      if (!this.allowedCharacters.test(char)) {
        // Line = number of '\n'-separated segments before the character;
        // column = length of the last such segment, plus one.
        const lines = source.substring(0, i).split('\n');
        return {
          char,
          location: {
            line: lines.length,
            column: lines[lines.length - 1].length + 1,
            length: 1
          }
        };
      }
    }
    return null;
  }

  /**
   * Validate AST structure and depth
   *
   * @param {object} ast - Root AST node.
   * @param {string} [source] - Source text forwarded to raised errors.
   * @returns {true} When the AST passes every check.
   * @throws {ValidationError} If `ast` is not an object, is nested deeper
   *   than `maxASTDepth`, or contains a node that fails validation.
   */
  validateAST(ast, source = '') {
    if (!ast || typeof ast !== 'object') {
      throw new ValidationError(
        'Invalid AST: must be an object',
        null,
        source,
        ['Check parser output', 'Ensure parsing completed successfully']
      );
    }

    // Check AST depth to prevent stack overflow
    const maxDepth = this.checkASTDepth(ast);
    if (maxDepth > this.maxASTDepth) {
      throw new ValidationError(
        `AST too deep: ${maxDepth} levels (max: ${this.maxASTDepth})`,
        this.findDeepestNode(ast).location,
        source,
        [
          'Reduce nesting in your code',
          'Break complex expressions into smaller parts',
          `Increase maxASTDepth in configuration to ${maxDepth + 100}`
        ]
      );
    }

    // Validate AST node structure
    this.validateASTNodes(ast, source);

    return true;
  }

  /**
   * Compute the maximum nesting depth reachable from `node`.
   *
   * Uses an explicit work stack instead of recursion: this method is the
   * guard against over-deep ASTs, so it must not itself overflow the call
   * stack on the inputs it is meant to reject.
   *
   * @param {*} node - Node to measure from.
   * @param {number} [depth] - Depth assigned to `node` itself.
   * @returns {number} The largest depth of any truthy descendant reported by
   *   `getChildNodes`, or `depth` when there are none.
   */
  checkASTDepth(node, depth = 0) {
    let maxDepth = depth;
    const pending = [[node, depth]];

    while (pending.length > 0) {
      const [current, currentDepth] = pending.pop();
      if (currentDepth > maxDepth) {
        maxDepth = currentDepth;
      }
      // getChildNodes returns [] for non-object values, so truthy
      // non-object children count toward depth but are not expanded.
      for (const child of this.getChildNodes(current)) {
        if (child) {
          pending.push([child, currentDepth + 1]);
        }
      }
    }

    return maxDepth;
  }

  /**
   * Find the deepest node in the AST (for error reporting)
   *
   * Iterative for the same reason as `checkASTDepth`. When several nodes
   * share the maximum depth, the first one in pre-order is returned.
   *
   * @param {*} ast - Root node.
   * @returns {*} The deepest node, or `ast` itself when it has no children.
   */
  findDeepestNode(ast) {
    let deepestNode = ast;
    let maxDepth = 0;
    const pending = [[ast, 0]];

    while (pending.length > 0) {
      const [node, depth] = pending.pop();
      if (depth > maxDepth) {
        maxDepth = depth;
        deepestNode = node;
      }
      // Push children in reverse so they are popped in their original
      // order, which keeps the visit order pre-order.
      const children = this.getChildNodes(node);
      for (let i = children.length - 1; i >= 0; i--) {
        if (children[i]) {
          pending.push([children[i], depth + 1]);
        }
      }
    }

    return deepestNode;
  }

  /**
   * Get all child nodes from an AST node
   *
   * Only the node types listed in the switch contribute children; any other
   * type (and any non-object value) yields an empty array.
   *
   * @param {*} node - AST node to inspect.
   * @returns {Array} Direct children, in field order.
   */
  getChildNodes(node) {
    if (!node || typeof node !== 'object') {
      return [];
    }

    const children = [];
    // Append element by element rather than `push(...items)`: spreading a
    // very large array as call arguments can throw a RangeError before the
    // list/table size validators get a chance to report the problem.
    const pushAll = (items) => {
      for (const item of items || []) {
        children.push(item);
      }
    };

    switch (node.type) {
      case 'Program':
        pushAll(node.body);
        break;
      case 'FunctionDeclaration':
      case 'VariableDeclaration':
        if (node.body) children.push(node.body);
        if (node.value) children.push(node.value);
        break;
      case 'FunctionCall':
        if (node.callee) children.push(node.callee);
        pushAll(node.arguments);
        break;
      case 'BinaryExpression':
        if (node.left) children.push(node.left);
        if (node.right) children.push(node.right);
        break;
      case 'UnaryExpression':
        if (node.operand) children.push(node.operand);
        break;
      case 'WhenExpression':
        pushAll(node.discriminants);
        for (const whenCase of node.cases || []) {
          if (whenCase.consequent) children.push(whenCase.consequent);
        }
        break;
      case 'ListLiteral':
        pushAll(node.elements);
        break;
      case 'TableLiteral':
        for (const prop of node.properties || []) {
          if (prop.value) children.push(prop.value);
        }
        break;
      case 'MemberExpression':
        if (node.object) children.push(node.object);
        if (node.property) children.push(node.property);
        break;
      case 'AnonymousFunction':
        if (node.body) children.push(node.body);
        break;
      case 'WithHeader':
        for (const entry of node.entries || []) {
          if (entry.value) children.push(entry.value);
        }
        if (node.body) children.push(node.body);
        break;
      case 'ResultExpression':
        if (node.value) children.push(node.value);
        break;
    }

    return children;
  }

  /**
   * Validate individual AST nodes
   *
   * Validates `node`, then recurses into its children in order. Non-object
   * values are ignored. The recursion depth equals the AST depth;
   * `validateAST` bounds it by running the depth check first.
   *
   * @param {*} node - Node to validate.
   * @param {string} source - Source text forwarded to raised errors.
   * @throws {ValidationError} On the first invalid node encountered.
   */
  validateASTNodes(node, source) {
    if (!node || typeof node !== 'object') {
      return;
    }

    // Validate node has required type field
    if (!node.type || typeof node.type !== 'string') {
      throw new ValidationError(
        'Invalid AST node: missing or invalid type field',
        node.location,
        source,
        ['Check parser implementation', 'Ensure all nodes have a type property']
      );
    }

    // Validate specific node types
    switch (node.type) {
      case 'Identifier':
        this.validateIdentifier(node, source);
        break;
      case 'StringLiteral':
        this.validateStringLiteral(node, source);
        break;
      case 'ListLiteral':
        this.validateListLiteral(node, source);
        break;
      case 'TableLiteral':
        this.validateTableLiteral(node, source);
        break;
    }

    // Recursively validate child nodes
    const children = this.getChildNodes(node);
    for (const child of children) {
      if (child) {
        this.validateASTNodes(child, source);
      }
    }
  }

  /**
   * Validate identifier names
   *
   * @param {object} node - Identifier node; reads `name` and `location`.
   * @param {string} source - Source text forwarded to raised errors.
   * @throws {ValidationError} If the name is missing or not a string, is
   *   longer than `maxIdentifierLength`, or is one of the reserved names.
   */
  validateIdentifier(node, source) {
    if (!node.name || typeof node.name !== 'string') {
      throw new ValidationError(
        'Invalid identifier: missing name',
        node.location,
        source,
        ['Check identifier declaration']
      );
    }

    if (node.name.length > this.maxIdentifierLength) {
      throw new ValidationError(
        `Identifier too long: ${node.name.length} characters (max: ${this.maxIdentifierLength})`,
        node.location,
        source,
        ['Use shorter variable names', 'Consider abbreviations']
      );
    }

    // Check for reserved words that might cause issues
    if (RESERVED_IDENTIFIERS.has(node.name)) {
      throw new ValidationError(
        `Identifier "${node.name}" conflicts with JavaScript reserved word`,
        node.location,
        source,
        [`Use a different name like "${node.name}_" or "my${node.name}"`]
      );
    }
  }

  /**
   * Validate string literals
   *
   * @param {object} node - String literal node; reads `value` and `location`.
   * @param {string} source - Source text forwarded to raised errors.
   * @throws {ValidationError} If `value` is not a string or is longer than
   *   `maxStringLength`.
   */
  validateStringLiteral(node, source) {
    if (typeof node.value !== 'string') {
      throw new ValidationError(
        'Invalid string literal: value must be a string',
        node.location,
        source,
        ['Check string parsing logic']
      );
    }

    if (node.value.length > this.maxStringLength) {
      throw new ValidationError(
        `String too long: ${node.value.length} characters (max: ${this.maxStringLength})`,
        node.location,
        source,
        [
          'Consider breaking large strings into smaller parts',
          'Use external files for large text data',
          `Increase maxStringLength to ${node.value.length + 1000}`
        ]
      );
    }
  }

  /**
   * Validate list literals
   *
   * @param {object} node - List literal node; reads `elements` and `location`.
   * @param {string} source - Source text forwarded to raised errors.
   * @throws {ValidationError} If `elements` is not an array or holds more
   *   than `maxListLength` entries.
   */
  validateListLiteral(node, source) {
    if (!Array.isArray(node.elements)) {
      throw new ValidationError(
        'Invalid list literal: elements must be an array',
        node.location,
        source,
        ['Check list parsing logic']
      );
    }

    if (node.elements.length > this.maxListLength) {
      throw new ValidationError(
        `List too long: ${node.elements.length} elements (max: ${this.maxListLength})`,
        node.location,
        source,
        [
          'Consider using external data files',
          'Process data in smaller chunks',
          `Increase maxListLength to ${node.elements.length + 1000}`
        ]
      );
    }
  }

  /**
   * Validate table literals
   *
   * @param {object} node - Table literal node; reads `properties` and
   *   `location`.
   * @param {string} source - Source text forwarded to raised errors.
   * @throws {ValidationError} If `properties` is not an array, holds more
   *   than `maxTableSize` entries, or contains two entries whose `key`
   *   values are equal under `Set` membership.
   */
  validateTableLiteral(node, source) {
    if (!Array.isArray(node.properties)) {
      throw new ValidationError(
        'Invalid table literal: properties must be an array',
        node.location,
        source,
        ['Check table parsing logic']
      );
    }

    if (node.properties.length > this.maxTableSize) {
      throw new ValidationError(
        `Table too large: ${node.properties.length} properties (max: ${this.maxTableSize})`,
        node.location,
        source,
        [
          'Break large tables into smaller ones',
          'Use nested structures',
          `Increase maxTableSize to ${node.properties.length + 1000}`
        ]
      );
    }

    // Check for duplicate keys
    const keys = new Set();
    for (const prop of node.properties) {
      if (keys.has(prop.key)) {
        throw new ValidationError(
          `Duplicate table key: "${prop.key}"`,
          node.location,
          source,
          [`Remove duplicate key "${prop.key}"`, 'Use unique keys for table properties']
        );
      }
      keys.add(prop.key);
    }
  }

  /**
   * Validate runtime values during execution
   *
   * @param {*} value - Value to check.
   * @param {string} [context] - Label inserted into the circular-reference
   *   error message.
   * @returns {true} When the value passes.
   * @throws {ValidationError} If `value` contains a circular reference, or
   *   is an array longer than `maxListLength` (top level only).
   */
  validateRuntimeValue(value, context = 'runtime') {
    // Check for circular references in objects
    if (typeof value === 'object' && value !== null) {
      this.checkCircularReferences(value, new WeakSet(), context);
    }

    // Validate specific value types
    if (Array.isArray(value)) {
      if (value.length > this.maxListLength) {
        throw new ValidationError(
          `Runtime list too long: ${value.length} elements (max: ${this.maxListLength})`,
          null,
          '',
          ['Process data in smaller chunks', 'Increase maxListLength']
        );
      }
    }

    return true;
  }

  /**
   * Check for circular references in objects
   *
   * `visited` holds only the objects on the current traversal path: each
   * object is added on entry and removed on exit, so an object reachable
   * through two separate branches is not reported as a cycle.
   *
   * @param {*} obj - Value to traverse; non-object values are ignored.
   * @param {WeakSet} visited - Objects on the current path.
   * @param {string} context - Label inserted into the error message.
   * @throws {ValidationError} If `obj` is already on the current path.
   */
  checkCircularReferences(obj, visited, context) {
    // WeakSet.add throws a TypeError for primitives, and there is nothing
    // to traverse inside a non-object anyway.
    if (typeof obj !== 'object' || obj === null) {
      return;
    }

    if (visited.has(obj)) {
      throw new ValidationError(
        `Circular reference detected in ${context}`,
        null,
        '',
        [
          'Avoid creating circular object references',
          'Use weak references where appropriate',
          'Check object construction logic'
        ]
      );
    }

    visited.add(obj);

    if (obj.properties instanceof Map) {
      // Handle Baba Yaga table objects
      for (const value of obj.properties.values()) {
        if (typeof value === 'object' && value !== null) {
          this.checkCircularReferences(value, visited, context);
        }
      }
    } else if (Array.isArray(obj)) {
      // Handle arrays
      for (const item of obj) {
        if (typeof item === 'object' && item !== null) {
          this.checkCircularReferences(item, visited, context);
        }
      }
    } else {
      // Handle regular objects
      for (const value of Object.values(obj)) {
        if (typeof value === 'object' && value !== null) {
          this.checkCircularReferences(value, visited, context);
        }
      }
    }

    visited.delete(obj);
  }
}

/**
 * Security-focused validation for untrusted input
 */
export class SecurityValidator extends InputValidator {
  /**
   * @param {object} [config] - Forwarded to `InputValidator`; additionally
   *   reads `maxExecutionTime`, `maxMemoryUsage` and `allowedBuiltins`.
   */
  constructor(config = {}) {
    super(config);
    this.maxExecutionTime = config.maxExecutionTime ?? 30000; // 30 seconds
    this.maxMemoryUsage = config.maxMemoryUsage ?? 100_000_000; // 100MB
    this.allowedBuiltins = new Set(config.allowedBuiltins ?? [
      'map', 'filter', 'reduce', 'append', 'prepend', 'concat',
      'str.concat', 'str.split', 'str.join', 'str.length',
      'math.abs', 'math.min', 'math.max', 'math.floor', 'math.ceil'
    ]);
  }

  /**
   * Additional security validation for untrusted code
   *
   * Runs `validateSourceCode`, then rejects the source if it matches any
   * entry of `DANGEROUS_PATTERNS`. The match is purely textual, so
   * occurrences inside string literals, comments, or longer identifiers are
   * rejected as well.
   *
   * @param {string} source - Source text to validate.
   * @param {string} [filename] - Forwarded to `validateSourceCode`.
   * @returns {true} When no check fails.
   * @throws {ValidationError} On the first failing check; for a pattern
   *   match the location points at the first occurrence of that pattern.
   */
  validateUntrustedCode(source, filename = '') {
    // Run basic validation first
    this.validateSourceCode(source, filename);

    // Check for potentially dangerous patterns
    for (const { pattern, message } of DANGEROUS_PATTERNS) {
      // One exec yields both the yes/no answer and the match position.
      const match = pattern.exec(source);
      if (match) {
        const beforeMatch = source.substring(0, match.index);
        const lines = beforeMatch.split('\n');

        throw new ValidationError(
          message,
          {
            line: lines.length,
            column: lines[lines.length - 1].length + 1,
            length: match[0].length
          },
          source,
          ['Remove unsafe code patterns', 'Use only Baba Yaga built-in functions']
        );
      }
    }

    return true;
  }

  /**
   * Validate function calls against whitelist
   *
   * @param {string} functionName - Name looked up in `allowedBuiltins`.
   * @param {*} location - Forwarded to the raised error.
   * @param {string} source - Forwarded to the raised error.
   * @returns {true} When the name is whitelisted.
   * @throws {ValidationError} When the name is not in `allowedBuiltins`.
   */
  validateFunctionCall(functionName, location, source) {
    if (!this.allowedBuiltins.has(functionName)) {
      throw new ValidationError(
        `Function "${functionName}" is not allowed in restricted mode`,
        location,
        source,
        [
          'Use only whitelisted functions',
          'Check security configuration',
          `Add "${functionName}" to allowedBuiltins if safe`
        ]
      );
    }

    return true;
  }
}