diff options
Diffstat (limited to 'js/baba-yaga/src/core/validation.js')
-rw-r--r-- | js/baba-yaga/src/core/validation.js | 567 |
1 files changed, 567 insertions, 0 deletions
// validation.js - Input validation and sanitization for Baba Yaga

import { ValidationError, ErrorHelpers } from './error.js';

/**
 * Identifier names rejected by `InputValidator.validateIdentifier` because
 * they collide with JavaScript globals / object internals.
 */
const RESERVED_IDENTIFIERS = new Set([
  'undefined',
  'null',
  'NaN',
  'Infinity',
  'constructor',
  'prototype',
]);

/**
 * Textual patterns rejected by `SecurityValidator.validateUntrustedCode`.
 *
 * The keyword patterns start with `\b` so that an identifier which merely
 * ends in a flagged word (`retrieval(`, `preprocess.`) is not reported; the
 * bare keyword (also after `.`, whitespace or punctuation) still is.
 * None of the patterns carries the `g`/`y` flag, so `exec` is stateless.
 */
const DANGEROUS_PATTERNS = Object.freeze([
  { pattern: /\beval\s*\(/, message: 'eval() is not allowed' },
  { pattern: /\bFunction\s*\(/, message: 'Function constructor is not allowed' },
  { pattern: /\bimport\s+/, message: 'import statements are not allowed' },
  { pattern: /\brequire\s*\(/, message: 'require() is not allowed' },
  { pattern: /\bprocess\s*\./, message: 'process object access is not allowed' },
  { pattern: /\bglobal\s*\./, message: 'global object access is not allowed' },
  { pattern: /__proto__/, message: '__proto__ access is not allowed' },
  { pattern: /\bconstructor\s*\./, message: 'constructor access is not allowed' },
]);

/**
 * Convert a character offset into a 1-based line/column pair.
 *
 * @param {string} source - Full text.
 * @param {number} index - 0-based offset into `source`.
 * @returns {{line: number, column: number}}
 */
function positionAt(source, index) {
  const preceding = source.substring(0, index).split('\n');
  return {
    line: preceding.length,
    column: preceding[preceding.length - 1].length + 1,
  };
}

/**
 * Test `text` against `pattern` from the start of the string.
 *
 * `lastIndex` is reset first: a caller-supplied pattern with the `g` or `y`
 * flag would otherwise make `test()` depend on the previous call.
 *
 * @param {RegExp} pattern
 * @param {string} text
 * @returns {boolean}
 */
function testFromStart(pattern, text) {
  pattern.lastIndex = 0;
  return pattern.test(text);
}

/**
 * Pre-order, depth-first walk over an AST using an explicit stack, so the
 * walk itself cannot exhaust the JavaScript call stack on deep input.
 * Children come from `validator.getChildNodes`; falsy children are skipped.
 * The AST is expected to be a tree: a cyclic structure would never finish.
 *
 * @param {InputValidator} validator - Supplies `getChildNodes`.
 * @param {*} root - Node to start from (visited even if it has no children).
 * @param {number} startDepth - Depth reported for `root`.
 * @param {(node: *, depth: number) => void} visit - Called once per node.
 */
function walkAST(validator, root, startDepth, visit) {
  const pending = [{ node: root, depth: startDepth }];
  while (pending.length > 0) {
    const { node, depth } = pending.pop();
    visit(node, depth);

    const children = validator.getChildNodes(node);
    // Push in reverse so nodes are popped in source (pre-)order.
    for (let i = children.length - 1; i >= 0; i--) {
      if (children[i]) {
        pending.push({ node: children[i], depth: depth + 1 });
      }
    }
  }
}

/**
 * Input validation for source code and runtime values
 */
export class InputValidator {
  /**
   * @param {object} [config] - Optional limits; every field falls back to the
   *   default shown below when it is `null`/`undefined`.
   * @param {number} [config.maxSourceLength] - Max source length in characters.
   * @param {number} [config.maxLineLength] - Max length of a single line.
   * @param {number} [config.maxASTDepth] - Max AST nesting depth.
   * @param {number} [config.maxIdentifierLength] - Max identifier length.
   * @param {number} [config.maxStringLength] - Max string literal length.
   * @param {number} [config.maxListLength] - Max list elements (literal or runtime).
   * @param {number} [config.maxTableSize] - Max properties in a table literal.
   * @param {RegExp} [config.allowedCharacters] - Pattern the whole source (and
   *   each single character) must match.
   */
  constructor(config = {}) {
    this.maxSourceLength = config.maxSourceLength ?? 10_000_000; // 10MB
    this.maxLineLength = config.maxLineLength ?? 10_000;
    this.maxASTDepth = config.maxASTDepth ?? 1000;
    this.maxIdentifierLength = config.maxIdentifierLength ?? 255;
    this.maxStringLength = config.maxStringLength ?? 1_000_000; // 1MB
    this.maxListLength = config.maxListLength ?? 100_000;
    this.maxTableSize = config.maxTableSize ?? 10_000;
    // Printable ASCII + whitespace.
    // NOTE(review): `\s` also matches non-ASCII whitespace (e.g. U+00A0,
    // U+FEFF), so those characters pass this default - confirm that is intended.
    this.allowedCharacters = config.allowedCharacters ?? /^[\x20-\x7E\s\n\r\t]*$/;
  }

  /**
   * Validate source code before lexing
   *
   * @param {*} source - Text to validate; anything but a string is rejected.
   * @param {string} [filename] - Accepted for API symmetry; not used here.
   * @returns {true} When every check passes.
   * @throws {ValidationError} If `source` is not a string, is longer than
   *   `maxSourceLength`, contains characters outside `allowedCharacters`, or
   *   has a line longer than `maxLineLength`.
   */
  validateSourceCode(source, filename = '<input>') {
    if (typeof source !== 'string') {
      throw new ValidationError(
        'Source code must be a string',
        null,
        '',
        ['Ensure you are passing a string to the interpreter']
      );
    }

    // Check source length
    if (source.length > this.maxSourceLength) {
      throw new ValidationError(
        `Source code too large: ${source.length} characters (max: ${this.maxSourceLength})`,
        null,
        source.substring(0, 100) + '...',
        [
          'Break your code into smaller modules',
          'Consider using external data files',
          `Increase maxSourceLength in configuration to ${source.length + 1000}`
        ]
      );
    }

    // Check for null bytes and other problematic characters
    if (!testFromStart(this.allowedCharacters, source)) {
      // May be null when a custom pattern rejects the text as a whole but
      // accepts each character individually.
      const offender = this.findProblematicCharacters(source);
      const suggestions = [
        'Use only printable ASCII characters',
        'Check for hidden Unicode characters'
      ];
      if (offender) {
        suggestions.unshift(`Found invalid character: ${JSON.stringify(offender.char)}`);
      }
      throw new ValidationError(
        'Source code contains invalid characters',
        offender ? offender.location : null,
        source,
        suggestions
      );
    }

    // Check for extremely long lines (potential minified code)
    const lines = source.split('\n');
    for (let i = 0; i < lines.length; i++) {
      const lineLength = lines[i].length;
      if (lineLength > this.maxLineLength) {
        throw new ValidationError(
          `Line ${i + 1} is extremely long (${lineLength} characters)`,
          { line: i + 1, column: 1 },
          source,
          [
            'Break long lines into multiple lines',
            'Check if this is minified code that should be formatted',
            'Consider if this is actually data that should be in a separate file'
          ]
        );
      }
    }

    return true;
  }

  /**
   * Find the first problematic character in source code
   *
   * @param {string} source
   * @returns {{char: string, location: {line: number, column: number, length: number}}|null}
   *   The first character that does not match `allowedCharacters` with its
   *   1-based position, or `null` when every character matches.
   */
  findProblematicCharacters(source) {
    for (let i = 0; i < source.length; i++) {
      const char = source[i];
      if (!testFromStart(this.allowedCharacters, char)) {
        return {
          char,
          location: { ...positionAt(source, i), length: 1 }
        };
      }
    }
    return null;
  }

  /**
   * Validate AST structure and depth
   *
   * @param {object} ast - Root AST node.
   * @param {string} [source] - Original source, forwarded to raised errors.
   * @returns {true} When the AST passes every check.
   * @throws {ValidationError} If `ast` is not an object, is nested deeper than
   *   `maxASTDepth`, or contains a node rejected by `validateASTNodes`.
   */
  validateAST(ast, source = '') {
    if (!ast || typeof ast !== 'object') {
      throw new ValidationError(
        'Invalid AST: must be an object',
        null,
        source,
        ['Check parser output', 'Ensure parsing completed successfully']
      );
    }

    // Check AST depth to prevent stack overflow
    const maxDepth = this.checkASTDepth(ast);
    if (maxDepth > this.maxASTDepth) {
      throw new ValidationError(
        `AST too deep: ${maxDepth} levels (max: ${this.maxASTDepth})`,
        this.findDeepestNode(ast).location,
        source,
        [
          'Reduce nesting in your code',
          'Break complex expressions into smaller parts',
          `Increase maxASTDepth in configuration to ${maxDepth + 100}`
        ]
      );
    }

    // Validate AST node structure
    this.validateASTNodes(ast, source);

    return true;
  }

  /**
   * Measure the depth of the deepest node below (and including) `node`.
   *
   * Implemented iteratively: this is the guard against overly deep ASTs, so
   * it must not itself overflow the call stack on such input.
   *
   * @param {*} node - Root of the subtree.
   * @param {number} [depth] - Depth assigned to `node`.
   * @returns {number} `depth` if `node` is not an object, otherwise the
   *   largest depth reached (each child level adds 1).
   */
  checkASTDepth(node, depth = 0) {
    if (!node || typeof node !== 'object') {
      return depth;
    }

    let deepest = depth;
    walkAST(this, node, depth, (_visited, visitedDepth) => {
      if (visitedDepth > deepest) {
        deepest = visitedDepth;
      }
    });
    return deepest;
  }

  /**
   * Find the deepest node in the AST (for error reporting)
   *
   * @param {*} ast - Root AST node.
   * @returns {*} The first node, in pre-order, that sits at the maximum depth;
   *   `ast` itself when it has no children.
   */
  findDeepestNode(ast) {
    let deepestNode = ast;
    let maxDepth = 0;

    walkAST(this, ast, 0, (node, depth) => {
      if (depth > maxDepth) {
        maxDepth = depth;
        deepestNode = node;
      }
    });

    return deepestNode;
  }

  /**
   * Get all child nodes from an AST node
   *
   * Only the node types listed in the switch contribute children; any other
   * type yields an empty array.
   *
   * @param {*} node
   * @returns {Array} Direct children in source order (list entries are copied
   *   as-is and may therefore include falsy values).
   */
  getChildNodes(node) {
    if (!node || typeof node !== 'object') {
      return [];
    }

    const children = [];
    const addIfPresent = (child) => {
      if (child) {
        children.push(child);
      }
    };
    // A loop instead of push(...list): spreading a very large array as call
    // arguments can itself raise a RangeError.
    const addAll = (list) => {
      for (const child of list || []) {
        children.push(child);
      }
    };

    switch (node.type) {
      case 'Program':
        addAll(node.body);
        break;
      case 'FunctionDeclaration':
      case 'VariableDeclaration':
        addIfPresent(node.body);
        addIfPresent(node.value);
        break;
      case 'FunctionCall':
        addIfPresent(node.callee);
        addAll(node.arguments);
        break;
      case 'BinaryExpression':
        addIfPresent(node.left);
        addIfPresent(node.right);
        break;
      case 'UnaryExpression':
        addIfPresent(node.operand);
        break;
      case 'WhenExpression':
        addAll(node.discriminants);
        for (const whenCase of node.cases || []) {
          addIfPresent(whenCase.consequent);
        }
        break;
      case 'ListLiteral':
        addAll(node.elements);
        break;
      case 'TableLiteral':
        for (const prop of node.properties || []) {
          addIfPresent(prop.value);
        }
        break;
      case 'MemberExpression':
        addIfPresent(node.object);
        addIfPresent(node.property);
        break;
      case 'AnonymousFunction':
        addIfPresent(node.body);
        break;
      case 'WithHeader':
        for (const entry of node.entries || []) {
          addIfPresent(entry.value);
        }
        addIfPresent(node.body);
        break;
      case 'ResultExpression':
        addIfPresent(node.value);
        break;
    }

    return children;
  }

  /**
   * Validate `node` and every node reachable from it via `getChildNodes`.
   *
   * Nodes are checked in pre-order, so the first offending node in source
   * order is the one reported. Non-object values are ignored.
   *
   * @param {*} node - Root of the subtree to validate.
   * @param {string} source - Original source, forwarded to raised errors.
   * @throws {ValidationError} If a node lacks a string `type`, or fails its
   *   type-specific check (identifier, string, list or table literal).
   */
  validateASTNodes(node, source) {
    walkAST(this, node, 0, (current) => {
      if (!current || typeof current !== 'object') {
        return;
      }

      // Validate node has required type field
      if (!current.type || typeof current.type !== 'string') {
        throw new ValidationError(
          'Invalid AST node: missing or invalid type field',
          current.location,
          source,
          ['Check parser implementation', 'Ensure all nodes have a type property']
        );
      }

      // Validate specific node types
      switch (current.type) {
        case 'Identifier':
          this.validateIdentifier(current, source);
          break;
        case 'StringLiteral':
          this.validateStringLiteral(current, source);
          break;
        case 'ListLiteral':
          this.validateListLiteral(current, source);
          break;
        case 'TableLiteral':
          this.validateTableLiteral(current, source);
          break;
      }
    });
  }

  /**
   * Validate identifier names
   *
   * @param {object} node - Identifier node; `node.name` is checked.
   * @param {string} source - Original source, forwarded to raised errors.
   * @throws {ValidationError} If the name is missing or not a string, longer
   *   than `maxIdentifierLength`, or one of the reserved names.
   */
  validateIdentifier(node, source) {
    if (!node.name || typeof node.name !== 'string') {
      throw new ValidationError(
        'Invalid identifier: missing name',
        node.location,
        source,
        ['Check identifier declaration']
      );
    }

    if (node.name.length > this.maxIdentifierLength) {
      throw new ValidationError(
        `Identifier too long: ${node.name.length} characters (max: ${this.maxIdentifierLength})`,
        node.location,
        source,
        ['Use shorter variable names', 'Consider abbreviations']
      );
    }

    // Check for reserved words that might cause issues
    if (RESERVED_IDENTIFIERS.has(node.name)) {
      throw new ValidationError(
        `Identifier "${node.name}" conflicts with JavaScript reserved word`,
        node.location,
        source,
        [`Use a different name like "${node.name}_" or "my${node.name}"`]
      );
    }
  }

  /**
   * Validate string literals
   *
   * @param {object} node - String literal node; `node.value` is checked.
   * @param {string} source - Original source, forwarded to raised errors.
   * @throws {ValidationError} If the value is not a string or is longer than
   *   `maxStringLength`.
   */
  validateStringLiteral(node, source) {
    if (typeof node.value !== 'string') {
      throw new ValidationError(
        'Invalid string literal: value must be a string',
        node.location,
        source,
        ['Check string parsing logic']
      );
    }

    if (node.value.length > this.maxStringLength) {
      throw new ValidationError(
        `String too long: ${node.value.length} characters (max: ${this.maxStringLength})`,
        node.location,
        source,
        [
          'Consider breaking large strings into smaller parts',
          'Use external files for large text data',
          `Increase maxStringLength to ${node.value.length + 1000}`
        ]
      );
    }
  }

  /**
   * Validate list literals
   *
   * @param {object} node - List literal node; `node.elements` is checked.
   * @param {string} source - Original source, forwarded to raised errors.
   * @throws {ValidationError} If `elements` is not an array or has more than
   *   `maxListLength` entries.
   */
  validateListLiteral(node, source) {
    if (!Array.isArray(node.elements)) {
      throw new ValidationError(
        'Invalid list literal: elements must be an array',
        node.location,
        source,
        ['Check list parsing logic']
      );
    }

    if (node.elements.length > this.maxListLength) {
      throw new ValidationError(
        `List too long: ${node.elements.length} elements (max: ${this.maxListLength})`,
        node.location,
        source,
        [
          'Consider using external data files',
          'Process data in smaller chunks',
          `Increase maxListLength to ${node.elements.length + 1000}`
        ]
      );
    }
  }

  /**
   * Validate table literals
   *
   * @param {object} node - Table literal node; `node.properties` is checked.
   * @param {string} source - Original source, forwarded to raised errors.
   * @throws {ValidationError} If `properties` is not an array, has more than
   *   `maxTableSize` entries, or two entries share the same `key`.
   */
  validateTableLiteral(node, source) {
    if (!Array.isArray(node.properties)) {
      throw new ValidationError(
        'Invalid table literal: properties must be an array',
        node.location,
        source,
        ['Check table parsing logic']
      );
    }

    if (node.properties.length > this.maxTableSize) {
      throw new ValidationError(
        `Table too large: ${node.properties.length} properties (max: ${this.maxTableSize})`,
        node.location,
        source,
        [
          'Break large tables into smaller ones',
          'Use nested structures',
          `Increase maxTableSize to ${node.properties.length + 1000}`
        ]
      );
    }

    // Check for duplicate keys.
    // NOTE(review): Set membership compares keys by identity/value, so this
    // presumably expects `prop.key` to be a primitive (e.g. a string) - confirm.
    const seenKeys = new Set();
    for (const prop of node.properties) {
      if (seenKeys.has(prop.key)) {
        throw new ValidationError(
          `Duplicate table key: "${prop.key}"`,
          node.location,
          source,
          [`Remove duplicate key "${prop.key}"`, 'Use unique keys for table properties']
        );
      }
      seenKeys.add(prop.key);
    }
  }

  /**
   * Validate runtime values during execution
   *
   * @param {*} value - Value to check.
   * @param {string} [context] - Label used in the circular-reference message.
   * @returns {true} When the value passes.
   * @throws {ValidationError} If an object/array contains a circular
   *   reference, or `value` is an array longer than `maxListLength`.
   */
  validateRuntimeValue(value, context = 'runtime') {
    // Check for circular references in objects
    if (typeof value === 'object' && value !== null) {
      this.checkCircularReferences(value, new WeakSet(), context);
    }

    // Validate specific value types
    if (Array.isArray(value) && value.length > this.maxListLength) {
      throw new ValidationError(
        `Runtime list too long: ${value.length} elements (max: ${this.maxListLength})`,
        null,
        '',
        ['Process data in smaller chunks', 'Increase maxListLength']
      );
    }

    return true;
  }

  /**
   * Check for circular references in objects
   *
   * `visited` holds the objects on the current path from the root; an object
   * is removed again once its children are done, so a value referenced from
   * two different places is not mistaken for a cycle.
   *
   * @param {object} obj - Object, array, or table-like object whose
   *   `properties` is a Map.
   * @param {WeakSet<object>} visited - Objects on the current path.
   * @param {string} context - Label used in the error message.
   * @throws {ValidationError} If `obj` is already on the current path.
   */
  checkCircularReferences(obj, visited, context) {
    if (visited.has(obj)) {
      throw new ValidationError(
        `Circular reference detected in ${context}`,
        null,
        '',
        [
          'Avoid creating circular object references',
          'Use weak references where appropriate',
          'Check object construction logic'
        ]
      );
    }

    visited.add(obj);

    if (typeof obj === 'object' && obj !== null) {
      let nested;
      if (obj.properties instanceof Map) {
        // Handle Baba Yaga table objects
        nested = obj.properties.values();
      } else if (Array.isArray(obj)) {
        // Handle arrays
        nested = obj;
      } else {
        // Handle regular objects
        nested = Object.values(obj);
      }

      for (const child of nested) {
        if (typeof child === 'object' && child !== null) {
          this.checkCircularReferences(child, visited, context);
        }
      }
    }

    visited.delete(obj);
  }
}

/**
 * Security-focused validation for untrusted input
 */
export class SecurityValidator extends InputValidator {
  /**
   * @param {object} [config] - Everything `InputValidator` accepts, plus:
   * @param {number} [config.maxExecutionTime] - Stored only; not enforced here.
   * @param {number} [config.maxMemoryUsage] - Stored only; not enforced here.
   * @param {Iterable<string>} [config.allowedBuiltins] - Function names that
   *   `validateFunctionCall` accepts.
   */
  constructor(config = {}) {
    super(config);
    this.maxExecutionTime = config.maxExecutionTime ?? 30000; // 30 seconds
    this.maxMemoryUsage = config.maxMemoryUsage ?? 100_000_000; // 100MB
    this.allowedBuiltins = new Set(config.allowedBuiltins ?? [
      'map', 'filter', 'reduce', 'append', 'prepend', 'concat',
      'str.concat', 'str.split', 'str.join', 'str.length',
      'math.abs', 'math.min', 'math.max', 'math.floor', 'math.ceil'
    ]);
  }

  /**
   * Additional security validation for untrusted code
   *
   * Runs `validateSourceCode` first, then rejects the source at the first
   * entry of the dangerous-pattern table that matches anywhere in the text.
   *
   * @param {string} source - Untrusted source text.
   * @param {string} [filename] - Forwarded to `validateSourceCode`.
   * @returns {true} When nothing is flagged.
   * @throws {ValidationError} On a basic validation failure or a flagged
   *   pattern (the location points at the first match of that pattern).
   */
  validateUntrustedCode(source, filename = '<untrusted>') {
    // Run basic validation first
    this.validateSourceCode(source, filename);

    // Check for potentially dangerous patterns
    for (const { pattern, message } of DANGEROUS_PATTERNS) {
      const match = pattern.exec(source);
      if (match) {
        throw new ValidationError(
          message,
          { ...positionAt(source, match.index), length: match[0].length },
          source,
          ['Remove unsafe code patterns', 'Use only Baba Yaga built-in functions']
        );
      }
    }

    return true;
  }

  /**
   * Validate function calls against whitelist
   *
   * @param {string} functionName - Name to look up in `allowedBuiltins`.
   * @param {*} location - Forwarded to the raised error.
   * @param {string} source - Forwarded to the raised error.
   * @returns {true} When the function is whitelisted.
   * @throws {ValidationError} If `functionName` is not in `allowedBuiltins`.
   */
  validateFunctionCall(functionName, location, source) {
    if (!this.allowedBuiltins.has(functionName)) {
      throw new ValidationError(
        `Function "${functionName}" is not allowed in restricted mode`,
        location,
        source,
        [
          'Use only whitelisted functions',
          'Check security configuration',
          `Add "${functionName}" to allowedBuiltins if safe`
        ]
      );
    }

    return true;
  }
}