// validation.js - Input validation and sanitization for Baba Yaga
import { ValidationError, ErrorHelpers } from './error.js';
/**
 * Input validation for source code, parsed ASTs and runtime values.
 *
 * All limits are configurable through the constructor. A violated limit is
 * reported by throwing a ValidationError constructed as
 * (message, location, source, suggestions).
 *
 * Tree and object-graph walks in this class use explicit stacks instead of
 * recursion so that the validator itself cannot overflow the call stack on
 * the very inputs it is meant to reject.
 */
export class InputValidator {
  /**
   * @param {object} [config] - Optional overrides for the default limits.
   * @param {number} [config.maxSourceLength] - Max source length in characters.
   * @param {number} [config.maxLineLength] - Max length of a single source line.
   * @param {number} [config.maxASTDepth] - Max nesting depth of the AST.
   * @param {number} [config.maxIdentifierLength] - Max identifier length.
   * @param {number} [config.maxStringLength] - Max string literal length.
   * @param {number} [config.maxListLength] - Max elements in a list.
   * @param {number} [config.maxTableSize] - Max properties in a table literal.
   * @param {RegExp} [config.allowedCharacters] - Pattern the whole source (and
   *   each single character, when locating an offender) must match.
   */
  constructor(config = {}) {
    this.maxSourceLength = config.maxSourceLength ?? 10_000_000; // 10M characters
    this.maxLineLength = config.maxLineLength ?? 10_000;
    this.maxASTDepth = config.maxASTDepth ?? 1000;
    this.maxIdentifierLength = config.maxIdentifierLength ?? 255;
    this.maxStringLength = config.maxStringLength ?? 1_000_000; // 1M characters
    this.maxListLength = config.maxListLength ?? 100_000;
    this.maxTableSize = config.maxTableSize ?? 10_000;
    // Printable ASCII + whitespace. Note that `\s` also admits non-ASCII
    // whitespace characters.
    this.allowedCharacters = config.allowedCharacters ?? /^[\x20-\x7E\s\n\r\t]*$/;
  }

  /**
   * Validate source code before lexing.
   *
   * @param {string} source - Program text.
   * @param {string} [filename] - Accepted for API symmetry; currently unused.
   * @returns {true} When every check passes.
   * @throws {ValidationError} If the source is not a string, is too long,
   *   contains disallowed characters, or has an over-long line.
   */
  validateSourceCode(source, filename = '') {
    if (typeof source !== 'string') {
      throw new ValidationError(
        'Source code must be a string',
        null,
        '',
        ['Ensure you are passing a string to the interpreter']
      );
    }

    // Check source length
    if (source.length > this.maxSourceLength) {
      throw new ValidationError(
        `Source code too large: ${source.length} characters (max: ${this.maxSourceLength})`,
        null,
        source.substring(0, 100) + '...',
        [
          'Break your code into smaller modules',
          'Consider using external data files',
          `Increase maxSourceLength in configuration to ${source.length + 1000}`
        ]
      );
    }

    // Check for null bytes and other problematic characters.
    // A user-supplied regex may carry the `g`/`y` flag, which makes `.test()`
    // stateful; resetting lastIndex keeps repeated calls deterministic.
    this.allowedCharacters.lastIndex = 0;
    if (!this.allowedCharacters.test(source)) {
      // May be null when a custom pattern rejects a sequence rather than a
      // single character; report without a location in that case.
      const offender = this.findProblematicCharacters(source);
      const suggestions = [
        'Use only printable ASCII characters',
        'Check for hidden Unicode characters'
      ];
      if (offender) {
        suggestions.unshift(`Found invalid character: ${JSON.stringify(offender.char)}`);
      }
      throw new ValidationError(
        'Source code contains invalid characters',
        offender ? offender.location : null,
        source,
        suggestions
      );
    }

    // Check for extremely long lines (potential minified code)
    const lines = source.split('\n');
    for (let i = 0; i < lines.length; i++) {
      if (lines[i].length > this.maxLineLength) {
        throw new ValidationError(
          `Line ${i + 1} is extremely long (${lines[i].length} characters)`,
          { line: i + 1, column: 1 },
          source,
          [
            'Break long lines into multiple lines',
            'Check if this is minified code that should be formatted',
            'Consider if this is actually data that should be in a separate file'
          ]
        );
      }
    }

    return true;
  }

  /**
   * Find the first character that, tested on its own, does not match
   * `allowedCharacters`.
   *
   * @param {string} source - Program text.
   * @returns {{char: string, location: {line: number, column: number, length: number}}|null}
   *   The offending UTF-16 code unit with its 1-based position, or null when
   *   every individual character matches.
   */
  findProblematicCharacters(source) {
    for (let i = 0; i < source.length; i++) {
      const char = source[i];
      this.allowedCharacters.lastIndex = 0; // neutralize `g`/`y` statefulness
      if (!this.allowedCharacters.test(char)) {
        const lines = source.substring(0, i).split('\n');
        return {
          char,
          location: {
            line: lines.length,
            column: lines[lines.length - 1].length + 1,
            length: 1
          }
        };
      }
    }
    return null;
  }

  /**
   * Validate AST structure and depth.
   *
   * @param {object} ast - Root AST node.
   * @param {string} [source] - Original source, forwarded into errors.
   * @returns {true} When the AST passes every check.
   * @throws {ValidationError} If the AST is not an object, is nested deeper
   *   than `maxASTDepth`, or contains an invalid node.
   */
  validateAST(ast, source = '') {
    if (!ast || typeof ast !== 'object') {
      throw new ValidationError(
        'Invalid AST: must be an object',
        null,
        source,
        ['Check parser output', 'Ensure parsing completed successfully']
      );
    }

    // Check AST depth to prevent stack overflow
    const maxDepth = this.checkASTDepth(ast);
    if (maxDepth > this.maxASTDepth) {
      throw new ValidationError(
        `AST too deep: ${maxDepth} levels (max: ${this.maxASTDepth})`,
        this.findDeepestNode(ast)?.location,
        source,
        [
          'Reduce nesting in your code',
          'Break complex expressions into smaller parts',
          `Increase maxASTDepth in configuration to ${maxDepth + 100}`
        ]
      );
    }

    // Validate AST node structure
    this.validateASTNodes(ast, source);
    return true;
  }

  /**
   * Compute the maximum depth reachable from `node`.
   *
   * Implemented with an explicit stack: a recursive walk would overflow the
   * call stack on exactly the over-deep trees this check exists to reject.
   *
   * @param {object} node - Node to start from.
   * @param {number} [depth] - Depth assigned to `node` itself.
   * @returns {number} Largest depth of any node reachable via getChildNodes.
   */
  checkASTDepth(node, depth = 0) {
    if (!node || typeof node !== 'object') {
      return depth;
    }

    let deepest = depth;
    const pending = [[node, depth]];
    while (pending.length > 0) {
      const [current, currentDepth] = pending.pop();
      if (currentDepth > deepest) {
        deepest = currentDepth;
      }
      for (const child of this.getChildNodes(current)) {
        if (child) {
          pending.push([child, currentDepth + 1]);
        }
      }
    }
    return deepest;
  }

  /**
   * Find the deepest node in the AST (for error reporting).
   *
   * Nodes are visited in pre-order, so among equally deep nodes the first
   * one in child order is returned.
   *
   * @param {object} ast - Root AST node.
   * @returns {object} The deepest node, or `ast` itself when it has no children.
   */
  findDeepestNode(ast) {
    let deepestNode = ast;
    let maxDepth = 0;

    const pending = [[ast, 0]];
    while (pending.length > 0) {
      const [node, depth] = pending.pop();
      if (depth > maxDepth) {
        maxDepth = depth;
        deepestNode = node;
      }
      const children = this.getChildNodes(node);
      // Push in reverse so children are popped in their original order.
      for (let i = children.length - 1; i >= 0; i--) {
        if (children[i]) {
          pending.push([children[i], depth + 1]);
        }
      }
    }
    return deepestNode;
  }

  /**
   * Get all child nodes from an AST node.
   *
   * @param {object} node - AST node; its `type` selects which fields hold children.
   * @returns {Array} Child nodes in source order (may contain falsy entries
   *   taken verbatim from list fields); empty for unknown types and non-objects.
   */
  getChildNodes(node) {
    if (!node || typeof node !== 'object') {
      return [];
    }

    const children = [];
    // Append element by element: `push(...list)` passes every element as a
    // call argument and throws RangeError once the list is large enough,
    // which would mask the "too long" validation errors.
    const pushAll = (list) => {
      if (!Array.isArray(list)) {
        return;
      }
      for (const item of list) {
        children.push(item);
      }
    };

    switch (node.type) {
      case 'Program':
        pushAll(node.body);
        break;
      case 'FunctionDeclaration':
      case 'VariableDeclaration':
        if (node.body) children.push(node.body);
        if (node.value) children.push(node.value);
        break;
      case 'FunctionCall':
        if (node.callee) children.push(node.callee);
        pushAll(node.arguments);
        break;
      case 'BinaryExpression':
        if (node.left) children.push(node.left);
        if (node.right) children.push(node.right);
        break;
      case 'UnaryExpression':
        if (node.operand) children.push(node.operand);
        break;
      case 'WhenExpression':
        pushAll(node.discriminants);
        for (const whenCase of node.cases || []) {
          if (whenCase.consequent) children.push(whenCase.consequent);
        }
        break;
      case 'ListLiteral':
        pushAll(node.elements);
        break;
      case 'TableLiteral':
        for (const prop of node.properties || []) {
          if (prop.value) children.push(prop.value);
        }
        break;
      case 'MemberExpression':
        if (node.object) children.push(node.object);
        if (node.property) children.push(node.property);
        break;
      case 'AnonymousFunction':
        if (node.body) children.push(node.body);
        break;
      case 'WithHeader':
        for (const entry of node.entries || []) {
          if (entry.value) children.push(entry.value);
        }
        if (node.body) children.push(node.body);
        break;
      case 'ResultExpression':
        if (node.value) children.push(node.value);
        break;
    }
    return children;
  }

  /**
   * Validate `node` and everything reachable from it, in pre-order.
   *
   * @param {object} node - Root of the subtree to validate.
   * @param {string} source - Original source, forwarded into errors.
   * @throws {ValidationError} On the first invalid node encountered.
   */
  validateASTNodes(node, source) {
    const pending = [node];
    while (pending.length > 0) {
      const current = pending.pop();
      if (!current || typeof current !== 'object') {
        continue;
      }

      // Validate node has required type field
      if (!current.type || typeof current.type !== 'string') {
        throw new ValidationError(
          'Invalid AST node: missing or invalid type field',
          current.location,
          source,
          ['Check parser implementation', 'Ensure all nodes have a type property']
        );
      }

      // Validate specific node types
      switch (current.type) {
        case 'Identifier':
          this.validateIdentifier(current, source);
          break;
        case 'StringLiteral':
          this.validateStringLiteral(current, source);
          break;
        case 'ListLiteral':
          this.validateListLiteral(current, source);
          break;
        case 'TableLiteral':
          this.validateTableLiteral(current, source);
          break;
      }

      // Queue children in reverse so they are validated in source order.
      const children = this.getChildNodes(current);
      for (let i = children.length - 1; i >= 0; i--) {
        if (children[i]) {
          pending.push(children[i]);
        }
      }
    }
  }

  /**
   * Validate identifier names.
   *
   * @param {object} node - Identifier node (`name`, optional `location`).
   * @param {string} source - Original source, forwarded into errors.
   * @throws {ValidationError} If the name is missing, too long, or one of
   *   the names listed in `reservedWords` below.
   */
  validateIdentifier(node, source) {
    if (!node.name || typeof node.name !== 'string') {
      throw new ValidationError(
        'Invalid identifier: missing name',
        node.location,
        source,
        ['Check identifier declaration']
      );
    }

    if (node.name.length > this.maxIdentifierLength) {
      throw new ValidationError(
        `Identifier too long: ${node.name.length} characters (max: ${this.maxIdentifierLength})`,
        node.location,
        source,
        ['Use shorter variable names', 'Consider abbreviations']
      );
    }

    // Check for reserved words that might cause issues
    const reservedWords = ['undefined', 'null', 'NaN', 'Infinity', 'constructor', 'prototype'];
    if (reservedWords.includes(node.name)) {
      throw new ValidationError(
        `Identifier "${node.name}" conflicts with JavaScript reserved word`,
        node.location,
        source,
        [`Use a different name like "${node.name}_" or "my${node.name}"`]
      );
    }
  }

  /**
   * Validate string literals.
   *
   * @param {object} node - StringLiteral node (`value`, optional `location`).
   * @param {string} source - Original source, forwarded into errors.
   * @throws {ValidationError} If `value` is not a string or exceeds `maxStringLength`.
   */
  validateStringLiteral(node, source) {
    if (typeof node.value !== 'string') {
      throw new ValidationError(
        'Invalid string literal: value must be a string',
        node.location,
        source,
        ['Check string parsing logic']
      );
    }

    if (node.value.length > this.maxStringLength) {
      throw new ValidationError(
        `String too long: ${node.value.length} characters (max: ${this.maxStringLength})`,
        node.location,
        source,
        [
          'Consider breaking large strings into smaller parts',
          'Use external files for large text data',
          `Increase maxStringLength to ${node.value.length + 1000}`
        ]
      );
    }
  }

  /**
   * Validate list literals.
   *
   * @param {object} node - ListLiteral node (`elements`, optional `location`).
   * @param {string} source - Original source, forwarded into errors.
   * @throws {ValidationError} If `elements` is not an array or exceeds `maxListLength`.
   */
  validateListLiteral(node, source) {
    if (!Array.isArray(node.elements)) {
      throw new ValidationError(
        'Invalid list literal: elements must be an array',
        node.location,
        source,
        ['Check list parsing logic']
      );
    }

    if (node.elements.length > this.maxListLength) {
      throw new ValidationError(
        `List too long: ${node.elements.length} elements (max: ${this.maxListLength})`,
        node.location,
        source,
        [
          'Consider using external data files',
          'Process data in smaller chunks',
          `Increase maxListLength to ${node.elements.length + 1000}`
        ]
      );
    }
  }

  /**
   * Validate table literals.
   *
   * @param {object} node - TableLiteral node (`properties`, optional `location`).
   * @param {string} source - Original source, forwarded into errors.
   * @throws {ValidationError} If `properties` is not an array, exceeds
   *   `maxTableSize`, or two properties share the same `key`.
   */
  validateTableLiteral(node, source) {
    if (!Array.isArray(node.properties)) {
      throw new ValidationError(
        'Invalid table literal: properties must be an array',
        node.location,
        source,
        ['Check table parsing logic']
      );
    }

    if (node.properties.length > this.maxTableSize) {
      throw new ValidationError(
        `Table too large: ${node.properties.length} properties (max: ${this.maxTableSize})`,
        node.location,
        source,
        [
          'Break large tables into smaller ones',
          'Use nested structures',
          `Increase maxTableSize to ${node.properties.length + 1000}`
        ]
      );
    }

    // Check for duplicate keys
    const seenKeys = new Set();
    for (const prop of node.properties) {
      if (seenKeys.has(prop.key)) {
        throw new ValidationError(
          `Duplicate table key: "${prop.key}"`,
          node.location,
          source,
          [`Remove duplicate key "${prop.key}"`, 'Use unique keys for table properties']
        );
      }
      seenKeys.add(prop.key);
    }
  }

  /**
   * Validate runtime values during execution.
   *
   * @param {*} value - Value to check.
   * @param {string} [context] - Label inserted into the circular-reference message.
   * @returns {true} When the value passes.
   * @throws {ValidationError} If the value contains a circular reference or
   *   is an array longer than `maxListLength`.
   */
  validateRuntimeValue(value, context = 'runtime') {
    // Check for circular references in objects
    if (typeof value === 'object' && value !== null) {
      this.checkCircularReferences(value, new WeakSet(), context);
    }

    // Validate specific value types
    if (Array.isArray(value)) {
      if (value.length > this.maxListLength) {
        throw new ValidationError(
          `Runtime list too long: ${value.length} elements (max: ${this.maxListLength})`,
          null,
          '',
          ['Process data in smaller chunks', 'Increase maxListLength']
        );
      }
    }

    return true;
  }

  /**
   * Check for circular references in objects.
   *
   * Depth-first walk with an explicit stack. `visited` holds the objects on
   * the current path (reaching one of them again is a cycle); a separate
   * set remembers objects whose whole subgraph was already explored so that
   * shared, non-cyclic sub-objects are walked once instead of once per path.
   *
   * Objects whose `properties` is a Map are walked through that Map's
   * values, arrays through their items, and other objects through
   * `Object.values`.
   *
   * @param {object} obj - Root object to inspect; non-objects are ignored.
   * @param {WeakSet<object>} visited - Objects considered "on the current
   *   path". Entries added by this call are removed again on normal return.
   * @param {string} context - Label inserted into the error message.
   * @throws {ValidationError} If an object is reachable from itself.
   */
  checkCircularReferences(obj, visited, context) {
    if (typeof obj !== 'object' || obj === null) {
      return;
    }

    const explored = new WeakSet();
    const path = [];

    const valuesOf = (target) => {
      if (target.properties instanceof Map) {
        // Handle Baba Yaga table objects
        return Array.from(target.properties.values());
      }
      if (Array.isArray(target)) {
        return target;
      }
      return Object.values(target);
    };

    const enter = (target) => {
      if (visited.has(target)) {
        throw new ValidationError(
          `Circular reference detected in ${context}`,
          null,
          '',
          [
            'Avoid creating circular object references',
            'Use weak references where appropriate',
            'Check object construction logic'
          ]
        );
      }
      if (explored.has(target)) {
        return; // already proven cycle-free from here down
      }
      visited.add(target);
      path.push({ target, values: valuesOf(target), index: 0 });
    };

    enter(obj);
    while (path.length > 0) {
      const frame = path[path.length - 1];
      if (frame.index < frame.values.length) {
        const value = frame.values[frame.index];
        frame.index += 1;
        if (typeof value === 'object' && value !== null) {
          enter(value);
        }
      } else {
        // Leaving this object: it is no longer on the path.
        path.pop();
        visited.delete(frame.target);
        explored.add(frame.target);
      }
    }
  }
}
/**
 * Security-focused validation for untrusted input.
 *
 * Extends InputValidator with a textual scan for unsafe patterns and a
 * whitelist check for function names.
 */
export class SecurityValidator extends InputValidator {
  /**
   * @param {object} [config] - InputValidator options plus the ones below.
   * @param {number} [config.maxExecutionTime] - Stored limit (default 30000);
   *   not enforced inside this class.
   * @param {number} [config.maxMemoryUsage] - Stored limit (default
   *   100_000_000); not enforced inside this class.
   * @param {Iterable<string>} [config.allowedBuiltins] - Function names
   *   accepted by validateFunctionCall.
   */
  constructor(config = {}) {
    super(config);
    this.maxExecutionTime = config.maxExecutionTime ?? 30000; // 30 seconds
    this.maxMemoryUsage = config.maxMemoryUsage ?? 100_000_000; // 100MB
    this.allowedBuiltins = new Set(config.allowedBuiltins ?? [
      'map', 'filter', 'reduce', 'append', 'prepend', 'concat',
      'str.concat', 'str.split', 'str.join', 'str.length',
      'math.abs', 'math.min', 'math.max', 'math.floor', 'math.ceil'
    ]);
  }

  /**
   * Additional security validation for untrusted code.
   *
   * Runs validateSourceCode first, then scans the raw text for the patterns
   * listed below and rejects the first one (in list order) that occurs.
   * The scan is purely textual, so occurrences inside string literals or
   * comments are rejected as well.
   *
   * @param {string} source - Program text.
   * @param {string} [filename] - Forwarded to validateSourceCode.
   * @returns {true} When no check fails.
   * @throws {ValidationError} From validateSourceCode, or when a dangerous
   *   pattern is found (location points at the first match).
   */
  validateUntrustedCode(source, filename = '') {
    // Run basic validation first
    this.validateSourceCode(source, filename);

    // Keyword patterns start with `\b` so that identifiers which merely end
    // in a keyword (e.g. "retrieval(", "myFunction(", "preprocess.") are not
    // rejected as false positives.
    const dangerousPatterns = [
      { pattern: /\beval\s*\(/, message: 'eval() is not allowed' },
      { pattern: /\bFunction\s*\(/, message: 'Function constructor is not allowed' },
      { pattern: /\bimport\s+/, message: 'import statements are not allowed' },
      { pattern: /\brequire\s*\(/, message: 'require() is not allowed' },
      { pattern: /\bprocess\s*\./, message: 'process object access is not allowed' },
      { pattern: /\bglobal\s*\./, message: 'global object access is not allowed' },
      { pattern: /__proto__/, message: '__proto__ access is not allowed' },
      { pattern: /\bconstructor\s*\./, message: 'constructor access is not allowed' }
    ];

    for (const { pattern, message } of dangerousPatterns) {
      // One exec() gives both the verdict and the match position.
      const match = pattern.exec(source);
      if (match === null) {
        continue;
      }

      const precedingLines = source.substring(0, match.index).split('\n');
      throw new ValidationError(
        message,
        {
          line: precedingLines.length,
          column: precedingLines[precedingLines.length - 1].length + 1,
          length: match[0].length
        },
        source,
        ['Remove unsafe code patterns', 'Use only Baba Yaga built-in functions']
      );
    }

    return true;
  }

  /**
   * Validate function calls against whitelist.
   *
   * @param {string} functionName - Name to look up in `allowedBuiltins`.
   * @param {object} location - Source location forwarded into the error.
   * @param {string} source - Original source forwarded into the error.
   * @returns {true} When the function is whitelisted.
   * @throws {ValidationError} When the function is not whitelisted.
   */
  validateFunctionCall(functionName, location, source) {
    if (!this.allowedBuiltins.has(functionName)) {
      throw new ValidationError(
        `Function "${functionName}" is not allowed in restricted mode`,
        location,
        source,
        [
          'Use only whitelisted functions',
          'Check security configuration',
          `Add "${functionName}" to allowedBuiltins if safe`
        ]
      );
    }
    return true;
  }
}