about summary refs log tree commit diff stats
path: root/js/baba-yaga/src/core/validation.js
diff options
context:
space:
mode:
Diffstat (limited to 'js/baba-yaga/src/core/validation.js')
-rw-r--r-- js/baba-yaga/src/core/validation.js 567
1 files changed, 567 insertions, 0 deletions
diff --git a/js/baba-yaga/src/core/validation.js b/js/baba-yaga/src/core/validation.js
new file mode 100644
index 0000000..eedf71e
--- /dev/null
+++ b/js/baba-yaga/src/core/validation.js
@@ -0,0 +1,567 @@
+// validation.js - Input validation and sanitization for Baba Yaga
+
+import { ValidationError, ErrorHelpers } from './error.js';
+
+/**
+ * Input validation for source code, parsed ASTs and runtime values.
+ *
+ * Every failed check throws a ValidationError constructed as
+ * (message, location, source, suggestions). The entry points
+ * validateSourceCode, validateAST and validateRuntimeValue return true when
+ * nothing is wrong.
+ */
+export class InputValidator {
+  /**
+   * @param {object} [config] - Optional overrides; a limit that is null or
+   *   undefined keeps its default.
+   * @param {number} [config.maxSourceLength] - Max source length (string length).
+   * @param {number} [config.maxASTDepth] - Max nesting depth of the AST.
+   * @param {number} [config.maxIdentifierLength] - Max identifier name length.
+   * @param {number} [config.maxStringLength] - Max string literal length.
+   * @param {number} [config.maxListLength] - Max elements in a list (literal or runtime array).
+   * @param {number} [config.maxTableSize] - Max properties in a table literal.
+   * @param {RegExp} [config.allowedCharacters] - Pattern the whole source must match.
+   */
+  constructor(config = {}) {
+    this.maxSourceLength = config.maxSourceLength ?? 10_000_000; // 10M characters
+    this.maxASTDepth = config.maxASTDepth ?? 1000;
+    this.maxIdentifierLength = config.maxIdentifierLength ?? 255;
+    this.maxStringLength = config.maxStringLength ?? 1_000_000; // 1M characters
+    this.maxListLength = config.maxListLength ?? 100_000;
+    this.maxTableSize = config.maxTableSize ?? 10_000;
+    // Printable ASCII + whitespace. Note that \s also admits non-ASCII
+    // whitespace code points (for example U+00A0), not only space/tab/newline.
+    this.allowedCharacters = config.allowedCharacters ?? /^[\x20-\x7E\s\n\r\t]*$/;
+  }
+
+  /**
+   * Test a piece of text against the configured allowedCharacters pattern.
+   *
+   * A pattern created with the `g` or `y` flag remembers its position in
+   * `lastIndex` between calls, which would make repeated tests start in the
+   * middle of the next input. Rewinding first keeps every test independent.
+   *
+   * @param {string} text - Text to test.
+   * @returns {boolean} True when the pattern accepts the text.
+   */
+  matchesAllowedCharacters(text) {
+    this.allowedCharacters.lastIndex = 0;
+    return this.allowedCharacters.test(text);
+  }
+
+  /**
+   * Validate source code before lexing.
+   *
+   * Checks, in order: the value is a string, it is not longer than
+   * maxSourceLength, it matches allowedCharacters, and no line is longer than
+   * 10000 characters.
+   *
+   * @param {*} source - Source text to validate.
+   * @param {string} [filename='<input>'] - Accepted for callers; not used by the checks.
+   * @returns {boolean} Always true when validation passes.
+   * @throws {ValidationError} When any check fails.
+   */
+  validateSourceCode(source, filename = '<input>') {
+    if (typeof source !== 'string') {
+      throw new ValidationError(
+        'Source code must be a string',
+        null,
+        '',
+        ['Ensure you are passing a string to the interpreter']
+      );
+    }
+
+    // Check source length
+    if (source.length > this.maxSourceLength) {
+      throw new ValidationError(
+        `Source code too large: ${source.length} characters (max: ${this.maxSourceLength})`,
+        null,
+        source.substring(0, 100) + '...',
+        [
+          'Break your code into smaller modules',
+          'Consider using external data files',
+          `Increase maxSourceLength in configuration to ${source.length + 1000}`
+        ]
+      );
+    }
+
+    // Check for null bytes and other problematic characters
+    if (!this.matchesAllowedCharacters(source)) {
+      // A custom pattern may reject the text as a whole while accepting each
+      // character on its own; in that case there is no single offender to
+      // point at and the error is raised without a location.
+      const offender = this.findProblematicCharacters(source);
+      throw new ValidationError(
+        'Source code contains invalid characters',
+        offender ? offender.location : null,
+        source,
+        [
+          offender
+            ? `Found invalid character: ${JSON.stringify(offender.char)}`
+            : 'Source does not match the configured allowedCharacters pattern',
+          'Use only printable ASCII characters',
+          'Check for hidden Unicode characters'
+        ]
+      );
+    }
+
+    // Check for extremely long lines (potential minified code)
+    const lines = source.split('\n');
+    for (let i = 0; i < lines.length; i++) {
+      if (lines[i].length > 10000) {
+        throw new ValidationError(
+          `Line ${i + 1} is extremely long (${lines[i].length} characters)`,
+          { line: i + 1, column: 1 },
+          source,
+          [
+            'Break long lines into multiple lines',
+            'Check if this is minified code that should be formatted',
+            'Consider if this is actually data that should be in a separate file'
+          ]
+        );
+      }
+    }
+
+    return true;
+  }
+
+  /**
+   * Find the first character in the source that allowedCharacters rejects.
+   *
+   * The source is walked by code point, so a character outside the Basic
+   * Multilingual Plane is tested and reported as one character (length 2)
+   * instead of as half of a surrogate pair.
+   *
+   * @param {string} source - Source text to scan.
+   * @returns {{char: string, location: {line: number, column: number, length: number}}|null}
+   *   The offending character with its 1-based line/column, or null when every
+   *   character is accepted individually.
+   */
+  findProblematicCharacters(source) {
+    let index = 0;
+    while (index < source.length) {
+      const char = String.fromCodePoint(source.codePointAt(index));
+      if (!this.matchesAllowedCharacters(char)) {
+        const lines = source.substring(0, index).split('\n');
+        return {
+          char,
+          location: {
+            line: lines.length,
+            column: lines[lines.length - 1].length + 1,
+            length: char.length
+          }
+        };
+      }
+      index += char.length;
+    }
+    return null;
+  }
+
+  /**
+   * Validate AST structure and depth.
+   *
+   * @param {object} ast - Root AST node.
+   * @param {string} [source=''] - Original source, forwarded to any error.
+   * @returns {boolean} Always true when validation passes.
+   * @throws {ValidationError} When the root is not an object, the tree is
+   *   deeper than maxASTDepth, or a node fails validateASTNodes.
+   */
+  validateAST(ast, source = '') {
+    if (!ast || typeof ast !== 'object') {
+      throw new ValidationError(
+        'Invalid AST: must be an object',
+        null,
+        source,
+        ['Check parser output', 'Ensure parsing completed successfully']
+      );
+    }
+
+    // Check AST depth to prevent stack overflow
+    const maxDepth = this.checkASTDepth(ast);
+    if (maxDepth > this.maxASTDepth) {
+      throw new ValidationError(
+        `AST too deep: ${maxDepth} levels (max: ${this.maxASTDepth})`,
+        this.findDeepestNode(ast).location,
+        source,
+        [
+          'Reduce nesting in your code',
+          'Break complex expressions into smaller parts',
+          `Increase maxASTDepth in configuration to ${maxDepth + 100}`
+        ]
+      );
+    }
+
+    // Validate AST node structure
+    this.validateASTNodes(ast, source);
+
+    return true;
+  }
+
+  /**
+   * Recursively compute the greatest depth reached below a node.
+   *
+   * @param {*} node - Node to measure; a non-object contributes no further depth.
+   * @param {number} [depth=0] - Depth assigned to `node` itself.
+   * @returns {number} The largest depth found among `node` and its descendants.
+   */
+  checkASTDepth(node, depth = 0) {
+    if (!node || typeof node !== 'object') {
+      return depth;
+    }
+
+    let maxChildDepth = depth;
+
+    // Check all possible child nodes
+    const childNodes = this.getChildNodes(node);
+    for (const child of childNodes) {
+      if (child) {
+        const childDepth = this.checkASTDepth(child, depth + 1);
+        maxChildDepth = Math.max(maxChildDepth, childDepth);
+      }
+    }
+
+    return maxChildDepth;
+  }
+
+  /**
+   * Find the deepest node in the AST (for error reporting).
+   *
+   * When several nodes share the greatest depth, the first one met in
+   * traversal order is returned; the root is returned for a childless tree.
+   *
+   * @param {object} ast - Root AST node.
+   * @returns {*} The deepest node encountered.
+   */
+  findDeepestNode(ast) {
+    let deepestNode = ast;
+    let maxDepth = 0;
+
+    const traverse = (node, depth = 0) => {
+      if (depth > maxDepth) {
+        maxDepth = depth;
+        deepestNode = node;
+      }
+
+      const children = this.getChildNodes(node);
+      for (const child of children) {
+        if (child) {
+          traverse(child, depth + 1);
+        }
+      }
+    };
+
+    traverse(ast);
+    return deepestNode;
+  }
+
+  /**
+   * Get all child nodes from an AST node.
+   *
+   * Only the node types listed in the switch contribute children; any other
+   * type, and any non-object value, yields an empty array.
+   *
+   * @param {*} node - AST node.
+   * @returns {Array} Child nodes in the order they are collected below.
+   */
+  getChildNodes(node) {
+    if (!node || typeof node !== 'object') {
+      return [];
+    }
+
+    const children = [];
+
+    switch (node.type) {
+      case 'Program':
+        children.push(...(node.body || []));
+        break;
+      case 'FunctionDeclaration':
+      case 'VariableDeclaration':
+        if (node.body) children.push(node.body);
+        if (node.value) children.push(node.value);
+        break;
+      case 'FunctionCall':
+        if (node.callee) children.push(node.callee);
+        children.push(...(node.arguments || []));
+        break;
+      case 'BinaryExpression':
+        if (node.left) children.push(node.left);
+        if (node.right) children.push(node.right);
+        break;
+      case 'UnaryExpression':
+        if (node.operand) children.push(node.operand);
+        break;
+      case 'WhenExpression':
+        // Only discriminants and each case's consequent are visited here.
+        children.push(...(node.discriminants || []));
+        for (const whenCase of node.cases || []) {
+          if (whenCase.consequent) children.push(whenCase.consequent);
+        }
+        break;
+      case 'ListLiteral':
+        children.push(...(node.elements || []));
+        break;
+      case 'TableLiteral':
+        for (const prop of node.properties || []) {
+          if (prop.value) children.push(prop.value);
+        }
+        break;
+      case 'MemberExpression':
+        if (node.object) children.push(node.object);
+        if (node.property) children.push(node.property);
+        break;
+      case 'AnonymousFunction':
+        if (node.body) children.push(node.body);
+        break;
+      case 'WithHeader':
+        for (const entry of node.entries || []) {
+          if (entry.value) children.push(entry.value);
+        }
+        if (node.body) children.push(node.body);
+        break;
+      case 'ResultExpression':
+        if (node.value) children.push(node.value);
+        break;
+    }
+
+    return children;
+  }
+
+  /**
+   * Validate a node and, recursively, every child returned by getChildNodes.
+   *
+   * Each node must carry a non-empty string `type`. Identifier, StringLiteral,
+   * ListLiteral and TableLiteral nodes get an additional type-specific check.
+   *
+   * @param {*} node - AST node; non-objects are ignored.
+   * @param {string} source - Original source, forwarded to any error.
+   * @throws {ValidationError} When a node is malformed.
+   */
+  validateASTNodes(node, source) {
+    if (!node || typeof node !== 'object') {
+      return;
+    }
+
+    // Validate node has required type field
+    if (!node.type || typeof node.type !== 'string') {
+      throw new ValidationError(
+        'Invalid AST node: missing or invalid type field',
+        node.location,
+        source,
+        ['Check parser implementation', 'Ensure all nodes have a type property']
+      );
+    }
+
+    // Validate specific node types
+    switch (node.type) {
+      case 'Identifier':
+        this.validateIdentifier(node, source);
+        break;
+      case 'StringLiteral':
+        this.validateStringLiteral(node, source);
+        break;
+      case 'ListLiteral':
+        this.validateListLiteral(node, source);
+        break;
+      case 'TableLiteral':
+        this.validateTableLiteral(node, source);
+        break;
+    }
+
+    // Recursively validate child nodes
+    const children = this.getChildNodes(node);
+    for (const child of children) {
+      if (child) {
+        this.validateASTNodes(child, source);
+      }
+    }
+  }
+
+  /**
+   * Validate identifier names: present, a string, within maxIdentifierLength,
+   * and not one of the names rejected in the list below.
+   *
+   * @param {object} node - Identifier node with `name` and optional `location`.
+   * @param {string} source - Original source, forwarded to any error.
+   * @throws {ValidationError} When the identifier is rejected.
+   */
+  validateIdentifier(node, source) {
+    if (!node.name || typeof node.name !== 'string') {
+      throw new ValidationError(
+        'Invalid identifier: missing name',
+        node.location,
+        source,
+        ['Check identifier declaration']
+      );
+    }
+
+    if (node.name.length > this.maxIdentifierLength) {
+      throw new ValidationError(
+        `Identifier too long: ${node.name.length} characters (max: ${this.maxIdentifierLength})`,
+        node.location,
+        source,
+        ['Use shorter variable names', 'Consider abbreviations']
+      );
+    }
+
+    // Check for reserved words that might cause issues
+    const reservedWords = ['undefined', 'null', 'NaN', 'Infinity', 'constructor', 'prototype'];
+    if (reservedWords.includes(node.name)) {
+      throw new ValidationError(
+        `Identifier "${node.name}" conflicts with JavaScript reserved word`,
+        node.location,
+        source,
+        [`Use a different name like "${node.name}_" or "my${node.name}"`]
+      );
+    }
+  }
+
+  /**
+   * Validate string literals: `value` must be a string no longer than
+   * maxStringLength.
+   *
+   * @param {object} node - StringLiteral node.
+   * @param {string} source - Original source, forwarded to any error.
+   * @throws {ValidationError} When the literal is rejected.
+   */
+  validateStringLiteral(node, source) {
+    if (typeof node.value !== 'string') {
+      throw new ValidationError(
+        'Invalid string literal: value must be a string',
+        node.location,
+        source,
+        ['Check string parsing logic']
+      );
+    }
+
+    if (node.value.length > this.maxStringLength) {
+      throw new ValidationError(
+        `String too long: ${node.value.length} characters (max: ${this.maxStringLength})`,
+        node.location,
+        source,
+        [
+          'Consider breaking large strings into smaller parts',
+          'Use external files for large text data',
+          `Increase maxStringLength to ${node.value.length + 1000}`
+        ]
+      );
+    }
+  }
+
+  /**
+   * Validate list literals: `elements` must be an array of at most
+   * maxListLength entries.
+   *
+   * @param {object} node - ListLiteral node.
+   * @param {string} source - Original source, forwarded to any error.
+   * @throws {ValidationError} When the literal is rejected.
+   */
+  validateListLiteral(node, source) {
+    if (!Array.isArray(node.elements)) {
+      throw new ValidationError(
+        'Invalid list literal: elements must be an array',
+        node.location,
+        source,
+        ['Check list parsing logic']
+      );
+    }
+
+    if (node.elements.length > this.maxListLength) {
+      throw new ValidationError(
+        `List too long: ${node.elements.length} elements (max: ${this.maxListLength})`,
+        node.location,
+        source,
+        [
+          'Consider using external data files',
+          'Process data in smaller chunks',
+          `Increase maxListLength to ${node.elements.length + 1000}`
+        ]
+      );
+    }
+  }
+
+  /**
+   * Validate table literals: `properties` must be an array of at most
+   * maxTableSize entries whose `key` values are all distinct.
+   *
+   * @param {object} node - TableLiteral node.
+   * @param {string} source - Original source, forwarded to any error.
+   * @throws {ValidationError} When the literal is rejected.
+   */
+  validateTableLiteral(node, source) {
+    if (!Array.isArray(node.properties)) {
+      throw new ValidationError(
+        'Invalid table literal: properties must be an array',
+        node.location,
+        source,
+        ['Check table parsing logic']
+      );
+    }
+
+    if (node.properties.length > this.maxTableSize) {
+      throw new ValidationError(
+        `Table too large: ${node.properties.length} properties (max: ${this.maxTableSize})`,
+        node.location,
+        source,
+        [
+          'Break large tables into smaller ones',
+          'Use nested structures',
+          `Increase maxTableSize to ${node.properties.length + 1000}`
+        ]
+      );
+    }
+
+    // Check for duplicate keys
+    const keys = new Set();
+    for (const prop of node.properties) {
+      if (keys.has(prop.key)) {
+        throw new ValidationError(
+          `Duplicate table key: "${prop.key}"`,
+          node.location,
+          source,
+          [`Remove duplicate key "${prop.key}"`, 'Use unique keys for table properties']
+        );
+      }
+      keys.add(prop.key);
+    }
+  }
+
+  /**
+   * Validate runtime values during execution.
+   *
+   * Objects are checked for circular references; arrays are additionally
+   * checked against maxListLength (top level only).
+   *
+   * @param {*} value - Value to validate.
+   * @param {string} [context='runtime'] - Label used in the circular-reference message.
+   * @returns {boolean} Always true when validation passes.
+   * @throws {ValidationError} On a circular reference or an over-long array.
+   */
+  validateRuntimeValue(value, context = 'runtime') {
+    // Check for circular references in objects
+    if (typeof value === 'object' && value !== null) {
+      this.checkCircularReferences(value, new WeakSet(), context);
+    }
+
+    // Validate specific value types
+    if (Array.isArray(value)) {
+      if (value.length > this.maxListLength) {
+        throw new ValidationError(
+          `Runtime list too long: ${value.length} elements (max: ${this.maxListLength})`,
+          null,
+          '',
+          ['Process data in smaller chunks', 'Increase maxListLength']
+        );
+      }
+    }
+
+    return true;
+  }
+
+  /**
+   * Check for circular references in objects.
+   *
+   * `visited` holds the objects on the path currently being explored; meeting
+   * one of them again means the value refers back to its own ancestor.
+   * `verified` holds objects whose contents were already explored completely
+   * without finding a cycle, so a value that is merely shared from several
+   * places is walked once instead of once per path to it.
+   *
+   * Objects whose `properties` is a Map are traversed through that Map's
+   * values; arrays through their items; other objects through Object.values.
+   *
+   * @param {*} obj - Value to inspect; non-objects are ignored.
+   * @param {WeakSet<object>} visited - Objects on the current path.
+   * @param {string} context - Label used in the error message.
+   * @param {WeakSet<object>} [verified] - Objects already known to be cycle-free.
+   * @throws {ValidationError} When a circular reference is found.
+   */
+  checkCircularReferences(obj, visited, context, verified = new WeakSet()) {
+    if (typeof obj !== 'object' || obj === null) {
+      return;
+    }
+
+    if (visited.has(obj)) {
+      throw new ValidationError(
+        `Circular reference detected in ${context}`,
+        null,
+        '',
+        [
+          'Avoid creating circular object references',
+          'Use weak references where appropriate',
+          'Check object construction logic'
+        ]
+      );
+    }
+
+    if (verified.has(obj)) {
+      return;
+    }
+
+    visited.add(obj);
+
+    let children;
+    if (obj.properties instanceof Map) {
+      // Handle Baba Yaga table objects
+      children = obj.properties.values();
+    } else if (Array.isArray(obj)) {
+      // Handle arrays
+      children = obj;
+    } else {
+      // Handle regular objects
+      children = Object.values(obj);
+    }
+
+    for (const child of children) {
+      if (typeof child === 'object' && child !== null) {
+        this.checkCircularReferences(child, visited, context, verified);
+      }
+    }
+
+    // Leaving this object: it is no longer on the current path, and everything
+    // reachable from it has been explored.
+    visited.delete(obj);
+    verified.add(obj);
+  }
+}
+
+/**
+ * Security-focused validation for untrusted input.
+ *
+ * Extends InputValidator with a source-level scan for disallowed patterns and
+ * a whitelist check for function names. maxExecutionTime and maxMemoryUsage
+ * are stored on the instance; nothing in this class enforces them.
+ */
+export class SecurityValidator extends InputValidator {
+  /**
+   * @param {object} [config] - InputValidator options plus the ones below.
+   * @param {number} [config.maxExecutionTime=30000] - Stored execution time limit.
+   * @param {number} [config.maxMemoryUsage=100000000] - Stored memory limit.
+   * @param {Iterable<string>} [config.allowedBuiltins] - Function names accepted
+   *   by validateFunctionCall; replaces the default list when given.
+   */
+  constructor(config = {}) {
+    super(config);
+    this.maxExecutionTime = config.maxExecutionTime ?? 30000; // 30 seconds
+    this.maxMemoryUsage = config.maxMemoryUsage ?? 100_000_000; // 100MB
+    this.allowedBuiltins = new Set(config.allowedBuiltins ?? [
+      'map', 'filter', 'reduce', 'append', 'prepend', 'concat',
+      'str.concat', 'str.split', 'str.join', 'str.length',
+      'math.abs', 'math.min', 'math.max', 'math.floor', 'math.ceil'
+    ]);
+  }
+
+  /**
+   * Additional security validation for untrusted code.
+   *
+   * Runs validateSourceCode first, then rejects the source at the first
+   * disallowed pattern found (patterns are tried in the order listed below).
+   *
+   * @param {*} source - Source text to validate.
+   * @param {string} [filename='<untrusted>'] - Forwarded to validateSourceCode.
+   * @returns {boolean} Always true when validation passes.
+   * @throws {ValidationError} When basic validation fails or a disallowed
+   *   pattern is present; the location points at the match (1-based).
+   */
+  validateUntrustedCode(source, filename = '<untrusted>') {
+    // Run basic validation first
+    this.validateSourceCode(source, filename);
+
+    // Check for potentially dangerous patterns. The leading \b keeps a rule
+    // from firing inside a longer identifier (for example `retrieval(` for
+    // the eval rule or `preprocess.` for the process rule).
+    const dangerousPatterns = [
+      { pattern: /\beval\s*\(/, message: 'eval() is not allowed' },
+      { pattern: /\bFunction\s*\(/, message: 'Function constructor is not allowed' },
+      { pattern: /\bimport\s+/, message: 'import statements are not allowed' },
+      { pattern: /\brequire\s*\(/, message: 'require() is not allowed' },
+      { pattern: /\bprocess\s*\./, message: 'process object access is not allowed' },
+      { pattern: /\bglobal\s*\./, message: 'global object access is not allowed' },
+      { pattern: /__proto__/, message: '__proto__ access is not allowed' },
+      { pattern: /\bconstructor\s*\./, message: 'constructor access is not allowed' }
+    ];
+
+    for (const { pattern, message } of dangerousPatterns) {
+      const match = pattern.exec(source);
+      if (match === null) {
+        continue;
+      }
+
+      // Derive the 1-based line/column of the match from the text before it.
+      const precedingLines = source.substring(0, match.index).split('\n');
+
+      throw new ValidationError(
+        message,
+        {
+          line: precedingLines.length,
+          column: precedingLines[precedingLines.length - 1].length + 1,
+          length: match[0].length
+        },
+        source,
+        ['Remove unsafe code patterns', 'Use only Baba Yaga built-in functions']
+      );
+    }
+
+    return true;
+  }
+
+  /**
+   * Validate function calls against whitelist.
+   *
+   * @param {string} functionName - Name to look up in allowedBuiltins.
+   * @param {*} location - Forwarded to the error.
+   * @param {string} source - Forwarded to the error.
+   * @returns {boolean} Always true when the name is whitelisted.
+   * @throws {ValidationError} When the name is not in allowedBuiltins.
+   */
+  validateFunctionCall(functionName, location, source) {
+    if (!this.allowedBuiltins.has(functionName)) {
+      throw new ValidationError(
+        `Function "${functionName}" is not allowed in restricted mode`,
+        location,
+        source,
+        [
+          'Use only whitelisted functions',
+          'Check security configuration',
+          `Add "${functionName}" to allowedBuiltins if safe`
+        ]
+      );
+    }
+
+    return true;
+  }
+}