about summary refs log tree commit diff stats
path: root/tree-sitter/dsk/dsk-cli/src/utils/grammar-generator.ts
diff options
context:
space:
mode:
Diffstat (limited to 'tree-sitter/dsk/dsk-cli/src/utils/grammar-generator.ts')
-rw-r--r-- tree-sitter/dsk/dsk-cli/src/utils/grammar-generator.ts 466
1 files changed, 466 insertions, 0 deletions
diff --git a/tree-sitter/dsk/dsk-cli/src/utils/grammar-generator.ts b/tree-sitter/dsk/dsk-cli/src/utils/grammar-generator.ts
new file mode 100644
index 0000000..1b40eff
--- /dev/null
+++ b/tree-sitter/dsk/dsk-cli/src/utils/grammar-generator.ts
@@ -0,0 +1,466 @@
+/**
+ * Grammar Generator
+ * 
+ * Converts collected user information into Tree-sitter grammar.js files
+ * with paradigm-aware rule generation
+ */
+
+import { LanguageArchitecture, LanguageFeatures, LanguageSyntax } from '../commands/new.js';
+
+export interface GrammarRule { // One named rule of the generated grammar
+  name: string; // Rule key; generateGrammarFile emits it as the property name before the arrow
+  definition: string; // Right-hand side; inserted verbatim into grammar.js as source text
+  comment?: string; // Optional text emitted as a line comment directly above the rule
+}
+
+export interface GeneratedGrammar { // In-memory grammar description consumed by generateGrammarFile
+  name: string; // Used in the header comment and as the grammar name property
+  rules: GrammarRule[]; // Emitted in array order inside the rules object
+  extras: string[]; // Each entry is emitted verbatim inside the extras array
+  conflicts: string[][]; // Each inner list is joined with a comma and wrapped in brackets
+  precedences: string[][]; // Each inner list is joined with a comma and wrapped in brackets
+  word?: string; // Rule name emitted as the word token when set and non-empty
+}
+
+/**
+ * Generate a complete Tree-sitter grammar from user specifications.
+ *
+ * Rules are appended in a fixed order: the root rule, the token rules, the
+ * paradigm rules, then data structure rules and control flow rules when the
+ * corresponding feature list is non-empty. Extras always contain a whitespace
+ * regex and a reference to the comment rule chosen by getCommentRuleName.
+ *
+ * @param architecture - Supplies the grammar name and the paradigm.
+ * @param features - dataStructures and controlFlow are read here; the whole
+ *   object is also forwarded to the helper builders.
+ * @param syntax - Token patterns, variable syntax and paradigm examples.
+ * @returns A grammar description with an empty conflicts list and the word
+ *   token fixed to identifier.
+ *
+ * NOTE(review): the generated definitions reference rules that no builder in
+ * the visible source emits (parenthesized_expression, binary_expression,
+ * function_call, expression_statement, declaration, binding, constraint, fact,
+ * method_definition, parameter_list, arrow_function) - confirm they are
+ * supplied elsewhere, otherwise the grammar refers to undefined rules.
+ */
+export function generateGrammar(
+  architecture: LanguageArchitecture,
+  features: LanguageFeatures,
+  syntax: LanguageSyntax
+): GeneratedGrammar {
+  const rules: GrammarRule[] = [];
+  const extras: string[] = [];
+  
+  // Start with the root rule - this varies by paradigm
+  rules.push(generateRootRule(architecture, features));
+  
+  // Add basic token rules
+  rules.push(...generateTokenRules(syntax));
+  
+  // Add paradigm-specific rules
+  rules.push(...generateParadigmRules(architecture, features, syntax));
+  
+  // Add data structure rules if specified
+  if (features.dataStructures.length > 0) {
+    rules.push(...generateDataStructureRules(features.dataStructures));
+  }
+  
+  // Add control flow rules if specified
+  if (features.controlFlow.length > 0) {
+    rules.push(...generateControlFlowRules(features.controlFlow, syntax));
+  }
+  
+  // Set up extras (whitespace and comments)
+  extras.push('/\\s/', `$.${getCommentRuleName(syntax.comments.pattern)}`);
+  
+  return {
+    name: architecture.name,
+    rules,
+    extras,
+    conflicts: [], // TODO: Add conflicts if needed
+    precedences: generatePrecedences(architecture, features),
+    word: 'identifier' // Most languages use identifier as word token
+  };
+}
+
+/**
+ * Generate the root rule based on language paradigm.
+ *
+ * The rule is always named source_file. Its definition is a repeat over a
+ * choice of top-level constructs selected by architecture.paradigm; the
+ * procedural and mixed paradigms, and any unrecognised value, share the
+ * statement / expression / declaration form.
+ *
+ * @param architecture - Only paradigm is read.
+ * @param features - Not read by this function.
+ * @returns The source_file rule, with a comment that names the paradigm.
+ */
+function generateRootRule(architecture: LanguageArchitecture, features: LanguageFeatures): GrammarRule {
+  let definition: string;
+  
+  switch (architecture.paradigm) {
+    case 'declarative':
+      definition = 'repeat(choice($.rule_declaration, $.constraint, $.fact))';
+      break;
+    case 'functional':
+      definition = 'repeat(choice($.function_definition, $.expression, $.binding))';
+      break;
+    case 'object-oriented':
+      definition = 'repeat(choice($.class_definition, $.statement, $.expression))';
+      break;
+    case 'procedural':
+    case 'mixed':
+    default:
+      definition = 'repeat(choice($.statement, $.expression, $.declaration))';
+      break;
+  }
+  
+  return {
+    name: 'source_file',
+    definition,
+    comment: `Root rule for ${architecture.paradigm} language`
+  };
+}
+
+/**
+ * Generate basic token rules (identifiers, numbers, strings, comments).
+ *
+ * The identifier, number and string patterns are placed between regex-literal
+ * slashes exactly as supplied, with no escaping. The comment prefix is passed
+ * through escapeRegex and followed by a fixed suffix that consumes the rest of
+ * the line.
+ *
+ * @param syntax - Reads pattern and examples for identifiers, numbers and
+ *   strings, plus comments.pattern.
+ * @returns Four rules in this order: identifier, number, string, comment.
+ *
+ * NOTE(review): escapeRegex does not escape the forward slash, so a
+ * double-slash comment prefix appears to produce three consecutive slashes at
+ * the start of the emitted definition, which JavaScript would read as a line
+ * comment - verify the generated grammar.js still parses. Also confirm that the
+ * end-of-line anchor in the suffix is accepted by the Tree-sitter regex engine.
+ */
+function generateTokenRules(syntax: LanguageSyntax): GrammarRule[] {
+  const rules: GrammarRule[] = [];
+  
+  // Identifier
+  rules.push({
+    name: 'identifier',
+    definition: `/${syntax.identifiers.pattern}/`,
+    comment: `Identifiers: ${syntax.identifiers.examples.join(', ')}`
+  });
+  
+  // Numbers
+  rules.push({
+    name: 'number',
+    definition: `/${syntax.numbers.pattern}/`,
+    comment: `Numbers: ${syntax.numbers.examples.join(', ')}`
+  });
+  
+  // Strings
+  rules.push({
+    name: 'string',
+    definition: `/${syntax.strings.pattern}/`,
+    comment: `Strings: ${syntax.strings.examples.join(', ')}`
+  });
+  
+  // Comments
+  const commentRuleName = getCommentRuleName(syntax.comments.pattern);
+  rules.push({
+    name: commentRuleName,
+    definition: `/${escapeRegex(syntax.comments.pattern)}.*$/`,
+    comment: `Line comments starting with ${syntax.comments.pattern}`
+  });
+  
+  return rules;
+}
+
+/**
+ * Generate paradigm-specific rules.
+ *
+ * Always emits variable_declaration followed by expression. Adds statement for
+ * the procedural, object-oriented and mixed paradigms. Finally adds at most one
+ * construct rule (class, function or declarative rule) for the matching
+ * paradigm, and only when the corresponding syntax.paradigmExamples entry is
+ * truthy.
+ *
+ * @param architecture - paradigm decides which rules are added.
+ * @param features - Forwarded to the expression and statement builders;
+ *   functionTypes is forwarded to the function rule builder.
+ * @param syntax - variables.keyword, operator and terminator are interpolated
+ *   between double quotes without escaping; paradigmExamples gates the
+ *   construct rules.
+ * @returns The rules in the order described above.
+ */
+function generateParadigmRules(
+  architecture: LanguageArchitecture,
+  features: LanguageFeatures,
+  syntax: LanguageSyntax
+): GrammarRule[] {
+  const rules: GrammarRule[] = [];
+  
+  // Variable declarations (common to most paradigms)
+  rules.push({
+    name: 'variable_declaration',
+    definition: `seq("${syntax.variables.keyword}", $.identifier, "${syntax.variables.operator}", $.expression, "${syntax.variables.terminator}")`,
+    comment: `Variable declarations: ${syntax.variables.example}`
+  });
+  
+  // Expression rule (fundamental to all paradigms)
+  rules.push(generateExpressionRule(architecture, features));
+  
+  // Statement rule (for imperative paradigms)
+  if (['procedural', 'object-oriented', 'mixed'].includes(architecture.paradigm)) {
+    rules.push(generateStatementRule(architecture, features));
+  }
+  
+  // Add paradigm-specific constructs
+  switch (architecture.paradigm) {
+    case 'object-oriented':
+      if (syntax.paradigmExamples.class) {
+        rules.push(generateClassRule(syntax.paradigmExamples.class));
+      }
+      break;
+    case 'functional':
+      if (syntax.paradigmExamples.function) {
+        rules.push(generateFunctionRule(syntax.paradigmExamples.function, features.functionTypes));
+      }
+      break;
+    case 'declarative':
+      if (syntax.paradigmExamples.rule) {
+        rules.push(generateDeclarativeRule(syntax.paradigmExamples.rule));
+      }
+      break;
+  }
+  
+  return rules;
+}
+
+/**
+ * Generate expression rule based on paradigm.
+ *
+ * Starts from identifier, number, string and parenthesized_expression, then
+ * appends function_call when features.functionTypes is non-empty,
+ * array_literal and object_literal when features.dataStructures includes
+ * arrays or objects respectively, and binary_expression unless the paradigm is
+ * declarative.
+ *
+ * @param architecture - Only paradigm is read.
+ * @param features - functionTypes and dataStructures are read.
+ * @returns A rule named expression whose definition is a choice over the
+ *   collected alternatives, in the order they were added.
+ */
+function generateExpressionRule(architecture: LanguageArchitecture, features: LanguageFeatures): GrammarRule {
+  const choices = [
+    '$.identifier',
+    '$.number', 
+    '$.string',
+    '$.parenthesized_expression'
+  ];
+  
+  // Add function calls if functions are supported
+  if (features.functionTypes.length > 0) {
+    choices.push('$.function_call');
+  }
+  
+  // Add data structure literals
+  if (features.dataStructures.includes('arrays')) {
+    choices.push('$.array_literal');
+  }
+  if (features.dataStructures.includes('objects')) {
+    choices.push('$.object_literal');
+  }
+  
+  // Add binary operations for most paradigms
+  if (architecture.paradigm !== 'declarative') {
+    choices.push('$.binary_expression');
+  }
+  
+  return {
+    name: 'expression',
+    definition: `choice(${choices.join(', ')})`,
+    comment: 'Expression rule covering all expression types'
+  };
+}
+
+/**
+ * Generate statement rule for imperative paradigms.
+ *
+ * Starts from variable_declaration and expression_statement, adds if_statement
+ * when features.controlFlow includes conditionals, and adds both for_statement
+ * and while_statement when it includes loops.
+ *
+ * @param architecture - Not read by this function.
+ * @param features - Only controlFlow is read.
+ * @returns A rule named statement whose definition is a choice over the
+ *   collected alternatives.
+ */
+function generateStatementRule(architecture: LanguageArchitecture, features: LanguageFeatures): GrammarRule {
+  const choices = [
+    '$.variable_declaration',
+    '$.expression_statement'
+  ];
+  
+  // Add control flow statements
+  if (features.controlFlow.includes('conditionals')) {
+    choices.push('$.if_statement');
+  }
+  if (features.controlFlow.includes('loops')) {
+    choices.push('$.for_statement', '$.while_statement');
+  }
+  
+  return {
+    name: 'statement',
+    definition: `choice(${choices.join(', ')})`,
+    comment: 'Statement rule for imperative constructs'
+  };
+}
+
+/**
+ * Generate data structure rules.
+ *
+ * Emits array_literal for arrays, object_literal plus property for objects,
+ * and tuple_literal for tuples; any other entry is ignored. The delimiters are
+ * hard-coded. Array and object literals take an optional comma-separated list
+ * with no trailing comma, while the tuple definition uses repeat1 and so needs
+ * at least two expressions.
+ *
+ * @param dataStructures - Feature names; only arrays, objects and tuples are
+ *   recognised.
+ * @returns The rules in the order arrays, objects, tuples.
+ *
+ * NOTE(review): tuple_literal is defined here, but generateExpressionRule only
+ * adds array_literal and object_literal to the expression choices - confirm
+ * whether tuples are meant to be reachable.
+ */
+function generateDataStructureRules(dataStructures: string[]): GrammarRule[] {
+  const rules: GrammarRule[] = [];
+  
+  if (dataStructures.includes('arrays')) {
+    rules.push({
+      name: 'array_literal',
+      definition: 'seq("[", optional(seq($.expression, repeat(seq(",", $.expression)))), "]")',
+      comment: 'Array literals: [1, 2, 3]'
+    });
+  }
+  
+  if (dataStructures.includes('objects')) {
+    rules.push({
+      name: 'object_literal',
+      definition: 'seq("{", optional(seq($.property, repeat(seq(",", $.property)))), "}")',
+      comment: 'Object literals: {key: value}'
+    });
+    
+    rules.push({
+      name: 'property',
+      definition: 'seq(choice($.identifier, $.string), ":", $.expression)',
+      comment: 'Object property: key: value'
+    });
+  }
+  
+  if (dataStructures.includes('tuples')) {
+    rules.push({
+      name: 'tuple_literal',
+      definition: 'seq("(", $.expression, repeat1(seq(",", $.expression)), ")")',
+      comment: 'Tuple literals: (a, b, c)'
+    });
+  }
+  
+  return rules;
+}
+
+/**
+ * Generate control flow rules.
+ *
+ * conditionals adds if_statement and block; loops adds while_statement and
+ * for_statement. All keywords and delimiters are hard-coded.
+ *
+ * @param controlFlow - Feature names; only conditionals and loops are
+ *   recognised.
+ * @param syntax - Not read by this function.
+ * @returns The rules in the order if_statement, block, while_statement,
+ *   for_statement, restricted to the enabled features.
+ *
+ * NOTE(review): block is emitted only under conditionals, yet the loop rules
+ * also reference $.block - with loops enabled alone that reference is not
+ * satisfied by this function. Confirm block is defined elsewhere in that case.
+ */
+function generateControlFlowRules(controlFlow: string[], syntax: LanguageSyntax): GrammarRule[] {
+  const rules: GrammarRule[] = [];
+  
+  if (controlFlow.includes('conditionals')) {
+    rules.push({
+      name: 'if_statement',
+      definition: 'seq("if", "(", $.expression, ")", $.block, optional(seq("else", choice($.if_statement, $.block))))',
+      comment: 'If-else statements'
+    });
+    
+    rules.push({
+      name: 'block',
+      definition: 'seq("{", repeat($.statement), "}")',
+      comment: 'Code blocks'
+    });
+  }
+  
+  if (controlFlow.includes('loops')) {
+    rules.push({
+      name: 'while_statement',
+      definition: 'seq("while", "(", $.expression, ")", $.block)',
+      comment: 'While loops'
+    });
+    
+    rules.push({
+      name: 'for_statement',
+      definition: 'seq("for", "(", optional($.statement), ";", optional($.expression), ";", optional($.expression), ")", $.block)',
+      comment: 'For loops'
+    });
+  }
+  
+  return rules;
+}
+
+/**
+ * Generate class rule from user example.
+ *
+ * The definition is fixed (class keyword, identifier, braces around repeated
+ * method_definition); the example is not parsed and only appears in the rule
+ * comment.
+ *
+ * @param classExample - User-supplied sample text, copied into the comment.
+ * @returns A rule named class_definition.
+ *
+ * NOTE(review): method_definition is referenced but not emitted by any builder
+ * in the visible source - confirm where it is defined.
+ */
+function generateClassRule(classExample: string): GrammarRule {
+  // Simple class rule - could be enhanced with more parsing
+  return {
+    name: 'class_definition',
+    definition: 'seq("class", $.identifier, "{", repeat($.method_definition), "}")',
+    comment: `Class definition based on: ${classExample}`
+  };
+}
+
+/**
+ * Generate function rule from user example.
+ *
+ * The base definition is fixed (function keyword, identifier, optional
+ * parameter_list in parentheses, block). When functionTypes includes anonymous
+ * the base form is wrapped in a choice together with $.arrow_function. The
+ * example is not parsed and only appears in the rule comment.
+ *
+ * @param functionExample - User-supplied sample text, copied into the comment.
+ * @param functionTypes - Feature names; only anonymous is checked.
+ * @returns A rule named function_definition.
+ *
+ * NOTE(review): parameter_list and arrow_function are referenced but not
+ * emitted by any builder in the visible source - confirm where they are defined.
+ */
+function generateFunctionRule(functionExample: string, functionTypes: string[]): GrammarRule {
+  let definition = 'seq("function", $.identifier, "(", optional($.parameter_list), ")", $.block)';
+  
+  // Add arrow functions if supported
+  if (functionTypes.includes('anonymous')) {
+    definition = `choice(${definition}, $.arrow_function)`;
+  }
+  
+  return {
+    name: 'function_definition',
+    definition,
+    comment: `Function definition based on: ${functionExample}`
+  };
+}
+
+/**
+ * Generate declarative rule from user example.
+ *
+ * The definition is fixed (rule keyword, identifier, optional when clause with
+ * an expression); the example is not parsed and only appears in the rule
+ * comment.
+ *
+ * @param ruleExample - User-supplied sample text, copied into the comment.
+ * @returns A rule named rule_declaration.
+ */
+function generateDeclarativeRule(ruleExample: string): GrammarRule {
+  return {
+    name: 'rule_declaration',
+    definition: 'seq("rule", $.identifier, optional(seq("when", $.expression)))',
+    comment: `Rule declaration based on: ${ruleExample}`
+  };
+}
+
+/**
+ * Generate precedences based on paradigm.
+ *
+ * Always returns a list holding the single-entry group for binary_expression,
+ * and appends a second single-entry group for function_call when
+ * features.functionTypes is non-empty.
+ *
+ * @param architecture - Not read by this function.
+ * @param features - Only functionTypes is read.
+ * @returns Precedence groups, each an array of rule reference strings.
+ */
+function generatePrecedences(architecture: LanguageArchitecture, features: LanguageFeatures): string[][] {
+  // Basic precedence for binary operations
+  const precedences = [
+    ['$.binary_expression']
+  ];
+  
+  // Add function call precedence if functions are supported
+  if (features.functionTypes.length > 0) {
+    precedences.push(['$.function_call']);
+  }
+  
+  return precedences;
+}
+
+/**
+ * Generate the complete grammar.js file content.
+ *
+ * Emits a header comment, then a module.exports assignment wrapping a grammar
+ * call that contains the name, the optional word token, the rules object, and
+ * the extras, conflicts and precedences arrays. Each of those three arrays is
+ * written only when non-empty and is preceded by a line holding a lone comma
+ * that terminates the previous property. The last rule and the last entry of
+ * each array are written without a trailing comma.
+ *
+ * All values (name, rule names, definitions, comments, extras, conflict and
+ * precedence entries) are inserted verbatim; nothing is escaped or validated.
+ * The name in particular is placed between single quotes as-is.
+ *
+ * @param grammar - Grammar description to serialise.
+ * @returns The file text, lines joined with a newline and ending in a newline.
+ */
+export function generateGrammarFile(grammar: GeneratedGrammar): string {
+  const lines: string[] = [];
+  
+  lines.push('/**');
+  lines.push(` * Grammar for ${grammar.name}`);
+  lines.push(' * Generated by DSK (DSL Development Kit)');
+  lines.push(' */');
+  lines.push('');
+  lines.push('module.exports = grammar({');
+  lines.push(`  name: '${grammar.name}',`);
+  lines.push('');
+  
+  // Add word token if specified
+  if (grammar.word) {
+    lines.push(`  word: $ => $.${grammar.word},`);
+    lines.push('');
+  }
+  
+  // Add rules
+  lines.push('  rules: {');
+  
+  grammar.rules.forEach((rule, index) => {
+    if (rule.comment) {
+      lines.push(`    // ${rule.comment}`);
+    }
+    lines.push(`    ${rule.name}: $ => ${rule.definition}${index < grammar.rules.length - 1 ? ',' : ''}`);
+    if (index < grammar.rules.length - 1) {
+      lines.push('');
+    }
+  });
+  
+  lines.push('  }');
+  
+  // Add extras
+  if (grammar.extras.length > 0) {
+    lines.push(',');
+    lines.push('');
+    lines.push('  extras: $ => [');
+    grammar.extras.forEach((extra, index) => {
+      lines.push(`    ${extra}${index < grammar.extras.length - 1 ? ',' : ''}`);
+    });
+    lines.push('  ]');
+  }
+  
+  // Add conflicts if any
+  if (grammar.conflicts.length > 0) {
+    lines.push(',');
+    lines.push('');
+    lines.push('  conflicts: $ => [');
+    grammar.conflicts.forEach((conflict, index) => {
+      lines.push(`    [${conflict.join(', ')}]${index < grammar.conflicts.length - 1 ? ',' : ''}`);
+    });
+    lines.push('  ]');
+  }
+  
+  // Add precedences if any
+  if (grammar.precedences.length > 0) {
+    lines.push(',');
+    lines.push('');
+    lines.push('  precedences: $ => [');
+    grammar.precedences.forEach((prec, index) => {
+      lines.push(`    [${prec.join(', ')}]${index < grammar.precedences.length - 1 ? ',' : ''}`);
+    });
+    lines.push('  ]');
+  }
+  
+  lines.push('});');
+  lines.push('');
+  
+  return lines.join('\n');
+}
+
+/**
+ * Helper functions
+ *
+ * getCommentRuleName maps a line-comment prefix to the name of the rule that
+ * represents it. The double-slash, hash and semicolon prefixes each get a
+ * dedicated name; every other prefix falls back to line_comment. The match is
+ * an exact string comparison.
+ */
+function getCommentRuleName(commentPattern: string): string {
+  switch (commentPattern) {
+    case '//': return 'line_comment_slash';
+    case '#': return 'line_comment_hash';
+    case ';': return 'line_comment_semicolon';
+    default: return 'line_comment';
+  }
+}
+
+function escapeRegex(pattern: string): string { // Prefix each regex metacharacter in pattern with a backslash
+  return pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // NOTE(review): the forward slash is not in the escaped set - confirm callers that wrap the result in a regex literal
+}