/**
 * Grammar Generator
 *
 * Converts collected user information into Tree-sitter grammar.js files
 * with paradigm-aware rule generation
 */

import type { LanguageArchitecture, LanguageFeatures, LanguageSyntax } from '../commands/new.js';

/** A single named rule of the generated grammar. */
export interface GrammarRule {
  /** Key under which the rule is emitted in the grammar's `rules` object. */
  name: string;
  /** JavaScript source text of the rule body; emitted verbatim after `$ =>`. */
  definition: string;
  /** Optional text emitted as `//` comment line(s) directly above the rule. */
  comment?: string;
}

/** In-memory description of a grammar, ready to be rendered by `generateGrammarFile`. */
export interface GeneratedGrammar {
  /** Grammar name, emitted as the `name` property. */
  name: string;
  /** Rules in emission order; the first entry is the root rule. */
  rules: GrammarRule[];
  /** Source text of each `extras` entry. */
  extras: string[];
  /** Each inner array is rendered as one `conflicts` entry. */
  conflicts: string[][];
  /** Each inner array is rendered as one `precedences` entry. */
  precedences: string[][];
  /** Name of the rule used as the `word` token, if any. */
  word?: string;
}

/**
 * Generate a complete Tree-sitter grammar from user specifications.
 *
 * Rules are produced in this order: root rule, token rules, paradigm rules,
 * then data-structure and control-flow rules when the corresponding feature
 * lists are non-empty.
 *
 * NOTE(review): several symbols referenced by the generated rules
 * (`parenthesized_expression`, `binary_expression`, `function_call`,
 * `expression_statement`, `declaration`, `binding`, `constraint`, `fact`,
 * `method_definition`, `parameter_list`, `arrow_function`) are not defined by
 * any generator in this file, and `statement`, `class_definition`,
 * `function_definition` and `rule_declaration` are only defined for some
 * inputs. Presumably they are meant to be filled in later - confirm.
 *
 * @param architecture - Supplies the grammar name and the paradigm.
 * @param features - Supplies the `dataStructures`, `controlFlow` and `functionTypes` lists.
 * @param syntax - Supplies token patterns, the comment marker and variable syntax.
 * @returns The grammar description; `conflicts` is always empty.
 */
export function generateGrammar(
  architecture: LanguageArchitecture,
  features: LanguageFeatures,
  syntax: LanguageSyntax
): GeneratedGrammar {
  const rules: GrammarRule[] = [];
  const extras: string[] = [];

  // Start with the root rule - this varies by paradigm
  rules.push(generateRootRule(architecture, features));

  // Add basic token rules
  rules.push(...generateTokenRules(syntax));

  // Add paradigm-specific rules
  rules.push(...generateParadigmRules(architecture, features, syntax));

  // Add data structure rules if specified
  if (features.dataStructures.length > 0) {
    rules.push(...generateDataStructureRules(features.dataStructures));
  }

  // Add control flow rules if specified
  if (features.controlFlow.length > 0) {
    rules.push(...generateControlFlowRules(features.controlFlow, syntax));
  }

  // Set up extras (whitespace and comments)
  extras.push('/\\s/', `$.${getCommentRuleName(syntax.comments.pattern)}`);

  return {
    name: architecture.name,
    rules,
    extras,
    conflicts: [], // TODO: Add conflicts if needed
    precedences: generatePrecedences(architecture, features),
    word: 'identifier' // Most languages use identifier as word token
  };
}

/**
 * Generate the root rule based on language paradigm.
 *
 * `procedural`, `mixed` and any unrecognised paradigm share the same
 * statement/expression/declaration root.
 */
function generateRootRule(architecture: LanguageArchitecture, features: LanguageFeatures): GrammarRule {
  let definition: string;

  switch (architecture.paradigm) {
    case 'declarative':
      definition = 'repeat(choice($.rule_declaration, $.constraint, $.fact))';
      break;
    case 'functional':
      definition = 'repeat(choice($.function_definition, $.expression, $.binding))';
      break;
    case 'object-oriented':
      definition = 'repeat(choice($.class_definition, $.statement, $.expression))';
      break;
    case 'procedural':
    case 'mixed':
    default:
      definition = 'repeat(choice($.statement, $.expression, $.declaration))';
      break;
  }

  return {
    name: 'source_file',
    definition,
    comment: `Root rule for ${architecture.paradigm} language`
  };
}

/**
 * Generate basic token rules (identifiers, numbers, strings, comments).
 *
 * The identifier/number/string patterns are regex sources supplied by the
 * user; they are wrapped in `/.../` with bare slashes escaped so the emitted
 * literal cannot terminate early.
 */
function generateTokenRules(syntax: LanguageSyntax): GrammarRule[] {
  const rules: GrammarRule[] = [];

  // Identifier
  rules.push({
    name: 'identifier',
    definition: toRegexLiteral(syntax.identifiers.pattern),
    comment: `Identifiers: ${syntax.identifiers.examples.join(', ')}`
  });

  // Numbers
  rules.push({
    name: 'number',
    definition: toRegexLiteral(syntax.numbers.pattern),
    comment: `Numbers: ${syntax.numbers.examples.join(', ')}`
  });

  // Strings
  rules.push({
    name: 'string',
    definition: toRegexLiteral(syntax.strings.pattern),
    comment: `Strings: ${syntax.strings.examples.join(', ')}`
  });

  // Comments
  const commentRuleName = getCommentRuleName(syntax.comments.pattern);
  rules.push({
    name: commentRuleName,
    // No trailing `$`: Tree-sitter regexes do not support anchors, and `.`
    // already stops at the end of the line. escapeRegex also escapes `/`, so
    // a `//` marker yields `/\/\/.*/` rather than the JS line comment `///...`.
    definition: `/${escapeRegex(syntax.comments.pattern)}.*/`,
    comment: `Line comments starting with ${syntax.comments.pattern}`
  });

  return rules;
}

/**
 * Generate paradigm-specific rules.
 *
 * Always emits `variable_declaration` and `expression`; emits `statement`
 * for the procedural, object-oriented and mixed paradigms; and emits a
 * class/function/rule construct when the matching paradigm example exists.
 */
function generateParadigmRules(
  architecture: LanguageArchitecture,
  features: LanguageFeatures,
  syntax: LanguageSyntax
): GrammarRule[] {
  const rules: GrammarRule[] = [];

  // Variable declarations (common to most paradigms).
  // Empty keyword/operator/terminator values are left out instead of being
  // emitted as empty string tokens, and every literal is JSON-quoted so that
  // quotes or backslashes in user input cannot break the generated file.
  const declarationParts = [
    quoteToken(syntax.variables.keyword),
    '$.identifier',
    quoteToken(syntax.variables.operator),
    '$.expression',
    quoteToken(syntax.variables.terminator)
  ].filter((part): part is string => part !== undefined);

  rules.push({
    name: 'variable_declaration',
    definition: `seq(${declarationParts.join(', ')})`,
    comment: `Variable declarations: ${syntax.variables.example}`
  });

  // Expression rule (fundamental to all paradigms)
  rules.push(generateExpressionRule(architecture, features));

  // Statement rule (for imperative paradigms)
  if (['procedural', 'object-oriented', 'mixed'].includes(architecture.paradigm)) {
    rules.push(generateStatementRule(architecture, features));
  }

  // Add paradigm-specific constructs
  switch (architecture.paradigm) {
    case 'object-oriented':
      if (syntax.paradigmExamples.class) {
        rules.push(generateClassRule(syntax.paradigmExamples.class));
      }
      break;
    case 'functional':
      if (syntax.paradigmExamples.function) {
        rules.push(generateFunctionRule(syntax.paradigmExamples.function, features.functionTypes));
      }
      break;
    case 'declarative':
      if (syntax.paradigmExamples.rule) {
        rules.push(generateDeclarativeRule(syntax.paradigmExamples.rule));
      }
      break;
  }

  return rules;
}

/**
 * Generate expression rule based on paradigm.
 *
 * Starts from identifier/number/string/parenthesized expression and adds
 * function calls, data-structure literals and binary expressions according
 * to the selected features and paradigm.
 */
function generateExpressionRule(architecture: LanguageArchitecture, features: LanguageFeatures): GrammarRule {
  const choices = [
    '$.identifier',
    '$.number',
    '$.string',
    '$.parenthesized_expression'
  ];

  // Add function calls if functions are supported
  if (features.functionTypes.length > 0) {
    choices.push('$.function_call');
  }

  // Add data structure literals
  if (features.dataStructures.includes('arrays')) {
    choices.push('$.array_literal');
  }
  if (features.dataStructures.includes('objects')) {
    choices.push('$.object_literal');
  }
  // Tuples get a `tuple_literal` rule, so make it reachable like the others.
  if (features.dataStructures.includes('tuples')) {
    choices.push('$.tuple_literal');
  }

  // Add binary operations for most paradigms
  if (architecture.paradigm !== 'declarative') {
    choices.push('$.binary_expression');
  }

  return {
    name: 'expression',
    definition: `choice(${choices.join(', ')})`,
    comment: 'Expression rule covering all expression types'
  };
}

/**
 * Generate statement rule for imperative paradigms.
 *
 * Control-flow statements are only offered when the matching entry is
 * present in `features.controlFlow`.
 */
function generateStatementRule(architecture: LanguageArchitecture, features: LanguageFeatures): GrammarRule {
  const choices = [
    '$.variable_declaration',
    '$.expression_statement'
  ];

  // Add control flow statements
  if (features.controlFlow.includes('conditionals')) {
    choices.push('$.if_statement');
  }
  if (features.controlFlow.includes('loops')) {
    choices.push('$.for_statement', '$.while_statement');
  }

  return {
    name: 'statement',
    definition: `choice(${choices.join(', ')})`,
    comment: 'Statement rule for imperative constructs'
  };
}

/**
 * Generate data structure rules.
 *
 * Recognises the entries `arrays`, `objects` (which also emits `property`)
 * and `tuples`; any other entry is ignored.
 */
function generateDataStructureRules(dataStructures: string[]): GrammarRule[] {
  const rules: GrammarRule[] = [];

  if (dataStructures.includes('arrays')) {
    rules.push({
      name: 'array_literal',
      definition: 'seq("[", optional(seq($.expression, repeat(seq(",", $.expression)))), "]")',
      comment: 'Array literals: [1, 2, 3]'
    });
  }

  if (dataStructures.includes('objects')) {
    rules.push({
      name: 'object_literal',
      definition: 'seq("{", optional(seq($.property, repeat(seq(",", $.property)))), "}")',
      comment: 'Object literals: {key: value}'
    });
    rules.push({
      name: 'property',
      definition: 'seq(choice($.identifier, $.string), ":", $.expression)',
      comment: 'Object property: key: value'
    });
  }

  if (dataStructures.includes('tuples')) {
    rules.push({
      name: 'tuple_literal',
      // repeat1 requires at least two elements.
      definition: 'seq("(", $.expression, repeat1(seq(",", $.expression)), ")")',
      comment: 'Tuple literals: (a, b, c)'
    });
  }

  return rules;
}

/**
 * Generate control flow rules.
 *
 * `block` is emitted exactly once whenever conditionals or loops are
 * selected, because the if, while and for rules all reference `$.block`.
 * `syntax` is currently unused.
 */
function generateControlFlowRules(controlFlow: string[], syntax: LanguageSyntax): GrammarRule[] {
  const rules: GrammarRule[] = [];
  const hasConditionals = controlFlow.includes('conditionals');
  const hasLoops = controlFlow.includes('loops');

  if (hasConditionals) {
    rules.push({
      name: 'if_statement',
      definition: 'seq("if", "(", $.expression, ")", $.block, optional(seq("else", choice($.if_statement, $.block))))',
      comment: 'If-else statements'
    });
  }

  if (hasConditionals || hasLoops) {
    rules.push({
      name: 'block',
      definition: 'seq("{", repeat($.statement), "}")',
      comment: 'Code blocks'
    });
  }

  if (hasLoops) {
    rules.push({
      name: 'while_statement',
      definition: 'seq("while", "(", $.expression, ")", $.block)',
      comment: 'While loops'
    });
    rules.push({
      name: 'for_statement',
      definition: 'seq("for", "(", optional($.statement), ";", optional($.expression), ";", optional($.expression), ")", $.block)',
      comment: 'For loops'
    });
  }

  return rules;
}

/**
 * Generate class rule from user example.
 *
 * The example is only echoed in the rule comment; the definition is fixed.
 */
function generateClassRule(classExample: string): GrammarRule {
  // Simple class rule - could be enhanced with more parsing
  return {
    name: 'class_definition',
    definition: 'seq("class", $.identifier, "{", repeat($.method_definition), "}")',
    comment: `Class definition based on: ${classExample}`
  };
}

/**
 * Generate function rule from user example.
 *
 * The example is only echoed in the rule comment. When `functionTypes`
 * contains `anonymous`, the rule also accepts `$.arrow_function`.
 */
function generateFunctionRule(functionExample: string, functionTypes: string[]): GrammarRule {
  let definition = 'seq("function", $.identifier, "(", optional($.parameter_list), ")", $.block)';

  // Add arrow functions if supported
  if (functionTypes.includes('anonymous')) {
    definition = `choice(${definition}, $.arrow_function)`;
  }

  return {
    name: 'function_definition',
    definition,
    comment: `Function definition based on: ${functionExample}`
  };
}

/**
 * Generate declarative rule from user example.
 *
 * The example is only echoed in the rule comment; the definition is fixed.
 */
function generateDeclarativeRule(ruleExample: string): GrammarRule {
  return {
    name: 'rule_declaration',
    definition: 'seq("rule", $.identifier, optional(seq("when", $.expression)))',
    comment: `Rule declaration based on: ${ruleExample}`
  };
}

/**
 * Generate precedences based on paradigm.
 *
 * Always contains a `$.binary_expression` entry and adds a `$.function_call`
 * entry when any function type is selected. `architecture` is currently unused.
 */
function generatePrecedences(architecture: LanguageArchitecture, features: LanguageFeatures): string[][] {
  // Basic precedence for binary operations
  const precedences: string[][] = [
    ['$.binary_expression']
  ];

  // Add function call precedence if functions are supported
  if (features.functionTypes.length > 0) {
    precedences.push(['$.function_call']);
  }

  return precedences;
}

/**
 * Generate the complete grammar.js file content.
 *
 * Emits a header comment, then `module.exports = grammar({...})` holding the
 * name, the optional `word` token, the rules, and the extras / conflicts /
 * precedences sections that are non-empty.
 *
 * @param grammar - The grammar description to render.
 * @returns The file content, terminated by a newline.
 */
export function generateGrammarFile(grammar: GeneratedGrammar): string {
  const lines: string[] = [];

  lines.push('/**');
  lines.push(` * Grammar for ${grammar.name}`);
  lines.push(' * Generated by DSK (DSL Development Kit)');
  lines.push(' */');
  lines.push('');
  lines.push('module.exports = grammar({');
  lines.push(` name: ${quoteSingle(grammar.name)},`);
  lines.push('');

  // Add word token if specified
  if (grammar.word) {
    lines.push(` word: $ => $.${grammar.word},`);
    lines.push('');
  }

  // Add rules
  lines.push(' rules: {');
  const lastRuleIndex = grammar.rules.length - 1;
  grammar.rules.forEach((rule, index) => {
    if (rule.comment) {
      // Comments may embed multi-line user examples; prefix every line so
      // none of them ends up as raw code in the generated file.
      for (const commentLine of rule.comment.split(/\r\n|[\r\n\u2028\u2029]/)) {
        lines.push(` // ${commentLine}`);
      }
    }
    const isLast = index >= lastRuleIndex;
    lines.push(` ${rule.name}: $ => ${rule.definition}${isLast ? '' : ','}`);
    if (!isLast) {
      lines.push('');
    }
  });
  lines.push(' }');

  // Add extras, conflicts and precedences (each only when non-empty)
  pushArraySection(lines, 'extras', grammar.extras);
  pushArraySection(lines, 'conflicts', grammar.conflicts.map((conflict) => `[${conflict.join(', ')}]`));
  pushArraySection(lines, 'precedences', grammar.precedences.map((prec) => `[${prec.join(', ')}]`));

  lines.push('});');
  lines.push('');

  return lines.join('\n');
}

/**
 * Helper functions
 */

/** Append a `key: $ => [ ... ]` section to `lines`; does nothing when `entries` is empty. */
function pushArraySection(lines: string[], key: string, entries: readonly string[]): void {
  if (entries.length === 0) {
    return;
  }
  // The comma closes the previous property of the grammar object.
  lines.push(',');
  lines.push('');
  lines.push(` ${key}: $ => [`);
  entries.forEach((entry, index) => {
    lines.push(` ${entry}${index < entries.length - 1 ? ',' : ''}`);
  });
  lines.push(' ]');
}

/** Map a line-comment marker to the rule name used for the comment token. */
function getCommentRuleName(commentPattern: string): string {
  switch (commentPattern) {
    case '//':
      return 'line_comment_slash';
    case '#':
      return 'line_comment_hash';
    case ';':
      return 'line_comment_semicolon';
    default:
      return 'line_comment';
  }
}

/**
 * Escape regex metacharacters in a literal string. `/` is escaped as well so
 * the result can be placed inside a `/.../` regex literal.
 */
function escapeRegex(pattern: string): string {
  return pattern.replace(/[.*+?^${}()|[\]\\\/]/g, '\\$&');
}

/**
 * Wrap a regex source in `/.../`, escaping any `/` that is not already
 * escaped. Existing escape sequences are left untouched.
 */
function toRegexLiteral(source: string): string {
  const body = source.replace(/\\[\s\S]|\//g, (match) => (match === '/' ? '\\/' : match));
  return `/${body}/`;
}

/**
 * Render `value` as a double-quoted JavaScript string literal, or return
 * `undefined` when there is nothing to emit (empty or missing value).
 */
function quoteToken(value: string | null | undefined): string | undefined {
  if (value === undefined || value === null || value === '') {
    return undefined;
  }
  return JSON.stringify(value);
}

/** Render `value` as a single-quoted JavaScript string literal. */
function quoteSingle(value: string): string {
  const escaped = value
    .replace(/\\/g, '\\\\')
    .replace(/'/g, "\\'")
    .replace(/\r/g, '\\r')
    .replace(/\n/g, '\\n');
  return `'${escaped}'`;
}