diff options
Diffstat (limited to 'tree-sitter/dsk/dsk-cli/src/utils/grammar-generator.ts')
-rw-r--r-- | tree-sitter/dsk/dsk-cli/src/utils/grammar-generator.ts | 466 |
1 files changed, 466 insertions, 0 deletions
/**
 * Grammar Generator
 *
 * Converts collected user information into Tree-sitter grammar.js files
 * with paradigm-aware rule generation
 */

import type { LanguageArchitecture, LanguageFeatures, LanguageSyntax } from '../commands/new.js';

/** One named entry of the generated grammar's `rules` object. */
export interface GrammarRule {
  /** Rule name as it appears on the left of `name: $ => ...`. */
  name: string;
  /** JavaScript source text of the rule body (Tree-sitter DSL expression). */
  definition: string;
  /** Optional human-readable note emitted above the rule. */
  comment?: string;
}

/** In-memory description of a grammar, ready to be rendered to grammar.js. */
export interface GeneratedGrammar {
  name: string;
  rules: GrammarRule[];
  extras: string[];
  conflicts: string[][];
  precedences: string[][];
  word?: string;
}

/**
 * Generate a complete Tree-sitter grammar from user specifications.
 *
 * Rules are collected in a fixed order: root rule, token rules,
 * paradigm rules, then (when requested) data-structure and control-flow rules.
 *
 * NOTE(review): several rule names referenced by the generated definitions
 * (for example `$.parenthesized_expression`, `$.binary_expression`,
 * `$.expression_statement`, `$.function_call`) are not produced by any
 * generator in this file — confirm they are supplied elsewhere before
 * feeding the result to `tree-sitter generate`.
 *
 * @param architecture - Language name and paradigm.
 * @param features - Selected data structures, control flow and function types.
 * @param syntax - User-provided token patterns and syntax examples.
 * @returns The assembled grammar description.
 */
export function generateGrammar(
  architecture: LanguageArchitecture,
  features: LanguageFeatures,
  syntax: LanguageSyntax
): GeneratedGrammar {
  const rules: GrammarRule[] = [
    // The root rule varies by paradigm and must come first.
    generateRootRule(architecture, features),
    ...generateTokenRules(syntax),
    ...generateParadigmRules(architecture, features, syntax),
  ];

  if (features.dataStructures.length > 0) {
    rules.push(...generateDataStructureRules(features.dataStructures));
  }

  if (features.controlFlow.length > 0) {
    rules.push(...generateControlFlowRules(features.controlFlow, syntax));
  }

  // Extras: whitespace plus the language's line-comment rule.
  const extras: string[] = ['/\\s/', `$.${getCommentRuleName(syntax.comments.pattern)}`];

  return {
    name: architecture.name,
    rules,
    extras,
    conflicts: [], // TODO: Add conflicts if needed
    precedences: generatePrecedences(architecture, features),
    word: 'identifier' // Most languages use identifier as word token
  };
}

/**
 * Generate the root rule based on language paradigm.
 *
 * @param architecture - Supplies the paradigm that selects the root shape.
 * @param features - Currently unused; kept for signature stability.
 * @returns The `source_file` rule.
 */
function generateRootRule(architecture: LanguageArchitecture, features: LanguageFeatures): GrammarRule {
  let definition: string;

  switch (architecture.paradigm) {
    case 'declarative':
      definition = 'repeat(choice($.rule_declaration, $.constraint, $.fact))';
      break;
    case 'functional':
      definition = 'repeat(choice($.function_definition, $.expression, $.binding))';
      break;
    case 'object-oriented':
      definition = 'repeat(choice($.class_definition, $.statement, $.expression))';
      break;
    default:
      // 'procedural', 'mixed' and any unrecognised paradigm share this shape.
      definition = 'repeat(choice($.statement, $.expression, $.declaration))';
      break;
  }

  return {
    name: 'source_file',
    definition,
    comment: `Root rule for ${architecture.paradigm} language`
  };
}

/**
 * Generate basic token rules (identifiers, numbers, strings, comments).
 *
 * The identifier, number and string patterns are inserted verbatim between
 * `/` delimiters, so they are expected to already be regex source text.
 *
 * @param syntax - User-provided patterns and examples.
 * @returns Four rules: identifier, number, string and the line-comment rule.
 */
function generateTokenRules(syntax: LanguageSyntax): GrammarRule[] {
  const commentMarker = syntax.comments.pattern;

  return [
    {
      name: 'identifier',
      definition: `/${syntax.identifiers.pattern}/`,
      comment: `Identifiers: ${syntax.identifiers.examples.join(', ')}`
    },
    {
      name: 'number',
      definition: `/${syntax.numbers.pattern}/`,
      comment: `Numbers: ${syntax.numbers.examples.join(', ')}`
    },
    {
      name: 'string',
      definition: `/${syntax.strings.pattern}/`,
      comment: `Strings: ${syntax.strings.examples.join(', ')}`
    },
    {
      name: getCommentRuleName(commentMarker),
      // No trailing `$`: Tree-sitter's regex subset rejects assertions, and
      // `.` already stops at the end of the line.
      definition: `/${escapeRegex(commentMarker)}.*/`,
      comment: `Line comments starting with ${commentMarker}`
    }
  ];
}

/**
 * Generate paradigm-specific rules.
 *
 * Always emits `variable_declaration` and `expression`; adds `statement`
 * for procedural, object-oriented and mixed paradigms; then adds the
 * class / function / declarative rule when the matching example exists.
 *
 * @param architecture - Supplies the paradigm.
 * @param features - Selected language features.
 * @param syntax - Variable syntax and paradigm examples.
 * @returns The paradigm rules in emission order.
 */
function generateParadigmRules(
  architecture: LanguageArchitecture,
  features: LanguageFeatures,
  syntax: LanguageSyntax
): GrammarRule[] {
  // JSON.stringify yields a double-quoted JS string literal with `"` and `\`
  // escaped, so user-supplied tokens cannot break the generated source.
  const quote = (token: string): string => JSON.stringify(token);
  const variables = syntax.variables;

  const rules: GrammarRule[] = [
    {
      name: 'variable_declaration',
      definition: `seq(${quote(variables.keyword)}, $.identifier, ${quote(variables.operator)}, $.expression, ${quote(variables.terminator)})`,
      comment: `Variable declarations: ${variables.example}`
    },
    // Expression rule (fundamental to all paradigms)
    generateExpressionRule(architecture, features)
  ];

  // Statement rule (for imperative paradigms)
  const imperativeParadigms = ['procedural', 'object-oriented', 'mixed'];
  if (imperativeParadigms.includes(architecture.paradigm)) {
    rules.push(generateStatementRule(architecture, features));
  }

  // Paradigm-specific constructs, only when the user gave an example.
  const examples = syntax.paradigmExamples;
  switch (architecture.paradigm) {
    case 'object-oriented':
      if (examples.class) {
        rules.push(generateClassRule(examples.class));
      }
      break;
    case 'functional':
      if (examples.function) {
        rules.push(generateFunctionRule(examples.function, features.functionTypes));
      }
      break;
    case 'declarative':
      if (examples.rule) {
        rules.push(generateDeclarativeRule(examples.rule));
      }
      break;
  }

  return rules;
}

/**
 * Generate expression rule based on paradigm.
 *
 * @param architecture - Binary expressions are skipped for 'declarative'.
 * @param features - Function types and data structures that add alternatives.
 * @returns The `expression` rule as a `choice(...)` of alternatives.
 */
function generateExpressionRule(architecture: LanguageArchitecture, features: LanguageFeatures): GrammarRule {
  const alternatives = [
    '$.identifier',
    '$.number',
    '$.string',
    '$.parenthesized_expression'
  ];

  if (features.functionTypes.length > 0) {
    alternatives.push('$.function_call');
  }

  if (features.dataStructures.includes('arrays')) {
    alternatives.push('$.array_literal');
  }
  if (features.dataStructures.includes('objects')) {
    alternatives.push('$.object_literal');
  }

  if (architecture.paradigm !== 'declarative') {
    alternatives.push('$.binary_expression');
  }

  return {
    name: 'expression',
    definition: `choice(${alternatives.join(', ')})`,
    comment: 'Expression rule covering all expression types'
  };
}

/**
 * Generate statement rule for imperative paradigms.
 *
 * @param architecture - Currently unused; kept for signature stability.
 * @param features - Control-flow selections that add statement kinds.
 * @returns The `statement` rule as a `choice(...)` of alternatives.
 */
function generateStatementRule(architecture: LanguageArchitecture, features: LanguageFeatures): GrammarRule {
  const alternatives = [
    '$.variable_declaration',
    '$.expression_statement'
  ];

  if (features.controlFlow.includes('conditionals')) {
    alternatives.push('$.if_statement');
  }
  if (features.controlFlow.includes('loops')) {
    alternatives.push('$.for_statement', '$.while_statement');
  }

  return {
    name: 'statement',
    definition: `choice(${alternatives.join(', ')})`,
    comment: 'Statement rule for imperative constructs'
  };
}

/**
 * Generate data structure rules.
 *
 * @param dataStructures - Selected structure kinds; 'arrays', 'objects'
 *   and 'tuples' are recognised, anything else is ignored.
 * @returns Literal rules for each recognised kind, in that order.
 */
function generateDataStructureRules(dataStructures: string[]): GrammarRule[] {
  const rules: GrammarRule[] = [];

  if (dataStructures.includes('arrays')) {
    rules.push({
      name: 'array_literal',
      definition: 'seq("[", optional(seq($.expression, repeat(seq(",", $.expression)))), "]")',
      comment: 'Array literals: [1, 2, 3]'
    });
  }

  if (dataStructures.includes('objects')) {
    rules.push(
      {
        name: 'object_literal',
        definition: 'seq("{", optional(seq($.property, repeat(seq(",", $.property)))), "}")',
        comment: 'Object literals: {key: value}'
      },
      {
        name: 'property',
        definition: 'seq(choice($.identifier, $.string), ":", $.expression)',
        comment: 'Object property: key: value'
      }
    );
  }

  if (dataStructures.includes('tuples')) {
    rules.push({
      name: 'tuple_literal',
      definition: 'seq("(", $.expression, repeat1(seq(",", $.expression)), ")")',
      comment: 'Tuple literals: (a, b, c)'
    });
  }

  return rules;
}

/**
 * Generate control flow rules.
 *
 * The shared `block` rule is emitted whenever conditionals OR loops are
 * selected, because `if_statement`, `while_statement` and `for_statement`
 * all reference `$.block`.
 *
 * @param controlFlow - Selected constructs; 'conditionals' and 'loops' are recognised.
 * @param syntax - Currently unused; kept for signature stability.
 * @returns Rules in the order: if_statement, block, while_statement, for_statement.
 */
function generateControlFlowRules(controlFlow: string[], syntax: LanguageSyntax): GrammarRule[] {
  const rules: GrammarRule[] = [];
  const wantsConditionals = controlFlow.includes('conditionals');
  const wantsLoops = controlFlow.includes('loops');

  if (wantsConditionals) {
    rules.push({
      name: 'if_statement',
      definition: 'seq("if", "(", $.expression, ")", $.block, optional(seq("else", choice($.if_statement, $.block))))',
      comment: 'If-else statements'
    });
  }

  if (wantsConditionals || wantsLoops) {
    rules.push({
      name: 'block',
      definition: 'seq("{", repeat($.statement), "}")',
      comment: 'Code blocks'
    });
  }

  if (wantsLoops) {
    rules.push(
      {
        name: 'while_statement',
        definition: 'seq("while", "(", $.expression, ")", $.block)',
        comment: 'While loops'
      },
      {
        name: 'for_statement',
        definition: 'seq("for", "(", optional($.statement), ";", optional($.expression), ";", optional($.expression), ")", $.block)',
        comment: 'For loops'
      }
    );
  }

  return rules;
}

/**
 * Generate class rule from user example.
 *
 * The example is only echoed into the rule comment; the rule shape is fixed.
 *
 * @param classExample - User-provided class snippet.
 * @returns The `class_definition` rule.
 */
function generateClassRule(classExample: string): GrammarRule {
  // Simple class rule - could be enhanced with more parsing
  return {
    name: 'class_definition',
    definition: 'seq("class", $.identifier, "{", repeat($.method_definition), "}")',
    comment: `Class definition based on: ${classExample}`
  };
}

/**
 * Generate function rule from user example.
 *
 * @param functionExample - User-provided function snippet (comment only).
 * @param functionTypes - When it contains 'anonymous', `$.arrow_function`
 *   is offered as an alternative.
 * @returns The `function_definition` rule.
 */
function generateFunctionRule(functionExample: string, functionTypes: string[]): GrammarRule {
  const namedFunction = 'seq("function", $.identifier, "(", optional($.parameter_list), ")", $.block)';
  const definition = functionTypes.includes('anonymous')
    ? `choice(${namedFunction}, $.arrow_function)`
    : namedFunction;

  return {
    name: 'function_definition',
    definition,
    comment: `Function definition based on: ${functionExample}`
  };
}

/**
 * Generate declarative rule from user example.
 *
 * @param ruleExample - User-provided rule snippet (comment only).
 * @returns The `rule_declaration` rule.
 */
function generateDeclarativeRule(ruleExample: string): GrammarRule {
  return {
    name: 'rule_declaration',
    definition: 'seq("rule", $.identifier, optional(seq("when", $.expression)))',
    comment: `Rule declaration based on: ${ruleExample}`
  };
}
/**
 * Generate precedences based on paradigm.
 *
 * NOTE(review): each inner array holds a single rule reference, and
 * `$.binary_expression` is listed even for paradigms whose expression rule
 * does not include it — confirm this is the intended precedence layout.
 *
 * @param architecture - Currently unused; kept for signature stability.
 * @param features - `$.function_call` is added when any function type is selected.
 * @returns Precedence groups, each rendered later as one array literal.
 */
function generatePrecedences(architecture: LanguageArchitecture, features: LanguageFeatures): string[][] {
  // Basic precedence for binary operations
  const precedences: string[][] = [['$.binary_expression']];

  if (features.functionTypes.length > 0) {
    precedences.push(['$.function_call']);
  }

  return precedences;
}

/**
 * Append one `field: $ => [ ... ]` section to the output, preceded by the
 * comma that terminates the previous section. Does nothing for an empty list.
 */
function appendArrayField(lines: string[], field: string, entries: string[]): void {
  if (entries.length === 0) {
    return;
  }

  lines.push(',');
  lines.push('');
  lines.push(` ${field}: $ => [`);
  entries.forEach((entry, index) => {
    const separator = index < entries.length - 1 ? ',' : '';
    lines.push(` ${entry}${separator}`);
  });
  lines.push(' ]');
}

/**
 * Generate the complete grammar.js file content.
 *
 * Sections are written in the order: header comment, `name`, optional
 * `word`, `rules`, then `extras`, `conflicts` and `precedences` when they
 * are non-empty.
 *
 * @param grammar - Grammar description to render.
 * @returns JavaScript source text, terminated by a newline.
 */
export function generateGrammarFile(grammar: GeneratedGrammar): string {
  const lines: string[] = [
    '/**',
    ` * Grammar for ${grammar.name}`,
    ' * Generated by DSK (DSL Development Kit)',
    ' */',
    '',
    'module.exports = grammar({',
    ` name: '${grammar.name}',`,
    ''
  ];

  // Add word token if specified
  if (grammar.word) {
    lines.push(` word: $ => $.${grammar.word},`);
    lines.push('');
  }

  lines.push(' rules: {');

  const lastRuleIndex = grammar.rules.length - 1;
  grammar.rules.forEach((rule, index) => {
    if (rule.comment) {
      // Comments embed user-supplied examples that may span several lines;
      // every line needs its own `//` or the rest lands in the file as code.
      for (const commentLine of rule.comment.split(/\r?\n/)) {
        lines.push(` // ${commentLine}`);
      }
    }

    const isLast = index >= lastRuleIndex;
    lines.push(` ${rule.name}: $ => ${rule.definition}${isLast ? '' : ','}`);
    if (!isLast) {
      lines.push('');
    }
  });

  lines.push(' }');

  appendArrayField(lines, 'extras', grammar.extras);
  appendArrayField(lines, 'conflicts', grammar.conflicts.map(conflict => `[${conflict.join(', ')}]`));
  appendArrayField(lines, 'precedences', grammar.precedences.map(prec => `[${prec.join(', ')}]`));

  lines.push('});');
  lines.push('');

  return lines.join('\n');
}

/**
 * Helper functions
 */

/**
 * Map a line-comment marker to the name of the grammar rule that matches it.
 *
 * @param commentPattern - The literal comment marker, e.g. `//`.
 * @returns A marker-specific rule name, or `line_comment` for unknown markers.
 */
function getCommentRuleName(commentPattern: string): string {
  switch (commentPattern) {
    case '//': return 'line_comment_slash';
    case '#': return 'line_comment_hash';
    case ';': return 'line_comment_semicolon';
    default: return 'line_comment';
  }
}

/**
 * Escape a literal string for use inside a `/.../` regex literal.
 *
 * Besides the regex metacharacters, `/` is escaped too: the result is
 * written between slash delimiters in generated JavaScript, where an
 * unescaped `//` would start a line comment.
 *
 * @param pattern - Literal text to match.
 * @returns Regex source that matches `pattern` verbatim.
 */
function escapeRegex(pattern: string): string {
  return pattern.replace(/[.*+?^${}()|[\]\\\/]/g, '\\$&');
}