diff options
Diffstat (limited to 'js/baba-yaga/experimental')
-rw-r--r-- | js/baba-yaga/experimental/COMPILER.md | 270 | ||||
-rw-r--r-- | js/baba-yaga/experimental/availability.baba | 108 | ||||
-rw-r--r-- | js/baba-yaga/experimental/compiler.js | 728 | ||||
-rw-r--r-- | js/baba-yaga/experimental/fmt/fmt-README.md | 338 | ||||
-rw-r--r-- | js/baba-yaga/experimental/fmt/fmt.js | 700 | ||||
-rw-r--r-- | js/baba-yaga/experimental/parity.js | 137 |
6 files changed, 2281 insertions, 0 deletions
diff --git a/js/baba-yaga/experimental/COMPILER.md b/js/baba-yaga/experimental/COMPILER.md new file mode 100644 index 0000000..d451334 --- /dev/null +++ b/js/baba-yaga/experimental/COMPILER.md @@ -0,0 +1,270 @@ +## Baba Yaga → JavaScript Compiler (Combinator-Based) + +This document describes a minimal, practical compiler from Baba Yaga to JavaScript using abstraction elimination into combinators. It reuses the existing front-end (`lexer.js`, `parser.js`) and targets a small JS runtime that implements SKI-family combinators and primitives consistent with `interpreter.js` semantics. The working implementation now lives under `experimental/`. + +### Goals +- Compile any program accepted by the current `parser.js` and executed by `interpreter.js`. +- Preserve semantics: numbers with `{ value, isFloat }`, pure and immutable data, partial application, namespacing, and “when” pattern matching. +- Keep changes additive: new `experimental/compiler.js` and a small runtime; no required changes to lexer/parser/interpreter. +- Output readable, runnable JS with a tiny explicit runtime. + +### Non-Goals (initially) +- Advanced type-directed optimizations or codegen from declared types. +- Mutual recursion via pure combinator fixpoints (we’ll use two-phase `let`+assignment for simplicity). +- Aggressive size-optimizing bracket abstraction (we can add B/C/η later). + +## High-Level Architecture +1. Front-end: reuse `createLexer` and `createParser` to produce AST. +2. Core normalization pass (implemented in `experimental/compiler.js`): + - Desugar to a small core language (λ, application, identifiers, literals, primitives, data). + - Convert infix operators and keywords into primitive function calls. + - Normalize multi-arg functions to nested single-arg lambdas. + - Lower member access to `get key obj` form. + - Lower `when` to a chain of guarded branches with thunks. +3. Abstraction elimination (bracket abstraction) or closure-based codegen (mode-dependent) in `experimental/compiler.js`: remove λs into SKI-family combinators, or emit JS closures directly when selected. +4. Emission: generate JS module text that includes a tiny runtime and the compiled program. + +## Source Language Surface (from parser/interpreter) +- Program-level: `TypeDeclaration`, `VariableDeclaration`, `FunctionDeclaration`, expressions. +- Expressions: numbers, strings, booleans, identifiers, anonymous functions, application, unary/binary ops, lists, tables, member access, `ResultExpression` (Ok/Err), `WhenExpression` (patterns: type, result, list, table, wildcard), curried and typed functions. +- Namespaces: `io`, `str`, `math`, list/table functions (append, map, reduce, etc.) via native functions. + +## Core IR (post-desugar) +Use a minimal lambda calculus with primitives and data literals: +- Term := `Var(name)` | `Lam(param, term)` | `App(term, term)` | `Const(name)` | `Lit(<number|string|boolean|list|table|result>)`. +- No infix ops; all are `Const` applied to args. +- Member access lowered to `App(App(Const('get'), key), obj)`. +- `when` lowered to nested `cond` forms with thunks. + +### Desugaring Rules (selected) +- Multi-arg function `f: (x: T, y: U) -> R -> body` → `Lam(x, Lam(y, body))` with type metadata discarded at runtime. +- Untyped function `x y -> body` → `Lam(x, Lam(y, body))`. +- Anonymous `(x y -> e)` → `Lam(x, Lam(y, e))`. +- Unary minus: `-e` → `App(Const('neg'), e)`. +- Binary `e1 op e2` → `App(App(Const(opName), e1), e2)` where `opName ∈ { add, sub, mul, div, mod, eq, neq, gt, lt, gte, lte, and, or, xor, concatDot }`. +- Member `obj.prop` or `obj.0` → `App(App(Const('get'), keyLit), obj)`. +- `Ok e` → `App(Const('Ok'), e)`; `Err e` → `App(Const('Err'), e)`. +- Lists `[e1, ..., en]` → `Lit([...])` (JS arrays); tables `{ k: v, ... }` → `Lit({ ... })` or `Const('table')(...)` builder (see Runtime). +- `when`: + - `when d1 ... dn is p1 ... pn then c1; ...; _ then cK` → nested `cond` calls: + - `cond (match([p1..pn], [d1..dn])) (thunk(() => c1)) (thunk(() => cond (...)))`. + - Pattern matchers compile to booleans with helpers and do not bind new variables except `ResultPattern` which binds its value. + +## Bracket Abstraction +Eliminate lambdas into SKI-family combinators over the core term. Treat any `Var` not equal to the bound variable as free (terminal). Use the standard rules: +- [x]x → I +- [x]M where x ∉ FV(M) → K M +- [x](M N) → S ([x]M) ([x]N) + +Notes: +- Multi-arg λ: `[x][y]M` via nesting. +- We can later add optimizations: η-reduction, B/C introduction, constant folding of K-consts, etc. + +## Emission Strategy +Output a single JS module string with: +1. Runtime prelude (combinators, curry helpers, primitives, matchers, namespaces). +2. Top-level declarations: + - Functions: declare `let f;` for each function name first (supports mutual recursion), then `f = <compiled expression>;`. + - Variables: `const x = <compiled expression>;`. +3. Exports and format: by default emit UMD for maximal portability (Node/CommonJS, Bun, browsers). Also support format selection via CLI. + +### Module formats, exports, and codegen modes +- Default: UMD wrapper that exposes named exports under `module.exports` (CJS), `define` (AMD when available), and global `BabaYaga` namespace in browsers. +- Optional formats: `--format esm|cjs|umd` to force a specific target. ESM emits `export` statements; CJS uses `module.exports = { ... }`. + - Codegen mode: `--mode ski|closure|hybrid` (default: `ski`). + - `ski`: full combinatorization after normalization. + - `closure`: emit JS closures with currying helpers (easiest parity path, useful for debugging and when SKI would bloat output). + - `hybrid`: use closures for complex constructs (e.g., deeply nested `when`, heavy pattern matching) and SKI for simple lambdas; implementation-defined heuristics. + +### Entry points and exports +- Compiled module should export named bindings for all top-level declarations. +- Additionally, export a single entry for executing a program: + - `run(host)` preferred: evaluates the compiled `Program` under the provided `host.io` and returns the last expression value. + - Fallback: `main(host)` if a user-defined `main` exists; parity harness will try `run` then `main`. +- Host wiring: prefer a lightweight `run(host)` that creates the runtime namespaces based on `host`. Avoid global side-effects. + +### Recursion +- Self-recursion: works via two-phase assignment (`let f; f = ...`), because references resolve at call time. +- Mutual recursion: also works via predeclared `let` for all functions then assignments. +- Optional: pure combinator fixpoint `Z` for self-recursion; not required initially. + +## Runtime Design (tiny and explicit) + +### Combinators and Helpers +- `I(a) = a` +- `K(a)(b) = a` +- `S(f)(g)(x) = f(x)(g(x))` +- Optional: `B(f)(g)(x) = f(g(x))`, `C(f)(x)(y) = f(y)(x)`, `Z(f) = (g => g(g))(g => f(x => g(g)(x)))` for CBV. +- `curryN(n, f)` and arity-specific `curry2`, `curry3` for primitives. +- `thunk(f)` returns `f` (we pass thunks to `cond`). +- `cond(p)(t)(e) = p ? t() : e()`. + +### Numeric and Data Representation +- Numbers are `{ value: number, isFloat: boolean }` to match interpreter. +- Lists as JS arrays; tables as `{ type: 'Object', properties: Map }` built via a small `table(obj)` helper that constructs a `Map`. Access is always via `get(key)(obj)` in compiled code. We will NOT use a Proxy in compiled output; this keeps runtime simple and predictable while preserving interpreter semantics for member/index access. + +### Runtime namespace and symbol hygiene +- Prefix all runtime symbols with `__rt` (e.g., `__rt.I`, `__rt.add`, `__rt.get`) to avoid collisions with user-defined names. +- User symbols (top-level functions/vars) are emitted in their original names and exported by the module wrapper. + +### Primitives (curried) +- Arithmetic: `add, sub, mul, div, mod, neg` (unwrap/rewrap numbers; div guards divide-by-zero like interpreter). +- Compare: `eq, neq, gt, lt, gte, lte` (numeric unwrapping + generic string/boolean). +- Logic: `and, or, xor` (booleanize inputs). +- String: `concatDot` for `..` (String(lhs) + String(rhs)). +- Result: `Ok, Err` → `{ type: 'Result', variant: 'Ok'|'Err', value }`. +- Core ops: `get`, and helpers for lists/tables (append, prepend, concat, update, removeAt, slice, set, remove, merge, keys, values) mirroring interpreter. In compiled code, `get` returns `undefined` for missing keys/indices (including out-of-bounds array access) to match interpreter behavior. +- Namespaces: `io`, `str`, `math` exposing same functions/signatures; `io` is parameterized by a passed-in host object for integration. +- Matchers: `isType(expected, value)`, `matchList(pattern, value)`, `matchTable(pattern, value)`, `isOk`, `isErr`. + +### Pattern Matching Lowering +- Each `when` case compiles to `cond(guard)(() => consequent)(() => next)`, ensuring short-circuiting and correct evaluation order. +- Compound guards (e.g., multi-element list/table checks) are lazily folded using nested `cond` so that later predicates are not evaluated if earlier ones fail. This avoids unintended member/index access and preserves semantics. +- Guards are pure booleans; consequents compiled normally; for `ResultPattern Ok x`, the consequent becomes a λ bound to the inner value and is applied on the success branch. +- Switch micro-optimization: when there is exactly one discriminant and all patterns are literals with no bindings or type/list/table patterns, the compiler may emit a JavaScript `switch` on a normalized key (numbers unwrap to `.value`) for readability/perf. All other cases use the `cond`/if-else chain. + +## Mapping from AST → IR → JS (node-by-node) + +- Program: collect top-level function names; emit `let` declarations; then emit assignments/consts in order. Optionally return last expression for REPL mode. +- TypeDeclaration: ignored at runtime; optionally preserved as comments. +- VariableDeclaration: `const name = compile(expr)`. +- FunctionDeclaration/AnonymousFunction: normalize to nested `Lam`, then bracket abstraction, then emit the combinator tree expression. +- FunctionCall: normal application; arity handled at runtime (curried). +- NumberLiteral: `{ value: n, isFloat }`. String/Boolean: JS literals. +- UnaryExpression `-x`: `neg x`. BinaryExpression: map to primitive names. +- MemberExpression: `get key obj` (curried). +- ListLiteral/TableLiteral: JS arrays + `table` builder (to get `{type:'Object',properties:Map}`) where needed. +- ResultExpression: `Ok v`/`Err v`. +- WhenExpression: compiled to `cond` chain with matchers. + +## Example (sketch) + +Source: +```baba +add: (x: Number, y: Number) -> Number -> x + y; +main: -> add 2 3; +``` + +IR (informal): +```text +add = Lam(x, Lam(y, App(App(Const('add'), Var(x)), Var(y)))) +main = Lam(_unit, App(App(Var(add), Lit({2})), Lit({3}))) +``` + +After abstraction elimination (SKI form, schematic): +```text +add = S (K (S (K add) I)) (K (S (K ...) ...)) // produced by the standard rules +``` + +Emitted JS (abridged): +```js +// runtime: S,K,I,curry2, add, ... +let add; +add = /* SKI of λx.λy. add x y */; +const main = /* SKI of λ_. add 2 3 */; +export { add, main }; +``` + +## Evaluation Strategy +- We generate eager JS; to preserve lazy branches in `when`, consequents are wrapped in thunks and forced via `cond`. +- Curried primitives plus normal JS evaluation replicates call-by-value with partial application like the interpreter. + +## Testing Strategy +- Golden tests: compile and run existing `tests/*.test.js`-backed programs using both the interpreter and compiled output; assert observational equivalence of results/side-effects. +- Parity harness: + - For each sample program (or inline snippet), run: `interpret(ast)` and `execute(compiledJS)` with same `host`. + - Compare returned value shapes; for numbers compare `.value` and `isFloat`; for lists/tables structural equality; for `Result` `variant` and inner value. + - Capture `io.out` writes from both executions and compare sequences. +- CI entry: add a job that compiles a subset of fixtures and runs them; failures show diff of values and `io.out`. +- Edge cases: negative numbers vs unary minus; division by zero; list/table bounds; member access; nested `when` with short-circuiting; partial application and closures; recursion and mutual recursion; Ok/Err binding; boolean/logical ops precedence. + +### Parity harness usage +- A simple harness exists in `parity.js`: + - Runs interpreter and compiled module, normalizes results to plain JS via a deep-plain conversion, and compares `io.out` arrays. + - It currently imports the compiled module via a data URL and looks for `run(host)` or `main([host])` exports. + - Default compiler mode for parity is `closure` to reduce bring-up friction; once SKI is ready, switch to `--mode ski`. + +Examples: +```sh +node parity.js ../example.baba +``` +or via package script: +```sh +bun run parity +``` + +## Status and Next Steps + +What’s working now (closure mode MVP): +- Normalization covers literals, identifiers, anonymous and declared functions (curried), calls, unary minus, binary ops (+ - * / % .. and compare/logic), member access via `get`, `Ok`/`Err`, list/table literals, and a basic `when` to `Cond` lowering with thunks. +- Emission produces an ESM module exporting `run(host)`, supporting recursion via let-first then assignment. +- Runtime prelude implements: numbers with `{ value, isFloat }`, curry helpers, arithmetic/compare/logic, `concatDot`, `Ok/Err`, `get`, `cond`, `io` wiring, `table` builder, list HOFs (map/filter/reduce), and core `str.*` helpers. +- Parity harness loads ESM via data URL and can compare interpreter vs compiled; simple programs pass. + +Actionable next steps (ordered): +1) Runtime parity with interpreter +- Implement missing list/table primitives: `append`, `prepend`, `concat`, `update`, `removeAt`, `slice`, `set`, `remove`, `merge`, `keys`, `values` with identical semantics (indices unwrap numbers; maps remain `Map`). +- Implement `math.*` namespace mirroring `interpreter.js` (abs, sign, floor, ceil, round, trunc, min, max, clamp, pow, sqrt, exp, log, sin, cos, tan, asin, acos, atan, atan2, deg, rad, random, randomInt). +- Consider `io.emit`/`io.listen` to support host event wiring if compiled code uses them. + +2) When/pattern matching completeness +- Extend lowering to fully support: multi-discriminant patterns, `TypePattern`, `ListPattern`, `TablePattern`, and `ResultPattern` with bindings. Ensure every branch is thunked for short-circuit evaluation. Add switch micro-optimization for single discriminant with literal-only patterns (optional). + +3) Emission and module formats +- Add named exports for top-level declarations (in addition to `run(host)`). +- Implement `wrapModule` for `umd` and `cjs` formats. Keep `esm` as the default for parity. +- Add `--pretty` and `--sourcemap inline|file` support per the spec. + +4) Codegen modes +- Implement SKI mode with bracket abstraction (`I/K/S` at minimum) and corresponding runtime. +- Implement hybrid heuristics: closures for complex `when`/deep terms, SKI for simple lambdas. + +5) CLI and DX +- Integrate flags into `runner.js` or provide a `bin/compile` entry. Support `--runtime external:path` to reference an external runtime. + +6) Testing and CI +- Run fixture programs under both interpreter and compiler; compare values and `io.out` via parity harness. +- Add CI job to compile and run a representative subset; show diffs on failure. + +Notes/pitfalls to keep in mind +- Numbers must preserve `{ value, isFloat }` across all operations and boundaries. +- `get` must support arrays (numeric index) and tables (Map by key). Member access errors should match interpreter behavior. +- `when` consequents must not be evaluated eagerly; use thunks and `__rt.cond`. +- Keep runtime symbol hygiene: all helpers live under `__rt.*` to avoid user name collisions. + +## CLI +- Extend `runner.js` (or add `bin/compile`) with: + - `--compile <input.baba>`: compile input file + - `-o, --out <out.js>`: output file path + - `--format <esm|cjs|umd>`: module format (default: `umd`) + - `--mode <ski|closure|hybrid>`: code generation mode (default: `ski`) + - `--runtime <inline|external:path>`: inline runtime by default; or reference external runtime + - `--sourcemap <inline|file|none>`: source map emission (default: `none` initially) + - `--pretty`: pretty-print emitted JS (default off) + +Example: +```sh +bun run runner.js --compile src/main.baba -o dist/main.js --format umd --sourcemap file +``` + +## Source maps (planned) +- Nice-to-have; implement after parity. Approach: + - Thread source locations from `parser.js` into the core IR nodes. + - Generate mappings at IR application/literal/const boundaries rather than raw SKI nodes. + - Support `--sourcemap inline|file` to control emission. + - Mode-aware mapping: in `closure` mode, map directly at emitted closure expressions; in `ski`/`hybrid` modes, map at IR boundaries pre-elimination to keep maps stable. + +## Open Questions / TODO +- TODO: Confirm semantics for pattern matching on nested member expressions inside patterns (limited in current parser). +- TODO: Implement source maps per the plan once parity is achieved. + +## New files and changes +- Added `experimental/compiler.js`: scaffold with `compile`, normalization and codegen stubs, emission and runtime prelude stubs, and a minimal developer CLI. +- Added `experimental/parity.js`: parity harness described above. +- Updated `package.json` scripts with `parity` command. +- `runtime.js`: optional future extraction if we decide not to inline the prelude. + +## Minimalism and Parity +This approach is intentionally minimal: no changes to existing front-end/runtime semantics, small targeted runtime, and direct syntactic lowering. The initial goal is parity with `interpreter.js`; once achieved, we can iterate on size and performance optimizations. + + diff --git a/js/baba-yaga/experimental/availability.baba b/js/baba-yaga/experimental/availability.baba new file mode 100644 index 0000000..4bc59f8 --- /dev/null +++ b/js/baba-yaga/experimental/availability.baba @@ -0,0 +1,108 @@ +// Availability via 24-block boolean mask +// WIP + +// range0: [0..n-1] +range0 : n -> + when n is + 0 then [] + _ then append (range0 (n - 1)) (n - 1); + +// overlaps: does busy interval b overlap block i? +overlaps : b i dayStart blockSize -> + (b.start < (dayStart + (i * blockSize) + blockSize)) and (b.end > (dayStart + (i * blockSize))); + +// anyBusyOnBlock: reduce OR over busy intervals for block i +anyBusyOnBlock : busyList i dayStart blockSize -> + reduce (acc b -> acc or (overlaps b i dayStart blockSize)) false busyList; + +// allFreeOnBlock: everyone free on block i? +allFreeOnBlock : busyLists i dayStart blockSize -> + reduce (acc bl -> acc and (when (anyBusyOnBlock bl i dayStart blockSize) is true then false _ then true)) true busyLists; + +// stepRuns: accumulate contiguous free runs over blocks with min-block filtering +stepRuns : busyLists dayStart blockSize minBlocks acc i -> + when acc.inRun is + true then + when (allFreeOnBlock busyLists i dayStart blockSize) is + true then { inRun: true, start: acc.start, runs: acc.runs } + _ then ( + when ((i - acc.start) >= minBlocks) is + true then { inRun: false, start: 0, runs: append acc.runs { start: acc.start, end: i } } + _ then { inRun: false, start: 0, runs: acc.runs } + ) + _ then + when (allFreeOnBlock busyLists i dayStart blockSize) is + true then { inRun: true, start: i, runs: acc.runs } + _ then acc; + +// finalizeRuns: close trailing run if needed (with min-block filtering) +finalizeRuns : acc totalBlocks minBlocks -> + when acc.inRun is + true then ( + when ((totalBlocks - acc.start) >= minBlocks) is + true then append acc.runs { start: acc.start, end: totalBlocks } + _ then acc.runs + ) + _ then acc.runs; + +// convertRunsToMinutes: map block runs to minute intervals (shape-guarded) +convertRunsToMinutes : runs dayStart blockSize -> + when (shape runs).kind is + "List" then map (r -> { start: dayStart + (r.start * blockSize), end: dayStart + (r.end * blockSize) }) runs + _ then []; + +// takeLimit: slice helper +takeLimit : limit lst -> slice lst 0 (math.min (length lst) limit); + +// findCommonAvailability using mask approach +// runnerFor: folder factory capturing inputs, returns (acc i -> ...) +runnerFor : calendars dayStart dayEnd minMinutes acc i -> + stepRuns (values calendars) dayStart ((dayEnd - dayStart) / 24) (math.ceil (minMinutes / ((dayEnd - dayStart) / 24))) acc i; + +// buildRuns: produce runs list from inputs +buildRuns : calendars dayStart dayEnd minMinutes -> + finalizeRuns ( + reduce (runnerFor calendars dayStart dayEnd minMinutes) + { inRun: false, start: 0, runs: [] } + (range0 24) + ) 24 (math.ceil (minMinutes / ((dayEnd - dayStart) / 24))); + +// slotsFor: convert runs to minute intervals +slotsFor : calendars dayStart dayEnd minMinutes -> + convertRunsToMinutes (buildRuns calendars dayStart dayEnd minMinutes) dayStart ((dayEnd - dayStart) / 24); + +// findCommonAvailability: top-level pipeline +findCommonAvailability : calendars dayStart dayEnd minMinutes limit -> + takeLimit limit (slotsFor calendars dayStart dayEnd minMinutes); + +// ---------- Example usage ---------- + +// Working window 09:00-17:00 +dayStart : 9 * 60; // 540 +dayEnd : 17 * 60; // 1020 +minSlot : 30; // minutes +limit : 5; + +// Calendars: each value is a sorted list of busy intervals { start, end } in minutes +calendars : { + alice: [ + { start: 570, end: 630 } + { start: 720, end: 780 } + { start: 960, end: 1020 } + ], + bob: [ + { start: 540, end: 555 } + { start: 660, end: 720 } + { start: 840, end: 870 } + ], + carol: [ + { start: 600, end: 660 } + { start: 750, end: 810 } + { start: 915, end: 960 } + ] +}; + +io.out "loaded"; +available : findCommonAvailability calendars dayStart dayEnd minSlot limit; +io.out "done"; +io.out available; diff --git a/js/baba-yaga/experimental/compiler.js b/js/baba-yaga/experimental/compiler.js new file mode 100644 index 0000000..d5c70ce --- /dev/null +++ b/js/baba-yaga/experimental/compiler.js @@ -0,0 +1,728 @@ +// compiler.js +// +// High-level compiler scaffold for Baba Yaga → JavaScript according to COMPILER.md. +// This file intentionally contains detailed structure and JSDoc/TODOs so that +// an engineer can implement the compiler incrementally with minimal ambiguity. +// +// Overview of the pipeline implemented here (stubs): +// - lex + parse → AST (using existing lexer.js, parser.js) +// - normalize → Core IR (operators→primitives, currying, member→get, when→cond) +// - codegen mode: +// - ski: bracket abstraction to SKI combinators and emit with runtime +// - closure: direct JS closures with currying helpers +// - hybrid: heuristics to mix both +// - emit module (UMD default, ESM/CJS available) +// - optional CLI entry (use runner.js for project CLI; this is dev-friendly only) + +// NOTE: This file is deprecated; the active compiler lives at experimental/compiler/compiler.js +// It re-exports from the new location to preserve existing imports. + +import { createLexer } from '../lexer.js'; +import { createParser } from '../parser.js'; +export { compile, DEFAULT_COMPILE_OPTIONS, normalizeAstToIr, lowerIrToCodeIr, applyBracketAbstraction, applyHybridLowering, emitModule, emitProgramBody, generateRuntimePrelude, wrapModule } from './compiler.js'; + +/** + * Compiler options with sensible defaults. Extend as needed. + * @typedef {Object} CompileOptions + * @property {'ski'|'closure'|'hybrid'} mode - Codegen mode. Default 'ski'. + * @property {'umd'|'esm'|'cjs'} format - Module format. Default 'umd'. + * @property {'inline'|{ externalPath: string }} runtime - Runtime placement. + * @property {'none'|'inline'|'file'} sourcemap - Source map emission. + * @property {boolean} pretty - Pretty-print output. + * @property {string} moduleName - UMD global name for browser builds. + */ + +/** @type {CompileOptions} */ +export const DEFAULT_COMPILE_OPTIONS = { + mode: 'ski', + format: 'umd', + runtime: 'inline', + sourcemap: 'none', + pretty: false, + moduleName: 'BabaYaga', +}; + +/** + * Compile Baba Yaga source text to JavaScript. + * Orchestrates: lex → parse → normalize → codegen → emit. + * + * TODO: implement each stage; keep interfaces stable per COMPILER.md. + * + * @param {string} source - Baba Yaga program text + * @param {Partial<CompileOptions>} [options] - compiler options + * @returns {{ code: string, map?: string, diagnostics: Array<{kind:string,message:string,span?:any}> }} + */ +export function compile(source, options = {}) { + const opts = { ...DEFAULT_COMPILE_OPTIONS, ...options }; + + // 1) Front-end: lex + parse + const lexer = createLexer(source); + const tokens = lexer.allTokens(); + const parser = createParser(tokens); + const ast = parser.parse(); + + // 2) Normalization: AST → Core IR + const irProgram = normalizeAstToIr(ast, { mode: opts.mode }); + + // 3) Codegen: IR → Code IR (SKI tree or closure terms) + const codeIr = lowerIrToCodeIr(irProgram, { mode: opts.mode }); + + // 4) Emit: JS text (+ runtime prelude), UMD/ESM/CJS + const { code, map } = emitModule(codeIr, { + format: opts.format, + mode: opts.mode, + runtime: opts.runtime, + sourcemap: opts.sourcemap, + pretty: opts.pretty, + moduleName: opts.moduleName, + }); + + return { code, map, diagnostics: [] }; +} + +// ============================= +// IR definitions (documentation) +// ============================= + +/** + * Core IR after normalization (see COMPILER.md → Core IR). + * We represent terms as plain JS objects with a minimal tag. + * + * Term kinds: + * - Var: { kind:'Var', name:string } + * - Lam: { kind:'Lam', param:string, body:Term } + * - App: { kind:'App', func:Term, arg:Term } + * - Const: { kind:'Const', name:string } // runtime primitive or top-level symbol + * - Lit: { kind:'Lit', value:any } // numbers are {value,isFloat}, lists arrays, tables Map-wrapped objects + * - Cond: { kind:'Cond', predicate:Term, ifTrue:Term, ifFalse:Term } // used for lowered when + * + * Program: { kind:'Program', decls:Array<Decl> } + * Decl: FunctionDecl | VarDecl + * - FunctionDecl: { kind:'FunctionDecl', name:string, arity:number, body:Term } + * - VarDecl: { kind:'VarDecl', name:string, value:Term } + */ + +/** + * Normalize parsed AST into Core IR. + * - Converts infix ops to `Const` calls (e.g., add/sub/...) + * - Curries multi-arg lambdas into nested `Lam` + * - Lowers member access to `get` primitive calls + * - Lowers `when` to `Cond` with thunked branches (thunks become `Lam(_).body` applied later) + * - Converts Ok/Err, lists, tables into `Lit`/`Const` per COMPILER.md + * + * @param {any} ast - AST from parser.js + * @param {{ mode: 'ski'|'closure'|'hybrid' }} ctx + * @returns {{ kind:'Program', decls:Array<any> }} + */ +export function normalizeAstToIr(ast, ctx) { + /** + * Transform a parser AST node into Core IR Term. + */ + function lowerExpr(node) { + if (!node) return { kind: 'Lit', value: undefined }; + switch (node.type) { + case 'NumberLiteral': + return { kind: 'Lit', value: { type: 'Number', value: node.value, isFloat: !!node.isFloat } }; + case 'StringLiteral': + return { kind: 'Lit', value: { type: 'String', value: node.value } }; + case 'BooleanLiteral': + return { kind: 'Lit', value: { type: 'Boolean', value: node.value } }; + case 'Identifier': { + return { kind: 'Var', name: node.name }; + } + case 'AnonymousFunction': { + // Desugar multi-arg anonymous function to nested Lam + const params = node.params.map(p => (typeof p === 'string' ? p : p.name || p.value)); + let body = lowerExpr(node.body); + for (let i = params.length - 1; i >= 0; i--) { + body = { kind: 'Lam', param: params[i], body }; + } + return body; + } + case 'FunctionCall': { + let func = lowerExpr(node.callee); + for (const arg of node.arguments) { + func = { kind: 'App', func, arg: lowerExpr(arg) }; + } + return func; + } + case 'UnaryExpression': { + if (node.operator === '-') { + return { kind: 'App', func: { kind: 'Const', name: 'neg' }, arg: lowerExpr(node.operand) }; + } + throw new Error(`Unsupported unary operator: ${node.operator}`); + } + case 'BinaryExpression': { + const opMap = { + '+': 'add', + '-': 'sub', + '*': 'mul', + '/': 'div', + '%': 'mod', + '..': 'concatDot', + '=': 'eq', + '!=': 'neq', + '>': 'gt', + '<': 'lt', + '>=': 'gte', + '<=': 'lte', + and: 'and', + or: 'or', + xor: 'xor', + }; + const prim = opMap[node.operator]; + if (!prim) throw new Error(`Unknown operator: ${node.operator}`); + return { + kind: 'App', + func: { kind: 'App', func: { kind: 'Const', name: prim }, arg: lowerExpr(node.left) }, + arg: lowerExpr(node.right), + }; + } + case 'MemberExpression': { + // Lower to get key obj: App(App(Const('get'), key), obj) + const keyNode = node.property; + let keyLit; + if (keyNode.type === 'Identifier') { + keyLit = { kind: 'Lit', value: { type: 'String', value: keyNode.name } }; + } else if (keyNode.type === 'NumberLiteral') { + keyLit = { kind: 'Lit', value: { type: 'Number', value: keyNode.value, isFloat: !!keyNode.isFloat } }; + } else if (keyNode.type === 'StringLiteral') { + keyLit = { kind: 'Lit', value: { type: 'String', value: keyNode.value } }; + } else { + // Fallback: lower the property expression and hope it's literal-ish when emitted + keyLit = lowerExpr(keyNode); + } + const obj = lowerExpr(node.object); + return { kind: 'App', func: { kind: 'App', func: { kind: 'Const', name: 'get' }, arg: keyLit }, arg: obj }; + } + case 'ListLiteral': { + const elements = node.elements.map(lowerExpr); + return { kind: 'Lit', value: { type: 'List', elements } }; + } + case 'TableLiteral': { + const properties = node.properties.map(p => ({ key: p.key, value: lowerExpr(p.value) })); + return { kind: 'Lit', value: { type: 'Table', properties } }; + } + case 'ResultExpression': { + return { kind: 'App', func: { kind: 'Const', name: node.variant }, arg: lowerExpr(node.value) }; + } + case 'WhenExpression': { + const ds = node.discriminants.map(lowerExpr); + if (ds.length === 0) return { kind: 'Lit', value: undefined }; + // Helpers + const True = { kind: 'Lit', value: { type: 'Boolean', value: true } }; + const False = { kind: 'Lit', value: { type: 'Boolean', value: false } }; + const mkAnd = (a, b) => ({ kind: 'App', func: { kind: 'App', func: { kind: 'Const', name: 'and' }, arg: a }, arg: b }); + const mkEq = (a, b) => ({ kind: 'App', func: { kind: 'App', func: { kind: 'Const', name: 'eq' }, arg: a }, arg: b }); + const mkNum = (n) => ({ kind: 'Lit', value: { type: 'Number', value: n, isFloat: false } }); + const mkStr = (s) => ({ kind: 'Lit', value: { type: 'String', value: s } }); + const mkGet = (keyTerm, objTerm) => ({ kind: 'App', func: { kind: 'App', func: { kind: 'Const', name: 'get' }, arg: keyTerm }, arg: objTerm }); + + function buildPredicateForPattern(pat, discTerm) { + if (!pat) return False; + if (pat.type === 'WildcardPattern') return True; + if (pat.type === 'NumberLiteral' || pat.type === 'StringLiteral' || pat.type === 'BooleanLiteral') { + return mkEq(discTerm, lowerExpr(pat)); + } + if (pat.type === 'TypePattern') { + return { kind: 'App', func: { kind: 'App', func: { kind: 'Const', name: 'isType' }, arg: mkStr(pat.name) }, arg: discTerm }; + } + if (pat.type === 'ResultPattern') { + return { kind: 'App', func: { kind: 'App', func: { kind: 'Const', name: 'isResultVariant' }, arg: mkStr(pat.variant) }, arg: discTerm }; + } + if (pat.type === 'ListPattern') { + const n = pat.elements.length; + const preds = []; + preds.push(mkEq({ kind: 'App', func: { kind: 'Const', name: 'listLen' }, arg: discTerm }, mkNum(n))); + for (let j = 0; j < n; j++) { + const sub = pat.elements[j]; + if (sub.type === 'WildcardPattern') continue; + const elem = mkGet(mkNum(j), discTerm); + preds.push(mkEq(elem, lowerExpr(sub))); + } + // Fold with lazy conjunction using Cond to avoid evaluating later predicates when earlier fail + if (preds.length === 0) return True; + let folded = preds[0]; + for (let i = 1; i < preds.length; i++) { + folded = { kind: 'Cond', predicate: folded, ifTrue: preds[i], ifFalse: False }; + } + return folded; + } + if (pat.type === 'TablePattern') { + const preds = []; + preds.push({ kind: 'App', func: { kind: 'App', func: { kind: 'Const', name: 'isType' }, arg: mkStr('Table') }, arg: discTerm }); + for (const prop of pat.properties) { + const has = { kind: 'App', func: { kind: 'App', func: { kind: 'Const', name: 'has' }, arg: mkStr(prop.key) }, arg: discTerm }; + preds.push(has); + if (prop.value.type !== 'WildcardPattern') { + const eq = mkEq(mkGet(mkStr(prop.key), discTerm), lowerExpr(prop.value)); + preds.push(eq); + } + } + if (preds.length === 0) return True; + let folded = preds[0]; + for (let i = 1; i < preds.length; i++) { + folded = { kind: 'Cond', predicate: folded, ifTrue: preds[i], ifFalse: False }; + } + return folded; + } + // Fallback unsupported + return False; + } + + let chain = { kind: 'Lit', value: undefined }; + for (let i = node.cases.length - 1; i >= 0; i--) { + const c = node.cases[i]; + if (!c.patterns || c.patterns.length === 0) continue; + // Build predicate across all discriminants, folded lazily + const preds = []; + const binders = []; + for (let k = 0; k < Math.min(c.patterns.length, ds.length); k++) { + const p = c.patterns[k]; + preds.push(buildPredicateForPattern(p, ds[k])); + if (p.type === 'ResultPattern' && p.identifier && p.identifier.name) { + binders.push({ name: p.identifier.name, discIndex: k }); + } + } + let pred = preds.length ? preds[0] : True; + for (let j = 1; j < preds.length; j++) { + pred = { kind: 'Cond', predicate: pred, ifTrue: preds[j], ifFalse: False }; + } + // Build consequent, applying binders + let thenTerm = lowerExpr(c.consequent); + for (let b = binders.length - 1; b >= 0; b--) { + const valTerm = { kind: 'App', func: { kind: 'Const', name: 'resultValue' }, arg: ds[binders[b].discIndex] }; + thenTerm = { kind: 'App', func: { kind: 'Lam', param: binders[b].name, body: thenTerm }, arg: valTerm }; + } + chain = { kind: 'Cond', predicate: pred, ifTrue: thenTerm, ifFalse: chain }; + } + return chain; + } + default: + throw new Error(`normalize: unsupported AST node type ${node.type}`); + } + } + + function lowerFunctionLikeToLam(name, params, bodyNode) { + // Flatten curried/nested function bodies to a single final expression and parameter list + const flatParams = []; + const pushParam = (p) => flatParams.push(typeof p === 'string' ? p : (p && (p.name || p.value))); + params.forEach(pushParam); + + let bodyExprAst = bodyNode; + // Unwrap nested function declaration bodies + while (bodyExprAst && (bodyExprAst.type === 'FunctionDeclarationBody' || bodyExprAst.type === 'CurriedFunctionBody')) { + if (Array.isArray(bodyExprAst.params)) bodyExprAst.params.forEach(pushParam); + bodyExprAst = bodyExprAst.body; + } + let term = lowerExpr(bodyExprAst); + for (let i = flatParams.length - 1; i >= 0; i--) { + term = { kind: 'Lam', param: flatParams[i], body: term }; + } + return term; + } + + /** Build Program decls preserving order (functions declared first for recursion). */ + const funcDecls = []; + const otherDecls = []; + for (const stmt of ast.body || []) { + if (stmt.type === 'TypeDeclaration') { + continue; + } + if (stmt.type === 'FunctionDeclaration') { + const lam = lowerFunctionLikeToLam(stmt.name, stmt.params || [], stmt.body); + funcDecls.push({ kind: 'FunctionDecl', name: stmt.name, arity: (stmt.params || []).length, body: lam }); + continue; + } + if (stmt.type === 'CurriedFunctionDeclaration') { + const lam = lowerFunctionLikeToLam(stmt.name, [stmt.param], stmt.body); + funcDecls.push({ kind: 'FunctionDecl', name: stmt.name, arity: 1, body: lam }); + continue; + } + if (stmt.type === 'VariableDeclaration') { + otherDecls.push({ kind: 'VarDecl', name: stmt.name, value: lowerExpr(stmt.value) }); + continue; + } + // Expression statement + otherDecls.push({ kind: 'ExprStmt', expr: lowerExpr(stmt) }); + } + return { kind: 'Program', decls: [...funcDecls, ...otherDecls] }; +} + +/** + * Lower Core IR to Code IR depending on mode. + * - ski: apply bracket abstraction to produce SKI combinator trees + * - closure: keep `Lam` and emit closures later + * - hybrid: pick strategy per node heuristics (e.g., closure for complex Cond/when) + * + * @param {{ kind:'Program', decls:Array<any> }} irProgram + * @param {{ mode: 'ski'|'closure'|'hybrid' }} ctx + * @returns {{ kind:'Program', decls:Array<any> }} + */ +export function lowerIrToCodeIr(irProgram, ctx) { + switch (ctx.mode) { + case 'ski': + return applyBracketAbstraction(irProgram); + case 'closure': + return irProgram; // closures are emitted directly + case 'hybrid': + return applyHybridLowering(irProgram); + default: + throw new Error(`Unknown mode: ${ctx.mode}`); + } +} + +/** + * Apply standard bracket abstraction to remove all Lam nodes. + * See COMPILER.md → Bracket Abstraction. Introduce I, K, S as Const terminals. + * + * @param {{ kind:'Program', decls:Array<any> }} irProgram + * @returns {{ kind:'Program', decls:Array<any> }} + */ +export function applyBracketAbstraction(irProgram) { + // TODO: Walk decl bodies and transform Lam/App/Var terms into SKI trees. + // Use a helper: abstract(varName, term) → term' applying rules: + // - [x]x = I + // - [x]M (x ∉ FV(M)) = K M + // - [x](M N) = S ([x]M) ([x]N) + // Multi-arg lambdas handled by nesting. + return irProgram; +} + +/** + * Hybrid lowering placeholder. Use closures for complex cases, SKI for simple lambdas. + * @param {{ kind:'Program', decls:Array<any> }} irProgram + * @returns {{ kind:'Program', decls:Array<any> }} + */ +export function applyHybridLowering(irProgram) { + // TODO: Implement heuristics, e.g., + // - If body contains Cond/When or deep nested applications, keep closure + // - Else apply abstraction elimination + return irProgram; +} + +/** + * Emit a full JS module given Code IR and options. + * Responsible for: + * - Injecting runtime prelude (inline) or linking external + * - Emitting declarations (let-first for functions → assignment), variables + * - Wrapping in UMD/ESM/CJS based on options + * - Pretty-print toggles + * - Source map generation (stubbed) + * + * @param {{ kind:'Program', decls:Array<any> }} program + * @param {{ format:'umd'|'esm'|'cjs', mode:'ski'|'closure'|'hybrid', runtime:'inline'|{externalPath:string}, sourcemap:'none'|'inline'|'file', pretty:boolean, moduleName:string }} options + * @returns {{ code:string, map?:string }} + */ +export function emitModule(program, options) { + const prelude = options.runtime === 'inline' ? generateRuntimePrelude(options) : ''; + const body = emitProgramBody(program, options); + const wrapped = wrapModule(prelude + '\n' + body, options); + // TODO: Generate real source map when implemented + return { code: wrapped }; +} + +/** + * Emit declarations body (no wrapper). This function should: + * - collect function names; emit `let` declarations + * - emit function assignments from Code IR + * - emit variables as `const` + * - attach exports per module format in wrapper + * + * @param {{ kind:'Program', decls:Array<any> }} program + * @param {{ mode:'ski'|'closure'|'hybrid' }} options + * @returns {string} + */ +export function emitProgramBody(program, options) { + // Closure-mode only for now. + function emitTerm(term) { + switch (term.kind) { + case 'Var': { + const rtVars = new Set(['io','str','math','map','filter','reduce','append','prepend','concat','update','removeAt','slice','set','remove','merge','keys','values','length']); + if (rtVars.has(term.name)) return `__rt.${term.name}`; + return term.name; + } + case 'Const': + return `__rt.${term.name}`; + case 'Lam': + return `(${sanitizeParam(term.param)})=>${emitTerm(term.body)}`; + case 'App': { + const { callee, args } = flattenApp(term); + // Detect get 'out' io pattern: callee is Const('get'), args[0]=lit 'out', args[1]=Var('io') + if ( + callee && callee.kind === 'Const' && callee.name === 'get' && + args.length >= 2 && args[0] && args[0].kind === 'Lit' && args[0].value && args[0].value.type === 'String' && args[0].value.value === 'out' && + args[1] && args[1].kind === 'Var' && args[1].name === 'io' + ) { + const rest = args.slice(2).map(emitTerm).join(', '); + return rest.length ? `__rt.io.out(${rest})` : `__rt.io.out()`; + } + // Default: rebuild left-associative applications + let expr = emitTerm(callee); + for (const a of args) expr = `(${expr})(${emitTerm(a)})`; + return expr; + } + case 'Lit': + return emitLiteral(term.value); + case 'Cond': + return `__rt.cond(${emitTerm(term.predicate)})(()=>${emitTerm(term.ifTrue)})(() => ${emitTerm(term.ifFalse)})`; + default: + throw new Error(`emit: unknown term kind ${term.kind}`); + } + } + + function flattenApp(term) { + const args = []; + let callee = term; + while (callee && callee.kind === 'App') { + args.unshift(callee.arg); + callee = callee.func; + } + return { callee, args }; + } + + function isGetOfIoProp(term, propName) { + // Match: App(App(Const('get'), keyLit), obj) + if (!term || term.kind !== 'App') return false; + const inner = term.func; + if (!inner || inner.kind !== 'App') return false; + if (!inner.func || inner.func.kind !== 'Const' || inner.func.name !== 'get') return false; + const key = inner.arg; + const obj = term.arg; + if (!key || key.kind !== 'Lit' || !key.value || key.value.type !== 'String') return false; + if (key.value.value !== propName) return false; + if (!obj || obj.kind !== 'Var' || obj.name !== 'io') return false; + return true; + } + + function sanitizeParam(p) { + if (!p || typeof p !== 'string') return 'x'; + if (p === '_') return '_'; + return p; + } + + function emitLiteral(lit) { + if (!lit || typeof lit !== 'object' || !lit.type) return 'undefined'; + switch (lit.type) { + case 'Number': + return `__rt.num(${JSON.stringify(lit.value)}, ${lit.isFloat ? 'true' : 'false'})`; + case 'String': + return JSON.stringify(lit.value); + case 'Boolean': + return lit.value ? 'true' : 'false'; + case 'List': + return `[${lit.elements.map(emitTerm).join(', ')}]`; + case 'Table': { + const props = lit.properties.map(p => `${JSON.stringify(p.key)}: ${emitTerm(p.value)}`).join(', '); + return `__rt.table({ ${props} })`; + } + default: + return 'undefined'; + } + } + + const lines = []; + lines.push(`export function run(host){`); + lines.push(` __rt.installHost(host || {});`); + lines.push(` let __rt_last;`); + // Predeclare functions for recursion + const userFuncDecls = program.decls.filter(d => d.kind === 'FunctionDecl'); + const funcNameSet = new Set(); + for (const d of userFuncDecls) funcNameSet.add(d.name); + const funcNames = Array.from(funcNameSet); + if (funcNames.length) { + lines.push(` let ${funcNames.join(', ')};`); + } + for (const decl of userFuncDecls) { + lines.push(` ${decl.name} = ${emitTerm(decl.body)};`); + } + let lastResultVar = '__rt_last'; + for (const decl of program.decls) { + if (decl.kind === 'VarDecl') { + lines.push(` const ${decl.name} = ${emitTerm(decl.value)};`); + lines.push(` ${lastResultVar} = ${decl.name};`); + } else if (decl.kind === 'ExprStmt') { + lines.push(` ${lastResultVar} = ${emitTerm(decl.expr)};`); + } + } + lines.push(` return ${lastResultVar};`); + lines.push(`}`); + return lines.join('\n'); +} + +/** + * Generate the inline runtime prelude as text based on options and mode. + * MUST implement: + * - combinators I, K, S (and optionally B/C/Z later) + * - curry helpers (curry2, etc.) + * - numeric wrapper aware primitives: add, sub, mul, div, mod, neg, eq, ... + * - get, Ok, Err, cond; list/table ops; namespaces io/str/math (host-initialized) + * See COMPILER.md → Runtime Design. + * + * @param {{ mode:'ski'|'closure'|'hybrid' }} options + * @returns {string} + */ +export function generateRuntimePrelude(options) { + const prelude = `// Runtime prelude (closure mode minimal)\n` + +`const __rt = { };\n` + +`__rt.I = (a)=>a;\n` + +`__rt.K = (a)=>(b)=>a;\n` + +`__rt.S = (f)=>(g)=>(x)=>f(x)(g(x));\n` + +`__rt.curry2 = (f)=>(a)=>(b)=>f(a,b);\n` + +`__rt.curry3 = (f)=>(a)=>(b)=>(c)=>f(a,b,c);\n` + +`__rt.num = (value, isFloat)=>({ value, isFloat: !!isFloat });\n` + +`__rt.numValue = (x)=> (x && typeof x.value === 'number') ? x.value : Number(x);\n` + +`__rt.isFloatLike = (x)=> (x && typeof x.value === 'number') ? !!x.isFloat : !Number.isInteger(Number(x));\n` + +`__rt.bool = (x)=> !!(x && typeof x.value === 'number' ? x.value : x);\n` + +`__rt.table = (plain)=>{ const m = new Map(); for (const k of Object.keys(plain||{})) m.set(k, plain[k]); return { type:'Object', properties: m }; };\n` + +`__rt.add = __rt.curry2((a,b)=>{ const av=__rt.numValue(a), bv=__rt.numValue(b); const isF = __rt.isFloatLike(a)||__rt.isFloatLike(b); return __rt.num(av+bv, isF); });\n` + +`__rt.sub = __rt.curry2((a,b)=>{ const av=__rt.numValue(a), bv=__rt.numValue(b); const isF = __rt.isFloatLike(a)||__rt.isFloatLike(b); return __rt.num(av-bv, isF); });\n` + +`__rt.mul = __rt.curry2((a,b)=>{ const av=__rt.numValue(a), bv=__rt.numValue(b); const isF = __rt.isFloatLike(a)||__rt.isFloatLike(b); return __rt.num(av*bv, isF); });\n` + +`__rt.div = __rt.curry2((a,b)=>{ const av=__rt.numValue(a), bv=__rt.numValue(b); if (bv===0) throw new Error('Division by zero'); return __rt.num(av/bv, true); });\n` + +`__rt.mod = __rt.curry2((a,b)=>{ const av=__rt.numValue(a), bv=__rt.numValue(b); return __rt.num(av%bv, __rt.isFloatLike(a)||__rt.isFloatLike(b)); });\n` + +`__rt.neg = (x)=>{ const v=__rt.numValue(x); return __rt.num(-v, __rt.isFloatLike(x)); };\n` + +`__rt.eq = __rt.curry2((a,b)=>{ const an=a&&typeof a.value==='number', bn=b&&typeof b.value==='number'; if (an||bn) return __rt.numValue(a)===__rt.numValue(b); return a===b; });\n` + +`__rt.neq = __rt.curry2((a,b)=>!__rt.eq(a)(b));\n` + +`__rt.gt = __rt.curry2((a,b)=>__rt.numValue(a)>__rt.numValue(b));\n` + +`__rt.lt = __rt.curry2((a,b)=>__rt.numValue(a)<__rt.numValue(b));\n` + +`__rt.gte = __rt.curry2((a,b)=>__rt.numValue(a)>=__rt.numValue(b));\n` + +`__rt.lte = __rt.curry2((a,b)=>__rt.numValue(a)<=__rt.numValue(b));\n` + +`__rt.and = __rt.curry2((a,b)=>!!a && !!b);\n` + +`__rt.or = __rt.curry2((a,b)=>!!a || !!b);\n` + +`__rt.xor = __rt.curry2((a,b)=>!!a !== !!b);\n` + +`__rt.concatDot = __rt.curry2((a,b)=> String(a) + String(b));\n` + +`__rt.Ok = (v)=>({ type:'Result', variant:'Ok', value:v });\n` + +`__rt.Err = (v)=>({ type:'Result', variant:'Err', value:v });\n` + +`__rt.get = __rt.curry2((key,obj)=>{ const k = (key && typeof key.value==='number') ? key.value : (key && key.type==='Number'?key.value : (key && key.type==='String'?key.value : key)); if (obj==null) return null; if (Array.isArray(obj) && typeof k==='number') { if (k<0||k>=obj.length) return undefined; return obj[k]; } if (obj && obj.type==='Object' && obj.properties instanceof Map) { if (!obj.properties.has(String(k))) return undefined; return obj.properties.get(String(k)); } if (typeof obj==='object' && obj!==null && Object.prototype.hasOwnProperty.call(obj, k)) { return obj[k]; } return undefined; });\n` + +`__rt.cond = (p)=>(t)=>(e)=> (p ? t() : e());\n` + +`__rt.io = { out: (...xs)=>{ /* replaced in installHost */ }, in: ()=>'' };\n` + +`__rt.str = { }; __rt.math = { };\n` + +`__rt.installHost = (host)=>{ const hio = (host&&host.io)||{}; __rt.io = { out: (...xs)=>{ if (typeof hio.out==='function') { hio.out(...xs); } }, in: ()=>{ return typeof hio.in==='function' ? hio.in() : ''; } }; return __rt; };\n`; + // Add higher-order list ops and string namespace + const lib = `\n` + +`__rt.map = __rt.curry2((fn, list)=>{ if (!Array.isArray(list)) throw new Error('map expects list'); return list.map(x=> fn(x)); });\n` + +`__rt.filter = __rt.curry2((fn, list)=>{ if (!Array.isArray(list)) throw new Error('filter expects list'); return list.filter(x=> fn(x)); });\n` + +`__rt.reduce = __rt.curry3((fn, acc, list)=>{ if (!Array.isArray(list)) throw new Error('reduce expects list'); let a = acc; for (const item of list) { a = fn(a)(item); } return a; });\n` + +`__rt.length = (arg)=> { if (Array.isArray(arg)) return __rt.num(arg.length,false); if (typeof arg==='string') return __rt.num(arg.length,false); throw new Error('length expects a list or string'); };\n` + +`__rt.append = __rt.curry2((list, element)=>{ if (!Array.isArray(list)) throw new Error('append expects list'); return [...list, element]; });\n` + +`__rt.prepend = __rt.curry2((element, list)=>{ if (!Array.isArray(list)) throw new Error('prepend expects list'); return [element, ...list]; });\n` + +`__rt.concat = __rt.curry2((list1, list2)=>{ if (!Array.isArray(list1) || !Array.isArray(list2)) throw new Error('concat expects lists'); return [...list1, ...list2]; });\n` + +`__rt.update = __rt.curry3((list, index, value)=>{ if (!Array.isArray(list)) throw new Error('update expects list'); const i = (index && typeof index.value==='number') ? index.value : Number(index); if (!Number.isInteger(i) || i < 0 || i >= list.length) throw new Error('Index out of bounds: '+i); const out = [...list]; out[i] = value; return out; });\n` + +`__rt.removeAt = __rt.curry2((list, index)=>{ if (!Array.isArray(list)) throw new Error('removeAt expects list'); const i = (index && typeof index.value==='number') ? index.value : Number(index); if (!Number.isInteger(i) || i < 0 || i >= list.length) throw new Error('Index out of bounds: '+i); return list.filter((_, idx)=> idx !== i); });\n` + +`__rt.slice = __rt.curry3((list, start, end)=>{ if (!Array.isArray(list)) throw new Error('slice expects list'); const s = (start && typeof start.value==='number') ? start.value : Number(start); const e = (end==null) ? list.length : ((end && typeof end.value==='number') ? end.value : Number(end)); if (!Number.isInteger(s) || s < 0) throw new Error('Invalid start index: '+s); if (!Number.isInteger(e) || e < s || e > list.length) throw new Error('Invalid end index: '+e); return list.slice(s, e); });\n` + +`__rt.set = __rt.curry3((table, key, value)=>{ if (!table || table.type!=='Object' || !(table.properties instanceof Map)) throw new Error('set expects a table'); const m = new Map(table.properties); const k = String(key && key.value ? key.value : key); m.set(k, value); return { type:'Object', properties: m }; });\n` + +`__rt.remove = __rt.curry2((table, key)=>{ if (!table || table.type!=='Object' || !(table.properties instanceof Map)) throw new Error('remove expects a table'); const m = new Map(table.properties); const k = String(key && key.value ? key.value : key); m.delete(k); return { type:'Object', properties: m }; });\n` + +`__rt.merge = __rt.curry2((table1, table2)=>{ if (!table1 || table1.type!=='Object' || !(table1.properties instanceof Map)) throw new Error('merge expects tables'); if (!table2 || table2.type!=='Object' || !(table2.properties instanceof Map)) throw new Error('merge expects tables'); const m = new Map(table1.properties); for (const [k,v] of table2.properties.entries()) m.set(k,v); return { type:'Object', properties: m }; });\n` + +`__rt.keys = (table)=>{ if (!table || table.type!=='Object' || !(table.properties instanceof Map)) throw new Error('keys expects a table'); return Array.from(table.properties.keys()); };\n` + +`__rt.values = (table)=>{ if (!table || table.type!=='Object' || !(table.properties instanceof Map)) throw new Error('values expects a table'); return Array.from(table.properties.values()); };\n` + +`__rt.str.concat = __rt.curry2((a,b)=> String(a)+String(b));\n` + +`__rt.str.split = __rt.curry2((s,delim)=> String(s).split(String(delim)));\n` + +`__rt.str.join = __rt.curry2((arr,delim)=> { if (!Array.isArray(arr)) throw new Error('str.join expects array'); return arr.map(x=>String(x)).join(String(delim)); });\n` + +`__rt.str.length = (s)=> __rt.num(String(s).length, false);\n` + +`__rt.str.substring = __rt.curry3((s,start,end)=> String(s).substring(__rt.numValue(start), end==null? undefined : __rt.numValue(end)));\n` + +`__rt.str.replace = __rt.curry3((s,search,repl)=> String(s).replace(new RegExp(String(search),'g'), String(repl)));\n` + +`__rt.str.trim = (s)=> String(s).trim();\n` + +`__rt.str.upper = (s)=> String(s).toUpperCase();\n` + +`__rt.str.lower = (s)=> String(s).toLowerCase();\n`; + // Mark selected runtime functions to resemble interpreter NativeFunction shape during io.out + const nativeMarks = `\n` + +`try { __rt.str.concat.type='NativeFunction'; } catch{}\n` + +`try { __rt.str.split.type='NativeFunction'; } catch{}\n` + +`try { __rt.str.join.type='NativeFunction'; } catch{}\n` + +`try { __rt.str.length.type='NativeFunction'; } catch{}\n` + +`try { __rt.str.substring.type='NativeFunction'; } catch{}\n` + +`try { __rt.str.replace.type='NativeFunction'; } catch{}\n` + +`try { __rt.str.trim.type='NativeFunction'; } catch{}\n` + +`try { __rt.str.upper.type='NativeFunction'; } catch{}\n` + +`try { __rt.str.lower.type='NativeFunction'; } catch{}\n` + +`try { __rt.length.type='NativeFunction'; } catch{}\n`; + const match = `\n` + +`__rt.isType = __rt.curry2((expected, value)=>{ const exp = String(expected); if (exp==='Int') return !!(value && typeof value.value==='number' && !value.isFloat); if (exp==='Float') return !!(value && typeof value.value==='number'); if (exp==='Number') return !!(value && typeof value.value==='number'); if (exp==='String') return typeof value === 'string'; if (exp==='Bool' || exp==='Boolean') return typeof value === 'boolean'; if (exp==='List') return Array.isArray(value); if (exp==='Table') return !!(value && value.type==='Object' && value.properties instanceof Map); if (exp==='Result') return !!(value && value.type==='Result'); return false; });\n` + +`__rt.isResultVariant = __rt.curry2((variant, v)=> !!(v && v.type==='Result' && v.variant===String(variant)));\n` + +`__rt.resultValue = (v)=> v && v.type==='Result' ? v.value : undefined;\n` + +`__rt.listLen = (xs)=> Array.isArray(xs) ? __rt.num(xs.length, false) : __rt.num(0, false);\n` + +`__rt.has = __rt.curry2((key, obj)=> { const k = String(key && key.value ? key.value : key); if (obj && obj.type==='Object' && obj.properties instanceof Map) { return obj.properties.has(k); } if (obj && typeof obj==='object') { return Object.prototype.hasOwnProperty.call(obj, k); } return false; });\n`; + const math = `\n` + +`__rt.math = { };\n` + +`__rt.math.abs = (x)=> __rt.num(Math.abs(__rt.numValue(x)), true);\n` + +`__rt.math.sign = (x)=> __rt.num(Math.sign(__rt.numValue(x)), true);\n` + +`__rt.math.floor = (x)=> __rt.num(Math.floor(__rt.numValue(x)), true);\n` + +`__rt.math.ceil = (x)=> __rt.num(Math.ceil(__rt.numValue(x)), true);\n` + +`__rt.math.round = (x)=> __rt.num(Math.round(__rt.numValue(x)), true);\n` + +`__rt.math.trunc = (x)=> __rt.num(Math.trunc(__rt.numValue(x)), true);\n` + +`__rt.math.min = __rt.curry2((a,b)=> __rt.num(Math.min(__rt.numValue(a), __rt.numValue(b)), true));\n` + +`__rt.math.max = __rt.curry2((a,b)=> __rt.num(Math.max(__rt.numValue(a), __rt.numValue(b)), true));\n` + +`__rt.math.clamp = __rt.curry3((x, lo, hi)=> { const xv=__rt.numValue(x), lv=__rt.numValue(lo), hv=__rt.numValue(hi); return __rt.num(Math.min(Math.max(xv, lv), hv), true); });\n` + +`__rt.math.pow = __rt.curry2((x,y)=> __rt.num(Math.pow(__rt.numValue(x), __rt.numValue(y)), true));\n` + +`__rt.math.sqrt = (x)=> { const v=__rt.numValue(x); if (v < 0) throw new Error('Domain error: sqrt expects x >= 0'); return __rt.num(Math.sqrt(v), true); };\n` + +`__rt.math.exp = (x)=> __rt.num(Math.exp(__rt.numValue(x)), true);\n` + +`__rt.math.log = (x)=> { const v=__rt.numValue(x); if (v <= 0) throw new Error('Domain error: log expects x > 0'); return __rt.num(Math.log(v), true); };\n` + +`__rt.math.sin = (x)=> __rt.num(Math.sin(__rt.numValue(x)), true);\n` + +`__rt.math.cos = (x)=> __rt.num(Math.cos(__rt.numValue(x)), true);\n` + +`__rt.math.tan = (x)=> __rt.num(Math.tan(__rt.numValue(x)), true);\n` + +`__rt.math.asin = (x)=> __rt.num(Math.asin(__rt.numValue(x)), true);\n` + +`__rt.math.acos = (x)=> __rt.num(Math.acos(__rt.numValue(x)), true);\n` + +`__rt.math.atan = (x)=> __rt.num(Math.atan(__rt.numValue(x)), true);\n` + +`__rt.math.atan2 = __rt.curry2((y,x)=> __rt.num(Math.atan2(__rt.numValue(y), __rt.numValue(x)), true));\n` + +`__rt.math.deg = (r)=> __rt.num(__rt.numValue(r) * (180 / Math.PI), true);\n` + +`__rt.math.rad = (d)=> __rt.num(__rt.numValue(d) * (Math.PI / 180), true);\n` + +`__rt.math.random = ()=> __rt.num(Math.random(), true);\n` + +`__rt.math.randomInt = __rt.curry2((lo, hi)=> { const a = ~~(__rt.numValue(lo)); const b = ~~(__rt.numValue(hi)); if (a > b) throw new Error('Invalid range: lo > hi'); const n = a + Math.floor(Math.random() * (b - a + 1)); return __rt.num(n, false); });\n`; + return prelude + match + lib + nativeMarks + math; +} + +/** + * Wrap concatenated prelude+body into selected module format (UMD/ESM/CJS). + * The wrapper should export named user bindings. For now, return identity. + * + * @param {string} content + * @param {{ format:'umd'|'esm'|'cjs', moduleName:string }} options + * @returns {string} + */ +export function wrapModule(content, options) { + // TODO: Implement proper wrappers. Keep this trivial to enable early testing. + if (options.format === 'esm') return content; + if (options.format === 'cjs') return content; + // UMD default + return content; +} + +// ============================= +// Dev-friendly CLI (optional) +// ============================= + +/** + * Minimal CLI for direct compiler invocation. + * Prefer integrating flags into runner.js as the canonical CLI. + */ +if (typeof process !== 'undefined' && process.argv && process.argv[1] && process.argv[1].endsWith('compiler.js')) { + const fs = await import('fs'); + const path = await import('path'); + + const args = process.argv.slice(2); + const inIdx = args.indexOf('--in'); + const outIdx = args.indexOf('-o') >= 0 ? args.indexOf('-o') : args.indexOf('--out'); + const formatIdx = args.indexOf('--format'); + const modeIdx = args.indexOf('--mode'); + + if (inIdx === -1 || !args[inIdx + 1]) { + console.error('Usage: node compiler.js --in <input.baba> [-o out.js] [--format esm|cjs|umd] [--mode ski|closure|hybrid]'); + process.exit(1); + } + + const inputPath = path.resolve(process.cwd(), args[inIdx + 1]); + const outPath = outIdx !== -1 && args[outIdx + 1] ? path.resolve(process.cwd(), args[outIdx + 1]) : null; + const format = formatIdx !== -1 && args[formatIdx + 1] ? args[formatIdx + 1] : undefined; + const mode = modeIdx !== -1 && args[modeIdx + 1] ? args[modeIdx + 1] : undefined; + + const source = fs.readFileSync(inputPath, 'utf8'); + const { code } = compile(source, { format, mode }); + if (outPath) { + fs.writeFileSync(outPath, code, 'utf8'); + } else { + process.stdout.write(code); + } +} + + + diff --git a/js/baba-yaga/experimental/fmt/fmt-README.md b/js/baba-yaga/experimental/fmt/fmt-README.md new file mode 100644 index 0000000..132549b --- /dev/null +++ b/js/baba-yaga/experimental/fmt/fmt-README.md @@ -0,0 +1,338 @@ +# Baba Yaga Code Formatter (`fmt.js`) + +A code formatter for the Baba Yaga programming language, similar to Go's `fmt` tool. It automatically formats Baba Yaga source code according to consistent style guidelines. + +## Features + +- **Consistent Formatting**: Applies standard formatting rules across all Baba Yaga code +- **Automatic Indentation**: Proper indentation for nested structures (functions, when expressions, with blocks) +- **Operator Spacing**: Consistent spacing around operators and punctuation +- **Line Breaking**: Smart line breaking for long expressions and data structures +- **Comment Preservation**: Maintains existing comments (work in progress) +- **Type Annotation Formatting**: Proper formatting of typed function parameters and return types + +## Installation + +The formatter uses the existing Baba Yaga lexer and parser. Ensure you have the core language files: +- `lexer.js` +- `parser.js` +- `fmt.js` + +## Usage + +### Command Line + +```bash +# Format and print to stdout +node fmt.js file.baba + +# Format and write back to file +node fmt.js --write file.baba +node fmt.js -w file.baba + +# Check if file is already formatted (exit code 0 if formatted, 1 if not) +node fmt.js --check file.baba +node fmt.js -c file.baba + +# Custom indentation size (default: 2 spaces) +node fmt.js --indent=4 file.baba +``` + +### As a Module + +```javascript +import { BabaYagaFormatter } from './fmt.js'; + +const formatter = new BabaYagaFormatter({ + indentSize: 2, + maxLineLength: 100, + preserveComments: true +}); + +const source = `x:2+3;y:x*2;`; +const formatted = formatter.format(source); +console.log(formatted); +// Output: +// x : 2 + 3; +// y : x * 2; +``` + +## Formatting Rules + +### Function Body Indentation + +All function bodies are properly indented relative to the function name: + +```baba +// Simple function body +inc : x -> + x + 1; + +// Complex function body with when expression +classify : n -> + when n is + 0 then "zero" + _ then "other"; + +// Function with with header +calculate : a b -> + with ( + sum : a + b; + product : a * b; + ) -> + {sum: sum, product: product}; +``` + +### Basic Declarations + +**Before:** +```baba +x:42;y:"hello"; +``` + +**After:** +```baba +x : 42; +y : "hello"; +``` + +### Functions + +**Before:** +```baba +add:x y->x+y; +multiply:(x:Int,y:Int)->Int->x*y; +``` + +**After:** +```baba +add : x y -> + x + y; + +multiply : (x: Int, y: Int) -> Int -> + x * y; +``` + +### When Expressions + +**Before:** +```baba +check:x->when x is 1 then"one"2 then"two"_ then"other"; +``` + +**After:** +```baba +check : x -> + when x is + 1 then "one" + 2 then "two" + _ then "other"; +``` + +### Then Keyword Alignment + +The formatter ensures all `then` keywords within a `when` expression scope are aligned for maximum readability: + +**Before:** +```baba +processRequest : method path -> + when method path is + "GET" "/" then "Home page" + "GET" "/about" then "About page" + "POST" "/api/users" then "Create user" + "DELETE" "/api/users" then "Delete user" + _ _ then "Not found"; +``` + +**After:** +```baba +processRequest : method path -> + when method path is + "GET" "/" then "Home page" + "GET" "/about" then "About page" + "POST" "/api/users" then "Create user" + "DELETE" "/api/users" then "Delete user" + _ _ then "Not found"; +``` + +This alignment is maintained within each `when` scope, making nested when expressions highly readable. + +### Lists and Tables + +**Before:** +```baba +nums:[1,2,3,4,5]; +person:{name:"Alice",age:30,active:true}; +``` + +**After:** +```baba +nums : [1, 2, 3, 4, 5]; +person : {name: "Alice", age: 30, active: true}; +``` + +For longer structures, the formatter uses multi-line format: +```baba +longList : [ + 1, + 2, + 3, + 4, + 5 +]; + +complexTable : { + name: "Alice", + details: { + age: 30, + city: "Boston" + }, + preferences: ["tea", "books", "coding"] +}; +``` + +### With Headers + +**Before:** +```baba +calc:x y->with(a:x+1;b:y*2;)->a+b; +``` + +**After:** +```baba +calc : x y -> + with ( + a : x + 1; + b : y * 2; + ) -> + a + b; +``` + +### Function Calls + +**Before:** +```baba +result:add 5 3; +complex:map(x->x*2)[1,2,3]; +``` + +**After:** +```baba +result : add 5 3; +complex : map (x -> x * 2) [1, 2, 3]; +``` + +## Supported Node Types + +The formatter handles all major Baba Yaga language constructs: + +- **Declarations**: Variables, functions, types +- **Expressions**: Binary, unary, function calls, member access +- **Literals**: Numbers, strings, booleans, lists, tables +- **Control Flow**: When expressions with pattern matching +- **Advanced Features**: With headers, curried functions, anonymous functions +- **Type Annotations**: Typed parameters and return types + +## Error Handling + +If the formatter encounters a parsing error, it will report the issue and exit with a non-zero status code: + +```bash +$ node fmt.js invalid.baba +Error formatting 'invalid.baba': Formatting failed: Expected token type COLON but got SEMICOLON at 1:5 +``` + +## Integration with Editors + +The formatter can be integrated with various editors: + +### VS Code +Add to your `settings.json`: +```json +{ + "[baba]": { + "editor.defaultFormatter": "none", + "editor.formatOnSave": false + } +} +``` + +Then create a task in `.vscode/tasks.json`: +```json +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Format Baba Yaga", + "type": "shell", + "command": "node", + "args": ["fmt.js", "--write", "${file}"], + "group": "build", + "presentation": { + "echo": true, + "reveal": "silent", + "focus": false, + "panel": "shared" + } + } + ] +} +``` + +### Command Line Integration +Add to your shell profile (`.bashrc`, `.zshrc`, etc.): +```bash +# Format all .baba files in current directory +alias babafmt='find . -name "*.baba" -exec node /path/to/fmt.js --write {} \;' + +# Check formatting of all .baba files +alias babafmtcheck='find . -name "*.baba" -exec node /path/to/fmt.js --check {} \;' +``` + +## Examples + +### Before Formatting +```baba +// Unformatted Baba Yaga code +factorial:n->when n is 0 then 1 1 then 1 _ then n*(factorial(n-1)); +numbers:[1,2,3,4,5];sum:reduce(acc x->acc+x)0 numbers; +user:{name:"Alice",age:30,calculate:x y->x+y}; +``` + +### After Formatting +```baba +// Unformatted Baba Yaga code +factorial : n -> + when n is + 0 then 1 + 1 then 1 + _ then n * (factorial (n - 1)); + +numbers : [1, 2, 3, 4, 5]; +sum : reduce (acc x -> acc + x) 0 numbers; + +user : { + name: "Alice", + age: 30, + calculate: x y -> x + y +}; +``` + +## Contributing + +The formatter is built using the existing Baba Yaga AST structure. To add support for new language features: + +1. Add the new node type to the `visitNode` method +2. Implement a corresponding `format*` method +3. Add test cases +4. Update this documentation + +## Known Limitations + +- Comment preservation is basic and may not handle all edge cases +- Very complex nested expressions might need manual formatting +- Error recovery could be improved for malformed input + +## License + +Same as the Baba Yaga language implementation. diff --git a/js/baba-yaga/experimental/fmt/fmt.js b/js/baba-yaga/experimental/fmt/fmt.js new file mode 100644 index 0000000..85076b9 --- /dev/null +++ b/js/baba-yaga/experimental/fmt/fmt.js @@ -0,0 +1,700 @@ +#!/usr/bin/env node + +// fmt.js - Baba Yaga code formatter +// Similar to Go's fmt tool, formats Baba Yaga source code according to standard style + +import { createLexer } from './lexer.js'; +import { createParser } from './parser.js'; +import fs from 'fs'; +import path from 'path'; + +/** + * Baba Yaga code formatter + * Formats code according to consistent style rules + */ +class BabaYagaFormatter { + constructor(options = {}) { + this.indentSize = options.indentSize || 2; + this.maxLineLength = options.maxLineLength || 100; + this.preserveComments = options.preserveComments !== false; + } + + /** + * Format source code string + */ + format(source) { + try { + const lexer = createLexer(source); + const tokens = lexer.allTokens(); + + // Extract comments before parsing + const comments = this.extractComments(source); + + const parser = createParser(tokens); + const ast = parser.parse(); + + return this.formatAST(ast, comments, source); + } catch (error) { + throw new Error(`Formatting failed: ${error.message}`); + } + } + + /** + * Extract comments from source with their positions + */ + extractComments(source) { + const comments = []; + const lines = source.split('\n'); + + lines.forEach((line, lineIndex) => { + const commentMatch = line.match(/\/\/(.*)$/); + if (commentMatch) { + const column = line.indexOf('//'); + comments.push({ + line: lineIndex + 1, + column: column, + text: commentMatch[0], + content: commentMatch[1].trim() + }); + } + }); + + return comments; + } + + /** + * Format AST node + */ + formatAST(ast, comments = [], originalSource = '') { + return this.visitNode(ast, 0, comments); + } + + /** + * Visit and format a node + */ + visitNode(node, depth = 0, comments = []) { + if (!node) return ''; + + switch (node.type) { + case 'Program': + return this.formatProgram(node, depth, comments); + case 'TypeDeclaration': + return this.formatTypeDeclaration(node, depth); + case 'VariableDeclaration': + return this.formatVariableDeclaration(node, depth, comments); + case 'FunctionDeclaration': + return this.formatFunctionDeclaration(node, depth, comments); + case 'CurriedFunctionDeclaration': + return this.formatCurriedFunctionDeclaration(node, depth, comments); + case 'WithHeader': + return this.formatWithHeader(node, depth, comments); + case 'WhenExpression': + return this.formatWhenExpression(node, depth, comments); + case 'BinaryExpression': + return this.formatBinaryExpression(node, depth, comments); + case 'UnaryExpression': + return this.formatUnaryExpression(node, depth, comments); + case 'FunctionCall': + return this.formatFunctionCall(node, depth, comments); + case 'AnonymousFunction': + return this.formatAnonymousFunction(node, depth, comments); + case 'ListLiteral': + return this.formatListLiteral(node, depth, comments); + case 'TableLiteral': + return this.formatTableLiteral(node, depth, comments); + case 'MemberExpression': + return this.formatMemberExpression(node, depth, comments); + case 'ResultExpression': + return this.formatResultExpression(node, depth, comments); + case 'NumberLiteral': + return this.formatNumberLiteral(node); + case 'StringLiteral': + return this.formatStringLiteral(node); + case 'BooleanLiteral': + return this.formatBooleanLiteral(node); + case 'Identifier': + return this.formatIdentifier(node); + default: + // Fallback for unknown node types - avoid infinite recursion + if (typeof node === 'string') { + return node; + } + if (typeof node === 'number') { + return node.toString(); + } + if (typeof node === 'boolean') { + return node.toString(); + } + if (node && typeof node === 'object') { + // Try to handle as a literal value + if (node.value !== undefined) { + return node.value.toString(); + } + if (node.name !== undefined) { + return node.name; + } + } + return JSON.stringify(node); + } + } + + /** + * Format program (top level) + */ + formatProgram(node, depth, comments) { + const statements = []; + let lastWasFunction = false; + + node.body.forEach((stmt, index) => { + const formatted = this.visitNode(stmt, depth, comments); + const isFunction = stmt.type === 'FunctionDeclaration' || + stmt.type === 'CurriedFunctionDeclaration'; + + // Add extra spacing between functions and other statements + if (index > 0 && (isFunction || lastWasFunction)) { + statements.push(''); + } + + statements.push(formatted); + lastWasFunction = isFunction; + }); + + return statements.join('\n') + (statements.length > 0 ? '\n' : ''); + } + + /** + * Format type declaration + */ + formatTypeDeclaration(node, depth) { + const indent = this.getIndent(depth); + return `${indent}${node.name} ${node.typeAnnotation};`; + } + + /** + * Format variable declaration + */ + formatVariableDeclaration(node, depth, comments) { + const indent = this.getIndent(depth); + + // Check if the value is a complex expression that should be on its own line + if (node.value.type === 'WhenExpression' || node.value.type === 'WithHeader') { + const value = this.visitNode(node.value, depth + 1, comments); + return `${indent}${node.name} :\n${value};`; + } else { + const value = this.visitNode(node.value, depth, comments); + return `${indent}${node.name} : ${value};`; + } + } + + /** + * Format function declaration + */ + formatFunctionDeclaration(node, depth, comments) { + const indent = this.getIndent(depth); + let result = `${indent}${node.name} : `; + + // Format parameters + if (node.params && node.params.length > 0) { + if (this.hasTypedParams(node.params)) { + result += this.formatTypedParameters(node.params); + } else { + result += node.params.map(p => typeof p === 'string' ? p : p.name).join(' '); + } + } + + // Add return type if present + if (node.returnType) { + result += ` -> ${this.formatType(node.returnType)}`; + } + + result += ' ->\n'; + + // Format body with proper indentation + const body = this.visitNode(node.body, depth + 1, comments); + // If the body doesn't start with indentation, add it + if (body && !body.startsWith(this.getIndent(depth + 1))) { + result += this.getIndent(depth + 1) + body; + } else { + result += body; + } + + result += ';'; + return result; + } + + /** + * Format curried function declaration + */ + formatCurriedFunctionDeclaration(node, depth, comments) { + const indent = this.getIndent(depth); + let result = `${indent}${node.name} : `; + + // Format first typed parameter + result += `(${node.param.name}: ${this.formatType(node.param.type)})`; + + // Format return type + if (node.returnType) { + result += ` -> ${this.formatType(node.returnType)}`; + } + + result += ' ->\n'; + + // Format body with proper indentation + const body = this.visitNode(node.body, depth + 1, comments); + result += body + ';'; + + return result; + } + + /** + * Format with header + */ + formatWithHeader(node, depth, comments) { + const indent = this.getIndent(depth); + let result = `${indent}with`; + + if (node.recursive) { + result += ' rec'; + } + + result += ' (\n'; + + // Format entries + node.entries.forEach((entry, index) => { + const entryIndent = this.getIndent(depth + 1); + if (entry.type === 'WithTypeDecl') { + result += `${entryIndent}${entry.name} ${this.formatType(entry.typeAnnotation)};`; + } else if (entry.type === 'WithAssign') { + const value = this.visitNode(entry.value, depth + 1, comments); + result += `${entryIndent}${entry.name} : ${value};`; + } + + if (index < node.entries.length - 1) { + result += '\n'; + } + }); + + result += `\n${indent}) ->\n`; + const body = this.visitNode(node.body, depth + 1, comments); + // Ensure the body is properly indented + if (body && !body.startsWith(this.getIndent(depth + 1))) { + result += this.getIndent(depth + 1) + body; + } else { + result += body; + } + + return result; + } + + /** + * Format when expression + */ + formatWhenExpression(node, depth, comments) { + const indent = this.getIndent(depth); + let result = `${indent}when `; + + // Format discriminants + const discriminants = node.discriminants.map(d => + this.visitNode(d, 0, comments) + ).join(' '); + result += `${discriminants} is\n`; + + // Calculate the maximum pattern width to align 'then' keywords + const caseIndent = this.getIndent(depth + 1); + const formattedCases = node.cases.map(caseNode => { + const patterns = caseNode.patterns.map(p => + this.formatPattern(p, depth + 1, comments) + ).join(' '); + return { + patterns, + consequent: caseNode.consequent, + originalCase: caseNode + }; + }); + + // Find the maximum pattern length for alignment + const maxPatternLength = Math.max( + ...formattedCases.map(c => c.patterns.length) + ); + + // Format cases with aligned 'then' keywords + formattedCases.forEach((formattedCase, index) => { + const { patterns, consequent } = formattedCase; + + // Pad patterns to align 'then' keywords + const paddedPatterns = patterns.padEnd(maxPatternLength); + result += `${caseIndent}${paddedPatterns} then `; + + // Format consequent - handle nested when expressions specially + if (consequent.type === 'WhenExpression') { + // For nested when expressions, add newline and proper indentation + result += '\n' + this.visitNode(consequent, depth + 2, comments); + } else { + // For simple consequents, add inline + const consequentFormatted = this.visitNode(consequent, 0, comments); + result += consequentFormatted; + } + + // Add newline between cases (but not after the last one) + if (index < formattedCases.length - 1) { + result += '\n'; + } + }); + + return result; + } + + /** + * Format pattern + */ + formatPattern(pattern, depth, comments) { + if (!pattern) return ''; + + switch (pattern.type) { + case 'WildcardPattern': + return '_'; + case 'TypePattern': + return pattern.name; + case 'ResultPattern': + return `${pattern.variant} ${pattern.identifier.name}`; + case 'ListPattern': + const elements = pattern.elements.map(e => + this.formatPattern(e, depth, comments) + ).join(', '); + return `[${elements}]`; + case 'TablePattern': + const properties = pattern.properties.map(prop => + `${prop.key}: ${this.formatPattern(prop.value, depth, comments)}` + ).join(', '); + return `{${properties}}`; + case 'NumberLiteral': + return pattern.value.toString(); + case 'StringLiteral': + return `"${pattern.value}"`; + case 'BooleanLiteral': + return pattern.value.toString(); + case 'Identifier': + return pattern.name; + default: + // For literal patterns, try to format them directly + if (typeof pattern === 'string') { + return pattern; + } + if (typeof pattern === 'number') { + return pattern.toString(); + } + return this.visitNode(pattern, depth, comments); + } + } + + /** + * Format binary expression + */ + formatBinaryExpression(node, depth, comments) { + const left = this.visitNode(node.left, depth, comments); + const right = this.visitNode(node.right, depth, comments); + + // Add spaces around operators + const needsSpaces = !['.', '..'].includes(node.operator); + if (needsSpaces) { + return `${left} ${node.operator} ${right}`; + } else { + return `${left}${node.operator}${right}`; + } + } + + /** + * Format unary expression + */ + formatUnaryExpression(node, depth, comments) { + const operand = this.visitNode(node.operand, depth, comments); + return `${node.operator}${operand}`; + } + + /** + * Format function call + */ + formatFunctionCall(node, depth, comments) { + const callee = this.visitNode(node.callee, depth, comments); + const args = node.arguments.map(arg => + this.visitNode(arg, depth, comments) + ); + + if (args.length === 0) { + return callee; + } + + // Handle parentheses for complex expressions + const formattedArgs = args.map(arg => { + // If argument contains operators or is complex, wrap in parentheses + if (arg.includes(' -> ') || (arg.includes(' ') && !arg.startsWith('"') && !arg.startsWith('['))) { + return `(${arg})`; + } + return arg; + }); + + return `${callee} ${formattedArgs.join(' ')}`; + } + + /** + * Format anonymous function + */ + formatAnonymousFunction(node, depth, comments) { + // Handle both string parameters and object parameters + const params = node.params.map(param => { + if (typeof param === 'string') { + return param; + } else if (param && typeof param === 'object' && param.name) { + return param.name; + } else if (param && typeof param === 'object' && param.type === 'Identifier') { + return param.name; + } else { + return String(param); + } + }).join(' '); + const body = this.visitNode(node.body, depth, comments); + return `${params} -> ${body}`; + } + + /** + * Format list literal + */ + formatListLiteral(node, depth, comments) { + if (node.elements.length === 0) { + return '[]'; + } + + const elements = node.elements.map(el => + this.visitNode(el, depth, comments) + ); + + // Single line if short, multi-line if long + const singleLine = `[${elements.join(', ')}]`; + if (singleLine.length <= 50) { + return singleLine; + } + + const indent = this.getIndent(depth); + const elementIndent = this.getIndent(depth + 1); + let result = '[\n'; + elements.forEach((el, index) => { + result += `${elementIndent}${el}`; + if (index < elements.length - 1) { + result += ','; + } + result += '\n'; + }); + result += `${indent}]`; + return result; + } + + /** + * Format table literal + */ + formatTableLiteral(node, depth, comments) { + if (node.properties.length === 0) { + return '{}'; + } + + const properties = node.properties.map(prop => { + const value = this.visitNode(prop.value, depth + 1, comments); + return `${prop.key}: ${value}`; + }); + + // Single line if short, multi-line if long + const singleLine = `{${properties.join(', ')}}`; + if (singleLine.length <= 50 && !properties.some(p => p.includes('\n'))) { + return singleLine; + } + + const indent = this.getIndent(depth); + const propIndent = this.getIndent(depth + 1); + let result = '{\n'; + properties.forEach((prop, index) => { + result += `${propIndent}${prop}`; + if (index < properties.length - 1) { + result += ','; + } + result += '\n'; + }); + result += `${indent}}`; + return result; + } + + /** + * Format member expression + */ + formatMemberExpression(node, depth, comments) { + const object = this.visitNode(node.object, depth, comments); + const property = this.visitNode(node.property, depth, comments); + return `${object}.${property}`; + } + + /** + * Format result expression + */ + formatResultExpression(node, depth, comments) { + const value = this.visitNode(node.value, depth, comments); + return `${node.variant} ${value}`; + } + + /** + * Format number literal + */ + formatNumberLiteral(node) { + return node.value.toString(); + } + + /** + * Format string literal + */ + formatStringLiteral(node) { + return `"${node.value}"`; + } + + /** + * Format boolean literal + */ + formatBooleanLiteral(node) { + return node.value.toString(); + } + + /** + * Format identifier + */ + formatIdentifier(node) { + return node.name; + } + + // Helper methods + + /** + * Get indentation string + */ + getIndent(depth) { + return ' '.repeat(depth * this.indentSize); + } + + /** + * Check if parameters have type annotations + */ + hasTypedParams(params) { + return params.some(p => + typeof p === 'object' && p.type && p.type !== 'Identifier' + ); + } + + /** + * Format typed parameters + */ + formatTypedParameters(params) { + const formatted = params.map(p => { + if (typeof p === 'string') { + return p; + } else if (p.type && p.type !== 'Identifier') { + return `${p.name}: ${this.formatType(p.type)}`; + } else { + return p.name; + } + }); + return `(${formatted.join(', ')})`; + } + + /** + * Format type annotation + */ + formatType(type) { + if (typeof type === 'string') { + return type; + } + + if (type.type === 'PrimitiveType') { + return type.name; + } + + if (type.type === 'FunctionType') { + const paramTypes = type.paramTypes.map(t => this.formatType(t)).join(', '); + const returnType = this.formatType(type.returnType); + return `(${paramTypes}) -> ${returnType}`; + } + + return 'Unknown'; + } +} + +/** + * CLI interface + */ +function main() { + const args = process.argv.slice(2); + + if (args.length === 0) { + console.error('Usage: node fmt.js <file.baba> [options]'); + console.error('Options:'); + console.error(' --write, -w Write result to file instead of stdout'); + console.error(' --check, -c Check if file is already formatted'); + console.error(' --indent=N Set indentation size (default: 2)'); + process.exit(1); + } + + const options = { + write: args.includes('--write') || args.includes('-w'), + check: args.includes('--check') || args.includes('-c'), + indentSize: 2 + }; + + // Parse indent option + const indentArg = args.find(arg => arg.startsWith('--indent=')); + if (indentArg) { + options.indentSize = parseInt(indentArg.split('=')[1]) || 2; + } + + const filename = args.find(arg => + !arg.startsWith('-') && !arg.startsWith('--') + ); + + if (!filename) { + console.error('Error: No input file specified'); + process.exit(1); + } + + if (!fs.existsSync(filename)) { + console.error(`Error: File '${filename}' not found`); + process.exit(1); + } + + try { + const source = fs.readFileSync(filename, 'utf8'); + const formatter = new BabaYagaFormatter(options); + const formatted = formatter.format(source); + + if (options.check) { + if (source.trim() !== formatted.trim()) { + console.error(`File '${filename}' is not formatted`); + process.exit(1); + } else { + console.log(`File '${filename}' is already formatted`); + process.exit(0); + } + } + + if (options.write) { + fs.writeFileSync(filename, formatted); + console.log(`Formatted '${filename}'`); + } else { + process.stdout.write(formatted); + } + + } catch (error) { + console.error(`Error formatting '${filename}': ${error.message}`); + process.exit(1); + } +} + +// Export for use as module +export { BabaYagaFormatter }; + +// Run CLI if called directly +if (import.meta.url === `file://${process.argv[1]}`) { + main(); +} diff --git a/js/baba-yaga/experimental/parity.js b/js/baba-yaga/experimental/parity.js new file mode 100644 index 0000000..2beedf3 --- /dev/null +++ b/js/baba-yaga/experimental/parity.js @@ -0,0 +1,137 @@ +// parity.js +// Simple parity harness: runs a Baba Yaga program via interpreter and compiled JS, compares results and outputs. +// Usage: node parity.js <file1.baba> [file2.baba ...] + +import fs from 'fs'; +import path from 'path'; +import { fileURLToPath, pathToFileURL } from 'url'; + +import { createLexer } from '../lexer.js'; +import { createParser } from '../parser.js'; +import { createInterpreter } from '../interpreter.js'; +import { compile } from './experimental/compiler/compiler.js'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +function deepPlain(value) { + if (typeof value === 'function') { + try { + if (value && value.type === 'NativeFunction') return { type: 'NativeFunction' }; + } catch {} + return { type: 'Function' }; + } + if (value && typeof value.value === 'number') return value.value; + if (Array.isArray(value)) return value.map(deepPlain); + if (value && value.type === 'Object' && value.properties instanceof Map) { + const obj = {}; + for (const [k, v] of value.properties.entries()) obj[k] = deepPlain(v); + return obj; + } + if (value && value.type === 'Function') return { type: 'Function' }; + if (value && value.type === 'NativeFunction') return { type: 'NativeFunction' }; + if (value && value.type === 'Result') { + return { type: 'Result', variant: value.variant, value: deepPlain(value.value) }; + } + return value; +} + +function makeHostCapture() { + const out = []; + const io = { + out: (...xs) => { out.push(xs.map((x) => deepPlain(x))); }, + in: () => '', + }; + return { io, out }; +} + +function runWithInterpreter(source) { + const lexer = createLexer(source); + const tokens = lexer.allTokens(); + const parser = createParser(tokens); + const ast = parser.parse(); + const { io, out } = makeHostCapture(); + const { interpret } = createInterpreter(ast, { io }); + const result = interpret(); + return { result, out }; +} + +async function runWithCompiler(source) { + try { + const { code } = compile(source, { format: 'esm', mode: 'closure' }); + // Attempt to import as ESM via data URL + const dataUrl = 'data:text/javascript;base64,' + Buffer.from(code, 'utf8').toString('base64'); + const mod = await import(dataUrl); + const { io, out } = makeHostCapture(); + // Heuristic: prefer `run(host)` if exported, else `main(host)` or `main()` + let result; + if (typeof mod.run === 'function') { + result = await mod.run({ io }); + } else if (typeof mod.main === 'function') { + try { result = await mod.main({ io }); } catch { result = await mod.main(); } + } else if (typeof mod.__main === 'function') { + result = await mod.__main({ io }); + } else { + return { pending: true, note: 'No run/main export in compiled module', out: [], result: undefined }; + } + return { result, out }; + } catch (e) { + return { pending: true, note: `Compiled execution unavailable: ${e && e.message ? e.message : String(e)}` }; + } +} + +function isDeepEqual(a, b) { + return JSON.stringify(a) === JSON.stringify(b); +} + +async function main() { + const args = process.argv.slice(2); + const files = args.length ? args : ['example.baba']; + let pass = 0; + let fail = 0; + let pending = 0; + + for (const file of files) { + const filePath = path.isAbsolute(file) ? file : path.resolve(__dirname, file); + const source = fs.readFileSync(filePath, 'utf8'); + + const interp = runWithInterpreter(source); + const comp = await runWithCompiler(source); + + const interpValue = deepPlain(interp.result); + const compValue = comp.pending ? undefined : deepPlain(comp.result); + + const interpOut = interp.out; + const compOut = comp.pending ? undefined : comp.out; + + const valueEq = !comp.pending && isDeepEqual(interpValue, compValue); + const outEq = !comp.pending && isDeepEqual(interpOut, compOut); + + if (comp.pending) { + pending++; + console.log(`[PENDING] ${path.basename(file)} - ${comp.note}`); + continue; + } + + if (valueEq && outEq) { + pass++; + console.log(`[PASS] ${path.basename(file)}`); + } else { + fail++; + console.log(`[FAIL] ${path.basename(file)}`); + if (!valueEq) { + console.log(' value: interpreter =', JSON.stringify(interpValue), ' compiled =', JSON.stringify(compValue)); + } + if (!outEq) { + console.log(' io.out: interpreter =', JSON.stringify(interpOut), ' compiled =', JSON.stringify(compOut)); + } + } + } + + console.log(`\nSummary: pass=${pass}, fail=${fail}, pending=${pending}`); + if (fail > 0) process.exitCode = 1; +} + +await main(); + + |