diff options
Diffstat (limited to 'awk/scheme')
-rwxr-xr-x | awk/scheme/s.awk | 139 | ||||
-rw-r--r-- | awk/scheme/scheme/README.md | 220 | ||||
-rw-r--r-- | awk/scheme/scheme/TODO.txt | 69 | ||||
-rwxr-xr-x | awk/scheme/scheme/bin/compiler.awk | 660 | ||||
-rwxr-xr-x | awk/scheme/scheme/bin/repl | 161 | ||||
-rwxr-xr-x | awk/scheme/scheme/bin/vm.awk | 997 | ||||
-rw-r--r-- | awk/scheme/scheme/diagram.md | 91 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/cons.test.scm | 3 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/define.test.scm | 2 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/lambda.test.scm | 12 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/let-and-define.test.scm | 9 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/let.test.scm | 2 | ||||
-rwxr-xr-x | awk/scheme/scheme/scheme | 3 | ||||
-rw-r--r-- | awk/scheme/scheme/scratch/complex_test.scm.asm | 44 | ||||
-rw-r--r-- | awk/scheme/scheme/scratch/ideas-for-string-support.md | 556 | ||||
-rwxr-xr-x | awk/scheme/scheme/scratch/run.sh | 5 | ||||
-rw-r--r-- | awk/scheme/scheme/scratch/test.asm | 16 | ||||
-rw-r--r-- | awk/scheme/scheme/scratch/test.scm | 8 | ||||
-rw-r--r-- | awk/scheme/scheme/scratch/test.scm.asm | 7 |
19 files changed, 3004 insertions, 0 deletions
diff --git a/awk/scheme/s.awk b/awk/scheme/s.awk new file mode 100755 index 0000000..7c8bba6 --- /dev/null +++ b/awk/scheme/s.awk @@ -0,0 +1,139 @@ +#!/usr/bin/awk -f + +# Set debug mode +DEBUG = 1 # Change to 0 to disable debug output + +# Environment to store variable bindings +BEGIN { + print "Welcome to the AWK Scheme Interpreter!" + print "Type your Scheme expressions below (type 'exit' to quit):" + while (1) { + printf "> " + if (getline input <= 0) { + print "Error reading input. Exiting." + break + } + if (input == "exit") { + print "Exiting the interpreter." + exit + } + if (input == "") { + print "Empty input received, continuing..." + continue + } + + print "Input received: " input # Echo the input + ast = parse(input) # Parse the input + + # Print the entire AST for debugging + for (i = 1; i <= length(ast); i++) { + print "AST[" i "] = " ast[i] + } + + # Evaluate the AST + if (length(ast) > 0) { + result = eval(ast) # Evaluate the AST + print "Result: " result # Print the result + } else { + print "Parsed AST is empty." + } + } +} + +# Function to parse input into an AST +function parse(input) { + # Remove outer whitespace + gsub(/^\s+|\s+$/, "", input) + + # Check if input is empty after trimming + if (input == "") { + print "Input is empty after trimming" + return "" + } + + # Debugging: Print input before processing + print "Debug: Raw input for parsing: " input + + # Remove parentheses at start and end + if (substr(input, 1, 1) == "(") { + input = substr(input, 2) + } + if (substr(input, length(input), 1) == ")") { + input = substr(input, 1, length(input) - 1) + } + + # Debugging: Print input after removing outer parentheses + print "Debug: Input after removing outer parentheses: " input + + # Split the input into tokens + gsub(/\(/, " ( ", input) + gsub(/\)/, " ) ", input) + gsub(/\s+/, " ", input) # normalize whitespace + gsub(/^\s+|\s+$/, "", input) # trim + + # Debugging: Print input after tokenization + print "Debug: Input after tokenization: " input + + n = split(input, ast, " ") + + # Debugging: Print the number of tokens + print "Debug: Number of tokens: " n + + return ast +} + +# Function to evaluate the AST +function eval(ast, i, result) { + # Debugging: Print the current AST being evaluated + print "Debug: Evaluating AST: " ast[1] " " ast[2] " " ast[3] + + # Handle numbers directly + if (ast[1] ~ /^[+-]?[0-9]+$/) { + print "Debug: Returning number: " ast[1] + return ast[1] + 0 # Convert string to number + } + + # Handle addition + if (ast[1] == "+") { + result = 0 + for (i = 2; i <= length(ast); i++) { + print "Debug: Adding operand: " ast[i] + result += eval(ast[i]) # Recursively evaluate operands + } + return result + } + + # Handle subtraction + if (ast[1] == "-") { + result = eval(ast[2]) # Start with the first operand + for (i = 3; i <= length(ast); i++) { + print "Debug: Subtracting operand: " ast[i] + result -= eval(ast[i]) # Subtract subsequent operands + } + return result + } + + # Handle multiplication + if (ast[1] == "*") { + result = 1 + for (i = 2; i <= length(ast); i++) { + print "Debug: Multiplying operand: " ast[i] + result *= eval(ast[i]) # Multiply operands + } + return result + } + + # Handle division + if (ast[1] == "/") { + result = eval(ast[2]) # Start with the first operand + for (i = 3; i <= length(ast); i++) { + print "Debug: Dividing by operand: " ast[i] + result /= eval(ast[i]) # Divide by subsequent operands + } + return result + } + + # If we reach here, the operation is not recognized + return "Error: Unknown operation " ast[1] +} + diff --git a/awk/scheme/scheme/README.md b/awk/scheme/scheme/README.md new file mode 100644 index 0000000..5dae265 --- /dev/null +++ b/awk/scheme/scheme/README.md @@ -0,0 +1,220 @@ +# Awk-Scheme + +## Overview +A scheme interpreter implemented in AWK: + +- A compiler that translates scheme expressions to stack-based VM instructions +- A virtual machine that executes the compiled code +- Support for basic arithmetic, list operations, functions, and variable bindings +- **Closure support** for nested function definitions and lexical scoping +- Persistent global state between REPL sessions + +## Architecture + +### Components +1. **Compiler** (`bin/compiler.awk`): + - Lexical analyzer for tokenizing input + - Recursive descent parser for scheme expressions + - Code generator that produces stack-based VM instructions + - Handles nested expressions and proper scoping + - **Generates closure creation code for lambda expressions** + +2. **Virtual Machine** (`bin/vm.awk`): + - Stack-based execution model + - Typed value system with runtime type checking + - Environment-based variable binding + - **Direct function execution** (no program array modification) + - **Closure support** with environment capture and restoration + - Heap memory for cons cells + - State persistence for globals and functions + +3. **REPL** (`bin/repl`): + - Interactive command line interface + - Multi-line input support + - State persistence between sessions + - Debug mode support + +### Data Types +All values are tagged with their type: +- `N:value` - Numbers (integers) +- `B:value` - Booleans (0/1) +- `P:index` - Pairs (cons cells) +- `F:name` - Functions +- `NIL:` - Empty list +- `S:name` - Symbols +- **`CLOSURE:func_name:env_id` - Closure objects (function + captured environment)** + +## Usage + +### Running the REPL +```bash +# Start interactive REPL +./scheme + +# Run a Scheme file +./scheme myprogram.scm + +# Enable debug output +DEBUG=1 ./scheme +``` + +### Basic Operations + +1. **Arithmetic**: +```scheme +scheme> (+ 1 2 3) +N:6 +scheme> (* 4 (- 10 5)) +N:20 +``` + +2. **Comparisons**: +```scheme +scheme> (< 1 2) +B:1 +scheme> (= 42 42) +B:1 +``` + +3. **Lists**: +```scheme +scheme> (cons 1 (cons 2 nil)) +P:1 +scheme> (car (cons 1 2)) +N:1 +``` + +4. **Variables**: +```scheme +scheme> (define x 42) +N:42 +scheme> x +N:42 +``` + +5. **Functions**: +```scheme +scheme> (define add (x y) (+ x y)) +scheme> (add 2 3) +N:5 +``` + +6. **Let Expressions**: +```scheme +scheme> (let ((x 10) (y 20)) (+ x y)) +N:30 +``` + +7. **Lambda Functions**: +```scheme +scheme> ((lambda (x) (+ x 1)) 41) +N:42 +scheme> ((lambda (x y) (+ x y)) 20 22) +N:42 +``` + +8. **Closures**: +```scheme +scheme> (let ((x 10)) ((lambda (y) (+ x y)) 32)) +N:42 +scheme> (let ((add1 (lambda (x) (+ x 1)))) (add1 41)) +N:42 +``` + +**Note**: Nested lambda definitions (lambdas defined inside other lambdas) are not yet supported. + +## Implementation Details + +### Compiler Pipeline +1. **Lexical Analysis**: + - Tokenizes input into numbers, symbols, and parentheses + - Handles whitespace and basic comments + +2. **Parsing**: + - Builds expression tree from tokens + - Validates syntax and expression structure + +3. **Code Generation**: + - Translates expressions to VM instructions + - Manages scope and variable bindings + - Handles function definitions + - **Generates closure creation code for lambda expressions** + +### Virtual Machine +1. **Stack Operations**: + - PUSH_CONST: Push constant value + - POP: Remove top value + - STORE: Store variable binding + - LOOKUP: Retrieve variable value + +2. **Function Calls**: + - **Direct execution** of function code with proper program restoration + - Environment frame management + - **Closure support** with environment capture and restoration + +3. **Closure System**: + - **CAPTURE_ENV**: Captures current environment for closure creation + - **Environment restoration**: Restores captured environments on closure calls + - **Variable lookup enhancement**: Checks closure environments for variable resolution + +4. **Memory Management**: + - Stack-based evaluation + - Simple heap for cons cells + - Basic reference counting (not fully implemented) + +### State Persistence +- Global variables and functions persist between sessions +- State stored in `/tmp/scheme_vm.state` and `/tmp/scheme_vm.env` +- Automatic state loading/saving +- **Note**: State persistence is enabled by default in the REPL + +## Extending the System + +### Adding New Special Forms +1. Add parsing in `compile_expr()` +2. Implement code generation function +3. Add corresponding VM instructions if needed + +### Adding New Data Types +1. Define new type tag in VM +2. Add type checking predicates +3. Implement value construction/access +4. Update relevant operations + +### Adding VM Instructions +1. Add instruction handling in `execute()` +2. Implement instruction logic +3. Update compiler to generate new instruction + +### Debugging +- Enable debug output: `DEBUG=1 ./scheme` +- Debug messages show: + - Lexical analysis + - Parsing steps + - Code generation + - VM execution + - Stack operations + - Environment changes + - **Closure creation and restoration** +- **Note**: Some debug output may be suppressed in the current implementation + +## Limitations +Current implementation does not support: +- **Nested lambda definitions** (lambdas defined inside other lambdas) - this causes "Undefined closure function" errors +- Floating point numbers +- Strings +- Proper tail recursion +- Garbage collection +- Error recovery in REPL +- Full numeric tower +- Macros +- Standard library + +## Future Enhancements +1. **Nested lambda support** (complex but achievable) +2. Proper tail call optimization +3. Garbage collection +4. Error recovery +5. More data types +6. Standard library +7. Better debugging tools \ No newline at end of file diff --git a/awk/scheme/scheme/TODO.txt b/awk/scheme/scheme/TODO.txt new file mode 100644 index 0000000..31723a4 --- /dev/null +++ b/awk/scheme/scheme/TODO.txt @@ -0,0 +1,69 @@ +Scheme Interpreter TODO +===================== + +Current State: +------------- +- Basic arithmetic operations (+,-,*,/) are working +- Let expressions with simple bindings are working +- Function definitions (define) and lambda expressions are partially implemented +- Stack-based virtual machine with environment for variable bindings + +Recent Changes: +-------------- +1. Fixed function table array naming conflicts (func_name -> func_def_names) +2. Modified vm_get_value to handle function names correctly +3. Attempted to fix argument handling in function calls +4. Modified lambda compilation to avoid pushing function name twice + +Current Issues: +-------------- +1. Function Definitions (define): + - Function calls are failing with "ADD requires numeric operands" + - Arguments may not be properly passed to functions + - Function environment may not be properly set up + +2. Lambda Expressions: + - Direct lambda calls are failing + - Lambda bindings in let expressions are failing + - Function environment for lambda parameters may not be correct + +Next Steps: +----------- +1. Debug Function Calls: + - Add detailed debug logging in vm_call_function + - Verify argument handling and environment setup + - Check if function code is being properly stored and retrieved + +2. Fix Function Environment: + - Review how parameters are stored in the environment + - Ensure environment is properly restored after function calls + - Verify parameter values are correctly passed to functions + +3. Improve Lambda Support: + - Review lambda compilation process + - Ensure lambda functions are properly stored and called + - Fix environment handling for lambda parameters + +4. Testing Strategy: + - Create test cases for each type of function call + - Add debug output to track stack and environment state + - Verify each step of function compilation and execution + +5. Code Cleanup: + - Review and document function handling code + - Ensure consistent naming and error handling + - Add comments explaining complex operations + +Technical Notes: +--------------- +- Function calls use a combination of LOOKUP, GET_VALUE, and CALL instructions +- Environment stack handles variable bindings and function parameters +- Function code is stored in FUNCTIONS table with unique names +- Lambda functions use __lambda_N naming scheme + +Debugging Tips: +-------------- +1. Enable DEBUG=1 to see detailed execution logs +2. Check stack contents before and after each operation +3. Verify environment state during function calls +4. Monitor function code storage and retrieval \ No newline at end of file diff --git a/awk/scheme/scheme/bin/compiler.awk b/awk/scheme/scheme/bin/compiler.awk new file mode 100755 index 0000000..94d6e99 --- /dev/null +++ b/awk/scheme/scheme/bin/compiler.awk @@ -0,0 +1,660 @@ +#!/usr/bin/awk -f + +# This is a Scheme-to-Assembly compiler implemented in AWK +# It takes Scheme expressions as input and outputs assembly instructions +# for a custom stack-based virtual machine + +BEGIN { + # Compiler maintains internal state for code generation + curr_token = "" # Current token being processed by lexer + input_buffer = "" # Buffer for input text being tokenized + next_label = 0 # Counter for generating unique labels + program = "" # Accumulates the full program text + + # AWK FEATURE: ENVIRON is a built-in array containing environment variables + # Unlike JS process.env, this is automatically available in awk + DEBUG = (ENVIRON["DEBUG"] == "1") ? 1 : 0 +} + +# Debug logging helper function +function debug(msg) { + # AWK FEATURE: printf with > "/dev/stderr" redirects output to stderr + # Unlike console.error() in JS, this is how awk handles stderr output + if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" +} + +# AWK FEATURE: Each line of input is automatically processed by this block +# This is awk's main input processing loop - every line from stdin/files goes here +# In JS, you'd need to explicitly read lines from a stream +{ + if (program != "") program = program "\n" + program = program $0 # $0 is the current line being processed +} + +# AWK FEATURE: END block runs after all input has been processed +# This is like a "finally" block that always executes after reading all input +END { + debug("Raw program:\n" program) + if (program == "") exit + + # Parse and compile each expression in the program + split_expressions(program) +} + +# Splits input into individual Scheme expressions +# Handles nested parentheses and comments +function split_expressions(prog, current, paren_count, i, c, expr, cleaned) { + current = "" + paren_count = 0 + + # Clean up the input: + # 1. Remove comments (text between ; and next opening paren) + # 2. Normalize whitespace + # 3. Trim leading/trailing whitespace + cleaned = prog + # AWK FEATURE: gsub() is a built-in function for global substitution (like replaceAll in JS) + # gsub(pattern, replacement, target) - modifies target in place and returns count + gsub(/;[^(]*\(/, "(", cleaned) # Remove comments before expressions + gsub(/\)[^)]*;/, ")", cleaned) # Remove comments after expressions + gsub(/[ \t\n]+/, " ", cleaned) # Normalize whitespace to single spaces + # AWK FEATURE: sub() is like gsub() but only replaces the first occurrence + sub(/^[ \t\n]+/, "", cleaned) # Trim leading whitespace + sub(/[ \t\n]+$/, "", cleaned) # Trim trailing whitespace + + debug("Cleaned program: [" cleaned "]") + + if (cleaned == "") return + + # Parse expressions by tracking parenthesis nesting + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + for (i = 1; i <= length(cleaned); i++) { + c = substr(cleaned, i, 1) + + if (c == "(") { + if (paren_count == 0) current = "" + paren_count++ + } + + current = current c + + if (c == ")") { + paren_count-- + if (paren_count == 0) { + # Complete expression found - compile it + expr = current + sub(/^\s+/, "", expr) + sub(/\s+$/, "", expr) + + debug("Processing expression: [" expr "]") + program = expr # Set for parser + expr = parse_expr() + compile_expr(expr) + current = "" + } + } + } + + # Add final HALT instruction + print "HALT" +} + +# Lexer helper functions for character classification +# AWK FEATURE: String comparison with >= and <= works lexicographically +# Unlike JS where you need to convert to numbers, awk can compare strings directly +function is_digit(c) { return c >= "0" && c <= "9" } +function is_whitespace(c) { return c == " " || c == "\t" || c == "\n" } + +# Lexical analyzer - converts input into tokens +# Handles numbers, symbols, and parentheses +function next_token() { + # Initialize input buffer on first call + if (input_buffer == "") input_buffer = program + + # Skip whitespace between tokens + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + while (length(input_buffer) > 0 && is_whitespace(substr(input_buffer, 1, 1))) + input_buffer = substr(input_buffer, 2) + + if (length(input_buffer) == 0) return "EOF" + + # Handle parentheses as single-character tokens + c = substr(input_buffer, 1, 1) + if (c == "(" || c == ")") { + input_buffer = substr(input_buffer, 2) + return c + } + + # Handle numbers (including negative numbers) + # AWK FEATURE: substr(string, start, length) extracts substring + # Unlike JS string.slice(), this is 1-indexed and requires explicit length + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + if (is_digit(c) || c == "-" && length(input_buffer) > 1 && is_digit(substr(input_buffer, 2, 1))) { + num = "" + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + while (length(input_buffer) > 0) { + c = substr(input_buffer, 1, 1) + if (!is_digit(c) && c != "-") break + num = num c + input_buffer = substr(input_buffer, 2) + } + return num + } + + # Handle symbols (identifiers and operators) + sym = "" + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + while (length(input_buffer) > 0) { + c = substr(input_buffer, 1, 1) + if (is_whitespace(c) || c == "(" || c == ")") break + sym = sym c + input_buffer = substr(input_buffer, 2) + } + return sym +} + +# Recursive descent parser for Scheme expressions +# Returns parsed expression as a string +function parse_expr(token, result) { + token = next_token() + if (token == "EOF") return "" + + if (token == "(") { + result = parse_list() + debug("Parsed list: " result) + return result + } + + debug("Parsed token: " token) + return token +} + +# Parses a list expression (anything in parentheses) +function parse_list(result, expr) { + result = "" + + while (1) { + expr = parse_expr() + if (expr == "" || expr == ")") break + + if (result != "") result = result " " + result = result expr + } + + if (expr == "") error("Unexpected end of input in list") + return "(" result ")" +} + +# Splits an expression into operator and arguments +# Handles nested expressions correctly +function split_expr(expr, i, len, c, op, args, paren_count) { + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + len = length(expr) + paren_count = 0 + + for (i = 1; i <= len; i++) { + c = substr(expr, i, 1) + if (c == " " && paren_count == 0) { + op = substr(expr, 1, i - 1) + args = substr(expr, i + 1) + break + } + if (c == "(") paren_count++ + if (c == ")") paren_count-- + } + + if (!op) { + op = expr + args = "" + } + + debug("Split expr: op=" op " args=" args) + # AWK FEATURE: SUBSEP is a built-in variable used as separator for array indices + # When you use array[key1,key2], awk internally stores it as array[key1 SUBSEP key2] + # This allows multi-dimensional arrays in awk (though they're really single-dimensional) + return op SUBSEP args +} + +# Splits argument list handling nested parentheses +function split_args(args, arg_array, len, i, c, current, paren_count, arg_count) { + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + len = length(args) + current = "" + paren_count = 0 + arg_count = 0 + + for (i = 1; i <= len; i++) { + c = substr(args, i, 1) + + if (c == "(") paren_count++ + if (c == ")") paren_count-- + + if (c == " " && paren_count == 0 && current != "") { + arg_array[++arg_count] = current + current = "" + } else if (c != " " || paren_count > 0) { + current = current c + } + } + + if (current != "") { + arg_array[++arg_count] = current + } + + return arg_count +} + +# Code generation for numeric literals +function compile_number(num) { + debug("Compiling number: " num) + print "PUSH_CONST N:" num +} + +# Code generation for primitive operations (+, -, *, cons, etc) +function compile_primitive_call(op, args, arg_array, nargs, i) { + debug("Primitive call: op=" op " args=" args) + nargs = split_args(args, arg_array) + + # Check if this is a lambda function call + # AWK FEATURE: ~ is the regex match operator (like /pattern/.test() in JS) + # The pattern is a regex literal, not a string + if (op ~ /^\(lambda /) { + # This is a lambda function call + # First compile all arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + + # Then compile the lambda function (this will push the function name) + compile_expr(op) + + # Call the function - the lambda name is now on top of stack + print "CALL" + return + } + + # Then emit appropriate operation + if (op == "+") { + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + for (i = 1; i < nargs; i++) + print "ADD" + } + else if (op == "-") { + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + if (nargs == 1) { + print "PUSH_CONST N:0" + print "SWAP" + } + for (i = 1; i < nargs; i++) + print "SUB" + } + else if (op == "*") { + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + for (i = 1; i < nargs; i++) + print "MUL" + } + else if (op == "cons") { + if (nargs != 2) error("cons requires 2 arguments") + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + print "CONS" + } + else if (op == "car") { + if (nargs != 1) error("car requires 1 argument") + # Compile argument + compile_expr(arg_array[1]) + print "CAR" + } + else if (op == "cdr") { + if (nargs != 1) error("cdr requires 1 argument") + # Compile argument + compile_expr(arg_array[1]) + print "CDR" + } + else if (op == "<") { + if (nargs != 2) error("< requires 2 arguments") + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + print "LT" + } + else if (op == "=") { + if (nargs != 2) error("= requires 2 arguments") + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + print "EQ" + } + else { + # Function call for user-defined functions + debug("Function call: " op) + # First compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + # Then look up the function name + print "LOOKUP " op + # Get the actual function name + print "GET_VALUE" + # Call the function + print "CALL" + } +} + +# Splits let bindings into individual variable/value pairs +function split_bindings(bindings, binding_array, count, current, paren_count, i, c) { + count = 0 + current = "" + paren_count = 0 + + debug("split_bindings: parsing [" bindings "]") + + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + for (i = 1; i <= length(bindings); i++) { + c = substr(bindings, i, 1) + + # Track nested parentheses + if (c == "(") { + paren_count++ + if (paren_count == 1) { + current = "" # Start new binding + continue + } + } + if (c == ")") { + paren_count-- + if (paren_count == 0) { + # End of binding + binding_array[++count] = current + debug("split_bindings: found binding [" current "]") + current = "" + continue + } + } + + # Add character if we're inside a binding + if (paren_count > 0) { + current = current c + } + } + + debug("split_bindings: found " count " bindings") + return count +} + +# Compiles let expressions (local variable bindings) +function compile_let(args, bindings, body, binding_array, nbindings, i, var, val, binding_parts) { + # Split into bindings and body + if (substr(args, 1, 1) != "(") error("Malformed let expression") + + # Find matching closing parenthesis for bindings + paren_count = 1 + i = 2 + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in let bindings") + + bindings = substr(args, 2, i - 3) # Remove outer parentheses + body = substr(args, i) + + # Trim whitespace from body + sub(/^[ \t\n]+/, "", body) + sub(/[ \t\n]+$/, "", body) + + debug("Let bindings: " bindings) + debug("Let body: " body) + + # Compile each binding + nbindings = split_bindings(bindings, binding_array) + for (i = 1; i <= nbindings; i++) { + debug("Processing binding: " binding_array[i]) + + # Find the variable name (everything up to the first space) + var = binding_array[i] + sub(/ .*$/, "", var) + + # Find the value (everything after the first space) + val = binding_array[i] + sub(/^[^ ]+ /, "", val) + + debug("Binding var: " var " val: " val) + + # Compile the value + if (substr(val, 1, 1) == "(") { + # Handle lambda or other compound expressions + if (substr(val, 2, 6) == "lambda") { + # This is a lambda expression + # Pass the entire lambda expression to compile_lambda + compile_lambda(val) + # Store the function name in the environment + print "STORE " var + } else { + compile_expr(val) + print "STORE " var + } + } else { + compile_expr(val) + print "STORE " var + } + } + + # Compile the body + compile_expr(body) + + # Clean up bindings AFTER evaluating body + for (i = nbindings; i >= 1; i--) { + print "POP_ENV" + } +} + +# Compiles define expressions (function/variable definitions) +function compile_define(args, name, params, body, param_array, nparams, i, paren_start, paren_end) { + # Set flag for global definition + print "PUSH_CONST B:1" + print "STORE from_define" # Must match exactly what vm_store checks for + + # Find the function name (everything up to the first space) + i = index(args, " ") + if (i == 0) error("Malformed define expression") + name = substr(args, 1, i - 1) + args = substr(args, i + 1) + + # Check if it's a function or variable definition + if (substr(args, 1, 1) == "(") { + # It's a function definition + paren_count = 1 + i = 2 + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in parameter list") + + params = substr(args, 2, i - 3) # Remove parentheses + body = substr(args, i + 1) + + # Create function label + print "LABEL " name + + # Process parameters + # AWK FEATURE: split(string, array, separator) splits string into array elements + # Unlike JS string.split() which returns an array, this populates an existing array + nparams = split(params, param_array, " ") + for (i = 1; i <= nparams; i++) { + print "STORE " param_array[i] + } + + # Compile function body + compile_expr(body) + + # Clean up parameters and return + for (i = nparams; i >= 1; i--) { + print "POP_ENV" + } + print "RETURN" + } else { + # Variable definition + debug("Defining variable: " name " with value: " args) + compile_expr(args) # Compile the value + print "STORE " name # Store the variable + } +} + +# Compiles lambda expressions (anonymous functions) +function compile_lambda(args, params, body, param_array, nparams, i, lambda_name) { + # Generate a unique name for the lambda function + lambda_name = "__lambda_" next_label++ + + debug("compile_lambda: args = [" args "]") + + # Handle both full lambda expression and just the arguments part + if (substr(args, 1, 7) == "(lambda") { + debug("compile_lambda: detected full lambda expression") + # Full lambda expression: (lambda (params) body) + args = substr(args, 8) # Remove "(lambda" + debug("compile_lambda: after removing (lambda: [" args "]") + # Find the closing parenthesis of the entire lambda expression + paren_count = 0 + i = 1 + while (i <= length(args)) { + c = substr(args, i, 1) + if (c == "(") paren_count++ + if (c == ")") { + paren_count-- + if (paren_count == -1) break # Found the closing paren of the lambda + } + i++ + } + if (paren_count != -1) error("Unmatched parenthesis in lambda") + args = substr(args, 1, i - 1) # Remove the closing parenthesis + debug("compile_lambda: after removing closing paren: [" args "]") + # Remove leading whitespace + sub(/^[ \t\n]+/, "", args) + debug("compile_lambda: after removing leading whitespace: [" args "]") + } + + # Split into parameters and body + if (substr(args, 1, 1) != "(") error("Malformed lambda expression") + + # Find matching closing parenthesis for parameters + paren_count = 1 + i = 2 + # AWK FEATURE: length(string) returns the length of a string + # Unlike JS string.length, this is a function call, not a property + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in lambda parameters") + + params = substr(args, 2, i - 3) # Remove parentheses + body = substr(args, i) + + # Trim whitespace from body + sub(/^[ \t\n]+/, "", body) + sub(/[ \t\n]+$/, "", body) + + debug("Lambda parameters: " params) + debug("Lambda body: " body) + + # Create function label + print "LABEL " lambda_name + + # Process parameters + # AWK FEATURE: split(string, array, separator) splits string into array elements + # Unlike JS string.split() which returns an array, this populates an existing array + nparams = split(params, param_array, " ") + for (i = 1; i <= nparams; i++) { + print "STORE " param_array[i] + } + + # Compile function body + compile_expr(body) + + # Clean up parameters and return + for (i = nparams; i >= 1; i--) { + print "POP_ENV" + } + print "RETURN" + + # Create closure that captures current environment + print "CAPTURE_ENV " lambda_name + print "PUSH_CONST CLOSURE:" lambda_name ":ENV_ID" +} + +# Main expression compiler - dispatches based on expression type +function compile_expr(expr, split_result, op, args) { + debug("Compiling expression: " expr) + + # Handle numeric literals + if (expr ~ /^-?[0-9]+$/) { + compile_number(expr) + return + } + + # Handle nil constant + if (expr == "nil") { + print "PUSH_CONST NIL:" + return + } + + # Handle variable lookup + if (expr ~ /^[a-zA-Z_][a-zA-Z0-9_]*$/) { + print "LOOKUP " expr + return + } + + # Handle compound expressions (lists) + if (substr(expr, 1, 1) == "(") { + expr = substr(expr, 2, length(expr) - 2) + split_result = split_expr(expr) + op = substr(split_result, 1, index(split_result, SUBSEP) - 1) + args = substr(split_result, index(split_result, SUBSEP) + 1) + + if (op == "define") { + compile_define(args) + } else if (op == "let") { + compile_let(args) + } else if (op == "lambda") { + compile_lambda(args) + } else { + compile_primitive_call(op, args) + } + return + } + + error("Unknown expression type: " expr) +} + +# Error reporting helper +function error(msg) { + print "Error: " msg > "/dev/stderr" + exit 1 +} \ No newline at end of file diff --git a/awk/scheme/scheme/bin/repl b/awk/scheme/scheme/bin/repl new file mode 100755 index 0000000..1fce388 --- /dev/null +++ b/awk/scheme/scheme/bin/repl @@ -0,0 +1,161 @@ +#!/bin/bash + +# Enable debug tracing +# BASH FEATURE: ${VAR:-default} provides a default value if VAR is unset or empty +# Unlike JS where you'd use VAR || default, this only uses default if VAR is literally unset +DEBUG=${DEBUG:-0} + +debug() { + if [ "$DEBUG" = "1" ]; then + # BASH FEATURE: >&2 redirects output to stderr (file descriptor 2) + # Unlike JS console.error(), this explicitly redirects to stderr + echo "[DEBUG] $*" >&2 + fi +} + +# Find the directory containing this script and the components +# BASH FEATURE: ${BASH_SOURCE[0]} is the path to the current script +# Unlike JS __filename, this works even when the script is sourced +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +COMPILER="$DIR/compiler.awk" +VM="$DIR/vm.awk" + +debug "Using compiler: $COMPILER" +debug "Using VM: $VM" + +# Verify components exist and are executable +for component in "$COMPILER" "$VM"; do + if [ ! -f "$component" ]; then + echo "Error: Cannot find $component" >&2 + echo "Please ensure all components are present" >&2 + exit 1 + fi + chmod +x "$component" +done + +# Set up temporary files and state +TMPDIR=$(mktemp -d) +debug "Created temp dir: $TMPDIR" +STATE_FILE="/tmp/scheme_vm.state" + +cleanup() { + debug "Cleaning up temp dir: $TMPDIR" + rm -rf "$TMPDIR" + # Temporarily disable state file deletion for debugging + # if [ "$1" != "keep_state" ]; then + # rm -f "$STATE_FILE" + # rm -f "/tmp/scheme_vm.env" + # fi +} +trap "cleanup" EXIT + +# Set up temporary files +INPUT_FILE="$TMPDIR/input.scm" +ASM_FILE="$TMPDIR/output.asm" +DEBUG_FILE="$TMPDIR/debug.out" + +# Initialize/clear state files at REPL start +# Temporarily disable state file clearing for debugging +# if [ "$#" -eq 0 ]; then # Only for interactive mode +# : > "/tmp/scheme_vm.state" +# : > "/tmp/scheme_vm.env" +# fi + +# Function to handle evaluation +evaluate_expression() { + local input="$1" + local result + + # Skip empty lines + if [ -z "$input" ]; then + return 0 + fi + + debug "Evaluating expression: $input" + echo "$input" > "$INPUT_FILE" + debug "Input file contents:" + cat "$INPUT_FILE" >&2 + + # Show compilation output even if it fails + debug "Running compiler..." + if awk -f "$COMPILER" "$INPUT_FILE" > "$ASM_FILE" 2> "$DEBUG_FILE"; then + debug "Compilation successful. Debug output:" + cat "$DEBUG_FILE" >&2 + debug "Generated assembly:" + cat "$ASM_FILE" >&2 + + debug "Running VM..." + # Use persistent VM state and pass debug flag + # BASH FEATURE: -v var=value passes variables to awk + # Unlike JS where you'd use process.env, this sets awk variables directly + result=$(awk -v PERSIST=1 -v DEBUG="$DEBUG" -f "$VM" "$ASM_FILE" 2>&1) + debug "VM output: $result" + if [ -n "$result" ]; then + echo "$result" + fi + return 0 + else + echo "Compilation error" >&2 + debug "Compiler output:" + cat "$DEBUG_FILE" >&2 + return 1 + fi +} + +# Check if a file argument is provided +if [ "$#" -gt 0 ]; then + if [ ! -f "$1" ]; then + echo "Error: File not found: $1" >&2 + exit 1 + fi + debug "Reading file: $1" + file_content=$(cat "$1" | tr '\n' ' ') + debug "File content: $file_content" + evaluate_expression "$file_content" + cleanup "keep_state" # Keep state after file execution + exit 0 +fi + +# REPL state +paren_count=0 +current_input="" + +# Print welcome message +echo "Scheme REPL (Press Ctrl+D to exit)" +echo + +# Main REPL loop +while true; do + if [ $paren_count -eq 0 ]; then + printf "scheme> " + else + printf "... " + fi + + read -r line || exit 0 + + # Skip empty lines + if [ -z "$line" ]; then + continue + fi + + # Count parentheses + # BASH FEATURE: $(command) is command substitution - runs command and captures output + # Unlike JS where you'd use require('child_process').execSync(), this is built-in + open_parens=$(echo "$line" | tr -cd '(' | wc -c) + close_parens=$(echo "$line" | tr -cd ')' | wc -c) + # BASH FEATURE: $((expression)) is arithmetic expansion + # Unlike JS where you'd use eval() or a math library, this evaluates arithmetic expressions + paren_count=$((paren_count + open_parens - close_parens)) + + if [ -n "$current_input" ]; then + current_input="$current_input $line" + else + current_input="$line" + fi + + if [ $paren_count -eq 0 ]; then + evaluate_expression "$current_input" + current_input="" + fi +done \ No newline at end of file diff --git a/awk/scheme/scheme/bin/vm.awk b/awk/scheme/scheme/bin/vm.awk new file mode 100755 index 0000000..ea15884 --- /dev/null +++ b/awk/scheme/scheme/bin/vm.awk @@ -0,0 +1,997 @@ +#!/usr/bin/awk -f + +# This is a stack-based virtual machine for executing compiled Scheme code +# It implements a simple instruction set with support for: +# - Basic arithmetic operations +# - Function calls and returns +# - Variable bindings and lookups +# - Cons cells and list operations + +BEGIN { + # Type system tags for runtime type checking + T_NUMBER = "N" # Numbers (integers) + T_BOOLEAN = "B" # Booleans (0/1) + T_SYMBOL = "S" # Symbols (identifiers) + T_PAIR = "P" # Cons cells (pairs) + T_FUNCTION = "F" # Function references + T_NIL = "NIL" # Empty list marker + T_CLOSURE = "CLOSURE" # Closure objects (function + captured environment) + + # Virtual machine registers + stack_ptr = 0 # Points to top of evaluation stack + heap_ptr = 0 # Points to next free heap location + pc = 0 # Program counter for instruction fetch + + # Original program storage for nested function definitions + # AWK FEATURE: delete array removes all elements from an array + # Unlike JS where you'd set array = [], this clears the array in place + delete original_program # Stores the original program before function calls + + # AWK FEATURE: ENVIRON is a built-in array containing environment variables + DEBUG = (ENVIRON["DEBUG"] == "1") ? 1 : 0 + + # Environment for variable bindings + env_size = 0 # Current size of environment stack + + # Closure support - captured environments for nested lambdas + delete closure_envs # Maps env_id to captured environment arrays + delete closure_env_names # Variable names in captured environments + delete closure_env_vals # Variable values in captured environments + delete closure_env_sizes # Size of each captured environment + next_env_id = 1 # Counter for generating unique environment IDs + + # Function table for storing defined functions + delete func_def_names # Function names + delete func_def_pc # Entry points + delete func_def_code # Function bodies + func_def_size = 0 # Number of defined functions + + # Call stack for function returns + call_stack_ptr = 0 + + # Global function registry - clear it first + delete FUNCTIONS # Maps function names to implementations + + # State persistence configuration + STATE_FILE = "/tmp/scheme_vm.state" + debug("STATE_FILE_PATH: " STATE_FILE) + debug("PERSIST_FLAG: " PERSIST) + if (PERSIST) { + debug("Loading state from: " STATE_FILE) + debug("LOADING_STATE: Attempting to read " STATE_FILE) + debug("LOADING_STATE: FUNCTIONS table size before loading: " length(FUNCTIONS)) + # AWK FEATURE: getline is awk's file reading function + # getline var < file reads one line from file into var, returns 1 on success, 0 on EOF, -1 on error + # Unlike JS where you'd use fs.readFileSync(), this reads line by line + if ((getline line < STATE_FILE) >= 0) { # Check if file exists and is readable + debug("LOADING_STATE: File opened successfully, first line: " line) + # AWK FEATURE: do-while loop syntax - the condition is checked at the end + do { + debug("LOADING_STATE: Processing line: " line) + if (line ~ /^FUNC /) { + # Parse and load function definition + # AWK FEATURE: sub() modifies the string in place and returns count of replacements + sub(/^FUNC /, "", line) + name = line + sub(/ .*$/, "", name) + code = line + sub(/^[^ ]+ /, "", code) + + # Read the rest of the function code (until next FUNC or end of file) + debug("LOADING_STATE: Reading function code for " name) + while ((getline line < STATE_FILE) > 0) { + debug("LOADING_STATE: Reading line: " line) + if (line ~ /^FUNC /) { + # Found next function, put the line back + debug("LOADING_STATE: Found next function, stopping") + break + } + code = code "\n" line + } + + debug("Loaded function: " name) + debug("Code: " code) + + # Store function in FUNCTIONS table + FUNCTIONS[name] = code + debug("LOADED_FUNCTION: " name " with code length " length(code)) + debug("LOADED_FUNCTION: FUNCTIONS table size after loading " name ": " length(FUNCTIONS)) + debug("LOADED_FUNCTION: Checking if " name " is in table: " (name in FUNCTIONS)) + } + } while ((getline line < STATE_FILE) > 0) + # AWK FEATURE: close() closes a file handle + close(STATE_FILE) + } + } + + # Function environment storage + delete func_env_names # Variable names in function scope + delete func_env_vals # Variable values in function scope + delete func_env_sizes # Size of each function's environment + + # Register built-in functions first + debug("REGISTERING_BUILTINS: " length(FUNCTIONS) " functions before") + FUNCTIONS["+"] = "add" + FUNCTIONS["-"] = "subtract" + FUNCTIONS["*"] = "multiply" + FUNCTIONS["/"] = "divide" + FUNCTIONS["="] = "equals" + FUNCTIONS["<"] = "less_than" + FUNCTIONS[">"] = "greater_than" + FUNCTIONS["add1"] = "add_one" + debug("REGISTERING_BUILTINS: " length(FUNCTIONS) " functions after") + + # Environment persistence configuration + ENV_STATE_FILE = "/tmp/scheme_vm.env" + if (PERSIST) { + debug("Loading environment state from: " ENV_STATE_FILE) + if ((getline line < ENV_STATE_FILE) >= 0) { + do { + if (line ~ /^ENV /) { + # Parse and load environment binding + sub(/^ENV /, "", line) + name = line + sub(/ .*$/, "", name) + val = line + sub(/^[^ ]+ /, "", val) + + debug("Loaded env var: " name " = " val) + + # Store in environment + env_name[env_size] = name + env_val[env_size] = val + env_size++ + } + } while ((getline line < ENV_STATE_FILE) > 0) + close(ENV_STATE_FILE) + } + } + + # Track if VM halted normally (vs error) + normal_exit = 0 +} + +# Debug output helper +function debug(msg) { + if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" +} + +# Value constructors and accessors +# Values are stored as type:value pairs for runtime type checking +function makeValue(type, val) { + return type ":" val +} + +function getType(val) { + # AWK FEATURE: substr(string, start, length) extracts substring + # Unlike JS string.slice(), this is 1-indexed and requires explicit length + # AWK FEATURE: index(string, substring) returns position of substring (1-indexed) + # Unlike JS string.indexOf(), this returns 0 if not found and is 1-indexed + type = substr(val, 1, index(val, ":") - 1) + debug("Get type: " type " from " val) + return type +} + +function getValue(val) { + # AWK FEATURE: index() returns 1-indexed position, so we add 1 to get after the colon + value = substr(val, index(val, ":") + 1) + debug("Get value: " value " from " val) + return value +} + +# Type checking predicates +function isNumber(val) { return getType(val) == T_NUMBER } +function isBoolean(val) { return getType(val) == T_BOOLEAN } +function isSymbol(val) { return getType(val) == T_SYMBOL } +function isPair(val) { return getType(val) == T_PAIR } +function isFunction(val) { return getType(val) == T_FUNCTION } +function isNil(val) { return getType(val) == T_NIL } +function isClosure(val) { return getType(val) == T_CLOSURE } + +# Closure helper functions +function makeClosure(func_name, env_id) { + return T_CLOSURE ":" func_name ":" env_id +} + +function getClosureFunction(closure_val) { + # Extract function name from CLOSURE:func_name:env_id + return substr(closure_val, index(closure_val, ":") + 1, + index(substr(closure_val, index(closure_val, ":") + 1), ":") - 1) +} + +function getClosureEnvId(closure_val) { + # Extract env_id from CLOSURE:func_name:env_id + first_colon = index(closure_val, ":") + second_colon = index(substr(closure_val, first_colon + 1), ":") + first_colon + return substr(closure_val, second_colon + 1) +} + +# Environment capture for closures +function captureEnvironment(env_id, i) { + debug("Capturing environment with ID: " env_id) + closure_env_sizes[env_id] = env_size + + # Copy current environment to closure environment + for (i = 0; i < env_size; i++) { + # Only capture non-global variables + if (env_name[i] !~ /^__global_/) { + closure_env_names[env_id, i] = env_name[i] + closure_env_vals[env_id, i] = env_val[i] + debug("Captured: " env_name[i] " = " env_val[i]) + } + } + + debug("Captured environment size: " closure_env_sizes[env_id]) +} + +# VM instruction to capture environment +function vm_capture_env(func_name) { + debug("Capturing environment for function: " func_name) + env_id = next_env_id++ + captureEnvironment(env_id) + + # Replace the placeholder ENV_ID in the closure value + # Find the closure value on the stack and update it + if (stack_ptr > 0) { + closure_val = stack[stack_ptr] + if (closure_val ~ /^CLOSURE:/) { + # Replace ENV_ID with actual env_id + new_closure_val = "CLOSURE:" func_name ":" env_id + stack[stack_ptr] = new_closure_val + debug("Updated closure value: " new_closure_val) + } + } +} + +# Environment restoration for closures +function pushClosureEnvironment(env_id, i) { + debug("Pushing closure environment: " env_id) + if (env_id in closure_env_sizes) { + for (i = 0; i < closure_env_sizes[env_id]; i++) { + if ((env_id, i) in closure_env_names) { + env_name[env_size] = closure_env_names[env_id, i] + env_val[env_size] = closure_env_vals[env_id, i] + env_size++ + debug("Restored: " closure_env_names[env_id, i] " = " closure_env_vals[env_id, i]) + } + } + } +} + +# Stack operations +function push(val) { + stack[++stack_ptr] = val + debug("Push: " val " (SP: " stack_ptr ")") +} + +function pop() { + if (stack_ptr < 1) error("Stack underflow") + val = stack[stack_ptr--] + debug("Pop: " val " (SP: " stack_ptr ")") + return val +} + +function peek() { + if (stack_ptr < 1) error("Stack empty") + debug("Peek: " stack[stack_ptr]) + return stack[stack_ptr] +} + +# Heap operations for cons cells +function allocate(val) { + heap[++heap_ptr] = val + refs[heap_ptr] = 1 # Reference counting (not fully implemented) + debug("Allocate: " val " at " heap_ptr) + return heap_ptr +} + +function getHeap(idx) { + if (!(idx in heap)) { + error("Invalid heap access: " idx) + return "" + } + return heap[idx] +} + +# Error handling +function error(msg) { + print "Error at PC " pc ": " msg > "/dev/stderr" + exit 1 +} + +# Arithmetic instruction implementations +function vm_add() { + if (stack_ptr < 2) error("ADD requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("ADD requires numeric operands") + result = getValue(val1) + getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function vm_subtract() { + if (stack_ptr < 2) error("SUB requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("SUB requires numeric operands") + result = getValue(val1) - getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function vm_multiply() { + if (stack_ptr < 2) error("MUL requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("MUL requires numeric operands") + result = getValue(val1) * getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function vm_divide() { + if (stack_ptr < 2) error("DIV requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("DIV requires numeric operands") + if (getValue(val2) == 0) + error("Division by zero") + result = getValue(val1) / getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +# List operation implementations +function vm_cons() { + if (stack_ptr < 2) error("CONS requires two operands") + val2 = pop() + val1 = pop() + pair_val = val1 "," val2 + pair_idx = allocate(pair_val) + push(makeValue(T_PAIR, pair_idx)) +} + +function vm_car() { + if (stack_ptr < 1) error("CAR requires one operand") + val = pop() + if (!isPair(val)) error("CAR requires pair operand") + pair_idx = getValue(val) + pair = getHeap(pair_idx) + car_val = substr(pair, 1, index(pair, ",") - 1) + push(car_val) +} + +function vm_cdr() { + if (stack_ptr < 1) error("CDR requires one operand") + val = pop() + if (!isPair(val)) error("CDR requires pair operand") + pair_idx = getValue(val) + pair = getHeap(pair_idx) + cdr_val = substr(pair, index(pair, ",") + 1) + push(cdr_val) +} + +# Comparison operations +function vm_equal() { + if (stack_ptr < 2) error("EQ requires two operands") + val2 = pop() + val1 = pop() + result = (val1 == val2) ? "1" : "0" + debug("Equal comparison: " val1 " == " val2 " -> " result) + push(makeValue(T_BOOLEAN, result)) +} + +function vm_less_than() { + if (stack_ptr < 2) error("LT requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("LT requires numeric operands") + result = (getValue(val1) < getValue(val2)) ? "1" : "0" + debug("Less than comparison: " val1 " < " val2 " -> " result) + push(makeValue(T_BOOLEAN, result)) +} + +# Main instruction execution loop +function execute(instr) { + # AWK FEATURE: split(string, array, separator) splits string into array elements + # Unlike JS string.split() which returns an array, this populates an existing array + split(instr, parts, " ") + op = parts[1] + debug("Execute: " instr) + + # Dispatch based on instruction opcode + if (op == "PUSH_CONST") { + push(parts[2]) + } + else if (op == "POP") { + pop() + } + else if (op == "DUP") { + val = peek() + push(val) + } + else if (op == "SWAP") { + if (stack_ptr < 2) error("SWAP requires two operands") + val2 = pop() + val1 = pop() + val1 = pop() + push(val2) + push(val1) + } + else if (op == "ADD") { + vm_add() + } + else if (op == "SUB") { + vm_subtract() + } + else if (op == "MUL") { + vm_multiply() + } + else if (op == "DIV") { + vm_divide() + } + else if (op == "CONS") { + vm_cons() + } + else if (op == "CAR") { + vm_car() + } + else if (op == "CDR") { + vm_cdr() + } + else if (op == "EQ") { + vm_equal() + } + else if (op == "LT") { + vm_less_than() + } + else if (op == "PRINT") { + if (stack_ptr < 1) error("PRINT requires one operand") + print peek() + } + else if (op == "HALT") { + debug("EXECUTING_HALT") + normal_exit = 1 + debug("HALT: Stack pointer = " stack_ptr) + if (stack_ptr > 0) { + result = peek() + debug("HALT: Result = " result) + } else { + debug("HALT: Stack is empty") + } + if (PERSIST) { + save_state() + } + if (result) { + debug("PRINTING_RESULT: " result) + print result + } + exit(0) + } + else if (op == "STORE") { + vm_store(parts[2]) + } + else if (op == "POP_ENV") { + vm_pop_env() + } + else if (op == "LOOKUP") { + vm_lookup(parts[2]) + } + else if (op == "LABEL") { + vm_define_function(parts[2], pc) + } + else if (op == "CALL") { + vm_call_function() + } + else if (op == "RETURN") { + debug("EXECUTING_RETURN") + # The call_stack_ptr is no longer used for return, so this instruction is effectively removed. + # The function execution itself handles the return. + } + else if (op == "GET_VALUE") { + vm_get_value() + } + else if (op == "CAPTURE_ENV") { + vm_capture_env(parts[2]) + } + else { + error("Unknown instruction: " op) + } +} + +# Load program instructions +# AWK FEATURE: Each line of input is automatically processed by this block +# NR is a built-in variable that contains the current record (line) number +# Unlike JS where you'd need to track line numbers manually +{ + program[NR-1] = $0 # $0 is the current line being processed + original_program[NR-1] = $0 # Keep a copy of the original program +} + +# AWK FEATURE: END block runs after all input has been processed +# This is like a "finally" block that always executes after reading all input +END { + # AWK FEATURE: length(array) returns the number of elements in an array + # Unlike JS array.length, this is a function call, not a property + while (pc < length(program)) { + # debug("EXECUTING_PC_" pc ": " program[pc]) + execute(program[pc++]) + } + + # Save state if we didn't halt normally + if (!normal_exit && PERSIST) { + save_state() + } +} + +# Variable binding implementation +function vm_store(name) { + debug("Storing " peek() " as " name " at env_size: " env_size) + + # Handle global definitions specially + if (lookup_no_error("from_define")) { + name = "__global_" name + # Clear the define flag + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == "from_define") { + env_size-- + break + } + } + + # Remove any previous definition of this global + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == name) { + # Shift everything down + for (j = i; j < env_size - 1; j++) { + env_name[j] = env_name[j + 1] + env_val[j] = env_val[j + 1] + } + env_size-- + break + } + } + } + + # Handle lambda functions + val = peek() + if (isSymbol(val)) { + func_name = getValue(val) + # AWK FEATURE: ~ is the regex match operator (like /pattern/.test() in JS) + # The pattern is a regex literal, not a string + if (func_name ~ /^__lambda_/) { + # Store the function code under the new name + FUNCTIONS[name] = FUNCTIONS[func_name] + # Store the new name in the environment + env_name[env_size] = name + env_val[env_size] = makeValue(T_SYMBOL, name) + env_size++ + return + } + } + + # Add to environment + env_name[env_size] = name + env_val[env_size] = peek() + env_size++ + + debug("Environment after store:") + dump_env() +} + +# Remove top binding from environment +function vm_pop_env() { + if (env_size <= 0) error("Environment underflow") + debug("Popping environment at size: " env_size) + + # Don't pop globals + if (env_name[env_size-1] ~ /^__global_/) { + debug("Keeping global definition: " env_name[env_size-1]) + return + } + + debug("Removing: " env_name[env_size-1] " = " env_val[env_size-1]) + env_size-- +} + +# Variable lookup implementation +function vm_lookup(name, i, global_name, val) { + debug("Looking up " name " in environment of size: " env_size) + dump_env() + + # Try global name first, then local + global_name = "__global_" name + debug("LOOKUP_ENV: Looking for " name " in environment of size " env_size) + for (i = env_size - 1; i >= 0; i--) { + debug("LOOKUP_ENV: Checking index " i ": " env_name[i] " = " env_val[i]) + if (env_name[i] == global_name || env_name[i] == name) { + debug("Found " name " = " env_val[i] " at position " i) + push(env_val[i]) + return + } + } + + # Check if it's a function (built-in or user-defined) + debug("LOOKUP_CHECKING: " name " in FUNCTIONS table") + debug("FUNCTIONS_TABLE_SIZE: " length(FUNCTIONS)) + debug("FUNCTIONS_IN_TABLE:") + # AWK FEATURE: for (var in array) iterates over array keys + # Unlike JS for...in which includes inherited properties, awk arrays don't have inheritance + for (f in FUNCTIONS) { + debug(" " f) + } + # AWK FEATURE: 'in' operator checks if key exists in array + # Unlike JS where you'd use array.hasOwnProperty(key) or 'key' in array + if (name in FUNCTIONS) { + debug("Found function: " name) + push(makeValue(T_SYMBOL, name)) + return + } + debug("FUNCTION_LOOKUP_FAILED: " name) + error("Undefined variable: " name) +} + +# Function definition implementation +function vm_define_function(name, start_pc) { + debug("Defining function: " name " at " start_pc) + debug("call_stack_ptr: " call_stack_ptr) + + # Clear the from_define flag + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == "from_define") { + debug("Clearing from_define flag") + env_size-- + break + } + } + + # Build function code + code = "" + i = start_pc + + # If we're inside a function call, we need to find the corresponding position + # in the original program. For now, use a simple heuristic. + if (call_stack_ptr > 0) { + debug("Nested function definition - using current instruction") + # Just read from the current program position + # AWK FEATURE: length(array) returns the number of elements in an array + # Unlike JS array.length, this is a function call, not a property + while (i < length(program) && program[i] != "RETURN") { + if (code != "") code = code "\n" + code = code program[i] + i++ + } + } else { + debug("Top-level function definition - using original program") + # Use original_program for top-level function definitions + # AWK FEATURE: length(array) returns the number of elements in an array + # Unlike JS array.length, this is a function call, not a property + while (i < length(original_program) && original_program[i] != "RETURN") { + if (code != "") code = code "\n" + code = code original_program[i] + i++ + } + } + code = code "\nRETURN" + + # Store function + debug("Storing function: " name " = " code) + FUNCTIONS[name] = code + debug("Function stored. Total functions: " length(FUNCTIONS)) + debug("FUNCTION_STORED: " name) + + pc = i + 1 +} + +# Function call implementation +function vm_call_function(code_lines, j, saved_pc, saved_env_size, arg, param_name) { + # Get function name from stack + func_name = pop() + debug("Calling function: " func_name) + debug("CALLING_FUNCTION: " func_name) + + # If name is a symbol, get its value + if (isSymbol(func_name)) { + func_name = getValue(func_name) + } + + # Check if it's a built-in function first + if (func_name in FUNCTIONS) { + built_in_name = FUNCTIONS[func_name] + if (built_in_name == "add") { + debug("Calling built-in function: add") + add() + return + } else if (built_in_name == "subtract") { + debug("Calling built-in function: subtract") + subtract() + return + } else if (built_in_name == "multiply") { + debug("Calling built-in function: multiply") + multiply() + return + } else if (built_in_name == "divide") { + debug("Calling built-in function: divide") + divide() + return + } else if (built_in_name == "equals") { + debug("Calling built-in function: equals") + equals() + return + } else if (built_in_name == "less_than") { + debug("Calling built-in function: less_than") + less_than() + return + } else if (built_in_name == "greater_than") { + debug("Calling built-in function: greater_than") + greater_than() + return + } else if (built_in_name == "add_one") { + debug("Calling built-in function: add_one") + add_one() + return + } + } + + # Handle anonymous functions and closures + if (func_name ~ /^__lambda_/) { + if (!(func_name in FUNCTIONS)) { + error("Undefined lambda function: " func_name) + } + } else if (isClosure(func_name)) { + # This is a closure call + debug("Calling closure: " func_name) + closure_func = getClosureFunction(func_name) + closure_env_id = getClosureEnvId(func_name) + + if (!(closure_func in FUNCTIONS)) { + error("Undefined closure function: " closure_func) + } + + # Restore the captured environment + pushClosureEnvironment(closure_env_id) + + # Use the closure's function name for the rest of the call + func_name = closure_func + } else if (!(func_name in FUNCTIONS)) { + error("Undefined function: " func_name) + } + + saved_pc = pc + saved_env_size = env_size + + # AWK FEATURE: split(string, array, separator) splits string into array elements + # Unlike JS string.split() which returns an array, this populates an existing array + split(FUNCTIONS[func_name], code_lines, "\n") + + # Check if this is a parameterized function + if (code_lines[1] ~ /^STORE /) { + # This is a parameterized function (lambda) + # Get parameter name from STORE instruction + param_name = substr(code_lines[1], 7) + debug("Found parameter name: " param_name) + debug("FUNCTION_PARAM: " param_name) + + # Get argument from stack + arg = pop() + debug("Function argument: " arg) + debug("FUNCTION_ARG: " arg) + + # Create new environment frame + debug("Creating new environment frame at size: " env_size) + env_name[env_size] = param_name + env_val[env_size] = arg + env_size++ + debug("FUNCTION_ENV_STORE: " param_name " = " arg " at index " (env_size-1)) + + # Execute function code directly, skipping STORE and POP_ENV instructions + for (j = 2; j <= length(code_lines); j++) { + if (code_lines[j] != "" && code_lines[j] != "POP_ENV") { + debug("Executing function instruction: " code_lines[j]) + execute(code_lines[j]) + } + } + + # Clean up parameter + vm_pop_env() + + # Return to caller + debug("Function completed, returning to PC: " saved_pc) + pc = saved_pc + return + } else { + # This is a built-in function or non-parameterized function + debug("Calling non-parameterized function: " func_name) + + # Execute all function code directly + for (j in code_lines) { + if (code_lines[j] != "") { + debug("Executing function instruction: " code_lines[j]) + execute(code_lines[j]) + } + } + + # Return to caller + debug("Function completed, returning to PC: " saved_pc) + pc = saved_pc + return + } +} + +# Function return implementation - no longer needed with direct execution +# function vm_return() { +# debug("VM_RETURN: call_stack_ptr = " call_stack_ptr) +# if (call_stack_ptr > 0) { +# # Save return value +# ret_val = pop() +# debug("VM_RETURN: return value = " ret_val) +# +# # Restore environment +# while (env_size > env_stack[call_stack_ptr]) { +# debug("Popping environment at size: " env_size) +# vm_pop_env() +# } +# +# # Restore program counter +# pc = call_stack[call_stack_ptr--] +# debug("VM_RETURN: restored PC = " pc) +# +# # Restore the original program at the call position +# program[pc] = original_program_at_call[call_stack_ptr + 1] +# debug("Restored original program: " original_program_at_call[call_stack_ptr + 1]) +# +# # Push return value +# push(ret_val) +# debug("VM_RETURN: pushed return value " ret_val) +# +# debug("Returned with value: " ret_val " and env_size: " env_size) +# } +# } + +# Debug helper to dump environment contents +function dump_env( i) { + debug("Environment dump:") + for (i = 0; i < env_size; i++) { + # AWK FEATURE: sprintf() formats a string like printf but returns it instead of printing + # Unlike JS where you'd use template literals or String.format(), this is the awk way + debug(sprintf(" %d: %s = %s", i, env_name[i], env_val[i])) + } +} + +# Helper for checking variable existence without error +function lookup_no_error(name, i) { + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == name) { + return 1 + } + } + return 0 +} + +# State persistence implementation +function save_state() { + debug("SAVE_STATE_CALLED") + debug("Saving state to: " STATE_FILE) + # Save functions from FUNCTIONS table + debug("SAVE_STATE: Saving " length(FUNCTIONS) " functions") + for (func_name in FUNCTIONS) { + if (func_name !~ /^__lambda_/) { # Don't save lambda functions + debug("Saving function: " func_name) + debug("SAVE_STATE: About to write function " func_name) + debug("SAVE_STATE: Function code length: " length(FUNCTIONS[func_name])) + # AWK FEATURE: printf with > file redirects output to a file + # Unlike JS where you'd use fs.writeFileSync(), this redirects from stdout to file + printf "FUNC %s %s\n", func_name, FUNCTIONS[func_name] > STATE_FILE + debug("SAVE_STATE: Saved function " func_name " to " STATE_FILE) + } + } + # AWK FEATURE: close() closes a file handle + close(STATE_FILE) + + # Save environment state + debug("Saving environment state to: " ENV_STATE_FILE) + for (i = 0; i < env_size; i++) { + if (env_name[i] ~ /^__global_/) { # Only save globals + debug("Saving env var: " env_name[i] " = " env_val[i]) + # AWK FEATURE: print with > file redirects output to a file + # Unlike JS console.log() which always goes to stdout + print "ENV " env_name[i] " " env_val[i] > ENV_STATE_FILE + } + } + close(ENV_STATE_FILE) +} + +# Built-in function implementations +function equals() { + if (stack_ptr < 2) error("= requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("= requires numeric operands") + result = (getValue(val1) == getValue(val2)) ? 1 : 0 + push(makeValue(T_BOOLEAN, result)) +} + +function less_than() { + if (stack_ptr < 2) error("< requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("< requires numeric operands") + result = (getValue(val1) < getValue(val2)) ? 1 : 0 + push(makeValue(T_BOOLEAN, result)) +} + +function greater_than() { + if (stack_ptr < 2) error("> requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("> requires numeric operands") + result = (getValue(val1) > getValue(val2)) ? 1 : 0 + push(makeValue(T_BOOLEAN, result)) +} + +function add() { + if (stack_ptr < 2) error("+ requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("+ requires numeric operands") + result = getValue(val1) + getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function subtract() { + if (stack_ptr < 2) error("- requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("- requires numeric operands") + result = getValue(val1) - getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function multiply() { + if (stack_ptr < 2) error("* requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("* requires numeric operands") + result = getValue(val1) * getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function divide() { + if (stack_ptr < 2) error("/ requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("/ requires numeric operands") + if (getValue(val2) == 0) error("Division by zero") + result = getValue(val1) / getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function add_one() { + if (stack_ptr < 1) error("add1 requires one operand") + val = pop() + if (!isNumber(val)) error("add1 requires numeric operand") + result = getValue(val) + 1 + push(makeValue(T_NUMBER, result)) +} + +# Get value from top of stack +function vm_get_value() { + val = pop() + if (isSymbol(val)) { + name = getValue(val) + # If it's a function name, just push the name directly + if (name in FUNCTIONS) { + push(name) + } else { + push(makeValue(T_SYMBOL, name)) + } + } else if (isClosure(val)) { + # For closures, just push the closure value back + push(val) + } else { + # For other types, just push the value back + push(val) + } +} \ No newline at end of file diff --git a/awk/scheme/scheme/diagram.md b/awk/scheme/scheme/diagram.md new file mode 100644 index 0000000..689a44a --- /dev/null +++ b/awk/scheme/scheme/diagram.md @@ -0,0 +1,91 @@ +# Awk-Scheme Architecture +## Component Interaction Diagram + +``` ++----------------+ Scheme Code +----------------+ Assembly +----------------+ +| | -----------------> | | -------------> | | +| REPL | "(+ 1 2)" | Compiler | "PUSH_CONST | VM | +| (bin/repl) | "(lambda (x) ...)" | compiler.awk | N:1 | vm.awk | +| | | | PUSH_CONST | | +| - Multi-line | | - Lexer | N:2 | - Stack-based | +| - Debug mode | | - Parser | ADD | - Type system | +| - File input | | - Code gen | HALT" | - Environment | +| | | - Closure gen | CAPTURE_ENV | - Direct exec | +| | <----------------- | | <------------- | | +| | Output: "N:3" | | Result | | ++----------------+ +----------------+ +----------------+ + ^ | + | v + | +----------------+ + | | Persistence | + | | /tmp files: | + +--------------------------------------------------------------+ - vm.state | + | - vm.env | + +----------------+ + +Debug Flow (when DEBUG=1): ++----------------+ Debug Info +----------------+ Debug Info +----------------+ +| REPL | -----------------> | Compiler | -------------> | VM | +| | [Input received] | | [Tokens found] | | +| [Debug output] | | [Parsing tree] | [Stack ops] | [Stack state] | +| stderr | <----------------- | [Gen code] | [Closure info] | [Environment] | ++----------------+ +----------------+ +----------------+ + +Execution Flow Example: +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Input: │ │ Lexer/ │ │ Code Gen │ │ VM │ +│ (+ 1 2) │ ------> │ Parser: │ ------> │ PUSH_CONST │ ------> │ Stack: │ +│ │ │ (+ │ │ N:1 │ │ [N:1] │ +│ │ │ 1 │ │ PUSH_CONST │ │ [N:1,N:2] │ +│ │ │ 2) │ │ N:2 │ │ [N:3] │ +│ │ │ │ │ ADD │ │ │ +└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ + +Closure Flow Example: +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Input: │ │ Lexer/ │ │ Code Gen │ │ VM │ +│(let ((x 10))│ ------->│ Parser: │ ------> │ STORE x │ ------> │ Env: [x=10] │ +│ ((lambda │ │ (let │ │ LABEL λ │ │ │ +│ (y) (+ x │ │ ((x 10)) │ │ CAPTURE_ENV │ │ Closure: │ +│ y)) 32))) │ │ (lambda │ │ PUSH_CONST │ │ λ+env_id │ +│ │ │ (y) ...) │ │ CLOSURE:... │ │ │ +│ │ │ │ │ CALL │ │ Result: 42 │ +└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ + +State Management: +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Global │ │ Environment │ │ Function │ │ Closure │ +│ Variables │ ------> │ Stack │ ------> │ Calls │ ------> │ Environment │ +│ (persist) │ │ (frames) │ │ (direct) │ │ (captured) │ +└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ + +Data Type System: +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Numbers │ │ Booleans │ │ Symbols │ │ Closures │ +│ N:value │ │ B:value │ │ S:name │ │CLOSURE:fn:env_id│ +└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ + │ │ │ │ + └───────────────────┼───────────────────┼───────────────────┘ + │ │ + ┌─────────────┐ ┌─────────────┐ + │ Pairs │ │ Nil │ + │ P:index │ │ NIL: │ + └─────────────┘ └─────────────┘ + +VM Instruction Set: +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Stack Ops │ │ Environment │ │ Functions │ +│ PUSH_CONST val │ │ STORE name │ │ LABEL name │ +│ POP │ │ LOOKUP name │ │ CALL │ +│ ADD/SUB/MUL/DIV │ │ POP_ENV │ │ RETURN │ +│ GET_VALUE │ │ │ │ │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + └───────────────────────┼───────────────────────┘ + │ + ┌─────────────────┐ + │ Closures │ + │ CAPTURE_ENV fn │ + │ PUSH_CONST │ + │ CLOSURE:fn:env │ + └─────────────────┘ \ No newline at end of file diff --git a/awk/scheme/scheme/examples/cons.test.scm b/awk/scheme/scheme/examples/cons.test.scm new file mode 100644 index 0000000..d1e3847 --- /dev/null +++ b/awk/scheme/scheme/examples/cons.test.scm @@ -0,0 +1,3 @@ +(cons (+ 1 2) + (cons (* 3 4) + nil)) diff --git a/awk/scheme/scheme/examples/define.test.scm b/awk/scheme/scheme/examples/define.test.scm new file mode 100644 index 0000000..ec66b04 --- /dev/null +++ b/awk/scheme/scheme/examples/define.test.scm @@ -0,0 +1,2 @@ +(define add2 (x) (+ x 2)) +(add2 40) \ No newline at end of file diff --git a/awk/scheme/scheme/examples/lambda.test.scm b/awk/scheme/scheme/examples/lambda.test.scm new file mode 100644 index 0000000..1f2bb09 --- /dev/null +++ b/awk/scheme/scheme/examples/lambda.test.scm @@ -0,0 +1,12 @@ +; Test lambda function support +((lambda (x) (+ x 1)) 41) ; Should return 42 + +; Test lambda with multiple parameters +((lambda (x y) (+ x y)) 20 22) ; Should return 42 + +; Test lambda in let expression +(let ((add1 (lambda (x) (+ x 1)))) + (add1 41)) ; Should return 42 + +; Test nested lambda +((lambda (x) ((lambda (y) (+ x y)) 1)) 41) ; Should return 42 \ No newline at end of file diff --git a/awk/scheme/scheme/examples/let-and-define.test.scm b/awk/scheme/scheme/examples/let-and-define.test.scm new file mode 100644 index 0000000..fade30b --- /dev/null +++ b/awk/scheme/scheme/examples/let-and-define.test.scm @@ -0,0 +1,9 @@ +; Let expression example +(let ((x 5) (y 3)) + (+ x y)) + +; Function definition example +(define add2 (x) + (+ x 2)) + +(add2 40) ; Returns 42 \ No newline at end of file diff --git a/awk/scheme/scheme/examples/let.test.scm b/awk/scheme/scheme/examples/let.test.scm new file mode 100644 index 0000000..2cdc3b8 --- /dev/null +++ b/awk/scheme/scheme/examples/let.test.scm @@ -0,0 +1,2 @@ +(let ((x 5)) + (+ x 2)) diff --git a/awk/scheme/scheme/scheme b/awk/scheme/scheme/scheme new file mode 100755 index 0000000..cec35d1 --- /dev/null +++ b/awk/scheme/scheme/scheme @@ -0,0 +1,3 @@ +#!/bin/bash +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +exec "$DIR/bin/repl" "$@" diff --git a/awk/scheme/scheme/scratch/complex_test.scm.asm b/awk/scheme/scheme/scratch/complex_test.scm.asm new file mode 100644 index 0000000..67870c3 --- /dev/null +++ b/awk/scheme/scheme/scratch/complex_test.scm.asm @@ -0,0 +1,44 @@ +# Test proper list construction (3 2 1) +# Building the list in proper order: car points to value, cdr points to next pair + +# Start with empty list +PUSH_CONST NIL: # [nil] +PRINT # Print nil + +# Build (1 . nil) +PUSH_CONST NIL: # [nil] +PUSH_CONST N:1 # [nil 1] +SWAP # [1 nil] +CONS # [(1 . nil)] +DUP +PRINT # Print (1 . nil) + +# Build (2 . (1 . nil)) +PUSH_CONST N:2 # [(1.nil) 2] +SWAP # [2 (1.nil)] +CONS # [(2 . (1.nil))] +DUP +PRINT # Print (2 . (1.nil)) + +# Build (3 . (2 . (1 . nil))) +PUSH_CONST N:3 # [(2.(1.nil)) 3] +SWAP # [3 (2.(1.nil))] +CONS # [(3 . (2.(1.nil)))] +DUP +PRINT # Print full structure + +# Test CAR/CDR operations +DUP # Keep a copy of the list for later +DUP # Another copy for CAR +CAR # Get first element (3) +PRINT # Should print 3 + +SWAP # Bring back our spare list copy +CDR # Get rest of list ((2 . (1 . nil))) +DUP +PRINT # Print rest of list + +CAR # Get first of rest (2) +PRINT # Should print 2 + +HALT \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/ideas-for-string-support.md b/awk/scheme/scheme/scratch/ideas-for-string-support.md new file mode 100644 index 0000000..0d81e5f --- /dev/null +++ b/awk/scheme/scheme/scratch/ideas-for-string-support.md @@ -0,0 +1,556 @@ +# String Support Implementation Plan for Awk-Scheme + +## Overview +This document outlines a comprehensive plan for adding string support to the Awk-Scheme interpreter. The implementation will follow the existing architecture patterns and maintain consistency with the current type system and execution model. + +## Current Architecture Analysis + +### Type System +- Current types: `N:`, `B:`, `S:`, `P:`, `F:`, `NIL:`, `CLOSURE:` +- All values use `type:value` format for runtime type checking +- Type checking via `getType()` and `getValue()` functions +- Type predicates: `isNumber()`, `isBoolean()`, `isSymbol()`, etc. + +### Compiler Pipeline +1. **Lexer** (`next_token()`): Handles numbers, symbols, parentheses +2. **Parser** (`parse_expr()`, `parse_list()`): Builds expression trees +3. **Code Generator**: Emits VM instructions for different expression types +4. **Expression Compiler** (`compile_expr()`): Dispatches based on expression type + +### VM Architecture +- Stack-based execution with typed values +- Environment-based variable binding +- Built-in function registry (`FUNCTIONS` table) +- Direct function execution (no program array modification) +- State persistence for globals and functions + +## String Implementation Plan + +### Phase 1: Core String Type System + +#### 1.1 VM Type System Extensions +**File: `bin/vm.awk`** + +```awk +# Add to BEGIN block +T_STRING = "STR" # String type tag + +# Add type predicate +function isString(val) { return getType(val) == T_STRING } + +# String value constructor +function makeString(val) { + return T_STRING ":" val +} +``` + +**Changes Required:** +- Add `T_STRING` constant in BEGIN block +- Add `isString()` predicate function +- Add `makeString()` constructor function +- Update type checking in existing operations where needed + +#### 1.2 String Storage Strategy +**Options:** +1. **Inline storage**: Store strings directly in the type:value format + - Pros: Simple, fast access + - Cons: Limited by awk string length, no escaping issues + +2. **Heap storage**: Store strings in heap like cons cells + - Pros: Unlimited length, consistent with existing patterns + - Cons: More complex, requires string management + +**Recommendation**: Start with inline storage for simplicity, can migrate to heap later. + +### Phase 2: Compiler Extensions + +#### 2.1 Lexer String Support +**File: `bin/compiler.awk`** + +**Current lexer handles:** +- Numbers: `-?[0-9]+` +- Symbols: `[a-zA-Z_][a-zA-Z0-9_]*` +- Parentheses: `(`, `)` + +**New string tokenization:** +```awk +# Add to next_token() function +# Handle string literals (double quotes) +if (c == "\"") { + str = "" + input_buffer = substr(input_buffer, 2) # Skip opening quote + + while (length(input_buffer) > 0) { + c = substr(input_buffer, 1, 1) + if (c == "\"") { + input_buffer = substr(input_buffer, 2) # Skip closing quote + break + } + if (c == "\\") { + # Handle escape sequences + input_buffer = substr(input_buffer, 2) + if (length(input_buffer) > 0) { + c = substr(input_buffer, 1, 1) + if (c == "n") str = str "\n" + else if (c == "t") str = str "\t" + else if (c == "\\") str = str "\\" + else if (c == "\"") str = str "\"" + else error("Invalid escape sequence: \\" c) + input_buffer = substr(input_buffer, 2) + } + } else { + str = str c + input_buffer = substr(input_buffer, 2) + } + } + return "\"" str "\"" # Return with quotes for identification +} +``` + +**Escape Sequences to Support:** +- `\"` - Literal double quote +- `\\` - Literal backslash +- `\n` - Newline +- `\t` - Tab +- `\r` - Carriage return + +#### 2.2 Parser String Support +**File: `bin/compiler.awk`** + +**Update `parse_expr()`:** +```awk +function parse_expr(token, result) { + token = next_token() + if (token == "EOF") return "" + + if (token == "(") { + result = parse_list() + debug("Parsed list: " result) + return result + } + + # Handle string literals + if (substr(token, 1, 1) == "\"") { + debug("Parsed string: " token) + return token + } + + debug("Parsed token: " token) + return token +} +``` + +#### 2.3 Code Generation for Strings +**File: `bin/compiler.awk`** + +**Add to `compile_expr()`:** +```awk +function compile_expr(expr, split_result, op, args) { + debug("Compiling expression: " expr) + + # Handle string literals + if (substr(expr, 1, 1) == "\"") { + compile_string(expr) + return + } + + # ... existing code for numbers, nil, variables, etc. +} + +function compile_string(str) { + debug("Compiling string: " str) + # Remove outer quotes and emit string constant + content = substr(str, 2, length(str) - 2) + print "PUSH_CONST STR:" content +} +``` + +### Phase 3: VM String Operations + +#### 3.1 String Built-in Functions +**File: `bin/vm.awk`** + +**Add to BEGIN block (built-in registration):** +```awk +# String operations +FUNCTIONS["string-length"] = "string_length" +FUNCTIONS["string-append"] = "string_append" +FUNCTIONS["string-ref"] = "string_ref" +FUNCTIONS["substring"] = "substring" +FUNCTIONS["string=?"] = "string_equal" +FUNCTIONS["string<?"] = "string_less_than" +FUNCTIONS["string>?"] = "string_greater_than" +FUNCTIONS["string-ci=?"] = "string_ci_equal" +FUNCTIONS["string-ci<?"] = "string_ci_less_than" +FUNCTIONS["string-ci>?"] = "string_ci_greater_than" +``` + +**Add to function call dispatch in `vm_call_function()`:** +```awk +} else if (built_in_name == "string_length") { + debug("Calling built-in function: string_length") + string_length() + return +} else if (built_in_name == "string_append") { + debug("Calling built-in function: string_append") + string_append() + return +# ... etc for other string functions +``` + +#### 3.2 String Function Implementations +**File: `bin/vm.awk`** + +```awk +# String length +function string_length() { + if (stack_ptr < 1) error("string-length requires one operand") + val = pop() + if (!isString(val)) error("string-length requires string operand") + str = getValue(val) + push(makeValue(T_NUMBER, length(str))) +} + +# String concatenation +function string_append() { + if (stack_ptr < 2) error("string-append requires at least two operands") + result = "" + # Pop all arguments and concatenate + while (stack_ptr > 0) { + val = pop() + if (!isString(val)) error("string-append requires string operands") + result = getValue(val) result + } + push(makeString(result)) +} + +# String character access (0-indexed) +function string_ref() { + if (stack_ptr < 2) error("string-ref requires two operands") + index_val = pop() + str_val = pop() + if (!isNumber(index_val)) error("string-ref index must be a number") + if (!isString(str_val)) error("string-ref requires string operand") + + str = getValue(str_val) + idx = getValue(index_val) + 1 # Convert to 1-indexed + if (idx < 1 || idx > length(str)) error("string-ref index out of bounds") + + char = substr(str, idx, 1) + push(makeString(char)) +} + +# Substring extraction +function substring() { + if (stack_ptr < 3) error("substring requires three operands") + end = pop() + start = pop() + str_val = pop() + + if (!isNumber(start) || !isNumber(end)) error("substring indices must be numbers") + if (!isString(str_val)) error("substring requires string operand") + + str = getValue(str_val) + start_idx = getValue(start) + 1 # Convert to 1-indexed + end_idx = getValue(end) + 1 + + if (start_idx < 1 || end_idx > length(str) || start_idx > end_idx) { + error("substring indices out of bounds") + } + + result = substr(str, start_idx, end_idx - start_idx + 1) + push(makeString(result)) +} + +# String comparison (case-sensitive) +function string_equal() { + if (stack_ptr < 2) error("string=? requires two operands") + val2 = pop() + val1 = pop() + if (!isString(val1) || !isString(val2)) error("string=? requires string operands") + result = (getValue(val1) == getValue(val2)) ? "1" : "0" + push(makeValue(T_BOOLEAN, result)) +} + +function string_less_than() { + if (stack_ptr < 2) error("string<? requires two operands") + val2 = pop() + val1 = pop() + if (!isString(val1) || !isString(val2)) error("string<? requires string operands") + result = (getValue(val1) < getValue(val2)) ? "1" : "0" + push(makeValue(T_BOOLEAN, result)) +} + +function string_greater_than() { + if (stack_ptr < 2) error("string>? requires two operands") + val2 = pop() + val1 = pop() + if (!isString(val1) || !isString(val2)) error("string>? requires string operands") + result = (getValue(val1) > getValue(val2)) ? "1" : "0" + push(makeValue(T_BOOLEAN, result)) +} + +# Case-insensitive string comparison +function string_ci_equal() { + if (stack_ptr < 2) error("string-ci=? requires two operands") + val2 = pop() + val1 = pop() + if (!isString(val1) || !isString(val2)) error("string-ci=? requires string operands") + # Convert to lowercase for comparison + str1 = tolower(getValue(val1)) + str2 = tolower(getValue(val2)) + result = (str1 == str2) ? "1" : "0" + push(makeValue(T_BOOLEAN, result)) +} + +function string_ci_less_than() { + if (stack_ptr < 2) error("string-ci<? requires two operands") + val2 = pop() + val1 = pop() + if (!isString(val1) || !isString(val2)) error("string-ci<? requires string operands") + str1 = tolower(getValue(val1)) + str2 = tolower(getValue(val2)) + result = (str1 < str2) ? "1" : "0" + push(makeValue(T_BOOLEAN, result)) +} + +function string_ci_greater_than() { + if (stack_ptr < 2) error("string-ci>? requires two operands") + val2 = pop() + val1 = pop() + if (!isString(val1) || !isString(val2)) error("string-ci>? requires string operands") + str1 = tolower(getValue(val1)) + str2 = tolower(getValue(val2)) + result = (str1 > str2) ? "1" : "0" + push(makeValue(T_BOOLEAN, result)) +} +``` + +### Phase 4: Enhanced String Operations + +#### 4.1 Additional String Functions +```awk +# String to number conversion +function string_to_number() { + if (stack_ptr < 1) error("string->number requires one operand") + val = pop() + if (!isString(val)) error("string->number requires string operand") + str = getValue(val) + if (str ~ /^-?[0-9]+$/) { + push(makeValue(T_NUMBER, str)) + } else { + push(makeValue(T_BOOLEAN, "0")) # Return false for invalid numbers + } +} + +# Number to string conversion +function number_to_string() { + if (stack_ptr < 1) error("number->string requires one operand") + val = pop() + if (!isNumber(val)) error("number->string requires number operand") + num = getValue(val) + push(makeString(num)) +} + +# String splitting +function string_split() { + if (stack_ptr < 2) error("string-split requires two operands") + delimiter = pop() + str_val = pop() + if (!isString(delimiter) || !isString(str_val)) { + error("string-split requires string operands") + } + + str = getValue(str_val) + delim = getValue(delimiter) + + # Use awk's split function + split(str, parts, delim) + result = "" + for (i = 1; i <= length(parts); i++) { + if (result != "") result = result " " + result = result "\"" parts[i] "\"" + } + push(makeString("(" result ")")) +} +``` + +#### 4.2 String Formatting +```awk +# String formatting (simple version) +function format() { + if (stack_ptr < 2) error("format requires at least two operands") + format_str = pop() + if (!isString(format_str)) error("format requires string format operand") + + # Simple implementation - replace ~a with arguments + fmt = getValue(format_str) + arg_count = 0 + + while (stack_ptr > 0) { + val = pop() + arg_count++ + # Replace ~a with the value + gsub(/~a/, val, fmt) + } + + push(makeString(fmt)) +} +``` + +### Phase 5: State Persistence for Strings + +#### 5.1 String State Management +**File: `bin/vm.awk`** + +**Update `save_state()` and state loading:** +- Strings stored in environment will be automatically persisted +- No special handling needed since strings use inline storage +- String literals in function definitions will be preserved + +**Considerations:** +- Long strings may impact state file size +- Escape sequences need proper handling in state files +- String encoding consistency across sessions + +### Phase 6: Testing and Validation + +#### 6.1 Test Cases +**Create test files:** + +```scheme +; Basic string literals +"hello world" +"" +"\"quoted\"" +"line1\nline2" + +; String operations +(string-length "hello") +(string-append "hello" " " "world") +(string-ref "hello" 0) +(substring "hello world" 0 4) + +; String comparisons +(string=? "hello" "hello") +(string=? "hello" "world") +(string<? "abc" "def") +(string>? "xyz" "abc") + +; Case-insensitive comparisons +(string-ci=? "Hello" "hello") +(string-ci<? "ABC" "def") + +; Conversions +(string->number "123") +(number->string 456) +(string->number "abc") + +; String in expressions +(define greeting "Hello") +(define name "World") +(string-append greeting " " name) + +; Strings in functions +(define (greet name) (string-append "Hello, " name "!")) +(greet "Alice") +``` + +#### 6.2 Edge Cases to Test +- Empty strings +- Strings with special characters +- Escape sequences +- Very long strings +- String operations on non-string types +- String comparisons with different encodings +- String persistence across REPL sessions + +### Phase 7: Performance and Optimization + +#### 7.1 Performance Considerations +- **String concatenation**: Current `string-append` is O(n²) - consider optimization +- **String storage**: Monitor memory usage with large strings +- **String comparisons**: Consider early termination for long strings +- **Escape sequence processing**: Optimize lexer for common cases + +#### 7.2 Potential Optimizations +```awk +# Optimized string concatenation (build in reverse) +function string_append_optimized() { + if (stack_ptr < 2) error("string-append requires at least two operands") + + # Count arguments and pre-allocate + arg_count = 0 + temp_stack[0] = stack_ptr + + # Pop all arguments to temp storage + while (stack_ptr > 0) { + val = pop() + if (!isString(val)) error("string-append requires string operands") + temp_stack[++arg_count] = getValue(val) + } + + # Concatenate in reverse order (more efficient) + result = "" + for (i = arg_count; i >= 1; i--) { + result = result temp_stack[i] + } + + push(makeString(result)) +} +``` + +### Phase 8: Documentation and Examples + +#### 8.1 Update README.md +- Add string data type to type system documentation +- Include string usage examples +- Document all string built-in functions +- Add string limitations and considerations + +#### 8.2 Update diagram.md +- Add string type to data type diagram +- Include string operations in VM instruction set +- Show string flow in execution examples + +### Implementation Order and Risk Assessment + +#### High Priority (Core Functionality) +1. **Type system extensions** - Low risk, foundational +2. **Lexer string support** - Medium risk, affects parsing +3. **Basic string operations** - Low risk, self-contained +4. **String literals in expressions** - Medium risk, integration + +#### Medium Priority (Enhanced Features) +5. **String comparisons** - Low risk, straightforward +6. **String conversions** - Low risk, utility functions +7. **State persistence** - Low risk, automatic + +#### Lower Priority (Advanced Features) +8. **String formatting** - Medium risk, complex parsing +9. **String splitting** - Low risk, utility function +10. **Performance optimizations** - Low risk, optional + +#### Risk Mitigation +- **Regression testing**: Ensure existing functionality unchanged +- **Incremental implementation**: Add features one at a time +- **Comprehensive testing**: Test all edge cases +- **Backward compatibility**: Maintain existing API + +### Success Criteria +1. String literals parse and compile correctly +2. Basic string operations work as expected +3. String state persists across REPL sessions +4. No regression in existing functionality +5. Performance remains acceptable +6. Documentation is complete and accurate + +### Future Enhancements +1. **Heap-based string storage** for very long strings +2. **String interning** for memory efficiency +3. **Unicode support** (complex, requires significant changes) +4. **Regular expression support** for string matching +5. **String formatting with more features** (like Common Lisp format) +6. **String streams** for efficient string building + +This plan provides a comprehensive roadmap for implementing string support while maintaining the existing architecture and ensuring backward compatibility. \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/run.sh b/awk/scheme/scheme/scratch/run.sh new file mode 100755 index 0000000..0afdb41 --- /dev/null +++ b/awk/scheme/scheme/scratch/run.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Compile Scheme to VM instructions +awk -f compiler.awk test.scm > test.asm +# Run VM instructions +awk -f vm.awk test.asm \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/test.asm b/awk/scheme/scheme/scratch/test.asm new file mode 100644 index 0000000..8e7d8df --- /dev/null +++ b/awk/scheme/scheme/scratch/test.asm @@ -0,0 +1,16 @@ +PUSH_CONST N:1 +PUSH_CONST N:2 +ADD +PUSH_CONST N:3 +PUSH_CONST N:4 +MUL +PUSH_CONST N:10 +PUSH_CONST N:2 +PUSH_CONST N:3 +ADD +SUB +PUSH_CONST NIL: +CONS +CONS +CONS +HALT diff --git a/awk/scheme/scheme/scratch/test.scm b/awk/scheme/scheme/scratch/test.scm new file mode 100644 index 0000000..a01b174 --- /dev/null +++ b/awk/scheme/scheme/scratch/test.scm @@ -0,0 +1,8 @@ +;; Build a list of calculated values +(cons (+ 1 2) ; First element: 1 + 2 = 3 + (cons (* 3 4) ; Second element: 3 * 4 = 12 + (cons (- 10 + (+ 2 3)) ; Third element: 10 - (2 + 3) = 5 + nil))) ; End of list + +;; This should create a list: (3 12 5) \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/test.scm.asm b/awk/scheme/scheme/scratch/test.scm.asm new file mode 100644 index 0000000..526e2b1 --- /dev/null +++ b/awk/scheme/scheme/scratch/test.scm.asm @@ -0,0 +1,7 @@ +PUSH_CONST N:5 +PUSH_CONST N:3 +ADD +PUSH_CONST N:2 +MUL +PRINT # should output N:16 +HALT \ No newline at end of file |