diff options
Diffstat (limited to 'awk/scheme')
-rwxr-xr-x | awk/scheme/s.awk | 139 | ||||
-rw-r--r-- | awk/scheme/scheme/README.md | 186 | ||||
-rw-r--r-- | awk/scheme/scheme/TODO.txt | 69 | ||||
-rwxr-xr-x | awk/scheme/scheme/bin/compiler.awk | 594 | ||||
-rwxr-xr-x | awk/scheme/scheme/bin/repl | 147 | ||||
-rwxr-xr-x | awk/scheme/scheme/bin/vm.awk | 707 | ||||
-rw-r--r-- | awk/scheme/scheme/diagram.md | 49 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/cons.test.scm | 3 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/define.test.scm | 2 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/lambda.test.scm | 12 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/let-and-define.test.scm | 9 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/let.test.scm | 2 | ||||
-rwxr-xr-x | awk/scheme/scheme/scheme | 3 | ||||
-rw-r--r-- | awk/scheme/scheme/scratch/complex_test.scm.asm | 44 | ||||
-rwxr-xr-x | awk/scheme/scheme/scratch/run.sh | 5 | ||||
-rw-r--r-- | awk/scheme/scheme/scratch/test.asm | 16 | ||||
-rw-r--r-- | awk/scheme/scheme/scratch/test.scm | 8 | ||||
-rw-r--r-- | awk/scheme/scheme/scratch/test.scm.asm | 7 |
18 files changed, 2002 insertions, 0 deletions
diff --git a/awk/scheme/s.awk b/awk/scheme/s.awk new file mode 100755 index 0000000..7c8bba6 --- /dev/null +++ b/awk/scheme/s.awk @@ -0,0 +1,139 @@ +#!/usr/bin/awk -f + +# Set debug mode +DEBUG = 1 # Change to 0 to disable debug output + +# Environment to store variable bindings +BEGIN { + print "Welcome to the AWK Scheme Interpreter!" + print "Type your Scheme expressions below (type 'exit' to quit):" + while (1) { + printf "> " + if (getline input <= 0) { + print "Error reading input. Exiting." + break + } + if (input == "exit") { + print "Exiting the interpreter." + exit + } + if (input == "") { + print "Empty input received, continuing..." + continue + } + + print "Input received: " input # Echo the input + ast = parse(input) # Parse the input + + # Print the entire AST for debugging + for (i = 1; i <= length(ast); i++) { + print "AST[" i "] = " ast[i] + } + + # Evaluate the AST + if (length(ast) > 0) { + result = eval(ast) # Evaluate the AST + print "Result: " result # Print the result + } else { + print "Parsed AST is empty." + } + } +} + +# Function to parse input into an AST +function parse(input) { + # Remove outer whitespace + gsub(/^\s+|\s+$/, "", input) + + # Check if input is empty after trimming + if (input == "") { + print "Input is empty after trimming" + return "" + } + + # Debugging: Print input before processing + print "Debug: Raw input for parsing: " input + + # Remove parentheses at start and end + if (substr(input, 1, 1) == "(") { + input = substr(input, 2) + } + if (substr(input, length(input), 1) == ")") { + input = substr(input, 1, length(input) - 1) + } + + # Debugging: Print input after removing outer parentheses + print "Debug: Input after removing outer parentheses: " input + + # Split the input into tokens + gsub(/\(/, " ( ", input) + gsub(/\)/, " ) ", input) + gsub(/\s+/, " ", input) # normalize whitespace + gsub(/^\s+|\s+$/, "", input) # trim + + # Debugging: Print input after tokenization + print "Debug: Input after tokenization: " input + + n = split(input, ast, " ") + + # Debugging: Print the number of tokens + print "Debug: Number of tokens: " n + + return ast +} + +# Function to evaluate the AST +function eval(ast, i, result) { + # Debugging: Print the current AST being evaluated + print "Debug: Evaluating AST: " ast[1] " " ast[2] " " ast[3] + + # Handle numbers directly + if (ast[1] ~ /^[+-]?[0-9]+$/) { + print "Debug: Returning number: " ast[1] + return ast[1] + 0 # Convert string to number + } + + # Handle addition + if (ast[1] == "+") { + result = 0 + for (i = 2; i <= length(ast); i++) { + print "Debug: Adding operand: " ast[i] + result += eval(ast[i]) # Recursively evaluate operands + } + return result + } + + # Handle subtraction + if (ast[1] == "-") { + result = eval(ast[2]) # Start with the first operand + for (i = 3; i <= length(ast); i++) { + print "Debug: Subtracting operand: " ast[i] + result -= eval(ast[i]) # Subtract subsequent operands + } + return result + } + + # Handle multiplication + if (ast[1] == "*") { + result = 1 + for (i = 2; i <= length(ast); i++) { + print "Debug: Multiplying operand: " ast[i] + result *= eval(ast[i]) # Multiply operands + } + return result + } + + # Handle division + if (ast[1] == "/") { + result = eval(ast[2]) # Start with the first operand + for (i = 3; i <= length(ast); i++) { + print "Debug: Dividing by operand: " ast[i] + result /= eval(ast[i]) # Divide by subsequent operands + } + return result + } + + # If we reach here, the operation is not recognized + return "Error: Unknown operation " ast[1] +} + diff --git a/awk/scheme/scheme/README.md b/awk/scheme/scheme/README.md new file mode 100644 index 0000000..7711442 --- /dev/null +++ b/awk/scheme/scheme/README.md @@ -0,0 +1,186 @@ +# Awk-Scheme + +## Overview +A Scheme interpreter implemented in AWK, featuring: +- A compiler that translates Scheme expressions to stack-based VM instructions +- A virtual machine that executes the compiled code +- Support for basic arithmetic, list operations, functions, and variable bindings +- Persistent global state between REPL sessions + +## Architecture + +### Components +1. **Compiler** (`bin/compiler.awk`): + - Lexical analyzer for tokenizing input + - Recursive descent parser for Scheme expressions + - Code generator that produces stack-based VM instructions + - Handles nested expressions and proper scoping + +2. **Virtual Machine** (`bin/vm.awk`): + - Stack-based execution model + - Typed value system with runtime type checking + - Environment-based variable binding + - Function call/return mechanism + - Heap memory for cons cells + - State persistence for globals and functions + +3. **REPL** (`bin/repl`): + - Interactive command line interface + - Multi-line input support + - State persistence between sessions + - Debug mode support + +### Data Types +All values are tagged with their type: +- `N:value` - Numbers (integers) +- `B:value` - Booleans (0/1) +- `P:index` - Pairs (cons cells) +- `F:name` - Functions +- `NIL:` - Empty list +- `S:name` - Symbols + +## Usage + +### Running the REPL +```bash +# Start interactive REPL +./scheme + +# Run a Scheme file +./scheme myprogram.scm + +# Enable debug output +DEBUG=1 ./scheme +``` + +### Basic Operations + +1. **Arithmetic**: +```scheme +scheme> (+ 1 2 3) +N:6 +scheme> (* 4 (- 10 5)) +N:20 +``` + +2. **Comparisons**: +```scheme +scheme> (< 1 2) +B:1 +scheme> (= 42 42) +B:1 +``` + +3. **Lists**: +```scheme +scheme> (cons 1 (cons 2 nil)) +P:1 +scheme> (car (cons 1 2)) +N:1 +``` + +4. **Variables**: +```scheme +scheme> (define x 42) +N:42 +scheme> x +N:42 +``` + +5. **Functions**: +```scheme +scheme> (define add (x y) (+ x y)) +scheme> (add 2 3) +N:5 +``` + +6. **Let Expressions**: +```scheme +scheme> (let ((x 10) (y 20)) (+ x y)) +N:30 +``` + +## Implementation Details + +### Compiler Pipeline +1. **Lexical Analysis**: + - Tokenizes input into numbers, symbols, and parentheses + - Handles whitespace and basic comments + +2. **Parsing**: + - Builds expression tree from tokens + - Validates syntax and expression structure + +3. **Code Generation**: + - Translates expressions to VM instructions + - Manages scope and variable bindings + - Handles function definitions + +### Virtual Machine +1. **Stack Operations**: + - PUSH_CONST: Push constant value + - POP: Remove top value + - STORE: Store variable binding + - LOOKUP: Retrieve variable value + +2. **Function Calls**: + - CALL: Invoke function + - RETURN: Return from function + - Environment frame management + +3. **Memory Management**: + - Stack-based evaluation + - Simple heap for cons cells + - Basic reference counting (not fully implemented) + +### State Persistence +- Global variables and functions persist between sessions +- State stored in `/tmp/scheme_vm.state` and `/tmp/scheme_vm.env` +- Automatic state loading/saving + +## Extending the System + +### Adding New Special Forms +1. Add parsing in `compile_expr()` +2. Implement code generation function +3. Add corresponding VM instructions if needed + +### Adding New Data Types +1. Define new type tag in VM +2. Add type checking predicates +3. Implement value construction/access +4. Update relevant operations + +### Adding VM Instructions +1. Add instruction handling in `execute()` +2. Implement instruction logic +3. Update compiler to generate new instruction + +### Debugging +- Enable debug output: `DEBUG=1 ./scheme` +- Debug messages show: + - Lexical analysis + - Parsing steps + - Code generation + - VM execution + - Stack operations + - Environment changes + +## Limitations +Current implementation does not support: +- Floating point numbers +- Strings +- Proper tail recursion +- Garbage collection +- Error recovery in REPL +- Full numeric tower +- Macros +- Standard library + +## Future Enhancements +1. Proper tail call optimization +2. Garbage collection +3. Error recovery +4. More data types +5. Standard library +6. Better debugging tools \ No newline at end of file diff --git a/awk/scheme/scheme/TODO.txt b/awk/scheme/scheme/TODO.txt new file mode 100644 index 0000000..31723a4 --- /dev/null +++ b/awk/scheme/scheme/TODO.txt @@ -0,0 +1,69 @@ +Scheme Interpreter TODO +===================== + +Current State: +------------- +- Basic arithmetic operations (+,-,*,/) are working +- Let expressions with simple bindings are working +- Function definitions (define) and lambda expressions are partially implemented +- Stack-based virtual machine with environment for variable bindings + +Recent Changes: +-------------- +1. Fixed function table array naming conflicts (func_name -> func_def_names) +2. Modified vm_get_value to handle function names correctly +3. Attempted to fix argument handling in function calls +4. Modified lambda compilation to avoid pushing function name twice + +Current Issues: +-------------- +1. Function Definitions (define): + - Function calls are failing with "ADD requires numeric operands" + - Arguments may not be properly passed to functions + - Function environment may not be properly set up + +2. Lambda Expressions: + - Direct lambda calls are failing + - Lambda bindings in let expressions are failing + - Function environment for lambda parameters may not be correct + +Next Steps: +----------- +1. Debug Function Calls: + - Add detailed debug logging in vm_call_function + - Verify argument handling and environment setup + - Check if function code is being properly stored and retrieved + +2. Fix Function Environment: + - Review how parameters are stored in the environment + - Ensure environment is properly restored after function calls + - Verify parameter values are correctly passed to functions + +3. Improve Lambda Support: + - Review lambda compilation process + - Ensure lambda functions are properly stored and called + - Fix environment handling for lambda parameters + +4. Testing Strategy: + - Create test cases for each type of function call + - Add debug output to track stack and environment state + - Verify each step of function compilation and execution + +5. Code Cleanup: + - Review and document function handling code + - Ensure consistent naming and error handling + - Add comments explaining complex operations + +Technical Notes: +--------------- +- Function calls use a combination of LOOKUP, GET_VALUE, and CALL instructions +- Environment stack handles variable bindings and function parameters +- Function code is stored in FUNCTIONS table with unique names +- Lambda functions use __lambda_N naming scheme + +Debugging Tips: +-------------- +1. Enable DEBUG=1 to see detailed execution logs +2. Check stack contents before and after each operation +3. Verify environment state during function calls +4. Monitor function code storage and retrieval \ No newline at end of file diff --git a/awk/scheme/scheme/bin/compiler.awk b/awk/scheme/scheme/bin/compiler.awk new file mode 100755 index 0000000..debaa2c --- /dev/null +++ b/awk/scheme/scheme/bin/compiler.awk @@ -0,0 +1,594 @@ +#!/usr/bin/awk -f + +# This is a Scheme-to-Assembly compiler implemented in AWK +# It takes Scheme expressions as input and outputs assembly instructions +# for a custom stack-based virtual machine + +BEGIN { + # Compiler maintains internal state for code generation + curr_token = "" # Current token being processed by lexer + input_buffer = "" # Buffer for input text being tokenized + next_label = 0 # Counter for generating unique labels + program = "" # Accumulates the full program text + + # Debug mode disabled by default, can be enabled via DEBUG=1 environment variable + DEBUG = (ENVIRON["DEBUG"] == "1") ? 1 : 0 +} + +# Debug logging helper function +function debug(msg) { + if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" +} + +# Each line of input is accumulated into the program string +# This allows handling multi-line expressions +{ + if (program != "") program = program "\n" + program = program $0 +} + +# Main compiler entry point - called after all input is read +END { + debug("Raw program:\n" program) + if (program == "") exit + + # Parse and compile each expression in the program + split_expressions(program) +} + +# Splits input into individual Scheme expressions +# Handles nested parentheses and comments +function split_expressions(prog, current, paren_count, i, c, expr, cleaned) { + current = "" + paren_count = 0 + + # Clean up the input: + # 1. Remove comments (text between ; and next opening paren) + # 2. Normalize whitespace + # 3. Trim leading/trailing whitespace + cleaned = prog + gsub(/;[^(]*\(/, "(", cleaned) # Remove comments before expressions + gsub(/\)[^)]*;/, ")", cleaned) # Remove comments after expressions + gsub(/[ \t\n]+/, " ", cleaned) # Normalize whitespace to single spaces + sub(/^[ \t\n]+/, "", cleaned) # Trim leading whitespace + sub(/[ \t\n]+$/, "", cleaned) # Trim trailing whitespace + + debug("Cleaned program: [" cleaned "]") + + if (cleaned == "") return + + # Parse expressions by tracking parenthesis nesting + for (i = 1; i <= length(cleaned); i++) { + c = substr(cleaned, i, 1) + + if (c == "(") { + if (paren_count == 0) current = "" + paren_count++ + } + + current = current c + + if (c == ")") { + paren_count-- + if (paren_count == 0) { + # Complete expression found - compile it + expr = current + sub(/^\s+/, "", expr) + sub(/\s+$/, "", expr) + + debug("Processing expression: [" expr "]") + program = expr # Set for parser + expr = parse_expr() + compile_expr(expr) + current = "" + } + } + } + + # Add final HALT instruction + print "HALT" +} + +# Lexer helper functions for character classification +function is_digit(c) { return c >= "0" && c <= "9" } +function is_whitespace(c) { return c == " " || c == "\t" || c == "\n" } + +# Lexical analyzer - converts input into tokens +# Handles numbers, symbols, and parentheses +function next_token() { + # Initialize input buffer on first call + if (input_buffer == "") input_buffer = program + + # Skip whitespace between tokens + while (length(input_buffer) > 0 && is_whitespace(substr(input_buffer, 1, 1))) + input_buffer = substr(input_buffer, 2) + + if (length(input_buffer) == 0) return "EOF" + + # Handle parentheses as single-character tokens + c = substr(input_buffer, 1, 1) + if (c == "(" || c == ")") { + input_buffer = substr(input_buffer, 2) + return c + } + + # Handle numbers (including negative numbers) + if (is_digit(c) || c == "-" && length(input_buffer) > 1 && is_digit(substr(input_buffer, 2, 1))) { + num = "" + while (length(input_buffer) > 0) { + c = substr(input_buffer, 1, 1) + if (!is_digit(c) && c != "-") break + num = num c + input_buffer = substr(input_buffer, 2) + } + return num + } + + # Handle symbols (identifiers and operators) + sym = "" + while (length(input_buffer) > 0) { + c = substr(input_buffer, 1, 1) + if (is_whitespace(c) || c == "(" || c == ")") break + sym = sym c + input_buffer = substr(input_buffer, 2) + } + return sym +} + +# Recursive descent parser for Scheme expressions +# Returns parsed expression as a string +function parse_expr(token, result) { + token = next_token() + if (token == "EOF") return "" + + if (token == "(") { + result = parse_list() + debug("Parsed list: " result) + return result + } + + debug("Parsed token: " token) + return token +} + +# Parses a list expression (anything in parentheses) +function parse_list(result, expr) { + result = "" + + while (1) { + expr = parse_expr() + if (expr == "" || expr == ")") break + + if (result != "") result = result " " + result = result expr + } + + if (expr == "") error("Unexpected end of input in list") + return "(" result ")" +} + +# Splits an expression into operator and arguments +# Handles nested expressions correctly +function split_expr(expr, i, len, c, op, args, paren_count) { + len = length(expr) + paren_count = 0 + + for (i = 1; i <= len; i++) { + c = substr(expr, i, 1) + if (c == " " && paren_count == 0) { + op = substr(expr, 1, i - 1) + args = substr(expr, i + 1) + break + } + if (c == "(") paren_count++ + if (c == ")") paren_count-- + } + + if (!op) { + op = expr + args = "" + } + + debug("Split expr: op=" op " args=" args) + return op SUBSEP args +} + +# Splits argument list handling nested parentheses +function split_args(args, arg_array, len, i, c, current, paren_count, arg_count) { + len = length(args) + current = "" + paren_count = 0 + arg_count = 0 + + for (i = 1; i <= len; i++) { + c = substr(args, i, 1) + + if (c == "(") paren_count++ + if (c == ")") paren_count-- + + if (c == " " && paren_count == 0 && current != "") { + arg_array[++arg_count] = current + current = "" + } else if (c != " " || paren_count > 0) { + current = current c + } + } + + if (current != "") { + arg_array[++arg_count] = current + } + + return arg_count +} + +# Code generation for numeric literals +function compile_number(num) { + debug("Compiling number: " num) + print "PUSH_CONST N:" num +} + +# Code generation for primitive operations (+, -, *, cons, etc) +function compile_primitive_call(op, args, arg_array, nargs, i) { + debug("Primitive call: op=" op " args=" args) + nargs = split_args(args, arg_array) + + # Check if this is a lambda function call + if (substr(op, 1, 1) == "(") { + # This is a lambda function call + # First compile the lambda function + compile_expr(op) + + # Then compile all arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + + # Call the function + print "CALL __lambda_" (next_label - 1) + return + } + + # Then emit appropriate operation + if (op == "+") { + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + for (i = 1; i < nargs; i++) + print "ADD" + } + else if (op == "-") { + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + if (nargs == 1) { + print "PUSH_CONST N:0" + print "SWAP" + } + for (i = 1; i < nargs; i++) + print "SUB" + } + else if (op == "*") { + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + for (i = 1; i < nargs; i++) + print "MUL" + } + else if (op == "cons") { + if (nargs != 2) error("cons requires 2 arguments") + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + print "CONS" + } + else if (op == "car") { + if (nargs != 1) error("car requires 1 argument") + # Compile argument + compile_expr(arg_array[1]) + print "CAR" + } + else if (op == "cdr") { + if (nargs != 1) error("cdr requires 1 argument") + # Compile argument + compile_expr(arg_array[1]) + print "CDR" + } + else if (op == "<") { + if (nargs != 2) error("< requires 2 arguments") + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + print "LT" + } + else if (op == "=") { + if (nargs != 2) error("= requires 2 arguments") + # Compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + print "EQ" + } + else { + # Function call for user-defined functions + debug("Function call: " op) + # Look up the function name + print "LOOKUP " op + # Get the actual function name + print "GET_VALUE" + # Then compile arguments + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + # Call the function + print "CALL" + } +} + +# Splits let bindings into individual variable/value pairs +function split_bindings(bindings, binding_array, count, current, paren_count, i, c, in_lambda) { + count = 0 + current = "" + paren_count = 0 + in_lambda = 0 + + for (i = 1; i <= length(bindings); i++) { + c = substr(bindings, i, 1) + + # Track nested parentheses + if (c == "(") { + paren_count++ + if (paren_count == 1 && !in_lambda) { + current = "" # Start new binding + continue + } + } + if (c == ")") { + paren_count-- + if (paren_count == 0 && !in_lambda) { + # End of binding + binding_array[++count] = current + current = "" + continue + } + } + + # Track if we're inside a lambda expression + if (substr(bindings, i, 7) == "lambda ") { + in_lambda = 1 + } + + # Only add character if we're inside a binding + if (paren_count > 0) { + current = current c + } + } + + return count +} + +# Compiles let expressions (local variable bindings) +function compile_let(args, bindings, body, binding_array, nbindings, i, var, val, binding_parts) { + # Split into bindings and body + if (substr(args, 1, 1) != "(") error("Malformed let expression") + + # Find matching closing parenthesis for bindings + paren_count = 1 + i = 2 + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in let bindings") + + bindings = substr(args, 2, i - 3) # Remove outer parentheses + body = substr(args, i) + + # Trim whitespace from body + sub(/^[ \t\n]+/, "", body) + sub(/[ \t\n]+$/, "", body) + + debug("Let bindings: " bindings) + debug("Let body: " body) + + # Compile each binding + nbindings = split_bindings(bindings, binding_array) + for (i = 1; i <= nbindings; i++) { + debug("Processing binding: " binding_array[i]) + + # Find the variable name (everything up to the first space) + var = binding_array[i] + sub(/ .*$/, "", var) + + # Find the value (everything after the first space) + val = binding_array[i] + sub(/^[^ ]+ /, "", val) + + debug("Binding var: " var " val: " val) + + # Compile the value + if (substr(val, 1, 1) == "(") { + # Handle lambda or other compound expressions + if (substr(val, 2, 6) == "lambda") { + # This is a lambda expression + # First compile the lambda + compile_lambda(substr(val, 8)) # Skip "(lambda " + # Store the function name in the environment + print "STORE " var + } else { + compile_expr(val) + print "STORE " var + } + } else { + compile_expr(val) + print "STORE " var + } + } + + # Compile the body + compile_expr(body) + + # Clean up bindings AFTER evaluating body + for (i = nbindings; i >= 1; i--) { + print "POP_ENV" + } +} + +# Compiles define expressions (function/variable definitions) +function compile_define(args, name, params, body, param_array, nparams, i, paren_start, paren_end) { + # Set flag for global definition + print "PUSH_CONST B:1" + print "STORE from_define" # Must match exactly what vm_store checks for + + # Find the function name (everything up to the first space) + i = index(args, " ") + if (i == 0) error("Malformed define expression") + name = substr(args, 1, i - 1) + args = substr(args, i + 1) + + # Check if it's a function or variable definition + if (substr(args, 1, 1) == "(") { + # It's a function definition + paren_count = 1 + i = 2 + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in parameter list") + + params = substr(args, 2, i - 3) # Remove parentheses + body = substr(args, i + 1) + + # Create function label + print "LABEL " name + + # Process parameters + nparams = split(params, param_array, " ") + for (i = 1; i <= nparams; i++) { + print "STORE " param_array[i] + } + + # Compile function body + compile_expr(body) + + # Clean up parameters and return + for (i = nparams; i >= 1; i--) { + print "POP_ENV" + } + print "RETURN" + } else { + # Variable definition + debug("Defining variable: " name " with value: " args) + compile_expr(args) # Compile the value + print "STORE " name # Store the variable + } +} + +# Compiles lambda expressions (anonymous functions) +function compile_lambda(args, params, body, param_array, nparams, i, lambda_name) { + # Generate a unique name for the lambda function + lambda_name = "__lambda_" next_label++ + + # Split into parameters and body + if (substr(args, 1, 1) != "(") error("Malformed lambda expression") + + # Find matching closing parenthesis for parameters + paren_count = 1 + i = 2 + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in lambda parameters") + + params = substr(args, 2, i - 3) # Remove parentheses + body = substr(args, i) + + # Trim whitespace from body + sub(/^[ \t\n]+/, "", body) + sub(/[ \t\n]+$/, "", body) + + debug("Lambda parameters: " params) + debug("Lambda body: " body) + + # Create function label + print "LABEL " lambda_name + + # Process parameters + nparams = split(params, param_array, " ") + for (i = 1; i <= nparams; i++) { + print "STORE " param_array[i] + } + + # Compile function body + compile_expr(body) + + # Clean up parameters and return + for (i = nparams; i >= 1; i--) { + print "POP_ENV" + } + print "RETURN" + + # Only push the function name if we're not in a direct call + if (!(args ~ /^\([^)]*\)[^(]*$/)) { + print "PUSH_CONST S:" lambda_name + } +} + +# Main expression compiler - dispatches based on expression type +function compile_expr(expr, split_result, op, args) { + debug("Compiling expression: " expr) + + # Handle numeric literals + if (expr ~ /^-?[0-9]+$/) { + compile_number(expr) + return + } + + # Handle nil constant + if (expr == "nil") { + print "PUSH_CONST NIL:" + return + } + + # Handle variable lookup + if (expr ~ /^[a-zA-Z_][a-zA-Z0-9_]*$/) { + print "LOOKUP " expr + return + } + + # Handle compound expressions (lists) + if (substr(expr, 1, 1) == "(") { + expr = substr(expr, 2, length(expr) - 2) + split_result = split_expr(expr) + op = substr(split_result, 1, index(split_result, SUBSEP) - 1) + args = substr(split_result, index(split_result, SUBSEP) + 1) + + if (op == "define") { + compile_define(args) + } else if (op == "let") { + compile_let(args) + } else if (op == "lambda") { + compile_lambda(args) + } else { + compile_primitive_call(op, args) + } + return + } + + error("Unknown expression type: " expr) +} + +# Error reporting helper +function error(msg) { + print "Error: " msg > "/dev/stderr" + exit 1 +} \ No newline at end of file diff --git a/awk/scheme/scheme/bin/repl b/awk/scheme/scheme/bin/repl new file mode 100755 index 0000000..14a10cf --- /dev/null +++ b/awk/scheme/scheme/bin/repl @@ -0,0 +1,147 @@ +#!/bin/bash + +# Enable debug tracing +DEBUG=0 + +debug() { + if [ "$DEBUG" = "1" ]; then + echo "[DEBUG] $*" >&2 + fi +} + +# Find the directory containing this script and the components +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +COMPILER="$DIR/compiler.awk" +VM="$DIR/vm.awk" + +debug "Using compiler: $COMPILER" +debug "Using VM: $VM" + +# Verify components exist and are executable +for component in "$COMPILER" "$VM"; do + if [ ! -f "$component" ]; then + echo "Error: Cannot find $component" >&2 + echo "Please ensure all components are present" >&2 + exit 1 + fi + chmod +x "$component" +done + +# Set up temporary files and state +TMPDIR=$(mktemp -d) +debug "Created temp dir: $TMPDIR" +STATE_FILE="/tmp/scheme_vm.state" + +cleanup() { + debug "Cleaning up temp dir: $TMPDIR" + rm -rf "$TMPDIR" + if [ "$1" != "keep_state" ]; then + rm -f "$STATE_FILE" + rm -f "/tmp/scheme_vm.env" + fi +} +trap "cleanup" EXIT + +# Set up temporary files +INPUT_FILE="$TMPDIR/input.scm" +ASM_FILE="$TMPDIR/output.asm" +DEBUG_FILE="$TMPDIR/debug.out" + +# Initialize/clear state files at REPL start +if [ "$#" -eq 0 ]; then # Only for interactive mode + : > "/tmp/scheme_vm.state" + : > "/tmp/scheme_vm.env" +fi + +# Function to handle evaluation +evaluate_expression() { + local input="$1" + local result + + # Skip empty lines + if [ -z "$input" ]; then + return 0 + fi + + debug "Evaluating expression: $input" + echo "$input" > "$INPUT_FILE" + debug "Input file contents:" + cat "$INPUT_FILE" >&2 + + # Show compilation output even if it fails + debug "Running compiler..." + if awk -f "$COMPILER" "$INPUT_FILE" > "$ASM_FILE" 2> "$DEBUG_FILE"; then + debug "Compilation successful. Debug output:" + cat "$DEBUG_FILE" >&2 + debug "Generated assembly:" + cat "$ASM_FILE" >&2 + + debug "Running VM..." + # Use persistent VM state + result=$(awk -v PERSIST=1 -f "$VM" "$ASM_FILE" 2>&1) + debug "VM output: $result" + if [ -n "$result" ]; then + echo "$result" + fi + return 0 + else + echo "Compilation error" >&2 + debug "Compiler output:" + cat "$DEBUG_FILE" >&2 + return 1 + fi +} + +# Check if a file argument is provided +if [ "$#" -gt 0 ]; then + if [ ! -f "$1" ]; then + echo "Error: File not found: $1" >&2 + exit 1 + fi + debug "Reading file: $1" + file_content=$(cat "$1" | tr '\n' ' ') + debug "File content: $file_content" + evaluate_expression "$file_content" + cleanup "keep_state" # Keep state after file execution + exit 0 +fi + +# REPL state +paren_count=0 +current_input="" + +# Print welcome message +echo "Scheme REPL (Press Ctrl+D to exit)" +echo + +# Main REPL loop +while true; do + if [ $paren_count -eq 0 ]; then + printf "scheme> " + else + printf "... " + fi + + read -r line || exit 0 + + # Skip empty lines + if [ -z "$line" ]; then + continue + fi + + # Count parentheses + open_parens=$(echo "$line" | tr -cd '(' | wc -c) + close_parens=$(echo "$line" | tr -cd ')' | wc -c) + paren_count=$((paren_count + open_parens - close_parens)) + + if [ -n "$current_input" ]; then + current_input="$current_input $line" + else + current_input="$line" + fi + + if [ $paren_count -eq 0 ]; then + evaluate_expression "$current_input" + current_input="" + fi +done \ No newline at end of file diff --git a/awk/scheme/scheme/bin/vm.awk b/awk/scheme/scheme/bin/vm.awk new file mode 100755 index 0000000..4e7d2c7 --- /dev/null +++ b/awk/scheme/scheme/bin/vm.awk @@ -0,0 +1,707 @@ +#!/usr/bin/awk -f + +# This is a stack-based virtual machine for executing compiled Scheme code +# It implements a simple instruction set with support for: +# - Basic arithmetic operations +# - Function calls and returns +# - Variable bindings and lookups +# - Cons cells and list operations + +BEGIN { + # Type system tags for runtime type checking + T_NUMBER = "N" # Numbers (integers) + T_BOOLEAN = "B" # Booleans (0/1) + T_SYMBOL = "S" # Symbols (identifiers) + T_PAIR = "P" # Cons cells (pairs) + T_FUNCTION = "F" # Function references + T_NIL = "NIL" # Empty list marker + + # Virtual machine registers + stack_ptr = 0 # Points to top of evaluation stack + heap_ptr = 0 # Points to next free heap location + pc = 0 # Program counter for instruction fetch + + # Debug mode disabled by default, can be enabled via DEBUG=1 environment variable + DEBUG = (ENVIRON["DEBUG"] == "1") ? 1 : 0 + + # Environment for variable bindings + env_size = 0 # Current size of environment stack + + # Function table for storing defined functions + delete func_def_names # Function names + delete func_def_pc # Entry points + delete func_def_code # Function bodies + func_def_size = 0 # Number of defined functions + + # Call stack for function returns + call_stack_ptr = 0 + + # State persistence configuration + STATE_FILE = "/tmp/scheme_vm.state" + if (PERSIST) { + debug("Loading state from: " STATE_FILE) + if ((getline line < STATE_FILE) >= 0) { # Check if file exists and is readable + do { + if (line ~ /^FUNC /) { + # Parse and load function definition + sub(/^FUNC /, "", line) + name = line + sub(/ .*$/, "", name) + code = line + sub(/^[^ ]+ /, "", code) + + debug("Loaded function: " name) + debug("Code: " code) + + # Store function in function table + func_def_names[func_def_size] = name + func_def_code[func_def_size] = code + func_def_size++ + } + } while ((getline line < STATE_FILE) > 0) + close(STATE_FILE) + } + } + + # Function environment storage + delete func_env_names # Variable names in function scope + delete func_env_vals # Variable values in function scope + delete func_env_sizes # Size of each function's environment + + # Global function registry + delete FUNCTIONS # Maps function names to implementations + + # Environment persistence configuration + ENV_STATE_FILE = "/tmp/scheme_vm.env" + if (PERSIST) { + debug("Loading environment state from: " ENV_STATE_FILE) + if ((getline line < ENV_STATE_FILE) >= 0) { + do { + if (line ~ /^ENV /) { + # Parse and load environment binding + sub(/^ENV /, "", line) + name = line + sub(/ .*$/, "", name) + val = line + sub(/^[^ ]+ /, "", val) + + debug("Loaded env var: " name " = " val) + + # Store in environment + env_name[env_size] = name + env_val[env_size] = val + env_size++ + } + } while ((getline line < ENV_STATE_FILE) > 0) + close(ENV_STATE_FILE) + } + } + + # Register built-in functions + FUNCTIONS["+"] = "add" + FUNCTIONS["-"] = "subtract" + FUNCTIONS["*"] = "multiply" + FUNCTIONS["/"] = "divide" + FUNCTIONS["="] = "equals" + FUNCTIONS["<"] = "less_than" + FUNCTIONS[">"] = "greater_than" + FUNCTIONS["add1"] = "add_one" + + # Track if VM halted normally (vs error) + normal_exit = 0 +} + +# Debug output helper +function debug(msg) { + if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" +} + +# Value constructors and accessors +# Values are stored as type:value pairs for runtime type checking +function makeValue(type, val) { + return type ":" val +} + +function getType(val) { + type = substr(val, 1, index(val, ":") - 1) + debug("Get type: " type " from " val) + return type +} + +function getValue(val) { + value = substr(val, index(val, ":") + 1) + debug("Get value: " value " from " val) + return value +} + +# Type checking predicates +function isNumber(val) { return getType(val) == T_NUMBER } +function isBoolean(val) { return getType(val) == T_BOOLEAN } +function isSymbol(val) { return getType(val) == T_SYMBOL } +function isPair(val) { return getType(val) == T_PAIR } +function isFunction(val) { return getType(val) == T_FUNCTION } +function isNil(val) { return getType(val) == T_NIL } + +# Stack operations +function push(val) { + stack[++stack_ptr] = val + debug("Push: " val " (SP: " stack_ptr ")") +} + +function pop() { + if (stack_ptr < 1) error("Stack underflow") + val = stack[stack_ptr--] + debug("Pop: " val " (SP: " stack_ptr ")") + return val +} + +function peek() { + if (stack_ptr < 1) error("Stack empty") + debug("Peek: " stack[stack_ptr]) + return stack[stack_ptr] +} + +# Heap operations for cons cells +function allocate(val) { + heap[++heap_ptr] = val + refs[heap_ptr] = 1 # Reference counting (not fully implemented) + debug("Allocate: " val " at " heap_ptr) + return heap_ptr +} + +function getHeap(idx) { + if (!(idx in heap)) { + error("Invalid heap access: " idx) + return "" + } + return heap[idx] +} + +# Error handling +function error(msg) { + print "Error at PC " pc ": " msg > "/dev/stderr" + exit 1 +} + +# Arithmetic instruction implementations +function vm_add() { + if (stack_ptr < 2) error("ADD requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("ADD requires numeric operands") + result = getValue(val1) + getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function vm_subtract() { + if (stack_ptr < 2) error("SUB requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("SUB requires numeric operands") + result = getValue(val1) - getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function vm_multiply() { + if (stack_ptr < 2) error("MUL requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("MUL requires numeric operands") + result = getValue(val1) * getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function vm_divide() { + if (stack_ptr < 2) error("DIV requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("DIV requires numeric operands") + if (getValue(val2) == 0) + error("Division by zero") + result = getValue(val1) / getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +# List operation implementations +function vm_cons() { + if (stack_ptr < 2) error("CONS requires two operands") + val2 = pop() + val1 = pop() + pair_val = val1 "," val2 + pair_idx = allocate(pair_val) + push(makeValue(T_PAIR, pair_idx)) +} + +function vm_car() { + if (stack_ptr < 1) error("CAR requires one operand") + val = pop() + if (!isPair(val)) error("CAR requires pair operand") + pair_idx = getValue(val) + pair = getHeap(pair_idx) + car_val = substr(pair, 1, index(pair, ",") - 1) + push(car_val) +} + +function vm_cdr() { + if (stack_ptr < 1) error("CDR requires one operand") + val = pop() + if (!isPair(val)) error("CDR requires pair operand") + pair_idx = getValue(val) + pair = getHeap(pair_idx) + cdr_val = substr(pair, index(pair, ",") + 1) + push(cdr_val) +} + +# Comparison operations +function vm_equal() { + if (stack_ptr < 2) error("EQ requires two operands") + val2 = pop() + val1 = pop() + result = (val1 == val2) ? "1" : "0" + debug("Equal comparison: " val1 " == " val2 " -> " result) + push(makeValue(T_BOOLEAN, result)) +} + +function vm_less_than() { + if (stack_ptr < 2) error("LT requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("LT requires numeric operands") + result = (getValue(val1) < getValue(val2)) ? "1" : "0" + debug("Less than comparison: " val1 " < " val2 " -> " result) + push(makeValue(T_BOOLEAN, result)) +} + +# Main instruction execution loop +function execute(instr) { + split(instr, parts, " ") + op = parts[1] + debug("Execute: " instr) + + # Dispatch based on instruction opcode + if (op == "PUSH_CONST") { + push(parts[2]) + } + else if (op == "POP") { + pop() + } + else if (op == "DUP") { + val = peek() + push(val) + } + else if (op == "SWAP") { + if (stack_ptr < 2) error("SWAP requires two operands") + val2 = pop() + val1 = pop() + push(val2) + push(val1) + } + else if (op == "ADD") { + vm_add() + } + else if (op == "SUB") { + vm_subtract() + } + else if (op == "MUL") { + vm_multiply() + } + else if (op == "DIV") { + vm_divide() + } + else if (op == "CONS") { + vm_cons() + } + else if (op == "CAR") { + vm_car() + } + else if (op == "CDR") { + vm_cdr() + } + else if (op == "EQ") { + vm_equal() + } + else if (op == "LT") { + vm_less_than() + } + else if (op == "PRINT") { + if (stack_ptr < 1) error("PRINT requires one operand") + print peek() + } + else if (op == "HALT") { + normal_exit = 1 + if (stack_ptr > 0) { + result = peek() + } + if (PERSIST) { + save_state() + } + if (result) { + print result + } + exit(0) + } + else if (op == "STORE") { + vm_store(parts[2]) + } + else if (op == "POP_ENV") { + vm_pop_env() + } + else if (op == "LOOKUP") { + vm_lookup(parts[2]) + } + else if (op == "LABEL") { + vm_define_function(parts[2], pc) + } + else if (op == "CALL") { + vm_call_function(parts[2]) + } + else if (op == "RETURN") { + vm_return() + } + else if (op == "GET_VALUE") { + vm_get_value() + } + else { + error("Unknown instruction: " op) + } +} + +# Load program instructions +{ + program[NR-1] = $0 +} + +# Main execution loop +END { + while (pc < length(program)) { + execute(program[pc++]) + } + + # Save state if we didn't halt normally + if (!normal_exit && PERSIST) { + save_state() + } +} + +# Variable binding implementation +function vm_store(name) { + debug("Storing " peek() " as " name " at env_size: " env_size) + + # Handle global definitions specially + if (lookup_no_error("from_define")) { + name = "__global_" name + # Clear the define flag + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == "from_define") { + env_size-- + break + } + } + + # Remove any previous definition of this global + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == name) { + # Shift everything down + for (j = i; j < env_size - 1; j++) { + env_name[j] = env_name[j + 1] + env_val[j] = env_val[j + 1] + } + env_size-- + break + } + } + } + + # Handle lambda functions + val = peek() + if (isSymbol(val)) { + func_name = getValue(val) + if (func_name ~ /^__lambda_/) { + # Store the function code under the new name + FUNCTIONS[name] = FUNCTIONS[func_name] + # Store the new name in the environment + env_name[env_size] = name + env_val[env_size] = makeValue(T_SYMBOL, name) + env_size++ + return + } + } + + # Add to environment + env_name[env_size] = name + env_val[env_size] = peek() + env_size++ + + debug("Environment after store:") + dump_env() +} + +# Remove top binding from environment +function vm_pop_env() { + if (env_size <= 0) error("Environment underflow") + debug("Popping environment at size: " env_size) + + # Don't pop globals + if (env_name[env_size-1] ~ /^__global_/) { + debug("Keeping global definition: " env_name[env_size-1]) + return + } + + debug("Removing: " env_name[env_size-1] " = " env_val[env_size-1]) + env_size-- +} + +# Variable lookup implementation +function vm_lookup(name, i, global_name, val) { + debug("Looking up " name " in environment of size: " env_size) + dump_env() + + # Check if it's a function (built-in or user-defined) + if (name in FUNCTIONS) { + debug("Found function: " name) + push(makeValue(T_SYMBOL, name)) + return + } + + # Try global name first, then local + global_name = "__global_" name + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == global_name || env_name[i] == name) { + debug("Found " name " = " env_val[i] " at position " i) + push(env_val[i]) + return + } + } + error("Undefined variable: " name) +} + +# Function definition implementation +function vm_define_function(name, start_pc) { + debug("Defining function: " name " at " start_pc) + + # Build function code + code = "" + i = start_pc + while (i < length(program) && program[i] != "RETURN") { + if (code != "") code = code "\n" + code = code program[i] + i++ + } + code = code "\nRETURN" + + # Store function + debug("Storing function: " name " = " code) + FUNCTIONS[name] = code + + pc = i + 1 +} + +# Function call implementation +function vm_call_function(func_name, code_lines, j, saved_pc, saved_env_size, arg, param_name) { + debug("Calling function: " func_name) + + # If name is a symbol, get its value + if (isSymbol(func_name)) { + func_name = getValue(func_name) + } + + # Handle anonymous functions + if (func_name ~ /^__lambda_/) { + if (!(func_name in FUNCTIONS)) { + error("Undefined lambda function: " func_name) + } + } else if (!(func_name in FUNCTIONS)) { + error("Undefined function: " func_name) + } + + saved_pc = pc + saved_env_size = env_size + + # Split function code into lines + split(FUNCTIONS[func_name], code_lines, "\n") + + # Add function code to program at current position + for (j in code_lines) { + program[pc + j - 1] = code_lines[j] + } + + # Check if this is a parameterized function + if (code_lines[1] ~ /^STORE /) { + # This is a parameterized function (lambda) + # Get parameter name from STORE instruction + param_name = substr(code_lines[1], 7) + debug("Found parameter name: " param_name) + + # Get argument from stack + arg = pop() + debug("Function argument: " arg) + + # Create new environment frame + debug("Creating new environment frame at size: " env_size) + env_name[env_size] = param_name + env_val[env_size] = arg + env_size++ + } else { + # This is a built-in function or non-parameterized function + debug("Calling non-parameterized function: " func_name) + } + + # Save return info and jump to function + call_stack[++call_stack_ptr] = saved_pc + env_stack[call_stack_ptr] = saved_env_size + + debug("Function found, jumping to PC: " pc " with env_size: " saved_env_size) + dump_env() +} + +# Function return implementation +function vm_return() { + if (call_stack_ptr > 0) { + # Save return value + ret_val = pop() + + # Restore environment + while (env_size > env_stack[call_stack_ptr]) { + debug("Popping environment at size: " env_size) + vm_pop_env() + } + + # Restore program counter + pc = call_stack[call_stack_ptr--] + + # Push return value + push(ret_val) + + debug("Returned with value: " ret_val " and env_size: " env_size) + } +} + +# Debug helper to dump environment contents +function dump_env( i) { + debug("Environment dump:") + for (i = 0; i < env_size; i++) { + debug(sprintf(" %d: %s = %s", i, env_name[i], env_val[i])) + } +} + +# Helper for checking variable existence without error +function lookup_no_error(name, i) { + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == name) { + return 1 + } + } + return 0 +} + +# State persistence implementation +function save_state() { + debug("Saving state to: " STATE_FILE) + for (i = 0; i < func_def_size; i++) { + debug("Saving function: " func_def_names[i]) + print "FUNC " func_def_names[i] " " func_def_code[i] > STATE_FILE + } + close(STATE_FILE) + + # Save environment state + debug("Saving environment state to: " ENV_STATE_FILE) + for (i = 0; i < env_size; i++) { + if (env_name[i] ~ /^__global_/) { # Only save globals + debug("Saving env var: " env_name[i] " = " env_val[i]) + print "ENV " env_name[i] " " env_val[i] > ENV_STATE_FILE + } + } + close(ENV_STATE_FILE) +} + +# Built-in function implementations +function equals() { + if (stack_ptr < 2) error("= requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("= requires numeric operands") + result = (getValue(val1) == getValue(val2)) ? 1 : 0 + push(makeValue(T_BOOLEAN, result)) +} + +function less_than() { + if (stack_ptr < 2) error("< requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("< requires numeric operands") + result = (getValue(val1) < getValue(val2)) ? 1 : 0 + push(makeValue(T_BOOLEAN, result)) +} + +function greater_than() { + if (stack_ptr < 2) error("> requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("> requires numeric operands") + result = (getValue(val1) > getValue(val2)) ? 1 : 0 + push(makeValue(T_BOOLEAN, result)) +} + +function add() { + if (stack_ptr < 2) error("+ requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("+ requires numeric operands") + result = getValue(val1) + getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function subtract() { + if (stack_ptr < 2) error("- requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("- requires numeric operands") + result = getValue(val1) - getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function multiply() { + if (stack_ptr < 2) error("* requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("* requires numeric operands") + result = getValue(val1) * getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function divide() { + if (stack_ptr < 2) error("/ requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) error("/ requires numeric operands") + if (getValue(val2) == 0) error("Division by zero") + result = getValue(val1) / getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function add_one() { + if (stack_ptr < 1) error("add1 requires one operand") + val = pop() + if (!isNumber(val)) error("add1 requires numeric operand") + result = getValue(val) + 1 + push(makeValue(T_NUMBER, result)) +} + +# Get value from top of stack +function vm_get_value() { + val = peek() + if (isSymbol(val)) { + name = getValue(val) + # If it's a function name, just push the name directly + if (name in FUNCTIONS) { + push(name) + } else { + push(makeValue(T_SYMBOL, name)) + } + } +} \ No newline at end of file diff --git a/awk/scheme/scheme/diagram.md b/awk/scheme/scheme/diagram.md new file mode 100644 index 0000000..4a719b4 --- /dev/null +++ b/awk/scheme/scheme/diagram.md @@ -0,0 +1,49 @@ +# Awk-Scheme Architecture +## Component Interaction Diagram + +``` ++----------------+ Scheme Code +----------------+ Assembly +----------------+ +| | -----------------> | | -------------> | | +| REPL | "(+ 1 2)" | Compiler | "PUSH_CONST | VM | +| (bin/repl) | | compiler.awk | N:1 | vm.awk | +| | | | PUSH_CONST | | +| - Multi-line | | - Lexer | N:2 | - Stack-based | +| - Debug mode | | - Parser | ADD | - Type system | +| - File input | | - Code gen | HALT" | - Environment | +| | <----------------- | | <------------- | | +| | Output: "N:3" | | Result | | ++----------------+ +----------------+ +----------------+ + ^ | + | v + | +----------------+ + | | Persistence | + | | /tmp files: | + +--------------------------------------------------------------+ - vm.state | + | - vm.env | + +----------------+ + +Debug Flow (when DEBUG=1): ++----------------+ Debug Info +----------------+ Debug Info +----------------+ +| REPL | -----------------> | Compiler | -------------> | VM | +| | [Input received] | | [Tokens found] | | +| [Debug output] | | [Parsing tree] | [Stack ops] | [Stack state] | +| stderr | <----------------- | [Gen code] | <------------- | [Environment] | ++----------------+ +----------------+ +----------------+ + +Execution Flow Example: +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Input: │ │ Lexer/ │ │ Code Gen │ │ VM │ +│ (+ 1 2) │ ------> │ Parser: │ ------> │ PUSH_CONST │ ------> │ Stack: │ +│ │ │ (+ │ │ N:1 │ │ [N:1] │ +│ │ │ 1 │ │ PUSH_CONST │ │ [N:1,N:2] │ +│ │ │ 2) │ │ N:2 │ │ [N:3] │ +│ │ │ │ │ ADD │ │ │ +└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ + +State Management: +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Global │ │ Environment │ │ Function │ +│ Variables │ ------> │ Stack │ ------> │ Calls │ +│ (persist) │ │ (frames) │ │ (stack) │ +└─────────────┘ └─────────────┘ └─────────────┘ +``` \ No newline at end of file diff --git a/awk/scheme/scheme/examples/cons.test.scm b/awk/scheme/scheme/examples/cons.test.scm new file mode 100644 index 0000000..d1e3847 --- /dev/null +++ b/awk/scheme/scheme/examples/cons.test.scm @@ -0,0 +1,3 @@ +(cons (+ 1 2) + (cons (* 3 4) + nil)) diff --git a/awk/scheme/scheme/examples/define.test.scm b/awk/scheme/scheme/examples/define.test.scm new file mode 100644 index 0000000..ec66b04 --- /dev/null +++ b/awk/scheme/scheme/examples/define.test.scm @@ -0,0 +1,2 @@ +(define add2 (x) (+ x 2)) +(add2 40) \ No newline at end of file diff --git a/awk/scheme/scheme/examples/lambda.test.scm b/awk/scheme/scheme/examples/lambda.test.scm new file mode 100644 index 0000000..1f2bb09 --- /dev/null +++ b/awk/scheme/scheme/examples/lambda.test.scm @@ -0,0 +1,12 @@ +; Test lambda function support +((lambda (x) (+ x 1)) 41) ; Should return 42 + +; Test lambda with multiple parameters +((lambda (x y) (+ x y)) 20 22) ; Should return 42 + +; Test lambda in let expression +(let ((add1 (lambda (x) (+ x 1)))) + (add1 41)) ; Should return 42 + +; Test nested lambda +((lambda (x) ((lambda (y) (+ x y)) 1)) 41) ; Should return 42 \ No newline at end of file diff --git a/awk/scheme/scheme/examples/let-and-define.test.scm b/awk/scheme/scheme/examples/let-and-define.test.scm new file mode 100644 index 0000000..fade30b --- /dev/null +++ b/awk/scheme/scheme/examples/let-and-define.test.scm @@ -0,0 +1,9 @@ +; Let expression example +(let ((x 5) (y 3)) + (+ x y)) + +; Function definition example +(define add2 (x) + (+ x 2)) + +(add2 40) ; Returns 42 \ No newline at end of file diff --git a/awk/scheme/scheme/examples/let.test.scm b/awk/scheme/scheme/examples/let.test.scm new file mode 100644 index 0000000..2cdc3b8 --- /dev/null +++ b/awk/scheme/scheme/examples/let.test.scm @@ -0,0 +1,2 @@ +(let ((x 5)) + (+ x 2)) diff --git a/awk/scheme/scheme/scheme b/awk/scheme/scheme/scheme new file mode 100755 index 0000000..cec35d1 --- /dev/null +++ b/awk/scheme/scheme/scheme @@ -0,0 +1,3 @@ +#!/bin/bash +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +exec "$DIR/bin/repl" "$@" diff --git a/awk/scheme/scheme/scratch/complex_test.scm.asm b/awk/scheme/scheme/scratch/complex_test.scm.asm new file mode 100644 index 0000000..67870c3 --- /dev/null +++ b/awk/scheme/scheme/scratch/complex_test.scm.asm @@ -0,0 +1,44 @@ +# Test proper list construction (3 2 1) +# Building the list in proper order: car points to value, cdr points to next pair + +# Start with empty list +PUSH_CONST NIL: # [nil] +PRINT # Print nil + +# Build (1 . nil) +PUSH_CONST NIL: # [nil] +PUSH_CONST N:1 # [nil 1] +SWAP # [1 nil] +CONS # [(1 . nil)] +DUP +PRINT # Print (1 . nil) + +# Build (2 . (1 . nil)) +PUSH_CONST N:2 # [(1.nil) 2] +SWAP # [2 (1.nil)] +CONS # [(2 . (1.nil))] +DUP +PRINT # Print (2 . (1.nil)) + +# Build (3 . (2 . (1 . nil))) +PUSH_CONST N:3 # [(2.(1.nil)) 3] +SWAP # [3 (2.(1.nil))] +CONS # [(3 . (2.(1.nil)))] +DUP +PRINT # Print full structure + +# Test CAR/CDR operations +DUP # Keep a copy of the list for later +DUP # Another copy for CAR +CAR # Get first element (3) +PRINT # Should print 3 + +SWAP # Bring back our spare list copy +CDR # Get rest of list ((2 . (1 . nil))) +DUP +PRINT # Print rest of list + +CAR # Get first of rest (2) +PRINT # Should print 2 + +HALT \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/run.sh b/awk/scheme/scheme/scratch/run.sh new file mode 100755 index 0000000..0afdb41 --- /dev/null +++ b/awk/scheme/scheme/scratch/run.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Compile Scheme to VM instructions +awk -f compiler.awk test.scm > test.asm +# Run VM instructions +awk -f vm.awk test.asm \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/test.asm b/awk/scheme/scheme/scratch/test.asm new file mode 100644 index 0000000..8e7d8df --- /dev/null +++ b/awk/scheme/scheme/scratch/test.asm @@ -0,0 +1,16 @@ +PUSH_CONST N:1 +PUSH_CONST N:2 +ADD +PUSH_CONST N:3 +PUSH_CONST N:4 +MUL +PUSH_CONST N:10 +PUSH_CONST N:2 +PUSH_CONST N:3 +ADD +SUB +PUSH_CONST NIL: +CONS +CONS +CONS +HALT diff --git a/awk/scheme/scheme/scratch/test.scm b/awk/scheme/scheme/scratch/test.scm new file mode 100644 index 0000000..a01b174 --- /dev/null +++ b/awk/scheme/scheme/scratch/test.scm @@ -0,0 +1,8 @@ +;; Build a list of calculated values +(cons (+ 1 2) ; First element: 1 + 2 = 3 + (cons (* 3 4) ; Second element: 3 * 4 = 12 + (cons (- 10 + (+ 2 3)) ; Third element: 10 - (2 + 3) = 5 + nil))) ; End of list + +;; This should create a list: (3 12 5) \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/test.scm.asm b/awk/scheme/scheme/scratch/test.scm.asm new file mode 100644 index 0000000..526e2b1 --- /dev/null +++ b/awk/scheme/scheme/scratch/test.scm.asm @@ -0,0 +1,7 @@ +PUSH_CONST N:5 +PUSH_CONST N:3 +ADD +PUSH_CONST N:2 +MUL +PRINT # should output N:16 +HALT \ No newline at end of file |