diff options
Diffstat (limited to 'awk')
-rwxr-xr-x | awk/scheme/scheme/bin/compiler.awk | 78 | ||||
-rwxr-xr-x | awk/scheme/scheme/bin/vm.awk | 153 |
2 files changed, 135 insertions, 96 deletions
diff --git a/awk/scheme/scheme/bin/compiler.awk b/awk/scheme/scheme/bin/compiler.awk index 5371bb2..b7773b7 100755 --- a/awk/scheme/scheme/bin/compiler.awk +++ b/awk/scheme/scheme/bin/compiler.awk @@ -1,43 +1,55 @@ #!/usr/bin/awk -f +# This is a Scheme-to-Assembly compiler implemented in AWK +# It takes Scheme expressions as input and outputs assembly instructions +# for a custom stack-based virtual machine + BEGIN { - # Compiler state - curr_token = "" - input_buffer = "" - next_label = 0 - program = "" + # Compiler maintains internal state for code generation + curr_token = "" # Current token being processed by lexer + input_buffer = "" # Buffer for input text being tokenized + next_label = 0 # Counter for generating unique labels + program = "" # Accumulates the full program text - # Debug mode, 0 off, 1 on + # Debug flag for development/troubleshooting DEBUG = 0 } +# Debug logging helper function function debug(msg) { if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" } -# Process input line - accumulate the raw input +# Each line of input is accumulated into the program string +# This allows handling multi-line expressions { if (program != "") program = program "\n" program = program $0 } +# Main compiler entry point - called after all input is read END { debug("Raw program:\n" program) if (program == "") exit - # Split program into expressions and compile each one + # Parse and compile each expression in the program split_expressions(program) } +# Splits input into individual Scheme expressions +# Handles nested parentheses and comments function split_expressions(prog, current, paren_count, i, c, expr, cleaned) { current = "" paren_count = 0 - # Extract expressions between parentheses + # Clean up the input: + # 1. Remove comments (text between ; and next opening paren) + # 2. Normalize whitespace + # 3. Trim leading/trailing whitespace cleaned = prog gsub(/;[^(]*\(/, "(", cleaned) # Remove comments before expressions gsub(/\)[^)]*;/, ")", cleaned) # Remove comments after expressions - gsub(/[ \t\n]+/, " ", cleaned) # Normalize whitespace + gsub(/[ \t\n]+/, " ", cleaned) # Normalize whitespace to single spaces sub(/^[ \t\n]+/, "", cleaned) # Trim leading whitespace sub(/[ \t\n]+$/, "", cleaned) # Trim trailing whitespace @@ -45,6 +57,7 @@ function split_expressions(prog, current, paren_count, i, c, expr, cleaned) { if (cleaned == "") return + # Parse expressions by tracking parenthesis nesting for (i = 1; i <= length(cleaned); i++) { c = substr(cleaned, i, 1) @@ -58,7 +71,7 @@ function split_expressions(prog, current, paren_count, i, c, expr, cleaned) { if (c == ")") { paren_count-- if (paren_count == 0) { - # Found complete expression + # Complete expression found - compile it expr = current sub(/^\s+/, "", expr) sub(/\s+$/, "", expr) @@ -72,30 +85,34 @@ function split_expressions(prog, current, paren_count, i, c, expr, cleaned) { } } + # Add final HALT instruction print "HALT" } -# Lexer functions +# Lexer helper functions for character classification function is_digit(c) { return c >= "0" && c <= "9" } function is_whitespace(c) { return c == " " || c == "\t" || c == "\n" } +# Lexical analyzer - converts input into tokens +# Handles numbers, symbols, and parentheses function next_token() { - # Initialize input_buffer if this is first call + # Initialize input buffer on first call if (input_buffer == "") input_buffer = program - # Skip whitespace + # Skip whitespace between tokens while (length(input_buffer) > 0 && is_whitespace(substr(input_buffer, 1, 1))) input_buffer = substr(input_buffer, 2) if (length(input_buffer) == 0) return "EOF" + # Handle parentheses as single-character tokens c = substr(input_buffer, 1, 1) if (c == "(" || c == ")") { input_buffer = substr(input_buffer, 2) return c } - # Handle numbers + # Handle numbers (including negative numbers) if (is_digit(c) || c == "-" && length(input_buffer) > 1 && is_digit(substr(input_buffer, 2, 1))) { num = "" while (length(input_buffer) > 0) { @@ -107,7 +124,7 @@ function next_token() { return num } - # Handle symbols + # Handle symbols (identifiers and operators) sym = "" while (length(input_buffer) > 0) { c = substr(input_buffer, 1, 1) @@ -118,7 +135,8 @@ function next_token() { return sym } -# Parser functions +# Recursive descent parser for Scheme expressions +# Returns parsed expression as a string function parse_expr( token, result) { token = next_token() if (token == "EOF") return "" @@ -133,6 +151,7 @@ function parse_expr( token, result) { return token } +# Parses a list expression (anything in parentheses) function parse_list( result, expr) { result = "" @@ -148,7 +167,8 @@ function parse_list( result, expr) { return "(" result ")" } -# Split expression into operator and arguments +# Splits an expression into operator and arguments +# Handles nested expressions correctly function split_expr(expr, i, len, c, op, args, paren_count) { len = length(expr) paren_count = 0 @@ -173,7 +193,7 @@ function split_expr(expr, i, len, c, op, args, paren_count) { return op SUBSEP args } -# Split arguments handling nested parentheses +# Splits argument list handling nested parentheses function split_args(args, arg_array, len, i, c, current, paren_count, arg_count) { len = length(args) current = "" @@ -201,21 +221,23 @@ function split_args(args, arg_array, len, i, c, current, paren_count, arg_cou return arg_count } -# Code generation functions +# Code generation for numeric literals function compile_number(num) { debug("Compiling number: " num) print "PUSH_CONST N:" num } +# Code generation for primitive operations (+, -, *, cons, etc) function compile_primitive_call(op, args, arg_array, nargs, i) { debug("Primitive call: op=" op " args=" args) nargs = split_args(args, arg_array) - # Compile arguments for all operations + # First compile all arguments for (i = 1; i <= nargs; i++) { compile_expr(arg_array[i]) } + # Then emit appropriate operation if (op == "+") { for (i = 1; i < nargs; i++) print "ADD" @@ -253,12 +275,13 @@ function compile_primitive_call(op, args, arg_array, nargs, i) { print "EQ" } else { - # Function call + # Function call for user-defined functions debug("Function call: " op) print "CALL " op } } +# Splits let bindings into individual variable/value pairs function split_bindings(bindings, binding_array, count, current, paren_count, i, c) { count = 0 current = "" @@ -267,7 +290,7 @@ function split_bindings(bindings, binding_array, count, current, paren_count, for (i = 1; i <= length(bindings); i++) { c = substr(bindings, i, 1) - # Track parentheses + # Track nested parentheses if (c == "(") { paren_count++ if (paren_count == 1) { @@ -294,6 +317,7 @@ function split_bindings(bindings, binding_array, count, current, paren_count, return count } +# Compiles let expressions (local variable bindings) function compile_let(args, bindings, body, binding_array, nbindings, i, var, val, binding_parts) { # Split into bindings and body if (substr(args, 1, 1) != "(") error("Malformed let expression") @@ -344,6 +368,7 @@ function compile_let(args, bindings, body, binding_array, nbindings, i, var, } } +# Compiles define expressions (function/variable definitions) function compile_define(args, name, params, body, param_array, nparams, i, paren_start, paren_end) { # Set flag for global definition print "PUSH_CONST B:1" @@ -395,25 +420,29 @@ function compile_define(args, name, params, body, param_array, nparams, i, pa } } +# Main expression compiler - dispatches based on expression type function compile_expr(expr, split_result, op, args) { debug("Compiling expression: " expr) + # Handle numeric literals if (expr ~ /^-?[0-9]+$/) { compile_number(expr) return } + # Handle nil constant if (expr == "nil") { print "PUSH_CONST NIL:" return } - # Add variable lookup + # Handle variable lookup if (expr ~ /^[a-zA-Z_][a-zA-Z0-9_]*$/) { print "LOOKUP " expr return } + # Handle compound expressions (lists) if (substr(expr, 1, 1) == "(") { expr = substr(expr, 2, length(expr) - 2) split_result = split_expr(expr) @@ -433,6 +462,7 @@ function compile_expr(expr, split_result, op, args) { error("Unknown expression type: " expr) } +# Error reporting helper function error(msg) { print "Error: " msg > "/dev/stderr" exit 1 diff --git a/awk/scheme/scheme/bin/vm.awk b/awk/scheme/scheme/bin/vm.awk index aa85280..0f0d955 100755 --- a/awk/scheme/scheme/bin/vm.awk +++ b/awk/scheme/scheme/bin/vm.awk @@ -1,39 +1,49 @@ #!/usr/bin/awk -f +# This is a stack-based virtual machine for executing compiled Scheme code +# It implements a simple instruction set with support for: +# - Basic arithmetic operations +# - Function calls and returns +# - Variable bindings and lookups +# - Cons cells and list operations + BEGIN { - # Type tags - T_NUMBER = "N" - T_BOOLEAN = "B" - T_SYMBOL = "S" - T_PAIR = "P" - T_FUNCTION = "F" - T_NIL = "NIL" - - # Registers - stack_ptr = 0 # Stack pointer - heap_ptr = 0 # Heap pointer - pc = 0 # Program counter + # Type system tags for runtime type checking + T_NUMBER = "N" # Numbers (integers) + T_BOOLEAN = "B" # Booleans (0/1) + T_SYMBOL = "S" # Symbols (identifiers) + T_PAIR = "P" # Cons cells (pairs) + T_FUNCTION = "F" # Function references + T_NIL = "NIL" # Empty list marker + + # Virtual machine registers + stack_ptr = 0 # Points to top of evaluation stack + heap_ptr = 0 # Points to next free heap location + pc = 0 # Program counter for instruction fetch - # Debug mode, 0 off, 1 on + # Debug mode toggle DEBUG = 0 - # Environment for variables - env_size = 0 + # Environment for variable bindings + env_size = 0 # Current size of environment stack - delete func_name - delete func_pc - delete func_code - func_size = 0 + # Function table for storing defined functions + delete func_name # Function names + delete func_pc # Entry points + delete func_code # Function bodies + func_size = 0 # Number of defined functions + # Call stack for function returns call_stack_ptr = 0 - # State persistence + # State persistence configuration STATE_FILE = "/tmp/scheme_vm.state" if (PERSIST) { debug("Loading state from: " STATE_FILE) if ((getline line < STATE_FILE) >= 0) { # Check if file exists and is readable do { if (line ~ /^FUNC /) { + # Parse and load function definition sub(/^FUNC /, "", line) name = line sub(/ .*$/, "", name) @@ -43,6 +53,7 @@ BEGIN { debug("Loaded function: " name) debug("Code: " code) + # Store function in function table func_name[func_size] = name func_code[func_size] = code func_size++ @@ -52,21 +63,22 @@ BEGIN { } } - # Function environments - delete func_env_names - delete func_env_vals - delete func_env_sizes + # Function environment storage + delete func_env_names # Variable names in function scope + delete func_env_vals # Variable values in function scope + delete func_env_sizes # Size of each function's environment - # Global function storage - delete FUNCTIONS # Our own function storage array + # Global function registry + delete FUNCTIONS # Maps function names to implementations - # Environment persistence + # Environment persistence configuration ENV_STATE_FILE = "/tmp/scheme_vm.env" if (PERSIST) { debug("Loading environment state from: " ENV_STATE_FILE) if ((getline line < ENV_STATE_FILE) >= 0) { do { if (line ~ /^ENV /) { + # Parse and load environment binding sub(/^ENV /, "", line) name = line sub(/ .*$/, "", name) @@ -75,6 +87,7 @@ BEGIN { debug("Loaded env var: " name " = " val) + # Store in environment env_name[env_size] = name env_val[env_size] = val env_size++ @@ -84,15 +97,17 @@ BEGIN { } } - normal_exit = 0 # Track if we exited normally via HALT + # Track if VM halted normally (vs error) + normal_exit = 0 } -# Debug output function +# Debug output helper function debug(msg) { if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" } -# Value construction and access +# Value constructors and accessors +# Values are stored as type:value pairs for runtime type checking function makeValue(type, val) { return type ":" val } @@ -105,7 +120,7 @@ function getValue(val) { return substr(val, index(val, ":") + 1) } -# Type checking +# Type checking predicates function isNumber(val) { return getType(val) == T_NUMBER } function isBoolean(val) { return getType(val) == T_BOOLEAN } function isSymbol(val) { return getType(val) == T_SYMBOL } @@ -131,10 +146,10 @@ function peek() { return stack[stack_ptr] } -# Heap operations +# Heap operations for cons cells function allocate(val) { heap[++heap_ptr] = val - refs[heap_ptr] = 1 + refs[heap_ptr] = 1 # Reference counting (not fully implemented) debug("Allocate: " val " at " heap_ptr) return heap_ptr } @@ -153,7 +168,7 @@ function error(msg) { exit 1 } -# Arithmetic operations +# Arithmetic instruction implementations function vm_add() { if (stack_ptr < 2) error("ADD requires two operands") val2 = pop() @@ -196,7 +211,7 @@ function vm_divide() { push(makeValue(T_NUMBER, result)) } -# List operations +# List operation implementations function vm_cons() { if (stack_ptr < 2) error("CONS requires two operands") val2 = pop() @@ -245,12 +260,13 @@ function vm_less_than() { push(makeValue(T_BOOLEAN, result)) } -# Instruction execution +# Main instruction execution loop function execute(instr) { split(instr, parts, " ") op = parts[1] debug("Execute: " instr) + # Dispatch based on instruction opcode if (op == "PUSH_CONST") { push(parts[2]) } @@ -300,7 +316,7 @@ function execute(instr) { print peek() } else if (op == "HALT") { - normal_exit = 1 # Mark that we're exiting normally + normal_exit = 1 if (stack_ptr > 0) { result = peek() } @@ -335,36 +351,34 @@ function execute(instr) { } } -# Program loading +# Load program instructions { - # Store each line as an instruction program[NR-1] = $0 } -# Program execution +# Main execution loop END { - # Execute program while (pc < length(program)) { execute(program[pc++]) } - # Only save state if we didn't halt normally + # Save state if we didn't halt normally if (!normal_exit && PERSIST) { save_state() } } -# Modify vm_store to handle global variables more consistently +# Variable binding implementation function vm_store(name) { debug("Storing " peek() " as " name " at env_size: " env_size) - # If this is from a define, mark it as global - if (lookup_no_error("from_define")) { # Check if from_define is set + # Handle global definitions specially + if (lookup_no_error("from_define")) { name = "__global_" name - # Clear the flag + # Clear the define flag for (i = env_size - 1; i >= 0; i--) { if (env_name[i] == "from_define") { - env_size-- # Remove the flag + env_size-- break } } @@ -372,7 +386,7 @@ function vm_store(name) { # Remove any previous definition of this global for (i = env_size - 1; i >= 0; i--) { if (env_name[i] == name) { - # Shift everything down to remove the old definition + # Shift everything down for (j = i; j < env_size - 1; j++) { env_name[j] = env_name[j + 1] env_val[j] = env_val[j + 1] @@ -383,7 +397,7 @@ function vm_store(name) { } } - # Store in current environment frame + # Add to environment env_name[env_size] = name env_val[env_size] = peek() env_size++ @@ -391,11 +405,12 @@ function vm_store(name) { dump_env() } +# Remove top binding from environment function vm_pop_env() { if (env_size <= 0) error("Environment underflow") debug("Popping environment at size: " env_size) - # Don't pop if this is a global definition (from define) + # Don't pop globals if (env_name[env_size-1] ~ /^__global_/) { debug("Keeping global definition: " env_name[env_size-1]) return @@ -405,12 +420,12 @@ function vm_pop_env() { env_size-- } -# Modify vm_lookup to be more explicit about global lookups +# Variable lookup implementation function vm_lookup(name, i, global_name) { debug("Looking up " name " in environment of size: " env_size) dump_env() - # First try looking up with global prefix + # Try global name first, then local global_name = "__global_" name for (i = env_size - 1; i >= 0; i--) { if (env_name[i] == global_name) { @@ -427,6 +442,7 @@ function vm_lookup(name, i, global_name) { error("Undefined variable: " name) } +# Function definition implementation function vm_define_function(name, start_pc) { debug("Defining function: " name " at " start_pc) @@ -440,13 +456,14 @@ function vm_define_function(name, start_pc) { } code = code "\nRETURN" - # Store in our function array + # Store function debug("Storing function: " name " = " code) FUNCTIONS[name] = code pc = i + 1 } +# Function call implementation function vm_call_function(name, code_lines, j, saved_pc, saved_env_size, arg, param_name) { debug("Calling function: " name) @@ -454,7 +471,7 @@ function vm_call_function(name, code_lines, j, saved_pc, saved_env_size, arg, error("Undefined function: " name) } - # Get argument from stack before modifying program + # Get argument before modifying program arg = pop() debug("Function argument: " arg) @@ -464,15 +481,15 @@ function vm_call_function(name, code_lines, j, saved_pc, saved_env_size, arg, # Split function code into lines split(FUNCTIONS[name], code_lines, "\n") - # Extract parameter name from first STORE instruction + # Get parameter name from STORE instruction if (code_lines[1] ~ /^STORE /) { - param_name = substr(code_lines[1], 7) # Skip "STORE " + param_name = substr(code_lines[1], 7) debug("Found parameter name: " param_name) } else { error("Function missing parameter definition") } - # Create new environment frame with correct parameter name + # Create new environment frame debug("Creating new environment frame at size: " env_size) env_name[env_size] = param_name env_val[env_size] = arg @@ -483,7 +500,7 @@ function vm_call_function(name, code_lines, j, saved_pc, saved_env_size, arg, program[length(program)] = code_lines[j] } - # Jump to function code + # Save return info and jump to function pc = length(program) - length(code_lines) call_stack[++call_stack_ptr] = saved_pc env_stack[call_stack_ptr] = saved_env_size @@ -492,6 +509,7 @@ function vm_call_function(name, code_lines, j, saved_pc, saved_env_size, arg, dump_env() } +# Function return implementation function vm_return() { if (call_stack_ptr > 0) { # Save return value @@ -506,14 +524,14 @@ function vm_return() { # Restore program counter pc = call_stack[call_stack_ptr--] - # Push return value back + # Push return value push(ret_val) debug("Returned with value: " ret_val " and env_size: " env_size) } } -# Add debug function to dump environment in a more readable format +# Debug helper to dump environment contents function dump_env( i) { debug("Environment dump:") for (i = 0; i < env_size; i++) { @@ -521,16 +539,7 @@ function dump_env( i) { } } -# Add flag for define statements -function compile_define(args, name, params, body, param_array, nparams, i, paren_start, paren_end) { - # Set flag to mark this as a global definition - print "PUSH_CONST B:1" # Push true - print "STORE __from_define" - - # ... rest of existing compile_define function ... -} - -# Add helper function for looking up without error +# Helper for checking variable existence without error function lookup_no_error(name, i) { for (i = env_size - 1; i >= 0; i--) { if (env_name[i] == name) { @@ -540,7 +549,7 @@ function lookup_no_error(name, i) { return 0 } -# Add new function to handle state saving +# State persistence implementation function save_state() { debug("Saving state to: " STATE_FILE) for (i = 0; i < func_size; i++) { @@ -552,7 +561,7 @@ function save_state() { # Save environment state debug("Saving environment state to: " ENV_STATE_FILE) for (i = 0; i < env_size; i++) { - if (env_name[i] ~ /^__global_/) { # Only save global variables + if (env_name[i] ~ /^__global_/) { # Only save globals debug("Saving env var: " env_name[i] " = " env_val[i]) print "ENV " env_name[i] " " env_val[i] > ENV_STATE_FILE } |