about summary refs log tree commit diff stats
path: root/awk
diff options
context:
space:
mode:
authorelioat <{ID}+{username}@users.noreply.github.com>2025-02-21 10:55:06 -0500
committerelioat <{ID}+{username}@users.noreply.github.com>2025-02-21 10:55:06 -0500
commitfa4847cb7fa840f4310c8de222f7e77d40c23141 (patch)
tree7acb716c59574e132be2949bf9f08224e8ebd6ad /awk
parent465de006a8f6db21a86d1ee5a4f45413693055a5 (diff)
downloadtour-fa4847cb7fa840f4310c8de222f7e77d40c23141.tar.gz
*
Diffstat (limited to 'awk')
-rwxr-xr-xawk/scheme/scheme/bin/compiler.awk78
-rwxr-xr-xawk/scheme/scheme/bin/vm.awk153
2 files changed, 135 insertions, 96 deletions
diff --git a/awk/scheme/scheme/bin/compiler.awk b/awk/scheme/scheme/bin/compiler.awk
index 5371bb2..b7773b7 100755
--- a/awk/scheme/scheme/bin/compiler.awk
+++ b/awk/scheme/scheme/bin/compiler.awk
@@ -1,43 +1,55 @@
 #!/usr/bin/awk -f
 
+# This is a Scheme-to-Assembly compiler implemented in AWK
+# It takes Scheme expressions as input and outputs assembly instructions
+# for a custom stack-based virtual machine
+
 BEGIN {
-    # Compiler state
-    curr_token = ""
-    input_buffer = ""
-    next_label = 0
-    program = ""
+    # Compiler maintains internal state for code generation
+    curr_token = ""      # Current token being processed by lexer
+    input_buffer = ""    # Buffer for input text being tokenized
+    next_label = 0       # Counter for generating unique labels
+    program = ""         # Accumulates the full program text
     
-    # Debug mode, 0 off, 1 on
+    # Debug flag for development/troubleshooting
     DEBUG = 0
 }
 
+# Debug logging helper function
 function debug(msg) {
     if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr"
 }
 
-# Process input line - accumulate the raw input
+# Each line of input is accumulated into the program string
+# This allows handling multi-line expressions
 {
     if (program != "") program = program "\n"
     program = program $0
 }
 
+# Main compiler entry point - called after all input is read
 END {
     debug("Raw program:\n" program)
     if (program == "") exit
     
-    # Split program into expressions and compile each one
+    # Parse and compile each expression in the program
     split_expressions(program)
 }
 
+# Splits input into individual Scheme expressions
+# Handles nested parentheses and comments
 function split_expressions(prog,    current, paren_count, i, c, expr, cleaned) {
     current = ""
     paren_count = 0
     
-    # Extract expressions between parentheses
+    # Clean up the input:
+    # 1. Remove comments (text between ; and next opening paren)
+    # 2. Normalize whitespace
+    # 3. Trim leading/trailing whitespace
     cleaned = prog
     gsub(/;[^(]*\(/, "(", cleaned)  # Remove comments before expressions
     gsub(/\)[^)]*;/, ")", cleaned)  # Remove comments after expressions
-    gsub(/[ \t\n]+/, " ", cleaned)  # Normalize whitespace
+    gsub(/[ \t\n]+/, " ", cleaned)  # Normalize whitespace to single spaces
     sub(/^[ \t\n]+/, "", cleaned)   # Trim leading whitespace
     sub(/[ \t\n]+$/, "", cleaned)   # Trim trailing whitespace
     
@@ -45,6 +57,7 @@ function split_expressions(prog,    current, paren_count, i, c, expr, cleaned) {
     
     if (cleaned == "") return
     
+    # Parse expressions by tracking parenthesis nesting
     for (i = 1; i <= length(cleaned); i++) {
         c = substr(cleaned, i, 1)
         
@@ -58,7 +71,7 @@ function split_expressions(prog,    current, paren_count, i, c, expr, cleaned) {
         if (c == ")") {
             paren_count--
             if (paren_count == 0) {
-                # Found complete expression
+                # Complete expression found - compile it
                 expr = current
                 sub(/^\s+/, "", expr)
                 sub(/\s+$/, "", expr)
@@ -72,30 +85,34 @@ function split_expressions(prog,    current, paren_count, i, c, expr, cleaned) {
         }
     }
     
+    # Add final HALT instruction
     print "HALT"
 }
 
-# Lexer functions
+# Lexer helper functions for character classification
 function is_digit(c) { return c >= "0" && c <= "9" }
 function is_whitespace(c) { return c == " " || c == "\t" || c == "\n" }
 
+# Lexical analyzer - converts input into tokens
+# Handles numbers, symbols, and parentheses
 function next_token() {
-    # Initialize input_buffer if this is first call
+    # Initialize input buffer on first call
     if (input_buffer == "") input_buffer = program
     
-    # Skip whitespace
+    # Skip whitespace between tokens
     while (length(input_buffer) > 0 && is_whitespace(substr(input_buffer, 1, 1)))
         input_buffer = substr(input_buffer, 2)
     
     if (length(input_buffer) == 0) return "EOF"
     
+    # Handle parentheses as single-character tokens
     c = substr(input_buffer, 1, 1)
     if (c == "(" || c == ")") {
         input_buffer = substr(input_buffer, 2)
         return c
     }
     
-    # Handle numbers
+    # Handle numbers (including negative numbers)
     if (is_digit(c) || c == "-" && length(input_buffer) > 1 && is_digit(substr(input_buffer, 2, 1))) {
         num = ""
         while (length(input_buffer) > 0) {
@@ -107,7 +124,7 @@ function next_token() {
         return num
     }
     
-    # Handle symbols
+    # Handle symbols (identifiers and operators)
     sym = ""
     while (length(input_buffer) > 0) {
         c = substr(input_buffer, 1, 1)
@@ -118,7 +135,8 @@ function next_token() {
     return sym
 }
 
-# Parser functions
+# Recursive descent parser for Scheme expressions
+# Returns parsed expression as a string
 function parse_expr(    token, result) {
     token = next_token()
     if (token == "EOF") return ""
@@ -133,6 +151,7 @@ function parse_expr(    token, result) {
     return token
 }
 
+# Parses a list expression (anything in parentheses)
 function parse_list(    result, expr) {
     result = ""
     
@@ -148,7 +167,8 @@ function parse_list(    result, expr) {
     return "(" result ")"
 }
 
-# Split expression into operator and arguments
+# Splits an expression into operator and arguments
+# Handles nested expressions correctly
 function split_expr(expr,    i, len, c, op, args, paren_count) {
     len = length(expr)
     paren_count = 0
@@ -173,7 +193,7 @@ function split_expr(expr,    i, len, c, op, args, paren_count) {
     return op SUBSEP args
 }
 
-# Split arguments handling nested parentheses
+# Splits argument list handling nested parentheses
 function split_args(args, arg_array,    len, i, c, current, paren_count, arg_count) {
     len = length(args)
     current = ""
@@ -201,21 +221,23 @@ function split_args(args, arg_array,    len, i, c, current, paren_count, arg_cou
     return arg_count
 }
 
-# Code generation functions
+# Code generation for numeric literals
 function compile_number(num) {
     debug("Compiling number: " num)
     print "PUSH_CONST N:" num
 }
 
+# Code generation for primitive operations (+, -, *, cons, etc)
 function compile_primitive_call(op, args,    arg_array, nargs, i) {
     debug("Primitive call: op=" op " args=" args)
     nargs = split_args(args, arg_array)
     
-    # Compile arguments for all operations
+    # First compile all arguments
     for (i = 1; i <= nargs; i++) {
         compile_expr(arg_array[i])
     }
     
+    # Then emit appropriate operation
     if (op == "+") {
         for (i = 1; i < nargs; i++)
             print "ADD"
@@ -253,12 +275,13 @@ function compile_primitive_call(op, args,    arg_array, nargs, i) {
         print "EQ"
     }
     else {
-        # Function call
+        # Function call for user-defined functions
         debug("Function call: " op)
         print "CALL " op
     }
 }
 
+# Splits let bindings into individual variable/value pairs
 function split_bindings(bindings, binding_array,    count, current, paren_count, i, c) {
     count = 0
     current = ""
@@ -267,7 +290,7 @@ function split_bindings(bindings, binding_array,    count, current, paren_count,
     for (i = 1; i <= length(bindings); i++) {
         c = substr(bindings, i, 1)
         
-        # Track parentheses
+        # Track nested parentheses
         if (c == "(") {
             paren_count++
             if (paren_count == 1) {
@@ -294,6 +317,7 @@ function split_bindings(bindings, binding_array,    count, current, paren_count,
     return count
 }
 
+# Compiles let expressions (local variable bindings)
 function compile_let(args,    bindings, body, binding_array, nbindings, i, var, val, binding_parts) {
     # Split into bindings and body
     if (substr(args, 1, 1) != "(") error("Malformed let expression")
@@ -344,6 +368,7 @@ function compile_let(args,    bindings, body, binding_array, nbindings, i, var,
     }
 }
 
+# Compiles define expressions (function/variable definitions)
 function compile_define(args,    name, params, body, param_array, nparams, i, paren_start, paren_end) {
     # Set flag for global definition
     print "PUSH_CONST B:1"
@@ -395,25 +420,29 @@ function compile_define(args,    name, params, body, param_array, nparams, i, pa
     }
 }
 
+# Main expression compiler - dispatches based on expression type
 function compile_expr(expr,    split_result, op, args) {
     debug("Compiling expression: " expr)
     
+    # Handle numeric literals
     if (expr ~ /^-?[0-9]+$/) {
         compile_number(expr)
         return
     }
     
+    # Handle nil constant
     if (expr == "nil") {
         print "PUSH_CONST NIL:"
         return
     }
     
-    # Add variable lookup
+    # Handle variable lookup
     if (expr ~ /^[a-zA-Z_][a-zA-Z0-9_]*$/) {
         print "LOOKUP " expr
         return
     }
     
+    # Handle compound expressions (lists)
     if (substr(expr, 1, 1) == "(") {
         expr = substr(expr, 2, length(expr) - 2)
         split_result = split_expr(expr)
@@ -433,6 +462,7 @@ function compile_expr(expr,    split_result, op, args) {
     error("Unknown expression type: " expr)
 }
 
+# Error reporting helper
 function error(msg) {
     print "Error: " msg > "/dev/stderr"
     exit 1
diff --git a/awk/scheme/scheme/bin/vm.awk b/awk/scheme/scheme/bin/vm.awk
index aa85280..0f0d955 100755
--- a/awk/scheme/scheme/bin/vm.awk
+++ b/awk/scheme/scheme/bin/vm.awk
@@ -1,39 +1,49 @@
 #!/usr/bin/awk -f
 
+# This is a stack-based virtual machine for executing compiled Scheme code
+# It implements a simple instruction set with support for:
+# - Basic arithmetic operations
+# - Function calls and returns
+# - Variable bindings and lookups
+# - Cons cells and list operations
+
 BEGIN {
-    # Type tags
-    T_NUMBER = "N"
-    T_BOOLEAN = "B"
-    T_SYMBOL = "S"
-    T_PAIR = "P"
-    T_FUNCTION = "F"
-    T_NIL = "NIL"
-
-    # Registers
-    stack_ptr = 0    # Stack pointer
-    heap_ptr = 0     # Heap pointer
-    pc = 0           # Program counter
+    # Type system tags for runtime type checking
+    T_NUMBER = "N"    # Numbers (integers)
+    T_BOOLEAN = "B"   # Booleans (0/1)
+    T_SYMBOL = "S"    # Symbols (identifiers)
+    T_PAIR = "P"      # Cons cells (pairs)
+    T_FUNCTION = "F"  # Function references
+    T_NIL = "NIL"     # Empty list marker
+
+    # Virtual machine registers
+    stack_ptr = 0     # Points to top of evaluation stack
+    heap_ptr = 0      # Points to next free heap location
+    pc = 0           # Program counter for instruction fetch
     
-    # Debug mode, 0 off, 1 on
+    # Debug mode toggle
     DEBUG = 0
 
-    # Environment for variables
-    env_size = 0
+    # Environment for variable bindings
+    env_size = 0     # Current size of environment stack
     
-    delete func_name
-    delete func_pc
-    delete func_code
-    func_size = 0
+    # Function table for storing defined functions
+    delete func_name  # Function names
+    delete func_pc    # Entry points
+    delete func_code  # Function bodies
+    func_size = 0    # Number of defined functions
     
+    # Call stack for function returns
     call_stack_ptr = 0
 
-    # State persistence
+    # State persistence configuration
     STATE_FILE = "/tmp/scheme_vm.state"
     if (PERSIST) {
         debug("Loading state from: " STATE_FILE)
         if ((getline line < STATE_FILE) >= 0) {  # Check if file exists and is readable
             do {
                 if (line ~ /^FUNC /) {
+                    # Parse and load function definition
                     sub(/^FUNC /, "", line)
                     name = line
                     sub(/ .*$/, "", name)
@@ -43,6 +53,7 @@ BEGIN {
                     debug("Loaded function: " name)
                     debug("Code: " code)
                     
+                    # Store function in function table
                     func_name[func_size] = name
                     func_code[func_size] = code
                     func_size++
@@ -52,21 +63,22 @@ BEGIN {
         }
     }
 
-    # Function environments
-    delete func_env_names
-    delete func_env_vals
-    delete func_env_sizes
+    # Function environment storage
+    delete func_env_names  # Variable names in function scope
+    delete func_env_vals   # Variable values in function scope
+    delete func_env_sizes  # Size of each function's environment
 
-    # Global function storage
-    delete FUNCTIONS  # Our own function storage array
+    # Global function registry
+    delete FUNCTIONS       # Maps function names to implementations
 
-    # Environment persistence
+    # Environment persistence configuration
     ENV_STATE_FILE = "/tmp/scheme_vm.env"
     if (PERSIST) {
         debug("Loading environment state from: " ENV_STATE_FILE)
         if ((getline line < ENV_STATE_FILE) >= 0) {
             do {
                 if (line ~ /^ENV /) {
+                    # Parse and load environment binding
                     sub(/^ENV /, "", line)
                     name = line
                     sub(/ .*$/, "", name)
@@ -75,6 +87,7 @@ BEGIN {
                     
                     debug("Loaded env var: " name " = " val)
                     
+                    # Store in environment
                     env_name[env_size] = name
                     env_val[env_size] = val
                     env_size++
@@ -84,15 +97,17 @@ BEGIN {
         }
     }
 
-    normal_exit = 0  # Track if we exited normally via HALT
+    # Track if VM halted normally (vs error)
+    normal_exit = 0
 }
 
-# Debug output function
+# Debug output helper
 function debug(msg) {
     if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr"
 }
 
-# Value construction and access
+# Value constructors and accessors
+# Values are stored as type:value pairs for runtime type checking
 function makeValue(type, val) {
     return type ":" val
 }
@@ -105,7 +120,7 @@ function getValue(val) {
     return substr(val, index(val, ":") + 1)
 }
 
-# Type checking
+# Type checking predicates
 function isNumber(val) { return getType(val) == T_NUMBER }
 function isBoolean(val) { return getType(val) == T_BOOLEAN }
 function isSymbol(val) { return getType(val) == T_SYMBOL }
@@ -131,10 +146,10 @@ function peek() {
     return stack[stack_ptr]
 }
 
-# Heap operations
+# Heap operations for cons cells
 function allocate(val) {
     heap[++heap_ptr] = val
-    refs[heap_ptr] = 1
+    refs[heap_ptr] = 1  # Reference counting (not fully implemented)
     debug("Allocate: " val " at " heap_ptr)
     return heap_ptr
 }
@@ -153,7 +168,7 @@ function error(msg) {
     exit 1
 }
 
-# Arithmetic operations
+# Arithmetic instruction implementations
 function vm_add() {
     if (stack_ptr < 2) error("ADD requires two operands")
     val2 = pop()
@@ -196,7 +211,7 @@ function vm_divide() {
     push(makeValue(T_NUMBER, result))
 }
 
-# List operations
+# List operation implementations
 function vm_cons() {
     if (stack_ptr < 2) error("CONS requires two operands")
     val2 = pop()
@@ -245,12 +260,13 @@ function vm_less_than() {
     push(makeValue(T_BOOLEAN, result))
 }
 
-# Instruction execution
+# Main instruction execution loop
 function execute(instr) {
     split(instr, parts, " ")
     op = parts[1]
     debug("Execute: " instr)
     
+    # Dispatch based on instruction opcode
     if (op == "PUSH_CONST") {
         push(parts[2])
     }
@@ -300,7 +316,7 @@ function execute(instr) {
         print peek()
     }
     else if (op == "HALT") {
-        normal_exit = 1  # Mark that we're exiting normally
+        normal_exit = 1
         if (stack_ptr > 0) {
             result = peek()
         }
@@ -335,36 +351,34 @@ function execute(instr) {
     }
 }
 
-# Program loading
+# Load program instructions
 {
-    # Store each line as an instruction
     program[NR-1] = $0
 }
 
-# Program execution
+# Main execution loop
 END {
-    # Execute program
     while (pc < length(program)) {
         execute(program[pc++])
     }
     
-    # Only save state if we didn't halt normally
+    # Save state if we didn't halt normally
     if (!normal_exit && PERSIST) {
         save_state()
     }
 }
 
-# Modify vm_store to handle global variables more consistently
+# Variable binding implementation
 function vm_store(name) {
     debug("Storing " peek() " as " name " at env_size: " env_size)
     
-    # If this is from a define, mark it as global
-    if (lookup_no_error("from_define")) {  # Check if from_define is set
+    # Handle global definitions specially
+    if (lookup_no_error("from_define")) {
         name = "__global_" name
-        # Clear the flag
+        # Clear the define flag
         for (i = env_size - 1; i >= 0; i--) {
             if (env_name[i] == "from_define") {
-                env_size--  # Remove the flag
+                env_size--
                 break
             }
         }
@@ -372,7 +386,7 @@ function vm_store(name) {
         # Remove any previous definition of this global
         for (i = env_size - 1; i >= 0; i--) {
             if (env_name[i] == name) {
-                # Shift everything down to remove the old definition
+                # Shift everything down
                 for (j = i; j < env_size - 1; j++) {
                     env_name[j] = env_name[j + 1]
                     env_val[j] = env_val[j + 1]
@@ -383,7 +397,7 @@ function vm_store(name) {
         }
     }
     
-    # Store in current environment frame
+    # Add to environment
     env_name[env_size] = name
     env_val[env_size] = peek()
     env_size++
@@ -391,11 +405,12 @@ function vm_store(name) {
     dump_env()
 }
 
+# Remove top binding from environment
 function vm_pop_env() {
     if (env_size <= 0) error("Environment underflow")
     debug("Popping environment at size: " env_size)
     
-    # Don't pop if this is a global definition (from define)
+    # Don't pop globals
     if (env_name[env_size-1] ~ /^__global_/) {
         debug("Keeping global definition: " env_name[env_size-1])
         return
@@ -405,12 +420,12 @@ function vm_pop_env() {
     env_size--
 }
 
-# Modify vm_lookup to be more explicit about global lookups
+# Variable lookup implementation
 function vm_lookup(name,    i, global_name) {
     debug("Looking up " name " in environment of size: " env_size)
     dump_env()
     
-    # First try looking up with global prefix
+    # Try global name first, then local
     global_name = "__global_" name
     for (i = env_size - 1; i >= 0; i--) {
         if (env_name[i] == global_name) {
@@ -427,6 +442,7 @@ function vm_lookup(name,    i, global_name) {
     error("Undefined variable: " name)
 }
 
+# Function definition implementation
 function vm_define_function(name, start_pc) {
     debug("Defining function: " name " at " start_pc)
     
@@ -440,13 +456,14 @@ function vm_define_function(name, start_pc) {
     }
     code = code "\nRETURN"
     
-    # Store in our function array
+    # Store function
     debug("Storing function: " name " = " code)
     FUNCTIONS[name] = code
     
     pc = i + 1
 }
 
+# Function call implementation
 function vm_call_function(name,    code_lines, j, saved_pc, saved_env_size, arg, param_name) {
     debug("Calling function: " name)
     
@@ -454,7 +471,7 @@ function vm_call_function(name,    code_lines, j, saved_pc, saved_env_size, arg,
         error("Undefined function: " name)
     }
     
-    # Get argument from stack before modifying program
+    # Get argument before modifying program
     arg = pop()
     debug("Function argument: " arg)
     
@@ -464,15 +481,15 @@ function vm_call_function(name,    code_lines, j, saved_pc, saved_env_size, arg,
     # Split function code into lines
     split(FUNCTIONS[name], code_lines, "\n")
     
-    # Extract parameter name from first STORE instruction
+    # Get parameter name from STORE instruction
     if (code_lines[1] ~ /^STORE /) {
-        param_name = substr(code_lines[1], 7)  # Skip "STORE "
+        param_name = substr(code_lines[1], 7)
         debug("Found parameter name: " param_name)
     } else {
         error("Function missing parameter definition")
     }
     
-    # Create new environment frame with correct parameter name
+    # Create new environment frame
     debug("Creating new environment frame at size: " env_size)
     env_name[env_size] = param_name
     env_val[env_size] = arg
@@ -483,7 +500,7 @@ function vm_call_function(name,    code_lines, j, saved_pc, saved_env_size, arg,
         program[length(program)] = code_lines[j]
     }
     
-    # Jump to function code
+    # Save return info and jump to function
     pc = length(program) - length(code_lines)
     call_stack[++call_stack_ptr] = saved_pc
     env_stack[call_stack_ptr] = saved_env_size
@@ -492,6 +509,7 @@ function vm_call_function(name,    code_lines, j, saved_pc, saved_env_size, arg,
     dump_env()
 }
 
+# Function return implementation
 function vm_return() {
     if (call_stack_ptr > 0) {
         # Save return value
@@ -506,14 +524,14 @@ function vm_return() {
         # Restore program counter
         pc = call_stack[call_stack_ptr--]
         
-        # Push return value back
+        # Push return value
         push(ret_val)
         
         debug("Returned with value: " ret_val " and env_size: " env_size)
     }
 }
 
-# Add debug function to dump environment in a more readable format
+# Debug helper to dump environment contents
 function dump_env(    i) {
     debug("Environment dump:")
     for (i = 0; i < env_size; i++) {
@@ -521,16 +539,7 @@ function dump_env(    i) {
     }
 }
 
-# Add flag for define statements
-function compile_define(args,    name, params, body, param_array, nparams, i, paren_start, paren_end) {
-    # Set flag to mark this as a global definition
-    print "PUSH_CONST B:1"  # Push true
-    print "STORE __from_define"
-    
-    # ... rest of existing compile_define function ...
-}
-
-# Add helper function for looking up without error
+# Helper for checking variable existence without error
 function lookup_no_error(name,    i) {
     for (i = env_size - 1; i >= 0; i--) {
         if (env_name[i] == name) {
@@ -540,7 +549,7 @@ function lookup_no_error(name,    i) {
     return 0
 }
 
-# Add new function to handle state saving
+# State persistence implementation
 function save_state() {
     debug("Saving state to: " STATE_FILE)
     for (i = 0; i < func_size; i++) {
@@ -552,7 +561,7 @@ function save_state() {
     # Save environment state
     debug("Saving environment state to: " ENV_STATE_FILE)
     for (i = 0; i < env_size; i++) {
-        if (env_name[i] ~ /^__global_/) {  # Only save global variables
+        if (env_name[i] ~ /^__global_/) {  # Only save globals
             debug("Saving env var: " env_name[i] " = " env_val[i])
             print "ENV " env_name[i] " " env_val[i] > ENV_STATE_FILE
         }