about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author elioat <elioat@tilde.institute> 2025-01-20 10:51:32 -0500
committer elioat <elioat@tilde.institute> 2025-01-20 10:51:32 -0500
commit 1dda44054a253e027a2cb6f8d33ad56dac6bda2b (patch)
tree 709e51bddb907b02446084884078bcd133cdeaf7
parent 9563293d0521e5b5d3306e3bb3f512b5e4765d6b (diff)
download tour-1dda44054a253e027a2cb6f8d33ad56dac6bda2b.tar.gz
*
-rwxr-xr-x awk/scheme/scheme/bin/compiler.awk 226
-rwxr-xr-x awk/scheme/scheme/bin/repl 31
-rwxr-xr-x awk/scheme/scheme/bin/vm.awk 202
-rw-r--r-- awk/scheme/scheme/examples/function-test.scm 2
-rw-r--r-- awk/scheme/scheme/examples/let-and-def.scm 9
-rw-r--r-- awk/scheme/scheme/examples/simple.scm 3
6 files changed, 436 insertions, 37 deletions
diff --git a/awk/scheme/scheme/bin/compiler.awk b/awk/scheme/scheme/bin/compiler.awk
index d4d0ff0..6a57f19 100755
--- a/awk/scheme/scheme/bin/compiler.awk
+++ b/awk/scheme/scheme/bin/compiler.awk
@@ -8,40 +8,71 @@ BEGIN {
     program = ""
     
     # Debug mode
-    DEBUG = 1
+    DEBUG = 0
 }
 
 function debug(msg) {
-    if (DEBUG) print "[DEBUG] " msg > "/dev/stderr"
+    if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr"
 }
 
-# Clean line by removing comments
-function clean_line(line) {
-    # Remove everything after and including semicolon
-    sub(/;.*$/, "", line)
-    # Trim whitespace
-    sub(/^[ \t]+/, "", line)
-    sub(/[ \t]+$/, "", line)
-    return line
-}
-
-# Process input line
+# Process input line - just accumulate the raw input
 {
-    line = clean_line($0)
-    if (line != "") {
-        if (program != "") program = program " "
-        program = program line
-    }
+    if (program != "") program = program "\n"
+    program = program $0
 }
 
 END {
-    debug("Input program after cleaning: " program)
+    debug("Raw program:\n" program)
     if (program == "") exit
     
-    # Parse and compile the program
-    expr = parse_expr()
-    debug("Parsed expression: " expr)
-    compile_expr(expr)
+    # Split program into expressions and compile each one
+    split_expressions(program)
+}
+
+# Split the program text into top-level parenthesized expressions and compile each one in order
+function split_expressions(prog,    current, paren_count, i, c, expr, cleaned) {
+    current = ""
+    paren_count = 0
+    
+    # Comment removal is anchored on parentheses, not line ends; comments containing ( or ) are not handled
+    cleaned = prog
+    gsub(/;[^(]*\(/, "(", cleaned)  # Drop everything from ';' up to the next '('
+    gsub(/\)[^)]*;/, ")", cleaned)  # Drop text between ')' and a following ';' (the ';' included)
+    gsub(/[ \t\n]+/, " ", cleaned)  # Normalize whitespace
+    sub(/^[ \t\n]+/, "", cleaned)   # Trim leading whitespace
+    sub(/[ \t\n]+$/, "", cleaned)   # Trim trailing whitespace
+    
+    debug("Cleaned program: [" cleaned "]")
+    
+    if (cleaned == "") return
+    
+    for (i = 1; i <= length(cleaned); i++) {
+        c = substr(cleaned, i, 1)
+        
+        if (c == "(") {
+            if (paren_count == 0) current = ""  # Discard any text seen outside parentheses
+            paren_count++
+        }
+        
+        current = current c
+        
+        if (c == ")") {
+            paren_count--
+            if (paren_count == 0) {
+                # Found complete expression
+                expr = current
+                sub(/^[ \t\n]+/, "", expr)  # Bracket expression instead of \s, which is a gawk extension
+                sub(/[ \t\n]+$/, "", expr)
+                
+                debug("Processing expression: [" expr "]")
+                program = expr  # Set for parser
+                expr = parse_expr()
+                compile_expr(expr)
+                current = ""
+            }
+        }
+    }
+    
     print "HALT"
 }
 
@@ -181,7 +212,7 @@ function compile_primitive_call(op, args,    arg_array, nargs, i) {
     debug("Primitive call: op=" op " args=" args)
     nargs = split_args(args, arg_array)
     
-    # Compile each argument
+    # Compile arguments for all operations
     for (i = 1; i <= nargs; i++) {
         compile_expr(arg_array[i])
     }
@@ -223,10 +254,139 @@ function compile_primitive_call(op, args,    arg_array, nargs, i) {
         print "EQ"
     }
     else {
-        error("Unknown operator: " op)
+        # Function call
+        debug("Function call: " op)
+        print "CALL " op
+    }
+}
+
+function split_bindings(bindings, binding_array,    count, current, paren_count, i, c) {
+    count = 0
+    current = ""
+    paren_count = 0
+    # Collect each top-level "(...)" group of bindings, minus its outer parens, into binding_array[1..count]
+    for (i = 1; i <= length(bindings); i++) {
+        c = substr(bindings, i, 1)
+        
+        # Track parentheses
+        if (c == "(") {
+            paren_count++
+            if (paren_count == 1) {
+                current = ""  # Start new binding
+                continue      # The opening parenthesis itself is not kept
+            }
+        }
+        if (c == ")") {
+            paren_count--
+            if (paren_count == 0) {
+                # End of binding
+                binding_array[++count] = current
+                current = ""
+                continue      # The closing parenthesis itself is not kept
+            }
+        }
+        
+        # Only add character if we're inside a binding
+        if (paren_count > 0) {
+            current = current c
+        }
+    }
+    
+    return count  # Number of bindings stored in binding_array
+}
+
+function compile_let(args,    bindings, body, binding_array, nbindings, i, var, val, sp, paren_count) {
+    # Compile (let ((var val) ...) body); args is the text that follows "let"
+    if (substr(args, 1, 1) != "(") error("Malformed let expression")
+    
+    # Find matching closing parenthesis for bindings
+    paren_count = 1
+    i = 2
+    while (paren_count > 0 && i <= length(args)) {
+        if (substr(args, i, 1) == "(") paren_count++
+        if (substr(args, i, 1) == ")") paren_count--
+        i++
+    }
+    if (paren_count > 0) error("Unmatched parenthesis in let bindings")
+    
+    bindings = substr(args, 2, i - 3)  # i is one past the closing paren; drop both outer parentheses
+    body = substr(args, i)
+    
+    # Trim whitespace from body
+    sub(/^[ \t\n]+/, "", body)
+    sub(/[ \t\n]+$/, "", body)
+    
+    debug("Let bindings: " bindings)
+    debug("Let body: " body)
+    
+    # Compile each binding
+    nbindings = split_bindings(bindings, binding_array)
+    for (i = 1; i <= nbindings; i++) {
+        debug("Processing binding: " binding_array[i])
+        val = binding_array[i]
+        gsub(/^[ \t\n]+|[ \t\n]+$/, "", val)
+        sp = match(val, /[ \t\n]+/)  # First whitespace run separates the name from its value
+        if (sp == 0) error("Malformed let binding: " binding_array[i])
+        var = substr(val, 1, sp - 1)
+        val = substr(val, sp + RLENGTH)  # Keep the whole value expression, not just its first word
+        debug("Binding var: " var " val: " val)
+        compile_expr(val)  # Emit the code for the value before the STORE that names it
+        
+        # Store in environment
+        print "STORE " var
+    }
+    
+    # Compile the body
+    compile_expr(body)
+    
+    # Clean up bindings AFTER evaluating body
+    for (i = nbindings; i >= 1; i--) {
+        print "POP_ENV"
     }
 }
 
+function compile_define(args,    name, params, body, param_array, nparams, i, paren_count) {
+    # Compile (define name (params...) body); the name is everything up to the first space
+    i = index(args, " ")
+    if (i == 0) error("Malformed define expression")
+    name = substr(args, 1, i - 1)
+    args = substr(args, i + 1)
+    
+    # Find parameter list
+    if (substr(args, 1, 1) != "(") error("Missing parameter list in define")
+    
+    # Find matching closing parenthesis for params
+    paren_count = 1
+    i = 2
+    while (paren_count > 0 && i <= length(args)) {
+        if (substr(args, i, 1) == "(") paren_count++
+        if (substr(args, i, 1) == ")") paren_count--
+        i++
+    }
+    if (paren_count > 0) error("Unmatched parenthesis in parameter list")
+    
+    params = substr(args, 2, i - 3)  # i is one past the closing paren; drop both parentheses
+    body = substr(args, i)           # Take everything after the parameter list...
+    sub(/^[ \t\n]+/, "", body)       # ...and trim, rather than assuming exactly one separator character
+    # Create function label
+    print "LABEL " name
+    
+    # Process parameters
+    nparams = split(params, param_array, " ")
+    for (i = 1; i <= nparams; i++) {
+        print "STORE " param_array[i]
+    }
+    
+    # Compile function body
+    compile_expr(body)
+    
+    # Clean up parameters and return
+    for (i = nparams; i >= 1; i--) {
+        print "POP_ENV"
+    }
+    print "RETURN"
+}
+
 function compile_expr(expr,    split_result, op, args) {
     debug("Compiling expression: " expr)
     
@@ -240,15 +400,25 @@ function compile_expr(expr,    split_result, op, args) {
         return
     }
     
+    # Add variable lookup
+    if (expr ~ /^[a-zA-Z_][a-zA-Z0-9_]*$/) {
+        print "LOOKUP " expr
+        return
+    }
+    
     if (substr(expr, 1, 1) == "(") {
-        # Strip outer parentheses
         expr = substr(expr, 2, length(expr) - 2)
         split_result = split_expr(expr)
         op = substr(split_result, 1, index(split_result, SUBSEP) - 1)
         args = substr(split_result, index(split_result, SUBSEP) + 1)
         
-        debug("Split expression: op=" op " args=" args)
-        compile_primitive_call(op, args)
+        if (op == "define") {
+            compile_define(args)
+        } else if (op == "let") {
+            compile_let(args)
+        } else {
+            compile_primitive_call(op, args)
+        }
         return
     }
     
diff --git a/awk/scheme/scheme/bin/repl b/awk/scheme/scheme/bin/repl
index b865aa2..7649a64 100755
--- a/awk/scheme/scheme/bin/repl
+++ b/awk/scheme/scheme/bin/repl
@@ -27,19 +27,24 @@ for component in "$COMPILER" "$VM"; do
     chmod +x "$component"
 done
 
-# Create temporary directory for our work
+# Set up temporary files and state
 TMPDIR=$(mktemp -d)
 debug "Created temp dir: $TMPDIR"
+STATE_FILE="/tmp/scheme_vm.state"
 
 cleanup() {
     debug "Cleaning up temp dir: $TMPDIR"
     rm -rf "$TMPDIR"
+    # The EXIT trap calls cleanup again with no argument, so remember an earlier keep_state request
+    if [ "$1" = "keep_state" ]; then KEEP_STATE=1; fi
+    if [ -z "${KEEP_STATE:-}" ]; then rm -f "$STATE_FILE"; fi
 }
-trap cleanup EXIT
+trap "cleanup" EXIT
 
 # Set up temporary files
 INPUT_FILE="$TMPDIR/input.scm"
 ASM_FILE="$TMPDIR/output.asm"
+DEBUG_FILE="$TMPDIR/debug.out"
 
 # Function to handle evaluation
 evaluate_expression() {
@@ -53,15 +58,29 @@ evaluate_expression() {
     
     debug "Evaluating expression: $input"
     echo "$input" > "$INPUT_FILE"
+    debug "Input file contents:"
+    cat "$INPUT_FILE" >&2
     
-    if awk -f "$COMPILER" "$INPUT_FILE" > "$ASM_FILE" 2>/dev/null; then
-        result=$(awk -f "$VM" "$ASM_FILE")
+    # Show compilation output even if it fails
+    debug "Running compiler..."
+    if awk -f "$COMPILER" "$INPUT_FILE" > "$ASM_FILE" 2> "$DEBUG_FILE"; then
+        debug "Compilation successful. Debug output:"
+        cat "$DEBUG_FILE" >&2
+        debug "Generated assembly:"
+        cat "$ASM_FILE" >&2
+        
+        debug "Running VM..."
+        # Use persistent VM state
+        result=$(awk -v PERSIST=1 -f "$VM" "$ASM_FILE" 2>&1)
+        debug "VM output: $result"
         if [ -n "$result" ]; then
             echo "$result"
         fi
         return 0
     else
         echo "Compilation error" >&2
+        debug "Compiler output:"
+        cat "$DEBUG_FILE" >&2
         return 1
     fi
 }
@@ -72,9 +91,11 @@ if [ "$#" -gt 0 ]; then
         echo "Error: File not found: $1" >&2
         exit 1
     fi
-    # Read entire file content at once
+    debug "Reading file: $1"
     file_content=$(cat "$1" | tr '\n' ' ')
+    debug "File content: $file_content"
     evaluate_expression "$file_content"
+    cleanup "keep_state"  # Keep state after file execution
     exit 0
 fi
 
diff --git a/awk/scheme/scheme/bin/vm.awk b/awk/scheme/scheme/bin/vm.awk
index ce81bbe..41189be 100755
--- a/awk/scheme/scheme/bin/vm.awk
+++ b/awk/scheme/scheme/bin/vm.awk
@@ -17,13 +17,56 @@ BEGIN {
     
     # Debug mode
     DEBUG = 0
+
+    # Environment for variables
+    env_size = 0
+    
+    # Function table (make it persistent)
+    delete func_name
+    delete func_pc
+    delete func_code
+    func_size = 0
+    
+    # Call stack
+    call_stack_ptr = 0
+
+    # State persistence
+    STATE_FILE = "/tmp/scheme_vm.state"
+    if (PERSIST) {
+        debug("Loading state from: " STATE_FILE)
+        if ((getline line < STATE_FILE) >= 0) {  # Check if file exists and is readable
+            do {
+                if (line ~ /^FUNC /) {
+                    sub(/^FUNC /, "", line)
+                    name = line
+                    sub(/ .*$/, "", name)
+                    code = line
+                    sub(/^[^ ]+ /, "", code)
+                    
+                    debug("Loaded function: " name)
+                    debug("Code: " code)
+                    
+                    func_name[func_size] = name
+                    func_code[func_size] = code
+                    func_size++
+                }
+            } while ((getline line < STATE_FILE) > 0)
+            close(STATE_FILE)
+        }
+    }
+
+    # Function environments
+    delete func_env_names
+    delete func_env_vals
+    delete func_env_sizes
+
+    # Global function storage
+    delete FUNCTIONS  # Our own function storage array
 }
 
 # Debug output function
 function debug(msg) {
-    if (DEBUG) {
-        printf("[DEBUG] %s\n", msg) > "/dev/stderr"
-    }
+    if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr"
 }
 
 # Value construction and access
@@ -239,6 +282,24 @@ function execute(instr) {
         }
         exit(0)
     }
+    else if (op == "STORE") {
+        vm_store(parts[2])
+    }
+    else if (op == "POP_ENV") {
+        vm_pop_env()
+    }
+    else if (op == "LOOKUP") {
+        vm_lookup(parts[2])
+    }
+    else if (op == "LABEL") {
+        vm_define_function(parts[2], pc)
+    }
+    else if (op == "CALL") {
+        vm_call_function(parts[2])
+    }
+    else if (op == "RETURN") {
+        vm_return()
+    }
     else {
         error("Unknown instruction: " op)
     }
@@ -261,4 +322,139 @@ END {
     if (stack_ptr > 0) {
         print peek()
     }
+
+    # Save state if persistence is enabled
+    if (PERSIST) {
+        debug("Saving state to: " STATE_FILE)
+        for (i = 0; i < func_size; i++) {
+            debug("Saving function: " func_name[i])
+            print "FUNC " func_name[i] " " func_code[i] > STATE_FILE
+        }
+        close(STATE_FILE)
+    }
+}
+
+# Add new VM operations
+function vm_store(name) {
+    debug("Storing " peek() " as " name " at env_size: " env_size)
+    # Append a (name, value) entry at index env_size; the value is read with peek(), not pop()
+    env_name[env_size] = name
+    env_val[env_size] = peek()
+    env_size++
+    debug("Environment after store:")
+    dump_env()
+}
+
+function vm_pop_env() {  # Drop the most recently stored environment entry
+    if (env_size <= 0) error("Environment underflow")
+    debug("Popping environment at size: " env_size)
+    debug("Removing: " env_name[env_size-1] " = " env_val[env_size-1])
+    env_size--  # Only the logical size shrinks; the array slots are left in place
+}
+
+function vm_lookup(name,    i) {  # Push the value bound to name, or report an undefined variable
+    debug("Looking up " name " in environment of size: " env_size)
+    dump_env()
+    for (i = env_size - 1; i >= 0; i--) {  # Newest entry first, so later bindings shadow earlier ones
+        if (env_name[i] == name) {
+            debug("Found " name " = " env_val[i] " at position " i)
+            push(env_val[i])
+            return
+        }
+    }
+    error("Undefined variable: " name)
+}
+
+function vm_define_function(name, start_pc,    code, i) {
+    debug("Defining function: " name " at " start_pc)
+    
+    # Join instructions from start_pc up to (not including) the next RETURN, newline-separated
+    code = ""
+    i = start_pc
+    while (i < length(program) && program[i] != "RETURN") {
+        if (code != "") code = code "\n"
+        code = code program[i]
+        i++
+    }
+    code = code "\nRETURN"  # Always terminate the stored body with RETURN
+    
+    # Store in our function array
+    debug("Storing function: " name " = " code)
+    FUNCTIONS[name] = code
+    
+    pc = i + 1  # Move the global program counter past the function body
+}
+
+function vm_call_function(name,    code_lines, j, saved_pc, saved_env_size, arg, param_name, n) {
+    debug("Calling function: " name)
+    
+    if (!(name in FUNCTIONS)) {
+        error("Undefined function: " name)
+    }
+    
+    # Get argument from stack before modifying program (only a single argument is handled)
+    arg = pop()
+    debug("Function argument: " arg)
+    
+    saved_pc = pc
+    saved_env_size = env_size
+    
+    # Split function code into lines; n is the instruction count
+    n = split(FUNCTIONS[name], code_lines, "\n")
+    
+    # Extract parameter name from first STORE instruction
+    if (code_lines[1] ~ /^STORE /) {
+        param_name = substr(code_lines[1], 7)  # Skip "STORE "
+        debug("Found parameter name: " param_name)
+    } else {
+        error("Function missing parameter definition")
+    }
+    
+    # Create new environment frame with correct parameter name
+    debug("Creating new environment frame at size: " env_size)
+    env_name[env_size] = param_name
+    env_val[env_size] = arg
+    env_size++
+    
+    # Append function code in order; for (j in ...) visits indices in an unspecified order
+    for (j = 1; j <= n; j++) {
+        program[length(program)] = code_lines[j]
+    }
+    
+    # Jump to function code
+    pc = length(program) - n
+    call_stack[++call_stack_ptr] = saved_pc
+    env_stack[call_stack_ptr] = saved_env_size
+    
+    debug("Function found, jumping to PC: " pc " with env_size: " saved_env_size)
+    dump_env()
+}
+
+function vm_return(    ret_val) {
+    if (call_stack_ptr > 0) {  # With no active call, RETURN does nothing
+        # Save return value
+        ret_val = pop()
+        
+        # Restore environment to the size recorded when the call was made
+        while (env_size > env_stack[call_stack_ptr]) {
+            debug("Popping environment at size: " env_size)
+            vm_pop_env()
+        }
+        
+        # Restore program counter
+        pc = call_stack[call_stack_ptr--]
+        
+        # Push return value back
+        push(ret_val)
+        
+        debug("Returned with value: " ret_val " and env_size: " env_size)
+    }
+}
+
+# Write every live environment entry (index, name, value) through debug()
+function dump_env(    i) {
+    debug("Environment dump:")
+    for (i = 0; i < env_size; i++) {  # Oldest entry first
+        debug("  " i ": " env_name[i] " = " env_val[i])
+    }
 }
\ No newline at end of file
diff --git a/awk/scheme/scheme/examples/function-test.scm b/awk/scheme/scheme/examples/function-test.scm
new file mode 100644
index 0000000..ec66b04
--- /dev/null
+++ b/awk/scheme/scheme/examples/function-test.scm
@@ -0,0 +1,2 @@
+(define add2 (x) (+ x 2))
+(add2 40) 
\ No newline at end of file
diff --git a/awk/scheme/scheme/examples/let-and-def.scm b/awk/scheme/scheme/examples/let-and-def.scm
new file mode 100644
index 0000000..fade30b
--- /dev/null
+++ b/awk/scheme/scheme/examples/let-and-def.scm
@@ -0,0 +1,9 @@
+; Let expression example
+(let ((x 5) (y 3))
+  (+ x y))
+
+; Function definition example
+(define add2 (x)
+  (+ x 2))
+
+(add2 40)  ; Returns 42
\ No newline at end of file
diff --git a/awk/scheme/scheme/examples/simple.scm b/awk/scheme/scheme/examples/simple.scm
index 4d956a6..2cdc3b8 100644
--- a/awk/scheme/scheme/examples/simple.scm
+++ b/awk/scheme/scheme/examples/simple.scm
@@ -1 +1,2 @@
-(+ 1 2)
+(let ((x 5))
+  (+ x 2))