diff options
author | elioat <elioat@tilde.institute> | 2025-01-20 10:51:32 -0500 |
---|---|---|
committer | elioat <elioat@tilde.institute> | 2025-01-20 10:51:32 -0500 |
commit | 1dda44054a253e027a2cb6f8d33ad56dac6bda2b (patch) | |
tree | 709e51bddb907b02446084884078bcd133cdeaf7 | |
parent | 9563293d0521e5b5d3306e3bb3f512b5e4765d6b (diff) | |
download | tour-1dda44054a253e027a2cb6f8d33ad56dac6bda2b.tar.gz |
*
-rwxr-xr-x | awk/scheme/scheme/bin/compiler.awk | 226 | ||||
-rwxr-xr-x | awk/scheme/scheme/bin/repl | 31 | ||||
-rwxr-xr-x | awk/scheme/scheme/bin/vm.awk | 202 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/function-test.scm | 2 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/let-and-def.scm | 9 | ||||
-rw-r--r-- | awk/scheme/scheme/examples/simple.scm | 3 |
6 files changed, 436 insertions, 37 deletions
diff --git a/awk/scheme/scheme/bin/compiler.awk b/awk/scheme/scheme/bin/compiler.awk index d4d0ff0..6a57f19 100755 --- a/awk/scheme/scheme/bin/compiler.awk +++ b/awk/scheme/scheme/bin/compiler.awk @@ -8,40 +8,71 @@ BEGIN { program = "" # Debug mode - DEBUG = 1 + DEBUG = 0 } function debug(msg) { - if (DEBUG) print "[DEBUG] " msg > "/dev/stderr" + if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" } -# Clean line by removing comments -function clean_line(line) { - # Remove everything after and including semicolon - sub(/;.*$/, "", line) - # Trim whitespace - sub(/^[ \t]+/, "", line) - sub(/[ \t]+$/, "", line) - return line -} - -# Process input line +# Process input line - just accumulate the raw input { - line = clean_line($0) - if (line != "") { - if (program != "") program = program " " - program = program line - } + if (program != "") program = program "\n" + program = program $0 } END { - debug("Input program after cleaning: " program) + debug("Raw program:\n" program) if (program == "") exit - # Parse and compile the program - expr = parse_expr() - debug("Parsed expression: " expr) - compile_expr(expr) + # Split program into expressions and compile each one + split_expressions(program) +} + +# New function to handle multiple expressions +function split_expressions(prog, current, paren_count, i, c, expr, cleaned) { + current = "" + paren_count = 0 + + # Extract expressions between parentheses + cleaned = prog + gsub(/;[^(]*\(/, "(", cleaned) # Remove comments before expressions + gsub(/\)[^)]*;/, ")", cleaned) # Remove comments after expressions + gsub(/[ \t\n]+/, " ", cleaned) # Normalize whitespace + sub(/^[ \t\n]+/, "", cleaned) # Trim leading whitespace + sub(/[ \t\n]+$/, "", cleaned) # Trim trailing whitespace + + debug("Cleaned program: [" cleaned "]") + + if (cleaned == "") return + + for (i = 1; i <= length(cleaned); i++) { + c = substr(cleaned, i, 1) + + if (c == "(") { + if (paren_count == 0) current = "" + paren_count++ + } + + current = current c + + if (c == ")") { + paren_count-- + if (paren_count == 0) { + # Found complete expression + expr = current + sub(/^\s+/, "", expr) + sub(/\s+$/, "", expr) + + debug("Processing expression: [" expr "]") + program = expr # Set for parser + expr = parse_expr() + compile_expr(expr) + current = "" + } + } + } + print "HALT" } @@ -181,7 +212,7 @@ function compile_primitive_call(op, args, arg_array, nargs, i) { debug("Primitive call: op=" op " args=" args) nargs = split_args(args, arg_array) - # Compile each argument + # Compile arguments for all operations for (i = 1; i <= nargs; i++) { compile_expr(arg_array[i]) } @@ -223,10 +254,139 @@ function compile_primitive_call(op, args, arg_array, nargs, i) { print "EQ" } else { - error("Unknown operator: " op) + # Function call + debug("Function call: " op) + print "CALL " op + } +} + +function split_bindings(bindings, binding_array, count, current, paren_count, i, c) { + count = 0 + current = "" + paren_count = 0 + + for (i = 1; i <= length(bindings); i++) { + c = substr(bindings, i, 1) + + # Track parentheses + if (c == "(") { + paren_count++ + if (paren_count == 1) { + current = "" # Start new binding + continue + } + } + if (c == ")") { + paren_count-- + if (paren_count == 0) { + # End of binding + binding_array[++count] = current + current = "" + continue + } + } + + # Only add character if we're inside a binding + if (paren_count > 0) { + current = current c + } + } + + return count +} + +function compile_let(args, bindings, body, binding_array, nbindings, i, var, val, binding_parts) { + # Split into bindings and body + if (substr(args, 1, 1) != "(") error("Malformed let expression") + + # Find matching closing parenthesis for bindings + paren_count = 1 + i = 2 + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in let bindings") + + bindings = substr(args, 2, i - 3) # Remove outer parentheses + body = substr(args, i) + + # Trim whitespace from body + sub(/^[ \t\n]+/, "", body) + sub(/[ \t\n]+$/, "", body) + + debug("Let bindings: " bindings) + debug("Let body: " body) + + # Compile each binding + nbindings = split_bindings(bindings, binding_array) + for (i = 1; i <= nbindings; i++) { + debug("Processing binding: " binding_array[i]) + split(binding_array[i], binding_parts, " ") + var = binding_parts[1] + val = binding_parts[2] + + debug("Binding var: " var " val: " val) + + # Compile the value + compile_expr(val) + + # Store in environment + print "STORE " var + } + + # Compile the body + compile_expr(body) + + # Clean up bindings AFTER evaluating body + for (i = nbindings; i >= 1; i--) { + print "POP_ENV" } } +function compile_define(args, name, params, body, param_array, nparams, i, paren_start, paren_end) { + # Find the function name (everything up to the first space) + i = index(args, " ") + if (i == 0) error("Malformed define expression") + name = substr(args, 1, i - 1) + args = substr(args, i + 1) + + # Find parameter list + if (substr(args, 1, 1) != "(") error("Missing parameter list in define") + + # Find matching closing parenthesis for params + paren_count = 1 + i = 2 + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in parameter list") + + params = substr(args, 2, i - 3) # Remove parentheses + body = substr(args, i + 1) + + # Create function label + print "LABEL " name + + # Process parameters + nparams = split(params, param_array, " ") + for (i = 1; i <= nparams; i++) { + print "STORE " param_array[i] + } + + # Compile function body + compile_expr(body) + + # Clean up parameters and return + for (i = nparams; i >= 1; i--) { + print "POP_ENV" + } + print "RETURN" +} + function compile_expr(expr, split_result, op, args) { debug("Compiling expression: " expr) @@ -240,15 +400,25 @@ function compile_expr(expr, split_result, op, args) { return } + # Add variable lookup + if (expr ~ /^[a-zA-Z_][a-zA-Z0-9_]*$/) { + print "LOOKUP " expr + return + } + if (substr(expr, 1, 1) == "(") { - # Strip outer parentheses expr = substr(expr, 2, length(expr) - 2) split_result = split_expr(expr) op = substr(split_result, 1, index(split_result, SUBSEP) - 1) args = substr(split_result, index(split_result, SUBSEP) + 1) - debug("Split expression: op=" op " args=" args) - compile_primitive_call(op, args) + if (op == "define") { + compile_define(args) + } else if (op == "let") { + compile_let(args) + } else { + compile_primitive_call(op, args) + } return } diff --git a/awk/scheme/scheme/bin/repl b/awk/scheme/scheme/bin/repl index b865aa2..7649a64 100755 --- a/awk/scheme/scheme/bin/repl +++ b/awk/scheme/scheme/bin/repl @@ -27,19 +27,24 @@ for component in "$COMPILER" "$VM"; do chmod +x "$component" done -# Create temporary directory for our work +# Set up temporary files and state TMPDIR=$(mktemp -d) debug "Created temp dir: $TMPDIR" +STATE_FILE="/tmp/scheme_vm.state" cleanup() { debug "Cleaning up temp dir: $TMPDIR" rm -rf "$TMPDIR" + if [ "$1" != "keep_state" ]; then + rm -f "$STATE_FILE" + fi } -trap cleanup EXIT +trap "cleanup" EXIT # Set up temporary files INPUT_FILE="$TMPDIR/input.scm" ASM_FILE="$TMPDIR/output.asm" +DEBUG_FILE="$TMPDIR/debug.out" # Function to handle evaluation evaluate_expression() { @@ -53,15 +58,29 @@ evaluate_expression() { debug "Evaluating expression: $input" echo "$input" > "$INPUT_FILE" + debug "Input file contents:" + cat "$INPUT_FILE" >&2 - if awk -f "$COMPILER" "$INPUT_FILE" > "$ASM_FILE" 2>/dev/null; then - result=$(awk -f "$VM" "$ASM_FILE") + # Show compilation output even if it fails + debug "Running compiler..." + if awk -f "$COMPILER" "$INPUT_FILE" > "$ASM_FILE" 2> "$DEBUG_FILE"; then + debug "Compilation successful. Debug output:" + cat "$DEBUG_FILE" >&2 + debug "Generated assembly:" + cat "$ASM_FILE" >&2 + + debug "Running VM..." + # Use persistent VM state + result=$(awk -v PERSIST=1 -f "$VM" "$ASM_FILE" 2>&1) + debug "VM output: $result" if [ -n "$result" ]; then echo "$result" fi return 0 else echo "Compilation error" >&2 + debug "Compiler output:" + cat "$DEBUG_FILE" >&2 return 1 fi } @@ -72,9 +91,11 @@ if [ "$#" -gt 0 ]; then echo "Error: File not found: $1" >&2 exit 1 fi - # Read entire file content at once + debug "Reading file: $1" file_content=$(cat "$1" | tr '\n' ' ') + debug "File content: $file_content" evaluate_expression "$file_content" + cleanup "keep_state" # Keep state after file execution exit 0 fi diff --git a/awk/scheme/scheme/bin/vm.awk b/awk/scheme/scheme/bin/vm.awk index ce81bbe..41189be 100755 --- a/awk/scheme/scheme/bin/vm.awk +++ b/awk/scheme/scheme/bin/vm.awk @@ -17,13 +17,56 @@ BEGIN { # Debug mode DEBUG = 0 + + # Environment for variables + env_size = 0 + + # Function table (make it persistent) + delete func_name + delete func_pc + delete func_code + func_size = 0 + + # Call stack + call_stack_ptr = 0 + + # State persistence + STATE_FILE = "/tmp/scheme_vm.state" + if (PERSIST) { + debug("Loading state from: " STATE_FILE) + if ((getline line < STATE_FILE) >= 0) { # Check if file exists and is readable + do { + if (line ~ /^FUNC /) { + sub(/^FUNC /, "", line) + name = line + sub(/ .*$/, "", name) + code = line + sub(/^[^ ]+ /, "", code) + + debug("Loaded function: " name) + debug("Code: " code) + + func_name[func_size] = name + func_code[func_size] = code + func_size++ + } + } while ((getline line < STATE_FILE) > 0) + close(STATE_FILE) + } + } + + # Function environments + delete func_env_names + delete func_env_vals + delete func_env_sizes + + # Global function storage + delete FUNCTIONS # Our own function storage array } # Debug output function function debug(msg) { - if (DEBUG) { - printf("[DEBUG] %s\n", msg) > "/dev/stderr" - } + if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" } # Value construction and access @@ -239,6 +282,24 @@ function execute(instr) { } exit(0) } + else if (op == "STORE") { + vm_store(parts[2]) + } + else if (op == "POP_ENV") { + vm_pop_env() + } + else if (op == "LOOKUP") { + vm_lookup(parts[2]) + } + else if (op == "LABEL") { + vm_define_function(parts[2], pc) + } + else if (op == "CALL") { + vm_call_function(parts[2]) + } + else if (op == "RETURN") { + vm_return() + } else { error("Unknown instruction: " op) } @@ -261,4 +322,139 @@ END { if (stack_ptr > 0) { print peek() } + + # Save state if persistence is enabled + if (PERSIST) { + debug("Saving state to: " STATE_FILE) + for (i = 0; i < func_size; i++) { + debug("Saving function: " func_name[i]) + print "FUNC " func_name[i] " " func_code[i] > STATE_FILE + } + close(STATE_FILE) + } +} + +# Add new VM operations +function vm_store(name) { + debug("Storing " peek() " as " name " at env_size: " env_size) + # Store in current environment frame + env_name[env_size] = name + env_val[env_size] = peek() + env_size++ + debug("Environment after store:") + dump_env() +} + +function vm_pop_env() { + if (env_size <= 0) error("Environment underflow") + debug("Popping environment at size: " env_size) + debug("Removing: " env_name[env_size-1] " = " env_val[env_size-1]) + env_size-- +} + +function vm_lookup(name, i) { + debug("Looking up " name " in environment of size: " env_size) + dump_env() + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == name) { + debug("Found " name " = " env_val[i] " at position " i) + push(env_val[i]) + return + } + } + error("Undefined variable: " name) +} + +function vm_define_function(name, start_pc) { + debug("Defining function: " name " at " start_pc) + + # Build function code + code = "" + i = start_pc + while (i < length(program) && program[i] != "RETURN") { + if (code != "") code = code "\n" + code = code program[i] + i++ + } + code = code "\nRETURN" + + # Store in our function array + debug("Storing function: " name " = " code) + FUNCTIONS[name] = code + + pc = i + 1 +} + +function vm_call_function(name, code_lines, j, saved_pc, saved_env_size, arg, param_name) { + debug("Calling function: " name) + + if (!(name in FUNCTIONS)) { + error("Undefined function: " name) + } + + # Get argument from stack before modifying program + arg = pop() + debug("Function argument: " arg) + + saved_pc = pc + saved_env_size = env_size + + # Split function code into lines + split(FUNCTIONS[name], code_lines, "\n") + + # Extract parameter name from first STORE instruction + if (code_lines[1] ~ /^STORE /) { + param_name = substr(code_lines[1], 7) # Skip "STORE " + debug("Found parameter name: " param_name) + } else { + error("Function missing parameter definition") + } + + # Create new environment frame with correct parameter name + debug("Creating new environment frame at size: " env_size) + env_name[env_size] = param_name + env_val[env_size] = arg + env_size++ + + # Add function code to program + for (j in code_lines) { + program[length(program)] = code_lines[j] + } + + # Jump to function code + pc = length(program) - length(code_lines) + call_stack[++call_stack_ptr] = saved_pc + env_stack[call_stack_ptr] = saved_env_size + + debug("Function found, jumping to PC: " pc " with env_size: " saved_env_size) + dump_env() +} + +function vm_return() { + if (call_stack_ptr > 0) { + # Save return value + ret_val = pop() + + # Restore environment + while (env_size > env_stack[call_stack_ptr]) { + debug("Popping environment at size: " env_size) + vm_pop_env() + } + + # Restore program counter + pc = call_stack[call_stack_ptr--] + + # Push return value back + push(ret_val) + + debug("Returned with value: " ret_val " and env_size: " env_size) + } +} + +# New helper function to dump environment state +function dump_env( i) { + debug("Environment dump:") + for (i = 0; i < env_size; i++) { + debug(" " i ": " env_name[i] " = " env_val[i]) + } } \ No newline at end of file diff --git a/awk/scheme/scheme/examples/function-test.scm b/awk/scheme/scheme/examples/function-test.scm new file mode 100644 index 0000000..ec66b04 --- /dev/null +++ b/awk/scheme/scheme/examples/function-test.scm @@ -0,0 +1,2 @@ +(define add2 (x) (+ x 2)) +(add2 40) \ No newline at end of file diff --git a/awk/scheme/scheme/examples/let-and-def.scm b/awk/scheme/scheme/examples/let-and-def.scm new file mode 100644 index 0000000..fade30b --- /dev/null +++ b/awk/scheme/scheme/examples/let-and-def.scm @@ -0,0 +1,9 @@ +; Let expression example +(let ((x 5) (y 3)) + (+ x y)) + +; Function definition example +(define add2 (x) + (+ x 2)) + +(add2 40) ; Returns 42 \ No newline at end of file diff --git a/awk/scheme/scheme/examples/simple.scm b/awk/scheme/scheme/examples/simple.scm index 4d956a6..2cdc3b8 100644 --- a/awk/scheme/scheme/examples/simple.scm +++ b/awk/scheme/scheme/examples/simple.scm @@ -1 +1,2 @@ -(+ 1 2) +(let ((x 5)) + (+ x 2)) |