diff options
Diffstat (limited to 'awk')
41 files changed, 4945 insertions, 0 deletions
diff --git a/awk/forth/f.awk b/awk/forth/f.awk new file mode 100755 index 0000000..16de171 --- /dev/null +++ b/awk/forth/f.awk @@ -0,0 +1,369 @@ +#!/usr/bin/awk -f + +# I wanted to implement something non-trivial using awk. +# If I was clever I wouldn’t implement forth directly in awk, +# instead I’d implement a simple virtual machine using awk, +# and then implement the forth using the virtual machine’s byte code +# ...but there is only so much brain power I can exert on such a silly project. + +BEGIN { + # Initialize stacks and dictionaries + stack_ptr = 0 + dict_size = 0 + + # Built-in words, and some documentation (I could use stack comments, + # but I find those sort of unintuitive) + dict["+"] = "+ : Adds the top two numbers on the stack." + dict["-"] = "- : Subtracts the top number from the second top number on the stack." + dict["*"] = "* : Multiplies the top two numbers on the stack." + dict["/"] = "/ : Divides the second top number by the top number on the stack." + dict["."] = ". : Prints the top of the stack." + dict[".s"] = ".s : Shows all values on the stack." + dict["dup"] = "dup : Duplicates the top value on the stack." + dict["drop"] = "drop : Removes the top value from the stack." + dict["swap"] = "swap : Swaps the top two values on the stack." + dict["over"] = "over : Copies the second top value to the top of the stack." + dict["rot"] = "rot : Rotates the top three values on the stack." + dict["="] = "= : Compares the top two values for equality." + dict["<"] = "< : Checks if the second top value is less than the top value." + dict[">"] = "> : Checks if the second top value is greater than the top value." + dict["bye"] = "bye : Exits the interpreter." + dict["words"] = "words : Lists all available words and their documentation." + + # State flags + compiling = 0 + current_def = "" + def_name = "" + + # If an input file isn't specified, enter REPL mode + if (ARGC == 1) { + repl() + } +} + +# Handle file input +{ + if (FILENAME ~ /\.forth$/) { + interpret($0) + } +} + +function repl() { + print "f.awk! A forth interpreter.\nUse 'bye' to exit.\nUse 'words' to list all available words.\n" + while (1) { + printf "f> " + if (getline input < "/dev/tty" <= 0) break + interpret(input) + } +} + +function interpret(line) { + gsub(/\(.*\)/, "", line) # Remove everything from ( to ) + gsub(/\\.*$/, "", line) # Remove backslash comments, too + + n = split(line, words, /[ \t]+/) + + for (i = 1; i <= n; i++) { + word = words[i] + if (word == "") continue + + # print "Processing word: " word + + if (word == ":") { + compiling = 1 + i++ + def_name = words[i] + current_def = "" + continue + } + + if (compiling) { + if (word == ";") { + # Store user-defined word with its name and definition + dict[def_name] = "word " current_def + compiling = 0 + continue + } + current_def = current_def " " word + continue + } + + # Execute the word and skip further processing if it's .s + if (word == ".s") { + execute_word(word) + break # Exit the loop after executing .s + } + + execute_word(word) + } +} + +function execute_word(word) { + if (word ~ /^-?[0-9]+$/) { + push(word + 0) + } else if (word in dict) { + if (dict[word] ~ /^word /) { + # User-defined word + sequence = substr(dict[word], 6) + split(sequence, subwords, " ") + for (sw in subwords) { + if (subwords[sw] != "") { + execute_word(subwords[sw]) + } + } + } else { + # Built-in words + if (word == "+") math_add() + else if (word == "-") math_sub() + else if (word == "*") math_mul() + else if (word == "/") math_div() + else if (word == ".") stack_print() + else if (word == ".s") { + # print "Executing .s command" + stack_show() + } + else if (word == "dup") stack_dup() + else if (word == "drop") stack_drop() + else if (word == "swap") stack_swap() + else if (word == "over") stack_over() + else if (word == "rot") stack_rot() + else if (word == "=") compare_eq() + else if (word == "<") compare_lt() + else if (word == ">") compare_gt() + else if (word == "bye") exit_program() + else if (word == "words") list_words() + else if (word == "if") { + # Handle the if statement + if_condition = pop() + if (if_condition == 0) { + # Skip to the next part until we find 'then' or 'else' + skip_if = 1 + } + } + else if (word == "else") { + # Handle the else statement + if (skip_if) { + skip_if = 0 # Reset the skip flag + } else { + # Skip to the next part until we find 'then' + skip_else = 1 + } + } + else if (word == "then") { + # End of the conditional + skip_if = 0 + skip_else = 0 + } + } + } else { + print "Error: Unknown word '" word "'" + } +} + +function push(val) { + stack[stack_ptr++] = val +} + +function pop() { + if (stack_ptr <= 0) { + print "Error: Stack underflow" + return 0 + } + return stack[--stack_ptr] +} + +function math_add() { + if (stack_ptr < 2) { + print "Error: Stack underflow" + return + } + b = pop() + a = pop() + push(a + b) +} + +function math_sub() { + if (stack_ptr < 2) { + print "Error: Stack underflow" + return + } + b = pop() + a = pop() + push(a - b) +} + +function math_mul() { + if (stack_ptr < 2) { + print "Error: Stack underflow" + return + } + b = pop() + a = pop() + push(a * b) +} + +function math_div() { + if (stack_ptr < 2) { + print "Error: Stack underflow" + return + } + b = pop() + if (b == 0) { + print "Error: Division by zero" + return + } + a = pop() + push(int(a / b)) +} + +function stack_print() { + if (stack_ptr < 1) { + print "Error: Stack underflow" + return + } + print pop() +} + +function stack_show() { + print "<", stack_ptr, "> " + for (i = 0; i < stack_ptr; i++) { + printf "%s ", stack[i] + } + print "" + # print "Stack state after .s: " + # for (i = 0; i < stack_ptr; i++) { + # print stack[i] + # } + # print "" +} + +function stack_dup() { + if (stack_ptr < 1) { + print "Error: Stack underflow" + return + } + val = stack[stack_ptr - 1] + push(val) +} + +function stack_drop() { + if (stack_ptr < 1) { + print "Error: Stack underflow" + return + } + pop() +} + +function stack_swap() { + if (stack_ptr < 2) { + print "Error: Stack underflow" + return + } + b = pop() + a = pop() + push(b) + push(a) +} + +function stack_over() { + if (stack_ptr < 2) { + print "Error: Stack underflow" + return + } + b = pop() + a = pop() + push(a) + push(b) + push(a) +} + +function stack_rot() { + if (stack_ptr < 3) { + print "Error: Stack underflow" + return + } + c = pop() + b = pop() + a = pop() + push(b) + push(c) + push(a) +} + +function compare_eq() { + if (stack_ptr < 2) { + print "Error: Stack underflow" + return + } + b = pop() + a = pop() + push(a == b ? -1 : 0) +} + +function compare_lt() { + if (stack_ptr < 2) { + print "Error: Stack underflow" + return + } + b = pop() + a = pop() + push(a < b ? -1 : 0) +} + +function compare_gt() { + if (stack_ptr < 2) { + print "Error: Stack underflow" + return + } + b = pop() + a = pop() + push(a > b ? -1 : 0) +} + +function exit_program() { + print "Exiting program." + exit 0 +} + +function list_words() { + print "Available words:" + + # Separate arrays to hold built-in and user-defined words + split("", built_in_words) + split("", user_defined_words) + + for (w in dict) { + split(dict[w], parts, ": ") + if (parts[1] ~ /^word /) { + user_defined_words[w] = parts[2] + } else { + built_in_words[w] = parts[2] + } + } + + # Sort built-in words manually because I'm picky + n = 0 + for (w in built_in_words) { + sorted_words[n++] = w + } + + for (i = 0; i < n; i++) { + for (j = i + 1; j < n; j++) { + if (sorted_words[i] > sorted_words[j]) { + temp = sorted_words[i] + sorted_words[i] = sorted_words[j] + sorted_words[j] = temp + } + } + } + + # First print the built-in words + for (i = 0; i < n; i++) { + print sorted_words[i] ": " built_in_words[sorted_words[i]] + } + + # Then print the user-defined words + for (w in user_defined_words) { + print w ": " user_defined_words[w] " ( User-defined )" + } +} \ No newline at end of file diff --git a/awk/forth/old/f.awk b/awk/forth/old/f.awk new file mode 100755 index 0000000..eed9774 --- /dev/null +++ b/awk/forth/old/f.awk @@ -0,0 +1,344 @@ +#!/usr/bin/awk -f + +# Forth interpreter in AWK + +BEGIN { + print "Welcome to the AWK Forth Interpreter!" + print "Type your commands below. Use 'bye' to quit." + # Initialize variables + top = -1 # Initialize stack pointer + + # Initialize the dictionary with basic words + words["+"] = "+" + words["-"] = "-" + words["*"] = "*" + words["/"] = "/" + words["dup"] = "dup" + words["over"] = "over" + words["swap"] = "swap" + words["."] = "." + words["bye"] = "bye" + words["rot"] = "rot" + words["drop"] = "drop" + words["nip"] = "nip" + words["tuck"] = "tuck" + words["roll"] = "roll" + words["pick"] = "pick" + words["negate"] = "negate" + words["abs"] = "abs" + words["max"] = "max" + words["min"] = "min" + words["mod"] = "mod" + words["="] = "=" + words["see"] = "see" + words["if"] = "if" + words["then"] = "then" + words["else"] = "else" + words[">"] = ">" + words["<"] = "<" + + # Add handlers for all words + handlers["+"] = "add" + handlers["-"] = "subtract" + handlers["*"] = "multiply" + handlers["/"] = "divide" + handlers["dup"] = "dup" + handlers["over"] = "over" + handlers["swap"] = "swap" + handlers["."] = "print_top" + handlers["<"] = "less_than" + handlers[">"] = "greater_than" + handlers["rot"] = "rot" + handlers["drop"] = "drop" + handlers["nip"] = "nip" + handlers["tuck"] = "tuck" + handlers["roll"] = "roll" + handlers["pick"] = "pick" + handlers["negate"] = "negate" + handlers["abs"] = "abs" + handlers["max"] = "max" + handlers["min"] = "min" + handlers["mod"] = "mod" + handlers["="] = "equals" + handlers["if"] = "handle_if" + handlers["then"] = "handle_then" + handlers["else"] = "handle_else" + handlers["bye"] = "bye" + handlers["see"] = "see" + + # Add descriptions for words + desc["+"] = "( n1 n2 -- sum ) Add top two numbers" + desc["-"] = "( n1 n2 -- diff ) Subtract top number from second" + desc["*"] = "( n1 n2 -- prod ) Multiply top two numbers" + desc["/"] = "( n1 n2 -- quot ) Divide second by top" + desc["dup"] = "( n -- n n ) Duplicate top of stack" + desc["over"] = "( n1 n2 -- n1 n2 n1 ) Copy second item to top" + desc["swap"] = "( n1 n2 -- n2 n1 ) Swap top two items" + desc["rot"] = "( n1 n2 n3 -- n2 n3 n1 ) Rotate top three items" + desc["drop"] = "( n -- ) Discard top item" + desc["nip"] = "( n1 n2 -- n2 ) Remove second item" + desc["tuck"] = "( n1 n2 -- n2 n1 n2 ) Copy top item below second" + desc["roll"] = "( nk ... n1 n0 k -- nk-1 ... n1 n0 nk ) Move kth item to top" + desc["pick"] = "( nk ... n1 n0 k -- nk ... n1 n0 nk ) Copy kth item to top" + desc["negate"] = "( n -- -n ) Negate number" + desc["abs"] = "( n -- |n| ) Absolute value" + desc["max"] = "( n1 n2 -- max ) Maximum of top two numbers" + desc["min"] = "( n1 n2 -- min ) Minimum of top two numbers" + desc["mod"] = "( n1 n2 -- rem ) Remainder of n1/n2" + desc["="] = "( n1 n2 -- flag ) Test if equal, leaves 1 if true, 0 if false" + desc["if"] = "( flag -- ) Begin conditional execution" + desc["then"] = "( -- ) End conditional execution" + desc["else"] = "( -- ) Execute if previous condition was false" + desc[">"] = "( n1 n2 -- flag ) Returns true if n1 is greater than n2" + desc["<"] = "( n1 n2 -- flag ) Returns true if n1 is less than n2" + desc["bye"] = "( -- ) Exit the interpreter" + desc["see"] = "( -- ) Show definition of a word" + + # Initialize condition stack + cond_top = -1 + + # Mark these as compile-only words + compile_only["if"] = 1 + compile_only["then"] = 1 + compile_only["else"] = 1 +} + +# Stack operations +function push(value) { + stack[++top] = value +} + +function pop() { + if (top < 0) { + print "Error: Stack underflow" + return 0 + } + return stack[top--] +} + +function check_stack(min_items, error_msg) { + if (top < min_items - 1) { + print error_msg ? error_msg : "Error: Not enough values on stack" + return 0 + } + return 1 +} + +# Binary operations +function binary_op(operation) { + if (!check_stack(2)) return + second = pop() + first = pop() + if (operation == "+") push(first + second) + else if (operation == "-") push(first - second) + else if (operation == "*") push(first * second) + else if (operation == "/") { + if (second == 0) { + print "Error: Division by zero" + push(first) + push(second) + return + } + push(first / second) + } + else if (operation == "mod") push(first % second) + else if (operation == "=") push(first == second ? 1 : 0) + else if (operation == "<") push(first < second ? 1 : 0) + else if (operation == ">") push(first > second ? 1 : 0) +} + +# Handler functions +function add() { binary_op("+") } +function subtract() { binary_op("-") } +function multiply() { binary_op("*") } +function divide() { binary_op("/") } +function mod() { binary_op("mod") } +function equals() { binary_op("=") } +function less_than() { binary_op("<") } +function greater_than() { binary_op(">") } + +function dup() { + if (!check_stack(1)) return + push(stack[top]) +} + +function over() { + if (!check_stack(2)) return + push(stack[top - 1]) +} + +function swap() { + if (!check_stack(2)) return + temp = pop() + second = pop() + push(temp) + push(second) +} + +function rot() { + if (!check_stack(3)) return + third = pop() + second = pop() + first = pop() + push(second) + push(third) + push(first) +} + +function drop() { + if (!check_stack(1)) return + top-- +} + +function nip() { + if (!check_stack(2)) return + temp = stack[top] + drop() + drop() + push(temp) +} + +function tuck() { + if (!check_stack(2)) return + temp = pop() + second = pop() + push(temp) + push(second) + push(temp) +} + +function roll() { + if (!check_stack(1)) return + n = int(pop()) + if (!check_stack(n)) return + if (n <= 0) return + + temp = stack[top - n + 1] + for (i = top - n + 1; i < top; i++) { + stack[i] = stack[i + 1] + } + stack[top] = temp +} + +function pick() { + if (!check_stack(1)) return + n = int(pop()) + if (!check_stack(n)) return + if (n < 0) return + push(stack[top - n]) +} + +function negate() { + if (!check_stack(1)) return + push(-pop()) +} + +function abs() { + if (!check_stack(1)) return + n = pop() + push(n < 0 ? -n : n) +} + +function max() { + if (!check_stack(2)) return + b = pop() + a = pop() + push(a > b ? a : b) +} + +function min() { + if (!check_stack(2)) return + b = pop() + a = pop() + push(a < b ? a : b) +} + +function print_top() { + if (!check_stack(1)) return + print stack[top] + drop() +} + +function bye() { + exit +} + +function see(word) { + if (!(word in words)) { + print "Error: Word '" word "' not found" + return + } + if (word in desc) { + print desc[word] + } + if (word in raw_definitions) { + print ": " word " " raw_definitions[word] " ;" + } +} + +# Main processing function +function execute_word(word) { + if (word in handlers) { + handler = handlers[word] + if (handler == "bye") exit + else if (handler == "see") { + if (i + 1 <= NF) see($(++i)) + else print "Error: see requires a word name" + } + else if (handler == "add") add() + else if (handler == "subtract") subtract() + else if (handler == "multiply") multiply() + else if (handler == "divide") divide() + else if (handler == "dup") dup() + else if (handler == "over") over() + else if (handler == "swap") swap() + else if (handler == "print_top") print_top() + else if (handler == "less_than") less_than() + else if (handler == "greater_than") greater_than() + else if (handler == "rot") rot() + else if (handler == "drop") drop() + else if (handler == "nip") nip() + else if (handler == "tuck") tuck() + else if (handler == "roll") roll() + else if (handler == "pick") pick() + else if (handler == "negate") negate() + else if (handler == "abs") abs() + else if (handler == "max") max() + else if (handler == "min") min() + else if (handler == "mod") mod() + else if (handler == "equals") equals() + else if (handler == "handle_if") handle_if() + else if (handler == "handle_then") handle_then() + else if (handler == "handle_else") handle_else() + else { + print "Error: Handler '" handler "' not implemented" + return 0 + } + return 1 + } + return 0 +} + +# Process each line of input +{ + if (NF > 0) { + # Remove comments and normalize whitespace + gsub(/\(.*\)/, "") + gsub(/^[[:space:]]+/, "") + gsub(/[[:space:]]+$/, "") + gsub(/[[:space:]]+/, " ") + + # Process each token + for (i = 1; i <= NF; i++) { + if ($i ~ /^-?[0-9]+$/) { + push($i) + } else if ($i in words) { + if (!execute_word($i)) { + print "Error: Failed to execute word '" $i "'" + } + } else { + print "Error: Unknown word '" $i "'" + } + } + } +} \ No newline at end of file diff --git a/awk/forth/old/test.forth b/awk/forth/old/test.forth new file mode 100644 index 0000000..a1f4f50 --- /dev/null +++ b/awk/forth/old/test.forth @@ -0,0 +1,44 @@ +( Basic arithmetic operations ) +2 3 + . ( expect: 5 ) +10 3 - . ( expect: 7 ) +4 5 * . ( expect: 20 ) +20 4 / . ( expect: 5 ) +7 3 mod . ( expect: 1 ) + +( Stack manipulation operations ) +5 dup . . ( expect: 5 5 ) +1 2 swap . . ( expect: 2 1 ) +1 2 over . . . ( expect: 1 2 1 ) +1 2 3 rot . . . ( expect: 2 3 1 ) +1 2 3 4 2 roll . . . . ( expect: 1 3 4 2 ) +5 drop +1 2 nip . ( expect: 2 ) +1 2 tuck . . . ( expect: 2 1 2 ) + +( Comparison operations ) +5 3 > . ( expect: 1 ) +3 5 < . ( expect: 1 ) +4 4 = . ( expect: 1 ) +5 3 < . ( expect: 0 ) +3 5 > . ( expect: 0 ) +4 5 = . ( expect: 0 ) + +( Math operations ) +5 negate . ( expect: -5 ) +-7 abs . ( expect: 7 ) +5 2 max . ( expect: 5 ) +5 2 min . ( expect: 2 ) + +( Complex stack manipulations ) +1 2 3 4 5 \ Put 5 numbers on stack +3 pick . ( expect: 2 ) +2 roll . ( expect: 4 ) +. . . . ( expect: 5 3 1 ) + +( Error handling tests ) +drop drop drop drop drop \ Clear stack +drop ( expect: Error: Stack underflow ) +. ( expect: Error: Stack underflow ) +5 0 / ( expect: Error: Division by zero ) + +bye \ No newline at end of file diff --git a/awk/forth/test.forth b/awk/forth/test.forth new file mode 100644 index 0000000..daa6943 --- /dev/null +++ b/awk/forth/test.forth @@ -0,0 +1,34 @@ +\ Test arithmetic operations +10 5 + . \ Should print 15 +10 5 - . \ Should print 5 +10 5 * . \ Should print 50 +10 5 / . \ Should print 2 + +\ Test stack manipulation +1 2 3 .s \ Should show 3 values: 1 2 3 +dup . \ Should print 3 again +drop . \ Should print 2 +swap .s \ Should show 2 1 +over .s \ Should show 2 1 2 +rot .s \ Should show 1 2 3 + +\ Test comparisons +5 5 = . \ Should print -1 (true) +5 3 < . \ Should print 0 (false) +3 5 > . \ Should print 0 (false) + +\ Test conditionals within user-defined words +: test_if 10 20 if .s then ; \ Should print 1 2 (since the condition is true) +: test_else 10 5 if .s else 1 then ; \ Should print 1 (since the condition is false) + +\ Test user-defined words +: square dup * ; \ Define a word to square a number +4 square . \ Should print 16 + +: add_three 1 2 + + ; \ Define a word to add three numbers +1 2 add_three . \ Should print 6 + +\ List all words +words \ Should list all available words + +bye \ Exit the interpreter \ No newline at end of file diff --git a/awk/retro/retro.awk b/awk/retro/retro.awk new file mode 100755 index 0000000..2a14ff0 --- /dev/null +++ b/awk/retro/retro.awk @@ -0,0 +1,250 @@ +#!/usr/bin/awk -f + +# Constants and VM setup +BEGIN { + IMAGE_SIZE = 524288 # Amount of simulated RAM + DATA_DEPTH = 8192 # Depth of data stack + ADDRESS_DEPTH = 32768 # Depth of the stacks + + # Initialize stacks + data_sp = 0 + addr_sp = 0 + + # VM state + ip = 0 + + # Opcode definitions + OP_NOP = 0 + OP_LIT = 1 + OP_DUP = 2 + OP_DROP = 3 + OP_SWAP = 4 + OP_PUSH = 5 + OP_POP = 6 + OP_JUMP = 7 + OP_CALL = 8 + OP_CCALL = 9 + OP_RETURN = 10 + OP_EQ = 11 + OP_NEQ = 12 + OP_LT = 13 + OP_GT = 14 + OP_FETCH = 15 + OP_STORE = 16 + OP_ADD = 17 + OP_SUB = 18 + OP_MUL = 19 + OP_DIVMOD = 20 + OP_AND = 21 + OP_OR = 22 + OP_XOR = 23 + OP_SHIFT = 24 + OP_ZERO_EXIT = 25 + OP_HALT = 26 + + # Initialize VM + prepare_vm() + + # Load and run test program + load_test_program() + execute(0) + + # Print results + print "Stack contents after execution:" + print_stack() +} + +# Stack operations +function stack_push(stack_name, value) { + if (stack_name == "data") { + data_sp++ + data_stack[data_sp] = value + } else if (stack_name == "addr") { + addr_sp++ + addr_stack[addr_sp] = value + } +} + +function stack_pop(stack_name) { + if (stack_name == "data") { + if (data_sp > 0) { + value = data_stack[data_sp] + data_sp-- + return value + } + } else if (stack_name == "addr") { + if (addr_sp > 0) { + value = addr_stack[addr_sp] + addr_sp-- + return value + } + } + return 0 +} + +function stack_tos(stack_name) { + if (stack_name == "data" && data_sp > 0) { + return data_stack[data_sp] + } + return 0 +} + +function stack_nos(stack_name) { + if (stack_name == "data" && data_sp > 1) { + return data_stack[data_sp - 1] + } + return 0 +} + +# Bitwise operations +function bitwise_and(x, y, i, result, a, b) { + result = 0 + for (i = 0; i < 32; i++) { + a = int(x / (2 ^ i)) % 2 + b = int(y / (2 ^ i)) % 2 + if (a == 1 && b == 1) + result += 2 ^ i + } + return result +} + +function bitwise_or(x, y, i, result, a, b) { + result = 0 + for (i = 0; i < 32; i++) { + a = int(x / (2 ^ i)) % 2 + b = int(y / (2 ^ i)) % 2 + if (a == 1 || b == 1) + result += 2 ^ i + } + return result +} + +function bitwise_xor(x, y, i, result, a, b) { + result = 0 + for (i = 0; i < 32; i++) { + a = int(x / (2 ^ i)) % 2 + b = int(y / (2 ^ i)) % 2 + if (a != b) + result += 2 ^ i + } + return result +} + +# Helper functions +function abs(x) { + return x < 0 ? -x : x +} + +function lshift(x, n) { + return int(x * (2 ^ n)) +} + +function rshift(x, n) { + return int(x / (2 ^ n)) +} + +# VM core functions +function process_opcode(opcode) { + if (opcode == OP_NOP) { + return + } + else if (opcode == OP_LIT) { + ip++ + stack_push("data", image[ip]) + } + else if (opcode == OP_DUP) { + stack_push("data", stack_tos("data")) + } + else if (opcode == OP_DROP) { + stack_pop("data") + } + else if (opcode == OP_SWAP) { + temp = stack_pop("data") + temp2 = stack_pop("data") + stack_push("data", temp) + stack_push("data", temp2) + } + else if (opcode == OP_ADD) { + x = stack_pop("data") + y = stack_pop("data") + stack_push("data", x + y) + } + else if (opcode == OP_SUB) { + x = stack_pop("data") + y = stack_pop("data") + stack_push("data", y - x) + } + else if (opcode == OP_MUL) { + x = stack_pop("data") + y = stack_pop("data") + stack_push("data", x * y) + } + else if (opcode == OP_HALT) { + ip = IMAGE_SIZE + } +} + +function check_stack() { + if (data_sp < 0 || addr_sp < 0 || + data_sp > DATA_DEPTH || addr_sp > ADDRESS_DEPTH) { + ip = 0 + data_sp = 0 + addr_sp = 0 + } +} + +function process_packed_opcodes(packed) { + ops[0] = bitwise_and(packed, 255) + ops[1] = bitwise_and(rshift(packed, 8), 255) + ops[2] = bitwise_and(rshift(packed, 16), 255) + ops[3] = bitwise_and(rshift(packed, 24), 255) + + for (i = 0; i < 4; i++) { + if (ops[i] != 0) { + process_opcode(ops[i]) + } + } +} + +function execute(offset) { + addr_sp = 1 + ip = offset + + while (ip < IMAGE_SIZE) { + opcode = image[ip] + process_packed_opcodes(opcode) + + if (addr_sp == 0) + ip = IMAGE_SIZE + + ip++ + } +} + +function prepare_vm() { + ip = 0 + data_sp = 0 + addr_sp = 0 +} + +# Test program loader +function pack_opcodes(op1, op2, op3, op4) { + return op1 + (op2 * 256) + (op3 * 65536) + (op4 * 16777216) +} + +function load_test_program() { + # Simple test program that adds 10 and 5 + image[0] = pack_opcodes(OP_LIT, 0, 0, 0) # Push literal + image[1] = 10 # Value 10 + image[2] = pack_opcodes(OP_LIT, 0, 0, 0) # Push literal + image[3] = 5 # Value 5 + image[4] = pack_opcodes(OP_ADD, 0, 0, 0) # Add them + image[5] = pack_opcodes(OP_HALT, 0, 0, 0) # Halt +} + +# Debug helper +function print_stack() { + for (i = 1; i <= data_sp; i++) { + print "Item", i ":", data_stack[i] + } +} \ No newline at end of file diff --git a/awk/retro/test.awk b/awk/retro/test.awk new file mode 100755 index 0000000..191fa5b --- /dev/null +++ b/awk/retro/test.awk @@ -0,0 +1,52 @@ +#!/usr/bin/awk -f + +@include "vm.awk" + +# Complex test program +BEGIN { + # Test program to calculate factorial of 5 + i = 0 + + # Push 5 onto stack + image[i++] = pack_opcodes(OP_LIT, 0, 0, 0) + image[i++] = 5 + + # Push 1 onto stack (accumulator) + image[i++] = pack_opcodes(OP_LIT, 0, 0, 0) + image[i++] = 1 + + # Start of multiplication loop + loop_start = i + + # Duplicate top number (counter) + image[i++] = pack_opcodes(OP_DUP, 0, 0, 0) + + # Test if counter is zero + image[i++] = pack_opcodes(OP_ZERO_EXIT, 0, 0, 0) + + # Multiply accumulator by counter + image[i++] = pack_opcodes(OP_MUL, 0, 0, 0) + + # Decrement counter + image[i++] = pack_opcodes(OP_LIT, 0, 0, 0) + image[i++] = 1 + image[i++] = pack_opcodes(OP_SUB, 0, 0, 0) + + # Jump back to start of loop + image[i++] = pack_opcodes(OP_LIT, 0, 0, 0) + image[i++] = loop_start + image[i++] = pack_opcodes(OP_JUMP, 0, 0, 0) + + # Halt + image[i++] = pack_opcodes(OP_HALT, 0, 0, 0) + + # Execute program + execute(0) + + # Print result (should be 120 - factorial of 5) + print "Factorial of 5:", stack_tos("data") +} + +function pack_opcodes(op1, op2, op3, op4) { + return op1 + (op2 * 256) + (op3 * 65536) + (op4 * 16777216) +} \ No newline at end of file diff --git a/awk/retro/vm.awk b/awk/retro/vm.awk new file mode 100755 index 0000000..cd894c5 --- /dev/null +++ b/awk/retro/vm.awk @@ -0,0 +1,364 @@ +#!/usr/bin/awk -f + +# Constants +BEGIN { + IMAGE_SIZE = 524288 # Amount of simulated RAM + DATA_DEPTH = 8192 # Depth of data stack + ADDRESS_DEPTH = 32768 # Depth of the stacks + + # Initialize stacks + data_sp = 0 + addr_sp = 0 + + # VM state + ip = 0 + + # Opcode definitions + OP_NOP = 0 + OP_LIT = 1 + OP_DUP = 2 + OP_DROP = 3 + OP_SWAP = 4 + OP_PUSH = 5 + OP_POP = 6 + OP_JUMP = 7 + OP_CALL = 8 + OP_CCALL = 9 + OP_RETURN = 10 + OP_EQ = 11 + OP_NEQ = 12 + OP_LT = 13 + OP_GT = 14 + OP_FETCH = 15 + OP_STORE = 16 + OP_ADD = 17 + OP_SUB = 18 + OP_MUL = 19 + OP_DIVMOD = 20 + OP_AND = 21 + OP_OR = 22 + OP_XOR = 23 + OP_SHIFT = 24 + OP_ZERO_EXIT = 25 + OP_HALT = 26 + OP_IE = 27 + OP_IQ = 28 + OP_II = 29 +} + +# Stack operations +function stack_push(stack_name, value) { + if (stack_name == "data") { + data_sp++ + data_stack[data_sp] = value + } else if (stack_name == "addr") { + addr_sp++ + addr_stack[addr_sp] = value + } +} + +function stack_pop(stack_name) { + if (stack_name == "data") { + if (data_sp > 0) { + value = data_stack[data_sp] + data_sp-- + return value + } + } else if (stack_name == "addr") { + if (addr_sp > 0) { + value = addr_stack[addr_sp] + addr_sp-- + return value + } + } + return 0 +} + +function stack_tos(stack_name) { + if (stack_name == "data" && data_sp > 0) { + return data_stack[data_sp] + } + return 0 +} + +function stack_nos(stack_name) { + if (stack_name == "data" && data_sp > 1) { + return data_stack[data_sp - 1] + } + return 0 +} + +# Bitwise operation implementations +function bitwise_and(x, y, i, result, a, b) { + result = 0 + for (i = 0; i < 32; i++) { + a = int(x / (2 ^ i)) % 2 + b = int(y / (2 ^ i)) % 2 + if (a == 1 && b == 1) + result += 2 ^ i + } + return result +} + +function bitwise_or(x, y, i, result, a, b) { + result = 0 + for (i = 0; i < 32; i++) { + a = int(x / (2 ^ i)) % 2 + b = int(y / (2 ^ i)) % 2 + if (a == 1 || b == 1) + result += 2 ^ i + } + return result +} + +function bitwise_xor(x, y, i, result, a, b) { + result = 0 + for (i = 0; i < 32; i++) { + a = int(x / (2 ^ i)) % 2 + b = int(y / (2 ^ i)) % 2 + if (a != b) + result += 2 ^ i + } + return result +} + +function lshift(x, n) { + return int(x * (2 ^ n)) +} + +function rshift(x, n) { + return int(x / (2 ^ n)) +} + +# VM instruction implementations +function process_opcode(opcode) { + if (opcode == OP_NOP) { + return + } + else if (opcode == OP_LIT) { + ip++ + stack_push("data", image[ip]) + } + else if (opcode == OP_DUP) { + stack_push("data", stack_tos("data")) + } + else if (opcode == OP_DROP) { + stack_pop("data") + } + else if (opcode == OP_SWAP) { + temp = stack_pop("data") + temp2 = stack_pop("data") + stack_push("data", temp) + stack_push("data", temp2) + } + else if (opcode == OP_PUSH) { + stack_push("addr", stack_pop("data")) + } + else if (opcode == OP_POP) { + stack_push("data", stack_pop("addr")) + } + else if (opcode == OP_JUMP) { + ip = stack_pop("data") - 1 + } + else if (opcode == OP_CALL) { + stack_push("addr", ip) + ip = stack_pop("data") - 1 + } + else if (opcode == OP_CCALL) { + a = stack_pop("data") + b = stack_pop("data") + if (b != 0) { + stack_push("addr", ip) + ip = a - 1 + } + } + else if (opcode == OP_RETURN) { + ip = stack_pop("addr") + } + else if (opcode == OP_EQ) { + a = stack_pop("data") + b = stack_pop("data") + stack_push("data", (b == a) ? -1 : 0) + } + else if (opcode == OP_NEQ) { + a = stack_pop("data") + b = stack_pop("data") + stack_push("data", (b != a) ? -1 : 0) + } + else if (opcode == OP_LT) { + a = stack_pop("data") + b = stack_pop("data") + stack_push("data", (b < a) ? -1 : 0) + } + else if (opcode == OP_GT) { + a = stack_pop("data") + b = stack_pop("data") + stack_push("data", (b > a) ? -1 : 0) + } + else if (opcode == OP_FETCH) { + x = stack_pop("data") + if (x == -1) + stack_push("data", data_sp) + else if (x == -2) + stack_push("data", addr_sp) + else if (x == -3) + stack_push("data", IMAGE_SIZE) + else if (x == -4) + stack_push("data", -2147483648) + else if (x == -5) + stack_push("data", 2147483647) + else + stack_push("data", image[x]) + } + else if (opcode == OP_STORE) { + addr = stack_pop("data") + value = stack_pop("data") + image[addr] = value + } + else if (opcode == OP_ADD) { + x = stack_pop("data") + y = stack_pop("data") + stack_push("data", x + y) + } + else if (opcode == OP_SUB) { + x = stack_pop("data") + y = stack_pop("data") + stack_push("data", y - x) + } + else if (opcode == OP_MUL) { + x = stack_pop("data") + y = stack_pop("data") + stack_push("data", y * x) + } + else if (opcode == OP_DIVMOD) { + b = stack_pop("data") + a = stack_pop("data") + if (b == 0) { + ip = 0 + data_sp = 0 + addr_sp = 0 + } else { + x = abs(b) + y = abs(a) + q = int(y / x) + r = y % x + if (a < 0 && b < 0) + r = r * -1 + if (a > 0 && b < 0) + q = q * -1 + if (a < 0 && b > 0) { + r = r * -1 + q = q * -1 + } + stack_push("data", r) + stack_push("data", q) + } + } + else if (opcode == OP_AND) { + x = stack_pop("data") + y = stack_pop("data") + stack_push("data", bitwise_and(x, y)) + } + else if (opcode == OP_OR) { + x = stack_pop("data") + y = stack_pop("data") + stack_push("data", bitwise_or(x, y)) + } + else if (opcode == OP_XOR) { + x = stack_pop("data") + y = stack_pop("data") + stack_push("data", bitwise_xor(x, y)) + } + else if (opcode == OP_SHIFT) { + x = stack_pop("data") + y = stack_pop("data") + if (x < 0) + stack_push("data", lshift(y, -x)) + else + stack_push("data", rshift(y, x)) + } + else if (opcode == OP_ZERO_EXIT) { + if (stack_tos("data") == 0) { + stack_pop("data") + ip = stack_pop("addr") + } + } + else if (opcode == OP_HALT) { + ip = IMAGE_SIZE + } + + check_stack() +} + +# Helper functions +function abs(x) { + return x < 0 ? -x : x +} + +function check_stack() { + if (data_sp < 0 || addr_sp < 0 || + data_sp > DATA_DEPTH || addr_sp > ADDRESS_DEPTH) { + ip = 0 + data_sp = 0 + addr_sp = 0 + } +} + +function process_packed_opcodes(packed) { + ops[0] = bitwise_and(packed, 255) + ops[1] = bitwise_and(rshift(packed, 8), 255) + ops[2] = bitwise_and(rshift(packed, 16), 255) + ops[3] = bitwise_and(rshift(packed, 24), 255) + + for (i = 0; i < 4; i++) { + if (ops[i] != 0) { + process_opcode(ops[i]) + } + } +} + +# Main execution function +function execute(offset) { + addr_sp = 1 + ip = offset + + while (ip < IMAGE_SIZE) { + opcode = image[ip] + process_packed_opcodes(opcode) + + if (addr_sp == 0) + ip = IMAGE_SIZE + + ip++ + } +} + +# String handling functions +function string_inject(str, buffer, i, len) { + len = length(str) + for (i = 1; i <= len; i++) { + image[buffer + i - 1] = ord(substr(str, i, 1)) + image[buffer + i] = 0 + } +} + +function string_extract(at, str, i) { + str = "" + i = at + while (image[i] != 0) { + str = str chr(image[i]) + i++ + } + return str +} + +# Initialize VM +BEGIN { + prepare_vm() +} + +function prepare_vm() { + ip = 0 + data_sp = 0 + addr_sp = 0 +} \ No newline at end of file diff --git a/awk/scheme/s.awk b/awk/scheme/s.awk new file mode 100755 index 0000000..7c8bba6 --- /dev/null +++ b/awk/scheme/s.awk @@ -0,0 +1,139 @@ +#!/usr/bin/awk -f + +# Set debug mode +DEBUG = 1 # Change to 0 to disable debug output + +# Environment to store variable bindings +BEGIN { + print "Welcome to the AWK Scheme Interpreter!" + print "Type your Scheme expressions below (type 'exit' to quit):" + while (1) { + printf "> " + if (getline input <= 0) { + print "Error reading input. Exiting." + break + } + if (input == "exit") { + print "Exiting the interpreter." + exit + } + if (input == "") { + print "Empty input received, continuing..." + continue + } + + print "Input received: " input # Echo the input + ast = parse(input) # Parse the input + + # Print the entire AST for debugging + for (i = 1; i <= length(ast); i++) { + print "AST[" i "] = " ast[i] + } + + # Evaluate the AST + if (length(ast) > 0) { + result = eval(ast) # Evaluate the AST + print "Result: " result # Print the result + } else { + print "Parsed AST is empty." + } + } +} + +# Function to parse input into an AST +function parse(input) { + # Remove outer whitespace + gsub(/^\s+|\s+$/, "", input) + + # Check if input is empty after trimming + if (input == "") { + print "Input is empty after trimming" + return "" + } + + # Debugging: Print input before processing + print "Debug: Raw input for parsing: " input + + # Remove parentheses at start and end + if (substr(input, 1, 1) == "(") { + input = substr(input, 2) + } + if (substr(input, length(input), 1) == ")") { + input = substr(input, 1, length(input) - 1) + } + + # Debugging: Print input after removing outer parentheses + print "Debug: Input after removing outer parentheses: " input + + # Split the input into tokens + gsub(/\(/, " ( ", input) + gsub(/\)/, " ) ", input) + gsub(/\s+/, " ", input) # normalize whitespace + gsub(/^\s+|\s+$/, "", input) # trim + + # Debugging: Print input after tokenization + print "Debug: Input after tokenization: " input + + n = split(input, ast, " ") + + # Debugging: Print the number of tokens + print "Debug: Number of tokens: " n + + return ast +} + +# Function to evaluate the AST +function eval(ast, i, result) { + # Debugging: Print the current AST being evaluated + print "Debug: Evaluating AST: " ast[1] " " ast[2] " " ast[3] + + # Handle numbers directly + if (ast[1] ~ /^[+-]?[0-9]+$/) { + print "Debug: Returning number: " ast[1] + return ast[1] + 0 # Convert string to number + } + + # Handle addition + if (ast[1] == "+") { + result = 0 + for (i = 2; i <= length(ast); i++) { + print "Debug: Adding operand: " ast[i] + result += eval(ast[i]) # Recursively evaluate operands + } + return result + } + + # Handle subtraction + if (ast[1] == "-") { + result = eval(ast[2]) # Start with the first operand + for (i = 3; i <= length(ast); i++) { + print "Debug: Subtracting operand: " ast[i] + result -= eval(ast[i]) # Subtract subsequent operands + } + return result + } + + # Handle multiplication + if (ast[1] == "*") { + result = 1 + for (i = 2; i <= length(ast); i++) { + print "Debug: Multiplying operand: " ast[i] + result *= eval(ast[i]) # Multiply operands + } + return result + } + + # Handle division + if (ast[1] == "/") { + result = eval(ast[2]) # Start with the first operand + for (i = 3; i <= length(ast); i++) { + print "Debug: Dividing by operand: " ast[i] + result /= eval(ast[i]) # Divide by subsequent operands + } + return result + } + + # If we reach here, the operation is not recognized + return "Error: Unknown operation " ast[1] +} + diff --git a/awk/scheme/scheme/README.md b/awk/scheme/scheme/README.md new file mode 100644 index 0000000..6fcafe6 --- /dev/null +++ b/awk/scheme/scheme/README.md @@ -0,0 +1,189 @@ +# Awk-Scheme + +## Overview +This is a minimal Scheme implementation supporting basic arithmetic operations, list manipulation, and comparisons. All values are displayed with type tags (e.g., "N:42" for numbers). + +## Data Types + +### Numbers +- Represented as: `N:value` +- Examples: `42`, `-5`, `123` +```scheme +scheme> 42 +N:42 +``` + +### Booleans +- Represented as: `B:value` (1 for true, 0 for false) +- Generated by comparison operations +```scheme +scheme> (< 1 2) +B:1 +``` + +### Nil (Empty List) +- Represented as: `NIL:` +- Used for list termination +```scheme +scheme> nil +NIL: +``` + +### Pairs +- Represented as: `P:index` +- Created using cons +- Stored in heap with car and cdr values + +## Supported Operations + +### Arithmetic Operations +1. Addition: `(+ x y ...)` + ```scheme + scheme> (+ 1 2) + N:3 + scheme> (+ 1 2 3) + N:6 + ``` + +2. Subtraction: `(- x y ...)` + ```scheme + scheme> (- 5 3) + N:2 + scheme> (- 10 2 3) ; 10 - 2 - 3 + N:5 + ``` + +3. Multiplication: `(* x y ...)` + ```scheme + scheme> (* 3 4) + N:12 + scheme> (* 2 3 4) + N:24 + ``` + +4. Division: `(/ x y)` + ```scheme + scheme> (/ 10 2) + N:5 + ``` + +### Comparison Operations +1. Less Than: `(< x y)` + ```scheme + scheme> (< 1 2) + B:1 + scheme> (< 2 1) + B:0 + ``` + +2. Equality: `(= x y)` + ```scheme + scheme> (= 42 42) + B:1 + ``` + +### List Operations +1. Cons: `(cons x y)` + - Creates a pair from two values + ```scheme + scheme> (cons 1 2) + P:1 + scheme> (cons 1 nil) + P:1 + ``` + +2. Car: `(car pair)` + - Gets the first element of a pair + ```scheme + scheme> (car (cons 1 2)) + N:1 + ``` + +3. Cdr: `(cdr pair)` + - Gets the second element of a pair + ```scheme + scheme> (cdr (cons 1 2)) + N:2 + ``` + +### Building Lists +Lists can be built using nested cons operations with nil as the terminator: +```scheme +scheme> (cons 1 (cons 2 (cons 3 nil))) +P:1 ; This represents the list (1 2 3) +``` + +### Define +- Define a new variable or function +```scheme +scheme> (define x 10) +N:10 +scheme> (define add (x y) (+ x y)) +F:1 +``` + +### Let +- Define local bindings within a let expression +```scheme +scheme> (let ((x 10) (y 20)) (+ x y)) +N:30 +``` + +## Expression Structure +- All expressions must be properly parenthesized +- Operators come first in a form (prefix notation) +- Multiple expressions can be evaluated in sequence + +## REPL Features +- Multi-line input supported (continues with "..." prompt until parentheses balance) +- Ctrl+D to exit +- Comments start with semicolon (;) + +## Error Handling +The system will report errors for: +- Stack underflow +- Type mismatches +- Unknown operations +- Division by zero +- Invalid list operations +- Malformed expressions + +## Examples +Here are some more complex examples: + +1. Nested arithmetic: +```scheme +scheme> (+ (* 3 4) (- 10 5)) +N:17 +``` + +2. List construction and manipulation: +```scheme +scheme> (cons (+ 1 2) (cons (* 3 4) nil)) +P:1 ; Represents (3 12) +``` + +3. Combined operations: +```scheme +scheme> (car (cons (* 2 3) (+ 4 5))) +N:6 +``` + +## Limitations +Current implementation does not support: +- Variables or definition +- Functions or lambda expressions +- Control structures (if, cond) +- Quote or quasiquote +- String data type +- Input/output operations +- Standard library functions + +## Future Enhancements +Possible additions could include: +1. Let expressions for local bindings +2. Function definitions +3. Conditional expressions +4. More numeric operations +5. String support +6. Additional list operations \ No newline at end of file diff --git a/awk/scheme/scheme/TODO.md b/awk/scheme/scheme/TODO.md new file mode 100644 index 0000000..52218a8 --- /dev/null +++ b/awk/scheme/scheme/TODO.md @@ -0,0 +1,19 @@ +# Limitations +Current implementation does not support: + +- Variables or definition +- Functions or lambda expressions +- Control structures (if, cond) +- Quote or quasiquote +- String data type +- Input/output operations +- Standard library functions + +# Future Enhancements +Possible additions could include: + +- Let expressions for local bindings +- Function definitions +- Conditional expressions +- More numeric operations +- String support diff --git a/awk/scheme/scheme/bin/compiler.awk b/awk/scheme/scheme/bin/compiler.awk new file mode 100755 index 0000000..e7e8081 --- /dev/null +++ b/awk/scheme/scheme/bin/compiler.awk @@ -0,0 +1,440 @@ +#!/usr/bin/awk -f + +BEGIN { + # Compiler state + curr_token = "" + input_buffer = "" + next_label = 0 + program = "" + + # Debug mode + DEBUG = 0 +} + +function debug(msg) { + if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" +} + +# Process input line - just accumulate the raw input +{ + if (program != "") program = program "\n" + program = program $0 +} + +END { + debug("Raw program:\n" program) + if (program == "") exit + + # Split program into expressions and compile each one + split_expressions(program) +} + +# New function to handle multiple expressions +function split_expressions(prog, current, paren_count, i, c, expr, cleaned) { + current = "" + paren_count = 0 + + # Extract expressions between parentheses + cleaned = prog + gsub(/;[^(]*\(/, "(", cleaned) # Remove comments before expressions + gsub(/\)[^)]*;/, ")", cleaned) # Remove comments after expressions + gsub(/[ \t\n]+/, " ", cleaned) # Normalize whitespace + sub(/^[ \t\n]+/, "", cleaned) # Trim leading whitespace + sub(/[ \t\n]+$/, "", cleaned) # Trim trailing whitespace + + debug("Cleaned program: [" cleaned "]") + + if (cleaned == "") return + + for (i = 1; i <= length(cleaned); i++) { + c = substr(cleaned, i, 1) + + if (c == "(") { + if (paren_count == 0) current = "" + paren_count++ + } + + current = current c + + if (c == ")") { + paren_count-- + if (paren_count == 0) { + # Found complete expression + expr = current + sub(/^\s+/, "", expr) + sub(/\s+$/, "", expr) + + debug("Processing expression: [" expr "]") + program = expr # Set for parser + expr = parse_expr() + compile_expr(expr) + current = "" + } + } + } + + print "HALT" +} + +# Lexer functions +function is_digit(c) { return c >= "0" && c <= "9" } +function is_whitespace(c) { return c == " " || c == "\t" || c == "\n" } + +function next_token() { + # Initialize input_buffer if this is first call + if (input_buffer == "") input_buffer = program + + # Skip whitespace + while (length(input_buffer) > 0 && is_whitespace(substr(input_buffer, 1, 1))) + input_buffer = substr(input_buffer, 2) + + if (length(input_buffer) == 0) return "EOF" + + c = substr(input_buffer, 1, 1) + if (c == "(" || c == ")") { + input_buffer = substr(input_buffer, 2) + return c + } + + # Handle numbers + if (is_digit(c) || c == "-" && length(input_buffer) > 1 && is_digit(substr(input_buffer, 2, 1))) { + num = "" + while (length(input_buffer) > 0) { + c = substr(input_buffer, 1, 1) + if (!is_digit(c) && c != "-") break + num = num c + input_buffer = substr(input_buffer, 2) + } + return num + } + + # Handle symbols + sym = "" + while (length(input_buffer) > 0) { + c = substr(input_buffer, 1, 1) + if (is_whitespace(c) || c == "(" || c == ")") break + sym = sym c + input_buffer = substr(input_buffer, 2) + } + return sym +} + +# Parser functions +function parse_expr( token, result) { + token = next_token() + if (token == "EOF") return "" + + if (token == "(") { + result = parse_list() + debug("Parsed list: " result) + return result + } + + debug("Parsed token: " token) + return token +} + +function parse_list( result, expr) { + result = "" + + while (1) { + expr = parse_expr() + if (expr == "" || expr == ")") break + + if (result != "") result = result " " + result = result expr + } + + if (expr == "") error("Unexpected end of input in list") + return "(" result ")" +} + +# Split expression into operator and arguments +function split_expr(expr, i, len, c, op, args, paren_count) { + len = length(expr) + paren_count = 0 + + for (i = 1; i <= len; i++) { + c = substr(expr, i, 1) + if (c == " " && paren_count == 0) { + op = substr(expr, 1, i - 1) + args = substr(expr, i + 1) + break + } + if (c == "(") paren_count++ + if (c == ")") paren_count-- + } + + if (!op) { + op = expr + args = "" + } + + debug("Split expr: op=" op " args=" args) + return op SUBSEP args +} + +# Split arguments handling nested parentheses +function split_args(args, arg_array, len, i, c, current, paren_count, arg_count) { + len = length(args) + current = "" + paren_count = 0 + arg_count = 0 + + for (i = 1; i <= len; i++) { + c = substr(args, i, 1) + + if (c == "(") paren_count++ + if (c == ")") paren_count-- + + if (c == " " && paren_count == 0 && current != "") { + arg_array[++arg_count] = current + current = "" + } else if (c != " " || paren_count > 0) { + current = current c + } + } + + if (current != "") { + arg_array[++arg_count] = current + } + + return arg_count +} + +# Code generation functions +function compile_number(num) { + debug("Compiling number: " num) + print "PUSH_CONST N:" num +} + +function compile_primitive_call(op, args, arg_array, nargs, i) { + debug("Primitive call: op=" op " args=" args) + nargs = split_args(args, arg_array) + + # Compile arguments for all operations + for (i = 1; i <= nargs; i++) { + compile_expr(arg_array[i]) + } + + if (op == "+") { + for (i = 1; i < nargs; i++) + print "ADD" + } + else if (op == "-") { + if (nargs == 1) { + print "PUSH_CONST N:0" + print "SWAP" + } + for (i = 1; i < nargs; i++) + print "SUB" + } + else if (op == "*") { + for (i = 1; i < nargs; i++) + print "MUL" + } + else if (op == "cons") { + if (nargs != 2) error("cons requires 2 arguments") + print "CONS" + } + else if (op == "car") { + if (nargs != 1) error("car requires 1 argument") + print "CAR" + } + else if (op == "cdr") { + if (nargs != 1) error("cdr requires 1 argument") + print "CDR" + } + else if (op == "<") { + if (nargs != 2) error("< requires 2 arguments") + print "LT" + } + else if (op == "=") { + if (nargs != 2) error("= requires 2 arguments") + print "EQ" + } + else { + # Function call + debug("Function call: " op) + print "CALL " op + } +} + +function split_bindings(bindings, binding_array, count, current, paren_count, i, c) { + count = 0 + current = "" + paren_count = 0 + + for (i = 1; i <= length(bindings); i++) { + c = substr(bindings, i, 1) + + # Track parentheses + if (c == "(") { + paren_count++ + if (paren_count == 1) { + current = "" # Start new binding + continue + } + } + if (c == ")") { + paren_count-- + if (paren_count == 0) { + # End of binding + binding_array[++count] = current + current = "" + continue + } + } + + # Only add character if we're inside a binding + if (paren_count > 0) { + current = current c + } + } + + return count +} + +function compile_let(args, bindings, body, binding_array, nbindings, i, var, val, binding_parts) { + # Split into bindings and body + if (substr(args, 1, 1) != "(") error("Malformed let expression") + + # Find matching closing parenthesis for bindings + paren_count = 1 + i = 2 + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in let bindings") + + bindings = substr(args, 2, i - 3) # Remove outer parentheses + body = substr(args, i) + + # Trim whitespace from body + sub(/^[ \t\n]+/, "", body) + sub(/[ \t\n]+$/, "", body) + + debug("Let bindings: " bindings) + debug("Let body: " body) + + # Compile each binding + nbindings = split_bindings(bindings, binding_array) + for (i = 1; i <= nbindings; i++) { + debug("Processing binding: " binding_array[i]) + split(binding_array[i], binding_parts, " ") + var = binding_parts[1] + val = binding_parts[2] + + debug("Binding var: " var " val: " val) + + # Compile the value + compile_expr(val) + + # Store in environment + print "STORE " var + } + + # Compile the body + compile_expr(body) + + # Clean up bindings AFTER evaluating body + for (i = nbindings; i >= 1; i--) { + print "POP_ENV" + } +} + +function compile_define(args, name, params, body, param_array, nparams, i, paren_start, paren_end) { + # Set flag for global definition + print "PUSH_CONST B:1" + print "STORE from_define" # Must match exactly what vm_store checks for + + # Find the function name (everything up to the first space) + i = index(args, " ") + if (i == 0) error("Malformed define expression") + name = substr(args, 1, i - 1) + args = substr(args, i + 1) + + # Check if it's a function or variable definition + if (substr(args, 1, 1) == "(") { + # It's a function definition + paren_count = 1 + i = 2 + while (paren_count > 0 && i <= length(args)) { + if (substr(args, i, 1) == "(") paren_count++ + if (substr(args, i, 1) == ")") paren_count-- + i++ + } + if (paren_count > 0) error("Unmatched parenthesis in parameter list") + + params = substr(args, 2, i - 3) # Remove parentheses + body = substr(args, i + 1) + + # Create function label + print "LABEL " name + + # Process parameters + nparams = split(params, param_array, " ") + for (i = 1; i <= nparams; i++) { + print "STORE " param_array[i] + } + + # Compile function body + compile_expr(body) + + # Clean up parameters and return + for (i = nparams; i >= 1; i--) { + print "POP_ENV" + } + print "RETURN" + } else { + # Variable definition + debug("Defining variable: " name " with value: " args) + compile_expr(args) # Compile the value + print "STORE " name # Store the variable + } +} + +function compile_expr(expr, split_result, op, args) { + debug("Compiling expression: " expr) + + if (expr ~ /^-?[0-9]+$/) { + compile_number(expr) + return + } + + if (expr == "nil") { + print "PUSH_CONST NIL:" + return + } + + # Add variable lookup + if (expr ~ /^[a-zA-Z_][a-zA-Z0-9_]*$/) { + print "LOOKUP " expr + return + } + + if (substr(expr, 1, 1) == "(") { + expr = substr(expr, 2, length(expr) - 2) + split_result = split_expr(expr) + op = substr(split_result, 1, index(split_result, SUBSEP) - 1) + args = substr(split_result, index(split_result, SUBSEP) + 1) + + if (op == "define") { + compile_define(args) + } else if (op == "let") { + compile_let(args) + } else { + compile_primitive_call(op, args) + } + return + } + + error("Unknown expression type: " expr) +} + +function error(msg) { + print "Error: " msg > "/dev/stderr" + exit 1 +} \ No newline at end of file diff --git a/awk/scheme/scheme/bin/repl b/awk/scheme/scheme/bin/repl new file mode 100755 index 0000000..14a10cf --- /dev/null +++ b/awk/scheme/scheme/bin/repl @@ -0,0 +1,147 @@ +#!/bin/bash + +# Enable debug tracing +DEBUG=0 + +debug() { + if [ "$DEBUG" = "1" ]; then + echo "[DEBUG] $*" >&2 + fi +} + +# Find the directory containing this script and the components +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +COMPILER="$DIR/compiler.awk" +VM="$DIR/vm.awk" + +debug "Using compiler: $COMPILER" +debug "Using VM: $VM" + +# Verify components exist and are executable +for component in "$COMPILER" "$VM"; do + if [ ! -f "$component" ]; then + echo "Error: Cannot find $component" >&2 + echo "Please ensure all components are present" >&2 + exit 1 + fi + chmod +x "$component" +done + +# Set up temporary files and state +TMPDIR=$(mktemp -d) +debug "Created temp dir: $TMPDIR" +STATE_FILE="/tmp/scheme_vm.state" + +cleanup() { + debug "Cleaning up temp dir: $TMPDIR" + rm -rf "$TMPDIR" + if [ "$1" != "keep_state" ]; then + rm -f "$STATE_FILE" + rm -f "/tmp/scheme_vm.env" + fi +} +trap "cleanup" EXIT + +# Set up temporary files +INPUT_FILE="$TMPDIR/input.scm" +ASM_FILE="$TMPDIR/output.asm" +DEBUG_FILE="$TMPDIR/debug.out" + +# Initialize/clear state files at REPL start +if [ "$#" -eq 0 ]; then # Only for interactive mode + : > "/tmp/scheme_vm.state" + : > "/tmp/scheme_vm.env" +fi + +# Function to handle evaluation +evaluate_expression() { + local input="$1" + local result + + # Skip empty lines + if [ -z "$input" ]; then + return 0 + fi + + debug "Evaluating expression: $input" + echo "$input" > "$INPUT_FILE" + debug "Input file contents:" + cat "$INPUT_FILE" >&2 + + # Show compilation output even if it fails + debug "Running compiler..." + if awk -f "$COMPILER" "$INPUT_FILE" > "$ASM_FILE" 2> "$DEBUG_FILE"; then + debug "Compilation successful. Debug output:" + cat "$DEBUG_FILE" >&2 + debug "Generated assembly:" + cat "$ASM_FILE" >&2 + + debug "Running VM..." + # Use persistent VM state + result=$(awk -v PERSIST=1 -f "$VM" "$ASM_FILE" 2>&1) + debug "VM output: $result" + if [ -n "$result" ]; then + echo "$result" + fi + return 0 + else + echo "Compilation error" >&2 + debug "Compiler output:" + cat "$DEBUG_FILE" >&2 + return 1 + fi +} + +# Check if a file argument is provided +if [ "$#" -gt 0 ]; then + if [ ! -f "$1" ]; then + echo "Error: File not found: $1" >&2 + exit 1 + fi + debug "Reading file: $1" + file_content=$(cat "$1" | tr '\n' ' ') + debug "File content: $file_content" + evaluate_expression "$file_content" + cleanup "keep_state" # Keep state after file execution + exit 0 +fi + +# REPL state +paren_count=0 +current_input="" + +# Print welcome message +echo "Scheme REPL (Press Ctrl+D to exit)" +echo + +# Main REPL loop +while true; do + if [ $paren_count -eq 0 ]; then + printf "scheme> " + else + printf "... " + fi + + read -r line || exit 0 + + # Skip empty lines + if [ -z "$line" ]; then + continue + fi + + # Count parentheses + open_parens=$(echo "$line" | tr -cd '(' | wc -c) + close_parens=$(echo "$line" | tr -cd ')' | wc -c) + paren_count=$((paren_count + open_parens - close_parens)) + + if [ -n "$current_input" ]; then + current_input="$current_input $line" + else + current_input="$line" + fi + + if [ $paren_count -eq 0 ]; then + evaluate_expression "$current_input" + current_input="" + fi +done \ No newline at end of file diff --git a/awk/scheme/scheme/bin/vm.awk b/awk/scheme/scheme/bin/vm.awk new file mode 100755 index 0000000..cb2b992 --- /dev/null +++ b/awk/scheme/scheme/bin/vm.awk @@ -0,0 +1,564 @@ +#!/usr/bin/awk -f + +# VM State Initialization +BEGIN { + # Type tags + T_NUMBER = "N" + T_BOOLEAN = "B" + T_SYMBOL = "S" + T_PAIR = "P" + T_FUNCTION = "F" + T_NIL = "NIL" + + # VM registers + stack_ptr = 0 # Stack pointer + heap_ptr = 0 # Heap pointer + pc = 0 # Program counter + + # Debug mode + DEBUG = 0 + + # Environment for variables + env_size = 0 + + # Function table (make it persistent) + delete func_name + delete func_pc + delete func_code + func_size = 0 + + # Call stack + call_stack_ptr = 0 + + # State persistence + STATE_FILE = "/tmp/scheme_vm.state" + if (PERSIST) { + debug("Loading state from: " STATE_FILE) + if ((getline line < STATE_FILE) >= 0) { # Check if file exists and is readable + do { + if (line ~ /^FUNC /) { + sub(/^FUNC /, "", line) + name = line + sub(/ .*$/, "", name) + code = line + sub(/^[^ ]+ /, "", code) + + debug("Loaded function: " name) + debug("Code: " code) + + func_name[func_size] = name + func_code[func_size] = code + func_size++ + } + } while ((getline line < STATE_FILE) > 0) + close(STATE_FILE) + } + } + + # Function environments + delete func_env_names + delete func_env_vals + delete func_env_sizes + + # Global function storage + delete FUNCTIONS # Our own function storage array + + # Environment persistence + ENV_STATE_FILE = "/tmp/scheme_vm.env" + if (PERSIST) { + debug("Loading environment state from: " ENV_STATE_FILE) + if ((getline line < ENV_STATE_FILE) >= 0) { + do { + if (line ~ /^ENV /) { + sub(/^ENV /, "", line) + name = line + sub(/ .*$/, "", name) + val = line + sub(/^[^ ]+ /, "", val) + + debug("Loaded env var: " name " = " val) + + env_name[env_size] = name + env_val[env_size] = val + env_size++ + } + } while ((getline line < ENV_STATE_FILE) > 0) + close(ENV_STATE_FILE) + } + } + + normal_exit = 0 # Track if we exited normally via HALT +} + +# Debug output function +function debug(msg) { + if (DEBUG) printf("[DEBUG] %s\n", msg) > "/dev/stderr" +} + +# Value construction and access +function makeValue(type, val) { + return type ":" val +} + +function getType(val) { + return substr(val, 1, index(val, ":") - 1) +} + +function getValue(val) { + return substr(val, index(val, ":") + 1) +} + +# Type checking +function isNumber(val) { return getType(val) == T_NUMBER } +function isBoolean(val) { return getType(val) == T_BOOLEAN } +function isSymbol(val) { return getType(val) == T_SYMBOL } +function isPair(val) { return getType(val) == T_PAIR } +function isFunction(val) { return getType(val) == T_FUNCTION } +function isNil(val) { return getType(val) == T_NIL } + +# Stack operations +function push(val) { + stack[++stack_ptr] = val + debug("Push: " val " (SP: " stack_ptr ")") +} + +function pop() { + if (stack_ptr < 1) error("Stack underflow") + val = stack[stack_ptr--] + debug("Pop: " val " (SP: " stack_ptr ")") + return val +} + +function peek() { + if (stack_ptr < 1) error("Stack empty") + return stack[stack_ptr] +} + +# Heap operations +function allocate(val) { + heap[++heap_ptr] = val + refs[heap_ptr] = 1 + debug("Allocate: " val " at " heap_ptr) + return heap_ptr +} + +function getHeap(idx) { + if (!(idx in heap)) { + error("Invalid heap access: " idx) + return "" + } + return heap[idx] +} + +# Error handling +function error(msg) { + print "Error at PC " pc ": " msg > "/dev/stderr" + exit 1 +} + +# Arithmetic operations +function vm_add() { + if (stack_ptr < 2) error("ADD requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("ADD requires numeric operands") + result = getValue(val1) + getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function vm_subtract() { + if (stack_ptr < 2) error("SUB requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("SUB requires numeric operands") + result = getValue(val1) - getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function vm_multiply() { + if (stack_ptr < 2) error("MUL requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("MUL requires numeric operands") + result = getValue(val1) * getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +function vm_divide() { + if (stack_ptr < 2) error("DIV requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("DIV requires numeric operands") + if (getValue(val2) == 0) + error("Division by zero") + result = getValue(val1) / getValue(val2) + push(makeValue(T_NUMBER, result)) +} + +# List operations +function vm_cons() { + if (stack_ptr < 2) error("CONS requires two operands") + val2 = pop() + val1 = pop() + pair_val = val1 "," val2 + pair_idx = allocate(pair_val) + push(makeValue(T_PAIR, pair_idx)) +} + +function vm_car() { + if (stack_ptr < 1) error("CAR requires one operand") + val = pop() + if (!isPair(val)) error("CAR requires pair operand") + pair_idx = getValue(val) + pair = getHeap(pair_idx) + car_val = substr(pair, 1, index(pair, ",") - 1) + push(car_val) +} + +function vm_cdr() { + if (stack_ptr < 1) error("CDR requires one operand") + val = pop() + if (!isPair(val)) error("CDR requires pair operand") + pair_idx = getValue(val) + pair = getHeap(pair_idx) + cdr_val = substr(pair, index(pair, ",") + 1) + push(cdr_val) +} + +# Comparison operations +function vm_equal() { + if (stack_ptr < 2) error("EQ requires two operands") + val2 = pop() + val1 = pop() + result = (val1 == val2) ? "1" : "0" + push(makeValue(T_BOOLEAN, result)) +} + +function vm_less_than() { + if (stack_ptr < 2) error("LT requires two operands") + val2 = pop() + val1 = pop() + if (!isNumber(val1) || !isNumber(val2)) + error("LT requires numeric operands") + result = (getValue(val1) < getValue(val2)) ? "1" : "0" + push(makeValue(T_BOOLEAN, result)) +} + +# Instruction execution +function execute(instr) { + split(instr, parts, " ") + op = parts[1] + debug("Execute: " instr) + + if (op == "PUSH_CONST") { + push(parts[2]) + } + else if (op == "POP") { + pop() + } + else if (op == "DUP") { + val = peek() + push(val) + } + else if (op == "SWAP") { + if (stack_ptr < 2) error("SWAP requires two operands") + val2 = pop() + val1 = pop() + push(val2) + push(val1) + } + else if (op == "ADD") { + vm_add() + } + else if (op == "SUB") { + vm_subtract() + } + else if (op == "MUL") { + vm_multiply() + } + else if (op == "DIV") { + vm_divide() + } + else if (op == "CONS") { + vm_cons() + } + else if (op == "CAR") { + vm_car() + } + else if (op == "CDR") { + vm_cdr() + } + else if (op == "EQ") { + vm_equal() + } + else if (op == "LT") { + vm_less_than() + } + else if (op == "PRINT") { + if (stack_ptr < 1) error("PRINT requires one operand") + print peek() + } + else if (op == "HALT") { + normal_exit = 1 # Mark that we're exiting normally + if (stack_ptr > 0) { + result = peek() + } + if (PERSIST) { + save_state() + } + if (result) { + print result + } + exit(0) + } + else if (op == "STORE") { + vm_store(parts[2]) + } + else if (op == "POP_ENV") { + vm_pop_env() + } + else if (op == "LOOKUP") { + vm_lookup(parts[2]) + } + else if (op == "LABEL") { + vm_define_function(parts[2], pc) + } + else if (op == "CALL") { + vm_call_function(parts[2]) + } + else if (op == "RETURN") { + vm_return() + } + else { + error("Unknown instruction: " op) + } +} + +# Program loading +{ + # Store each line as an instruction + program[NR-1] = $0 +} + +# Program execution +END { + # Execute program + while (pc < length(program)) { + execute(program[pc++]) + } + + # Only save state if we didn't halt normally + if (!normal_exit && PERSIST) { + save_state() + } +} + +# Modify vm_store to handle global variables more consistently +function vm_store(name) { + debug("Storing " peek() " as " name " at env_size: " env_size) + + # If this is from a define, mark it as global + if (lookup_no_error("from_define")) { # Check if from_define is set + name = "__global_" name + # Clear the flag + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == "from_define") { + env_size-- # Remove the flag + break + } + } + + # Remove any previous definition of this global + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == name) { + # Shift everything down to remove the old definition + for (j = i; j < env_size - 1; j++) { + env_name[j] = env_name[j + 1] + env_val[j] = env_val[j + 1] + } + env_size-- + break + } + } + } + + # Store in current environment frame + env_name[env_size] = name + env_val[env_size] = peek() + env_size++ + debug("Environment after store:") + dump_env() +} + +function vm_pop_env() { + if (env_size <= 0) error("Environment underflow") + debug("Popping environment at size: " env_size) + + # Don't pop if this is a global definition (from define) + if (env_name[env_size-1] ~ /^__global_/) { + debug("Keeping global definition: " env_name[env_size-1]) + return + } + + debug("Removing: " env_name[env_size-1] " = " env_val[env_size-1]) + env_size-- +} + +# Modify vm_lookup to be more explicit about global lookups +function vm_lookup(name, i, global_name) { + debug("Looking up " name " in environment of size: " env_size) + dump_env() + + # First try looking up with global prefix + global_name = "__global_" name + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == global_name) { + debug("Found global " name " = " env_val[i] " at position " i) + push(env_val[i]) + return + } + if (env_name[i] == name) { + debug("Found local " name " = " env_val[i] " at position " i) + push(env_val[i]) + return + } + } + error("Undefined variable: " name) +} + +function vm_define_function(name, start_pc) { + debug("Defining function: " name " at " start_pc) + + # Build function code + code = "" + i = start_pc + while (i < length(program) && program[i] != "RETURN") { + if (code != "") code = code "\n" + code = code program[i] + i++ + } + code = code "\nRETURN" + + # Store in our function array + debug("Storing function: " name " = " code) + FUNCTIONS[name] = code + + pc = i + 1 +} + +function vm_call_function(name, code_lines, j, saved_pc, saved_env_size, arg, param_name) { + debug("Calling function: " name) + + if (!(name in FUNCTIONS)) { + error("Undefined function: " name) + } + + # Get argument from stack before modifying program + arg = pop() + debug("Function argument: " arg) + + saved_pc = pc + saved_env_size = env_size + + # Split function code into lines + split(FUNCTIONS[name], code_lines, "\n") + + # Extract parameter name from first STORE instruction + if (code_lines[1] ~ /^STORE /) { + param_name = substr(code_lines[1], 7) # Skip "STORE " + debug("Found parameter name: " param_name) + } else { + error("Function missing parameter definition") + } + + # Create new environment frame with correct parameter name + debug("Creating new environment frame at size: " env_size) + env_name[env_size] = param_name + env_val[env_size] = arg + env_size++ + + # Add function code to program + for (j in code_lines) { + program[length(program)] = code_lines[j] + } + + # Jump to function code + pc = length(program) - length(code_lines) + call_stack[++call_stack_ptr] = saved_pc + env_stack[call_stack_ptr] = saved_env_size + + debug("Function found, jumping to PC: " pc " with env_size: " saved_env_size) + dump_env() +} + +function vm_return() { + if (call_stack_ptr > 0) { + # Save return value + ret_val = pop() + + # Restore environment + while (env_size > env_stack[call_stack_ptr]) { + debug("Popping environment at size: " env_size) + vm_pop_env() + } + + # Restore program counter + pc = call_stack[call_stack_ptr--] + + # Push return value back + push(ret_val) + + debug("Returned with value: " ret_val " and env_size: " env_size) + } +} + +# Add debug function to dump environment in a more readable format +function dump_env( i) { + debug("Environment dump:") + for (i = 0; i < env_size; i++) { + debug(sprintf(" %d: %s = %s", i, env_name[i], env_val[i])) + } +} + +# Add flag for define statements +function compile_define(args, name, params, body, param_array, nparams, i, paren_start, paren_end) { + # Set flag to mark this as a global definition + print "PUSH_CONST B:1" # Push true + print "STORE __from_define" + + # ... rest of existing compile_define function ... +} + +# Add helper function for looking up without error +function lookup_no_error(name, i) { + for (i = env_size - 1; i >= 0; i--) { + if (env_name[i] == name) { + return 1 + } + } + return 0 +} + +# Add new function to handle state saving +function save_state() { + debug("Saving state to: " STATE_FILE) + for (i = 0; i < func_size; i++) { + debug("Saving function: " func_name[i]) + print "FUNC " func_name[i] " " func_code[i] > STATE_FILE + } + close(STATE_FILE) + + # Save environment state + debug("Saving environment state to: " ENV_STATE_FILE) + for (i = 0; i < env_size; i++) { + if (env_name[i] ~ /^__global_/) { # Only save global variables + debug("Saving env var: " env_name[i] " = " env_val[i]) + print "ENV " env_name[i] " " env_val[i] > ENV_STATE_FILE + } + } + close(ENV_STATE_FILE) +} \ No newline at end of file diff --git a/awk/scheme/scheme/diagram.md b/awk/scheme/scheme/diagram.md new file mode 100644 index 0000000..2c48d7c --- /dev/null +++ b/awk/scheme/scheme/diagram.md @@ -0,0 +1,43 @@ +# Awk-Scheme Diagram + +How this is all orchestrated. + +``` ++----------------+ Scheme Code +----------------+ Assembly +----------------+ +| | -----------------> | | -------------> | | +| REPL | "(+ 1 2)" | Compiler | "PUSH_CONST | VM | +| (bin/repl) | | compiler.awk | N:1 | vm.awk | +| | | | PUSH_CONST | | +| User Input | | Translates | N:2 | Stack-based | +| Interface | | Scheme to | ADD | Interpreter | +| | | VM Assembly | HALT" | | +| | <------------------ | | <------------- | | +| | Output: "N:3" | | Result | | ++----------------+ +----------------+ +----------------+ + ^ | + | | + | Data Flow | + | v ++------+-------------------------------------------------------------------------+ +| | +| 1. REPL (bin/repl) reads Scheme expression from user | +| 2. Expression sent to compiler (bin/compiler.awk) | +| 3. Compiler generates VM assembly instructions | +| 4. VM (bin/vm.awk) executes assembly and maintains: | +| - Stack: For operation values | +| - Heap: For storing pairs/lists | +| - Type tags: N:(number), B:(boolean), P:(pair), NIL: | +| 5. Result returned through REPL to user | +| | ++--------------------------------------------------------------------------------+ + +Example Flow: +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Input: │ │ Compiler │ │ VM │ +│ (+ 1 2) │ → │ PUSH_CONST │ → │ Stack: │ +│ │ │ N:1 │ │ [N:1] │ +│ │ │ PUSH_CONST │ │ [N:1,N:2] │ +│ │ │ N:2 │ │ [N:3] │ +│ │ │ ADD │ │ │ +└─────────────┘ └─────────────┘ └─────────────┘ +``` \ No newline at end of file diff --git a/awk/scheme/scheme/docs/awk-scheme-prompt.md b/awk/scheme/scheme/docs/awk-scheme-prompt.md new file mode 100644 index 0000000..f7e0414 --- /dev/null +++ b/awk/scheme/scheme/docs/awk-scheme-prompt.md @@ -0,0 +1,189 @@ +# Implementation Request for AWK-based Scheme Virtual Machine + +I need help implementing a stack-based virtual machine for a minimal Scheme implementation in AWK. The implementation should work with standard AWK (gawk features optional). + +## Core Data Structures + +### Value Representation +Values in AWK will be represented as strings with type tags. Each value should be a string with format "TYPE:VALUE" where: +- Numbers: "N:123.45" +- Booleans: "B:1" or "B:0" +- Symbols: "S:symbol-name" +- Pairs: "P:car_idx,cdr_idx" (indices into heap array) +- Functions: "F:addr,env_idx" (instruction address and environment index) +- Nil: "NIL:" + +### Memory Model +Using AWK's associative arrays: +```awk +# Stack - numeric indexed array +stack[stack_ptr] + +# Heap - numeric indexed array for allocated objects +heap[heap_ptr] + +# Environments - numeric indexed array of frames +env[env_ptr] + +# Each environment frame is stored as concatenated strings: +# "name1:val1,name2:val2,..." +``` + +### Instruction Format +Instructions stored in array with format: +```awk +instr[addr] = "OPCODE OPERAND1 OPERAND2..." +``` + +## Core Components Needed + +1. Value Handling +```awk +# Type checking +function isNumber(val) { return substr(val, 1, 2) == "N:" } +function isSymbol(val) { return substr(val, 1, 2) == "S:" } +# etc. + +# Value extraction +function getValue(val) { return substr(val, 3) } +``` + +2. Memory Management +- Simple reference counting using an additional array +- Object allocation through heap_ptr increment +- Basic sweep of unreferenced heap cells + +3. Stack Operations +```awk +# Push and pop +function push(val) { stack[++stack_ptr] = val } +function pop() { return stack[stack_ptr--] } +``` + +4. Environment Management +- Environment represented as chain of frames in env[] array +- Each frame is a string of name:value pairs +- Lookup walks the chain for variable resolution + +## Implementation Steps + +1. Parser/Tokenizer: + - Read instruction format from input file + - Parse into instruction array + - Handle basic syntax for immediate values + +2. Value System: + - Type tag handling + - Value construction + - Type checking + - Value extraction + +3. Core VM: + - Instruction dispatch + - Stack manipulation + - Basic arithmetic + - Flow control + +4. Memory Management: + - Heap allocation + - Reference counting + - Basic garbage collection + +5. Function Handling: + - Call frames + - Return handling + - Tail call optimization + +## Initial Implementation Structure + +```awk +BEGIN { + # Initialize VM state + stack_ptr = 0 + heap_ptr = 0 + env_ptr = 0 + pc = 0 # Program counter +} + +# Main instruction dispatch +function execute(instr) { + split(instr, parts, " ") + op = parts[1] + + if (op == "PUSH_CONST") + push(parts[2]) + else if (op == "ADD") { + b = pop() + a = pop() + push(add(a, b)) + } + # etc. +} + +# Main execution loop +{ + # Load instruction into array + instr[$1] = $0 +} + +END { + # Execute loaded program + while (pc < length(instr)) { + execute(instr[pc++]) + } +} +``` + +## Testing Strategy + +1. Input File Format: +``` +0 PUSH_CONST N:5 +1 PUSH_CONST N:3 +2 ADD +3 HALT +``` + +2. Test Cases: +- Basic arithmetic +- Stack operations +- Function calls +- Error handling +- Memory management + +## AWK-Specific Considerations + +1. String Operations: +- Use split() for parsing +- substr() for type tags +- string concatenation for compound values + +2. Limitations: +- No native types besides numbers and strings +- No recursive function calls in AWK +- Limited stack size +- Memory management needs care + +3. Advantages: +- Associative arrays simplify environment handling +- Built-in string operations help with parsing +- Line-oriented processing suits instruction loading + +## Style Guidelines + +1. Use clear function names: + - makeNumber(n) + - makeSymbol(s) + - etc. + +2. Consistent error handling: + - Set ERRNO + - Print to STDERR + - Exit codes + +3. Document array usage: + - Purpose of each array + - Format of entries + - Lifetime management + +Please start with implementing the value type system and basic stack operations, showing how to represent and manipulate Scheme values in AWK's string-based environment. diff --git a/awk/scheme/scheme/docs/awk-vm-implementation-prompt.md b/awk/scheme/scheme/docs/awk-vm-implementation-prompt.md new file mode 100644 index 0000000..6596589 --- /dev/null +++ b/awk/scheme/scheme/docs/awk-vm-implementation-prompt.md @@ -0,0 +1,226 @@ +# AWK-based Scheme VM Implementation Guide + +I need help implementing a stack-based virtual machine in AWK that will support a minimal Scheme implementation. Please focus on AWK-specific approaches and limitations. + +## VM Architecture Overview + +The VM should be stack-based with these key components: +1. An instruction array storing the program +2. A value stack using numeric-indexed arrays +3. A heap for pairs and closures using associative arrays +4. An environment chain for variable lookup + +## Core Data Types + +Each value should be represented as a string with format "TYPE:VALUE": + +```awk +# Examples +"N:42" # Number +"B:1" # Boolean true +"B:0" # Boolean false +"S:x" # Symbol 'x' +"P:1,2" # Pair (cons cell) with heap indices +"F:100,1" # Function (instruction addr, env index) +"NIL:" # Empty list +``` + +## Instruction Set + +### Stack Operations +``` +PUSH_CONST val # Push constant onto stack + # Example: "PUSH_CONST N:42" + +POP # Remove top value from stack + +DUP # Duplicate top stack value + # Before: [a] + # After: [a a] + +SWAP # Swap top two stack values + # Before: [a b] + # After: [b a] +``` + +### Memory Operations +``` +LOAD_LOCAL idx # Load local variable at index + # Example: "LOAD_LOCAL 0" + +STORE_LOCAL idx # Store into local variable + # Example: "STORE_LOCAL 1" + +MAKE_ENV n # Create new environment frame with n slots + # Example: "MAKE_ENV 2" + +LOAD_FREE idx # Load from closure's environment + # Example: "LOAD_FREE 0" + +STORE_FREE idx # Store into closure's environment + # Example: "STORE_FREE 1" +``` + +### Function Operations +``` +CALL n # Call function with n arguments + # Example: "CALL 2" + +TAIL_CALL n # Tail-call with n arguments + # Example: "TAIL_CALL 1" + +RETURN # Return from function + +MAKE_FUNCTION addr # Create function object + # Example: "MAKE_FUNCTION 100" +``` + +### List Operations +``` +CONS # Create pair from two stack values + # Before: [a b] + # After: [(a . b)] + +CAR # Get first element of pair + # Before: [(a . b)] + # After: [a] + +CDR # Get second element of pair + # Before: [(a . b)] + # After: [b] +``` + +### Arithmetic Operations +``` +ADD # Add top two numbers + # Before: [N:3 N:4] + # After: [N:7] + +SUB # Subtract +MUL # Multiply +DIV # Divide +``` + +### Comparison Operations +``` +EQ # Generic equality test +NUM_LT # Numeric less than +NUM_GT # Numeric greater than +``` + +### Control Flow +``` +JUMP offset # Unconditional jump + # Example: "JUMP 100" + +JUMP_IF_FALSE offset # Jump if top of stack is false + # Example: "JUMP_IF_FALSE 200" +``` + +## Implementation Requirements + +1. Instruction Format: +```awk +# Each instruction stored as string in instr[] array +instr[addr] = "OPCODE [operand1] [operand2]..." +``` + +2. Value Handling: +```awk +# Type checking functions +function isNumber(val) { return substr(val, 1, 2) == "N:" } +function isPair(val) { return substr(val, 1, 2) == "P:" } +# etc. + +# Value constructors +function makeNumber(n) { return "N:" n } +function makePair(car_idx, cdr_idx) { return "P:" car_idx "," cdr_idx } +# etc. +``` + +3. Stack Operations: +```awk +# Basic stack manipulation +function push(val) { stack[++stack_ptr] = val } +function pop() { if (stack_ptr < 1) error("stack underflow"); return stack[stack_ptr--] } +``` + +4. Memory Management: +```awk +# Heap allocation +function allocate(val) { + heap[++heap_ptr] = val + refs[heap_ptr] = 1 + return heap_ptr +} + +# Reference counting +function incref(idx) { if (idx > 0) refs[idx]++ } +function decref(idx) { if (idx > 0 && --refs[idx] == 0) free_cell(idx) } +``` + +## Starting Implementation + +Please help implement this VM following these steps: + +1. Core VM loop: +```awk +BEGIN { + # Initialize VM state + stack_ptr = 0 + heap_ptr = 0 + pc = 0 +} + +# Load program +{ + instr[NR-1] = $0 +} + +END { + # Main execution loop + while (pc < length(instr)) { + execute(instr[pc++]) + } +} +``` + +2. Instruction execution: +```awk +function execute(instr) { + split(instr, parts, " ") + op = parts[1] + + if (op == "PUSH_CONST") + push(parts[2]) + else if (op == "ADD") { + b = pop() + a = pop() + push(add(a, b)) + } + # etc. +} +``` + +Please start by implementing: +1. The basic VM loop +2. Stack operations +3. Arithmetic operations +4. Simple control flow + +Then we can move on to: +1. Function calls and returns +2. Environment handling +3. Cons cells and list operations +4. Garbage collection + +Focus on AWK's strengths: +- Associative arrays for memory management +- String operations for value handling +- Line-oriented processing for instruction loading + +And handle AWK's limitations: +- No native complex types +- Limited recursion +- String-based value representation +- Memory management constraints diff --git a/awk/scheme/scheme/docs/scheme-compilation-examples.md b/awk/scheme/scheme/docs/scheme-compilation-examples.md new file mode 100644 index 0000000..43bae3a --- /dev/null +++ b/awk/scheme/scheme/docs/scheme-compilation-examples.md @@ -0,0 +1,222 @@ +# Scheme to VM Instruction Examples + +## Basic Arithmetic + +### Simple Addition +```scheme +(+ 1 2) +``` +```awk +# VM Instructions +0 PUSH_CONST N:1 +1 PUSH_CONST N:2 +2 ADD +``` + +### Nested Arithmetic +```scheme +(+ (* 3 4) (- 10 5)) +``` +```awk +# VM Instructions +0 PUSH_CONST N:3 +1 PUSH_CONST N:4 +2 MUL # Stack now has 12 +3 PUSH_CONST N:10 +4 PUSH_CONST N:5 +5 SUB # Stack now has 5 +6 ADD # Final result 17 +``` + +## Variable Binding and Use + +### Let Expression +```scheme +(let ((x 5)) + (+ x 1)) +``` +```awk +# VM Instructions +0 MAKE_ENV 1 # Create environment with 1 slot +1 PUSH_CONST N:5 # Push initial value +2 STORE_LOCAL 0 # Store into x's slot +3 LOAD_LOCAL 0 # Load x +4 PUSH_CONST N:1 +5 ADD +``` + +### Nested Let +```scheme +(let ((x 5)) + (let ((y (+ x 1))) + (* x y))) +``` +```awk +# VM Instructions +0 MAKE_ENV 1 # Environment for x +1 PUSH_CONST N:5 +2 STORE_LOCAL 0 # Store x +3 MAKE_ENV 1 # Environment for y +4 LOAD_LOCAL 0 # Load x +5 PUSH_CONST N:1 +6 ADD +7 STORE_LOCAL 0 # Store y +8 LOAD_LOCAL 1 # Load x (from outer env) +9 LOAD_LOCAL 0 # Load y +10 MUL +``` + +## Function Definition and Call + +### Simple Function +```scheme +(define (add1 x) + (+ x 1)) +``` +```awk +# VM Instructions +0 MAKE_FUNCTION 2 # Create function pointing to instruction 2 +1 STORE_GLOBAL 0 # Store in global env slot for add1 +2 MAKE_ENV 1 # Function body starts here +3 LOAD_LOCAL 0 # Load parameter x +4 PUSH_CONST N:1 +5 ADD +6 RETURN +``` + +### Function Call +```scheme +(add1 5) +``` +```awk +# VM Instructions +0 PUSH_CONST N:5 # Push argument +1 LOAD_GLOBAL 0 # Load add1 function +2 CALL 1 # Call with 1 argument +``` + +## List Operations + +### List Construction +```scheme +(cons 1 (cons 2 '())) +``` +```awk +# VM Instructions +0 PUSH_CONST N:1 +1 PUSH_CONST N:2 +2 PUSH_CONST NIL: +3 CONS # Creates (2 . nil) +4 CONS # Creates (1 . (2 . nil)) +``` + +### List Operations +```scheme +(car (cons 1 2)) +``` +```awk +# VM Instructions +0 PUSH_CONST N:1 +1 PUSH_CONST N:2 +2 CONS +3 CAR +``` + +## Conditionals + +### If Expression +```scheme +(if (< x 0) + (- 0 x) + x) +``` +```awk +# VM Instructions +0 LOAD_LOCAL 0 # Load x +1 PUSH_CONST N:0 +2 NUM_LT # Compare x < 0 +3 JUMP_IF_FALSE 7 # Skip to else branch if false +4 PUSH_CONST N:0 +5 LOAD_LOCAL 0 # Load x again +6 SUB # Compute 0 - x +7 JUMP 9 # Skip else branch +8 LOAD_LOCAL 0 # Else branch: just load x +9 NOP # Continue here +``` + +## Closures + +### Create Closure +```scheme +(let ((x 1)) + (lambda (y) (+ x y))) +``` +```awk +# VM Instructions +0 MAKE_ENV 1 # Environment for x +1 PUSH_CONST N:1 +2 STORE_LOCAL 0 # Store x +3 MAKE_FUNCTION 5 # Create function +4 MAKE_CLOSURE 1 # Capture current env +5 MAKE_ENV 1 # Function body starts here +6 LOAD_FREE 0 # Load captured x +7 LOAD_LOCAL 0 # Load parameter y +8 ADD +9 RETURN +``` + +## Recursive Function + +### Factorial +```scheme +(define (factorial n) + (if (= n 0) + 1 + (* n (factorial (- n 1))))) +``` +```awk +# VM Instructions +0 MAKE_FUNCTION 2 # Create factorial function +1 STORE_GLOBAL 0 # Store in global env +2 MAKE_ENV 1 # Function body starts +3 LOAD_LOCAL 0 # Load n +4 PUSH_CONST N:0 +5 NUM_EQ # Compare n = 0 +6 JUMP_IF_FALSE 9 # Skip to else branch +7 PUSH_CONST N:1 # Return 1 for base case +8 RETURN +9 LOAD_LOCAL 0 # Load n +10 LOAD_LOCAL 0 # Load n again +11 PUSH_CONST N:1 +12 SUB # Compute n - 1 +13 LOAD_GLOBAL 0 # Load factorial function +14 TAIL_CALL 1 # Tail call factorial(n-1) +15 MUL # Multiply n * factorial(n-1) +16 RETURN +``` + +## Implementation Notes + +1. Environment Chain +- Each MAKE_ENV creates a new frame +- LOAD_LOCAL counts down the chain for outer references +- STORE_LOCAL works similarly + +2. Closure Creation +- MAKE_CLOSURE captures current environment +- LOAD_FREE accesses captured variables + +3. Tail Calls +- TAIL_CALL reuses current stack frame +- Essential for recursive functions + +4. Memory Management +- CONS allocates in heap +- Reference counting needed for heap objects +- Environment frames need reference counting too + +These examples show typical compilation patterns. The actual compiler would need to: +1. Track variable locations +2. Manage label generation +3. Detect tail call positions +4. Handle lexical scoping properly diff --git a/awk/scheme/scheme/docs/scheme-compiler-impl.md b/awk/scheme/scheme/docs/scheme-compiler-impl.md new file mode 100644 index 0000000..db8a204 --- /dev/null +++ b/awk/scheme/scheme/docs/scheme-compiler-impl.md @@ -0,0 +1,307 @@ +# Scheme Compiler Implementation in AWK + +## Basic Compiler Structure + +We'll implement the compiler as a recursive descent processor that converts Scheme expressions into VM instructions. Here's the core structure: + +```awk +# Global state +BEGIN { + next_label = 0 # For generating unique labels + next_address = 0 # Current instruction address + depth = 0 # Current environment depth +} + +# Main compile function +function compile(expr, parts, len) { + if (isAtom(expr)) + return compileAtom(expr) + + split(expr, parts, " ") + len = length(parts) + + # Strip parentheses + parts[1] = substr(parts[1], 2) # Remove leading ( + parts[len] = substr(parts[len], 1, length(parts[len])-1) # Remove trailing ) + + # Dispatch based on first element + if (parts[1] == "+") + return compileAdd(parts, len) + else if (parts[1] == "let") + return compileLet(parts, len) + else if (parts[1] == "lambda") + return compileLambda(parts, len) + else if (parts[1] == "if") + return compileIf(parts, len) + else + return compileCall(parts, len) +} + +# Utility functions +function isAtom(expr) { + return substr(expr, 1, 1) != "(" +} + +function newLabel() { + return "L" next_label++ +} + +function emit(instr) { + program[next_address++] = instr +} +``` + +## Compilation Patterns + +### 1. Basic Arithmetic + +```awk +function compileAdd(parts, len, i) { + # Compile each argument + for (i = 2; i <= len; i++) + compile(parts[i]) + + # Emit adds between each value + for (i = 3; i <= len; i++) + emit("ADD") + + return next_address - 1 +} + +# Example usage: +# Input: (+ 1 2 3) +# Output: +# PUSH_CONST N:1 +# PUSH_CONST N:2 +# ADD +# PUSH_CONST N:3 +# ADD +``` + +### 2. Let Expressions + +```awk +function compileLet(parts, len, bindings, body, i, num_bindings) { + # Parse let structure + bindings = parts[2] + body = parts[3] + + # Count bindings + split(bindings, binding_parts, " ") + num_bindings = (length(binding_parts) - 2) / 2 # Account for parentheses + + # Create new environment frame + emit("MAKE_ENV " num_bindings) + + # Compile each binding value and store + for (i = 1; i <= num_bindings; i++) { + compile(binding_parts[i * 2]) + emit("STORE_LOCAL " (i - 1)) + } + + # Compile body in new environment + depth++ + compile(body) + depth-- + + return next_address - 1 +} + +# Example usage: +# Input: (let ((x 5)) (+ x 1)) +# Output: +# MAKE_ENV 1 +# PUSH_CONST N:5 +# STORE_LOCAL 0 +# LOAD_LOCAL 0 +# PUSH_CONST N:1 +# ADD +``` + +### 3. Lambda Expressions + +```awk +function compileLambda(parts, len, param_list, body, label_start, label_end) { + label_start = newLabel() + label_end = newLabel() + + # Emit jump around function body + emit("JUMP " label_end) + + # Mark function start + emit(label_start ":") + + # Parse parameters + param_list = parts[2] + body = parts[3] + + # Compile function body + depth++ + compile(body) + emit("RETURN") + depth-- + + # Mark function end + emit(label_end ":") + + # Create function object + emit("MAKE_FUNCTION " label_start) + + return next_address - 1 +} + +# Example usage: +# Input: (lambda (x) (+ x 1)) +# Output: +# JUMP L1 +# L0: +# LOAD_LOCAL 0 +# PUSH_CONST N:1 +# ADD +# RETURN +# L1: +# MAKE_FUNCTION L0 +``` + +### 4. If Expressions + +```awk +function compileIf(parts, len, condition, true_branch, false_branch, label_else, label_end) { + label_else = newLabel() + label_end = newLabel() + + # Compile condition + compile(parts[2]) + + # Jump to else if false + emit("JUMP_IF_FALSE " label_else) + + # Compile true branch + compile(parts[3]) + emit("JUMP " label_end) + + # Compile false branch + emit(label_else ":") + if (len > 4) # If there is an else branch + compile(parts[4]) + + emit(label_end ":") + + return next_address - 1 +} + +# Example usage: +# Input: (if (< x 0) (- 0 x) x) +# Output: +# LOAD_LOCAL 0 +# PUSH_CONST N:0 +# NUM_LT +# JUMP_IF_FALSE L0 +# PUSH_CONST N:0 +# LOAD_LOCAL 0 +# SUB +# JUMP L1 +# L0: +# LOAD_LOCAL 0 +# L1: +``` + +### 5. Function Calls + +```awk +function compileCall(parts, len, i, num_args) { + num_args = len - 1 + + # Compile each argument + for (i = 2; i <= len; i++) + compile(parts[i]) + + # Compile function reference + compile(parts[1]) + + # Emit call instruction + emit("CALL " num_args) + + return next_address - 1 +} + +# Example usage: +# Input: (add1 5) +# Output: +# PUSH_CONST N:5 +# LOAD_GLOBAL 0 +# CALL 1 +``` + +## Symbol Table Management + +```awk +# Track variables and their locations +function enterScope() { + scope_depth++ + next_local = 0 +} + +function leaveScope() { + scope_depth-- +} + +function addLocal(name) { + symbols[scope_depth "," name] = next_local++ +} + +function findVariable(name, i) { + for (i = scope_depth; i >= 0; i--) + if ((i "," name) in symbols) + return "LOCAL " i "," symbols[i "," name] + return "GLOBAL " name +} +``` + +## Example Usage + +Here's how to use the compiler: + +```awk +BEGIN { + # Initialize compiler + next_label = 0 + next_address = 0 + depth = 0 + + # Compile some example code + expr = "(let ((x 5)) (+ x 1))" + compile(expr) + + # Print generated code + for (i = 0; i < next_address; i++) + print i ": " program[i] +} +``` + +## Implementation Notes + +1. Variable Resolution + - Track scope depth for proper variable lookup + - Generate appropriate LOAD/STORE instructions + - Handle closure captures + +2. Label Generation + - Use unique labels for control flow + - Track label addresses for jumps + +3. Environment Management + - Track environment depth + - Generate correct frame sizes + - Handle nested scopes + +4. Error Handling + - Check for syntax errors + - Validate number of arguments + - Report meaningful errors + +5. Optimization Opportunities + - Tail call detection + - Constant folding + - Peephole optimization + - Dead code elimination diff --git a/awk/scheme/scheme/docs/scheme-vm-prompt.md b/awk/scheme/scheme/docs/scheme-vm-prompt.md new file mode 100644 index 0000000..7463494 --- /dev/null +++ b/awk/scheme/scheme/docs/scheme-vm-prompt.md @@ -0,0 +1,112 @@ +# Implementation Request for Scheme Virtual Machine + +I need help implementing a stack-based virtual machine for a minimal Scheme implementation in JavaScript. The implementation should follow these requirements: + +## Core Data Structures + +### Value Representation +Please help me implement a tagged union type for Scheme values with these cases: +- Numbers (using JavaScript numbers) +- Booleans (using JavaScript booleans) +- Symbols (as strings) +- Pairs (cons cells) +- Functions (closures) +- Nil + +Each value should include a type tag and the actual value data. The implementation should be memory efficient while still being easy to debug. + +### Stack Frame +Design a stack frame structure that includes: +- An array for local variables +- An operand stack (array) +- Return address (number) +- Previous frame pointer +- Captured environment reference (for closures) + +### Instruction Format +Each instruction should be represented as an object with: +- opcode (string or number) +- operands (if any) +- source position (for debugging) + +## Core Components Needed + +1. Value Constructor/Accessors +- Functions to create each type of value +- Type predicates (isNumber, isPair, etc.) +- Safe accessors that check types + +2. Memory Management +- A simple mark-and-sweep garbage collector +- Root set scanning of VM stack and global environment +- Object allocation interface + +3. Environment Management +- Environment chain implementation +- Closure creation and variable capture +- Variable lookup and storage + +4. Instruction Interpreter +- Main instruction dispatch loop +- Stack manipulation helpers +- Error handling with meaningful messages +- Debugging support (stack traces) + +## Initial Implementation Steps + +Please help me implement these components in this order: + +1. Value type system and basic operations +2. Stack frame and basic stack operations +3. Main instruction interpreter loop +4. Simple arithmetic and comparison operations +5. Function calls and returns +6. Closure creation and environment handling +7. Cons cells and list operations +8. Basic garbage collection + +## Starting Point + +Please start by showing me how to implement: + +1. The tagged value type system +2. Basic stack operations (push, pop) +3. A simple instruction interpreter that can handle: + - PUSH_CONST + - POP + - ADD + - RETURN + +The code should be in a functional style, avoiding mutation where practical, and should use modern JavaScript features. Please include basic error checking and type safety. + +## Testing Approach + +For each component, I'd like to see: +1. Basic unit tests +2. Example instruction sequences +3. Error cases to handle +4. Edge cases to consider + +The implementation should prioritize correctness and clarity over performance initially. + +## Additional Considerations + +Please also consider: +1. How to handle tail calls efficiently +2. Debug information tracking +3. Error recovery strategies +4. Performance bottlenecks to watch for +5. Future extensibility + +## Style Preferences + +The implementation should: +- Use functional programming patterns where appropriate +- Minimize state mutation +- Use clear naming conventions +- Include detailed comments explaining non-obvious code +- Use TypeScript-style JSDoc comments for better tooling support +- Target modern browsers without external dependencies +- Use ES modules for code organization + +Please start with the value type system implementation, showing how to create and manipulate Scheme values in a type-safe way. diff --git a/awk/scheme/scheme/docs/scheme-vm-spec.md b/awk/scheme/scheme/docs/scheme-vm-spec.md new file mode 100644 index 0000000..424602e --- /dev/null +++ b/awk/scheme/scheme/docs/scheme-vm-spec.md @@ -0,0 +1,130 @@ +# Scheme Virtual Machine Specification + +## Stack and Memory Model +The VM uses a stack-based architecture with a separate heap for storing longer-lived objects. Each stack frame contains: +- Local variable space +- Operand stack +- Return address + +## Data Types +- Numbers (implemented as JavaScript numbers) +- Booleans +- Symbols (interned strings) +- Pairs (cons cells) +- Functions (closures) +- Nil + +## Instructions + +### Stack Operations +- PUSH_CONST value ; Push constant onto stack +- POP ; Remove top value from stack +- DUP ; Duplicate top stack value +- SWAP ; Swap top two stack values + +### Environment Operations +- LOAD_LOCAL idx ; Load local variable at index +- STORE_LOCAL idx ; Store into local variable +- MAKE_CLOSURE n ; Create closure with n free variables +- LOAD_FREE idx ; Load from closure's environment +- STORE_FREE idx ; Store into closure's environment + +### Function Operations +- CALL n ; Call function with n arguments +- TAIL_CALL n ; Tail-call with n arguments +- RETURN ; Return from function +- MAKE_FUNCTION addr ; Create function object + +### Pair Operations +- CONS ; Create pair from two stack values +- CAR ; Get first element of pair +- CDR ; Get second element of pair +- SET_CAR ; Set first element of pair +- SET_CDR ; Set second element of pair + +### Arithmetic Operations +- ADD ; Add top two numbers +- SUB ; Subtract +- MUL ; Multiply +- DIV ; Divide +- REMAINDER ; Modulo operation + +### Comparison Operations +- EQ ; Generic equality test +- NUM_EQ ; Numeric equality +- NUM_LT ; Less than +- NUM_GT ; Greater than + +### Control Flow +- JUMP offset ; Unconditional jump +- JUMP_IF_FALSE offset ; Conditional jump +- JUMP_IF_TRUE offset ; Conditional jump + +### Type Operations +- TYPE_OF ; Get type of value +- IS_PAIR ; Test if value is pair +- IS_NUMBER ; Test if value is number +- IS_SYMBOL ; Test if value is symbol + +## Example Instruction Sequences + +### Function Definition +```scheme +(define (add1 x) (+ x 1)) +``` +Compiles to: +``` +MAKE_FUNCTION L1 +STORE_LOCAL 0 +JUMP L2 +L1: + LOAD_LOCAL 0 ; Load x + PUSH_CONST 1 + ADD + RETURN +L2: +``` + +### List Creation +```scheme +(cons 1 (cons 2 '())) +``` +Compiles to: +``` +PUSH_CONST 1 +PUSH_CONST 2 +PUSH_CONST nil +CONS +CONS +``` + +### Closure Creation +```scheme +(let ((x 1)) + (lambda (y) (+ x y))) +``` +Compiles to: +``` +PUSH_CONST 1 ; Push initial value of x +MAKE_FUNCTION L1 ; Create function body +MAKE_CLOSURE 1 ; Create closure capturing x +JUMP L2 +L1: + LOAD_FREE 0 ; Load captured x + LOAD_LOCAL 0 ; Load parameter y + ADD + RETURN +L2: +``` + +## Implementation Notes + +1. The VM should implement proper tail-call optimization using the TAIL_CALL instruction. + +2. Garbage collection can be implemented using a simple mark-and-sweep collector, scanning the stack and heap for reachable objects. + +3. For efficiency, common constants (small integers, nil, etc.) can be preallocated and shared. + +4. The environment model uses static scope, with closures capturing their creation environment. + +5. Function calls maintain their own stack frame with local variables and operand stack. diff --git a/awk/scheme/scheme/docs/test-program.md b/awk/scheme/scheme/docs/test-program.md new file mode 100644 index 0000000..ee20e32 --- /dev/null +++ b/awk/scheme/scheme/docs/test-program.md @@ -0,0 +1,48 @@ +# Test Program + +Save this as `test.scm.asm`: +``` +PUSH_CONST N:5 +PUSH_CONST N:3 +ADD +PUSH_CONST N:2 +SUB +HALT +``` + +Run with: +```bash +awk -f vm.awk test.scm.asm +``` + +Expected output: +``` +Result: N:6 +``` + +# Additional Test Cases + +## List Operations +``` +PUSH_CONST N:1 +PUSH_CONST N:2 +CONS +PUSH_CONST N:3 +CONS +CAR +HALT +``` + +## Error Cases +``` +# Stack underflow +POP +POP +HALT + +# Type error +PUSH_CONST S:hello +PUSH_CONST N:1 +ADD +HALT +``` diff --git a/awk/scheme/scheme/examples/cons.test.scm b/awk/scheme/scheme/examples/cons.test.scm new file mode 100644 index 0000000..d1e3847 --- /dev/null +++ b/awk/scheme/scheme/examples/cons.test.scm @@ -0,0 +1,3 @@ +(cons (+ 1 2) + (cons (* 3 4) + nil)) diff --git a/awk/scheme/scheme/examples/define.test.scm b/awk/scheme/scheme/examples/define.test.scm new file mode 100644 index 0000000..ec66b04 --- /dev/null +++ b/awk/scheme/scheme/examples/define.test.scm @@ -0,0 +1,2 @@ +(define add2 (x) (+ x 2)) +(add2 40) \ No newline at end of file diff --git a/awk/scheme/scheme/examples/let-and-define.test.scm b/awk/scheme/scheme/examples/let-and-define.test.scm new file mode 100644 index 0000000..fade30b --- /dev/null +++ b/awk/scheme/scheme/examples/let-and-define.test.scm @@ -0,0 +1,9 @@ +; Let expression example +(let ((x 5) (y 3)) + (+ x y)) + +; Function definition example +(define add2 (x) + (+ x 2)) + +(add2 40) ; Returns 42 \ No newline at end of file diff --git a/awk/scheme/scheme/examples/let.test.scm b/awk/scheme/scheme/examples/let.test.scm new file mode 100644 index 0000000..2cdc3b8 --- /dev/null +++ b/awk/scheme/scheme/examples/let.test.scm @@ -0,0 +1,2 @@ +(let ((x 5)) + (+ x 2)) diff --git a/awk/scheme/scheme/scheme b/awk/scheme/scheme/scheme new file mode 100755 index 0000000..cec35d1 --- /dev/null +++ b/awk/scheme/scheme/scheme @@ -0,0 +1,3 @@ +#!/bin/bash +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +exec "$DIR/bin/repl" "$@" diff --git a/awk/scheme/scheme/scratch/complex_test.scm.asm b/awk/scheme/scheme/scratch/complex_test.scm.asm new file mode 100644 index 0000000..67870c3 --- /dev/null +++ b/awk/scheme/scheme/scratch/complex_test.scm.asm @@ -0,0 +1,44 @@ +# Test proper list construction (3 2 1) +# Building the list in proper order: car points to value, cdr points to next pair + +# Start with empty list +PUSH_CONST NIL: # [nil] +PRINT # Print nil + +# Build (1 . nil) +PUSH_CONST NIL: # [nil] +PUSH_CONST N:1 # [nil 1] +SWAP # [1 nil] +CONS # [(1 . nil)] +DUP +PRINT # Print (1 . nil) + +# Build (2 . (1 . nil)) +PUSH_CONST N:2 # [(1.nil) 2] +SWAP # [2 (1.nil)] +CONS # [(2 . (1.nil))] +DUP +PRINT # Print (2 . (1.nil)) + +# Build (3 . (2 . (1 . nil))) +PUSH_CONST N:3 # [(2.(1.nil)) 3] +SWAP # [3 (2.(1.nil))] +CONS # [(3 . (2.(1.nil)))] +DUP +PRINT # Print full structure + +# Test CAR/CDR operations +DUP # Keep a copy of the list for later +DUP # Another copy for CAR +CAR # Get first element (3) +PRINT # Should print 3 + +SWAP # Bring back our spare list copy +CDR # Get rest of list ((2 . (1 . nil))) +DUP +PRINT # Print rest of list + +CAR # Get first of rest (2) +PRINT # Should print 2 + +HALT \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/run.sh b/awk/scheme/scheme/scratch/run.sh new file mode 100755 index 0000000..0afdb41 --- /dev/null +++ b/awk/scheme/scheme/scratch/run.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Compile Scheme to VM instructions +awk -f compiler.awk test.scm > test.asm +# Run VM instructions +awk -f vm.awk test.asm \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/test.asm b/awk/scheme/scheme/scratch/test.asm new file mode 100644 index 0000000..8e7d8df --- /dev/null +++ b/awk/scheme/scheme/scratch/test.asm @@ -0,0 +1,16 @@ +PUSH_CONST N:1 +PUSH_CONST N:2 +ADD +PUSH_CONST N:3 +PUSH_CONST N:4 +MUL +PUSH_CONST N:10 +PUSH_CONST N:2 +PUSH_CONST N:3 +ADD +SUB +PUSH_CONST NIL: +CONS +CONS +CONS +HALT diff --git a/awk/scheme/scheme/scratch/test.scm b/awk/scheme/scheme/scratch/test.scm new file mode 100644 index 0000000..a01b174 --- /dev/null +++ b/awk/scheme/scheme/scratch/test.scm @@ -0,0 +1,8 @@ +;; Build a list of calculated values +(cons (+ 1 2) ; First element: 1 + 2 = 3 + (cons (* 3 4) ; Second element: 3 * 4 = 12 + (cons (- 10 + (+ 2 3)) ; Third element: 10 - (2 + 3) = 5 + nil))) ; End of list + +;; This should create a list: (3 12 5) \ No newline at end of file diff --git a/awk/scheme/scheme/scratch/test.scm.asm b/awk/scheme/scheme/scratch/test.scm.asm new file mode 100644 index 0000000..526e2b1 --- /dev/null +++ b/awk/scheme/scheme/scratch/test.scm.asm @@ -0,0 +1,7 @@ +PUSH_CONST N:5 +PUSH_CONST N:3 +ADD +PUSH_CONST N:2 +MUL +PRINT # should output N:16 +HALT \ No newline at end of file diff --git a/awk/vm/README.md b/awk/vm/README.md new file mode 100644 index 0000000..83a35fd --- /dev/null +++ b/awk/vm/README.md @@ -0,0 +1,91 @@ +# Stack-Based Virtual Machine in AWK + +A simple stack-based virtual machine implementation in AWK, inspired by Forth. The VM provides basic stack manipulation, arithmetic operations, register access, and memory operations. + +## Architecture + +The VM consists of: +- A data stack (100 elements) +- Main memory (1000 cells) +- Three registers: + - A: General purpose register + - B: Often used as memory pointer + - P: Program pointer, used for sequential memory operations + +## Instruction Set + +### Stack Operations +- `DROP` - Remove top item from stack +- `DUP` - Duplicate top stack item +- `OVER` - Copy second item to top of stack +- `SWAP` - Exchange top two stack items +- Numbers are automatically pushed onto the stack + +### Arithmetic Operations +- `+` - Add top two stack items (a b -- a+b) +- `*` - Multiply top two stack items (a b -- a*b) +- `AND` - Bitwise AND of top two items +- `XOR` - Bitwise XOR of top two items +- `NOT` - Bitwise NOT of top item +- `2*` - Multiply top item by 2 (shift left) +- `2/` - Divide top item by 2 (shift right) + +### Register Operations +- `A` - Push value of A register onto stack +- `A!` - Store top stack value into A register +- `B!` - Store top stack value into B register + +### Memory Operations +- `@` - Fetch from memory address on stack (addr -- value) +- `!` - Store to memory address on stack (value addr --) +- `@+` - Fetch from memory at P, then increment P +- `!+` - Store to memory at P, then increment P +- `@B` - Fetch from memory address in B register +- `!B` - Store to memory address in B register +- `@P` - Fetch from memory address in P register +- `!P` - Store to memory address in P register + +### Debug & Control +- `.` - NO-OP (does nothing) +- `BYE` - Exit program +- `SHOW` - Display current machine state (stack, registers, memory) + +## Memory Model + +- Memory is zero-based +- Each cell can hold a numeric value +- Memory is accessed either directly (using @ and !) or through registers +- P register is typically used for sequential memory operations +- B register is typically used as a memory pointer + +## Example Programs + +### Store and retrieve a value +``` +5 DUP 0 ! # Store 5 at address 0 +3 DUP 1 ! # Store 3 at address 1 +0 @ 1 @ + # Load both values and add them +2 ! # Store result at address 2 +``` + +### Using registers +``` +42 A! # Store 42 in A register +A # Push A's value onto stack +100 B! # Set B to address 100 +42 !B # Store 42 at address 100 +@B # Read from address 100 +``` + +## Usage + +```bash +# Run a program directly +echo "5 3 + SHOW" | awk -f vm.awk + +# Compile and run a program +./compiler.py program.coffee | awk -f vm.awk + +# Run test suite +./vm_tests.sh +``` \ No newline at end of file diff --git a/awk/vm/compiler.py b/awk/vm/compiler.py new file mode 100755 index 0000000..a406779 --- /dev/null +++ b/awk/vm/compiler.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 + +""" +A simple compiler that translates CoffeeScript-like syntax to VM instructions. +Example input: + + # Simple arithmetic + x = 5 + y = 3 + z = x + y + + # Using memory + array = [] + array[0] = 42 + array[1] = array[0] * 2 + +Will compile to VM instructions like: + + 5 A! # store 5 in A register + 3 B! # store 3 in B register + A @B + # add them +""" + +import sys +import re + +class Compiler: + def __init__(self): + self.variables = {} # Maps variable names to memory locations + self.next_memory = 0 # Next available memory location + + def allocate_variable(self, name): + """Allocate memory location for a variable""" + if name not in self.variables: + self.variables[name] = self.next_memory + self.next_memory += 1 + return self.variables[name] + + def compile_assignment(self, line): + """Compile assignment statements like 'x = 5' or 'x = y + z'""" + # Remove any comments from the line + line = line.split('#')[0].strip() + + match = re.match(r'(\w+)\s*=\s*(.+)', line) + if not match: + return None + + var_name = match.group(1) + expression = match.group(2) + + print(f"# Compiling assignment: {var_name} = {expression}", file=sys.stderr) + + # First get the memory location + mem_loc = self.allocate_variable(var_name) + + # Then compile the expression + expr_code = self.compile_expression(expression) + if not expr_code: + print(f"# Error: Failed to compile expression: {expression}", file=sys.stderr) + return None + + # Generate code that: + # 1. Evaluates the expression + # 2. Duplicates the result (for storing and leaving on stack) + # 3. Stores at memory location + vm_code = [] + vm_code.extend(expr_code) # Evaluate expression + vm_code.append("DUP") # Make a copy + vm_code.append(str(mem_loc)) # Push memory location + vm_code.append("@") # Read current value (for debugging) + vm_code.append("DROP") # Drop the old value + vm_code.append("!") # Store new value + + return vm_code + + def compile_expression(self, expr): + """Compile expressions like '5', 'x + y', etc.""" + vm_code = [] + + # Remove any comments from the expression + expr = expr.split('#')[0].strip() + + # Handle simple number + if expr.isdigit(): + vm_code.append(expr) + return vm_code + + # Handle variable reference + if expr in self.variables: + vm_code.append(str(self.variables[expr])) + vm_code.append("@") + return vm_code + + # Handle binary operations + ops = { + '+': '+', + '*': '*', + '-': 'NOT +', + } + + # Try each operator + for op in ops: + if op in expr: + parts = expr.split(op, 1) + if len(parts) == 2: + left = parts[0].strip() + right = parts[1].strip() + + print(f"# Debug: left={left}, right={right}", file=sys.stderr) + + # Generate code for left operand + left_code = self.compile_expression(left) + if not left_code: + continue + vm_code.extend(left_code) + + # Generate code for right operand + right_code = self.compile_expression(right) + if not right_code: + continue + vm_code.extend(right_code) + + # Add the operation + vm_code.append(ops[op]) + return vm_code + + return vm_code + + def compile(self, source): + """Compile source code to VM instructions""" + output = [] + debug_output = [] + + for line in source.split('\n'): + line = line.strip() + if not line or line.startswith('#'): + continue + + if line == "SHOW": + output.append("SHOW") + continue + + if '=' in line: + vm_code = self.compile_assignment(line) + if vm_code: + output.extend(vm_code) + debug_output.append(f"{' '.join(vm_code)} # {line}") + if not line.startswith('result ='): # If not final result + output.append("DROP") # Drop the duplicate we left on stack + continue + + print("# Generated VM code:", file=sys.stderr) + for line in debug_output: + print(f"# {line}", file=sys.stderr) + + # Add final SHOW to see the result + output.append("SHOW") + return ' '.join(output) + +def main(): + if len(sys.argv) > 1: + with open(sys.argv[1]) as f: + source = f.read() + else: + source = sys.stdin.read() + + compiler = Compiler() + vm_code = compiler.compile(source) + print(vm_code) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/awk/vm/debug.coffee b/awk/vm/debug.coffee new file mode 100644 index 0000000..0663cec --- /dev/null +++ b/awk/vm/debug.coffee @@ -0,0 +1,6 @@ +x = 5 +SHOW +y = 3 +SHOW +z = x + y +SHOW \ No newline at end of file diff --git a/awk/vm/simple.coffee b/awk/vm/simple.coffee new file mode 100644 index 0000000..0a596a7 --- /dev/null +++ b/awk/vm/simple.coffee @@ -0,0 +1,4 @@ +x = 5 +y = 3 +z = x + y +result = z * 2 \ No newline at end of file diff --git a/awk/vm/simple_test.coffee b/awk/vm/simple_test.coffee new file mode 100644 index 0000000..8fec5ba --- /dev/null +++ b/awk/vm/simple_test.coffee @@ -0,0 +1,8 @@ +x = 5 +SHOW +y = 3 +SHOW +z = x + y # Should be 8 +SHOW +result = z * 2 # Should be 16 +SHOW \ No newline at end of file diff --git a/awk/vm/stack_test.coffee b/awk/vm/stack_test.coffee new file mode 100644 index 0000000..ab29e63 --- /dev/null +++ b/awk/vm/stack_test.coffee @@ -0,0 +1,15 @@ +# First store 5 in memory location 0 +x = 5 +SHOW + +# Then store 3 in memory location 1 +y = 3 +SHOW + +# Add them: load 5, load 3, add them +z = x + y +SHOW + +# Double z: load 8, multiply by 2 +result = z * 2 +SHOW \ No newline at end of file diff --git a/awk/vm/test.coffee b/awk/vm/test.coffee new file mode 100644 index 0000000..aecda14 --- /dev/null +++ b/awk/vm/test.coffee @@ -0,0 +1,7 @@ +# Calculate sum +x = 5 +y = 3 +z = x + y + +# Double it +result = z * 2 diff --git a/awk/vm/test_steps.coffee b/awk/vm/test_steps.coffee new file mode 100644 index 0000000..f1d0415 --- /dev/null +++ b/awk/vm/test_steps.coffee @@ -0,0 +1,15 @@ +# Step 1: Initialize x +x = 5 +SHOW + +# Step 2: Initialize y +y = 3 +SHOW + +# Step 3: Add x and y +z = x + y +SHOW + +# Step 4: Double z +result = z * 2 +SHOW \ No newline at end of file diff --git a/awk/vm/vm.awk b/awk/vm/vm.awk new file mode 100755 index 0000000..67da3e7 --- /dev/null +++ b/awk/vm/vm.awk @@ -0,0 +1,254 @@ +#!/usr/bin/awk -f + + +# Stack: DROP, DUP, OVER, PUSH, POP +# Math: + AND XOR NOT 2* 2/ multiply-step +# Call: JUMP CALL RETURN IF -IF +# Loop: NEXT UNEXT +# Register: A A! B! +# Memory: @ ! @+ !+ @B !B @P !P +# NO-OP: . + + +BEGIN { + # Initialize VM state + stack_pointer = 0 # Points to next free position + pc = 0 # Program counter + MAX_STACK = 100 # Maximum stack size + MAX_MEM = 1000 # Memory size + + # Initialize registers + A = 0 # A register + B = 0 # B register + P = 0 # P register (auxiliary pointer) + + # Stack operations + split("", stack) # Initialize stack array + split("", memory) # Initialize memory array +} + +# Stack operations +function push(value) { + if (stack_pointer >= MAX_STACK) { + print "Stack overflow!" > "/dev/stderr" + exit 1 + } + printf "# DEBUG: Pushing %d onto stack\n", value > "/dev/stderr" + stack[stack_pointer++] = value +} + +function pop() { + if (stack_pointer <= 0) { + print "Stack underflow!" > "/dev/stderr" + exit 1 + } + value = stack[--stack_pointer] + printf "# DEBUG: Popping %d from stack\n", value > "/dev/stderr" + return value +} + +# Basic stack manipulation +function op_drop() { + pop() +} + +function op_dup() { + if (stack_pointer <= 0) { + print "Stack underflow on DUP!" > "/dev/stderr" + exit 1 + } + push(stack[stack_pointer - 1]) +} + +function op_over() { + if (stack_pointer <= 1) { + print "Stack underflow on OVER!" > "/dev/stderr" + exit 1 + } + push(stack[stack_pointer - 2]) +} + +# Basic arithmetic operations +function op_add() { + b = pop() + a = pop() + result = a + b + printf "# DEBUG: Adding %d + %d = %d\n", a, b, result > "/dev/stderr" + push(result) +} + +function op_and() { + b = pop() + a = pop() + # For now, we'll just multiply as a placeholder + # In a real implementation, we'd need to implement proper bitwise AND + push(a * b) +} + +function op_xor() { + b = pop() + a = pop() + # For now, we'll just add as a placeholder + # In a real implementation, we'd need to implement proper bitwise XOR + push(a + b) +} + +function op_not() { + a = pop() + # For now, we'll just negate as a placeholder + # In a real implementation, we'd need to implement proper bitwise NOT + push(-a - 1) +} + +function op_2times() { + a = pop() + push(a * 2) +} + +function op_2div() { + a = pop() + push(int(a / 2)) +} + +function op_multiply() { + b = pop() + a = pop() + result = a * b + printf "# DEBUG: Multiplying %d * %d = %d\n", a, b, result > "/dev/stderr" + push(result) +} + +# Register operations +function op_a() { + push(A) +} + +function op_astore() { + A = pop() +} + +function op_bstore() { + B = pop() +} + +# Memory operations +function op_fetch() { + addr = pop() + value = memory[addr] + printf "# DEBUG: Fetching %d from memory[%d]\n", value, addr > "/dev/stderr" + push(value) +} + +function op_store() { + addr = pop() # First pop the address + value = pop() # Then pop the value + printf "# DEBUG: Storing %d at memory[%d]\n", value, addr > "/dev/stderr" + memory[addr] = value +} + +function op_fetchplus() { + push(memory[P++]) +} + +function op_storeplus() { + memory[P++] = pop() +} + +function op_fetchb() { + push(memory[B]) +} + +function op_storeb() { + memory[B] = pop() +} + +function op_fetchp() { + push(memory[P]) +} + +function op_storep() { + memory[P] = pop() +} + +function print_stack() { + printf "Stack: " + for (i = 0; i < stack_pointer; i++) { + printf "%d ", stack[i] + } + printf "\n" +} + +function print_state() { + print_stack() + printf "Registers: A=%d B=%d P=%d\n", A, B, P + printf "Memory[P]=%d Memory[B]=%d\n", memory[P], memory[B] + printf "Memory: " + for (i = 0; i < 4; i++) { # Show first 4 memory locations + printf "[%d]=%d ", i, memory[i] + } + printf "\n-------------------\n" +} + +function op_swap() { + if (stack_pointer < 2) { + print "Stack underflow on SWAP!" > "/dev/stderr" + exit 1 + } + # Swap the top two elements on the stack + temp = stack[stack_pointer - 1] + stack[stack_pointer - 1] = stack[stack_pointer - 2] + stack[stack_pointer - 2] = temp + printf "# DEBUG: Swapping top two stack elements\n" > "/dev/stderr" +} + +function execute_instruction(inst) { + if (inst ~ /^[0-9]+$/) { + # Numbers are pushed onto the stack + push(int(inst)) + return + } + + if (inst == "BYE") { exit 0 } # not really in the minimal spec as set out by Chuck Moore, but useful for a graceful exit. + if (inst == "DROP") { op_drop(); return } + if (inst == "DUP") { op_dup(); return } + if (inst == "OVER") { op_over(); return } # copy second item to top of stack + if (inst == "SWAP") { op_swap(); return } # swap top two stack items + if (inst == "+") { op_add(); return } + if (inst == "AND") { op_and(); return } + if (inst == "XOR") { op_xor(); return } + if (inst == "NOT") { op_not(); return } + if (inst == "2*") { op_2times(); return } # multiply-step + if (inst == "2/") { op_2div(); return } # divide-step + if (inst == "*") { op_multiply(); return } + if (inst == "A") { op_a(); return } # push A register + if (inst == "A!") { op_astore(); return } # store A register + if (inst == "B!") { op_bstore(); return } # store B register + if (inst == "@") { op_fetch(); return } # fetch from memory + if (inst == "!") { op_store(); return } # store to memory + if (inst == "@+") { op_fetchplus(); return } # fetch from memory at P+ + if (inst == "!+") { op_storeplus(); return } # store to memory at P+ + if (inst == "@B") { op_fetchb(); return } # fetch from memory at B + if (inst == "!B") { op_storeb(); return } # store to memory at B + if (inst == "@P") { op_fetchp(); return } # fetch from memory at P + if (inst == "!P") { op_storep(); return } # store to memory at P + if (inst == ".") { return } # NO-OP + if (inst == "SHOW") { print_state(); return } # show state info + + print "Unknown instruction: " inst > "/dev/stderr" + exit 1 +} + +# Main execution loop +{ + # Remove comments (everything after #) + sub(/#.*$/, "") + + # Skip empty lines after comment removal + if (NF == 0) next + + # Split the input line into words + n = split($0, words) + for (i = 1; i <= n; i++) { + execute_instruction(words[i]) + } +} diff --git a/awk/vm/vm_tests.sh b/awk/vm/vm_tests.sh new file mode 100755 index 0000000..6244c51 --- /dev/null +++ b/awk/vm/vm_tests.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +echo "Running VM tests..." +echo + +echo "Test 1: Basic register A operations" +echo "42 A! A A # Store 42 in A, then read A twice" | awk -f vm.awk +echo + +echo "Test 2: Register A and B with memory operations" +echo "100 A! 200 B! 42 @B # Store 100 in A, 200 in B, read from B's address" | awk -f vm.awk +echo + +echo "Test 3: Sequential memory operations using P register" +echo "0 !P 1 !+ 2 !+ 3 !+ 4 !+ 5 !+ # Store 1-5 sequentially using P" | awk -f vm.awk +echo "0 !P @+ @+ @+ @+ @+ # Read back values using P" | awk -f vm.awk +echo + +echo "Test 4: Complex register manipulation" +echo "42 A! A DUP B! @B # Store 42 in A, copy to B, read via B" | awk -f vm.awk +echo + +echo "Test 5: Register arithmetic" +echo "5 A! 3 B! A @B + # Store 5 in A, 3 in B, add A + mem[B]" | awk -f vm.awk +echo + +echo "Test 6: Memory pointer operations" +echo "42 0 ! 1 !P @P # Store 42 at addr 0, point P to 1, read P" | awk -f vm.awk +echo + +echo "Test 7: Register and memory interaction" +echo "10 A! 20 B! A @B ! # Store A's value at B's address" | awk -f vm.awk +echo "@B # Read from memory at B's address" | awk -f vm.awk +echo + +echo "Test 8: Demonstrating B! vs !B" +echo "100 B! # Set B register to 100" | awk -f vm.awk +echo "42 !B # Store 42 at memory location 100" | awk -f vm.awk +echo "@B # Read from memory location 100" | awk -f vm.awk +echo + +echo "Tests completed." \ No newline at end of file |