diff options
Diffstat (limited to 'awk/vm/compiler.py')
-rwxr-xr-x | awk/vm/compiler.py | 172 |
1 files changed, 172 insertions, 0 deletions
#!/usr/bin/env python3

"""
A simple compiler that translates CoffeeScript-like syntax to VM instructions.

Example input:

    # Simple arithmetic
    x = 5
    y = 3
    result = x + y

Compiles to one line of space-separated VM words:

    5 DUP 0 @ DROP ! DROP 3 DUP 1 @ DROP ! DROP 0 @ 1 @ + DUP 2 @ DROP ! SHOW

Supported input:

  * ``name = expression`` assignments, where an expression is built from
    non-negative integer literals, previously assigned variables and the
    binary operators ``+``, ``-`` and ``*`` (no parentheses);
  * a bare ``SHOW`` line, which is passed through to the output;
  * blank lines and ``#`` comments, which are ignored.

Anything else (for example array syntax such as ``array[0] = 42``) is not
compiled and produces no VM code.
"""

import sys
import re


class Compiler:
    """Translate assignment statements into a flat list of VM words.

    Every variable is given its own memory cell, numbered in the order in
    which the variables first appear on the left of an assignment.
    """

    # Binary operators as (source symbol, VM words), listed from LOWEST to
    # HIGHEST precedence. An expression is split at the loosest-binding
    # operator first so that tighter-binding ones end up grouped inside the
    # operands.
    # NOTE(review): '-' is emitted as "NOT +"; this assumes the target VM's
    # NOT followed by + performs a subtraction - confirm against the VM.
    _BINARY_OPS = (
        ('+', '+'),
        ('-', 'NOT +'),
        ('*', '*'),
    )

    def __init__(self):
        self.variables = {}   # Maps variable names to memory locations
        self.next_memory = 0  # Next available memory location

    def allocate_variable(self, name: str) -> int:
        """Return the memory location of *name*, allocating one if needed."""
        if name not in self.variables:
            self.variables[name] = self.next_memory
            self.next_memory += 1
        return self.variables[name]

    def compile_assignment(self, line):
        """Compile an assignment such as ``x = 5`` or ``x = y + z``.

        Returns the list of VM words for the statement, or ``None`` when the
        line is not an assignment or its expression cannot be compiled (an
        error note is written to stderr in the latter case).
        """
        # Remove any trailing comment from the line
        line = line.split('#')[0].strip()

        match = re.match(r'(\w+)\s*=\s*(.+)', line)
        if not match:
            return None

        var_name = match.group(1)
        expression = match.group(2)

        print(f"# Compiling assignment: {var_name} = {expression}", file=sys.stderr)

        # The target is allocated BEFORE the expression is compiled, so an
        # expression may refer to the variable being assigned even on its
        # first assignment.
        # NOTE(review): that reads a cell that was never written - confirm
        # this is intended.
        mem_loc = self.allocate_variable(var_name)

        expr_code = self.compile_expression(expression)
        if not expr_code:
            print(f"# Error: Failed to compile expression: {expression}", file=sys.stderr)
            return None

        # Evaluate the expression, keep a copy of the result on the stack and
        # store it at the variable's memory location.
        # NOTE(review): the "@ DROP" pair is kept exactly as originally
        # emitted; whether it leaves the address in place for "!" depends on
        # the VM's definition of "@" - confirm against the VM.
        vm_code = list(expr_code)     # Evaluate expression
        vm_code.append("DUP")         # Make a copy
        vm_code.append(str(mem_loc))  # Push memory location
        vm_code.append("@")           # Read current value (for debugging)
        vm_code.append("DROP")        # Drop the old value
        vm_code.append("!")           # Store new value
        return vm_code

    def compile_expression(self, expr) -> list:
        """Compile an expression such as ``5``, ``x`` or ``x + y * 2``.

        Returns the VM words that leave the expression's value on the stack,
        or an empty list when the expression cannot be compiled (unknown
        variable, missing operand, unsupported syntax).

        ``*`` binds tighter than ``+`` and ``-``, and operators of equal
        precedence associate to the left, so ``10 - 2 - 3`` is compiled as
        ``(10 - 2) - 3``.
        """
        # Remove any trailing comment from the expression
        expr = expr.split('#')[0].strip()

        # Simple number
        if expr.isdigit():
            return [expr]

        # Variable reference: push its address and fetch
        if expr in self.variables:
            return [str(self.variables[expr]), "@"]

        # Binary operation: split at the LAST occurrence of the loosest
        # operator present, which yields left-associative grouping.
        for symbol, vm_words in self._BINARY_OPS:
            if symbol not in expr:
                continue

            left, right = (part.strip() for part in expr.rsplit(symbol, 1))
            print(f"# Debug: left={left}, right={right}", file=sys.stderr)

            left_code = self.compile_expression(left)
            if not left_code:
                continue
            right_code = self.compile_expression(right)
            if not right_code:
                continue

            # The result is assembled only once BOTH operands compiled, so a
            # failed attempt never leaks a partial operand into the output.
            return left_code + right_code + [vm_words]

        return []

    def compile(self, source: str) -> str:
        """Compile *source* and return the VM program as one string.

        Each assignment leaves a copy of its value on the stack; that copy is
        dropped again unless the assignment targets ``result``, whose value is
        left for the ``SHOW`` that is always appended at the end. A summary of
        the generated code is written to stderr.
        """
        output = []
        debug_output = []

        for line in source.split('\n'):
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            if line == "SHOW":
                output.append("SHOW")
                continue

            if '=' not in line:
                continue

            vm_code = self.compile_assignment(line)
            if not vm_code:
                continue

            output.extend(vm_code)
            debug_output.append(f"{' '.join(vm_code)} # {line}")
            # Accept the same spacing around '=' that compile_assignment
            # accepts, so "result=5" is recognised as the final result too.
            if not re.match(r'result\s*=', line):
                output.append("DROP")  # Drop the duplicate we left on stack

        print("# Generated VM code:", file=sys.stderr)
        for entry in debug_output:
            print(f"# {entry}", file=sys.stderr)

        # Add final SHOW to see the result
        output.append("SHOW")
        return ' '.join(output)


def main():
    """Compile the file named by the first argument, or stdin if none is given."""
    if len(sys.argv) > 1:
        with open(sys.argv[1]) as f:
            source = f.read()
    else:
        source = sys.stdin.read()

    compiler = Compiler()
    vm_code = compiler.compile(source)
    print(vm_code)


if __name__ == '__main__':
    main()