about summary refs log tree commit diff stats
path: root/awk/vm/compiler.py
blob: a406779b8935070e7e4209f1fd9266d7eceb4242 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env python3

"""
A simple compiler that translates CoffeeScript-like syntax to VM instructions.
Example input:
    
    # Simple arithmetic
    x = 5
    y = 3
    z = x + y
    
    # Using memory (array syntax is not implemented yet; these lines are skipped)
    array = []
    array[0] = 42
    array[1] = array[0] * 2
    
The arithmetic part compiles to a single line of VM words:
    
    5 DUP 0 @ DROP ! DROP 3 DUP 1 @ DROP ! DROP 0 @ 1 @ + DUP 2 @ DROP ! DROP SHOW

where x, y and z are stored in memory cells 0, 1 and 2.
"""

import sys
import re

class Compiler:
    """Translate simple assignment statements into stack-VM instruction words.

    Supported input is one statement per line:

    * ``name = expression`` where the expression is built from unsigned
      integer literals, previously assigned variable names and the binary
      operators ``+``, ``-`` and ``*`` (no parentheses, no unary minus);
    * a bare ``SHOW`` line, which is passed through unchanged;
    * blank lines and ``#`` comments, which are ignored.

    Each variable is given its own memory cell, numbered from 0 in order of
    first successful assignment.
    """

    # Source operator -> VM word(s) emitted after both operands are pushed.
    # NOTE(review): if the VM's NOT is a bitwise complement, `a b NOT +`
    # evaluates to a - b - 1 rather than a - b; confirm against the VM.
    _BINARY_OPS = {
        '+': '+',
        '*': '*',
        '-': 'NOT +',
    }

    # Operator groups ordered from lowest to highest precedence. Operators in
    # the same group bind equally and associate to the left.
    _PRECEDENCE_LEVELS = (('+', '-'), ('*',))

    def __init__(self):
        self.variables = {}  # Maps variable names to memory locations
        self.next_memory = 0  # Next available memory location

    def allocate_variable(self, name):
        """Return the memory location of ``name``, allocating one if needed.

        A name seen for the first time receives ``self.next_memory``, which is
        then incremented; a known name keeps its existing location.
        """
        if name not in self.variables:
            self.variables[name] = self.next_memory
            self.next_memory += 1
        return self.variables[name]

    def compile_assignment(self, line):
        """Compile an assignment statement such as 'x = 5' or 'x = y + z'.

        Args:
            line: One source line; anything after a '#' is ignored.

        Returns:
            A list of VM words that evaluate the expression and store it in
            the target variable, leaving one copy of the value on the stack.
            ``None`` if the line is not of the form ``name = expression`` or
            the expression cannot be compiled (an error is printed to stderr
            in the latter case).
        """
        # Remove any comments from the line
        line = line.split('#')[0].strip()

        match = re.match(r'(\w+)\s*=\s*(.+)', line)
        if not match:
            return None

        var_name, expression = match.groups()

        print(f"# Compiling assignment: {var_name} = {expression}", file=sys.stderr)

        # Compile the right-hand side BEFORE allocating the target. Otherwise
        # a failed assignment would still leave the variable "defined", and
        # 'x = x + 1' on an undefined x would read a cell never written to.
        expr_code = self.compile_expression(expression)
        if not expr_code:
            print(f"# Error: Failed to compile expression: {expression}", file=sys.stderr)
            return None

        mem_loc = self.allocate_variable(var_name)

        # Evaluate, duplicate the result (one copy to store, one left on the
        # stack for the caller), then emit the store sequence.
        # NOTE(review): with Forth-style stack effects '@' consumes the
        # address, so after '@ DROP' the '!' would no longer see it. The
        # sequence is kept as-is; confirm against the VM's '@' / '!'.
        return expr_code + [
            "DUP",          # Make a copy
            str(mem_loc),   # Push memory location
            "@",            # Read current value (for debugging)
            "DROP",         # Drop the old value
            "!",            # Store new value
        ]

    def compile_expression(self, expr):
        """Compile an expression such as '5', 'x' or 'x + y * 2' to postfix.

        ``*`` binds tighter than ``+`` and ``-``, and operators of equal
        precedence associate to the left, so ``8 - 2 - 3`` is ``(8 - 2) - 3``.

        Args:
            expr: Expression text; anything after a '#' is ignored.

        Returns:
            A list of VM words that leave the expression's value on the
            stack, or an empty list if the expression cannot be compiled
            (unknown variable, missing operand, unsupported syntax). No
            partial code is ever returned.
        """
        # Remove any comments from the expression
        expr = expr.split('#')[0].strip()

        # Handle simple number
        if expr.isdigit():
            return [expr]

        # Handle variable reference: push its address and fetch
        if expr in self.variables:
            return [str(self.variables[expr]), "@"]

        # Split at the RIGHTMOST operator of the LOWEST precedence present:
        # that operator is applied last, which yields correct precedence and
        # left associativity for a parenthesis-free expression.
        for operators in self._PRECEDENCE_LEVELS:
            split_at = max(expr.rfind(op) for op in operators)
            if split_at < 0:
                continue

            op = expr[split_at]
            left = expr[:split_at].strip()
            right = expr[split_at + 1:].strip()

            print(f"# Debug: left={left}, right={right}", file=sys.stderr)

            left_code = self.compile_expression(left)
            if not left_code:
                return []

            right_code = self.compile_expression(right)
            if not right_code:
                return []

            return left_code + right_code + [self._BINARY_OPS[op]]

        return []

    def compile(self, source):
        """Compile source code to a single space-separated string of VM words.

        Blank lines and lines starting with '#' are ignored, a bare ``SHOW``
        is passed through, and every other line is treated as an assignment.
        The copy of the value that an assignment leaves on the stack is
        dropped unless the line starts with 'result ='. Lines that cannot be
        compiled are skipped with a warning on stderr. A trailing ``SHOW`` is
        always appended, and a listing of the generated code per source line
        is printed to stderr.

        Args:
            source: The full program text.

        Returns:
            The generated VM words joined by single spaces.
        """
        output = []
        debug_output = []

        for line in source.split('\n'):
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            if line == "SHOW":
                output.append("SHOW")
                continue

            vm_code = self.compile_assignment(line) if '=' in line else None
            if not vm_code:
                # Make dropped statements visible instead of silently
                # producing a program that does less than the source says.
                print(f"# Warning: skipping unsupported line: {line}", file=sys.stderr)
                continue

            output.extend(vm_code)
            debug_output.append(f"{' '.join(vm_code)}  # {line}")
            if not line.startswith('result ='):  # If not final result
                output.append("DROP")  # Drop the duplicate we left on stack

        print("# Generated VM code:", file=sys.stderr)
        for entry in debug_output:
            print(f"# {entry}", file=sys.stderr)

        # Add final SHOW to see the result
        output.append("SHOW")
        return ' '.join(output)

def main():
    """Compile a program and print the resulting VM code to stdout.

    The program text is read from the file named by the first command-line
    argument when one is given, otherwise from standard input.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        with open(cli_args[0]) as src_file:
            source = src_file.read()
    else:
        source = sys.stdin.read()

    print(Compiler().compile(source))

# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()