about summary refs log tree commit diff stats
path: root/awk/rawk/scratch/rawk_final.awk
blob: 7edea0a064267ccb2f9c474b6eae9ec4692900f8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!/usr/bin/env awk -f

# rawk_final.awk - Block-based functional programming language for awk
# Author: @eli_oat
# License: Public Domain
# Version: 1.0.0
# 
# This implementation uses a simple state machine without function calls
# to avoid all variable scoping issues.

# USAGE:
#   awk -f rawk_final.awk input.rawk | awk -f -
#   awk -f rawk_final.awk input.rawk > output.awk

# -----------------------------------------------------------------------------
# VARIABLES
# -----------------------------------------------------------------------------

# Parser state is initialised exactly once, before any input is read.
# These assignments must live in a BEGIN rule: written bare at the top level,
# awk parses each one as an expression *pattern* and re-evaluates it for every
# input record, which would reset the state machine (and the line and function
# counters) on every single line.
BEGIN {
    # State tracking - use simple integers
    state = 0  # 0=normal, 1=in_rawk_block, 2=in_function
    brace_count = 0
    line_count = 0

    # Function tracking (real entries are stored from index 1 upwards;
    # slot 0 is only a placeholder)
    function_count = 0
    function_names[0] = ""
    function_args[0] = ""
    function_bodies[0] = ""

    # Error tracking (real entries are stored from index 1 upwards)
    error_count = 0
    errors[0] = ""
}

# -----------------------------------------------------------------------------
# MAIN PARSING LOGIC
# -----------------------------------------------------------------------------

# Main rule: runs for every input line and drives the parser state machine.
#
#   state 0  outside any RAWK block: awk code is echoed to stdout
#   state 1  inside RAWK { ... }:    only function definitions are accepted
#   state 2  inside a function body: lines are collected in function_bodies[]
#
# brace_count is the running brace depth since the RAWK block was opened
# (1 = directly inside the block).  func_depth remembers the depth at which
# the current function was opened; the function ends when the depth returns
# to that value, not when it returns to zero.
#
# Limitation: braces are counted textually, so braces inside string
# literals, regexes or trailing comments are counted as well.
{
    line_count++

    # Skip comments and empty lines (in every state)
    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
        next
    }

    # STATE 0: Normal state (outside RAWK blocks)
    if (state == 0) {
        # Check for RAWK block start
        if ($0 ~ /^[ \t]*RAWK[ \t]*\{/) {
            state = 1
            brace_count = 1
            next
        }

        # Check for function definition outside RAWK block
        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Function definition outside RAWK block\n  %s\n  Expected: Place function definitions inside RAWK { ... } block", 
                                         line_count, $0)
            next
        }

        # Regular awk code - pass through unchanged
        print $0
        next
    }

    # Inside a RAWK block (state 1 or 2): update the brace depth.
    # Counting is done on a copy so that $0 is never rewritten.
    depth = brace_count                 # depth before this line
    scratch = $0
    brace_count += gsub(/\{/, "", scratch)
    brace_count -= gsub(/\}/, "", scratch)
    body_line = $0                      # text that may belong to a function body

    if (state == 1) {
        # STATE 1: Inside RAWK block, between function definitions
        if ($0 ~ /^[ \t]*RAWK[ \t]*\{/) {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Nested RAWK blocks not allowed\n  %s\n  Expected: Close the current RAWK block first", 
                                         line_count, $0)
            next
        }

        if ($0 !~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
            # Not a well-formed function header.

            # Check for function definition without braces
            if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^{]/) {
                error_count++
                errors[error_count] = sprintf("Error at line %d: Function definition missing braces\n  %s\n  Expected: Use: $name = (args) -> { statements; }", 
                                             line_count, $0)
                next
            }

            # Check if RAWK block is complete
            if (brace_count <= 0) {
                if (brace_count < 0) {
                    # More closing braces than were ever opened.
                    error_count++
                    errors[error_count] = sprintf("Error at line %d: Unbalanced closing brace inside RAWK block\n  %s\n  Expected: Remove the extra '}'", 
                                                 line_count, $0)
                }
                state = 0
                next
            }

            # Other code inside RAWK block (should be rare)
            if (!($0 ~ /^[ \t]*\$/)) {
                error_count++
                errors[error_count] = sprintf("Error at line %d: Invalid code inside RAWK block\n  %s\n  Expected: Only function definitions are allowed inside RAWK blocks", 
                                             line_count, $0)
            }
            next
        }

        # Function header: $name = (args) -> {
        # The header regex above guarantees that both matches succeed.
        match($0, /\$[a-zA-Z_][a-zA-Z0-9_]*/)
        func_name = substr($0, RSTART + 1, RLENGTH - 1)      # drop the leading '$'

        match($0, /\([^)]*\)/)
        func_args = substr($0, RSTART + 1, RLENGTH - 2)      # drop the parentheses
        gsub(/^[ \t]+|[ \t]+$/, "", func_args)

        function_count++
        function_names[function_count] = func_name
        function_args[function_count] = func_args
        function_bodies[function_count] = ""

        # Whatever follows the opening brace on the header line is already
        # body text; this is what makes single-line definitions work.
        match($0, /->[ \t]*\{/)
        body_line = substr($0, RSTART + RLENGTH)
        sub(/^[ \t]+/, "", body_line)

        func_depth = depth      # depth the function must return to when it closes
        depth++                 # account for the header's own opening brace
        state = 2
        # fall through: the rest of the header line is handled as body text
    } else if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
        # STATE 2 and another function header appears inside the body
        error_count++
        errors[error_count] = sprintf("Error at line %d: Nested function definitions not allowed\n  %s\n  Expected: Close the current function first", 
                                     line_count, $0)
    }

    # STATE 2: Inside function definition
    if (brace_count <= func_depth) {
        # The function's closing brace is on this line.  Walk the text to find
        # exactly that brace; everything from it onwards (the brace itself, a
        # trailing ';', or the RAWK block's own '}') is not part of the body.
        for (pos = 1; pos <= length(body_line); pos++) {
            ch = substr(body_line, pos, 1)
            if (ch == "{") {
                depth++
            } else if (ch == "}") {
                depth--
                if (depth == func_depth) {
                    body_line = substr(body_line, 1, pos - 1)
                    break
                }
            }
        }
        sub(/[ \t]+$/, "", body_line)

        # Back to the RAWK block, or out of it if it was closed on this line too.
        state = (brace_count > 0) ? 1 : 0
    }

    # Add line to function body
    if (body_line !~ /^[ \t]*$/) {
        function_bodies[function_count] = function_bodies[function_count] "\n    " body_line
    }
    next
}

# -----------------------------------------------------------------------------
# CODE GENERATION
# -----------------------------------------------------------------------------

# END rule: validate the final parser state, then either report every
# collected error on stderr (exit status 1) or emit the generated awk code:
# the built-in assert/expect_equal helpers followed by one awk function per
# definition recorded in function_names/function_args/function_bodies.
END {
    # Input ended while still inside a RAWK block or a function body
    if (state) {
        errors[++error_count] = sprintf("Error at line %d: Unclosed RAWK block\n  Expected: Add closing brace '}' to close the RAWK block", 
                                        line_count)
    }

    # Any error aborts code generation
    if (error_count > 0) {
        idx = 1
        while (idx <= error_count) {
            print errors[idx] > "/dev/stderr"
            idx++
        }
        exit 1
    }

    # Standard library, assembled as one string; the final print supplies the
    # trailing newline that produces the blank separator line.
    stdlib = "\n" \
        "# Standard library functions\n" \
        "function assert(condition, message) {\n" \
        "    if (!condition) {\n" \
        "        print \"Assertion failed: \" message > \"/dev/stderr\"\n" \
        "        exit 1\n" \
        "    }\n" \
        "}\n" \
        "\n" \
        "function expect_equal(actual, expected, message) {\n" \
        "    if (actual != expected) {\n" \
        "        print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\"\n" \
        "        exit 1\n" \
        "    }\n" \
        "}\n"
    print stdlib

    # Nothing more to emit when no functions were defined
    if (function_count <= 0) {
        exit
    }

    # One awk function per rawk definition; each stored body already begins
    # with its own newline and indentation.
    print "# User-defined functions"
    for (idx = 1; idx <= function_count; idx++) {
        printf "function %s(%s) {%s\n}\n\n", function_names[idx], function_args[idx], function_bodies[idx]
    }
}