1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
|
#!/usr/bin/env awk -f
# rawk_dispatch.awk - Block-based functional programming language for awk
# Author: @eli_oat
# License: Public Domain
# Version: 1.0.0
#
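# This implementation uses a dispatch pattern: dispatch_parse() selects one
# handler function per parser state, and the parser state is handed to the
# handlers as arguments instead of being read from global variables.
# Note that awk passes scalars by value and arrays by reference, so a change a
# handler makes to a scalar argument is not visible to its caller.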
# USAGE:
# awk -f rawk_dispatch.awk input.rawk | awk -f -
# awk -f rawk_dispatch.awk input.rawk > output.awk
# -----------------------------------------------------------------------------
# DISPATCH FUNCTIONS
# -----------------------------------------------------------------------------
# Dispatch function to handle different parsing states
function dispatch_parse(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line) {
if (state == 0) {
return handle_normal_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line)
} else if (state == 1) {
return handle_rawk_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line)
} else if (state == 2) {
return handle_function_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line)
}
}
# Handle normal state (outside RAWK blocks)
function handle_normal_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line) {
# Check for RAWK block start
if (line ~ /^[ \t]*RAWK[ \t]*\{/) {
if (state != 0) {
error_count++
errors[error_count] = sprintf("Error at line %d: Nested RAWK blocks not allowed\n %s\n Expected: Close the current RAWK block first",
line_count, line)
} else {
state = 1
brace_count = 1
}
return "next"
}
# Check for function definition outside RAWK block
if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
error_count++
errors[error_count] = sprintf("Error at line %d: Function definition outside RAWK block\n %s\n Expected: Place function definitions inside RAWK { ... } block",
line_count, line)
return "next"
}
# Regular awk code - pass through unchanged
print line
return "continue"
}
# Handle RAWK block state
function handle_rawk_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line) {
# Count braces
open_braces = gsub(/\{/, "&", line)
close_braces = gsub(/\}/, "&", line)
brace_count += open_braces - close_braces
# Check for function definition
if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
if (state == 2) {
error_count++
errors[error_count] = sprintf("Error at line %d: Nested function definitions not allowed\n %s\n Expected: Close the current function first",
line_count, line)
} else {
state = 2
# Parse function header inline
if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
func_name = substr(line, RSTART + 1, RLENGTH - 1)
} else {
error_count++
errors[error_count] = sprintf("Error at line %d: Invalid function name\n %s\n Expected: Function names must start with $ and contain only letters, numbers, and underscores",
line_count, line)
return "next"
}
if (match(line, /\(([^)]*)\)/)) {
func_args = substr(line, RSTART + 1, RLENGTH - 2)
gsub(/^[ \t]+|[ \t]+$/, "", func_args)
} else {
error_count++
errors[error_count] = sprintf("Error at line %d: Invalid function arguments\n %s\n Expected: Function arguments must be enclosed in parentheses",
line_count, line)
return "next"
}
function_count++
function_names[function_count] = func_name
function_args[function_count] = func_args
function_bodies[function_count] = ""
}
return "next"
}
# Check for function definition without braces
if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^{]/) {
error_count++
errors[error_count] = sprintf("Error at line %d: Function definition missing braces\n %s\n Expected: Use: $name = (args) -> { statements; }",
line_count, line)
return "next"
}
# Check if RAWK block is complete
if (brace_count == 0) {
state = 0
return "next"
}
# Other code inside RAWK block (should be rare)
if (!(line ~ /^[ \t]*\$/)) {
error_count++
errors[error_count] = sprintf("Error at line %d: Invalid code inside RAWK block\n %s\n Expected: Only function definitions are allowed inside RAWK blocks",
line_count, line)
}
return "next"
}
# Handle function state (inside function definition)
function handle_function_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line) {
# Count braces
open_braces = gsub(/\{/, "&", line)
close_braces = gsub(/\}/, "&", line)
brace_count += open_braces - close_braces
# Add line to function body (skip the opening brace line)
if (!(line ~ /^[ \t]*\{/)) {
function_bodies[function_count] = function_bodies[function_count] "\n " line
}
# Check if function is complete
if (brace_count == 0) {
state = 1
}
return "next"
}
# -----------------------------------------------------------------------------
# MAIN PARSING LOGIC
# -----------------------------------------------------------------------------
{
line_count++
# Skip comments and empty lines
if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
next
}
# Initialize state arrays if not already done
if (function_count == 0) {
function_names[0] = ""
function_args[0] = ""
function_bodies[0] = ""
errors[0] = ""
}
# Dispatch to appropriate handler
result = dispatch_parse(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, $0)
if (result == "next") {
next
}
}
# -----------------------------------------------------------------------------
# CODE GENERATION
# -----------------------------------------------------------------------------
END {
# Check for unclosed blocks
if (state != 0) {
error_count++
errors[error_count] = sprintf("Error at line %d: Unclosed RAWK block\n Expected: Add closing brace '}' to close the RAWK block",
line_count)
}
# Output errors if any
if (error_count > 0) {
for (i = 1; i <= error_count; i++) {
print errors[i] > "/dev/stderr"
}
exit 1
}
# Generate standard library functions
print ""
print "# Standard library functions"
print "function assert(condition, message) {"
print " if (!condition) {"
print " print \"Assertion failed: \" message > \"/dev/stderr\""
print " exit 1"
print " }"
print "}"
print ""
print "function expect_equal(actual, expected, message) {"
print " if (actual != expected) {"
print " print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\""
print " exit 1"
print " }"
print "}"
print ""
# Generate user-defined functions
if (function_count > 0) {
print "# User-defined functions"
for (i = 1; i <= function_count; i++) {
print "function " function_names[i] "(" function_args[i] ") {" function_bodies[i]
print "}"
print ""
}
}
}
|