diff options
Diffstat (limited to 'awk/rawk/scratch/rawk_final.awk')
-rw-r--r-- | awk/rawk/scratch/rawk_final.awk | 215 |
1 files changed, 215 insertions, 0 deletions
#!/usr/bin/env awk -f

# rawk_final.awk - Block-based functional programming language for awk
# Author: @eli_oat
# License: Public Domain
# Version: 1.0.0
#
# Reads a rawk source file and writes plain awk to stdout:
#   * lines outside a RAWK { ... } block are passed through unchanged
#   * inside a RAWK block, definitions of the form
#         $name = (args) -> { statements; }
#     are collected and emitted at the end as
#         function name(args) { statements; }
#   * two helper functions (assert, expect_equal) are always emitted
#
# This implementation uses a simple state machine without function calls
# to avoid all variable scoping issues.
#
# Known limitations (follow directly from the line-based scanning below):
#   * every line whose first non-blank character is '#', and every blank
#     line, is dropped - including inside function bodies and in
#     pass-through code
#   * braces are counted textually, so '{' or '}' inside string literals or
#     regex literals also affect the nesting depth

# USAGE:
#   awk -f rawk_final.awk input.rawk | awk -f -
#   awk -f rawk_final.awk input.rawk > output.awk

# -----------------------------------------------------------------------------
# VARIABLES
# -----------------------------------------------------------------------------

# These assignments must live in BEGIN. Written as bare top-level statements
# they are pattern-only rules, which awk re-evaluates for every input record,
# resetting the parser state on each line.
BEGIN {
    # State tracking - use simple integers
    state = 0          # 0=normal, 1=in_rawk_block, 2=in_function
    brace_count = 0    # current brace nesting depth inside a RAWK block
    func_base = 0      # brace depth at which the current function was opened
    line_count = 0

    # Function tracking (entries are stored at indices 1..function_count)
    function_count = 0

    # Error tracking (entries are stored at indices 1..error_count)
    error_count = 0
}

# -----------------------------------------------------------------------------
# MAIN PARSING LOGIC
# -----------------------------------------------------------------------------

{
    line_count++

    # Skip comments and empty lines
    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
        next
    }

    # STATE 0: Normal state (outside RAWK blocks)
    if (state == 0) {
        # Check for RAWK block start
        if ($0 ~ /^[ \t]*RAWK[ \t]*\{/) {
            state = 1
            brace_count = 1
            next
        }

        # Check for function definition outside RAWK block
        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Function definition outside RAWK block\n %s\n Expected: Place function definitions inside RAWK { ... } block",
                line_count, $0)
            next
        }

        # Regular awk code - pass through unchanged
        print $0
        next
    }

    # STATE 1: Inside RAWK block
    if (state == 1) {
        # Count braces on a copy so that $0 itself is never rewritten
        depth_before = brace_count
        line = $0
        open_braces = gsub(/\{/, "&", line)
        close_braces = gsub(/\}/, "&", line)
        brace_count += open_braces - close_braces

        # Check for function definition: $name = (args) -> {
        if (match($0, /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/)) {
            # The regex is anchored, so the header is the first RLENGTH
            # characters and everything after it is body text.
            header = substr($0, 1, RLENGTH)
            rest = substr($0, RLENGTH + 1)

            # Name: the identifier following the leading '$'
            match(header, /\$[a-zA-Z_][a-zA-Z0-9_]*/)
            func_name = substr(header, RSTART + 1, RLENGTH - 1)

            # Arguments: text between the first '(' and its ')', trimmed
            match(header, /\([^)]*\)/)
            func_args = substr(header, RSTART + 1, RLENGTH - 2)
            gsub(/^[ \t]+|[ \t]+$/, "", func_args)

            function_count++
            function_names[function_count] = func_name
            function_args[function_count] = func_args
            function_bodies[function_count] = ""

            # The function ends when the depth returns to what it was
            # before this header line.
            func_base = depth_before

            if (brace_count > func_base) {
                # Body continues on the following lines
                state = 2
            } else {
                # Function opened and closed on this one line: drop the
                # closing brace(s) and anything trailing them (e.g. ';').
                for (k = brace_count; k <= func_base; k++) {
                    sub(/\}[^}]*$/, "", rest)
                }
                if (brace_count <= 0) {
                    # The RAWK block was closed on this line as well
                    state = 0
                    brace_count = 0
                }
            }

            if (rest !~ /^[ \t]*$/) {
                function_bodies[function_count] = "\n " rest
            }
            next
        }

        # Check for function definition without braces
        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^{]/) {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Function definition missing braces\n %s\n Expected: Use: $name = (args) -> { statements; }",
                line_count, $0)
            next
        }

        # Check if RAWK block is complete
        if (brace_count == 0) {
            state = 0
            next
        }

        # Other code inside RAWK block (should be rare)
        if (!($0 ~ /^[ \t]*\$/)) {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Invalid code inside RAWK block\n %s\n Expected: Only function definitions are allowed inside RAWK blocks",
                line_count, $0)
        }
        next
    }

    # STATE 2: Inside function definition
    if (state == 2) {
        # Count braces on a copy so that $0 itself is never rewritten
        line = $0
        open_braces = gsub(/\{/, "&", line)
        close_braces = gsub(/\}/, "&", line)
        brace_count += open_braces - close_braces

        body_line = $0

        # Check if function is complete: depth is back to where the
        # function was opened (or lower, if the RAWK block closed too).
        if (brace_count <= func_base) {
            # Drop the closing brace(s); END emits the function's own '}'
            for (k = brace_count; k <= func_base; k++) {
                sub(/\}[^}]*$/, "", body_line)
            }
            state = (brace_count <= 0) ? 0 : 1
            if (brace_count < 0) {
                brace_count = 0
            }
        }

        # Add whatever statement text remains on this line to the body
        if (body_line !~ /^[ \t]*$/) {
            function_bodies[function_count] = function_bodies[function_count] "\n " body_line
        }
        next
    }
}

# -----------------------------------------------------------------------------
# CODE GENERATION
# -----------------------------------------------------------------------------

END {
    # Check for unclosed blocks
    if (state != 0) {
        error_count++
        errors[error_count] = sprintf("Error at line %d: Unclosed RAWK block\n Expected: Add closing brace '}' to close the RAWK block",
            line_count)
    }

    # Output errors if any
    if (error_count > 0) {
        for (i = 1; i <= error_count; i++) {
            print errors[i] > "/dev/stderr"
        }
        exit 1
    }

    # Generate standard library functions
    print ""
    print "# Standard library functions"
    print "function assert(condition, message) {"
    print " if (!condition) {"
    print " print \"Assertion failed: \" message > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""
    print "function expect_equal(actual, expected, message) {"
    print " if (actual != expected) {"
    print " print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""

    # Generate user-defined functions
    if (function_count > 0) {
        print "# User-defined functions"
        for (i = 1; i <= function_count; i++) {
            print "function " function_names[i] "(" function_args[i] ") {" function_bodies[i]
            print "}"
            print ""
        }
    }
}