#!/usr/bin/env awk -f

# rawk_final.awk - Block-based functional programming language for awk
# Author: @eli_oat
# License: Public Domain
# Version: 1.0.0
#
# This implementation uses a simple state machine without function calls
# to avoid all variable scoping issues.
#
# USAGE:
#   awk -f rawk_final.awk input.rawk | awk -f -
#   awk -f rawk_final.awk input.rawk > output.awk
#
# INPUT FORMAT (as recognised by the rules below):
#   RAWK {
#       $name = (arg1, arg2) -> {
#           statements
#       }
#   }
#   ...regular awk code, passed through unchanged...
#
# Braces are counted textually, so a brace inside a string or regex literal
# on a line within a RAWK block is counted like any other brace.

# -----------------------------------------------------------------------------
# VARIABLES
# -----------------------------------------------------------------------------

# Initialisation must live in BEGIN: a bare assignment at the top level of an
# awk program is an expression *pattern*, evaluated again for every input
# record, which would reset the state machine on each line.
BEGIN {
    # State tracking - use simple integers
    state = 0           # 0=normal, 1=in_rawk_block, 2=in_function
    brace_count = 0     # current brace depth; 1 == directly inside RAWK { }
    line_count = 0

    # Function tracking (function_names/args/bodies are indexed from 1)
    function_count = 0

    # Error tracking (errors is indexed from 1)
    error_count = 0
}

# -----------------------------------------------------------------------------
# MAIN PARSING LOGIC
# -----------------------------------------------------------------------------

{
    line_count++

    # Skip comments and empty lines (in every state)
    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
        next
    }

    # STATE 0: Normal state (outside RAWK blocks)
    if (state == 0) {
        # Check for RAWK block start
        if ($0 ~ /^[ \t]*RAWK[ \t]*\{/) {
            state = 1
            brace_count = 1
            next
        }

        # Check for function definition outside RAWK block
        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Function definition outside RAWK block\n %s\n Expected: Place function definitions inside RAWK { ... } block", line_count, $0)
            next
        }

        # Regular awk code - pass through unchanged
        print $0
        next
    }

    # STATE 1: Inside RAWK block
    if (state == 1) {
        # Count braces on a copy so that $0 is never rewritten
        scratch = $0
        open_braces = gsub(/\{/, "&", scratch)
        close_braces = gsub(/\}/, "&", scratch)
        brace_count += open_braces - close_braces

        # Check for function definition: $name = (args) -> {
        if (match($0, /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/)) {
            # Whatever follows the opening brace on the header line
            inline_body = substr($0, RSTART + RLENGTH)

            # The header pattern above guarantees both matches below succeed
            match($0, /\$[a-zA-Z_][a-zA-Z0-9_]*/)
            func_name = substr($0, RSTART + 1, RLENGTH - 1)

            match($0, /\([^)]*\)/)
            func_args = substr($0, RSTART + 1, RLENGTH - 2)
            gsub(/^[ \t]+|[ \t]+$/, "", func_args)

            function_count++
            function_names[function_count] = func_name
            function_args[function_count] = func_args
            function_bodies[function_count] = ""

            if (brace_count > 1) {
                # Function stays open; the rest of its body follows
                state = 2
            } else {
                # Function closed on its own header line. Strip its closing
                # brace (and the RAWK block's, when depth fell to 0) because
                # code generation emits the closing brace itself.
                for (k = brace_count; k <= 1; k++) {
                    sub(/\}[^}]*$/, "", inline_body)
                }
                if (brace_count <= 0) {
                    state = 0
                }
            }

            gsub(/^[ \t]+|[ \t]+$/, "", inline_body)
            if (inline_body != "") {
                function_bodies[function_count] = function_bodies[function_count] "\n " inline_body
            }
            next
        }

        # Function header that did not match the braced form above: the
        # opening brace is missing (this includes a header ending at "->")
        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Function definition missing braces\n %s\n Expected: Use: $name = (args) -> { statements; }", line_count, $0)
            next
        }

        # Check if RAWK block is complete
        if (brace_count <= 0) {
            state = 0
            next
        }

        # Other code inside RAWK block (should be rare)
        if (!($0 ~ /^[ \t]*\$/)) {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Invalid code inside RAWK block\n %s\n Expected: Only function definitions are allowed inside RAWK blocks", line_count, $0)
        }
        next
    }

    # STATE 2: Inside function definition
    if (state == 2) {
        # Count braces on a copy so that $0 is never rewritten
        scratch = $0
        open_braces = gsub(/\{/, "&", scratch)
        close_braces = gsub(/\}/, "&", scratch)
        brace_count += open_braces - close_braces

        if (brace_count > 1) {
            # Still inside the function: keep the line verbatim. Lines that
            # begin with "{" are kept too; the header always carries the
            # function's own opening brace, so such a line is body code.
            function_bodies[function_count] = function_bodies[function_count] "\n " $0
            next
        }

        # Depth is back at the RAWK-block level (or below): the function ends
        # on this line. Drop its closing brace (and the RAWK block's, when
        # depth fell to 0) since code generation prints the closing brace.
        tail = $0
        for (k = brace_count; k <= 1; k++) {
            sub(/\}[^}]*$/, "", tail)
        }
        sub(/[ \t]+$/, "", tail)
        if (tail !~ /^[ \t]*$/) {
            function_bodies[function_count] = function_bodies[function_count] "\n " tail
        }

        state = (brace_count <= 0) ? 0 : 1
        next
    }
}

# -----------------------------------------------------------------------------
# CODE GENERATION
# -----------------------------------------------------------------------------

END {
    # Check for unclosed blocks
    if (state != 0) {
        error_count++
        errors[error_count] = sprintf("Error at line %d: Unclosed RAWK block\n Expected: Add closing brace '}' to close the RAWK block", line_count)
    }

    # Output errors if any; nothing further is generated when errors exist
    if (error_count > 0) {
        for (i = 1; i <= error_count; i++) {
            print errors[i] > "/dev/stderr"
        }
        exit 1
    }

    # Generate standard library functions
    print ""
    print "# Standard library functions"
    print "function assert(condition, message) {"
    print " if (!condition) {"
    print " print \"Assertion failed: \" message > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""
    print "function expect_equal(actual, expected, message) {"
    print " if (actual != expected) {"
    print " print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""

    # Generate user-defined functions. Each stored body already starts with a
    # newline, so it follows directly after the opening brace.
    if (function_count > 0) {
        print "# User-defined functions"
        for (i = 1; i <= function_count; i++) {
            print "function " function_names[i] "(" function_args[i] ") {" function_bodies[i]
            print "}"
            print ""
        }
    }
}