#!/usr/bin/env awk -f

# rawk_working.awk - Working block-based functional programming language for awk
# Author: @eli_oat
# License: Public Domain
# Version: 1.0.0
#
# This script translates .rawk files into standard AWK code using a block-based
# approach. All rawk-specific syntax must be contained within RAWK { ... } blocks.
#
# USAGE:
#   awk -f rawk_working.awk input.rawk | awk -f -
#   awk -f rawk_working.awk input.rawk > output.awk
#   awk -v debug=1 -f rawk_working.awk input.rawk    # parser trace on stderr
#
# INPUT SHAPE:
#   RAWK {
#       $name = (args) -> {
#           statements;
#       };
#   }
#   ...regular awk code, passed through unchanged...
#
# LIMITATIONS:
#   Braces are counted textually, so a '{' or '}' inside a string literal,
#   a regex, or a trailing comment on a code line will confuse block tracking.
#   Comment-only and blank lines are dropped everywhere, including from
#   passed-through awk code.

# -----------------------------------------------------------------------------
# VARIABLES
# -----------------------------------------------------------------------------

# Initialisation must live in BEGIN: a bare `state = 0` at top level is a
# pattern without an action, which awk re-evaluates for EVERY input record and
# which would therefore reset the parser state on each line.
# `debug` is deliberately not assigned here so that `-v debug=1` survives.
BEGIN {
    # State tracking
    state = 0            # 0=normal, 1=in_rawk_block, 2=in_function
    brace_count = 0      # current brace depth: 1 = RAWK block, 2+ = function body
    line_count = 0       # number of input records seen so far

    # Function tracking (function_names/function_args/function_bodies are
    # indexed 1..function_count)
    function_count = 0

    # Error tracking (errors is indexed 1..error_count)
    error_count = 0
}

# -----------------------------------------------------------------------------
# HELPERS
# -----------------------------------------------------------------------------

# Print a trace message to stderr when the script is run with -v debug=1.
function dbg(message) {
    if (debug) {
        print "DEBUG: " message > "/dev/stderr"
    }
}

# Queue an error message; all queued errors are reported together in END.
function add_error(message) {
    error_count++
    errors[error_count] = message
}

# Return (number of '{') - (number of '}') in line.
# `line` is passed by value, so the caller's $0 is never modified.
function brace_delta(line,    opened, closed) {
    opened = gsub(/\{/, "&", line)
    closed = gsub(/\}/, "&", line)
    return opened - closed
}

# Match a complete function header `$name = (args) -> {` at the start of line.
# Returns non-zero on success and leaves RSTART/RLENGTH describing the header.
function match_function_header(line) {
    return match(line, /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/)
}

# Remove the last '}' in text together with anything that follows it
# (typically a trailing ';'). Text without a '}' is returned unchanged.
function strip_last_brace(text) {
    if (match(text, /\}[^}]*$/)) {
        return substr(text, 1, RSTART - 1)
    }
    return text
}

# Append one line of code to the body of the function currently being defined.
# Whitespace-only text is ignored.
function append_body(text) {
    sub(/[ \t]+$/, "", text)
    if (text ~ /^[ \t]*$/) {
        return
    }
    function_bodies[function_count] = function_bodies[function_count] "\n " text
}

# Add text to the current function body and detect the end of the function.
# Must be called AFTER brace_count has been updated for the current line.
# A function body lives at depth >= 2 (RAWK block + function). Once the depth
# falls below 2, the structural closing brace(s) on this line are stripped,
# because END emits the function's closing brace itself.
function collect_body(text,    depth) {
    if (brace_count >= 2) {
        append_body(text)
        return
    }

    # One '}' closes the function; a second one (depth 0) also closes the
    # surrounding RAWK block on the same line.
    depth = (brace_count > 0) ? brace_count : 0
    for (; depth < 2; depth++) {
        text = strip_last_brace(text)
    }
    append_body(text)

    if (brace_count >= 1) {
        state = 1
    } else {
        state = 0
        brace_count = 0
    }
    dbg("Function complete, state = " state)
}

# -----------------------------------------------------------------------------
# MAIN PARSING LOGIC
# -----------------------------------------------------------------------------

{
    line_count++

    # Skip comments and empty lines
    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
        next
    }

    # Check for RAWK block start
    if ($0 ~ /^[ \t]*RAWK[ \t]*\{/) {
        dbg("Found RAWK block start: " $0)
        if (state != 0) {
            add_error(sprintf("Error at line %d: Nested RAWK blocks not allowed\n %s\n Expected: Close the current RAWK block first", line_count, $0))
        } else {
            state = 1
            brace_count = 1
            dbg("Set state = 1, brace_count = " brace_count)
        }
        next
    }

    # Inside a RAWK block (either between functions or within a function body)
    if (state != 0) {
        brace_count += brace_delta($0)

        # Inside a function: collect the body until its closing brace
        if (state == 2) {
            if (match_function_header($0)) {
                add_error(sprintf("Error at line %d: Nested function definitions not allowed\n %s\n Expected: Close the current function first", line_count, $0))
                next
            }
            dbg("Collecting function body: " $0)
            collect_body($0)
            next
        }

        dbg("Inside RAWK block, line: " $0)

        # Check for function definition
        if (match_function_header($0)) {
            # Split the line into the header and whatever follows the opening
            # brace before the match() calls below overwrite RSTART/RLENGTH.
            header = substr($0, RSTART, RLENGTH)
            rest = substr($0, RSTART + RLENGTH)

            # The header regex guarantees both of these sub-matches succeed.
            match(header, /\$[a-zA-Z_][a-zA-Z0-9_]*/)
            func_name = substr(header, RSTART + 1, RLENGTH - 1)    # drop the '$'

            match(header, /\([^)]*\)/)
            func_args = substr(header, RSTART + 1, RLENGTH - 2)    # drop '(' and ')'
            gsub(/^[ \t]+|[ \t]+$/, "", func_args)

            function_count++
            function_names[function_count] = func_name
            function_args[function_count] = func_args
            function_bodies[function_count] = ""

            # Handles both multi-line functions (rest is usually empty) and
            # one-liners such as `$f = (x) -> { return x; };`.
            state = 2
            collect_body(rest)
            next
        }

        # Check for function definition without braces
        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^{]/) {
            add_error(sprintf("Error at line %d: Function definition missing braces\n %s\n Expected: Use: $name = (args) -> { statements; }", line_count, $0))
            next
        }

        # Check if RAWK block is complete
        if (brace_count <= 0) {
            state = 0
            brace_count = 0
            next
        }

        # Other code inside RAWK block (should be rare). Lines starting with
        # '$' that are not valid definitions are ignored, as before.
        if (!($0 ~ /^[ \t]*\$/)) {
            add_error(sprintf("Error at line %d: Invalid code inside RAWK block\n %s\n Expected: Only function definitions are allowed inside RAWK blocks", line_count, $0))
        }
        next
    }

    # Check for function definition outside RAWK block
    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
        add_error(sprintf("Error at line %d: Function definition outside RAWK block\n %s\n Expected: Place function definitions inside RAWK { ... } block", line_count, $0))
        next
    }

    # Regular awk code - pass through unchanged
    print $0
}

# -----------------------------------------------------------------------------
# CODE GENERATION
# -----------------------------------------------------------------------------

END {
    # Check for unclosed blocks
    if (state != 0) {
        add_error(sprintf("Error at line %d: Unclosed RAWK block\n Expected: Add closing brace '}' to close the RAWK block", line_count))
    }

    # Output errors if any, and signal failure to the caller
    if (error_count > 0) {
        for (i = 1; i <= error_count; i++) {
            print errors[i] > "/dev/stderr"
        }
        exit 1
    }

    # Generate standard library functions
    print ""
    print "# Standard library functions"
    print "function assert(condition, message) {"
    print " if (!condition) {"
    print " print \"Assertion failed: \" message > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""
    print "function expect_equal(actual, expected, message) {"
    print " if (actual != expected) {"
    print " print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""

    # Generate user-defined functions. Each stored body starts with a newline,
    # so the header and body can be emitted with a single print.
    dbg("function_count = " function_count)
    if (function_count > 0) {
        print "# User-defined functions"
        for (i = 1; i <= function_count; i++) {
            dbg("Function " i ": " function_names[i] "(" function_args[i] ")")
            print "function " function_names[i] "(" function_args[i] ") {" function_bodies[i]
            print "}"
            print ""
        }
    } else {
        dbg("No functions found")
    }
}