#!/usr/bin/env awk -f

# rawk_simple.awk - Simple block-based functional programming language for awk
# This is a minimal working implementation to demonstrate the concept
#
# USAGE:
#   awk -f rawk_simple.awk input.rawk | awk -f -
#
# Input format handled by this translator:
#   RAWK {
#       $name = (arg1, arg2) -> {
#           ...awk statements...
#       }
#   }
# Lines outside a RAWK block are passed through unchanged. Function
# definitions found inside a RAWK block are collected and emitted as ordinary
# awk functions at the end of the output, after a small standard library.
#
# Limitations: comment lines and blank lines are dropped everywhere; braces
# are counted textually, so a brace inside a string or regex literal within a
# RAWK block is counted like any other brace.

# Initialise all translator state exactly once. These assignments must live in
# BEGIN: written as bare top-level statements they would be pattern-only rules
# that awk re-evaluates (and therefore resets) for every input record.
BEGIN {
    state = 0            # 0=normal, 1=in_rawk_block, 2=in_function
    brace_count = 0      # current brace nesting depth inside the RAWK block
    function_depth = 0   # brace depth just outside the function being collected
    line_count = 0
    failed = 0           # set by fail() so END does not emit output after an error

    # Function tracking (entries 1..function_count are the real ones)
    function_count = 0
    function_names[0] = ""
    function_args[0] = ""
    function_bodies[0] = ""
}

# Report an error on stderr and stop with exit status 1.
# `exit` still runs END, so the flag lets END bail out immediately.
function fail(message) {
    print message > "/dev/stderr"
    failed = 1
    exit 1
}

# Return text with leading and trailing blanks/tabs removed.
function trim(text) {
    gsub(/^[ \t]+|[ \t]+$/, "", text)
    return text
}

# Return text with its last n closing braces (and anything after each) removed.
function strip_closers(text, n) {
    while (n-- > 0) {
        sub(/\}[^}]*$/, "", text)
    }
    return text
}

# Append one line of code to the body of the function currently being collected.
function append_body(text) {
    function_bodies[function_count] = function_bodies[function_count] "\n " text
}

{
    line_count++

    # Skip comments and empty lines
    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
        next
    }

    # Check for RAWK block start
    if ($0 ~ /^[ \t]*RAWK[ \t]*\{/) {
        if (state != 0) {
            fail("Error: Nested RAWK blocks not allowed")
        }
        state = 1
        brace_count = 1
        next
    }

    # Inside a RAWK block (either between functions or inside one)
    if (state != 0) {
        # Count braces on a scratch copy so $0 itself is never rewritten.
        scratch = $0
        open_braces = gsub(/\{/, "", scratch)
        close_braces = gsub(/\}/, "", scratch)
        depth_before = brace_count
        brace_count += open_braces - close_braces

        if (state == 2) {
            if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
                fail("Error: Nested function definitions not allowed")
            }

            if (brace_count > function_depth) {
                # Still inside the function: keep the line verbatim.
                append_body($0)
                next
            }

            # This line closes the function (and possibly the RAWK block too).
            # Drop the closing brace(s) that do not belong to the body; END
            # prints the function's own closing brace.
            remainder = trim(strip_closers($0, function_depth - brace_count + 1))
            if (remainder != "") {
                append_body(remainder)
            }
            state = 1
        } else if (match($0, /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/)) {
            # Function header: "$name = (args) -> {". The match above
            # guarantees that both the name and the argument list are present.
            header = substr($0, RSTART, RLENGTH)
            remainder = substr($0, RSTART + RLENGTH)

            match(header, /\$[a-zA-Z_][a-zA-Z0-9_]*/)
            func_name = substr(header, RSTART + 1, RLENGTH - 1)

            match(header, /\([^)]*\)/)
            func_args = trim(substr(header, RSTART + 1, RLENGTH - 2))

            function_count++
            function_names[function_count] = func_name
            function_args[function_count] = func_args
            function_bodies[function_count] = ""
            function_depth = depth_before

            if (brace_count > function_depth) {
                # Body continues on the following lines.
                state = 2
                remainder = trim(remainder)
            } else {
                # Body opened and closed on the header line itself.
                remainder = trim(strip_closers(remainder, function_depth - brace_count + 1))
            }
            if (remainder != "") {
                append_body(remainder)
            }
            if (state == 2) {
                next
            }
        }

        # Check if RAWK block is complete
        if (brace_count == 0) {
            state = 0
        }
        next
    }

    # Check for function definition outside RAWK block
    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
        fail("Error: Function definition outside RAWK block")
    }

    # Regular awk code - pass through unchanged
    print $0
}

END {
    # An error was already reported from the main rule: emit nothing more.
    if (failed) {
        exit 1
    }

    # Check for unclosed blocks
    if (state != 0) {
        fail("Error: Unclosed RAWK block")
    }

    # Generate standard library functions
    print ""
    print "# Standard library functions"
    print "function assert(condition, message) {"
    print " if (!condition) {"
    print " print \"Assertion failed: \" message > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""
    print "function expect_equal(actual, expected, message) {"
    print " if (actual != expected) {"
    print " print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""

    # Generate user-defined functions
    if (function_count > 0) {
        print "# User-defined functions"
        for (i = 1; i <= function_count; i++) {
            print "function " function_names[i] "(" function_args[i] ") {" function_bodies[i]
            print "}"
            print ""
        }
    }
}