#!/usr/bin/env awk -f

# rawk_dispatch.awk - Block-based functional programming language for awk
# Author: @eli_oat
# License: Public Domain
# Version: 1.0.0
#
# This implementation uses a dispatch pattern: the parser state is handed to
# one handler function per state instead of being read from globals inside
# the handlers. awk passes scalars by value, so every handler writes the
# updated scalars (state, brace_count, function_count, error_count) into the
# `ctx` array, which is passed by reference, and the main rule copies them
# back after each record.
#
# Parser states: 0 = normal awk code, 1 = inside RAWK { ... },
#                2 = inside a function body within a RAWK block.
#
# Known limitation: braces are counted textually, so a "{" or "}" inside a
# string or regex literal of a function body is counted as well, and a
# closing brace is only recognised when it is the last non-blank character
# on its line.
#
# USAGE:
#   awk -f rawk_dispatch.awk input.rawk | awk -f -
#   awk -f rawk_dispatch.awk input.rawk > output.awk

# -----------------------------------------------------------------------------
# HELPERS
# -----------------------------------------------------------------------------

# Store the updated scalar state in ctx and return `action`, so a handler can
# finish with `return save_state(...)`.
function save_state(ctx, state, brace_count, function_count, error_count, action) {
    ctx["state"] = state
    ctx["brace_count"] = brace_count
    ctx["function_count"] = function_count
    ctx["error_count"] = error_count
    return action
}

# Append `text` as a new line of function body number `idx`; blank text is
# ignored.
function append_body(function_bodies, idx, text) {
    if (text !~ /^[ \t]*$/) {
        function_bodies[idx] = function_bodies[idx] "\n " text
    }
}

# Return `text` with up to `n` trailing closing braces removed.
function strip_closers(text, n) {
    while (n-- > 0) {
        if (!sub(/[ \t]*\}[ \t]*$/, "", text)) {
            break
        }
    }
    return text
}

# -----------------------------------------------------------------------------
# DISPATCH FUNCTIONS
# -----------------------------------------------------------------------------

# Route one input line to the handler for the current parser state.
#
# Parameters:
#   state, brace_count, function_count, error_count - current scalar state
#   line_count                                       - current input line number
#   function_names, function_args, function_bodies  - collected definitions
#   errors                                           - collected error messages
#   line                                             - text of the input line
#   ctx                                              - array receiving the updated
#                                                      scalar state (keys "state",
#                                                      "brace_count",
#                                                      "function_count",
#                                                      "error_count")
# Returns: "next" when the line was consumed, "continue" when it was passed
# through as regular awk code, "" for an unknown state.
function dispatch_parse(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line, ctx) {
    if (state == 0) {
        return handle_normal_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line, ctx)
    } else if (state == 1) {
        return handle_rawk_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line, ctx)
    } else if (state == 2) {
        return handle_function_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line, ctx)
    }
    # Unknown state: keep ctx consistent with what was passed in.
    return save_state(ctx, state, brace_count, function_count, error_count, "")
}

# Handle normal state (outside RAWK blocks).
# Opens a RAWK block, rejects function definitions found outside one, and
# prints every other line unchanged. Parameters as for dispatch_parse.
function handle_normal_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line, ctx) {
    # Check for RAWK block start
    if (line ~ /^[ \t]*RAWK[ \t]*\{/) {
        if (state != 0) {
            # Only reachable when called directly with a non-normal state;
            # dispatch_parse routes here for state 0 only.
            error_count++
            errors[error_count] = sprintf("Error at line %d: Nested RAWK blocks not allowed\n %s\n Expected: Close the current RAWK block first", line_count, line)
        } else {
            state = 1
            brace_count = 1
        }
        return save_state(ctx, state, brace_count, function_count, error_count, "next")
    }

    # Check for function definition outside RAWK block
    if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
        error_count++
        errors[error_count] = sprintf("Error at line %d: Function definition outside RAWK block\n %s\n Expected: Place function definitions inside RAWK { ... } block", line_count, line)
        return save_state(ctx, state, brace_count, function_count, error_count, "next")
    }

    # Regular awk code - pass through unchanged
    print line
    return save_state(ctx, state, brace_count, function_count, error_count, "continue")
}

# Handle RAWK block state (inside RAWK { ... }, outside any function).
# Recognises `$name = (args) -> {` headers, the closing brace of the block,
# and reports everything else as an error. Parameters as for dispatch_parse;
# the names after `ctx` are locals.
function handle_rawk_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line, ctx,    open_braces, close_braces, func_name, func_args, body, closers) {
    # Count braces (replacing each match with itself leaves line unchanged)
    open_braces = gsub(/\{/, "&", line)
    close_braces = gsub(/\}/, "&", line)
    brace_count += open_braces - close_braces

    # A second RAWK block opened before the current one was closed
    if (line ~ /^[ \t]*RAWK[ \t]*\{/) {
        error_count++
        errors[error_count] = sprintf("Error at line %d: Nested RAWK blocks not allowed\n %s\n Expected: Close the current RAWK block first", line_count, line)
        return save_state(ctx, state, brace_count, function_count, error_count, "next")
    }

    # Check for function definition
    if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
        if (state == 2) {
            # Only reachable when called directly while inside a function.
            error_count++
            errors[error_count] = sprintf("Error at line %d: Nested function definitions not allowed\n %s\n Expected: Close the current function first", line_count, line)
            return save_state(ctx, state, brace_count, function_count, error_count, "next")
        }

        # Parse function header inline
        if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
            # Drop the leading "$" from the matched name
            func_name = substr(line, RSTART + 1, RLENGTH - 1)
        } else {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Invalid function name\n %s\n Expected: Function names must start with $ and contain only letters, numbers, and underscores", line_count, line)
            return save_state(ctx, state, brace_count, function_count, error_count, "next")
        }

        if (match(line, /\(([^)]*)\)/)) {
            # Text between the parentheses, trimmed
            func_args = substr(line, RSTART + 1, RLENGTH - 2)
            gsub(/^[ \t]+|[ \t]+$/, "", func_args)
        } else {
            error_count++
            errors[error_count] = sprintf("Error at line %d: Invalid function arguments\n %s\n Expected: Function arguments must be enclosed in parentheses", line_count, line)
            return save_state(ctx, state, brace_count, function_count, error_count, "next")
        }

        function_count++
        function_names[function_count] = func_name
        function_args[function_count] = func_args
        function_bodies[function_count] = ""

        # Anything after "-> {" on the header line already belongs to the body.
        body = ""
        if (match(line, /->[ \t]*\{/)) {
            body = substr(line, RSTART + RLENGTH)
            sub(/^[ \t]+/, "", body)
        }

        # The RAWK block itself holds depth 1, so the function is still open
        # only while the depth is 2 or more. Depth 1 means the function was
        # closed on this line; depth 0 means the RAWK block was closed too.
        if (brace_count >= 2) {
            closers = 0
            state = 2
        } else if (brace_count == 0) {
            closers = 2
            state = 0
        } else {
            closers = 1
            state = 1
        }
        append_body(function_bodies, function_count, strip_closers(body, closers))
        return save_state(ctx, state, brace_count, function_count, error_count, "next")
    }

    # Check for function definition without braces. Headers with a brace were
    # handled above, so any remaining "->" header lacks one.
    if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
        error_count++
        errors[error_count] = sprintf("Error at line %d: Function definition missing braces\n %s\n Expected: Use: $name = (args) -> { statements; }", line_count, line)
        return save_state(ctx, state, brace_count, function_count, error_count, "next")
    }

    # Check if RAWK block is complete
    if (brace_count == 0) {
        state = 0
        return save_state(ctx, state, brace_count, function_count, error_count, "next")
    }

    # Other code inside RAWK block (should be rare)
    if (!(line ~ /^[ \t]*\$/)) {
        error_count++
        errors[error_count] = sprintf("Error at line %d: Invalid code inside RAWK block\n %s\n Expected: Only function definitions are allowed inside RAWK blocks", line_count, line)
    }

    return save_state(ctx, state, brace_count, function_count, error_count, "next")
}

# Handle function state (inside a function definition).
# Collects body lines for the function numbered function_count and detects
# the closing brace of the function. Parameters as for dispatch_parse; the
# names after `ctx` are locals.
function handle_function_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line, ctx,    open_braces, close_braces, closers) {
    # Count braces
    open_braces = gsub(/\{/, "&", line)
    close_braces = gsub(/\}/, "&", line)
    brace_count += open_braces - close_braces

    # A function header inside a function body
    if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
        error_count++
        errors[error_count] = sprintf("Error at line %d: Nested function definitions not allowed\n %s\n Expected: Close the current function first", line_count, line)
        return save_state(ctx, state, brace_count, function_count, error_count, "next")
    }

    # The RAWK block holds depth 1: the function is complete once the depth
    # drops back to 1, and the block is complete as well at depth 0. The
    # closing braces are not stored because the generator emits its own "}".
    if (brace_count >= 2) {
        closers = 0
    } else if (brace_count == 0) {
        closers = 2
        state = 0
    } else {
        closers = 1
        state = 1
    }
    append_body(function_bodies, function_count, strip_closers(line, closers))

    return save_state(ctx, state, brace_count, function_count, error_count, "next")
}

# -----------------------------------------------------------------------------
# MAIN PARSING LOGIC
# -----------------------------------------------------------------------------

BEGIN {
    # Scalar parser state
    state = 0
    brace_count = 0
    function_count = 0
    error_count = 0

    # Make sure these are arrays before they are first passed to a function
    function_names[0] = ""
    function_args[0] = ""
    function_bodies[0] = ""
    errors[0] = ""
    ctx["state"] = 0
}

{
    line_count++

    # Skip comments and empty lines
    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
        next
    }

    # Dispatch to appropriate handler
    result = dispatch_parse(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, $0, ctx)

    # Scalars are passed by value; pick up the handler's updates from ctx
    state = ctx["state"]
    brace_count = ctx["brace_count"]
    function_count = ctx["function_count"]
    error_count = ctx["error_count"]

    if (result == "next") {
        next
    }
}

# -----------------------------------------------------------------------------
# CODE GENERATION
# -----------------------------------------------------------------------------

END {
    # Check for unclosed blocks
    if (state != 0) {
        error_count++
        errors[error_count] = sprintf("Error at line %d: Unclosed RAWK block\n Expected: Add closing brace '}' to close the RAWK block", line_count)
    }

    # Output errors if any
    if (error_count > 0) {
        for (i = 1; i <= error_count; i++) {
            print errors[i] > "/dev/stderr"
        }
        exit 1
    }

    # Generate standard library functions
    print ""
    print "# Standard library functions"
    print "function assert(condition, message) {"
    print " if (!condition) {"
    print " print \"Assertion failed: \" message > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""
    print "function expect_equal(actual, expected, message) {"
    print " if (actual != expected) {"
    print " print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""

    # Generate user-defined functions
    if (function_count > 0) {
        print "# User-defined functions"
        for (i = 1; i <= function_count; i++) {
            print "function " function_names[i] "(" function_args[i] ") {" function_bodies[i]
            print "}"
            print ""
        }
    }
}