#!/usr/bin/env awk -f
# rawk_v2_fixed.awk - Block-based functional programming language for awk
# Author: @eli_oat
# License: Public Domain
# Version: 2.0.0
#
# This implementation is based on the successful approach from the original rawk.awk
# using proper state management and array indexing to avoid variable scoping issues.
#
# USAGE:
#   awk -f rawk_v2_fixed.awk input.rawk | awk -f -
#   awk -f rawk_v2_fixed.awk input.rawk > output.awk
#   awk -v DEBUG=1 -f rawk_v2_fixed.awk input.rawk > output.awk   # trace to stderr
#
# INPUT FORM:
#   $name = (arg1, arg2) -> {
#       ...awk statements...
#   }
# Every other non-blank, non-comment line is collected as main-script code.
#
# LIMITATION: braces are counted textually, so a '{' or '}' inside a string
# or regex literal in a function body is counted like a structural brace.

# -----------------------------------------------------------------------------
# VARIABLES
# -----------------------------------------------------------------------------

# All parser state is initialised once, here. These assignments must live in
# BEGIN: a bare assignment at top level is an awk *pattern* that is
# re-evaluated for every input record, which would reset the state per line.
BEGIN {
    # State tracking
    in_function_body = 0        # Are we inside a function body?
    brace_count = 0             # Open-brace depth inside the current body
    current_function_index = 0  # Index of the function being collected
    line_count = 0              # Total input lines processed

    # Function tracking: FUNCTION_NAMES / FUNCTION_ARGS / FUNCTION_BODIES /
    # FUNCTION_TYPES are parallel arrays indexed 1..function_count.
    function_count = 0

    # Main script lines (non-function code): main_script_lines[1..main_script_count]
    main_script_count = 0

    # Error tracking
    error_count = 0
}

# -----------------------------------------------------------------------------
# MAIN PARSING LOGIC
# -----------------------------------------------------------------------------

{
    line_count++

    # Skip comments and empty lines (also inside function bodies)
    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
        next
    }

    # Pattern: function definition start (the only allowed form):
    #   $name = (args) -> {
    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
        debug("Found function definition: " $0)
        parse_multi_line_function($0, line_count)
        next    # Do not add the definition line to main_script_lines
    }

    # Validate: only allow function definitions with { ... }.
    # Matches "-> something-that-is-not-a-brace" as well as a bare "->" at
    # end of line, which would otherwise be dropped without any diagnostic.
    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*([^{ \t]|$)/) {
        report_error("Function definitions must use braces: -> { ... }", line_count, $0, "Use: $name = (args) -> { ... }")
        next
    }

    # Pattern: function body continuation
    if (in_function_body) {
        collect_body_line($0)
        next
    }

    # Pattern: regular code - collect for the main script. Lines that start
    # with '$' and contain '->' look like (malformed) definitions and are
    # deliberately kept out of the main script.
    if (!($0 ~ /^[ \t]*\$/ && $0 ~ /->/)) {
        main_script_lines[++main_script_count] = $0
    }

    # Unconditional next to suppress AWK's default printing
    next
}

# -----------------------------------------------------------------------------
# HELPER FUNCTIONS
# -----------------------------------------------------------------------------

# Print a trace message to stderr, only when run with -v DEBUG=1.
function debug(message) {
    if (DEBUG) {
        print "DEBUG: " message > "/dev/stderr"
    }
}

# Return the number of matches of the dynamic regex `regex` in `text`.
# Works on a copy so the caller's string (and $0) is left untouched.
function count_matches(text, regex,    copy) {
    copy = text
    return gsub(regex, "&", copy)
}

# Feed one line of text into the body of the function currently being
# collected (FUNCTION_BODIES[current_function_index]).
#
# The body ends when the net brace depth returns to zero. The closing brace
# itself is not stored (the generator emits it); code preceding a closing
# brace that ends the line is kept.
function collect_body_line(text,    opened, closed, depth, head) {
    opened = count_matches(text, "[{]")
    closed = count_matches(text, "[}]")
    depth = brace_count + opened - closed

    if (depth > 0) {
        # Still inside the body: record the line and the new depth.
        brace_count = depth
        FUNCTION_BODIES[current_function_index] = FUNCTION_BODIES[current_function_index] "\n " text
        return
    }

    # The body closes on this line. Keep any statement that precedes the
    # final brace, e.g. "return x }".
    head = text
    if (sub(/\}[ \t]*$/, "", head) && head ~ /[^ \t]/) {
        FUNCTION_BODIES[current_function_index] = FUNCTION_BODIES[current_function_index] "\n " head
    }
    brace_count = 0
    in_function_body = 0
}

# Parse a function definition line of the form "$name = (args) -> {".
#
#   line      the full definition line
#   line_num  its input line number (used in error reports)
#
# On success, registers a new entry in the FUNCTION_* arrays, makes it the
# current function and switches the parser into body-collection mode. Any
# text after the opening brace on the same line is treated as body text, so
# one-line definitions such as "$f = (x) -> { return x }" terminate properly.
function parse_multi_line_function(line, line_num,    func_name, args, rest) {
    debug("parse_multi_line_function called with: " line)

    # Extract function name (the identifier following the leading '$')
    if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
        func_name = substr(line, RSTART + 1, RLENGTH - 1)
        debug("Function name: " func_name)
    } else {
        report_error("Invalid function name", line_num, line, "Function name must be a valid identifier")
        return
    }

    # Extract arguments (text between the first pair of parentheses)
    if (match(line, /\(([^)]*)\)/)) {
        args = substr(line, RSTART + 1, RLENGTH - 2)
        debug("Arguments: " args)
    } else {
        report_error("Invalid argument list", line_num, line, "Arguments must be enclosed in parentheses")
        return
    }

    # Store function information
    function_count++
    current_function_index = function_count
    FUNCTION_NAMES[function_count] = func_name
    FUNCTION_ARGS[function_count] = args
    FUNCTION_BODIES[function_count] = ""
    FUNCTION_TYPES[function_count] = "multi"

    debug("function_count after increment: " function_count)
    debug("current_function_index: " current_function_index)

    # Start collecting the function body (the opening brace is on this line)
    in_function_body = 1
    brace_count = 1    # Start with 1 for the opening brace

    # Text following the opening brace already belongs to the body.
    if (match(line, /->[ \t]*\{/)) {
        rest = substr(line, RSTART + RLENGTH)
        if (rest ~ /[^ \t]/) {
            collect_body_line(rest)
        }
    }
}

# Report a compilation error on stderr and count it.
#
#   message     what went wrong
#   line_num    input line number
#   line        offending source text
#   suggestion  optional hint; omitted from the output when empty
function report_error(message, line_num, line, suggestion) {
    print "❌ " message > "/dev/stderr"
    print " at line " line_num " in " FILENAME > "/dev/stderr"
    print " context: " line > "/dev/stderr"
    if (suggestion != "") {
        print " 💡 " suggestion > "/dev/stderr"
    }
    print "" > "/dev/stderr"
    error_count++
}

# -----------------------------------------------------------------------------
# CODE GENERATION
# -----------------------------------------------------------------------------

END {
    # A body still open at end of input means a closing brace is missing.
    if (in_function_body) {
        report_error("Unterminated function body: " FUNCTION_NAMES[current_function_index], line_count, "", "Add the closing } for the function body")
    }

    # Check for validation errors
    if (error_count > 0) {
        print "❌ Compilation failed with " error_count " error(s)" > "/dev/stderr"
        exit 1
    }

    # Generate standard library
    generate_standard_library()

    # Generate function definitions
    generate_function_definitions()

    # Generate main script body
    generate_main_script()

    # Add metadata
    print "# Generated by rawk v2.0.0"
    print "# Functions: " function_count
    print "# Lines: " line_count
}

# Emit the built-in helper functions available to every compiled program.
function generate_standard_library() {
    print "# --- Standard Library ---"
    print ""

    # Add basic testing functions
    print "function assert(condition, message) {"
    print " if (!condition) {"
    print " print \"❌ Assertion failed: \" message > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""

    print "function expect_equal(actual, expected, message) {"
    print " if (actual != expected) {"
    print " print \"❌ Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""
}

# Emit one awk function per collected rawk definition, in source order.
function generate_function_definitions(    i) {
    debug("function_count = " function_count)
    if (function_count == 0) return

    print "# --- User Functions ---"
    print ""

    for (i = 1; i <= function_count; i++) {
        debug("Generating function " i ": " FUNCTION_NAMES[i])
        # Each stored body line already starts with "\n", so the body
        # follows the opening brace on its own lines.
        print "function " FUNCTION_NAMES[i] "(" FUNCTION_ARGS[i] ") {" FUNCTION_BODIES[i]
        print "}"
        print ""
    }
}

# Emit the collected main-script lines. If the source already contains a
# BEGIN block the lines are printed verbatim; otherwise they are wrapped in
# a generated BEGIN block.
function generate_main_script(    i, has_begin) {
    print "# --- Main Script ---"

    # Check if there's already a BEGIN block
    has_begin = 0
    for (i = 1; i <= main_script_count; i++) {
        if (main_script_lines[i] ~ /^[ \t]*BEGIN[ \t]*\{/) {
            has_begin = 1
            break
        }
    }

    if (has_begin) {
        # Print lines as-is
        for (i = 1; i <= main_script_count; i++) {
            print main_script_lines[i]
        }
    } else {
        # Wrap in BEGIN block
        print "BEGIN {"
        for (i = 1; i <= main_script_count; i++) {
            print " " main_script_lines[i]
        }
        print "}"
    }
}