diff options
Diffstat (limited to 'awk/rawk/scratch/rawk_v2_fixed.awk')
-rw-r--r-- | awk/rawk/scratch/rawk_v2_fixed.awk | 245 |
1 files changed, 245 insertions, 0 deletions
#!/usr/bin/env awk -f

# rawk_v2_fixed.awk - Block-based functional programming language for awk
# Author: @eli_oat
# License: Public Domain
# Version: 2.0.0
#
# This implementation is based on the successful approach from the original rawk.awk
# using proper state management and array indexing to avoid variable scoping issues.

# USAGE:
#   awk -f rawk_v2_fixed.awk input.rawk | awk -f -
#   awk -f rawk_v2_fixed.awk input.rawk > output.awk
#   awk -v DEBUG=1 -f rawk_v2_fixed.awk input.rawk > output.awk   # parser trace on stderr

# -----------------------------------------------------------------------------
# VARIABLES
# -----------------------------------------------------------------------------

# All parser state is initialised here, once. A bare `name = 0` at the top
# level of an awk program is a pattern-only rule: it is re-evaluated for every
# input record, which would silently reset the state on each line.
BEGIN {
    # State tracking - use multiple variables like the original
    in_function_def = 0          # Are we in a function definition context?
    in_function_body = 0         # Are we inside a function body?
    brace_count = 0              # Brace nesting depth inside the current function body
    current_function_index = 0   # Index of current function being processed
    line_count = 0               # Total lines processed

    # Function tracking (entries are stored from index 1 upwards)
    function_count = 0
    FUNCTION_NAMES[0] = ""
    FUNCTION_ARGS[0] = ""
    FUNCTION_BODIES[0] = ""
    FUNCTION_TYPES[0] = ""

    # Main script lines (non-function code)
    main_script_count = 0
    main_script_lines[0] = ""

    # Error tracking
    error_count = 0
    errors[0] = ""
}

# -----------------------------------------------------------------------------
# MAIN PARSING LOGIC
# -----------------------------------------------------------------------------

# Classify every input line as: ignorable, function definition header, invalid
# definition, function body line, or main-script line. Nothing is printed
# here; all output is produced in END.
{
    line_count++

    # Skip comments and empty lines (also inside function bodies)
    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
        next
    }

    # Pattern: function definition start: $name = (args) -> {
    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
        debug("Found function definition: " $0)
        in_function_def = 1
        parse_multi_line_function($0, line_count)
        next  # Do not add function definition line to main_script_lines
    }

    # Validate: Only allow function definitions with { ... }
    # (lines with a brace were already consumed by the rule above)
    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^\{]/) {
        report_error("Function definitions must use braces: -> { ... }", line_count, $0, "Use: $name = (args) -> { ... }")
        next
    }

    # Pattern: function body continuation
    if (in_function_body) {
        collect_body_line($0)
        next
    }

    # Pattern: Start of multi-line function body, but only if not already in a function body
    if (in_function_def && $0 ~ /^[ \t]*\{/) {
        in_function_body = 1
        brace_count = 1
        next
    }

    # Pattern: Regular code - collect for main script
    if (!($0 ~ /^[ \t]*\$/ && $0 ~ /->/)) {
        main_script_lines[++main_script_count] = $0
    }

    # Unconditional next to suppress AWK's default printing
    next
}

# -----------------------------------------------------------------------------
# HELPER FUNCTIONS
# -----------------------------------------------------------------------------

# Print a trace message on stderr, but only when the DEBUG variable is set
# (e.g. `awk -v DEBUG=1 -f rawk_v2_fixed.awk ...`).
function debug(message) {
    if (DEBUG) {
        print "DEBUG: " message > "/dev/stderr"
    }
}

# Account for one line of text belonging to the current function body.
#
# Updates brace_count with the net number of braces on the line. If the line
# brings the depth back to zero it closes the function: any code in front of
# the final "}" is kept, the brace itself (and anything after it) is dropped.
# Otherwise the whole line is appended to FUNCTION_BODIES[current_function_index].
#
# Braces are counted textually, so braces inside string or regex literals are
# counted too.
#
# text - the line (or remainder of the definition line) to process
function collect_body_line(text,    scratch, opens, closes, depth_after, kept) {
    # Count on a copy so that $0 is never rewritten (gsub with "&" leaves the
    # text unchanged and returns the number of matches).
    scratch = text
    opens = gsub(/\{/, "&", scratch)
    closes = gsub(/\}/, "&", scratch)
    depth_after = brace_count + opens - closes

    if (depth_after <= 0) {
        # End of function body: keep whatever precedes the closing brace
        kept = text
        sub(/\}[^}]*$/, "", kept)
        if (kept !~ /^[ \t]*$/) {
            FUNCTION_BODIES[current_function_index] = FUNCTION_BODIES[current_function_index] "\n " kept
        }
        in_function_body = 0
        in_function_def = 0
        brace_count = 0
        return
    }

    brace_count = depth_after
    FUNCTION_BODIES[current_function_index] = FUNCTION_BODIES[current_function_index] "\n " text
}

# Parse a function definition header of the form `$name = (args) -> {`.
#
# Registers a new entry in FUNCTION_NAMES / FUNCTION_ARGS / FUNCTION_BODIES /
# FUNCTION_TYPES, makes it the current function and switches the parser into
# function-body mode. Code following the opening brace on the same line is
# treated as the first body line, so `$f = (x) -> { return x }` is a complete
# definition.
#
# line     - the full source line
# line_num - its line number, used for error reporting
function parse_multi_line_function(line, line_num,    func_name, args, remainder) {
    debug("parse_multi_line_function called with: " line)

    # Extract function name (the identifier after the leading "$")
    if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
        func_name = substr(line, RSTART + 1, RLENGTH - 1)
        debug("Function name: " func_name)
    } else {
        report_error("Invalid function name", line_num, line, "Function name must be a valid identifier")
        return
    }

    # Extract arguments (text between the first pair of parentheses)
    if (match(line, /\(([^)]*)\)/)) {
        args = substr(line, RSTART + 1, RLENGTH - 2)
        debug("Arguments: " args)
    } else {
        report_error("Invalid argument list", line_num, line, "Arguments must be enclosed in parentheses")
        return
    }

    # Store function information
    function_count++
    current_function_index = function_count
    FUNCTION_NAMES[function_count] = func_name
    FUNCTION_ARGS[function_count] = args
    FUNCTION_BODIES[function_count] = ""
    FUNCTION_TYPES[function_count] = "multi"

    debug("function_count after increment: " function_count)
    debug("current_function_index: " current_function_index)

    # Start collecting function body (the opening brace is already on this line)
    in_function_body = 1
    brace_count = 1  # Start with 1 for the opening brace

    # Anything after the opening brace is already body text; a blank or
    # comment-only remainder is ignored, like blank/comment lines elsewhere.
    if (match(line, /->[ \t]*\{/)) {
        remainder = substr(line, RSTART + RLENGTH)
        if (remainder !~ /^[ \t]*(#.*)?$/) {
            collect_body_line(remainder)
        }
    }
}

# Report a compilation error on stderr and count it.
#
# message    - short description of the problem
# line_num   - line number the problem was found on
# line       - the offending source line, shown as context
# suggestion - optional hint; omitted from the output when empty
function report_error(message, line_num, line, suggestion) {
    print "❌ " message > "/dev/stderr"
    print " at line " line_num " in " FILENAME > "/dev/stderr"
    print " context: " line > "/dev/stderr"
    if (suggestion != "") {
        print " 💡 " suggestion > "/dev/stderr"
    }
    print "" > "/dev/stderr"
    error_count++
}

# -----------------------------------------------------------------------------
# CODE GENERATION
# -----------------------------------------------------------------------------

# Emit the generated awk program, or fail with exit status 1 if any error was
# reported while parsing.
END {
    # Check for validation errors
    if (error_count > 0) {
        print "❌ Compilation failed with " error_count " error(s)" > "/dev/stderr"
        exit 1
    }

    # Generate standard library
    generate_standard_library()

    # Generate function definitions
    generate_function_definitions()

    # Generate main script body
    generate_main_script()

    # Add metadata
    print "# Generated by rawk v2.0.0"
    print "# Functions: " function_count
    print "# Lines: " line_count
}

# Print the built-in helper functions (assert, expect_equal) that every
# generated program receives.
function generate_standard_library() {
    print "# --- Standard Library ---"
    print ""

    # Add basic testing functions
    print "function assert(condition, message) {"
    print " if (!condition) {"
    print " print \"❌ Assertion failed: \" message > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""

    print "function expect_equal(actual, expected, message) {"
    print " if (actual != expected) {"
    print " print \"❌ Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\""
    print " exit 1"
    print " }"
    print "}"
    print ""
}

# Print one awk function per collected rawk function definition.
# Prints nothing when no functions were defined.
function generate_function_definitions(    i) {
    debug("function_count = " function_count)
    if (function_count == 0) return

    print "# --- User Functions ---"
    print ""

    for (i = 1; i <= function_count; i++) {
        debug("Generating function " i ": " FUNCTION_NAMES[i])
        print "function " FUNCTION_NAMES[i] "(" FUNCTION_ARGS[i] ") {" FUNCTION_BODIES[i]
        print "}"
        print ""
    }
}

# Print the collected non-function lines. If the source already contains a
# BEGIN block the lines are emitted unchanged; otherwise they are wrapped in
# a generated BEGIN block.
function generate_main_script(    i, has_begin) {
    print "# --- Main Script ---"

    # Check if there's already a BEGIN block
    has_begin = 0
    for (i = 1; i <= main_script_count; i++) {
        if (main_script_lines[i] ~ /^[ \t]*BEGIN[ \t]*\{/) {
            has_begin = 1
            break
        }
    }

    if (has_begin) {
        # Print lines as-is
        for (i = 1; i <= main_script_count; i++) {
            print main_script_lines[i]
        }
    } else {
        # Wrap in BEGIN block
        print "BEGIN {"
        for (i = 1; i <= main_script_count; i++) {
            print " " main_script_lines[i]
        }
        print "}"
    }
}