about summary refs log tree commit diff stats
path: root/awk/rawk/scratch/rawk_v2_fixed.awk
diff options
context:
space:
mode:
Diffstat (limited to 'awk/rawk/scratch/rawk_v2_fixed.awk')
-rw-r--r--awk/rawk/scratch/rawk_v2_fixed.awk245
1 files changed, 245 insertions, 0 deletions
diff --git a/awk/rawk/scratch/rawk_v2_fixed.awk b/awk/rawk/scratch/rawk_v2_fixed.awk
new file mode 100644
index 0000000..1177bb1
--- /dev/null
+++ b/awk/rawk/scratch/rawk_v2_fixed.awk
@@ -0,0 +1,245 @@
+#!/usr/bin/env awk -f
+
+# rawk_v2_fixed.awk - Block-based functional programming language for awk
+# Author: @eli_oat
+# License: Public Domain
+# Version: 2.0.0
+# 
+# This implementation is based on the successful approach from the original rawk.awk
+# using proper state management and array indexing to avoid variable scoping issues.
+
+# USAGE:
+#   awk -f rawk_v2_fixed.awk input.rawk | awk -f -
+#   awk -f rawk_v2_fixed.awk input.rawk > output.awk
+
+# -----------------------------------------------------------------------------
+# VARIABLES
+# -----------------------------------------------------------------------------
+
+# State tracking - use multiple variables like the original
+in_function_def = 0      # Are we in a function definition context?
+in_function_body = 0     # Are we inside a function body?
+brace_count = 0          # Brace counter for function bodies
+current_function_index = 0  # Index of current function being processed
+line_count = 0           # Total lines processed
+
+# Function tracking
+function_count = 0
+FUNCTION_NAMES[0] = ""
+FUNCTION_ARGS[0] = ""
+FUNCTION_BODIES[0] = ""
+FUNCTION_TYPES[0] = ""
+
+# Main script lines (non-function code)
+main_script_count = 0
+main_script_lines[0] = ""
+
+# Error tracking
+error_count = 0
+errors[0] = ""
+
+# -----------------------------------------------------------------------------
+# MAIN PARSING LOGIC
+# -----------------------------------------------------------------------------
+
+{
+    line_count++
+    
+    # Skip comments and empty lines
+    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
+        next
+    }
+    
+    # Pattern: Multi-line function definition start (the only allowed form)
+    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
+        print "DEBUG: Found function definition: " $0 > "/dev/stderr"
+        in_function_def = 1
+        parse_multi_line_function($0, line_count)
+        next  # Do not add function definition line to main_script_lines
+    }
+    
+    # Validate: Only allow function definitions with { ... }
+    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^\{]/) {
+        report_error("Function definitions must use braces: -> { ... }", line_count, $0, "Use: $name = (args) -> { ... }")
+        next
+    }
+    
+    # Pattern: Multi-line function body continuation
+    if (in_function_body) {
+        # Count opening and closing braces
+        open_braces = gsub(/\{/, "&", $0)
+        close_braces = gsub(/\}/, "&", $0)
+        
+        if (close_braces > 0 && brace_count <= 1) {
+            # End of function body
+            in_function_body = 0
+            in_function_def = 0
+            next
+        } else {
+            # Update brace count
+            brace_count += open_braces - close_braces
+            
+            # Add line to current function body
+            FUNCTION_BODIES[current_function_index] = FUNCTION_BODIES[current_function_index] "\n    " $0
+            next
+        }
+    }
+    
+    # Pattern: Start of multi-line function body, but only if not already in a function body
+    if (!in_function_body && in_function_def && $0 ~ /^[ \t]*\{/) {
+        in_function_body = 1
+        brace_count = 1
+        next
+    }
+    
+    # Pattern: Regular code - collect for main script
+    if (!in_function_body && !($0 ~ /^[ \t]*\$/ && $0 ~ /->/)) {
+        main_script_lines[++main_script_count] = $0
+    }
+    
+    # Unconditional next to suppress AWK's default printing
+    next
+}
+
+# -----------------------------------------------------------------------------
+# HELPER FUNCTIONS
+# -----------------------------------------------------------------------------
+
+# Parse multi-line function definition
+function parse_multi_line_function(line, line_num) {
+    print "DEBUG: parse_multi_line_function called with: " line > "/dev/stderr"
+    
+    # Extract function name
+    if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
+        func_name = substr(line, RSTART + 1, RLENGTH - 1)
+        print "DEBUG: Function name: " func_name > "/dev/stderr"
+    } else {
+        report_error("Invalid function name", line_num, line, "Function name must be a valid identifier")
+        return
+    }
+    
+    # Extract arguments
+    if (match(line, /\(([^)]*)\)/)) {
+        args = substr(line, RSTART + 1, RLENGTH - 2)
+        print "DEBUG: Arguments: " args > "/dev/stderr"
+    } else {
+        report_error("Invalid argument list", line_num, line, "Arguments must be enclosed in parentheses")
+        return
+    }
+    
+    # Store function information
+    function_count++
+    current_function_index = function_count
+    FUNCTION_NAMES[function_count] = func_name
+    FUNCTION_ARGS[function_count] = args
+    FUNCTION_BODIES[function_count] = ""
+    FUNCTION_TYPES[function_count] = "multi"
+    
+    print "DEBUG: function_count after increment: " function_count > "/dev/stderr"
+    print "DEBUG: current_function_index: " current_function_index > "/dev/stderr"
+    
+    # Start collecting function body (the opening brace is already on this line)
+    in_function_body = 1
+    brace_count = 1  # Start with 1 for the opening brace
+}
+
+function report_error(message, line_num, line, suggestion) {
+    print "❌ " message > "/dev/stderr"
+    print "   at line " line_num " in " FILENAME > "/dev/stderr"
+    print "   context: " line > "/dev/stderr"
+    if (suggestion != "") {
+        print "   💡 " suggestion > "/dev/stderr"
+    }
+    print "" > "/dev/stderr"
+    error_count++
+}
+
+# -----------------------------------------------------------------------------
+# CODE GENERATION
+# -----------------------------------------------------------------------------
+
+END {
+    # Check for validation errors
+    if (error_count > 0) {
+        print "❌ Compilation failed with " error_count " error(s)" > "/dev/stderr"
+        exit 1
+    }
+    
+    # Generate standard library
+    generate_standard_library()
+    
+    # Generate function definitions
+    generate_function_definitions()
+    
+    # Generate main script body
+    generate_main_script()
+    
+    # Add metadata
+    print "# Generated by rawk v2.0.0"
+    print "# Functions: " function_count
+    print "# Lines: " line_count
+}
+
+function generate_standard_library() {
+    print "# --- Standard Library ---"
+    print ""
+    
+    # Add basic testing functions
+    print "function assert(condition, message) {"
+    print "    if (!condition) {"
+    print "        print \"❌ Assertion failed: \" message > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+    
+    print "function expect_equal(actual, expected, message) {"
+    print "    if (actual != expected) {"
+    print "        print \"❌ Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+}
+
+function generate_function_definitions() {
+    print "DEBUG: function_count = " function_count > "/dev/stderr"
+    if (function_count == 0) return
+    
+    print "# --- User Functions ---"
+    print ""
+    
+    for (i = 1; i <= function_count; i++) {
+        print "DEBUG: Generating function " i ": " FUNCTION_NAMES[i] > "/dev/stderr"
+        print "function " FUNCTION_NAMES[i] "(" FUNCTION_ARGS[i] ") {" FUNCTION_BODIES[i]
+        print "}"
+        print ""
+    }
+}
+
+function generate_main_script() {
+    print "# --- Main Script ---"
+    
+    # Check if there's already a BEGIN block
+    has_begin = 0
+    for (i = 1; i <= main_script_count; i++) {
+        if (main_script_lines[i] ~ /^[ \t]*BEGIN[ \t]*\{/) {
+            has_begin = 1
+            break
+        }
+    }
+    
+    if (has_begin) {
+        # Print lines as-is
+        for (i = 1; i <= main_script_count; i++) {
+            print main_script_lines[i]
+        }
+    } else {
+        # Wrap in BEGIN block
+        print "BEGIN {"
+        for (i = 1; i <= main_script_count; i++) {
+            print "    " main_script_lines[i]
+        }
+        print "}"
+    }
+} 
\ No newline at end of file