about summary refs log tree commit diff stats
path: root/awk/rawk/scratch/rawk_final.awk
diff options
context:
space:
mode:
Diffstat (limited to 'awk/rawk/scratch/rawk_final.awk')
-rw-r--r--awk/rawk/scratch/rawk_final.awk215
1 files changed, 215 insertions, 0 deletions
diff --git a/awk/rawk/scratch/rawk_final.awk b/awk/rawk/scratch/rawk_final.awk
new file mode 100644
index 0000000..7edea0a
--- /dev/null
+++ b/awk/rawk/scratch/rawk_final.awk
@@ -0,0 +1,215 @@
+#!/usr/bin/env awk -f
+
+# rawk_final.awk - Block-based functional programming language for awk
+# Author: @eli_oat
+# License: Public Domain
+# Version: 1.0.0
+# 
+# This implementation uses a simple state machine without function calls
+# to avoid all variable scoping issues.
+
+# USAGE:
+#   awk -f rawk_final.awk input.rawk | awk -f -
+#   awk -f rawk_final.awk input.rawk > output.awk
+
+# -----------------------------------------------------------------------------
+# VARIABLES
+# -----------------------------------------------------------------------------
+
+# State tracking - use simple integers
+state = 0  # 0=normal, 1=in_rawk_block, 2=in_function
+brace_count = 0
+line_count = 0
+
+# Function tracking
+function_count = 0
+function_names[0] = ""
+function_args[0] = ""
+function_bodies[0] = ""
+
+# Error tracking
+error_count = 0
+errors[0] = ""
+
+# -----------------------------------------------------------------------------
+# MAIN PARSING LOGIC
+# -----------------------------------------------------------------------------
+
+{
+    line_count++
+    
+    # Skip comments and empty lines
+    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
+        next
+    }
+    
+    # Initialize arrays if needed
+    if (function_count == 0) {
+        function_names[0] = ""
+        function_args[0] = ""
+        function_bodies[0] = ""
+        errors[0] = ""
+    }
+    
+    # STATE 0: Normal state (outside RAWK blocks)
+    if (state == 0) {
+        # Check for RAWK block start
+        if ($0 ~ /^[ \t]*RAWK[ \t]*\{/) {
+            if (state != 0) {
+                error_count++
+                errors[error_count] = sprintf("Error at line %d: Nested RAWK blocks not allowed\n  %s\n  Expected: Close the current RAWK block first", 
+                                             line_count, $0)
+            } else {
+                state = 1
+                brace_count = 1
+            }
+            next
+        }
+        
+        # Check for function definition outside RAWK block
+        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
+            error_count++
+            errors[error_count] = sprintf("Error at line %d: Function definition outside RAWK block\n  %s\n  Expected: Place function definitions inside RAWK { ... } block", 
+                                         line_count, $0)
+            next
+        }
+        
+        # Regular awk code - pass through unchanged
+        print $0
+        next
+    }
+    
+    # STATE 1: Inside RAWK block
+    if (state == 1) {
+        # Count braces
+        open_braces = gsub(/\{/, "&", $0)
+        close_braces = gsub(/\}/, "&", $0)
+        brace_count += open_braces - close_braces
+        
+        # Check for function definition
+        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
+            if (state == 2) {
+                error_count++
+                errors[error_count] = sprintf("Error at line %d: Nested function definitions not allowed\n  %s\n  Expected: Close the current function first", 
+                                             line_count, $0)
+            } else {
+                state = 2
+                # Parse function header inline
+                if (match($0, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
+                    func_name = substr($0, RSTART + 1, RLENGTH - 1)
+                } else {
+                    error_count++
+                    errors[error_count] = sprintf("Error at line %d: Invalid function name\n  %s\n  Expected: Function names must start with $ and contain only letters, numbers, and underscores", 
+                                                 line_count, $0)
+                    next
+                }
+                
+                if (match($0, /\(([^)]*)\)/)) {
+                    func_args = substr($0, RSTART + 1, RLENGTH - 2)
+                    gsub(/^[ \t]+|[ \t]+$/, "", func_args)
+                } else {
+                    error_count++
+                    errors[error_count] = sprintf("Error at line %d: Invalid function arguments\n  %s\n  Expected: Function arguments must be enclosed in parentheses", 
+                                                 line_count, $0)
+                    next
+                }
+                
+                function_count++
+                function_names[function_count] = func_name
+                function_args[function_count] = func_args
+                function_bodies[function_count] = ""
+            }
+            next
+        }
+        
+        # Check for function definition without braces
+        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^{]/) {
+            error_count++
+            errors[error_count] = sprintf("Error at line %d: Function definition missing braces\n  %s\n  Expected: Use: $name = (args) -> { statements; }", 
+                                         line_count, $0)
+            next
+        }
+        
+        # Check if RAWK block is complete
+        if (brace_count == 0) {
+            state = 0
+            next
+        }
+        
+        # Other code inside RAWK block (should be rare)
+        if (!($0 ~ /^[ \t]*\$/)) {
+            error_count++
+            errors[error_count] = sprintf("Error at line %d: Invalid code inside RAWK block\n  %s\n  Expected: Only function definitions are allowed inside RAWK blocks", 
+                                         line_count, $0)
+        }
+        next
+    }
+    
+    # STATE 2: Inside function definition
+    if (state == 2) {
+        # Count braces
+        open_braces = gsub(/\{/, "&", $0)
+        close_braces = gsub(/\}/, "&", $0)
+        brace_count += open_braces - close_braces
+        
+        # Add line to function body (skip the opening brace line)
+        if (!($0 ~ /^[ \t]*\{/)) {
+            function_bodies[function_count] = function_bodies[function_count] "\n    " $0
+        }
+        
+        # Check if function is complete
+        if (brace_count == 0) {
+            state = 1
+        }
+        next
+    }
+}
+
+# -----------------------------------------------------------------------------
+# CODE GENERATION
+# -----------------------------------------------------------------------------
+
+END {
+    # Check for unclosed blocks
+    if (state != 0) {
+        error_count++
+        errors[error_count] = sprintf("Error at line %d: Unclosed RAWK block\n  Expected: Add closing brace '}' to close the RAWK block", 
+                                     line_count)
+    }
+    
+    # Output errors if any
+    if (error_count > 0) {
+        for (i = 1; i <= error_count; i++) {
+            print errors[i] > "/dev/stderr"
+        }
+        exit 1
+    }
+    
+    # Generate standard library functions
+    print ""
+    print "# Standard library functions"
+    print "function assert(condition, message) {"
+    print "    if (!condition) {"
+    print "        print \"Assertion failed: \" message > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+    print "function expect_equal(actual, expected, message) {"
+    print "    if (actual != expected) {"
+    print "        print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+    
+    # Generate user-defined functions
+    if (function_count > 0) {
+        print "# User-defined functions"
+        for (i = 1; i <= function_count; i++) {
+            print "function " function_names[i] "(" function_args[i] ") {" function_bodies[i]
+            print "}"
+            print ""
+        }
+    }
+} 
\ No newline at end of file