about summary refs log tree commit diff stats
path: root/awk/rawk/scratch/rawk_working.awk
diff options
context:
space:
mode:
Diffstat (limited to 'awk/rawk/scratch/rawk_working.awk')
-rw-r--r--awk/rawk/scratch/rawk_working.awk207
1 files changed, 207 insertions, 0 deletions
diff --git a/awk/rawk/scratch/rawk_working.awk b/awk/rawk/scratch/rawk_working.awk
new file mode 100644
index 0000000..9fab9c8
--- /dev/null
+++ b/awk/rawk/scratch/rawk_working.awk
@@ -0,0 +1,207 @@
+#!/usr/bin/env awk -f
+
+# rawk_working.awk - Working block-based functional programming language for awk
+# Author: @eli_oat
+# License: Public Domain
+# Version: 1.0.0
+
+# This script translates .rawk files into standard AWK code using a block-based approach.
+# All rawk-specific syntax must be contained within RAWK { ... } blocks.
+
+# USAGE:
+#   awk -f rawk_working.awk input.rawk | awk -f -
+#   awk -f rawk_working.awk input.rawk > output.awk
+
+# -----------------------------------------------------------------------------
+# VARIABLES
+# -----------------------------------------------------------------------------
+
+# State tracking
+state = 0  # 0=normal, 1=in_rawk_block, 2=in_function
+brace_count = 0
+line_count = 0
+
+# Function tracking
+function_count = 0
+function_names[0] = ""
+function_args[0] = ""
+function_bodies[0] = ""
+
+# Error tracking
+error_count = 0
+errors[0] = ""
+
+# -----------------------------------------------------------------------------
+# MAIN PARSING LOGIC
+# -----------------------------------------------------------------------------
+
+{
+    line_count++
+    
+    # Skip comments and empty lines
+    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
+        next
+    }
+    
+    # Check for RAWK block start
+    if ($0 ~ /^[ \t]*RAWK[ \t]*\{/) {
+        print "DEBUG: Found RAWK block start: " $0 > "/dev/stderr"
+        if (state != 0) {
+            error_count++
+            errors[error_count] = sprintf("Error at line %d: Nested RAWK blocks not allowed\n  %s\n  Expected: Close the current RAWK block first", 
+                                         line_count, $0)
+        } else {
+            state = 1
+            brace_count = 1
+            print "DEBUG: Set state = 1, brace_count = " brace_count > "/dev/stderr"
+        }
+        next
+    }
+    
+    # If we're inside a RAWK block
+    if (state == 1) {
+        print "DEBUG: Inside RAWK block, line: " $0 > "/dev/stderr"
+        # Count braces
+        open_braces = gsub(/\{/, "&", $0)
+        close_braces = gsub(/\}/, "&", $0)
+        brace_count += open_braces - close_braces
+        
+        # Check for function definition
+        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
+            if (state == 2) {
+                error_count++
+                errors[error_count] = sprintf("Error at line %d: Nested function definitions not allowed\n  %s\n  Expected: Close the current function first", 
+                                             line_count, $0)
+            } else {
+                state = 2
+                # Parse function header inline
+                if (match($0, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
+                    func_name = substr($0, RSTART + 1, RLENGTH - 1)
+                } else {
+                    error_count++
+                    errors[error_count] = sprintf("Error at line %d: Invalid function name\n  %s\n  Expected: Function names must start with $ and contain only letters, numbers, and underscores", 
+                                                 line_count, $0)
+                    next
+                }
+                
+                if (match($0, /\(([^)]*)\)/)) {
+                    func_args = substr($0, RSTART + 1, RLENGTH - 2)
+                    gsub(/^[ \t]+|[ \t]+$/, "", func_args)
+                } else {
+                    error_count++
+                    errors[error_count] = sprintf("Error at line %d: Invalid function arguments\n  %s\n  Expected: Function arguments must be enclosed in parentheses", 
+                                                 line_count, $0)
+                    next
+                }
+                
+                function_count++
+                function_names[function_count] = func_name
+                function_args[function_count] = func_args
+                function_bodies[function_count] = ""
+            }
+            next
+        }
+        
+        # Check for function definition without braces
+        if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^{]/) {
+            error_count++
+            errors[error_count] = sprintf("Error at line %d: Function definition missing braces\n  %s\n  Expected: Use: $name = (args) -> { statements; }", 
+                                         line_count, $0)
+            next
+        }
+        
+        # If we're inside a function, collect the body
+        if (state == 2) {
+            print "DEBUG: Collecting function body: " $0 > "/dev/stderr"
+            # Add line to function body (skip the opening brace line)
+            if (!($0 ~ /^[ \t]*\{/)) {
+                function_bodies[function_count] = function_bodies[function_count] "\n    " $0
+            }
+            
+            # Check if function is complete
+            if (brace_count == 0) {
+                state = 1
+                print "DEBUG: Function complete, state = " state > "/dev/stderr"
+            }
+            next
+        }
+        
+        # Check if RAWK block is complete
+        if (brace_count == 0) {
+            state = 0
+            next
+        }
+        
+        # Other code inside RAWK block (should be rare)
+        if (!($0 ~ /^[ \t]*\$/)) {
+            error_count++
+            errors[error_count] = sprintf("Error at line %d: Invalid code inside RAWK block\n  %s\n  Expected: Only function definitions are allowed inside RAWK blocks", 
+                                         line_count, $0)
+        }
+        next
+    }
+    
+    # Check for function definition outside RAWK block
+    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
+        error_count++
+        errors[error_count] = sprintf("Error at line %d: Function definition outside RAWK block\n  %s\n  Expected: Place function definitions inside RAWK { ... } block", 
+                                     line_count, $0)
+        next
+    }
+    
+    # Regular awk code - pass through unchanged
+    print $0
+}
+
+# -----------------------------------------------------------------------------
+# CODE GENERATION
+# -----------------------------------------------------------------------------
+
+END {
+    # Check for unclosed blocks
+    if (state != 0) {
+        error_count++
+        errors[error_count] = sprintf("Error at line %d: Unclosed RAWK block\n  Expected: Add closing brace '}' to close the RAWK block", 
+                                     line_count)
+    }
+    
+    # Output errors if any
+    if (error_count > 0) {
+        for (i = 1; i <= error_count; i++) {
+            print errors[i] > "/dev/stderr"
+        }
+        exit 1
+    }
+    
+    # Generate standard library functions
+    print ""
+    print "# Standard library functions"
+    print "function assert(condition, message) {"
+    print "    if (!condition) {"
+    print "        print \"Assertion failed: \" message > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+    print "function expect_equal(actual, expected, message) {"
+    print "    if (actual != expected) {"
+    print "        print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+    
+    # Generate user-defined functions
+    print "DEBUG: function_count = " function_count > "/dev/stderr"
+    if (function_count > 0) {
+        print "# User-defined functions"
+        for (i = 1; i <= function_count; i++) {
+            print "DEBUG: Function " i ": " function_names[i] "(" function_args[i] ")" > "/dev/stderr"
+            print "function " function_names[i] "(" function_args[i] ") {" function_bodies[i]
+            print "}"
+            print ""
+        }
+    } else {
+        print "DEBUG: No functions found" > "/dev/stderr"
+    }
+} 
\ No newline at end of file