about summary refs log tree commit diff stats
path: root/awk/rawk/scratch/rawk_dispatch.awk
diff options
context:
space:
mode:
Diffstat (limited to 'awk/rawk/scratch/rawk_dispatch.awk')
-rw-r--r--awk/rawk/scratch/rawk_dispatch.awk218
1 files changed, 218 insertions, 0 deletions
diff --git a/awk/rawk/scratch/rawk_dispatch.awk b/awk/rawk/scratch/rawk_dispatch.awk
new file mode 100644
index 0000000..415143b
--- /dev/null
+++ b/awk/rawk/scratch/rawk_dispatch.awk
@@ -0,0 +1,218 @@
+#!/usr/bin/env awk -f
+
+# rawk_dispatch.awk - Block-based functional programming language for awk
+# Author: @eli_oat
+# License: Public Domain
+# Version: 1.0.0
+# 
+# This implementation uses a dispatch pattern to avoid variable scoping issues
+# by passing state as parameters to functions instead of using global variables.
+
+# USAGE:
+#   awk -f rawk_dispatch.awk input.rawk | awk -f -
+#   awk -f rawk_dispatch.awk input.rawk > output.awk
+
+# -----------------------------------------------------------------------------
+# DISPATCH FUNCTIONS
+# -----------------------------------------------------------------------------
+
+# Dispatch function to handle different parsing states
+function dispatch_parse(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line) {
+    if (state == 0) {
+        return handle_normal_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line)
+    } else if (state == 1) {
+        return handle_rawk_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line)
+    } else if (state == 2) {
+        return handle_function_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line)
+    }
+}
+
+# Handle normal state (outside RAWK blocks)
+function handle_normal_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line) {
+    # Check for RAWK block start
+    if (line ~ /^[ \t]*RAWK[ \t]*\{/) {
+        if (state != 0) {
+            error_count++
+            errors[error_count] = sprintf("Error at line %d: Nested RAWK blocks not allowed\n  %s\n  Expected: Close the current RAWK block first", 
+                                         line_count, line)
+        } else {
+            state = 1
+            brace_count = 1
+        }
+        return "next"
+    }
+    
+    # Check for function definition outside RAWK block
+    if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
+        error_count++
+        errors[error_count] = sprintf("Error at line %d: Function definition outside RAWK block\n  %s\n  Expected: Place function definitions inside RAWK { ... } block", 
+                                     line_count, line)
+        return "next"
+    }
+    
+    # Regular awk code - pass through unchanged
+    print line
+    return "continue"
+}
+
+# Handle RAWK block state
+function handle_rawk_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line) {
+    # Count braces
+    open_braces = gsub(/\{/, "&", line)
+    close_braces = gsub(/\}/, "&", line)
+    brace_count += open_braces - close_braces
+    
+    # Check for function definition
+    if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
+        if (state == 2) {
+            error_count++
+            errors[error_count] = sprintf("Error at line %d: Nested function definitions not allowed\n  %s\n  Expected: Close the current function first", 
+                                         line_count, line)
+        } else {
+            state = 2
+            # Parse function header inline
+            if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
+                func_name = substr(line, RSTART + 1, RLENGTH - 1)
+            } else {
+                error_count++
+                errors[error_count] = sprintf("Error at line %d: Invalid function name\n  %s\n  Expected: Function names must start with $ and contain only letters, numbers, and underscores", 
+                                             line_count, line)
+                return "next"
+            }
+            
+            if (match(line, /\(([^)]*)\)/)) {
+                func_args = substr(line, RSTART + 1, RLENGTH - 2)
+                gsub(/^[ \t]+|[ \t]+$/, "", func_args)
+            } else {
+                error_count++
+                errors[error_count] = sprintf("Error at line %d: Invalid function arguments\n  %s\n  Expected: Function arguments must be enclosed in parentheses", 
+                                             line_count, line)
+                return "next"
+            }
+            
+            function_count++
+            function_names[function_count] = func_name
+            function_args[function_count] = func_args
+            function_bodies[function_count] = ""
+        }
+        return "next"
+    }
+    
+    # Check for function definition without braces
+    if (line ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^{]/) {
+        error_count++
+        errors[error_count] = sprintf("Error at line %d: Function definition missing braces\n  %s\n  Expected: Use: $name = (args) -> { statements; }", 
+                                     line_count, line)
+        return "next"
+    }
+    
+    # Check if RAWK block is complete
+    if (brace_count == 0) {
+        state = 0
+        return "next"
+    }
+    
+    # Other code inside RAWK block (should be rare)
+    if (!(line ~ /^[ \t]*\$/)) {
+        error_count++
+        errors[error_count] = sprintf("Error at line %d: Invalid code inside RAWK block\n  %s\n  Expected: Only function definitions are allowed inside RAWK blocks", 
+                                     line_count, line)
+    }
+    return "next"
+}
+
+# Handle function state (inside function definition)
+function handle_function_state(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, line) {
+    # Count braces
+    open_braces = gsub(/\{/, "&", line)
+    close_braces = gsub(/\}/, "&", line)
+    brace_count += open_braces - close_braces
+    
+    # Add line to function body (skip the opening brace line)
+    if (!(line ~ /^[ \t]*\{/)) {
+        function_bodies[function_count] = function_bodies[function_count] "\n    " line
+    }
+    
+    # Check if function is complete
+    if (brace_count == 0) {
+        state = 1
+    }
+    return "next"
+}
+
+# -----------------------------------------------------------------------------
+# MAIN PARSING LOGIC
+# -----------------------------------------------------------------------------
+
+{
+    line_count++
+    
+    # Skip comments and empty lines
+    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
+        next
+    }
+    
+    # Initialize state arrays if not already done
+    if (function_count == 0) {
+        function_names[0] = ""
+        function_args[0] = ""
+        function_bodies[0] = ""
+        errors[0] = ""
+    }
+    
+    # Dispatch to appropriate handler
+    result = dispatch_parse(state, brace_count, line_count, function_count, function_names, function_args, function_bodies, error_count, errors, $0)
+    
+    if (result == "next") {
+        next
+    }
+}
+
+# -----------------------------------------------------------------------------
+# CODE GENERATION
+# -----------------------------------------------------------------------------
+
+END {
+    # Check for unclosed blocks
+    if (state != 0) {
+        error_count++
+        errors[error_count] = sprintf("Error at line %d: Unclosed RAWK block\n  Expected: Add closing brace '}' to close the RAWK block", 
+                                     line_count)
+    }
+    
+    # Output errors if any
+    if (error_count > 0) {
+        for (i = 1; i <= error_count; i++) {
+            print errors[i] > "/dev/stderr"
+        }
+        exit 1
+    }
+    
+    # Generate standard library functions
+    print ""
+    print "# Standard library functions"
+    print "function assert(condition, message) {"
+    print "    if (!condition) {"
+    print "        print \"Assertion failed: \" message > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+    print "function expect_equal(actual, expected, message) {"
+    print "    if (actual != expected) {"
+    print "        print \"Test failed: \" message \" (expected \" expected \", got \" actual \")\" > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+    
+    # Generate user-defined functions
+    if (function_count > 0) {
+        print "# User-defined functions"
+        for (i = 1; i <= function_count; i++) {
+            print "function " function_names[i] "(" function_args[i] ") {" function_bodies[i]
+            print "}"
+            print ""
+        }
+    }
+} 
\ No newline at end of file