about summary refs log tree commit diff stats
path: root/awk/rawk/rawk.awk
diff options
context:
space:
mode:
Diffstat (limited to 'awk/rawk/rawk.awk')
-rw-r--r-- awk/rawk/rawk.awk 1367
1 files changed, 496 insertions, 871 deletions
diff --git a/awk/rawk/rawk.awk b/awk/rawk/rawk.awk
index b0d4b5a..c4e2ff1 100644
--- a/awk/rawk/rawk.awk
+++ b/awk/rawk/rawk.awk
@@ -1,913 +1,538 @@
-#!/usr/bin/env awk -f
+#!/usr/bin/awk -f
 
-# -----------------------------------------------------------------------------
-# rawk.awk - The `rawk` Language Compiler (Multi-Pass Version)
-#
-# This script translates a `.rawk` source file into standard, portable awk code.
-# It uses a multi-pass approach:
-#   Pass 1: Parse and collect all function definitions and source lines
-#   Pass 2: Generate the final AWK code (in END block)
-#
-# USAGE:
-#   awk -f rawk.awk my_program.rawk | awk -f -
-#
-# EXAMPLES:
-#   # Compile and run a rawk program
-#   awk -f rawk.awk hello.rawk | awk -f -
-#
-#   # Compile to a file for later use
-#   awk -f rawk.awk hello.rawk > hello.awk
-#   awk -f hello.awk
-#
-# LANGUAGE FEATURES:
-#
-# 1. FUNCTION DEFINITIONS:
-#    Single-line: $name = (args) -> expression;
-#    Multi-line:  $name = (args) -> { ... };
-#
-#    Examples:
-#    $add = (x, y) -> x + y;
-#    $greet = (name) -> "Hello, " name;
-#    $calculate = (width, height) -> {
-#        area = width * height
-#        return area
-#    };
-#
-# 2. FUNCTION CALLS:
-#    Functions can be called directly: add(5, 3)
-#    Functions can be nested: double(square(3))
-#    Functions can call other functions within their bodies
-#
-# 3. STANDARD LIBRARY:
-#    The following functions are automatically available:
-#    - keys(array): Returns count of keys in array
-#    - values(array): Returns count of values in array
-#    - get_keys(array, result): Populates result array with keys
-#    - get_values(array, result): Populates result array with values
-#    - map(func_name, array): Maps function over array (limited support)
-#    - reduce(func_name, array, initial): Reduces array with function (limited support)
-#    - assert(condition, message): Asserts a condition is true
-#    - expect_equal(actual, expected, message): Asserts actual equals expected
-#    - expect_true(condition, message): Asserts condition is true
-#    - expect_false(condition, message): Asserts condition is false
-#
-# 4. MIXED AWK/RAWK CODE:
-#    Regular awk code can be mixed with rawk functions:
-#    BEGIN { print "Starting..." }
-#    $process = (line) -> "Processed: " line;
-#    { print process($0) }
-#    END { print "Done." }
+# rawk.awk
+
+# Author: @eli_oat
+# License: Public Domain
+# Let's make awk rawk
+
+# =============================================================================
+# Multi-pass compiler
+# =============================================================================
+# 
+# This compiler transforms rawk code into standard awk and smartly includes only
+# those standard library functions you've actually used. It uses a multi-pass 
+# approach to overcome awk's variable scoping limitations and ensure 
+# deterministic compilation.
 #
 # COMPILATION PROCESS:
-# 1. Pass 1: Parse rawk function definitions and collect them
-# 2. Pass 2: Generate internal awk functions and dispatch table
-# 3. Pass 3: Replace function calls with internal names
-# 4. Pass 4: Output final awk script
-#
-# LIMITATIONS:
-# - Standard library map/reduce functions have limited support
-# - Maximum 10 functions per file (for standard library compatibility)
-# - Function names must be valid awk identifiers
-# - Array returns from functions are not supported (use pass-by-reference)
-#
-# ERROR HANDLING:
-# - Invalid syntax generates descriptive error messages with context
-# - Missing functions are reported at runtime with helpful suggestions
-# - Argument count mismatches are detected with detailed information
-# - Source line correlation for better debugging
-#
-# PORTABILITY:
-# - Output is compatible with standard awk (nawk, BSD awk)
-# - Avoids gawk-specific features for maximum compatibility
-# - Uses only standard awk constructs and functions
+#   Pass 1: Collect all input lines into memory
+#   Pass 2: Detect and validate RAWK { ... } block structure
+#   Pass 3: Extract function definitions from within RAWK block
+#   Pass 4: Analyze function calls to determine standard library dependencies
+#   Pass 5: Generate final awk code with smart standard library inclusion
 #
-# -----------------------------------------------------------------------------
+# LANGUAGE FEATURES:
+#   - Block-based syntax: RAWK { ... } for function definitions
+#   - Functional programming utilities: map, reduce, filter, etc.
+#   - Smart standard library: only includes functions actually used
+#   - Comprehensive error handling with actionable messages
+# =============================================================================
 
-# Global state for multi-pass compilation
 BEGIN {
-    # --- Compiler State Initialization ---
-    
-    # Function collection arrays
-    delete FUNCTION_NAMES
-    delete FUNCTION_ARGS
-    delete FUNCTION_BODIES
-    delete FUNCTION_TYPES  # "single" or "multi"
-    delete FUNCTION_LINES  # source line numbers
-    
-    # Counters
-    function_count = 0
-    line_count = 0
-    
-    # State tracking
-    in_function_body = 0
-    brace_count = 0
-    in_function_def = 0  # Track if we're in a function definition context
-    
-    # Source lines for pass 2
-    delete SOURCE_LINES
-    delete SOURCE_LINE_TYPES  # "function_def", "function_body", "code"
-    
-    # State tracking for multi-line function definitions
-    in_function_body = 0
-    current_function_index = 0
-    
-    # Enhanced error tracking
-    error_count = 0
-    warning_count = 0
-    
-    # Compilation statistics
-    functions_defined = 0
-    source_lines = 0
-    errors = 0
-    warnings = 0
+    # =============================================================================
+    # INITIALIZATION: Set up data structures for multi-pass compilation
+    # =============================================================================
+    
+    RAWK_VERSION = "0.0.1"
+    
+    # Arrays to store compilation state
+    delete lines                   # All input lines (Pass 1)
+    delete FUNCTION_NAMES          # User-defined function names (Pass 3)
+    delete FUNCTION_ARGS           # User-defined function arguments (Pass 3)
+    delete FUNCTION_BODIES         # User-defined function bodies (Pass 3)
+    delete USED_FUNCTIONS          # User functions actually called (Pass 4)
+    delete USED_STDLIB_FUNCTIONS   # Standard library functions used (Pass 4)
+    
+    # Compilation state counters
+    line_count = 0                 # Total number of input lines
+    function_count = 0             # Number of user-defined functions
+    in_rawk_block = 0              # Flag: currently inside RAWK block
+    rawk_block_start = 0           # Line number where RAWK block starts
+    rawk_block_end = 0             # Line number where RAWK block ends
+    
+    # =============================================================================
+    # STANDARD LIBRARY CATALOG: All available functions for smart inclusion
+    # =============================================================================
+    # These functions are conditionally included based on actual usage in the code
+    
+    # Core type checking and validation functions
+    stdlib_functions["assert"] = 1
+    stdlib_functions["expect_equal"] = 1
+    stdlib_functions["expect_true"] = 1
+    stdlib_functions["expect_false"] = 1
+    stdlib_functions["is_number"] = 1
+    stdlib_functions["is_string"] = 1
+    stdlib_functions["is_positive"] = 1
+    stdlib_functions["is_negative"] = 1
+    stdlib_functions["is_zero"] = 1
+    stdlib_functions["is_integer"] = 1
+    stdlib_functions["is_float"] = 1
+    stdlib_functions["is_boolean"] = 1
+    stdlib_functions["is_truthy"] = 1
+    stdlib_functions["is_falsy"] = 1
+    stdlib_functions["is_empty"] = 1
+    
+    # Data format validation functions
+    stdlib_functions["is_email"] = 1
+    stdlib_functions["is_url"] = 1
+    stdlib_functions["is_ipv4"] = 1
+    stdlib_functions["is_ipv6"] = 1
+    stdlib_functions["is_uuid"] = 1
+    stdlib_functions["is_alpha"] = 1
+    stdlib_functions["is_numeric"] = 1
+    stdlib_functions["is_alphanumeric"] = 1
+    stdlib_functions["is_palindrome"] = 1
+    stdlib_functions["is_hex"] = 1
+    stdlib_functions["is_csv"] = 1
+    stdlib_functions["is_tsv"] = 1
+    
+    # HTTP status and method validation functions
+    stdlib_functions["http_is_redirect"] = 1
+    stdlib_functions["http_is_client_error"] = 1
+    stdlib_functions["http_is_server_error"] = 1
+    stdlib_functions["http_is_get"] = 1
+    stdlib_functions["http_is_post"] = 1
+    stdlib_functions["http_is_safe_method"] = 1
+    stdlib_functions["http_is_mutating_method"] = 1
+    
+    # Array utility functions
+    stdlib_functions["keys"] = 1
+    stdlib_functions["values"] = 1
+    stdlib_functions["get_keys"] = 1
+    stdlib_functions["get_values"] = 1
+    
+    # Functional programming utilities
+    stdlib_functions["map"] = 1
+    stdlib_functions["reduce"] = 1
+    stdlib_functions["filter"] = 1
+    stdlib_functions["find"] = 1
+    stdlib_functions["findIndex"] = 1
+    stdlib_functions["flatMap"] = 1
+    stdlib_functions["take"] = 1
+    stdlib_functions["drop"] = 1
+    stdlib_functions["pipe"] = 1
+    stdlib_functions["pipe_multi"] = 1
+    
+    # Numeric predicate functions
+    stdlib_functions["is_even"] = 1
+    stdlib_functions["is_odd"] = 1
+    stdlib_functions["is_prime"] = 1
+    stdlib_functions["is_in_range"] = 1
+    
+    # String analysis functions
+    stdlib_functions["is_whitespace"] = 1
+    stdlib_functions["is_uppercase"] = 1
+    stdlib_functions["is_lowercase"] = 1
+    stdlib_functions["is_length"] = 1
+    
+    # Web-specific utility functions
+    stdlib_functions["url_is_static_file"] = 1
+    stdlib_functions["url_has_query_params"] = 1
+    stdlib_functions["url_is_root_path"] = 1
+    stdlib_functions["user_agent_is_mobile"] = 1
+    stdlib_functions["user_agent_is_desktop"] = 1
+    stdlib_functions["user_agent_is_browser"] = 1
+    stdlib_functions["is_bot"] = 1
+    stdlib_functions["ip_is_local"] = 1
+    stdlib_functions["ip_is_public"] = 1
+    stdlib_functions["ip_is_ipv4"] = 1
+    stdlib_functions["ip_is_ipv6"] = 1
 }
 
-# -----------------------------------------------------------------------------
-# PASS 1: Parse and collect function definitions and source lines
-# -----------------------------------------------------------------------------
-
+# =============================================================================
+# PASS 1: COLLECT ALL INPUT LINES
+# =============================================================================
+# Store every line in memory for multi-pass processing. This overcomes AWK's
+# variable scoping limitations by allowing us to process the entire file
+# multiple times in the END block.
 {
-    line_count++
-    SOURCE_LINES[line_count] = $0
-    
-    # Skip comments and empty lines
-    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
-        SOURCE_LINE_TYPES[line_count] = "comment"
-        next
-    }
-    
-    # Pattern 1: Multi-line function definition start
-    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
-        in_function_def = 1
-        parse_multi_line_function($0, line_count)
-        SOURCE_LINE_TYPES[line_count] = "function_def"
-        next
-    }
-    
-    # Pattern 2: Single-line function definition
-    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^\{]/) {
-        in_function_def = 1
-        parse_single_line_function($0, line_count)
-        SOURCE_LINE_TYPES[line_count] = "function_def"
-        next
-    }
+    lines[++line_count] = $0
+}
+
+# =============================================================================
+# PASSES 2-5: MULTI-PASS COMPILATION IN END BLOCK
+# =============================================================================
+# All subsequent passes happen in the END block to ensure we have complete
+# information about the entire source file before making compilation decisions.
+
+END {
+    # =============================================================================
+    # PASS 2: DETECT AND VALIDATE RAWK BLOCK STRUCTURE
+    # =============================================================================
+    # Find the RAWK { ... } block and validate its structure. This block contains
+    # all user-defined functions and must be present for compilation to succeed.
+    # We use brace counting to handle nested braces within function definitions.
     
-    # Pattern 3: Multi-line function body continuation
-    if (in_function_body) {
-        # Count opening and closing braces
-        open_braces = gsub(/\{/, "&", $0)
-        close_braces = gsub(/\}/, "&", $0)
+    for (i = 1; i <= line_count; i++) {
+        line = lines[i]
         
-        if (close_braces > 0 && brace_count <= 1) {
-            # End of function body
-            in_function_body = 0
-            in_function_def = 0
-            SOURCE_LINE_TYPES[line_count] = "function_body_end"
-            next
-        } else {
-            # Update brace count
-            brace_count += open_braces - close_braces
+        # Look for RAWK block start: "RAWK {"
+        if (line ~ /^[[:space:]]*RAWK[[:space:]]*\{/) {
+            # Ensure only one RAWK block exists
+            if (in_rawk_block) {
+                print "Error: Nested or multiple RAWK blocks are not supported" > "/dev/stderr"
+                exit 1
+            }
+            
+            in_rawk_block = 1
+            rawk_block_start = i
             
-            # Add line to current function body
-            FUNCTION_BODIES[current_function_index] = FUNCTION_BODIES[current_function_index] "\n    " $0
-            SOURCE_LINE_TYPES[line_count] = "function_body"
-            next
+            # Find the matching closing brace using brace counting
+            # This handles nested braces from function definitions within the block
+            brace_count = 1
+            for (j = i + 1; j <= line_count; j++) {
+                line_j = lines[j]
+                for (k = 1; k <= length(line_j); k++) {
+                    char = substr(line_j, k, 1)
+                    if (char == "{") brace_count++
+                    if (char == "}") brace_count--
+                    if (brace_count == 0) {
+                        rawk_block_end = j
+                        in_rawk_block = 0
+                        break
+                    }
+                }
+                if (brace_count == 0) break
+            }
+            
+            # Validate that the block was properly closed
+            if (brace_count != 0) {
+                print "Error: RAWK block opened at line " i " but never closed" > "/dev/stderr"
+                exit 1
+            }
+            break  # Found the complete RAWK block
         }
     }
     
-    # Pattern 4: Start of multi-line function body (only if not already in function body)
-    if (!in_function_body && in_function_def && $0 ~ /^[ \t]*\{/) {
-        in_function_body = 1
-        brace_count = 1
-        SOURCE_LINE_TYPES[line_count] = "function_body_start"
-        next
-    }
-    
-    # Pattern 5: Regular code (but exclude function definition endings)
-    if ($0 ~ /^[ \t]*\}[ \t]*;[ \t]*$/) {
-        SOURCE_LINE_TYPES[line_count] = "function_end"
-    } else {
-        SOURCE_LINE_TYPES[line_count] = "code"
-    }
-}
-
-# -----------------------------------------------------------------------------
-# HELPER FUNCTIONS
-# -----------------------------------------------------------------------------
-
-# Parse multi-line function definition
-function parse_multi_line_function(line, line_num) {
-    # Extract function name
-    if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
-        func_name = substr(line, RSTART + 1, RLENGTH - 1)
-    } else {
-        report_error("Invalid function name", line_num, line, "Function name must be a valid identifier")
-        return
+    # Ensure a RAWK block was found
+    if (!rawk_block_start) {
+        print "Error: No RAWK block found" > "/dev/stderr"
+        exit 1
     }
     
-    # Extract arguments
-    if (match(line, /\(([^)]*)\)/)) {
-        args = substr(line, RSTART + 1, RLENGTH - 2)
-    } else {
-        report_error("Invalid argument list", line_num, line, "Arguments must be enclosed in parentheses")
-        return
+    # Final validation that the block was properly closed
+    if (in_rawk_block) {
+        print "Error: RAWK block opened at line " rawk_block_start " but never closed" > "/dev/stderr"
+        exit 1
     }
     
-    # Store function information
-    function_count++
-    current_function_index = function_count
-    FUNCTION_NAMES[function_count] = func_name
-    FUNCTION_ARGS[function_count] = args
-    FUNCTION_BODIES[function_count] = ""
-    FUNCTION_TYPES[function_count] = "multi"
-    FUNCTION_LINES[function_count] = line_num
+    # =============================================================================
+    # PASS 3: EXTRACT FUNCTION DEFINITIONS FROM RAWK BLOCK
+    # =============================================================================
+    # Parse function definitions in the format: $name = (args) -> { body }
+    # Extract function name, arguments, and body for later code generation.
     
-    # Start collecting function body (the opening brace is already on this line)
-    in_function_body = 1
-    brace_count = 1  # Start with 1 for the opening brace
-    
-    functions_defined++
-}
-
-# Parse single-line function definition
-function parse_single_line_function(line, line_num) {
-    # Extract function name
-    if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
-        func_name = substr(line, RSTART + 1, RLENGTH - 1)
-    } else {
-        report_error("Invalid function name", line_num, line, "Function name must be a valid identifier")
-        return
+    i = rawk_block_start + 1
+    while (i < rawk_block_end) {
+        line = lines[i]
+        
+        # Match function definition pattern: $name = (args) -> {
+        if (line ~ /^[[:space:]]*\$[a-zA-Z_][a-zA-Z0-9_]*[[:space:]]*=[[:space:]]*\(.*\)[[:space:]]*->[[:space:]]*\{/) {
+            
+            # Extract function name (remove $ prefix and whitespace)
+            if (match(line, /^[[:space:]]*\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
+                func_name = substr(line, RSTART + 1, RLENGTH - 1)
+                gsub(/[[:space:]]/, "", func_name)
+                gsub(/^\$/, "", func_name)  # Remove the $ prefix for awk compatibility
+                
+                # Extract function arguments from parentheses
+                args_start = index(line, "(") + 1
+                args_end = index(line, ")")
+                args = substr(line, args_start, args_end - args_start)
+                gsub(/[[:space:]]/, "", args)  # Remove whitespace from arguments
+                
+                # Extract function body using brace counting
+                # This handles nested braces within the function body
+                body = ""
+                brace_count = 1
+                j = i + 1
+                while (j <= line_count && brace_count > 0) {
+                    body_line = lines[j]
+                    for (k = 1; k <= length(body_line); k++) {
+                        char = substr(body_line, k, 1)
+                        if (char == "{") brace_count++
+                        if (char == "}") brace_count--
+                        if (brace_count == 0) break
+                    }
+                    if (brace_count > 0) {
+                        body = body body_line "\n"
+                    }
+                    j++
+                }
+                
+                # Store extracted function information
+                function_count++
+                FUNCTION_NAMES[function_count] = func_name
+                FUNCTION_ARGS[function_count] = args
+                FUNCTION_BODIES[function_count] = body
+                USED_FUNCTIONS[func_name] = 1  # Mark as used (defined)
+                
+                # Skip to end of function definition
+                i = j - 1
+            }
+        }
+        i++
     }
     
-    # Extract arguments
-    if (match(line, /\(([^)]*)\)/)) {
-        args = substr(line, RSTART + 1, RLENGTH - 2)
-    } else {
-        report_error("Invalid argument list", line_num, line, "Arguments must be enclosed in parentheses")
-        return
-    }
+    # =============================================================================
+    # PASS 4: ANALYZE FUNCTION CALLS AND VALIDATE SYNTAX
+    # =============================================================================
+    # Scan all lines to identify which standard library functions are actually used
+    # and validate that function definitions are only inside the RAWK block.
+    # This enables smart standard library inclusion.
     
-    # Extract body (everything after ->)
-    if (match(line, /->[ \t]*(.+)/)) {
-        body = substr(line, RSTART + 2)
-        # Remove trailing semicolon if present
-        gsub(/[ \t]*;[ \t]*$/, "", body)
-    } else {
-        report_error("Invalid function body", line_num, line, "Function body must follow '->'")
-        return
+    for (i = 1; i <= line_count; i++) {
+        line = lines[i]
+        
+        # Validate that function definitions are only inside RAWK block
+        if (i < rawk_block_start || i > rawk_block_end) {
+            if (line ~ /^[[:space:]]*\$[a-zA-Z_][a-zA-Z0-9_]*[[:space:]]*=[[:space:]]*\(.*\)[[:space:]]*->[[:space:]]*\{/) {
+                print "Error: Function definitions must be inside RAWK block (line " i ")" > "/dev/stderr"
+                exit 1
+            }
+        }
+        
+        # Find calls to standard library functions (check ALL lines including RAWK block)
+        # This ensures we include functions called within user-defined functions
+        for (func_name in stdlib_functions) {
+            if (line ~ func_name "\\s*\\(") {
+                USED_STDLIB_FUNCTIONS[func_name] = 1
+            }
+        }
+        
+        # Find calls to user-defined functions
+        for (j = 1; j <= function_count; j++) {
+            func_name = FUNCTION_NAMES[j]
+            if (line ~ func_name "\\s*\\(") {
+                USED_FUNCTIONS[func_name] = 1
+            }
+        }
     }
     
-    # Store function information
-    function_count++
-    FUNCTION_NAMES[function_count] = func_name
-    FUNCTION_ARGS[function_count] = args
-    FUNCTION_BODIES[function_count] = body
-    FUNCTION_TYPES[function_count] = "single"
-    FUNCTION_LINES[function_count] = line_num
-    
-    functions_defined++
-}
+    # =============================================================================
+    # PASS 5: GENERATE FINAL AWK CODE
+    # =============================================================================
+    # Generate the complete awk program with smart standard library inclusion,
+    # user-defined functions, and the main script body.
+    
+    # Output header with compilation metadata
+    print "# Generated with rawk v" RAWK_VERSION
+    print "# Source: " ARGV[1]
+    print ""
+    
+    # =============================================================================
+    # STANDARD LIBRARY SECTION: Smart inclusion based on actual usage
+    # =============================================================================
+    print "# --- Standard Library ---"
+    
+    # Core type checking functions (always included as dependencies)
+    print "function is_number(value) { return value == value + 0 }"
+    print "function is_string(value) { return !(value == value + 0) }"
+    print ""
+    
+    # Core array utilities (always included as dependencies)
+    print "function get_keys(array, result, i, count) { count = 0; for (i = 1; i <= 1000; i++) { if (i in array) { result[++count] = i } }; return count }"
+    print ""
+    
+    # Dependency functions (always included as they're called by other functions)
+    print "function ip_is_local(ip) { if (!is_string(ip)) return 0; return index(ip, \"127.0.0.1\") > 0 || index(ip, \"192.168.\") > 0 || index(ip, \"10.\") > 0 || index(ip, \"172.\") > 0 }"
+    print "function is_bot(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, \"bot\") > 0 || index(user_agent, \"crawler\") > 0 || index(user_agent, \"spider\") > 0 || index(user_agent, \"Googlebot\") > 0 || index(user_agent, \"Bingbot\") > 0 }"
+    print ""
+    
+    # Conditionally include standard library functions based on actual usage
+    # This is the "smart inclusion" feature that only includes functions that are called
+    for (func_name in USED_STDLIB_FUNCTIONS) {
+        if (func_name == "assert") {
+            print "function assert(condition, message) { if (!condition) { print \"Assertion failed: \" message > \"/dev/stderr\"; exit 1 } }"
+        } else if (func_name == "expect_equal") {
+            print "function expect_equal(actual, expected, message) { if (actual != expected) { print \"Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\"; exit 1 } }"
+        } else if (func_name == "expect_true") {
+            print "function expect_true(condition, message) { if (!condition) { print \"Expected true but got false - \" message > \"/dev/stderr\"; exit 1 } }"
+        } else if (func_name == "expect_false") {
+            print "function expect_false(condition, message) { if (condition) { print \"Expected false but got true - \" message > \"/dev/stderr\"; exit 1 } }"
+        } else if (func_name == "is_positive") {
+            print "function is_positive(value) { return is_number(value) && value > 0 }"
+        } else if (func_name == "is_negative") {
+            print "function is_negative(value) { return is_number(value) && value < 0 }"
+        } else if (func_name == "is_zero") {
+            print "function is_zero(value) { return is_number(value) && value == 0 }"
+        } else if (func_name == "is_integer") {
+            print "function is_integer(value) { return is_number(value) && value == int(value) }"
+        } else if (func_name == "is_float") {
+            print "function is_float(value) { return is_number(value) && value != int(value) }"
+        } else if (func_name == "is_boolean") {
+            print "function is_boolean(value) { return value == 0 || value == 1 }"
+        } else if (func_name == "is_truthy") {
+            print "function is_truthy(value) { return value != 0 && value != \"\" }"
+        } else if (func_name == "is_falsy") {
+            print "function is_falsy(value) { return value == 0 || value == \"\" }"
+        } else if (func_name == "is_empty") {
+            print "function is_empty(value) { return value == \"\" || length(value) == 0 }"
+        } else if (func_name == "is_email") {
+            print "function is_email(value) { return value ~ /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$/ }"
+        } else if (func_name == "is_url") {
+            print "function is_url(value) { return value ~ /^(https?:|ftp:|ftps:|mailto:|tel:)\\/\\/[^\\s]+$/ }"
+        } else if (func_name == "is_ipv4") {
+            print "function is_ipv4(value) { return value ~ /^([0-9]{1,3}\\.){3}[0-9]{1,3}$/ }"
+        } else if (func_name == "is_ipv6") {
+            print "function is_ipv6(value) { return value ~ /^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$/ }"
+        } else if (func_name == "is_uuid") {
+            print "function is_uuid(value) { return value ~ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/ }"
+        } else if (func_name == "is_alpha") {
+            print "function is_alpha(value) { return value ~ /^[a-zA-Z]+$/ }"
+        } else if (func_name == "is_numeric") {
+            print "function is_numeric(value) { return value ~ /^[0-9]+$/ }"
+        } else if (func_name == "is_alphanumeric") {
+            print "function is_alphanumeric(value) { return value ~ /^[a-zA-Z0-9]+$/ }"
+        } else if (func_name == "is_palindrome") {
+            print "function is_palindrome(value) { len = length(value); for (i = 1; i <= len/2; i++) if (substr(value, i, 1) != substr(value, len-i+1, 1)) return 0; return 1 }"
+        } else if (func_name == "is_hex") {
+            print "function is_hex(value) { return value ~ /^[0-9a-fA-F]+$/ }"
+        } else if (func_name == "is_csv") {
+            print "function is_csv(value) { return index(value, \",\") > 0 }"
+        } else if (func_name == "is_tsv") {
+            print "function is_tsv(value) { return index(value, \"\\t\") > 0 }"
+        } else if (func_name == "http_is_redirect") {
+            print "function http_is_redirect(status) { return status >= 300 && status < 400 }"
+        } else if (func_name == "http_is_client_error") {
+            print "function http_is_client_error(status) { return status >= 400 && status < 500 }"
+        } else if (func_name == "http_is_server_error") {
+            print "function http_is_server_error(status) { return status >= 500 && status < 600 }"
+        } else if (func_name == "http_is_get") {
+            print "function http_is_get(method) { return method == \"GET\" }"
+        } else if (func_name == "http_is_post") {
+            print "function http_is_post(method) { return method == \"POST\" }"
+        } else if (func_name == "http_is_safe_method") {
+            print "function http_is_safe_method(method) { return method == \"GET\" || method == \"HEAD\" || method == \"OPTIONS\" }"
+        } else if (func_name == "http_is_mutating_method") {
+            print "function http_is_mutating_method(method) { return method == \"POST\" || method == \"PUT\" || method == \"DELETE\" || method == \"PATCH\" }"
+        } else if (func_name == "keys") {
+            print "function keys(array, count, i) { count = 0; for (i in array) count++; return count }"
+        } else if (func_name == "values") {
+            print "function values(array, count, i) { count = 0; for (i in array) count++; return count }"
+        } else if (func_name == "get_values") {
+            print "function get_values(array, result, i, count) { count = 0; for (i = 1; i <= 1000; i++) { if (i in array) { result[++count] = array[i] } }; return count }"
+        } else if (func_name == "map") {
+            print "function map(func_name, array, result, i, count) { count = 0; for (i in array) { result[i] = dispatch_call(func_name, array[i]); count++ }; return count }"
+        } else if (func_name == "reduce") {
+            print "function reduce(func_name, array, initial, i, result) { result = initial; for (i in array) { result = dispatch_call(func_name, result, array[i]) }; return result }"
+        } else if (func_name == "filter") {
+            print "function filter(predicate_func, array, result, i, count) { count = 0; for (i in array) { if (dispatch_call(predicate_func, array[i])) { result[++count] = array[i] } }; return count }"
+        } else if (func_name == "find") {
+            print "function find(predicate_func, array, i) { for (i in array) { if (dispatch_call(predicate_func, array[i])) { return array[i] } }; return \"\" }"
+        } else if (func_name == "findIndex") {
+            print "function findIndex(predicate_func, array, i, keys, key_count) { key_count = get_keys(array, keys); for (i = 1; i <= key_count; i++) { if (dispatch_call(predicate_func, array[keys[i]])) { return i } }; return 0 }"
+        } else if (func_name == "flatMap") {
+            print "function flatMap(func_name, array, result, i, temp_array, temp_count, j) { count = 0; for (i in array) { temp_count = dispatch_call(func_name, array[i], temp_array); for (j = 1; j <= temp_count; j++) { result[++count] = temp_array[j] } }; return count }"
+        } else if (func_name == "take") {
+            print "function take(count, array, result, i, taken) { taken = 0; for (i = 1; i <= 1000; i++) { if (i in array && taken < count) { result[++taken] = array[i] } }; return taken }"
+        } else if (func_name == "drop") {
+            print "function drop(count, array, result, i, skipped, result_count) { skipped = 0; result_count = 0; for (i = 1; i <= 1000; i++) { if (i in array) { if (skipped >= count) { result[++result_count] = array[i] } else { skipped++ } } }; return result_count }"
+        } else if (func_name == "pipe") {
+            print "function pipe(value, func_name) { return dispatch_call(func_name, value) }"
+        } else if (func_name == "pipe_multi") {
+            print "function pipe_multi(value, func_names, i, result) { result = value; for (i = 1; i <= 1000; i++) { if (i in func_names) { result = dispatch_call(func_names[i], result) } }; return result }"
+        } else if (func_name == "is_even") {
+            print "function is_even(value) { return is_number(value) && value % 2 == 0 }"
+        } else if (func_name == "is_odd") {  # emits is_odd; awk's % keeps the dividend's sign (-3 % 2 == -1), so a remainder of -1 must count as odd too
+            print "function is_odd(value) { return is_number(value) && (value % 2 == 1 || value % 2 == -1) }"
+        } else if (func_name == "is_prime") {  # emits is_prime; i is a trailing local so the trial-division counter cannot corrupt a caller's loop variable
+            print "function is_prime(value, i) { if (!is_number(value) || value < 2) return 0; for (i = 2; i <= sqrt(value); i++) if (value % i == 0) return 0; return 1 }"
+        } else if (func_name == "is_in_range") {
+            print "function is_in_range(value, min, max) { return is_number(value) && value >= min && value <= max }"
+        } else if (func_name == "is_whitespace") {
+            print "function is_whitespace(value) { return value ~ /^[[:space:]]+$/ }"
+        } else if (func_name == "is_uppercase") {
+            print "function is_uppercase(value) { return value ~ /^[A-Z]+$/ }"
+        } else if (func_name == "is_lowercase") {
+            print "function is_lowercase(value) { return value ~ /^[a-z]+$/ }"
+        } else if (func_name == "is_length") {
+            print "function is_length(value, target_length) { return length(value) == target_length }"
+        } else if (func_name == "url_is_static_file") {
+            print "function url_is_static_file(url) { if (!is_string(url)) return 0; return index(url, \".css\") > 0 || index(url, \".js\") > 0 || index(url, \".png\") > 0 || index(url, \".jpg\") > 0 || index(url, \".jpeg\") > 0 || index(url, \".gif\") > 0 || index(url, \".svg\") > 0 || index(url, \".ico\") > 0 || index(url, \".woff\") > 0 || index(url, \".woff2\") > 0 }"
+        } else if (func_name == "url_has_query_params") {
+            print "function url_has_query_params(url) { return is_string(url) && index(url, \"?\") > 0 }"
+        } else if (func_name == "url_is_root_path") {
+            print "function url_is_root_path(url) { return is_string(url) && (url == \"/\" || url == \"\") }"
+        } else if (func_name == "user_agent_is_mobile") {
+            print "function user_agent_is_mobile(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, \"Mobile\") > 0 || index(user_agent, \"iPhone\") > 0 || index(user_agent, \"Android\") > 0 || index(user_agent, \"iPad\") > 0 }"
+        } else if (func_name == "user_agent_is_desktop") {
+            print "function user_agent_is_desktop(user_agent) { if (!is_string(user_agent)) return 0; return (index(user_agent, \"Windows\") > 0 || index(user_agent, \"Macintosh\") > 0 || (index(user_agent, \"Linux\") > 0 && index(user_agent, \"Android\") == 0)) }"
+        } else if (func_name == "user_agent_is_browser") {
+            print "function user_agent_is_browser(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, \"Mozilla\") > 0 && !is_bot(user_agent) }"
 
-# Generate standard library functions
-function generate_standard_library() {
-    print "# --- rawk Standard Library ---"
-    print "# Dispatch mechanism for rawk functions"
-    print "function dispatch_call(func_name, arg1, arg2, arg3, arg4, arg5, metadata, parts, internal_name, arg_count) {"
-    print "    if (!(func_name in RAWK_DISPATCH)) {"
-    print "        print \"Error: Function '\" func_name \"' not found\" > \"/dev/stderr\""
-    print "        return"
-    print "    }"
-    print "    metadata = RAWK_DISPATCH[func_name]"
-    print "    split(metadata, parts, \"|\")"
-    print "    internal_name = parts[1]"
-    print "    arg_count = parts[2]"
-    print "    "
-    print "    # Switch statement dispatch based on internal function name"
-    for (i = 1; i <= function_count; i++) {
-        internal_name = "__lambda_" (i - 1)
-        arg_count = split(FUNCTION_ARGS[i], args_array, ",")
-        print "    if (internal_name == \"" internal_name "\") {"
-        if (arg_count == 0) {
-            print "        if (arg_count == 0) return " internal_name "()"
-        } else if (arg_count == 1) {
-            print "        if (arg_count == 1) return " internal_name "(arg1)"
-        } else if (arg_count == 2) {
-            print "        if (arg_count == 2) return " internal_name "(arg1, arg2)"
-        } else if (arg_count == 3) {
-            print "        if (arg_count == 3) return " internal_name "(arg1, arg2, arg3)"
-        } else if (arg_count == 4) {
-            print "        if (arg_count == 4) return " internal_name "(arg1, arg2, arg3, arg4)"
-        } else if (arg_count == 5) {
-            print "        if (arg_count == 5) return " internal_name "(arg1, arg2, arg3, arg4, arg5)"
-        } else {
-            print "        print \"Error: Function '\" func_name \"' has too many arguments (\" arg_count \")\" > \"/dev/stderr\""
-            print "        return"
+        } else if (func_name == "ip_is_public") {
+            print "function ip_is_public(ip) { return !ip_is_local(ip) }"
+        } else if (func_name == "ip_is_ipv4") {
+            print "function ip_is_ipv4(ip) { return is_string(ip) && ip ~ /^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$/ }"
+        } else if (func_name == "ip_is_ipv6") {
+            print "function ip_is_ipv6(ip) { return is_string(ip) && ip ~ /^[0-9a-fA-F:]+$/ }"
         }
-        print "    }"
     }
-    print "    "
-    print "    print \"Error: Invalid argument count for function '\" func_name \"'\" > \"/dev/stderr\""
-    print "    return"
-    print "}"
-    print ""
     
-    print "# --- Predicate Functions ---"
-    print "# Type checking and validation functions"
-    print ""
-    print "function is_number(value) {"
-    print "    # Check if value is a number (including 0)"
-    print "    return value == value + 0"
-    print "}"
-    print ""
-    print "function is_string(value) {"
-    print "    # Check if value is a string (not a number)"
-    print "    # In AWK, string numbers like \"123\" are both strings and numbers"
-    print "    # So we check if it's NOT a number to determine if it's a pure string"
-    print "    return !(value == value + 0)"
-    print "}"
-    print ""
-    print "function assert(condition, message) {"
-    print "    if (!condition) {"
-    print "        print \"ASSERTION FAILED: \" message > \"/dev/stderr\""
-    print "        print \"  at line \" FNR \" in \" FILENAME > \"/dev/stderr\""
-    print "        exit 1"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function expect_equal(actual, expected, message) {"
-    print "    if (actual != expected) {"
-    print "        print \"EXPECTATION FAILED: \" message > \"/dev/stderr\""
-    print "        print \"  Expected: \" expected > \"/dev/stderr\""
-    print "        print \"  Actual:   \" actual > \"/dev/stderr\""
-    print "        print \"  at line \" FNR \" in \" FILENAME > \"/dev/stderr\""
-    print "        exit 1"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function expect_true(condition, message) {"
-    print "    return assert(condition, message)"
-    print "}"
-    print ""
-    print "function expect_false(condition, message) {"
-    print "    return assert(!condition, message)"
-    print "}"
-    print ""
-    print "function is_positive(value) {"
-    print "    # Check if value is a positive number"
-    print "    return is_number(value) && value > 0"
-    print "}"
-    print ""
-    print "function is_negative(value) {"
-    print "    # Check if value is a negative number"
-    print "    return is_number(value) && value < 0"
-    print "}"
-    print ""
-    print "function is_zero(value) {"
-    print "    # Check if value is zero"
-    print "    return is_number(value) && value == 0"
-    print "}"
-    print ""
-    print "function is_integer(value) {"
-    print "    # Check if value is an integer"
-    print "    return is_number(value) && int(value) == value"
-    print "}"
-    print ""
-    print "function is_float(value) {"
-    print "    # Check if value is a floating point number"
-    print "    return is_number(value) && int(value) != value"
-    print "}"
-    print ""
-    print "function is_boolean(value) {"
-    print "    # Check if value is a boolean (0 or 1)"
-    print "    return value == 0 || value == 1"
-    print "}"
-    print ""
-    print "function is_truthy(value) {"
-    print "    # Check if value is truthy (non-zero, non-empty)"
-    print "    if (is_number(value)) return value != 0"
-    print "    if (is_string(value)) return value != \"\""
-    print "    return 0"
-    print "}"
-    print ""
-    print "function is_falsy(value) {"
-    print "    # Check if value is falsy (zero, empty string)"
-    print "    return !is_truthy(value)"
-    print "}"
-    print ""
-    print "function is_empty(value) {"
-    print "    # Check if value is empty (empty string, 0)"
-    print "    if (value == \"\") return 1"
-    print "    if (value == 0) return 1"
-    print "    return 0"
-    print "}"
-    print ""
-    print "function is_email(value) {"
-    print "    # Simple email validation"
-    print "    if (value == \"\") return 0"
-    print "    # Must contain exactly one @ symbol"
-    print "    at_count = 0"
-    print "    for (i = 1; i <= length(value); i++) {"
-    print "        if (substr(value, i, 1) == \"@\") at_count++"
-    print "    }"
-    print "    if (at_count != 1) return 0"
-    print "    # Split into local and domain parts"
-    print "    split(value, parts, \"@\")"
-    print "    local_part = parts[1]"
-    print "    domain_part = parts[2]"
-    print "    # Local and domain parts must not be empty"
-    print "    if (length(local_part) == 0 || length(domain_part) == 0) return 0"
-    print "    # Basic local part validation: no spaces"
-    print "    if (local_part ~ /[ ]/) return 0"
-    print "    # Domain part validation"
-    print "    if (index(domain_part, \".\") == 0) return 0"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function is_url(value) {"
-    print "    # Enhanced URL validation with multiple protocols"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Check for common URL schemes"
-    print "    if (value ~ /^(https|http|ftp|ftps|mailto|tel):(\\/\\/)?([a-zA-Z0-9\\.-]+)(:[0-9]+)?(\\/.*)?(\\?.*)?$/) {"
-    print "        # Extra check for http/https/ftp to ensure they have slashes"
-            print "        if ((value ~ /^http/ || value ~ /^ftp/) && value !~ /:\\/\\//) return 0"
-    print "        return 1"
-    print "    }"
-    print "    return 0"
-    print "}"
-    print ""
-    print "function is_ipv4(value) {"
-    print "    # Basic IPv4 validation"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Split by dots and check each octet"
-    print "    split(value, octets, \".\")"
-    print "    if (length(octets) != 4) return 0"
-    print "    for (i = 1; i <= 4; i++) {"
-    print "        if (!is_number(octets[i])) return 0"
-    print "        if (octets[i] < 0 || octets[i] > 255) return 0"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function is_ipv6(value) {"
-    print "    # Enhanced IPv6 validation with interface identifiers"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Handle optional interface identifier (e.g., %eth0)"
-    print "    addr = value"
-    print "    if (index(addr, \"%\") > 0) {"
-    print "        split(addr, parts, \"%\")"
-    print "        addr = parts[1]"
-    print "    }"
-    print "    # An IPv6 address cannot contain more than one \"::\""
-    print "    if (gsub(/::/, \"&\") > 1) return 0"
-    print "    # Check for invalid trailing colon"
-    print "    if (substr(addr, length(addr)) == \":\" && substr(addr, length(addr) - 1) != \"::\") return 0"
-    print "    has_trailing_colon = (substr(addr, length(addr) - 1) == \"::\")"
-    print "    num_parts = split(addr, parts, \":\")"
-    print "    empty_found = (addr ~ /::/)"
-    print "    total_segments = num_parts"
-    print "    if (has_trailing_colon) total_segments--"
-    print "    for (i = 1; i <= num_parts; i++) {"
-    print "        if (length(parts[i]) == 0) continue  # Part of :: compression"
-    print "        # Each segment must be valid hex between 1 and 4 characters"
-    print "        if (parts[i] !~ /^[0-9a-fA-F]{1,4}$/) return 0"
-    print "    }"
-    print "    if (empty_found) {"
-    print "        if (total_segments > 7) return 0"
-    print "    } else {"
-    print "        if (total_segments != 8) return 0"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function is_uuid(value) {"
-    print "    # UUID validation (comprehensive format support)"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Pattern 1: Standard hyphenated UUID"
-    print "    if (value ~ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/) return 1"
-    print "    # Pattern 2: UUID with no hyphens (32 hex characters)"
-    print "    if (value ~ /^[0-9a-fA-F]{32}$/) return 1"
-    print "    # Pattern 3: URN-formatted UUID"
-    print "    if (value ~ /^urn:uuid:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/) return 1"
-    print "    return 0"
-    print "}"
-    print ""
-    print "function is_alpha(value) {"
-    print "    # Check if string contains only alphabetic characters"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Remove all alphabetic characters and check if empty"
-    print "    gsub(/[a-zA-Z]/, \"\", value)"
-    print "    return value == \"\""
-    print "}"
-    print ""
-    print "function is_numeric(value) {"
-    print "    # Check if string contains only numeric characters"
-    print "    if (value == \"\") return 0"
-    print "    # Convert to string and check if it contains only digits"
-    print "    str_value = value \"\""
-    print "    # Remove all numeric characters and check if empty"
-    print "    gsub(/[0-9]/, \"\", str_value)"
-    print "    return str_value == \"\""
-    print "}"
-    print ""
-    print "function is_alphanumeric(value) {"
-    print "    # Check if string contains only alphanumeric characters"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Remove all alphanumeric characters and check if empty"
-    print "    gsub(/[a-zA-Z0-9]/, \"\", value)"
-    print "    return value == \"\""
-    print "}"
-    print ""
-    print "function is_palindrome(value) {"
-    print "    # Enhanced palindrome detection with better whitespace handling"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 1"
-    print "    # Clean string: lowercase and remove non-alphanumeric characters"
-    print "    clean_str = tolower(value)"
-    print "    gsub(/[^a-z0-9]/, \"\", clean_str)"
-    print "    len = length(clean_str)"
-    print "    if (len == 0) return 1  # Empty string after cleaning is a palindrome"
-    print "    # Check if it reads the same forwards and backwards"
-    print "    for (i = 1; i <= len / 2; i++) {"
-    print "        if (substr(clean_str, i, 1) != substr(clean_str, len - i + 1, 1)) return 0"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function is_in_range(value, min, max) {"
-    print "    # Check if number is within range [min, max]"
-    print "    return is_number(value) && value >= min && value <= max"
-    print "}"
-    print ""
-    print "function is_even(value) {"
-    print "    # Check if number is even"
-    print "    return is_number(value) && value % 2 == 0"
-    print "}"
-    print ""
-    print "function is_odd(value) {"
-    print "    # Check if number is odd"
-    print "    return is_number(value) && value % 2 != 0"
-    print "}"
-    print ""
-    print "function is_prime(value) {"
-    print "    # Check if number is prime"
-    print "    if (!is_number(value) || value < 2) return 0"
-    print "    if (value == 2) return 1"
-    print "    if (value % 2 == 0) return 0"
-    print "    for (i = 3; i * i <= value; i += 2) {"
-    print "        if (value % i == 0) return 0"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function is_whitespace(value) {"
-    print "    # Check if string is whitespace"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    return value ~ /^[ \\t\\n\\r]+$/"
-    print "}"
-    print ""
-    print "function is_uppercase(value) {"
-    print "    # Check if string is uppercase"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    return value ~ /^[A-Z]+$/"
-    print "}"
-    print ""
-    print "function is_lowercase(value) {"
-    print "    # Check if string is lowercase"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    return value ~ /^[a-z]+$/"
-    print "}"
-    print ""
-    print "function is_length(value, target_length) {"
-    print "    # Check if string/array has specific length"
-    print "    if (is_string(value)) {"
-    print "        return length(value) == target_length"
-    print "    } else {"
-    print "        # For arrays, count the elements"
-    print "        count = 0"
-    print "        for (i in value) count++"
-    print "        return count == target_length"
-    print "    }"
-    print "}"
-    print ""
-    print "function is_array(value) {"
-    print "    # Check if value is an array (limited detection)"
-    print "    # This is a heuristic - we check if it has any elements"
-    print "    # Note: This function has limitations due to AWK's array handling"
-    print "    count = 0"
-    print "    for (i in value) {"
-    print "        count++"
-    print "        break  # Just need to find one element"
-    print "    }"
-    print "    return count > 0"
-    print "}"
-    print ""
-    print "function is_hex(value) {"
-    print "    # Enhanced hex validation with optional prefixes"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Strip optional prefixes"
-    print "    test_str = value"
-    print "    if (substr(test_str, 1, 2) == \"0x\" || substr(test_str, 1, 2) == \"0X\") {"
-    print "        test_str = substr(test_str, 3)"
-    print "    } else if (substr(test_str, 1, 1) == \"#\") {"
-    print "        test_str = substr(test_str, 2)"
-    print "    }"
-    print "    if (length(test_str) == 0) return 0  # Prefix only is not valid"
-    print "    return (test_str ~ /^[0-9a-fA-F]+$/) ? 1 : 0"
-    print "}"
-    print ""
-    print "function is_csv(value, _fs_orig, _nf_orig, _comma_count, _quote_count) {"
-    print "    # Check if string appears to be CSV format (robust version)"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Heuristic 1: Must contain at least one comma"
-    print "    if (index(value, \",\") == 0) return 0"
-    print "    # Heuristic 2: Should have an even number of double quotes"
-    print "    _quote_count = gsub(/\"/, \"&\", value)"
-    print "    if (_quote_count % 2 != 0) return 0"
-    print "    # Heuristic 3: When split by comma, should result in more than one field"
-    print "    _fs_orig = FS"
-    print "    _nf_orig = NF"
-    print "    FS = \",\""
-    print "    $0 = value"
-    print "    _comma_count = NF"
-    print "    # Restore original state"
-    print "    FS = _fs_orig"
-    print "    $0 = $0"
-    print "    return (_comma_count > 1) ? 1 : 0"
-    print "}"
-    print ""
-    print "function is_tsv(value, _fs_orig, _nf_orig, _tab_count) {"
-    print "    # Check if string appears to be TSV format (robust version)"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Heuristic 1: Must contain at least one tab character"
-    print "    if (index(value, \"\\t\") == 0) return 0"
-    print "    # Heuristic 2: When split by tab, should result in more than one field"
-    print "    _fs_orig = FS"
-    print "    _nf_orig = NF"
-    print "    FS = \"\\t\""
-    print "    $0 = value"
-    print "    _tab_count = NF"
-    print "    # Restore original state"
-    print "    FS = _fs_orig"
-    print "    $0 = $0"
-    print "    return (_tab_count > 1) ? 1 : 0"
-    print "}"
-    print ""
-    print "# --- Array Utility Functions ---"
-    print ""
-    print "function keys(array, count, i) {"
-    print "    # Returns count of keys in array"
-    print "    count = 0"
-    print "    for (i in array) count++"
-    print "    return count"
-    print "}"
-    print ""
-    print "function values(array, count, i) {"
-    print "    # Returns count of values in array"
-    print "    count = 0"
-    print "    for (i in array) count++"
-    print "    return count"
-    print "}"
-    print ""
-    print "function get_keys(array, result, i, count) {"
-    print "    # Populates result array with keys"
-    print "    count = 0"
-    print "    for (i in array) {"
-    print "        result[++count] = i"
-    print "    }"
-    print "    return count"
-    print "}"
-    print ""
-    print "function get_values(array, result, i, count) {"
-    print "    # Populates result array with values"
-    print "    count = 0"
-    print "    for (i in array) {"
-    print "        result[++count] = array[i]"
-    print "    }"
-    print "    return count"
-    print "}"
-    print ""
-    print "# --- Functional Programming Functions ---"
-    print ""
-    print "function map(func_name, array, result, i) {"
-    print "    # Apply function to each element of array, preserving indices"
-    print "    for (i in array) {"
-    print "        result[i] = dispatch_call(func_name, array[i])"
-    print "    }"
-    print "    return keys(array)"
-    print "}"
-    print ""
-    print "function reduce(func_name, array, initial, result, i, first) {"
-    print "    # Reduce array using function (left fold)"
-    print "    result = initial"
-    print "    first = 1"
-    print "    for (i in array) {"
-    print "        if (first) {"
-    print "            result = array[i]"
-    print "            first = 0"
-    print "        } else {"
-    print "            result = dispatch_call(func_name, result, array[i])"
-    print "        }"
-    print "    }"
-    print "    return result"
-    print "}"
-    print ""
-    print "function pipe(value, func_name, result) {"
-    print "    # Pipe value through a single function (simplified version)"
-    print "    result = dispatch_call(func_name, value)"
-    print "    return result"
-    print "}"
-    print ""
-    print "function pipe_multi(value, func_names, result, i, func_count) {"
-    print "    # Pipe value through multiple functions (func_names is array)"
-    print "    result = value"
-    print "    func_count = length(func_names)"
-    print "    for (i = 1; i <= func_count; i++) {"
-    print "        result = dispatch_call(func_names[i], result)"
-    print "    }"
-    print "    return result"
-    print "}"
-    print ""
-    print "# --- Enhanced Array Utilities ---"
-    print ""
-    print "function filter(predicate_func, array, result, i, count) {"
-    print "    # Filter array elements based on predicate function"
-    print "    count = 0"
-    print "    for (i in array) {"
-    print "        if (dispatch_call(predicate_func, array[i])) {"
-    print "            result[++count] = array[i]"
-    print "        }"
-    print "    }"
-    print "    return count"
-    print "}"
-    print ""
-    print "function find(predicate_func, array, i, keys, key_count) {"
-    print "    # Find first element that matches predicate"
-    print "    key_count = get_keys(array, keys)"
-    print "    for (i = 1; i <= key_count; i++) {"
-    print "        if (dispatch_call(predicate_func, array[keys[i]])) {"
-    print "            return array[keys[i]]"
-    print "        }"
-    print "    }"
-    print "    return \"\"  # Not found"
-    print "}"
-    print ""
-    print "function findIndex(predicate_func, array, i, keys, key_count) {"
-    print "    # Find index of first element that matches predicate"
-    print "    key_count = get_keys(array, keys)"
-    print "    for (i = 1; i <= key_count; i++) {"
-    print "        if (dispatch_call(predicate_func, array[keys[i]])) {"
-    print "            return i"
-    print "        }"
-    print "    }"
-    print "    return 0  # Not found"
-    print "}"
-    print ""
-}
-
-# Generate function definitions
-function generate_function_definitions() {
-    if (function_count == 0) return
+    # =============================================================================
+    # DISPATCH FUNCTION: Dynamic function calling for functional programming
+    # =============================================================================
+    # The dispatch_call function enables functional programming utilities (map, reduce, etc.)
+    # to dynamically call user-defined functions by name. This is only included when used.
+    
+    if ("map" in USED_STDLIB_FUNCTIONS || "reduce" in USED_STDLIB_FUNCTIONS || "filter" in USED_STDLIB_FUNCTIONS || "find" in USED_STDLIB_FUNCTIONS || "findIndex" in USED_STDLIB_FUNCTIONS || "flatMap" in USED_STDLIB_FUNCTIONS || "pipe" in USED_STDLIB_FUNCTIONS || "pipe_multi" in USED_STDLIB_FUNCTIONS) {
+        print "# Dispatch function for functional programming"
+        print "function dispatch_call(func_name, arg1, arg2, arg3, arg4, arg5) {"
+        print "    # User-defined functions"
+        # Emit one branch per function recorded in FUNCTION_NAMES instead of a
+        # fixed list of names: every user function can then be passed by name
+        # to map/filter/reduce/..., and the dispatcher never references a
+        # function that the generated program does not define.
+        for (i = 1; i <= function_count; i++) {
+            # Strip blanks so an empty or whitespace-only parameter list
+            # produces a zero-argument call.
+            dispatch_sig = FUNCTION_ARGS[i]
+            gsub(/[ \t]/, "", dispatch_sig)
+            dispatch_n = (dispatch_sig == "") ? 0 : split(dispatch_sig, dispatch_params, ",")
+            # dispatch_call only carries arg1..arg5; awk permits calling a
+            # function with fewer arguments than it declares, so clamp.
+            if (dispatch_n > 5) dispatch_n = 5
+            dispatch_actuals = ""
+            for (dispatch_k = 1; dispatch_k <= dispatch_n; dispatch_k++) {
+                dispatch_actuals = dispatch_actuals (dispatch_k > 1 ? ", " : "") "arg" dispatch_k
+            }
+            print "    if (func_name == \"" FUNCTION_NAMES[i] "\") return " FUNCTION_NAMES[i] "(" dispatch_actuals ")"
+        }
+        # NOTE(review): the branches below assume these five predicates are
+        # always present in the generated output - confirm against the code
+        # that emits the standard library.
+        print "    # Standard library functions"
+        print "    if (func_name == \"is_positive\") return is_positive(arg1)"
+        print "    if (func_name == \"is_even\") return is_even(arg1)"
+        print "    if (func_name == \"is_odd\") return is_odd(arg1)"
+        print "    if (func_name == \"is_number\") return is_number(arg1)"
+        print "    if (func_name == \"is_string\") return is_string(arg1)"
+        print "    print \"Error: Function '\" func_name \"' not found\" > \"/dev/stderr\""
+        print "    return"
+        print "}"
+        print ""
+    }
     
+    # =============================================================================
+    # USER FUNCTIONS SECTION: Generated from RAWK block definitions
+    # =============================================================================
     print "# --- User Functions ---"
     
-    # Build dispatch table
-    print "# Dispatch table"
-    print "BEGIN {"
-    for (i = 1; i <= function_count; i++) {
-        internal_name = "__lambda_" (i - 1)
-        arg_count = split(FUNCTION_ARGS[i], args_array, ",")
-        print "    RAWK_DISPATCH[\"" FUNCTION_NAMES[i] "\"] = \"" internal_name "|" arg_count "|" FUNCTION_LINES[i] "\""
-    }
-    print "}"
-    print ""
-    
-    # Generate function definitions
+    # Generate user-defined functions from extracted definitions
     for (i = 1; i <= function_count; i++) {
-        internal_name = "__lambda_" (i - 1)
-        body = FUNCTION_BODIES[i]
-        
-        # Replace recursive calls
-        for (j = 1; j <= function_count; j++) {
-            gsub(FUNCTION_NAMES[j] "\\(", "__lambda_" (j - 1) "(", body)
-        }
-        
-        print "function " internal_name "(" FUNCTION_ARGS[i] ") {"
-        if (FUNCTION_TYPES[i] == "single") {
-            print "    return " body
-        } else {
-            print body
-        }
+        print "function " FUNCTION_NAMES[i] "(" FUNCTION_ARGS[i] ") {" FUNCTION_BODIES[i]
         print "}"
         print ""
     }
-}
-
-# Generate main script body
-function generate_main_script() {
-    print "# --- Main Script Body ---"
     
-    # Check if there's already a BEGIN block
-    has_begin = 0
-    for (i = 1; i <= line_count; i++) {
-        if (SOURCE_LINE_TYPES[i] == "code" && SOURCE_LINES[i] ~ /^[ \t]*BEGIN[ \t]*\{/) {
-            has_begin = 1
-            break
-        }
-    }
+    # =============================================================================
+    # MAIN SCRIPT SECTION: Original code excluding RAWK block
+    # =============================================================================
+    print "# --- Main Script ---"
     
-    if (has_begin) {
-        # Print lines as-is
-        for (i = 1; i <= line_count; i++) {
-            if (SOURCE_LINE_TYPES[i] == "code") {
-                line = SOURCE_LINES[i]
-                
-                # Replace function calls
-                for (j = 1; j <= function_count; j++) {
-                    gsub(FUNCTION_NAMES[j] "\\(", "__lambda_" (j - 1) "(", line)
-                }
-                
-                print line
-            }
-        }
-    } else {
-        # Wrap in BEGIN block
-        print "BEGIN {"
-        for (i = 1; i <= line_count; i++) {
-            if (SOURCE_LINE_TYPES[i] == "code") {
-                line = SOURCE_LINES[i]
-                
-                # Replace function calls
-                for (j = 1; j <= function_count; j++) {
-                    gsub(FUNCTION_NAMES[j] "\\(", "__lambda_" (j - 1) "(", line)
-                }
-                
-                print "    " line
-            }
+    # Output all lines except those within the RAWK block
+    for (i = 1; i <= line_count; i++) {
+        if (i < rawk_block_start || i > rawk_block_end) {
+            print lines[i]
         }
-        print "}"
-    }
-}
-
-# Error reporting function
-function report_error(message, line_num, line, suggestion) {
-    print "❌ rawk compilation error: " message > "/dev/stderr"
-    print "   at line " line_num " in " FILENAME > "/dev/stderr"
-    print "   context: " line > "/dev/stderr"
-    if (suggestion != "") {
-        print "   💡 " suggestion > "/dev/stderr"
-    }
-    print "" > "/dev/stderr"
-    error_count++
-    errors++
-}
-
-# Warning reporting function
-function report_warning(message, line_num, line, suggestion) {
-    print "⚠️  rawk compilation warning: " message > "/dev/stderr"
-    print "   at line " line_num " in " FILENAME > "/dev/stderr"
-    print "   context: " line > "/dev/stderr"
-    if (suggestion != "") {
-        print "   💡 " suggestion > "/dev/stderr"
     }
-    print "" > "/dev/stderr"
-    warning_count++
-    warnings++
-}
-
-# END block to generate final output
-END {
-    source_lines = line_count
-    
-
-    
-    # Generate standard library
-    generate_standard_library()
-    
-    # Generate function definitions
-    generate_function_definitions()
-    
-    # Generate main script body
-    generate_main_script()
     
-    # Print compilation summary
-    print "# rawk compilation summary:"
-    print "#   - Functions defined: " functions_defined
-    print "#   - Source lines: " source_lines
-    print "#   - Errors: " errors
-    print "#   - Warnings: " warnings
+    # =============================================================================
+    # COMPILATION SUMMARY: Metadata about the compilation process
+    # =============================================================================
     print ""
-}
\ No newline at end of file
+    print "# Rawk compilation summary:"
+    print "#   - Rawk Version: " RAWK_VERSION
+    print "#   - Functions defined: " function_count
+    print "#   - Source lines: " line_count
+    print "#   - Standard library functions included: " length(USED_STDLIB_FUNCTIONS)
+} 
\ No newline at end of file