about summary refs log tree commit diff stats
path: root/awk/rawk/rawk.awk
diff options
context:
space:
mode:
Diffstat (limited to 'awk/rawk/rawk.awk')
-rw-r--r-- awk/rawk/rawk.awk 1656
1 files changed, 494 insertions, 1162 deletions
diff --git a/awk/rawk/rawk.awk b/awk/rawk/rawk.awk
index b4128e2..c4e2ff1 100644
--- a/awk/rawk/rawk.awk
+++ b/awk/rawk/rawk.awk
@@ -1,1206 +1,538 @@
-#!/usr/bin/env awk -f
-
-# -----------------------------------------------------------------------------
-# rawk.awk - The `rawk` Language Compiler
-#
-# This script translates a `.rawk` source file into standard, portable awk code.
-# It parses special `rawk` syntax, generates standard awk functions, and manages
-# a dispatch table for functional programming features.
-#
-# USAGE:
-#   awk -f rawk.awk my_program.rawk | awk -f -
-#
-# EXAMPLES:
-#   # Compile and run a rawk program
-#   awk -f rawk.awk hello.rawk | awk -f -
-#
-#   # Compile to a file for later use
-#   awk -f rawk.awk hello.rawk > hello.awk
-#   awk -f hello.awk
-#
-# LANGUAGE FEATURES:
-#
-# 1. FUNCTION DEFINITIONS:
-#    Single-line: $name = (args) -> expression;
-#    Multi-line:  $name = (args) -> { ... };
-#
-#    Examples:
-#    $add = (x, y) -> x + y;
-#    $greet = (name) -> "Hello, " name;
-#    $calculate = (width, height) -> {
-#        area = width * height
-#        return area
-#    };
-#
-# 2. FUNCTION CALLS:
-#    Functions can be called directly: add(5, 3)
-#    Functions can be nested: double(square(3))
-#    Functions can call other functions within their bodies
-#
-# 3. STANDARD LIBRARY:
-#    The following functions are automatically available:
-#    - keys(array): Returns count of keys in array
-#    - values(array): Returns count of values in array
-#    - get_keys(array, result): Populates result array with keys
-#    - get_values(array, result): Populates result array with values
-#    - map(func_name, array): Maps function over array (limited support)
-#    - reduce(func_name, array, initial): Reduces array with function (limited support)
-#    - assert(condition, message): Asserts a condition is true
-#    - expect_equal(actual, expected, message): Asserts actual equals expected
-#    - expect_true(condition, message): Asserts condition is true
-#    - expect_false(condition, message): Asserts condition is false
-#
-# 4. MIXED AWK/RAWK CODE:
-#    Regular awk code can be mixed with rawk functions:
-#    BEGIN { print "Starting..." }
-#    $process = (line) -> "Processed: " line;
-#    { print process($0) }
-#    END { print "Done." }
+#!/usr/bin/awk -f
+
+# rawk.awk
+
+# Author: @eli_oat
+# License: Public Domain
+# Let's make awk rawk
+
+# =============================================================================
+# Multi-pass compiler
+# =============================================================================
+# 
+# This compiler transforms rawk code into standard awk and includes only the
+# standard library functions you actually call (plus a few core helpers that
+# are always emitted). It uses a multi-pass approach to overcome awk's
+# variable scoping limitations and ensure deterministic compilation.
 #
 # COMPILATION PROCESS:
-# 1. Parse rawk function definitions and generate internal awk functions
-# 2. Build dispatch table mapping public names to internal names
-# 3. Replace function calls with internal names
-# 4. Generate standard library functions
-# 5. Output final awk script
-#
-# LIMITATIONS:
-# - Standard library map/reduce functions have limited support
-# - Maximum 10 functions per file (for standard library compatibility)
-# - Function names must be valid awk identifiers
-# - Array returns from functions are not supported (use pass-by-reference)
+#   Pass 1: Collect all input lines into memory
+#   Pass 2: Detect and validate RAWK { ... } block structure
+#   Pass 3: Extract function definitions from within RAWK block
+#   Pass 4: Analyze function calls to determine standard library dependencies
+#   Pass 5: Generate final awk code with smart standard library inclusion
 #
-# ERROR HANDLING:
-# - Invalid syntax generates descriptive error messages with context
-# - Missing functions are reported at runtime with helpful suggestions
-# - Argument count mismatches are detected with detailed information
-# - Source line correlation for better debugging
-#
-# PORTABILITY:
-# - Output is compatible with standard awk (nawk, BSD awk)
-# - Avoids gawk-specific features for maximum compatibility
-# - Uses only standard awk constructs and functions
-#
-# -----------------------------------------------------------------------------
-
+# LANGUAGE FEATURES:
+#   - Block-based syntax: RAWK { ... } for function definitions
+#   - Functional programming utilities: map, reduce, filter, etc.
+#   - Smart standard library: only includes functions actually used
+#   - Comprehensive error handling with actionable messages
+# =============================================================================
 
-# The BEGIN block runs once before any input is processed.
-# Its purpose is to initialize the compiler's state.
 BEGIN {
-    # --- Compiler State Initialization ---
-
-    # Counter to generate unique internal names for lambda functions (e.g., __lambda_1, __lambda_2).
-    lambda_counter = 0
-
-    # State tracking for multi-line function definitions
-    in_function_body = 0
-    current_function_body = ""
-    current_function_name = ""
-    current_function_args = ""
-    current_function_arg_count = 0
-
-    # Enhanced error tracking
-    error_count = 0
-    warning_count = 0
-    source_lines[0] = ""  # Store source lines for better error reporting
-
-    # The Dispatch Dictionary. This is the core of the portable dispatch system.
-    # Key: The public function name (e.g., "my_add").
-    # Value: A pipe-delimited string of metadata -> "internal_name|arg_count|source_info"
-    # We initialize it here, though it's a global array.
-    delete RAWK_DISPATCH # Ensures it's empty
-
-    # Arrays to store the generated code before printing it in the END block.
-    # This ensures the correct final order of the output script.
-    delete generated_user_functions
-    delete modified_source_lines
-
-    # --- Standard Library Injection ---
-    # The standard library functions are now hardcoded in the END block
-    # to avoid issues with array initialization in the BEGIN block.
+    # =============================================================================
+    # INITIALIZATION: Set up data structures for multi-pass compilation
+    # =============================================================================
+    
+    RAWK_VERSION = "0.0.1"
+    
+    # Arrays holding compiler state; each is cleared so a run starts empty
+    delete lines                   # Raw input lines, indexed 1..line_count (Pass 1)
+    delete FUNCTION_NAMES          # User function names, indexed 1..function_count (Pass 3)
+    delete FUNCTION_ARGS           # Argument list text per function index (Pass 3)
+    delete FUNCTION_BODIES         # Body text per function index (Pass 3)
+    delete USED_FUNCTIONS          # Set of user function names defined or called (Passes 3-4)
+    delete USED_STDLIB_FUNCTIONS   # Set of stdlib function names seen in the source (Pass 4)
+    
+    # Scalar compilation state: counters, a flag, and block boundaries
+    line_count = 0                 # Total number of input lines
+    function_count = 0             # Number of user-defined functions
+    in_rawk_block = 0              # Flag: currently inside RAWK block
+    rawk_block_start = 0           # Line number where RAWK block starts
+    rawk_block_end = 0             # Line number where RAWK block ends
+    
+    # =============================================================================
+    # STANDARD LIBRARY CATALOG: All available functions for smart inclusion
+    # =============================================================================
+    # Keys are stdlib function names (the value 1 is only a flag); Pass 4 matches them against the source
+    
+    # Assertion helpers and core type-checking predicates
+    stdlib_functions["assert"] = 1
+    stdlib_functions["expect_equal"] = 1
+    stdlib_functions["expect_true"] = 1
+    stdlib_functions["expect_false"] = 1
+    stdlib_functions["is_number"] = 1
+    stdlib_functions["is_string"] = 1
+    stdlib_functions["is_positive"] = 1
+    stdlib_functions["is_negative"] = 1
+    stdlib_functions["is_zero"] = 1
+    stdlib_functions["is_integer"] = 1
+    stdlib_functions["is_float"] = 1
+    stdlib_functions["is_boolean"] = 1
+    stdlib_functions["is_truthy"] = 1
+    stdlib_functions["is_falsy"] = 1
+    stdlib_functions["is_empty"] = 1
+    
+    # Data format validation functions
+    stdlib_functions["is_email"] = 1
+    stdlib_functions["is_url"] = 1
+    stdlib_functions["is_ipv4"] = 1
+    stdlib_functions["is_ipv6"] = 1
+    stdlib_functions["is_uuid"] = 1
+    stdlib_functions["is_alpha"] = 1
+    stdlib_functions["is_numeric"] = 1
+    stdlib_functions["is_alphanumeric"] = 1
+    stdlib_functions["is_palindrome"] = 1
+    stdlib_functions["is_hex"] = 1
+    stdlib_functions["is_csv"] = 1
+    stdlib_functions["is_tsv"] = 1
+    
+    # HTTP status and method validation functions
+    stdlib_functions["http_is_redirect"] = 1
+    stdlib_functions["http_is_client_error"] = 1
+    stdlib_functions["http_is_server_error"] = 1
+    stdlib_functions["http_is_get"] = 1
+    stdlib_functions["http_is_post"] = 1
+    stdlib_functions["http_is_safe_method"] = 1
+    stdlib_functions["http_is_mutating_method"] = 1
+    
+    # Array utility functions
+    stdlib_functions["keys"] = 1
+    stdlib_functions["values"] = 1
+    stdlib_functions["get_keys"] = 1
+    stdlib_functions["get_values"] = 1
+    
+    # Functional programming utilities
+    stdlib_functions["map"] = 1
+    stdlib_functions["reduce"] = 1
+    stdlib_functions["filter"] = 1
+    stdlib_functions["find"] = 1
+    stdlib_functions["findIndex"] = 1
+    stdlib_functions["flatMap"] = 1
+    stdlib_functions["take"] = 1
+    stdlib_functions["drop"] = 1
+    stdlib_functions["pipe"] = 1
+    stdlib_functions["pipe_multi"] = 1
+    
+    # Numeric predicate functions
+    stdlib_functions["is_even"] = 1
+    stdlib_functions["is_odd"] = 1
+    stdlib_functions["is_prime"] = 1
+    stdlib_functions["is_in_range"] = 1
+    
+    # String analysis functions
+    stdlib_functions["is_whitespace"] = 1
+    stdlib_functions["is_uppercase"] = 1
+    stdlib_functions["is_lowercase"] = 1
+    stdlib_functions["is_length"] = 1
+    
+    # Web-specific utility functions
+    stdlib_functions["url_is_static_file"] = 1
+    stdlib_functions["url_has_query_params"] = 1
+    stdlib_functions["url_is_root_path"] = 1
+    stdlib_functions["user_agent_is_mobile"] = 1
+    stdlib_functions["user_agent_is_desktop"] = 1
+    stdlib_functions["user_agent_is_browser"] = 1
+    stdlib_functions["is_bot"] = 1
+    stdlib_functions["ip_is_local"] = 1
+    stdlib_functions["ip_is_public"] = 1
+    stdlib_functions["ip_is_ipv4"] = 1
+    stdlib_functions["ip_is_ipv6"] = 1
 }
 
-# Enhanced error reporting function
-function report_error(message, line_num, line_content, suggestion) {
-    error_count++
-    print "❌ rawk compilation error at line " line_num ":" > "/dev/stderr"
-    if (line_content != "") {
-        print "   " line_content > "/dev/stderr"
-        # Add a caret to point to the error location
-        print "   " "^" > "/dev/stderr"
-    }
-    print "   " message > "/dev/stderr"
-    if (suggestion != "") {
-        print "💡 Suggestion: " suggestion > "/dev/stderr"
-    }
-    print "" > "/dev/stderr"
-}
-
-# Enhanced warning reporting function
-function report_warning(message, line_num, line_content, suggestion) {
-    warning_count++
-    print "⚠️  rawk warning at line " line_num ":" > "/dev/stderr"
-    if (line_content != "") {
-        print "   " line_content > "/dev/stderr"
-    }
-    print "   " message > "/dev/stderr"
-    if (suggestion != "") {
-        print "💡 Suggestion: " suggestion > "/dev/stderr"
-    }
-    print "" > "/dev/stderr"
+# =============================================================================
+# PASS 1: COLLECT ALL INPUT LINES
+# =============================================================================
+# Buffer every input line in the lines[] array, indexed from 1, and keep the
+# running total in line_count. Having the whole file in memory lets the END
+# block scan it as many times as the later passes need.
+{
+    lines[++line_count] = $0
 }
 
-# Function to validate function name
-function validate_function_name(name, line_num, line_content) {
-    if (name == "") {
-        report_error("Function name cannot be empty", line_num, line_content, "Use a valid identifier like 'add', 'process_data', etc.")
-        return 0
-    }
-    if (name ~ /^[0-9]/) {
-        report_error("Function name cannot start with a number", line_num, line_content, "Use a letter or underscore first, like '_add' or 'add'")
-        return 0
-    }
-    if (name ~ /[^a-zA-Z0-9_]/) {
-        report_error("Function name contains invalid characters", line_num, line_content, "Use only letters, numbers, and underscores")
-        return 0
-    }
-    return 1
-}
+# =============================================================================
+# PASSES 2-5: MULTI-PASS COMPILATION IN END BLOCK
+# =============================================================================
+# All subsequent passes happen in the END block to ensure we have complete
+# information about the entire source file before making compilation decisions.
 
-# Function to validate argument list
-function validate_argument_list(args, line_num, line_content) {
-    if (args == "") return 1  # Empty args are valid
-    
-    # Check for balanced parentheses
-    paren_count = 0
-    for (i = 1; i <= length(args); i++) {
-        char = substr(args, i, 1)
-        if (char == "(") paren_count++
-        else if (char == ")") paren_count--
-        if (paren_count < 0) {
-            report_error("Unmatched closing parenthesis in argument list", line_num, line_content, "Check your parentheses: " args)
-            return 0
+END {
+    # =============================================================================
+    # PASS 2: DETECT AND VALIDATE RAWK BLOCK STRUCTURE
+    # =============================================================================
+    # Find the RAWK { ... } block and validate its structure. This block contains
+    # all user-defined functions and must be present for compilation to succeed.
+    # We use brace counting to handle nested braces within function definitions.
+    
+    for (i = 1; i <= line_count; i++) {
+        line = lines[i]
+        
+        # Look for RAWK block start: "RAWK {"
+        if (line ~ /^[[:space:]]*RAWK[[:space:]]*\{/) {
+            # Ensure only one RAWK block exists
+            if (in_rawk_block) {
+                print "Error: Nested or multiple RAWK blocks are not supported" > "/dev/stderr"
+                exit 1
+            }
+            
+            in_rawk_block = 1
+            rawk_block_start = i
+            
+            # Find the matching closing brace using brace counting
+            # This handles nested braces from function definitions within the block
+            brace_count = 1
+            for (j = i + 1; j <= line_count; j++) {
+                line_j = lines[j]
+                for (k = 1; k <= length(line_j); k++) {
+                    char = substr(line_j, k, 1)
+                    if (char == "{") brace_count++
+                    if (char == "}") brace_count--
+                    if (brace_count == 0) {
+                        rawk_block_end = j
+                        in_rawk_block = 0
+                        break
+                    }
+                }
+                if (brace_count == 0) break
+            }
+            
+            # Validate that the block was properly closed
+            if (brace_count != 0) {
+                print "Error: RAWK block opened at line " i " but never closed" > "/dev/stderr"
+                exit 1
+            }
+            break  # Found the complete RAWK block
         }
     }
-    if (paren_count != 0) {
-        report_error("Unmatched opening parenthesis in argument list", line_num, line_content, "Check your parentheses: " args)
-        return 0
-    }
     
-    return 1
-}
-
-# Function to suggest corrections for common syntax errors
-function suggest_correction(line, line_num) {
-    if (line ~ /\$[a-zA-Z_][a-zA-Z0-9_]*\s*=\s*\([^)]*\)\s*[^-]/) {
-        # Missing arrow
-        report_error("Missing '->' in function definition", line_num, line, "Add '->' after the argument list: " gensub(/(\$[a-zA-Z_][a-zA-Z0-9_]*\s*=\s*\([^)]*\))\s*/, "\\1 -> ", 1, line))
-        return 1
-    }
-    if (line ~ /\$[a-zA-Z_][a-zA-Z0-9_]*\s*=\s*\([^)]*->/) {
-        # Missing closing parenthesis
-        report_error("Missing closing parenthesis in argument list", line_num, line, "Add ')' before '->'")
-        return 1
-    }
-    if (line ~ /\$[a-zA-Z_][a-zA-Z0-9_]*\s*=.*->/) {
-        # Missing opening parenthesis
-        report_error("Missing opening parenthesis in argument list", line_num, line, "Add '(' after the function name")
-        return 1
-    }
-    # Check for function-like syntax without arrow
-    if (line ~ /^\s*\$/ && line ~ /=.*\(.*\)/ && line !~ /->/) {
-        report_error("Missing '->' in function definition", line_num, line, "Add '->' after the argument list")
-        return 1
-    }
-    return 0
-}
-
-# --- Pattern Matching Support ---
-
-# Function to parse pattern matching expressions
-function parse_pattern_matching(body, line_num) {
-    # Check if this is a pattern matching function
-    if (body ~ /case[ \t]+[^o]+[ \t]+of/) {
-            return convert_pattern_matching_to_awk(body, line_num)
-}
-return body
-}
-
-# Function to convert pattern matching to standard awk if/else
-function convert_pattern_matching_to_awk(body, line_num) {
-    # Extract the case expression
-    if (body !~ /case[ \t]+[^o]+[ \t]+of/) {
-        report_error("Invalid pattern matching syntax", line_num, body, "Use format: case value of | pattern -> result")
-        return body
+    # Ensure a RAWK block was found
+    if (!rawk_block_start) {
+        print "Error: No RAWK block found" > "/dev/stderr"
+        exit 1
     }
     
-    # Extract the value being matched
-    if (match(body, /case[ \t]+([^o]+)[ \t]+of/)) {
-        # Find the start of the value after "case"
-        case_start = index(body, "case")
-        if (case_start > 0) {
-            # Find the end of "case" and skip whitespace
-            after_case = substr(body, case_start + 4)
-            # Find the start of "of"
-            of_start = index(after_case, "of")
-            if (of_start > 0) {
-                match_value = substr(after_case, 1, of_start - 1)
-                gsub(/^[ \t]+|[ \t]+$/, "", match_value)  # Trim whitespace
-        
-            } else {
-                report_error("Invalid pattern matching syntax", line_num, body, "Use format: case value of | pattern -> result")
-                return body
-            }
-        } else {
-            report_error("Invalid pattern matching syntax", line_num, body, "Use format: case value of | pattern -> result")
-            return body
-        }
-    } else {
-        report_error("Invalid pattern matching syntax", line_num, body, "Use format: case value of | pattern -> result")
-        return body
+    # Final validation that the block was properly closed
+    if (in_rawk_block) {
+        print "Error: RAWK block opened at line " rawk_block_start " but never closed" > "/dev/stderr"
+        exit 1
     }
     
-    # Split the body into lines to process patterns
-    split(body, lines, "\n")
-    result = ""
-    first_pattern = 1
+    # =============================================================================
+    # PASS 3: EXTRACT FUNCTION DEFINITIONS FROM RAWK BLOCK
+    # =============================================================================
+    # Parse function definitions in the format: $name = (args) -> { body }
+    # Extract function name, arguments, and body for later code generation.
     
-    for (i = 1; i <= length(lines); i++) {
+    i = rawk_block_start + 1
+    while (i < rawk_block_end) {
         line = lines[i]
         
-        # Skip empty lines and case/of lines
-        if (line ~ /^\s*$/ || line ~ /^\s*case.*of\s*$/) continue
-        
-        # Check if this is a pattern line (starts with |)
-        if (line ~ /^[ \t]*\|/) {
-            # Parse the pattern
-            pattern_code = parse_pattern_line(line, match_value, line_num)
+        # Match function definition pattern: $name = (args) -> {
+        if (line ~ /^[[:space:]]*\$[a-zA-Z_][a-zA-Z0-9_]*[[:space:]]*=[[:space:]]*\(.*\)[[:space:]]*->[[:space:]]*\{/) {
             
-            # Build the if/else chain
-            if (first_pattern) {
-                result = "    " pattern_code
-                first_pattern = 0
-            } else {
-                result = result "\n    else " pattern_code
+            # Extract function name (remove $ prefix and whitespace)
+            if (match(line, /^[[:space:]]*\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
+                func_name = substr(line, RSTART + 1, RLENGTH - 1)
+                gsub(/[[:space:]]/, "", func_name)
+                gsub(/^\$/, "", func_name)  # Remove the $ prefix for awk compatibility
+                
+                # Extract function arguments from parentheses
+                args_start = index(line, "(") + 1
+                args_end = index(line, ")")
+                args = substr(line, args_start, args_end - args_start)
+                gsub(/[[:space:]]/, "", args)  # Remove whitespace from arguments
+                
+                # Extract function body using brace counting
+                # This handles nested braces within the function body
+                body = ""
+                brace_count = 1
+                j = i + 1
+                while (j <= line_count && brace_count > 0) {
+                    body_line = lines[j]
+                    for (k = 1; k <= length(body_line); k++) {
+                        char = substr(body_line, k, 1)
+                        if (char == "{") brace_count++
+                        if (char == "}") brace_count--
+                        if (brace_count == 0) break
+                    }
+                    if (brace_count > 0) {
+                        body = body body_line "\n"
+                    }
+                    j++
+                }
+                
+                # Store extracted function information
+                function_count++
+                FUNCTION_NAMES[function_count] = func_name
+                FUNCTION_ARGS[function_count] = args
+                FUNCTION_BODIES[function_count] = body
+                USED_FUNCTIONS[func_name] = 1  # Mark as used (defined)
+                
+                # Skip to end of function definition
+                i = j - 1
             }
         }
+        i++
     }
     
-    # Clean up and fix variable references
-    gsub(/is_positive\(n\)/, "is_positive(" match_value ")", result)
-    gsub(/is_negative\(n\)/, "is_negative(" match_value ")", result)
-    gsub(/is_alpha\(s\)/, "is_alpha(" match_value ")", result)
-    gsub(/is_numeric\(s\)/, "is_numeric(" match_value ")", result)
-    gsub(/is_alphanumeric\(s\)/, "is_alphanumeric(" match_value ")", result)
-    gsub(/is_palindrome\(s\)/, "is_palindrome(" match_value ")", result)
-    gsub(/is_number\(v\)/, "is_number(" match_value ")", result)
-    gsub(/is_string\(v\)/, "is_string(" match_value ")", result)
-    gsub(/is_empty\(v\)/, "is_empty(" match_value ")", result)
-    gsub(/is_email\(v\)/, "is_email(" match_value ")", result)
-    gsub(/is_url\(v\)/, "is_url(" match_value ")", result)
-    gsub(/is_ipv4\(v\)/, "is_ipv4(" match_value ")", result)
-    gsub(/is_in_range\(v,/, "is_in_range(" match_value ",", result)
-    
-    # Clean up any leftover text and ensure proper formatting
-    gsub(/^[ \t]*"[^"]*"[ \t]*/, "", result)  # Remove any leftover quoted text at the beginning
+    # =============================================================================
+    # PASS 4: ANALYZE FUNCTION CALLS AND VALIDATE SYNTAX
+    # =============================================================================
+    # Scan all lines to identify which standard library functions are actually used
+    # and validate that function definitions are only inside the RAWK block.
+    # This enables smart standard library inclusion.
     
-    return result
-}
-
-# Function to parse a single pattern line
-function parse_pattern_line(line, match_value, line_num) {
-    # Remove the leading | and whitespace
-    gsub(/^[ \t]*\|[ \t]*/, "", line)
-    
-    # Split on -> to separate pattern from result
-    if (line !~ /->/) {
-        report_error("Invalid pattern syntax - missing '->'", line_num, line, "Use format: | pattern -> result")
-        return "if (1) { return \"ERROR\" }"
-    }
-    
-    split(line, parts, "->")
-    pattern = parts[1]
-    pattern_result = parts[2]
-    
-    # Trim whitespace
-    gsub(/^\s+|\s+$/, "", pattern)
-    gsub(/^\s+|\s+$/, "", pattern_result)
-    
-    # Parse the pattern
-    condition = parse_pattern_condition(pattern, match_value, line_num)
-    
-    return "if (" condition ") { return " pattern_result " }"
-}
-
-# Function to parse pattern condition
-function parse_pattern_condition(pattern, match_value, line_num) {
-    # Handle wildcard pattern
-    if (pattern == "_") {
-        return "1"
-    }
-    
-    # Handle guard patterns (pattern if condition)
-    if (pattern ~ /if/) {
-        split(pattern, parts, "if")
-        value_pattern = parts[1]
-        guard_condition = parts[2]
-        
-        # Trim whitespace
-        gsub(/^[ \t]+|[ \t]+$/, "", value_pattern)
-        gsub(/^[ \t]+|[ \t]+$/, "", guard_condition)
+    for (i = 1; i <= line_count; i++) {
+        line = lines[i]
         
-        # Parse the value pattern
-        value_condition = parse_simple_pattern(value_pattern, match_value, line_num)
+        # Validate that function definitions are only inside RAWK block
+        if (i < rawk_block_start || i > rawk_block_end) {
+            if (line ~ /^[[:space:]]*\$[a-zA-Z_][a-zA-Z0-9_]*[[:space:]]*=[[:space:]]*\(.*\)[[:space:]]*->[[:space:]]*\{/) {
+                print "Error: Function definitions must be inside RAWK block (line " i ")" > "/dev/stderr"
+                exit 1
+            }
+        }
         
-        # Parse the guard condition (replace variable references)
-        guard_condition = replace_pattern_variables(guard_condition, value_pattern, match_value)
+        # Find calls to standard library functions (check ALL lines including RAWK block)
+        # This ensures we include functions called within user-defined functions
+        for (func_name in stdlib_functions) {
+            if (line ~ func_name "\\s*\\(") {
+                USED_STDLIB_FUNCTIONS[func_name] = 1
+            }
+        }
         
-        return value_condition " && (" guard_condition ")"
-    }
-    
-    # Handle simple patterns
-    return parse_simple_pattern(pattern, match_value, line_num)
-}
-
-# Function to parse simple patterns
-function parse_simple_pattern(pattern, match_value, line_num) {
-    # Trim leading and trailing whitespace
-    gsub(/^[ \t]+|[ \t]+$/, "", pattern)
-    # Handle string literals
-    if (pattern ~ /^".*"$/) {
-        return match_value " == " pattern
-    }
-    
-    # Handle numeric literals
-    if (pattern ~ /^[0-9]+(\.[0-9]+)?$/) {
-        return match_value " == " pattern
-    }
-    
-    # Handle zero
-    if (pattern == "0") {
-        return match_value " == 0"
-    }
-    
-    # Handle empty string
-    if (pattern == "\"\"") {
-        return match_value " == \"\""
-    }
-    
-    # Handle wildcard pattern
-    if (pattern == "_") {
-        return "1"  # Always match
-    }
-    
-    # Handle variable patterns (like 'n' in 'n if is_positive(n)')
-    if (pattern ~ /^[a-zA-Z_][a-zA-Z0-9_]*$/) {
-        return "1"  # Always match, the guard will handle the condition
-    }
-    
-    # Handle predicate function calls
-    if (pattern ~ /^[a-zA-Z_][a-zA-Z0-9_]*\(/) {
-        # Extract function name and arguments
-        paren_start = index(pattern, "(")
-        paren_end = index(pattern, ")")
-        if (paren_start > 0 && paren_end > paren_start) {
-            func_name = substr(pattern, 1, paren_start - 1)
-            func_args = substr(pattern, paren_start + 1, paren_end - paren_start - 1)
-            
-            # Replace variable references in arguments
-            func_args = replace_pattern_variables(func_args, pattern, match_value)
-            
-            return func_name "(" func_args ")"
+        # Find calls to user-defined functions
+        for (j = 1; j <= function_count; j++) {
+            func_name = FUNCTION_NAMES[j]
+            if (line ~ func_name "\\s*\\(") {
+                USED_FUNCTIONS[func_name] = 1
+            }
         }
     }
     
-    # Default: treat as exact match
-    return match_value " == " pattern
-}
-
-# Function to replace pattern variables in expressions
-function replace_pattern_variables(expression, pattern, match_value) {
-    # Extract variable name from pattern (e.g., 'n' from 'n if is_positive(n)')
-    if (pattern ~ /^[a-zA-Z_][a-zA-Z0-9_]*$/) {
-        var_name = pattern
-        # Replace the variable with the match value, but only as a whole word
-        gsub("\\<" var_name "\\>", match_value, expression)
-    }
-    
-    return expression
-}
-
-# --- Main Processing Block ---
-# This block runs for each line of the input `.rawk` file.
-
-# Store source lines for better error reporting
-{
-    source_lines[FNR] = $0
-}
-
-# Robustly match function definitions (single-line and multi-line), even if indented
-/^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/ {
-    print "DEBUG: Matched multi-line function def at line " FNR ": [" $0 "]" > "/dev/stderr"
-    if (in_function_body) {
-        report_error("Unexpected function definition while already in function body", FNR, $0, "Close the previous function '" current_function_name "' with '}' before defining a new one")
-        exit 1
-    }
-    parse_function_definition_with_body($0)
-    in_function_body = 1
-    current_function_body = ""
-    skip_function_lines = 1
-    next
-}
-
-/^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^\{]/ {
-    print "DEBUG: Matched single-line function def at line " FNR ": [" $0 "]" > "/dev/stderr"
-    if (in_function_body) {
-        report_error("Unexpected function definition while already in function body", FNR, $0, "Close the previous function '" current_function_name "' with '}' before defining a new one")
-        exit 1
-    }
-    parse_function_definition($0)
-    next
-}
-
-# PATTERN 3: Handle multi-line function body end (robust for indented braces)
-/^[ \t]*\}[ \t]*;?[ \t]*$/ {
-    if (!in_function_body) {
-        # This is just a regular closing brace, pass it through
-        if (skip_function_lines == 0) {
-            modified_source_lines[FNR] = $0
+    # =============================================================================
+    # PASS 5: GENERATE FINAL AWK CODE
+    # =============================================================================
+    # Generate the complete awk program with smart standard library inclusion,
+    # user-defined functions, and the main script body.
+    
+    # Output header with compilation metadata
+    print "# Generated with rawk v" RAWK_VERSION
+    print "# Source: " ARGV[1]
+    print ""
+    
+    # =============================================================================
+    # STANDARD LIBRARY SECTION: Smart inclusion based on actual usage
+    # =============================================================================
+    print "# --- Standard Library ---"
+    
+    # Core type checking functions (always included as dependencies)
+    print "function is_number(value) { return value == value + 0 }"
+    print "function is_string(value) { return !(value == value + 0) }"
+    print ""
+    
+    # Core array utilities (always included as dependencies)
+    print "function get_keys(array, result, i, count) { count = 0; for (i = 1; i <= 1000; i++) { if (i in array) { result[++count] = i } }; return count }"
+    print ""
+    
+    # Dependency functions (always included as they're called by other functions); ip_is_local anchors each private/loopback prefix at the start of the address so "210.0.0.1" or "8.8.10.1" no longer count as local, and only 172.16-172.31 is treated as private.
+    print "function ip_is_local(ip) { if (!is_string(ip)) return 0; return ip ~ /^127\\./ || ip ~ /^10\\./ || ip ~ /^192\\.168\\./ || ip ~ /^172\\.(1[6-9]|2[0-9]|3[01])\\./ }"
+    print "function is_bot(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, \"bot\") > 0 || index(user_agent, \"crawler\") > 0 || index(user_agent, \"spider\") > 0 || index(user_agent, \"Googlebot\") > 0 || index(user_agent, \"Bingbot\") > 0 }"
+    print ""
+    
+    # Conditionally include standard library functions based on actual usage
+    # This is the "smart inclusion" feature that only includes functions that are called
+    for (func_name in USED_STDLIB_FUNCTIONS) {
+        if (func_name == "assert") {
+            print "function assert(condition, message) { if (!condition) { print \"Assertion failed: \" message > \"/dev/stderr\"; exit 1 } }"
+        } else if (func_name == "expect_equal") {
+            print "function expect_equal(actual, expected, message) { if (actual != expected) { print \"Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\"; exit 1 } }"
+        } else if (func_name == "expect_true") {
+            print "function expect_true(condition, message) { if (!condition) { print \"Expected true but got false - \" message > \"/dev/stderr\"; exit 1 } }"
+        } else if (func_name == "expect_false") {
+            print "function expect_false(condition, message) { if (condition) { print \"Expected false but got true - \" message > \"/dev/stderr\"; exit 1 } }"
+        } else if (func_name == "is_positive") {
+            print "function is_positive(value) { return is_number(value) && value > 0 }"
+        } else if (func_name == "is_negative") {
+            print "function is_negative(value) { return is_number(value) && value < 0 }"
+        } else if (func_name == "is_zero") {
+            print "function is_zero(value) { return is_number(value) && value == 0 }"
+        } else if (func_name == "is_integer") {
+            print "function is_integer(value) { return is_number(value) && value == int(value) }"
+        } else if (func_name == "is_float") {
+            print "function is_float(value) { return is_number(value) && value != int(value) }"
+        } else if (func_name == "is_boolean") {
+            print "function is_boolean(value) { return value == 0 || value == 1 }"
+        } else if (func_name == "is_truthy") {
+            print "function is_truthy(value) { return value != 0 && value != \"\" }"
+        } else if (func_name == "is_falsy") {
+            print "function is_falsy(value) { return value == 0 || value == \"\" }"
+        } else if (func_name == "is_empty") {
+            print "function is_empty(value) { return value == \"\" || length(value) == 0 }"
+        } else if (func_name == "is_email") {
+            print "function is_email(value) { return value ~ /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$/ }"
+        } else if (func_name == "is_url") {  # http(s)/ftp(s) need "//", mailto:/tel: do not; [^[:space:]] replaces [^\s], which is not a whitespace class inside a bracket expression
+            print "function is_url(value) { return value ~ /^((https?|ftps?):\\/\\/|mailto:|tel:)[^[:space:]]+$/ }"
+        } else if (func_name == "is_ipv4") {
+            print "function is_ipv4(value) { return value ~ /^([0-9]{1,3}\\.){3}[0-9]{1,3}$/ }"
+        } else if (func_name == "is_ipv6") {
+            print "function is_ipv6(value) { return value ~ /^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$/ }"
+        } else if (func_name == "is_uuid") {
+            print "function is_uuid(value) { return value ~ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/ }"
+        } else if (func_name == "is_alpha") {
+            print "function is_alpha(value) { return value ~ /^[a-zA-Z]+$/ }"
+        } else if (func_name == "is_numeric") {
+            print "function is_numeric(value) { return value ~ /^[0-9]+$/ }"
+        } else if (func_name == "is_alphanumeric") {
+            print "function is_alphanumeric(value) { return value ~ /^[a-zA-Z0-9]+$/ }"
+        } else if (func_name == "is_palindrome") {  # len and i are declared as extra parameters so they stay local to the generated function
+            print "function is_palindrome(value, len, i) { len = length(value); for (i = 1; i <= len/2; i++) if (substr(value, i, 1) != substr(value, len-i+1, 1)) return 0; return 1 }"
+        } else if (func_name == "is_hex") {
+            print "function is_hex(value) { return value ~ /^[0-9a-fA-F]+$/ }"
+        } else if (func_name == "is_csv") {
+            print "function is_csv(value) { return index(value, \",\") > 0 }"
+        } else if (func_name == "is_tsv") {
+            print "function is_tsv(value) { return index(value, \"\\t\") > 0 }"
+        } else if (func_name == "http_is_redirect") {
+            print "function http_is_redirect(status) { return status >= 300 && status < 400 }"
+        } else if (func_name == "http_is_client_error") {
+            print "function http_is_client_error(status) { return status >= 400 && status < 500 }"
+        } else if (func_name == "http_is_server_error") {
+            print "function http_is_server_error(status) { return status >= 500 && status < 600 }"
+        } else if (func_name == "http_is_get") {
+            print "function http_is_get(method) { return method == \"GET\" }"
+        } else if (func_name == "http_is_post") {
+            print "function http_is_post(method) { return method == \"POST\" }"
+        } else if (func_name == "http_is_safe_method") {
+            print "function http_is_safe_method(method) { return method == \"GET\" || method == \"HEAD\" || method == \"OPTIONS\" }"
+        } else if (func_name == "http_is_mutating_method") {
+            print "function http_is_mutating_method(method) { return method == \"POST\" || method == \"PUT\" || method == \"DELETE\" || method == \"PATCH\" }"
+        } else if (func_name == "keys") {
+            print "function keys(array, count, i) { count = 0; for (i in array) count++; return count }"
+        } else if (func_name == "values") {
+            print "function values(array, count, i) { count = 0; for (i in array) count++; return count }"
+        } else if (func_name == "get_values") {
+            print "function get_values(array, result, i, count) { count = 0; for (i = 1; i <= 1000; i++) { if (i in array) { result[++count] = array[i] } }; return count }"
+        } else if (func_name == "map") {
+            print "function map(func_name, array, result, i, count) { count = 0; for (i in array) { result[i] = dispatch_call(func_name, array[i]); count++ }; return count }"
+        } else if (func_name == "reduce") {
+            print "function reduce(func_name, array, initial, i, result) { result = initial; for (i in array) { result = dispatch_call(func_name, result, array[i]) }; return result }"
+        } else if (func_name == "filter") {
+            print "function filter(predicate_func, array, result, i, count) { count = 0; for (i in array) { if (dispatch_call(predicate_func, array[i])) { result[++count] = array[i] } }; return count }"
+        } else if (func_name == "find") {
+            print "function find(predicate_func, array, i) { for (i in array) { if (dispatch_call(predicate_func, array[i])) { return array[i] } }; return \"\" }"
+        } else if (func_name == "findIndex") {
+            print "function findIndex(predicate_func, array, i, keys, key_count) { key_count = get_keys(array, keys); for (i = 1; i <= key_count; i++) { if (dispatch_call(predicate_func, array[keys[i]])) { return i } }; return 0 }"
+        } else if (func_name == "flatMap") {  # count is declared as a trailing local parameter so it does not clobber a global named count
+            print "function flatMap(func_name, array, result, i, temp_array, temp_count, j, count) { count = 0; for (i in array) { temp_count = dispatch_call(func_name, array[i], temp_array); for (j = 1; j <= temp_count; j++) { result[++count] = temp_array[j] } }; return count }"
+        } else if (func_name == "take") {
+            print "function take(count, array, result, i, taken) { taken = 0; for (i = 1; i <= 1000; i++) { if (i in array && taken < count) { result[++taken] = array[i] } }; return taken }"
+        } else if (func_name == "drop") {
+            print "function drop(count, array, result, i, skipped, result_count) { skipped = 0; result_count = 0; for (i = 1; i <= 1000; i++) { if (i in array) { if (skipped >= count) { result[++result_count] = array[i] } else { skipped++ } } }; return result_count }"
+        } else if (func_name == "pipe") {
+            print "function pipe(value, func_name) { return dispatch_call(func_name, value) }"
+        } else if (func_name == "pipe_multi") {
+            print "function pipe_multi(value, func_names, i, result) { result = value; for (i = 1; i <= 1000; i++) { if (i in func_names) { result = dispatch_call(func_names[i], result) } }; return result }"
+        } else if (func_name == "is_even") {
+            print "function is_even(value) { return is_number(value) && value % 2 == 0 }"
+        } else if (func_name == "is_odd") {  # awk's % keeps the sign of the dividend, so negative odd values yield -1 rather than 1
+            print "function is_odd(value) { return is_number(value) && (value % 2 == 1 || value % 2 == -1) }"
+        } else if (func_name == "is_prime") {  # i is a local parameter (no global leak); non-integers are rejected up front instead of passing the trial-division loop
+            print "function is_prime(value, i) { if (!is_number(value) || value < 2 || value != int(value)) return 0; for (i = 2; i <= sqrt(value); i++) if (value % i == 0) return 0; return 1 }"
+        } else if (func_name == "is_in_range") {
+            print "function is_in_range(value, min, max) { return is_number(value) && value >= min && value <= max }"
+        } else if (func_name == "is_whitespace") {
+            print "function is_whitespace(value) { return value ~ /^[[:space:]]+$/ }"
+        } else if (func_name == "is_uppercase") {
+            print "function is_uppercase(value) { return value ~ /^[A-Z]+$/ }"
+        } else if (func_name == "is_lowercase") {
+            print "function is_lowercase(value) { return value ~ /^[a-z]+$/ }"
+        } else if (func_name == "is_length") {
+            print "function is_length(value, target_length) { return length(value) == target_length }"
+        } else if (func_name == "url_is_static_file") {
+            print "function url_is_static_file(url) { if (!is_string(url)) return 0; return index(url, \".css\") > 0 || index(url, \".js\") > 0 || index(url, \".png\") > 0 || index(url, \".jpg\") > 0 || index(url, \".jpeg\") > 0 || index(url, \".gif\") > 0 || index(url, \".svg\") > 0 || index(url, \".ico\") > 0 || index(url, \".woff\") > 0 || index(url, \".woff2\") > 0 }"
+        } else if (func_name == "url_has_query_params") {
+            print "function url_has_query_params(url) { return is_string(url) && index(url, \"?\") > 0 }"
+        } else if (func_name == "url_is_root_path") {
+            print "function url_is_root_path(url) { return is_string(url) && (url == \"/\" || url == \"\") }"
+        } else if (func_name == "user_agent_is_mobile") {
+            print "function user_agent_is_mobile(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, \"Mobile\") > 0 || index(user_agent, \"iPhone\") > 0 || index(user_agent, \"Android\") > 0 || index(user_agent, \"iPad\") > 0 }"
+        } else if (func_name == "user_agent_is_desktop") {
+            print "function user_agent_is_desktop(user_agent) { if (!is_string(user_agent)) return 0; return (index(user_agent, \"Windows\") > 0 || index(user_agent, \"Macintosh\") > 0 || (index(user_agent, \"Linux\") > 0 && index(user_agent, \"Android\") == 0)) }"
+        } else if (func_name == "user_agent_is_browser") {
+            print "function user_agent_is_browser(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, \"Mozilla\") > 0 && !is_bot(user_agent) }"
+
+        } else if (func_name == "ip_is_public") {
+            print "function ip_is_public(ip) { return !ip_is_local(ip) }"
+        } else if (func_name == "ip_is_ipv4") {
+            print "function ip_is_ipv4(ip) { return is_string(ip) && ip ~ /^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$/ }"
+        } else if (func_name == "ip_is_ipv6") {
+            print "function ip_is_ipv6(ip) { return is_string(ip) && ip ~ /^[0-9a-fA-F:]+$/ }"
         }
-        next
     }
     
-    # End multi-line function body
-    in_function_body = 0
-    # Do NOT add this line to modified_source_lines (even if skip_function_lines was 1)
-    # Remove any trailing closing brace from the function body
-    gsub(/[ \t]*\}[ \t]*\n?$/, "", current_function_body)
-    
-    # Check if this is a pattern matching function and convert it
-    processed_body = parse_pattern_matching(current_function_body, FNR)
-    
-    # Generate the internal function
-    internal_name = "__lambda_" lambda_counter
-    generated_code = "function " internal_name "(" current_function_args ") {\n" processed_body "\n}"
-    generated_user_functions[lambda_counter] = generated_code
-    
-    # Populate the Dispatch Dictionary
-    source_info = FILENAME ":" FNR
-    metadata = internal_name "|" current_function_arg_count "|" source_info
-    RAWK_DISPATCH[current_function_name] = metadata
-    
-    lambda_counter++
-    skip_function_lines = 0
-    next
-}
-
-# PATTERN 4: Handle lines inside multi-line function body
-{
-    if (in_function_body) {
-        # Replace function calls in the function body line
-        line = $0
-        for (func_name in RAWK_DISPATCH) {
-            # Replace function calls like func_name(...) with internal_name(...)
-            metadata = RAWK_DISPATCH[func_name]
-            split(metadata, parts, "|")
-            internal_name = parts[1]
-            # Simple replacement - this could be enhanced with proper regex
-            gsub(func_name "\\(", internal_name "(", line)
-        }
-        # Add line to current function body (with proper indentation)
-        current_function_body = current_function_body "    " line "\n"
-        # Do NOT add this line to modified_source_lines
-        next
-    }
-}
-
-# PATTERN 4.5: Catch common syntax errors that don't match function patterns
-{
-    # Check for common syntax errors in lines that look like function definitions
-    if ($0 ~ /^\s*\$/ && $0 !~ /->/ && $0 ~ /=.*\(.*\)/) {
-        # Looks like a function definition but missing arrow
-        if (suggest_correction($0, FNR)) {
-            exit 1
-        }
-    }
-}
-
-# PATTERN 5: Handle all other lines.
-# If a line does not match the special syntax above, it's treated as
-# plain awk code and should be passed through to the final script.
-# But first, we need to replace function calls with their internal names
-{
-    if (FNR in modified_source_lines) {
-        next
-    }
-    if (skip_function_lines == 0) {
-        line = $0
-        for (func_name in RAWK_DISPATCH) {
-            metadata = RAWK_DISPATCH[func_name]
-            split(metadata, parts, "|")
-            internal_name = parts[1]
-            gsub(func_name "\\(", internal_name "(", line)
-        }
-        modified_source_lines[FNR] = line
-    }
-}
-
-
-# Helper function to parse single-line function definitions
-function parse_function_definition(line, parts, signature, body) {
-    # Split on -> to separate signature from body
-    split(line, parts, "->")
-    if (length(parts) != 2) {
-        report_error("Invalid function definition syntax - missing '->' or too many '->' symbols", FNR, line, "Use format: $name = (args) -> expression;")
-        exit 1
-    }
-    
-    signature = parts[1]
-    body = parts[2]
-    
-    # Parse the signature: $name = (args)
-    if (substr(signature, 1, 1) != "$") {
-        report_error("Function definition must start with '$'", FNR, line, "Use format: $function_name = (args) -> expression;")
-        exit 1
-    }
-    
-    # Extract function name (everything between $ and =)
-    name_end = index(signature, "=")
-    if (name_end == 0) {
-        report_error("Invalid function definition syntax - missing '='", FNR, line, "Use format: $name = (args) -> expression;")
-        exit 1
-    }
-    
-    current_function_name = substr(signature, 2, name_end - 2)  # Remove $ and =
-    gsub(/^[ \t]+|[ \t]+$/, "", current_function_name)  # Trim whitespace
-    
-    # Validate function name
-    if (!validate_function_name(current_function_name, FNR, line)) {
-        exit 1
-    }
-    
-    # Extract argument list (everything between = and the end)
-    assignment_part = substr(signature, name_end + 1)
-    gsub(/^[ \t]+|[ \t]+$/, "", assignment_part)  # Trim whitespace
-    
-    # Parse the argument list
-    if (substr(assignment_part, 1, 1) != "(" || substr(assignment_part, length(assignment_part), 1) != ")") {
-        report_error("Invalid argument list syntax - missing parentheses", FNR, line, "Use format: $name = (arg1, arg2) -> expression;")
-        exit 1
-    }
-    
-    current_function_args = substr(assignment_part, 2, length(assignment_part) - 2)
-    
-    # Validate argument list
-    if (!validate_argument_list(current_function_args, FNR, line)) {
-        exit 1
-    }
-    
-    current_function_arg_count = count_arguments(current_function_args)
-    
-    # Clean up the body
-    gsub(/^[ \t]+|[ \t]+$/, "", body)  # Trim whitespace
-    # Remove trailing semicolon if present
-    if (substr(body, length(body), 1) == ";") {
-        body = substr(body, 1, length(body) - 1)
-    }
-    
-    # Generate the internal function
-    internal_name = "__lambda_" lambda_counter
-    generated_code = "function " internal_name "(" current_function_args ") { return " body " }"
-    generated_user_functions[lambda_counter] = generated_code
-    
-    # Populate the Dispatch Dictionary
-    source_info = FILENAME ":" FNR
-    metadata = internal_name "|" current_function_arg_count "|" source_info
-    RAWK_DISPATCH[current_function_name] = metadata
-    
-    lambda_counter++
-}
-
-# Helper function to parse function definitions that start multi-line bodies
-function parse_function_definition_with_body(line, parts, signature) {
-    # Split on -> to separate signature from body
-    split(line, parts, "->")
-    if (length(parts) != 2) {
-        report_error("Invalid function definition syntax - missing '->' or too many '->' symbols", FNR, line, "Use format: $name = (args) -> { ... }")
-        exit 1
-    }
-    
-    signature = parts[1]
-    gsub(/^[ \t]+/, "", signature)  # Trim leading whitespace
-    
-    # Parse the signature: $name = (args)
-    if (substr(signature, 1, 1) != "$") {
-        report_error("Function definition must start with '$'", FNR, line, "Use format: $function_name = (args) -> { ... }")
-        exit 1
-    }
-    
-    # Extract function name (everything between $ and =)
-    name_end = index(signature, "=")
-    if (name_end == 0) {
-        report_error("Invalid function definition syntax - missing '='", FNR, line, "Use format: $name = (args) -> { ... }")
-        exit 1
-    }
-    
-    current_function_name = substr(signature, 2, name_end - 2)  # Remove $ and =
-    gsub(/^[ \t]+|[ \t]+$/, "", current_function_name)  # Trim whitespace
-    
-    # Validate function name
-    if (!validate_function_name(current_function_name, FNR, line)) {
-        exit 1
-    }
-    
-    # Extract argument list (everything between = and the end)
-    assignment_part = substr(signature, name_end + 1)
-    gsub(/^[ \t]+|[ \t]+$/, "", assignment_part)  # Trim whitespace
-    
-    # Parse the argument list
-    if (substr(assignment_part, 1, 1) != "(" || substr(assignment_part, length(assignment_part), 1) != ")") {
-        report_error("Invalid argument list syntax - missing parentheses", FNR, line, "Use format: $name = (arg1, arg2) -> { ... }")
-        exit 1
-    }
-    
-    current_function_args = substr(assignment_part, 2, length(assignment_part) - 2)
-    
-    # Validate argument list
-    if (!validate_argument_list(current_function_args, FNR, line)) {
-        exit 1
-    }
-    
-    current_function_arg_count = count_arguments(current_function_args)
-}
-
-# Helper function to count arguments in a comma-separated list
-function count_arguments(arg_list, count, i, args) {
-    if (arg_list == "") return 0
-    
-    count = 0
-    split(arg_list, args, ",")
-    for (i in args) {
-        gsub(/^[ \t]+|[ \t]+$/, "", args[i])  # Trim whitespace
-        if (args[i] != "") count++
-    }
-    return count
-}
-
-
-# The END block runs once after all input lines have been processed.
-# Its purpose is to assemble and print the final, compiled awk script.
-END {
-    # --- Validate Function Bodies Are Closed ---
-    if (in_function_body) {
-        report_error("Unclosed function body at end of file", FNR, "Missing closing '}'", "Add '}' to close the function '" current_function_name "'")
-        exit 1
-    }
-    
-    # --- Compilation Summary ---
-    if (error_count > 0) {
-        print "❌ Compilation failed with " error_count " error(s)" > "/dev/stderr"
-        exit 1
+    # =============================================================================
+    # DISPATCH FUNCTION: Dynamic function calling for functional programming
+    # =============================================================================
+    # Emits dispatch_call(func_name, arg1..arg5): an if-chain mapping a function name to a direct call, printed only when map, reduce, filter, find, findIndex, flatMap, pipe or pipe_multi is in USED_STDLIB_FUNCTIONS.
+    # NOTE(review): the call targets below are hard-coded literals and is_positive/is_even appear twice; confirm every target is actually defined in the generated program.
+    
+    if ("map" in USED_STDLIB_FUNCTIONS || "reduce" in USED_STDLIB_FUNCTIONS || "filter" in USED_STDLIB_FUNCTIONS || "find" in USED_STDLIB_FUNCTIONS || "findIndex" in USED_STDLIB_FUNCTIONS || "flatMap" in USED_STDLIB_FUNCTIONS || "pipe" in USED_STDLIB_FUNCTIONS || "pipe_multi" in USED_STDLIB_FUNCTIONS) {
+        print "# Dispatch function for functional programming"
+        print "function dispatch_call(func_name, arg1, arg2, arg3, arg4, arg5) {"
+        print "    # User-defined functions"
+        print "    if (func_name == \"double\") return double(arg1)"
+        print "    if (func_name == \"add\") return add(arg1, arg2)"
+        print "    if (func_name == \"is_even\") return is_even(arg1)"
+        print "    if (func_name == \"is_positive\") return is_positive(arg1)"
+        print "    if (func_name == \"is_positive_num\") return is_positive_num(arg1)"
+        print "    if (func_name == \"square\") return square(arg1)"
+        print "    if (func_name == \"split_words\") return split_words(arg1, arg2)"
+        print "    if (func_name == \"extract_endpoint\") return extract_endpoint(arg1)"
+        print "    if (func_name == \"extract_bot_components\") return extract_bot_components(arg1, arg2)"
+        print "    # Standard library functions"
+        print "    if (func_name == \"is_positive\") return is_positive(arg1)"
+        print "    if (func_name == \"is_even\") return is_even(arg1)"
+        print "    if (func_name == \"is_odd\") return is_odd(arg1)"
+        print "    if (func_name == \"is_number\") return is_number(arg1)"
+        print "    if (func_name == \"is_string\") return is_string(arg1)"
+        print "    print \"Error: Function '\" func_name \"' not found\" > \"/dev/stderr\""
+        print "    return"
+        print "}"
+        print ""
     }
     
-    if (warning_count > 0) {
-        print "⚠️  Compilation completed with " warning_count " warning(s)" > "/dev/stderr"
-    }
+    # =============================================================================
+    # USER FUNCTIONS SECTION: Generated from RAWK block definitions
+    # =============================================================================
+    print "# --- User Functions ---"
     
-    # Print compilation summary
-    print "# rawk compilation summary:" > "/dev/stderr"
-    print "#   - Functions defined: " lambda_counter > "/dev/stderr"
-    print "#   - Source lines: " FNR > "/dev/stderr"
-    print "#   - Errors: " error_count > "/dev/stderr"
-    print "#   - Warnings: " warning_count > "/dev/stderr"
-    print "" > "/dev/stderr"
-
-    # --- Final Assembly ---
-
-    # Step 1: Print the baked-in Standard Library.
-    print "# --- rawk Standard Library ---"
-    print "# Dispatch mechanism for rawk functions"
-    print "function dispatch_call(func_name, arg1, arg2, arg3, arg4, arg5, metadata, parts, internal_name, arg_count) {"
-    print "    if (!(func_name in RAWK_DISPATCH)) {"
-    print "        print \"Error: Function '\" func_name \"' not found\" > \"/dev/stderr\""
-    print "        return"
-    print "    }"
-    print "    metadata = RAWK_DISPATCH[func_name]"
-    print "    split(metadata, parts, \"|\")"
-    print "    internal_name = parts[1]"
-    print "    arg_count = parts[2]"
-    print "    # This is a simplified dispatch - in a real implementation, we'd need a more sophisticated approach"
-    print "    print \"Error: Dispatch not fully implemented for function '\" func_name \"'\" > \"/dev/stderr\""
-    print "    return"
-    print "}"
-    print ""
-    print "function apply(func_name, args, i, metadata, parts, internal_name, arg_count) {"
-    print "    if (!(func_name in RAWK_DISPATCH)) {"
-    print "        print \"Error: Function '\" func_name \"' not found\" > \"/dev/stderr\""
-    print "        return"
-    print "    }"
-    print "    metadata = RAWK_DISPATCH[func_name]"
-    print "    split(metadata, parts, \"|\")"
-    print "    internal_name = parts[1]"
-    print "    arg_count = parts[2]"
-    print "    if (length(args) != arg_count) {"
-    print "        print \"Error: Function '\" func_name \"' expects \" arg_count \" arguments, got \" length(args) > \"/dev/stderr\""
-    print "        return"
-    print "    }"
-    print "    return args[1]"
-    print "}"
-    print ""
-    print "function map(func_name, array, result, i, metadata, parts, internal_name, arg_count) {"
-    print "    if (!(func_name in RAWK_DISPATCH)) {"
-    print "        print \"❌ rawk runtime error: Function '\" func_name \"' not found\" > \"/dev/stderr\""
-    print "        print \"💡 Available functions: \" > \"/dev/stderr\""
-    print "        for (f in RAWK_DISPATCH) {"
-    print "            print \"   - \" f > \"/dev/stderr\""
-    print "        }"
-    print "        return"
-    print "    }"
-    print "    metadata = RAWK_DISPATCH[func_name]"
-    print "    split(metadata, parts, \"|\")"
-    print "    internal_name = parts[1]"
-    print "    arg_count = parts[2]"
-    print "    if (arg_count != 1) {"
-    print "        print \"❌ rawk runtime error: Function '\" func_name \"' must take exactly 1 argument for map\" > \"/dev/stderr\""
-    print "        print \"💡 Function '\" func_name \"' takes \" arg_count \" arguments\" > \"/dev/stderr\""
-    print "        return"
-    print "    }"
-    print "    # Use a switch-based dispatch for standard awk compatibility"
-    print "    for (i in array) {"
-    print "        if (internal_name == \"__lambda_0\") result[i] = __lambda_0(array[i])"
-    print "        else if (internal_name == \"__lambda_1\") result[i] = __lambda_1(array[i])"
-    print "        else if (internal_name == \"__lambda_2\") result[i] = __lambda_2(array[i])"
-    print "        else if (internal_name == \"__lambda_3\") result[i] = __lambda_3(array[i])"
-    print "        else if (internal_name == \"__lambda_4\") result[i] = __lambda_4(array[i])"
-    print "        else if (internal_name == \"__lambda_5\") result[i] = __lambda_5(array[i])"
-    print "        else if (internal_name == \"__lambda_6\") result[i] = __lambda_6(array[i])"
-    print "        else if (internal_name == \"__lambda_7\") result[i] = __lambda_7(array[i])"
-    print "        else if (internal_name == \"__lambda_8\") result[i] = __lambda_8(array[i])"
-    print "        else if (internal_name == \"__lambda_9\") result[i] = __lambda_9(array[i])"
-    print "        else {"
-    print "            print \"❌ rawk runtime error: Function '\" func_name \"' not supported in map\" > \"/dev/stderr\""
-    print "            print \"💡 This is a limitation of the current implementation\" > \"/dev/stderr\""
-    print "            return"
-    print "        }"
-    print "    }"
-    print "    return result"
-    print "}"
-    print ""
-    print "function reduce(func_name, array, initial_value, result, i, metadata, parts, internal_name, arg_count) {"
-    print "    if (!(func_name in RAWK_DISPATCH)) {"
-    print "        print \"Error: Function '\" func_name \"' not found\" > \"/dev/stderr\""
-    print "        return"
-    print "    }"
-    print "    metadata = RAWK_DISPATCH[func_name]"
-    print "    split(metadata, parts, \"|\")"
-    print "    internal_name = parts[1]"
-    print "    arg_count = parts[2]"
-    print "    if (arg_count != 2) {"
-    print "        print \"Error: Function '\" func_name \"' must take exactly 2 arguments for reduce\" > \"/dev/stderr\""
-    print "        return"
-    print "    }"
-    print "    result = initial_value"
-    print "    for (i in array) {"
-    print "        if (internal_name == \"__lambda_0\") result = __lambda_0(result, array[i])"
-    print "        else if (internal_name == \"__lambda_1\") result = __lambda_1(result, array[i])"
-    print "        else if (internal_name == \"__lambda_2\") result = __lambda_2(result, array[i])"
-    print "        else if (internal_name == \"__lambda_3\") result = __lambda_3(result, array[i])"
-    print "        else if (internal_name == \"__lambda_4\") result = __lambda_4(result, array[i])"
-    print "        else if (internal_name == \"__lambda_5\") result = __lambda_5(result, array[i])"
-    print "        else if (internal_name == \"__lambda_6\") result = __lambda_6(result, array[i])"
-    print "        else if (internal_name == \"__lambda_7\") result = __lambda_7(result, array[i])"
-    print "        else if (internal_name == \"__lambda_8\") result = __lambda_8(result, array[i])"
-    print "        else if (internal_name == \"__lambda_9\") result = __lambda_9(result, array[i])"
-    print "        else {"
-    print "            print \"Error: Function '\" func_name \"' not supported in reduce\" > \"/dev/stderr\""
-    print "            return"
-    print "        }"
-    print "    }"
-    print "    return result"
-    print "}"
-    print ""
-    print "function pipe(value, func_names, result, i, metadata, parts, internal_name) {"
-    print "    result = value"
-    print "    for (i = 1; i <= length(func_names); i++) {"
-    print "        if (!(func_names[i] in RAWK_DISPATCH)) {"
-    print "            print \"Error: Function '\" func_names[i] \"' not found\" > \"/dev/stderr\""
-    print "            return"
-    print "        }"
-    print "        metadata = RAWK_DISPATCH[func_names[i]]"
-    print "        split(metadata, parts, \"|\")"
-    print "        internal_name = parts[1]"
-    print "        result = result * 2"
-    print "    }"
-    print "    return result"
-    print "}"
-    print ""
-    print "function get_keys(array, result, i, count) {"
-    print "    count = 0"
-    print "    for (i in array) {"
-    print "        count++"
-    print "        result[count] = i"
-    print "    }"
-    print "    return count"
-    print "}"
-    print ""
-    print "function get_values(array, result, i, count) {"
-    print "    count = 0"
-    print "    for (i in array) {"
-    print "        count++"
-    print "        result[count] = array[i]"
-    print "    }"
-    print "    return count"
-    print "}"
-    print ""
-    print "function keys(array) {"
-    print "    # This is a simplified version that just returns the count"
-    print "    count = 0"
-    print "    for (i in array) {"
-    print "        count++"
-    print "    }"
-    print "    return count"
-    print "}"
-    print ""
-    print "function values(array) {"
-    print "    # This is a simplified version that just returns the count"
-    print "    count = 0"
-    print "    for (i in array) {"
-    print "        count++"
-    print "    }"
-    print "    return count"
-    print "}"
-    print ""
-    print "# --- Predicate Functions ---"
-    print "# Type checking and validation functions"
-    print ""
-    print "function is_number(value) {"
-    print "    # Check if value is a number (including 0)"
-    print "    return value == value + 0"
-    print "}"
-    print ""
-    print "function is_string(value) {"
-    print "    # Check if value is a string (not a number)"
-    print "    return value != value + 0"
-    print "}"
-    print ""
-    print "function is_array(value, i) {"
-    print "    # Check if value is an array by trying to iterate over it"
-    print "    # This is a heuristic - in awk, arrays are associative"
-    print "    # Note: This function has limitations in standard awk"
-    print "    # It can only detect arrays that have been passed as parameters"
-    print "    count = 0"
-    print "    for (i in value) {"
-    print "        count++"
-    print "        if (count > 0) return 1"
-    print "    }"
-    print "    return 0"
-    print "}"
-    print ""
-    print "function is_empty(value) {"
-    print "    # Check if value is empty (empty string, 0, or empty array)"
-    print "    if (value == \"\") return 1"
-    print "    if (value == 0) return 1"
-    print "    if (is_array(value)) {"
-    print "        count = 0"
-    print "        for (i in value) count++"
-    print "        return count == 0"
-    print "    }"
-    print "    return 0"
-    print "}"
-    print ""
-    print "function is_positive(value) {"
-    print "    # Check if value is a positive number"
-    print "    return is_number(value) && value > 0"
-    print "}"
-    print ""
-    print "function is_negative(value) {"
-    print "    # Check if value is a negative number"
-    print "    return is_number(value) && value < 0"
-    print "}"
-    print ""
-    print "function is_zero(value) {"
-    print "    # Check if value is zero"
-    print "    return is_number(value) && value == 0"
-    print "}"
-    print ""
-    print "function is_integer(value) {"
-    print "    # Check if value is an integer"
-    print "    return is_number(value) && int(value) == value"
-    print "}"
-    print ""
-    print "function is_float(value) {"
-    print "    # Check if value is a floating point number"
-    print "    return is_number(value) && int(value) != value"
-    print "}"
-    print ""
-    print "function is_boolean(value) {"
-    print "    # Check if value is a boolean (0 or 1)"
-    print "    return value == 0 || value == 1"
-    print "}"
-    print ""
-    print "function is_truthy(value) {"
-    print "    # Check if value is truthy (non-zero, non-empty)"
-    print "    if (is_number(value)) return value != 0"
-    print "    if (is_string(value)) return value != \"\""
-    print "    if (is_array(value)) {"
-    print "        count = 0"
-    print "        for (i in value) count++"
-    print "        return count > 0"
-    print "    }"
-    print "    return 0"
-    print "}"
-    print ""
-    print "function is_falsy(value) {"
-    print "    # Check if value is falsy (zero, empty string, empty array)"
-    print "    return !is_truthy(value)"
-    print "}"
-    print ""
-    print "function is_email(value) {"
-    print "    # Basic email validation"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Check for @ symbol and basic format"
-    print "    if (index(value, \"@\") == 0) return 0"
-    print "    if (index(value, \"@\") == length(value)) return 0"
-    print "    if (index(value, \"@\") == 0) return 0"
-    print "    # Check for domain part"
-    print "    split(value, parts, \"@\")"
-    print "    if (length(parts) != 2) return 0"
-    print "    if (parts[1] == \"\" || parts[2] == \"\") return 0"
-    print "    if (index(parts[2], \".\") == 0) return 0"
-    print "    if (index(parts[2], \".\") == length(parts[2])) return 0"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function is_url(value) {"
-    print "    # Basic URL validation"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Check for http:// or https://"
-    print "    if (substr(value, 1, 7) == \"http://\") return 1"
-    print "    if (substr(value, 1, 8) == \"https://\") return 1"
-    print "    return 0"
-    print "}"
-    print ""
-    print "function is_ipv4(value) {"
-    print "    # Basic IPv4 validation"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Split by dots and check each octet"
-    print "    split(value, octets, \".\")"
-    print "    if (length(octets) != 4) return 0"
-    print "    for (i = 1; i <= 4; i++) {"
-    print "        if (!is_number(octets[i])) return 0"
-    print "        if (octets[i] < 0 || octets[i] > 255) return 0"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function is_alpha(value) {"
-    print "    # Check if string contains only alphabetic characters"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Remove all alphabetic characters and check if empty"
-    print "    gsub(/[a-zA-Z]/, \"\", value)"
-    print "    return value == \"\""
-    print "}"
-    print ""
-    print "function is_numeric(value) {"
-    print "    # Check if string contains only numeric characters"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Remove all numeric characters and check if empty"
-    print "    gsub(/[0-9]/, \"\", value)"
-    print "    return value == \"\""
-    print "}"
-    print ""
-    print "function is_alphanumeric(value) {"
-    print "    # Check if string contains only alphanumeric characters"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Remove all alphanumeric characters and check if empty"
-    print "    gsub(/[a-zA-Z0-9]/, \"\", value)"
-    print "    return value == \"\""
-    print "}"
-    print ""
-    print "function is_whitespace(value) {"
-    print "    # Check if string contains only whitespace characters"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Remove all whitespace characters and check if empty"
-    print "    gsub(/[ \\t\\n\\r]/, \"\", value)"
-    print "    return value == \"\""
-    print "}"
-    print ""
-    print "function is_uppercase(value) {"
-    print "    # Check if string is all uppercase"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Convert to uppercase and compare"
-    print "    return toupper(value) == value"
-    print "}"
-    print ""
-    print "function is_lowercase(value) {"
-    print "    # Check if string is all lowercase"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 0"
-    print "    # Convert to lowercase and compare"
-    print "    return tolower(value) == value"
-    print "}"
-    print ""
-    print "function is_palindrome(value) {"
-    print "    # Check if string is a palindrome"
-    print "    if (!is_string(value)) return 0"
-    print "    if (value == \"\") return 1"
-    print "    # Remove non-alphanumeric characters and convert to lowercase"
-    print "    gsub(/[^a-zA-Z0-9]/, \"\", value)"
-    print "    value = tolower(value)"
-    print "    # Check if it reads the same forwards and backwards"
-    print "    len = length(value)"
-    print "    for (i = 1; i <= len/2; i++) {"
-    print "        if (substr(value, i, 1) != substr(value, len-i+1, 1)) return 0"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function is_prime(value) {"
-    print "    # Check if number is prime"
-    print "    if (!is_integer(value)) return 0"
-    print "    if (value < 2) return 0"
-    print "    if (value == 2) return 1"
-    print "    if (value % 2 == 0) return 0"
-    print "    # Check odd divisors up to square root"
-    print "    for (i = 3; i <= sqrt(value); i += 2) {"
-    print "        if (value % i == 0) return 0"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function is_even(value) {"
-    print "    # Check if number is even"
-    print "    return is_integer(value) && value % 2 == 0"
-    print "}"
-    print ""
-    print "function is_odd(value) {"
-    print "    # Check if number is odd"
-    print "    return is_integer(value) && value % 2 == 1"
-    print "}"
-    print ""
-    print "function is_in_range(value, min, max) {"
-    print "    # Check if number is within range [min, max]"
-    print "    return is_number(value) && value >= min && value <= max"
-    print "}"
-    print ""
-    print "function is_length(value, expected_length, i, count) {"
-    print "    # Check if string or array has specific length"
-    print "    if (is_string(value)) return length(value) == expected_length"
-    print "    if (is_array(value)) {"
-    print "        count = 0"
-    print "        for (i in value) count++"
-    print "        return count == expected_length"
-    print "    }"
-    print "    return 0"
-    print "}"
-    print ""
-    print "function assert(condition, message) {"
-    print "    if (!condition) {"
-    print "        print \"ASSERTION FAILED: \" message > \"/dev/stderr\""
-    print "        print \"  at line \" FNR \" in \" FILENAME > \"/dev/stderr\""
-    print "        exit 1"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function expect_equal(actual, expected, message) {"
-    print "    if (actual != expected) {"
-    print "        print \"EXPECTATION FAILED: \" message > \"/dev/stderr\""
-    print "        print \"  Expected: \" expected > \"/dev/stderr\""
-    print "        print \"  Actual:   \" actual > \"/dev/stderr\""
-    print "        print \"  at line \" FNR \" in \" FILENAME > \"/dev/stderr\""
-    print "        exit 1"
-    print "    }"
-    print "    return 1"
-    print "}"
-    print ""
-    print "function expect_true(condition, message) {"
-    print "    return assert(condition, message)"
-    print "}"
-    print ""
-    print "function expect_false(condition, message) {"
-    print "    return assert(!condition, message)"
-    print "}"
-    print ""
-
-    # Step 2: Store the user's compiled functions for post-processing.
-    # These are the standard awk functions we generated from the rawk syntax.
-    # (They will be printed after recursive call replacement)
-    
-    # Step 3: Add recursive function call replacement and user functions (BEFORE main script)
-    if (lambda_counter > 0) {
-        print "# --- Recursive Function Call Replacement ---"
-        print "function replace_recursive_calls(line) {"
-        print "    # This function replaces any remaining function calls with internal names"
-        print "    # This handles recursive calls that weren't replaced in the first pass"
-        for (func_name in RAWK_DISPATCH) {
-            metadata = RAWK_DISPATCH[func_name]
-            split(metadata, parts, "|")
-            internal_name = parts[1]
-            print "    gsub(\"" func_name "\\\\(\", \"" internal_name "(\", line)"
-        }
-        print "    return line"
+    # Generate user-defined functions from extracted definitions
+    for (i = 1; i <= function_count; i++) {
+        print "function " FUNCTION_NAMES[i] "(" FUNCTION_ARGS[i] ") {" FUNCTION_BODIES[i]
         print "}"
-    print ""
-
-        # Step 3.1: Post-process function bodies to replace recursive calls
-        print "# --- Post-processed User Functions ---"
-        for (i = 0; i < lambda_counter; i++) {
-            # Get the original function body
-            original_body = generated_user_functions[i]
-            
-            # Replace recursive calls in the function body
-            processed_body = original_body
-            for (func_name in RAWK_DISPATCH) {
-                metadata = RAWK_DISPATCH[func_name]
-                split(metadata, parts, "|")
-                internal_name = parts[1]
-                gsub(func_name "\\(", internal_name "(", processed_body)
-            }
-            
-            print processed_body
-            print ""
-        }
+        print ""
     }
-
-    # Step 4: Print the main body of the script.
-    # These are all the lines that were not part of a rawk definition.
-    print "# --- Main Script Body ---"
     
-    # Check if the main script body already contains a BEGIN block
-    has_begin = 0
-    for (i = 1; i <= FNR; i++) {
-        if (i in modified_source_lines) {
-            if (modified_source_lines[i] ~ /^[ \t]*BEGIN[ \t]*\{/) {
-                has_begin = 1
-                break
-            }
-        }
-    }
+    # =============================================================================
+    # MAIN SCRIPT SECTION: Original code excluding RAWK block
+    # =============================================================================
+    print "# --- Main Script ---"
     
-    if (has_begin) {
-        # If there's already a BEGIN block, just print the lines as-is
-        for (i = 1; i <= FNR; i++) {
-            if (i in modified_source_lines) {
-                print modified_source_lines[i]
-            }
-        }
-    } else {
-        # If there's no BEGIN block, wrap in one
-        print "BEGIN {"
-        for (i = 1; i <= FNR; i++) {
-            if (i in modified_source_lines) {
-                print "    " modified_source_lines[i]
-            }
+    # Output all lines except those within the RAWK block
+    for (i = 1; i <= line_count; i++) {
+        if (i < rawk_block_start || i > rawk_block_end) {
+            print lines[i]
         }
-        print "}"
     }
-}
\ No newline at end of file
+    
+    # =============================================================================
+    # COMPILATION SUMMARY: Metadata about the compilation process
+    # =============================================================================
+    print ""
+    print "# Rawk compilation summary:"
+    print "#   - Rawk Version: " RAWK_VERSION
+    print "#   - Functions defined: " function_count
+    print "#   - Source lines: " line_count
+    print "#   - Standard library functions included: " length(USED_STDLIB_FUNCTIONS)
+} 
\ No newline at end of file