#!/usr/bin/env awk -f

# rawk.awk

# Author: @eli_oat
# License: Public Domain
# Version: 
# The version must be assigned inside a BEGIN action. A bare top-level
# `RAWK_VERSION = "0.0.1"` is an awk *pattern* with no action: it evaluates to
# a non-empty string for every record, so the default action would print each
# input line to stdout, and the variable would still be unset while BEGIN runs.
BEGIN { RAWK_VERSION = "0.0.1" }

# Let's help awk rawk
#
# This script translates a `.rawk` source file into standard, portable awk code.
# It uses a two-stage compilation approach for robustness and simplicity.
#
# This script is implemented in awk, and should work with any POSIX awk.
#
# USAGE:
#   # Two-stage compilation (recommended)
#   awk -f rawk.awk my_program.rawk > my_program.awk
#   awk -f my_program.awk
#
#   # One-step compilation and execution
#   awk -f rawk.awk my_program.rawk | awk -f -
#
# EXAMPLES:
#   # Basic usage - compile and run
#   awk -f rawk.awk hello.rawk | awk -f -
#
#   # Compile rawk to an awk file for later use
#   awk -f rawk.awk hello.rawk > hello.awk
#   awk -f hello.awk
#
#   # Process input data
#   awk -f rawk.awk processor.rawk | awk -f - input.txt
#
# COMPILATION PROCESS:
#   1. Parse rawk syntax and validate
#   2. Generate standard AWK code
#   3. Output generated code to stdout
#   4. Output errors/warnings to stderr
#   5. Exit with appropriate code (0=success, 1=error)
#
# -----------------------------------------------------------------------------
# LANGUAGE FEATURES
# -----------------------------------------------------------------------------

# 1. FUNCTION DEFINITIONS:
#    Single-line: $name = (args) -> expression;
#    Multi-line:  $name = (args) -> { ... };
#
#    SYNTAX RULES:
#    - Each function definition must be on its own line
#    - No code allowed after function definitions on the same line
#    - Single-line functions must end with semicolon
#    - Multi-line functions must not end with semicolon
#
#    Examples:
#    $add = (x, y) -> x + y;
#    $greet = (name) -> "Hello, " name;
#    $calculate = (width, height) -> {
#        area = width * height
#        return area
#    };
#
#    ❌ Invalid (multiple functions on one line):
#    $add = (x, y) -> x + y; $multiply = (a, b) -> a * b;
#
#    ❌ Invalid (code after function):
#    $add = (x, y) -> x + y; print "hello";
#
#    ❌ Invalid (missing semicolon):
#    $add = (x, y) -> x + y
#
#    ❌ Invalid (extra semicolon):
#    $calculate = (w, h) -> { return w * h };
#
# 2. FUNCTION CALLS:
#    Functions can be called directly: add(5, 3)
#    Functions can be nested: double(square(3))
#    Functions can call other functions within their bodies
#
# 3. STANDARD LIBRARY:
#
#    ARRAY UTILITIES:
#    - keys(array): Returns count of keys in array
#    - values(array): Returns count of values in array
#    - get_keys(array, result): Populates result array with keys
#    - get_values(array, result): Populates result array with values
#
#    FUNCTIONAL PROGRAMMING:
#    - map(func_name, array, result): Apply function to each element of array
#    - reduce(func_name, array, initial): Reduce array using function (left fold)
#    - pipe(value, func_name): Pipe value through a single function
#    - pipe_multi(value, func_names): Pipe value through multiple functions
#    - dispatch_call(func_name, arg1, arg2, ...): Dynamic function dispatch
#
#    ENHANCED ARRAY UTILITIES:
#    - filter(predicate_func, array, result): Filter array elements based on predicate
#    - find(predicate_func, array): Find first element that matches predicate
#    - findIndex(predicate_func, array): Find index of first element that matches predicate
#    - flatMap(func_name, array, result): Apply function to each element and flatten result
#    - take(count, array, result): Take first n elements from array
#    - drop(count, array, result): Drop first n elements from array
#
#    TESTING FUNCTIONS:
#    - assert(condition, message): Asserts a condition is true
#    - expect_equal(actual, expected, message): Asserts actual equals expected
#    - expect_true(condition, message): Asserts condition is true
#    - expect_false(condition, message): Asserts condition is false
#
#    PREDICATE FUNCTIONS:
#    - is_number(value), is_string(value), is_array(value)
#    - is_positive(value), is_negative(value), is_zero(value)
#    - is_integer(value), is_float(value), is_boolean(value)
#    - is_even(value), is_odd(value), is_prime(value)
#    - is_whitespace(value), is_uppercase(value), is_lowercase(value)
#    - is_email(value), is_url(value), is_ipv4(value), is_ipv6(value)
#    - is_uuid(value), is_hex(value), is_csv(value), is_tsv(value)
#    - is_palindrome(value), is_length(value, target_length)
#    - http_is_redirect(status), http_is_client_error(status), http_is_server_error(status)
#    - http_is_get(method), http_is_post(method), http_is_safe_method(method), http_is_mutating_method(method)
#    - url_is_static_file(url), url_has_query_params(url), url_is_root_path(url)
#    - user_agent_is_mobile(user_agent), user_agent_is_desktop(user_agent), user_agent_is_browser(user_agent)
#    - ip_is_local(ip), ip_is_public(ip), ip_is_ipv4(ip), ip_is_ipv6(ip)
#
# 4. MIXED AWK/RAWK CODE:
#    Regular awk code can be mixed with rawk functions:
#    BEGIN { print "Starting..." }
#    $process = (line) -> "Processed: " line;
#    { print process($0) }
#    END { print "Done." }
#
# -----------------------------------------------------------------------------
# ARCHITECTURE AND TECHNICAL MISCELLANY
# -----------------------------------------------------------------------------

# 1. Parse: Extract rawk function definitions using `->` symbol
# 2. Generate: Create internal awk functions with unique names (`__lambda_0`, etc.)
# 3. Dispatch: Build dispatch table mapping public names to internal names
# 4. Replace: Replace function calls with internal names in source code
# 5. Output: Generate final awk script with standard library and user code
#
# GENERATED CODE STRUCTURE:
# - Standard library functions (predicates, utilities, testing)
# - Dispatch table (BEGIN block with RAWK_DISPATCH array)
# - Internal function definitions (__lambda_0, __lambda_1, etc.)
# - Main script body (user code with function calls replaced)
#
# LIMITATIONS:
# - Function names must be valid awk identifiers
# - Array returns from functions are not supported (use pass-by-reference)
# - Array iteration order is not guaranteed (AWK limitation)
# - Dynamic dispatch limited to functions defined at compile time
# - Maximum 5 arguments per function (dispatch table limitation)
#
# ERROR HANDLING:
# - Invalid syntax generates descriptive error messages with context
# - Missing functions are reported at runtime with helpful suggestions
# - Argument count mismatches are detected with detailed information
# - Source line correlation for better debugging
#
# PORTABILITY:
# - Output is compatible with standard awk (nawk, BSD awk)
# - Avoids gawk-specific features
# - Uses only standard awk constructs and functions
#
# -----------------------------------------------------------------------------

# Global state for multi-pass compilation
BEGIN {
    # --- Compiler State Initialization ---
    #
    # Arrays are emptied with split("", arr) rather than `delete arr`:
    # deleting a whole array without a subscript is not guaranteed by older
    # POSIX awk, whereas split() with an empty string clears the array (and
    # marks the name as an array) in every awk.

    # Function collection arrays, indexed 1..function_count
    split("", FUNCTION_NAMES)
    split("", FUNCTION_ARGS)
    split("", FUNCTION_BODIES)
    split("", FUNCTION_TYPES)  # "single" or "multi"
    split("", FUNCTION_LINES)  # source line numbers

    # Counters
    function_count = 0
    line_count = 0

    # State tracking for multi-line function definitions
    in_function_def = 0         # Track if we're in a function definition context
    in_function_body = 0
    brace_count = 0
    current_function_index = 0

    # Source lines for pass 2
    split("", SOURCE_LINES)
    split("", SOURCE_LINE_TYPES)  # "function_def", "function_body", "code"

    # Regular (non-function) code collected by the main rule
    split("", main_script_lines)
    main_script_count = 0

    # Enhanced error tracking
    error_count = 0
    warning_count = 0

    # Compilation statistics
    functions_defined = 0
    source_lines = 0
    errors = 0
    warnings = 0

    # Syntax validation state
    validation_mode = 0  # 0 = normal compilation, 1 = syntax validation only
}

# -----------------------------------------------------------------------------
# MAIN PROCESSING: Parse and collect function definitions
# -----------------------------------------------------------------------------

# Per-record driver. Every input line is classified as one of:
#   - a comment/blank line (dropped),
#   - the start of a braced function definition (handed to
#     parse_multi_line_function),
#   - a brace-less function definition (reported as a validation error),
#   - a line of the function body currently being collected, or
#   - regular code, which is appended to main_script_lines.
# Nothing is printed here; every path ends in `next`.
{
    line_count++

    # Skip comments and empty lines (this also applies inside function bodies)
    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
        next
    }

    # Pattern: Multi-line function definition start (the only allowed form)
    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
        in_function_def = 1
        parse_multi_line_function($0, line_count)
        next  # Do not add function definition line to main_script_lines
    }

    # Validate: Only allow function definitions with { ... }
    if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^\{]/) {
        report_validation_error("Function definitions must use braces: -> { ... }", line_count, $0, "Use: $name = (args) -> { ... }")
        next
    }

    # Pattern: Multi-line function body continuation
    if (in_function_body) {
        # Count braces on a scratch copy so $0 is never rewritten.
        brace_probe = $0
        open_braces = gsub(/\{/, "", brace_probe)
        close_braces = gsub(/\}/, "", brace_probe)

        # The body ends only when this line brings the nesting depth back to
        # zero. Merely seeing a closing brace at depth 1 is not enough: lines
        # such as "} else {" or "if (x) { return 1 }" leave the depth
        # unchanged and must stay part of the body.
        new_depth = brace_count + open_braces - close_braces

        if (new_depth <= 0) {
            # End of function body; the closing line itself is not stored
            brace_count = 0
            in_function_body = 0
            in_function_def = 0
            next
        }

        brace_count = new_depth

        # Add line to current function body
        FUNCTION_BODIES[current_function_index] = FUNCTION_BODIES[current_function_index] "\n    " $0
        next
    }

    # Pattern: Start of multi-line function body, but only if not already in a function body
    # NOTE(review): within the code visible here in_function_def is only set
    # right before parse_multi_line_function(), so this branch may never
    # trigger - confirm before relying on it.
    if (!in_function_body && in_function_def && $0 ~ /^[ \t]*\{/) {
        in_function_body = 1
        brace_count = 1
        next
    }

    # Pattern: Regular code - collect for main script
    # (lines that start with "$" and contain "->" are never collected)
    if (!in_function_body && !($0 ~ /^[ \t]*\$/ && $0 ~ /->/)) {
        main_script_lines[++main_script_count] = $0
    }

    # Unconditional next so no later rule in this file sees the record
    next
}

# -----------------------------------------------------------------------------
# HELPER FUNCTIONS
# -----------------------------------------------------------------------------

# First-pass syntax validation for each line
# First-pass syntax validation for a single source line.
#
# Parameters:
#   line     - text of the source line to check
#   line_num - line number, passed through to the reporting functions
#   probe    - local scratch variable (awk idiom); callers must not pass it
#
# The checks run in order and the function returns after the first one that
# fires, so at most one diagnostic is reported per line. Problems go to
# report_validation_error(), except standard awk `function` syntax, which
# goes to report_validation_warning().
function validate_line_syntax(line, line_num,    probe) {
    # Check for multiple functions on one line.
    # gsub() needs an explicit target: without one it would count in, and
    # rewrite, the current record $0 instead of `line`. A scratch copy keeps
    # `line` intact for the checks and messages below.
    probe = line
    if (gsub(/\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/, "FUNC", probe) > 1) {
        report_validation_error("Multiple function definitions on one line", line_num, line, "Put each function on its own line")
        return
    }

    # Check for code after function definition on the same line
    if (line ~ /^\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^;{]*;[ \t]*[^ \t]/) {
        report_validation_error("Code after function definition on same line", line_num, line, "Put function definition on its own line")
        return
    }

    # Check for single-line functions missing semicolons
    if (line ~ /^\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^;{]*$/) {
        report_validation_error("Single-line function definition missing semicolon", line_num, line, "Add semicolon: " line ";")
        return
    }

    # Check for invalid function names.
    # The arrow is required so that ordinary awk field assignments at the
    # start of a line (e.g. `$1 = "x"`) are not mistaken for definitions.
    # "$" needs no backslash inside an awk string; "\$" is an undefined
    # escape sequence in POSIX awk.
    if (line ~ /^\$[0-9]/ && line ~ /->/) {
        report_validation_error("Function name cannot start with a number", line_num, line, "Use a letter or underscore: $func_name = ...")
        return
    }

    # Check for missing arrow operator
    if (line ~ /^\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*[^-]/ && line !~ /->/) {
        report_validation_error("Function definition missing arrow operator (->)", line_num, line, "Add arrow: $func = (args) -> expression")
        return
    }

    # Check for multi-line functions with semicolon after closing brace
    # (this pattern only matches an empty `{ }` body followed by `;`)
    if (line ~ /^\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{[ \t]*\}[ \t]*;[ \t]*$/) {
        report_validation_error("Multi-line function should not end with semicolon", line_num, line, "Remove semicolon after closing brace")
        return
    }

    # Check for standard AWK function syntax
    if (line ~ /^function[ \t]+[a-zA-Z_][a-zA-Z0-9_]*[ \t]*\(/) {
        report_validation_warning("Standard AWK function syntax detected", line_num, line, "Use rawk syntax: $func = (args) -> ...")
        return
    }
}

# Parse multi-line function definition
# Parse the first line of a braced function definition:
#     $name = (args) -> { ...
#
# Parameters:
#   line     - the definition line
#   line_num - its source line number (stored in FUNCTION_LINES)
#   arrow_pos, brace_pos, rest, probe, opens, closes, depth
#            - local scratch variables (awk idiom); callers must not pass them
#
# Effects:
#   - Appends a new entry to FUNCTION_NAMES / FUNCTION_ARGS / FUNCTION_BODIES /
#     FUNCTION_TYPES / FUNCTION_LINES and makes it the current function.
#   - Sets in_function_body and brace_count according to the braces found on
#     the definition line. If the body is closed on this same line,
#     in_function_body and in_function_def are reset to 0 so that following
#     lines are not swallowed as body text.
#   - Calls report_error() and returns without storing anything when the name
#     or the argument list cannot be extracted.
#   - func_name and args are assigned as globals (they are not locals).
function parse_multi_line_function(line, line_num,    arrow_pos, brace_pos, rest, probe, opens, closes, depth) {
    # Extract function name (text after the "$")
    if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
        func_name = substr(line, RSTART + 1, RLENGTH - 1)
    } else {
        report_error("Invalid function name", line_num, line, "Function name must be a valid identifier")
        return
    }

    # Extract arguments (text between the first pair of parentheses)
    if (match(line, /\(([^)]*)\)/)) {
        args = substr(line, RSTART + 1, RLENGTH - 2)
    } else {
        report_error("Invalid argument list", line_num, line, "Arguments must be enclosed in parentheses")
        return
    }

    # Store function information
    function_count++
    current_function_index = function_count
    FUNCTION_NAMES[function_count] = func_name
    FUNCTION_ARGS[function_count] = args
    FUNCTION_BODIES[function_count] = ""
    FUNCTION_TYPES[function_count] = "multi"
    FUNCTION_LINES[function_count] = line_num

    # Isolate whatever follows the body's opening brace on this line
    rest = ""
    arrow_pos = index(line, "->")
    if (arrow_pos > 0) {
        rest = substr(line, arrow_pos + 2)
        brace_pos = index(rest, "{")
        rest = (brace_pos > 0) ? substr(rest, brace_pos + 1) : ""
    }

    # Nesting depth after this line: 1 for the opening brace, adjusted by any
    # further braces on the same line (counted on a scratch copy).
    probe = rest
    opens = gsub(/\{/, "", probe)
    closes = gsub(/\}/, "", probe)
    depth = 1 + opens - closes

    if (depth <= 0) {
        # Whole body sits on the definition line: drop the final closing
        # brace together with anything after it, and do not enter body mode.
        sub(/\}[^}]*$/, "", rest)
        in_function_body = 0
        in_function_def = 0
        brace_count = 0
    } else {
        # Body continues on the following lines
        in_function_body = 1
        brace_count = depth
    }

    # Keep any body text that shares the definition line, using the same
    # "\n    " line prefix the main rule uses when appending body lines.
    gsub(/^[ \t]+|[ \t]+$/, "", rest)
    if (rest != "") {
        FUNCTION_BODIES[function_count] = "\n    " rest
    }

    functions_defined++
}

# Parse single-line function definition
# Parse a single-line function definition:
#     $name = (args) -> expression;
#
# Parameters:
#   line     - the definition line
#   line_num - its source line number (stored in FUNCTION_LINES)
#
# Effects:
#   - Appends a new entry of type "single" to FUNCTION_NAMES / FUNCTION_ARGS /
#     FUNCTION_BODIES / FUNCTION_TYPES / FUNCTION_LINES.
#   - Calls report_error() and returns without storing anything when the
#     name, the argument list or the body cannot be extracted.
#   - func_name, args and body are assigned as globals (they are not locals).
function parse_single_line_function(line, line_num) {
    # Extract function name (text after the "$")
    if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
        func_name = substr(line, RSTART + 1, RLENGTH - 1)
    } else {
        report_error("Invalid function name", line_num, line, "Function name must be a valid identifier")
        return
    }

    # Extract arguments (text between the first pair of parentheses)
    if (match(line, /\(([^)]*)\)/)) {
        args = substr(line, RSTART + 1, RLENGTH - 2)
    } else {
        report_error("Invalid argument list", line_num, line, "Arguments must be enclosed in parentheses")
        return
    }

    # Extract body: everything between the arrow and the LAST semicolon on
    # the line. awk regular expressions have no lazy quantifiers ("+?" is not
    # "as few as possible", and stacked repetition operators are undefined in
    # POSIX), so the match is written as a plain greedy pattern. Matching up
    # to the last semicolon also keeps semicolons inside string literals.
    if (match(line, /->.*;/)) {
        body = substr(line, RSTART + 2, RLENGTH - 3)  # Remove -> and ;
        # Trim whitespace
        gsub(/^[ \t]+|[ \t]+$/, "", body)
    } else {
        report_error("Invalid function body", line_num, line, "Function body must follow '->' and end with ';'")
        return
    }

    # A definition such as "$f = (x) -> ;" has nothing to evaluate
    if (body == "") {
        report_error("Invalid function body", line_num, line, "Function body must follow '->' and end with ';'")
        return
    }

    # Store function information
    function_count++
    FUNCTION_NAMES[function_count] = func_name
    FUNCTION_ARGS[function_count] = args
    FUNCTION_BODIES[function_count] = body
    FUNCTION_TYPES[function_count] = "single"
    FUNCTION_LINES[function_count] = line_num

    functions_defined++
}

# Generate standard library functions
# FIXME: in the future, we should only generate the functions that are actually used
# TODO: track which functions are used/referenced
function generate_standard_library() {
    print "# --- rawk Standard Library ---"
    print "# Dispatch mechanism for rawk functions"
    print "function dispatch_call(func_name, arg1, arg2, arg3, arg4, arg5, metadata, parts, internal_name, arg_count) {"
    print "    if (!(func_name in RAWK_DISPATCH)) {"
    print "        print \"Error: Function '\" func_name \"' not found\" > \"/dev/stderr\""
    print "        return"
    print "    }"
    print "    metadata = RAWK_DISPATCH[func_name]"
    print "    split(metadata, parts, \"|\")"
    print "    internal_name = parts[1]"
    print "    arg_count = parts[2]"
    print "    "
    print "    # Switch statement dispatch based on internal function name"
    for (i = 1; i <= function_count; i++) {
        internal_name = "__lambda_" (i - 1)
        arg_count = split(FUNCTION_ARGS[i], args_array, ",")
        print "    if (internal_name == \"" internal_name "\") {"
        if (arg_count == 0) {
            print "        if (arg_count == 0) return " internal_name "()"
        } else if (arg_count == 1) {
            print "        if (arg_count == 1) return " internal_name "(arg1)"
        } else if (arg_count == 2) {
            print "        if (arg_count == 2) return " internal_name "(arg1, arg2)"
        } else if (arg_count == 3) {
            print "        if (arg_count == 3) return " internal_name "(arg1, arg2, arg3)"
        } else if (arg_count == 4) {
            print "        if (arg_count == 4) return " internal_name "(arg1, arg2, arg3, arg4)"
        } else if (arg_count == 5) {
            print "        if (arg_count == 5) return " internal_name "(arg1, arg2, arg3, arg4, arg5)"
        } else {
            print "        print \"Error: Function '\" func_name \"' has too many arguments (\" arg_count \")\" > \"/dev/stderr\""
            print "        return"
        }
        print "    }"
    }
    print "    "
    print "    print \"Error: Invalid argument count for function '\" func_name \"'\" > \"/dev/stderr\""
    print "    return"
    print "}"
    print ""
    
    print "# --- Predicate Functions ---"
    print "# Type checking and validation functions"
    print ""
    print "function is_number(value) {"
    print "    # Check if value is a number (including 0)"
    print "    return value == value + 0"
    print "}"
    print ""
    print "function is_string(value) {"
    print "    # Check if value is a string (not a number)"
    print "    # In AWK, string numbers like \"123\" are both strings and numbers"
    print "    # So we check if it's NOT a number to determine if it's a pure string"
    print "    return !(value == value + 0)"
    print "}"
    print ""
    print "function assert(condition, message) {"
    print "    if (!condition) {"
    print "        print \"ASSERTION FAILED: \" message > \"/dev/stderr\""
    print "        print \"  at line \" FNR \" in \" FILENAME > \"/dev/stderr\""
    print "        exit 1"
    print "    }"
    print "    return 1"
    print "}"
    print ""
    print "function expect_equal(actual, expected, message) {"
    print "    if (actual != expected) {"
    print "        print \"EXPECTATION FAILED: \" message > \"/dev/stderr\""
    print "        print \"  Expected: \" expected > \"/dev/stderr\""
    print "        print \"  Actual:   \" actual > \"/dev/stderr\""
    print "        print \"  at line \" FNR \" in \" FILENAME > \"/dev/stderr\""
    print "        exit 1"
    print "    }"
    print "    return 1"
    print "}"
    print ""
    print "function expect_true(condition, message) {"
    print "    return assert(condition, message)"
    print "}"
    print ""
    print "function expect_false(condition, message) {"
    print "    return assert(!condition, message)"
    print "}"
    print ""
    print "function is_positive(value) {"
    print "    # Check if value is a positive number"
    print "    return is_number(value) && value > 0"
    print "}"
    print ""
    print "function is_negative(value) {"
    print "    # Check if value is a negative number"
    print "    return is_number(value) && value < 0"
    print "}"
    print ""
    print "function is_zero(value) {"
    print "    # Check if value is zero"
    print "    return is_number(value) && value == 0"
    print "}"
    print ""
    print "function is_integer(value) {"
    print "    # Check if value is an integer"
    print "    return is_number(value) && int(value) == value"
    print "}"
    print ""
    print "function is_float(value) {"
    print "    # Check if value is a floating point number"
    print "    return is_number(value) && int(value) != value"
    print "}"
    print ""
    print "function is_boolean(value) {"
    print "    # Check if value is a boolean (0 or 1)"
    print "    return value == 0 || value == 1"
    print "}"
    print ""
    print "function is_truthy(value) {"
    print "    # Check if value is truthy (non-zero, non-empty)"
    print "    if (is_number(value)) return value != 0"
    print "    if (is_string(value)) return value != \"\""
    print "    return 0"
    print "}"
    print ""
    print "function is_falsy(value) {"
    print "    # Check if value is falsy (zero, empty string)"
    print "    return !is_truthy(value)"
    print "}"
    print ""
    print "function is_empty(value) {"
    print "    # Check if value is empty (empty string, 0)"
    print "    if (value == \"\") return 1"
    print "    if (value == 0) return 1"
    print "    return 0"
    print "}"
    print ""
    print "function is_email(value) {"
    print "    # Simple email validation"
    print "    if (value == \"\") return 0"
    print "    # Must contain exactly one @ symbol"
    print "    at_count = 0"
    print "    for (i = 1; i <= length(value); i++) {"
    print "        if (substr(value, i, 1) == \"@\") at_count++"
    print "    }"
    print "    if (at_count != 1) return 0"
    print "    # Split into local and domain parts"
    print "    split(value, parts, \"@\")"
    print "    local_part = parts[1]"
    print "    domain_part = parts[2]"
    print "    # Local and domain parts must not be empty"
    print "    if (length(local_part) == 0 || length(domain_part) == 0) return 0"
    print "    # Basic local part validation: no spaces"
    print "    if (local_part ~ /[ ]/) return 0"
    print "    # Domain part validation"
    print "    if (index(domain_part, \".\") == 0) return 0"
    print "    return 1"
    print "}"
    print ""
    print "function is_url(value) {"
    print "    # Enhanced URL validation with multiple protocols"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    # Check for common URL schemes"
    print "    if (value ~ /^(https|http|ftp|ftps|mailto|tel):(\\/\\/)?([a-zA-Z0-9\\.-]+)(:[0-9]+)?(\\/.*)?(\\?.*)?$/) {"
    print "        # Extra check for http/https/ftp to ensure they have slashes"
            print "        if ((value ~ /^http/ || value ~ /^ftp/) && value !~ /:\\/\\//) return 0"
    print "        return 1"
    print "    }"
    print "    return 0"
    print "}"
    print ""
    print "function is_ipv4(value) {"
    print "    # Basic IPv4 validation"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    # Split by dots and check each octet"
    print "    split(value, octets, \".\")"
    print "    if (length(octets) != 4) return 0"
    print "    for (i = 1; i <= 4; i++) {"
    print "        if (!is_number(octets[i])) return 0"
    print "        if (octets[i] < 0 || octets[i] > 255) return 0"
    print "    }"
    print "    return 1"
    print "}"
    print ""
    print "function is_ipv6(value) {"
    print "    # Enhanced IPv6 validation with interface identifiers"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    # Handle optional interface identifier (e.g., %eth0)"
    print "    addr = value"
    print "    if (index(addr, \"%\") > 0) {"
    print "        split(addr, parts, \"%\")"
    print "        addr = parts[1]"
    print "    }"
    print "    # An IPv6 address cannot contain more than one \"::\""
    print "    if (gsub(/::/, \"&\") > 1) return 0"
    print "    # Check for invalid trailing colon"
    print "    if (substr(addr, length(addr)) == \":\" && substr(addr, length(addr) - 1) != \"::\") return 0"
    print "    has_trailing_colon = (substr(addr, length(addr) - 1) == \"::\")"
    print "    num_parts = split(addr, parts, \":\")"
    print "    empty_found = (addr ~ /::/)"
    print "    total_segments = num_parts"
    print "    if (has_trailing_colon) total_segments--"
    print "    for (i = 1; i <= num_parts; i++) {"
    print "        if (length(parts[i]) == 0) continue  # Part of :: compression"
    print "        # Each segment must be valid hex between 1 and 4 characters"
    print "        if (parts[i] !~ /^[0-9a-fA-F]{1,4}$/) return 0"
    print "    }"
    print "    if (empty_found) {"
    print "        if (total_segments > 7) return 0"
    print "    } else {"
    print "        if (total_segments != 8) return 0"
    print "    }"
    print "    return 1"
    print "}"
    print ""
    print "function is_uuid(value) {"
    print "    # UUID validation (comprehensive format support)"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    # Pattern 1: Standard hyphenated UUID"
    print "    if (value ~ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/) return 1"
    print "    # Pattern 2: UUID with no hyphens (32 hex characters)"
    print "    if (value ~ /^[0-9a-fA-F]{32}$/) return 1"
    print "    # Pattern 3: URN-formatted UUID"
    print "    if (value ~ /^urn:uuid:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/) return 1"
    print "    return 0"
    print "}"
    print ""
    print "function is_alpha(value) {"
    print "    # Check if string contains only alphabetic characters"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    # Remove all alphabetic characters and check if empty"
    print "    gsub(/[a-zA-Z]/, \"\", value)"
    print "    return value == \"\""
    print "}"
    print ""
    print "function is_numeric(value) {"
    print "    # Check if string contains only numeric characters"
    print "    if (value == \"\") return 0"
    print "    # Convert to string and check if it contains only digits"
    print "    str_value = value \"\""
    print "    # Remove all numeric characters and check if empty"
    print "    gsub(/[0-9]/, \"\", str_value)"
    print "    return str_value == \"\""
    print "}"
    print ""
    print "function is_alphanumeric(value) {"
    print "    # Check if string contains only alphanumeric characters"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    # Remove all alphanumeric characters and check if empty"
    print "    gsub(/[a-zA-Z0-9]/, \"\", value)"
    print "    return value == \"\""
    print "}"
    print ""
    print "function is_palindrome(value) {"
    print "    # Enhanced palindrome detection with better whitespace handling"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 1"
    print "    # Clean string: lowercase and remove non-alphanumeric characters"
    print "    clean_str = tolower(value)"
    print "    gsub(/[^a-z0-9]/, \"\", clean_str)"
    print "    len = length(clean_str)"
    print "    if (len == 0) return 1  # Empty string after cleaning is a palindrome"
    print "    # Check if it reads the same forwards and backwards"
    print "    for (i = 1; i <= len / 2; i++) {"
    print "        if (substr(clean_str, i, 1) != substr(clean_str, len - i + 1, 1)) return 0"
    print "    }"
    print "    return 1"
    print "}"
    print ""
    print "function is_in_range(value, min, max) {"
    print "    # Check if number is within range [min, max]"
    print "    return is_number(value) && value >= min && value <= max"
    print "}"
    print ""
    print "function is_even(value) {"
    print "    # Check if number is even"
    print "    return is_number(value) && value % 2 == 0"
    print "}"
    print ""
    print "function is_odd(value) {"
    print "    # Check if number is odd"
    print "    return is_number(value) && value % 2 != 0"
    print "}"
    print ""
    print "function is_prime(value) {"
    print "    # Check if number is prime"
    print "    if (!is_number(value) || value < 2) return 0"
    print "    if (value == 2) return 1"
    print "    if (value % 2 == 0) return 0"
    print "    for (i = 3; i * i <= value; i += 2) {"
    print "        if (value % i == 0) return 0"
    print "    }"
    print "    return 1"
    print "}"
    print ""
    print "function is_whitespace(value) {"
    print "    # Check if string is whitespace"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    return value ~ /^[ \\t\\n\\r]+$/"
    print "}"
    print ""
    print "function is_uppercase(value) {"
    print "    # Check if string is uppercase"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    return value ~ /^[A-Z]+$/"
    print "}"
    print ""
    print "function is_lowercase(value) {"
    print "    # Check if string is lowercase"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    return value ~ /^[a-z]+$/"
    print "}"
    print ""
    print "function is_length(value, target_length) {"
    print "    # Check if string/array has specific length"
    print "    if (is_string(value)) {"
    print "        return length(value) == target_length"
    print "    } else {"
    print "        # For arrays, count the elements"
    print "        count = 0"
    print "        for (i in value) count++"
    print "        return count == target_length"
    print "    }"
    print "}"
    print ""
    print "function is_array(value) {"
    print "    # Check if value is an array (limited detection)"
    print "    # This is a heuristic - we check if it has any elements"
    print "    # Note: This function has limitations due to AWK's array handling"
    print "    count = 0"
    print "    for (i in value) {"
    print "        count++"
    print "        break  # Just need to find one element"
    print "    }"
    print "    return count > 0"
    print "}"
    print ""
    print "function is_hex(value) {"
    print "    # Enhanced hex validation with optional prefixes"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    # Strip optional prefixes"
    print "    test_str = value"
    print "    if (substr(test_str, 1, 2) == \"0x\" || substr(test_str, 1, 2) == \"0X\") {"
    print "        test_str = substr(test_str, 3)"
    print "    } else if (substr(test_str, 1, 1) == \"#\") {"
    print "        test_str = substr(test_str, 2)"
    print "    }"
    print "    if (length(test_str) == 0) return 0  # Prefix only is not valid"
    print "    return (test_str ~ /^[0-9a-fA-F]+$/) ? 1 : 0"
    print "}"
    print ""
    print "function is_csv(value, _fs_orig, _nf_orig, _comma_count, _quote_count) {"
    print "    # Check if string appears to be CSV format (robust version)"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    # Heuristic 1: Must contain at least one comma"
    print "    if (index(value, \",\") == 0) return 0"
    print "    # Heuristic 2: Should have an even number of double quotes"
    print "    _quote_count = gsub(/\"/, \"&\", value)"
    print "    if (_quote_count % 2 != 0) return 0"
    print "    # Heuristic 3: When split by comma, should result in more than one field"
    print "    _fs_orig = FS"
    print "    _nf_orig = NF"
    print "    FS = \",\""
    print "    $0 = value"
    print "    _comma_count = NF"
    print "    # Restore original state"
    print "    FS = _fs_orig"
    print "    $0 = $0"
    print "    return (_comma_count > 1) ? 1 : 0"
    print "}"
    print ""
    print "function is_tsv(value, _fs_orig, _nf_orig, _tab_count) {"
    print "    # Check if string appears to be TSV format (robust version)"
    print "    if (!is_string(value)) return 0"
    print "    if (value == \"\") return 0"
    print "    # Heuristic 1: Must contain at least one tab character"
    print "    if (index(value, \"\\t\") == 0) return 0"
    print "    # Heuristic 2: When split by tab, should result in more than one field"
    print "    _fs_orig = FS"
    print "    _nf_orig = NF"
    print "    FS = \"\\t\""
    print "    $0 = value"
    print "    _tab_count = NF"
    print "    # Restore original state"
    print "    FS = _fs_orig"
    print "    $0 = $0"
    print "    return (_tab_count > 1) ? 1 : 0"
    print "}"
    print ""
    print "# --- HTTP Status Code Predicates ---"
    print "function http_is_redirect(status) {"
    print "    # Check if HTTP status code indicates a redirect (3xx)"
    print "    return is_number(status) && status >= 300 && status < 400"
    print "}"
    print ""
    print "function http_is_client_error(status) {"
    print "    # Check if HTTP status code indicates a client error (4xx)"
    print "    return is_number(status) && status >= 400 && status < 500"
    print "}"
    print ""
    print "function http_is_server_error(status) {"
    print "    # Check if HTTP status code indicates a server error (5xx)"
    print "    return is_number(status) && status >= 500 && status < 600"
    print "}"
    print ""
    print "# --- HTTP Method Predicates ---"
    print "function http_is_get(method) {"
    print "    # Check if HTTP method is GET"
    print "    return is_string(method) && method == \"GET\""
    print "}"
    print ""
    print "function http_is_post(method) {"
    print "    # Check if HTTP method is POST"
    print "    return is_string(method) && method == \"POST\""
    print "}"
    print ""
    print "function http_is_safe_method(method) {"
    print "    # Check if HTTP method is safe (GET, HEAD)"
    print "    return is_string(method) && (method == \"GET\" || method == \"HEAD\")"
    print "}"
    print ""
    print "function http_is_mutating_method(method) {"
    print "    # Check if HTTP method can mutate server state (POST, PUT, DELETE, PATCH)"
    print "    return is_string(method) && (method == \"POST\" || method == \"PUT\" || method == \"DELETE\" || method == \"PATCH\")"
    print "}"
    print ""
    print "# --- URL/Path Predicates ---"
    print "function url_is_static_file(url) {"
    print "    # Check if URL points to a static file (CSS, JS, images, etc.)"
    print "    if (!is_string(url)) return 0"
    print "    return index(url, \".css\") > 0 || index(url, \".js\") > 0 || index(url, \".png\") > 0 || index(url, \".jpg\") > 0 || index(url, \".jpeg\") > 0 || index(url, \".gif\") > 0 || index(url, \".svg\") > 0 || index(url, \".ico\") > 0 || index(url, \".woff\") > 0 || index(url, \".woff2\") > 0"
    print "}"
    print ""
    print "function url_has_query_params(url) {"
    print "    # Check if URL contains query parameters"
    print "    return is_string(url) && index(url, \"?\") > 0"
    print "}"
    print ""
    print "function url_is_root_path(url) {"
    print "    # Check if URL is the root path"
    print "    return is_string(url) && (url == \"/\" || url == \"\")"
    print "}"
    print ""
    print "# --- User Agent Predicates ---"
    print "function user_agent_is_mobile(user_agent) {"
    print "    # Check if user agent indicates a mobile device"
    print "    if (!is_string(user_agent)) return 0"
    print "    return index(user_agent, \"Mobile\") > 0 || index(user_agent, \"iPhone\") > 0 || index(user_agent, \"Android\") > 0 || index(user_agent, \"iPad\") > 0"
    print "}"
    print ""
    print "function user_agent_is_desktop(user_agent) {"
    print "    # Check if user agent indicates a desktop device"
    print "    if (!is_string(user_agent)) return 0"
    print "    # Check for desktop OS indicators, but exclude mobile Linux (Android)"
    print "    return (index(user_agent, \"Windows\") > 0 || index(user_agent, \"Macintosh\") > 0 || (index(user_agent, \"Linux\") > 0 && index(user_agent, \"Android\") == 0))"
    print "}"
    print ""
    print "function is_bot(user_agent) {"
    print "    # Check if user agent indicates a bot/crawler"
    print "    if (!is_string(user_agent)) return 0"
    print "    return index(user_agent, \"bot\") > 0 || index(user_agent, \"crawler\") > 0 || index(user_agent, \"spider\") > 0 || index(user_agent, \"Googlebot\") > 0 || index(user_agent, \"Bingbot\") > 0"
    print "}"
    print ""
    print "function user_agent_is_browser(user_agent) {"
    print "    # Check if user agent indicates a web browser (not a bot)"
    print "    if (!is_string(user_agent)) return 0"
    print "    return index(user_agent, \"Mozilla\") > 0 && !is_bot(user_agent)"
    print "}"
    print ""
    print "# --- IP Address Predicates ---"
    print "function ip_is_local(ip) {"
    print "    # Check if IP address is local/private"
    print "    if (!is_string(ip)) return 0"
    print "    return index(ip, \"127.0.0.1\") > 0 || index(ip, \"192.168.\") > 0 || index(ip, \"10.\") > 0 || index(ip, \"172.\") > 0"
    print "}"
    print ""
    print "function ip_is_public(ip) {"
    print "    # Check if IP address is public (not local)"
    print "    return !ip_is_local(ip)"
    print "}"
    print ""
    print "function ip_is_ipv4(ip) {"
    print "    # Check if IP address is IPv4 format"
    print "    return is_string(ip) && ip ~ /^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$/"
    print "}"
    print ""
    print "function ip_is_ipv6(ip) {"
    print "    # Check if IP address is IPv6 format"
    print "    return is_string(ip) && ip ~ /^[0-9a-fA-F:]+$/"
    print "}"
    print ""
    print "# --- Array Utility Functions ---"
    print ""
    print "function keys(array, count, i) {"
    print "    # Returns count of keys in array"
    print "    count = 0"
    print "    for (i in array) count++"
    print "    return count"
    print "}"
    print ""
    print "function values(array, count, i) {"
    print "    # Returns count of values in array"
    print "    count = 0"
    print "    for (i in array) count++"
    print "    return count"
    print "}"
    print ""
    print "function get_keys(array, result, i, count) {"
    print "    # Populates result array with keys"
    print "    count = 0"
    print "    for (i in array) {"
    print "        result[++count] = i"
    print "    }"
    print "    return count"
    print "}"
    print ""
    print "function get_values(array, result, i, count) {"
    print "    # Populates result array with values"
    print "    count = 0"
    print "    for (i in array) {"
    print "        result[++count] = array[i]"
    print "    }"
    print "    return count"
    print "}"
    print ""
    print "# --- Functional Programming Functions ---"
    print ""
    print "function map(func_name, array, result, i) {"
    print "    # Apply function to each element of array, preserving indices"
    print "    for (i in array) {"
    print "        result[i] = dispatch_call(func_name, array[i])"
    print "    }"
    print "    return keys(array)"
    print "}"
    print ""
    print "function reduce(func_name, array, initial, result, i, first) {"
    print "    # Reduce array using function (left fold)"
    print "    result = initial"
    print "    first = 1"
    print "    for (i in array) {"
    print "        if (first) {"
    print "            result = array[i]"
    print "            first = 0"
    print "        } else {"
    print "            result = dispatch_call(func_name, result, array[i])"
    print "        }"
    print "    }"
    print "    return result"
    print "}"
    print ""
    print "function pipe(value, func_name, result) {"
    print "    # Pipe value through a single function (simplified version)"
    print "    result = dispatch_call(func_name, value)"
    print "    return result"
    print "}"
    print ""
    print "function pipe_multi(value, func_names, result, i, func_count) {"
    print "    # Pipe value through multiple functions (func_names is array)"
    print "    result = value"
    print "    func_count = length(func_names)"
    print "    for (i = 1; i <= func_count; i++) {"
    print "        result = dispatch_call(func_names[i], result)"
    print "    }"
    print "    return result"
    print "}"
    print ""
    print "# --- Enhanced Array Utilities ---"
    print ""
    print "function filter(predicate_func, array, result, i, count) {"
    print "    # Filter array elements based on predicate function"
    print "    count = 0"
    print "    for (i in array) {"
    print "        if (dispatch_call(predicate_func, array[i])) {"
    print "            result[++count] = array[i]"
    print "        }"
    print "    }"
    print "    return count"
    print "}"
    print ""
    print "function find(predicate_func, array, i, keys, key_count) {"
    print "    # Find first element that matches predicate"
    print "    key_count = get_keys(array, keys)"
    print "    for (i = 1; i <= key_count; i++) {"
    print "        if (dispatch_call(predicate_func, array[keys[i]])) {"
    print "            return array[keys[i]]"
    print "        }"
    print "    }"
    print "    return \"\"  # Not found"
    print "}"
    print ""
    print "function findIndex(predicate_func, array, i, keys, key_count) {"
    print "    # Find index of first element that matches predicate"
    print "    key_count = get_keys(array, keys)"
    print "    for (i = 1; i <= key_count; i++) {"
    print "        if (dispatch_call(predicate_func, array[keys[i]])) {"
    print "            return i"
    print "        }"
    print "    }"
    print "    return 0  # Not found"
    print "}"
    print ""
    print "function flatMap(func_name, array, result, i, temp_array, temp_count, j) {"
    print "    # Apply function to each element and flatten the result"
    print "    for (i in array) {"
    print "        temp_count = dispatch_call(func_name, array[i], temp_array)"
    print "        for (j = 1; j <= temp_count; j++) {"
    print "            result[keys(result) + 1] = temp_array[j]"
    print "        }"
    print "    }"
    print "    return keys(result)"
    print "}"
    print ""
    print "function take(count, array, result, i, count_taken) {"
    print "    # Take first n elements from array"
    print "    count_taken = 0"
    print "    for (i in array) {"
    print "        if (count_taken >= count) break"
    print "        count_taken++"
    print "        result[count_taken] = array[i]"
    print "    }"
    print "    return count_taken"
    print "}"
    print ""
    print "function drop(count, array, result, i, count_dropped, count_kept) {"
    print "    # Drop first n elements from array"
    print "    count_dropped = 0"
    print "    count_kept = 0"
    print "    for (i in array) {"
    print "        count_dropped++"
    print "        if (count_dropped > count) {"
    print "            count_kept++"
    print "            result[count_kept] = array[i]"
    print "        }"
    print "    }"
    print "    return count_kept"
    print "}"
    print ""
}

# Rewrite calls to user-defined rawk functions so that they target the
# generated internal names (__lambda_0, __lambda_1, ...).
#
# A name is rewritten only when it is a complete identifier, i.e. when the
# character in front of it is not a letter, digit or underscore. A plain
# substring replacement would let a user function named `call` corrupt
# `dispatch_call(`, or a user function named `sum` corrupt calls to another
# user function named `checksum`.
#
# Reads the globals function_count and FUNCTION_NAMES; match() overwrites
# RSTART and RLENGTH.
#
#   text    - one line, or a whole function body, of rawk source
#   returns - text with every qualifying `name(` replaced by `__lambda_N(`
#
# NOTE(review): occurrences inside string literals and comments are still
# rewritten, and a name glued to a numeric literal (e.g. `2add(`) is treated
# as part of a longer identifier and left untouched.
function _rewrite_user_calls(text,    j, pattern, done, rest, prev) {
    for (j = 1; j <= function_count; j++) {
        pattern = FUNCTION_NAMES[j] "\\("
        done = ""
        rest = text
        prev = ""    # character just before `rest`; empty at start of text
        while (match(rest, pattern)) {
            if (RSTART > 1) prev = substr(rest, RSTART - 1, 1)
            if (prev ~ /[A-Za-z0-9_]/) {
                # Tail of a longer identifier: copy it through unchanged.
                done = done substr(rest, 1, RSTART + RLENGTH - 1)
            } else {
                done = done substr(rest, 1, RSTART - 1) "__lambda_" (j - 1) "("
            }
            rest = substr(rest, RSTART + RLENGTH)
            # Every match ends with "(", so that is what precedes `rest` now.
            prev = "("
        }
        text = done rest
    }
    return text
}

# Emit the dispatch table and the awk definitions of all user functions.
#
# Prints nothing when function_count is 0. Otherwise prints:
#   1. a BEGIN block that fills RAWK_DISPATCH[name] with the string
#      "<internal name>|<argument count>|<FUNCTION_LINES entry>"
#   2. one `function __lambda_N(args) { ... }` per user function, where N is
#      the function's 1-based index minus one.
#
# Reads FUNCTION_NAMES, FUNCTION_ARGS, FUNCTION_LINES, FUNCTION_BODIES and
# FUNCTION_TYPES. Everything after the blank gap in the parameter list is an
# awk local, so no loop state leaks into the compiler's globals.
function generate_function_definitions(    i, internal_name, arg_count, args_array, body) {
    if (function_count == 0) return

    print "# --- User Functions ---"

    print "# Dispatch table"
    print "BEGIN {"
    for (i = 1; i <= function_count; i++) {
        internal_name = "__lambda_" (i - 1)
        # split() returns the number of comma-separated pieces, which is 0
        # for an empty argument list.
        arg_count = split(FUNCTION_ARGS[i], args_array, ",")
        print "    RAWK_DISPATCH[\"" FUNCTION_NAMES[i] "\"] = \"" internal_name "|" arg_count "|" FUNCTION_LINES[i] "\""
    }
    print "}"
    print ""

    for (i = 1; i <= function_count; i++) {
        internal_name = "__lambda_" (i - 1)
        # Calls to user functions (including recursive ones) must use the
        # internal names, because only those exist in the generated program.
        body = _rewrite_user_calls(FUNCTION_BODIES[i])

        print "function " internal_name "(" FUNCTION_ARGS[i] ") {"
        if (FUNCTION_TYPES[i] == "single") {
            # Single-line functions are a bare expression: return its value.
            print "    return " body
        } else {
            print body
        }
        print "}"
        print ""
    }
}

# Emit the non-function part of the rawk program (main_script_lines).
#
# If any line opens a BEGIN block the lines are printed as they are;
# otherwise they are wrapped in a generated `BEGIN { ... }` and indented by
# four spaces. In both cases calls to user functions are rewritten to their
# internal names first.
#
# NOTE(review): only BEGIN is looked for, so a script made solely of
# pattern-action or END rules is also wrapped in BEGIN - confirm that this
# is intended.
function generate_main_script(    i, has_begin, indent) {
    print "# --- Main Script Body ---"

    has_begin = 0
    for (i = 1; i <= main_script_count; i++) {
        if (main_script_lines[i] ~ /^[ \t]*BEGIN[ \t]*\{/) {
            has_begin = 1
            break
        }
    }

    indent = has_begin ? "" : "    "

    if (!has_begin) print "BEGIN {"
    for (i = 1; i <= main_script_count; i++) {
        print indent _rewrite_user_calls(main_script_lines[i])
    }
    if (!has_begin) print "}"
}


# Shared formatter behind every report_* function below.
#
# Writes one diagnostic to /dev/stderr: the headline, the source position
# (line number plus awk's current FILENAME), the offending source line, an
# optional hint, and a trailing blank line. It updates no counters; each
# caller maintains its own.
#
#   headline   - first line of the diagnostic, already carrying its prefix
#   line_num   - line number to report
#   line       - source text shown after "context:"
#   suggestion - optional hint; the hint line is skipped when this is ""
function _emit_diagnostic(headline, line_num, line, suggestion) {
    print headline > "/dev/stderr"
    print "   at line " line_num " in " FILENAME > "/dev/stderr"
    print "   context: " line > "/dev/stderr"
    if (suggestion != "") {
        print "   💡 " suggestion > "/dev/stderr"
    }
    print "" > "/dev/stderr"
}

# Report a syntax-validation error and count it in validation_errors.
function report_validation_error(message, line_num, line, suggestion) {
    _emit_diagnostic("❌ " message, line_num, line, suggestion)
    validation_errors++
}

# Report a syntax-validation warning and count it in validation_warnings.
function report_validation_warning(message, line_num, line, suggestion) {
    _emit_diagnostic("⚠️  " message, line_num, line, suggestion)
    validation_warnings++
}

# TODO: think through ways to add more passes to enhance compiler error messages
# Report a compilation error. Both error_count and errors are incremented.
function report_error(message, line_num, line, suggestion) {
    _emit_diagnostic("❌ rawk compilation error: " message, line_num, line, suggestion)
    error_count++
    errors++
}

# Report a compilation warning. Both warning_count and warnings are
# incremented.
function report_warning(message, line_num, line, suggestion) {
    _emit_diagnostic("⚠️  rawk compilation warning: " message, line_num, line, suggestion)
    warning_count++
    warnings++
}

# END block: runs once after all input has been read and produces the final
# output.
#
# When validation recorded at least one error, a summary is written to
# /dev/stderr and the compiler exits with status 1 without generating code.
# Otherwise the standard library, the user functions and the main script are
# printed to stdout in that order, followed by a comment block of
# compilation metadata.
#
# Counters are printed as (counter + 0): an awk variable that was never
# assigned is an empty string, so a run with no warnings would otherwise
# print "Warnings: " with nothing after it instead of "Warnings: 0".
#
# NOTE(review): the summary reports functions_defined while the generators
# iterate function_count - confirm both are maintained by the parsing rules.
# NOTE(review): RAWK_VERSION is assigned by a top-level rule rather than in
# BEGIN, so it is presumably empty here when the input had no records.
END {
    if (validation_errors > 0) {
        print "" > "/dev/stderr"
        print "📊 Validation Summary" > "/dev/stderr"
        print "====================" > "/dev/stderr"
        print "Total Lines: " (line_count + 0) > "/dev/stderr"
        print "Errors: " (validation_errors + 0) > "/dev/stderr"
        print "Warnings: " (validation_warnings + 0) > "/dev/stderr"
        print "❌ Syntax validation failed! Exiting without code generation." > "/dev/stderr"
        exit 1
    }

    # Order matters: library helpers first, then the user functions that may
    # call them, then the script body that calls both.
    generate_standard_library()
    generate_function_definitions()
    generate_main_script()

    # Trailing metadata, emitted as awk comments so that the generated
    # program stays valid.
    print "# Rawk compilation summary:"
    print "#   - Rawk Version: " RAWK_VERSION
    print "#   - Functions defined: " (functions_defined + 0)
    print "#   - Source lines: " (line_count + 0)
    print "#   - Errors: " (errors + 0)
    print "#   - Warnings: " (warnings + 0)
    print ""
}