diff options
Diffstat (limited to 'awk/rawk/rawk.awk')
-rw-r--r-- | awk/rawk/rawk.awk | 1367 |
1 file changed, 496 insertions, 871 deletions
diff --git a/awk/rawk/rawk.awk b/awk/rawk/rawk.awk index b0d4b5a..c4e2ff1 100644 --- a/awk/rawk/rawk.awk +++ b/awk/rawk/rawk.awk @@ -1,913 +1,538 @@ -#!/usr/bin/env awk -f +#!/usr/bin/awk -f -# ----------------------------------------------------------------------------- -# rawk.awk - The `rawk` Language Compiler (Multi-Pass Version) -# -# This script translates a `.rawk` source file into standard, portable awk code. -# It uses a multi-pass approach: -# Pass 1: Parse and collect all function definitions and source lines -# Pass 2: Generate the final AWK code (in END block) -# -# USAGE: -# awk -f rawk.awk my_program.rawk | awk -f - -# -# EXAMPLES: -# # Compile and run a rawk program -# awk -f rawk.awk hello.rawk | awk -f - -# -# # Compile to a file for later use -# awk -f rawk.awk hello.rawk > hello.awk -# awk -f hello.awk -# -# LANGUAGE FEATURES: -# -# 1. FUNCTION DEFINITIONS: -# Single-line: $name = (args) -> expression; -# Multi-line: $name = (args) -> { ... }; -# -# Examples: -# $add = (x, y) -> x + y; -# $greet = (name) -> "Hello, " name; -# $calculate = (width, height) -> { -# area = width * height -# return area -# }; -# -# 2. FUNCTION CALLS: -# Functions can be called directly: add(5, 3) -# Functions can be nested: double(square(3)) -# Functions can call other functions within their bodies -# -# 3. 
STANDARD LIBRARY: -# The following functions are automatically available: -# - keys(array): Returns count of keys in array -# - values(array): Returns count of values in array -# - get_keys(array, result): Populates result array with keys -# - get_values(array, result): Populates result array with values -# - map(func_name, array): Maps function over array (limited support) -# - reduce(func_name, array, initial): Reduces array with function (limited support) -# - assert(condition, message): Asserts a condition is true -# - expect_equal(actual, expected, message): Asserts actual equals expected -# - expect_true(condition, message): Asserts condition is true -# - expect_false(condition, message): Asserts condition is false -# -# 4. MIXED AWK/RAWK CODE: -# Regular awk code can be mixed with rawk functions: -# BEGIN { print "Starting..." } -# $process = (line) -> "Processed: " line; -# { print process($0) } -# END { print "Done." } +# rawk.awk + +# Author: @eli_oat +# License: Public Domain +# Let's make awk rawk + +# ============================================================================= +# Multi-pass compiler +# ============================================================================= +# +# This compiler transforms rawk code into standard awk and smartly includes only +# those standard library functions you've actually used. It uses a multi-pass +# approach to overcome awk's variable scoping limitations and ensure +# deterministic compilation. # # COMPILATION PROCESS: -# 1. Pass 1: Parse rawk function definitions and collect them -# 2. Pass 2: Generate internal awk functions and dispatch table -# 3. Pass 3: Replace function calls with internal names -# 4. 
Pass 4: Output final awk script -# -# LIMITATIONS: -# - Standard library map/reduce functions have limited support -# - Maximum 10 functions per file (for standard library compatibility) -# - Function names must be valid awk identifiers -# - Array returns from functions are not supported (use pass-by-reference) -# -# ERROR HANDLING: -# - Invalid syntax generates descriptive error messages with context -# - Missing functions are reported at runtime with helpful suggestions -# - Argument count mismatches are detected with detailed information -# - Source line correlation for better debugging -# -# PORTABILITY: -# - Output is compatible with standard awk (nawk, BSD awk) -# - Avoids gawk-specific features for maximum compatibility -# - Uses only standard awk constructs and functions +# Pass 1: Collect all input lines into memory +# Pass 2: Detect and validate RAWK { ... } block structure +# Pass 3: Extract function definitions from within RAWK block +# Pass 4: Analyze function calls to determine standard library dependencies +# Pass 5: Generate final awk code with smart standard library inclusion # -# ----------------------------------------------------------------------------- +# LANGUAGE FEATURES: +# - Block-based syntax: RAWK { ... } for function definitions +# - Functional programming utilities: map, reduce, filter, etc. 
+# - Smart standard library: only includes functions actually used +# - Comprehensive error handling with actionable messages +# ============================================================================= -# Global state for multi-pass compilation BEGIN { - # --- Compiler State Initialization --- - - # Function collection arrays - delete FUNCTION_NAMES - delete FUNCTION_ARGS - delete FUNCTION_BODIES - delete FUNCTION_TYPES # "single" or "multi" - delete FUNCTION_LINES # source line numbers - - # Counters - function_count = 0 - line_count = 0 - - # State tracking - in_function_body = 0 - brace_count = 0 - in_function_def = 0 # Track if we're in a function definition context - - # Source lines for pass 2 - delete SOURCE_LINES - delete SOURCE_LINE_TYPES # "function_def", "function_body", "code" - - # State tracking for multi-line function definitions - in_function_body = 0 - current_function_index = 0 - - # Enhanced error tracking - error_count = 0 - warning_count = 0 - - # Compilation statistics - functions_defined = 0 - source_lines = 0 - errors = 0 - warnings = 0 + # ============================================================================= + # INITIALIZATION: Set up data structures for multi-pass compilation + # ============================================================================= + + RAWK_VERSION = "0.0.1" + + # Arrays to store compilation state + delete lines # All input lines (Pass 1) + delete FUNCTION_NAMES # User-defined function names (Pass 3) + delete FUNCTION_ARGS # User-defined function arguments (Pass 3) + delete FUNCTION_BODIES # User-defined function bodies (Pass 3) + delete USED_FUNCTIONS # User functions actually called (Pass 4) + delete USED_STDLIB_FUNCTIONS # Standard library functions used (Pass 4) + + # Compilation state counters + line_count = 0 # Total number of input lines + function_count = 0 # Number of user-defined functions + in_rawk_block = 0 # Flag: currently inside RAWK block + rawk_block_start = 0 # Line number where RAWK 
block starts + rawk_block_end = 0 # Line number where RAWK block ends + + # ============================================================================= + # STANDARD LIBRARY CATALOG: All available functions for smart inclusion + # ============================================================================= + # These functions are conditionally included based on actual usage in the code + + # Core type checking and validation functions + stdlib_functions["assert"] = 1 + stdlib_functions["expect_equal"] = 1 + stdlib_functions["expect_true"] = 1 + stdlib_functions["expect_false"] = 1 + stdlib_functions["is_number"] = 1 + stdlib_functions["is_string"] = 1 + stdlib_functions["is_positive"] = 1 + stdlib_functions["is_negative"] = 1 + stdlib_functions["is_zero"] = 1 + stdlib_functions["is_integer"] = 1 + stdlib_functions["is_float"] = 1 + stdlib_functions["is_boolean"] = 1 + stdlib_functions["is_truthy"] = 1 + stdlib_functions["is_falsy"] = 1 + stdlib_functions["is_empty"] = 1 + + # Data format validation functions + stdlib_functions["is_email"] = 1 + stdlib_functions["is_url"] = 1 + stdlib_functions["is_ipv4"] = 1 + stdlib_functions["is_ipv6"] = 1 + stdlib_functions["is_uuid"] = 1 + stdlib_functions["is_alpha"] = 1 + stdlib_functions["is_numeric"] = 1 + stdlib_functions["is_alphanumeric"] = 1 + stdlib_functions["is_palindrome"] = 1 + stdlib_functions["is_hex"] = 1 + stdlib_functions["is_csv"] = 1 + stdlib_functions["is_tsv"] = 1 + + # HTTP status and method validation functions + stdlib_functions["http_is_redirect"] = 1 + stdlib_functions["http_is_client_error"] = 1 + stdlib_functions["http_is_server_error"] = 1 + stdlib_functions["http_is_get"] = 1 + stdlib_functions["http_is_post"] = 1 + stdlib_functions["http_is_safe_method"] = 1 + stdlib_functions["http_is_mutating_method"] = 1 + + # Array utility functions + stdlib_functions["keys"] = 1 + stdlib_functions["values"] = 1 + stdlib_functions["get_keys"] = 1 + stdlib_functions["get_values"] = 1 + + # Functional 
programming utilities + stdlib_functions["map"] = 1 + stdlib_functions["reduce"] = 1 + stdlib_functions["filter"] = 1 + stdlib_functions["find"] = 1 + stdlib_functions["findIndex"] = 1 + stdlib_functions["flatMap"] = 1 + stdlib_functions["take"] = 1 + stdlib_functions["drop"] = 1 + stdlib_functions["pipe"] = 1 + stdlib_functions["pipe_multi"] = 1 + + # Numeric predicate functions + stdlib_functions["is_even"] = 1 + stdlib_functions["is_odd"] = 1 + stdlib_functions["is_prime"] = 1 + stdlib_functions["is_in_range"] = 1 + + # String analysis functions + stdlib_functions["is_whitespace"] = 1 + stdlib_functions["is_uppercase"] = 1 + stdlib_functions["is_lowercase"] = 1 + stdlib_functions["is_length"] = 1 + + # Web-specific utility functions + stdlib_functions["url_is_static_file"] = 1 + stdlib_functions["url_has_query_params"] = 1 + stdlib_functions["url_is_root_path"] = 1 + stdlib_functions["user_agent_is_mobile"] = 1 + stdlib_functions["user_agent_is_desktop"] = 1 + stdlib_functions["user_agent_is_browser"] = 1 + stdlib_functions["is_bot"] = 1 + stdlib_functions["ip_is_local"] = 1 + stdlib_functions["ip_is_public"] = 1 + stdlib_functions["ip_is_ipv4"] = 1 + stdlib_functions["ip_is_ipv6"] = 1 } -# ----------------------------------------------------------------------------- -# PASS 1: Parse and collect function definitions and source lines -# ----------------------------------------------------------------------------- - +# ============================================================================= +# PASS 1: COLLECT ALL INPUT LINES +# ============================================================================= +# Store every line in memory for multi-pass processing. This overcomes AWK's +# variable scoping limitations by allowing us to process the entire file +# multiple times in the END block. 
{ - line_count++ - SOURCE_LINES[line_count] = $0 - - # Skip comments and empty lines - if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) { - SOURCE_LINE_TYPES[line_count] = "comment" - next - } - - # Pattern 1: Multi-line function definition start - if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) { - in_function_def = 1 - parse_multi_line_function($0, line_count) - SOURCE_LINE_TYPES[line_count] = "function_def" - next - } - - # Pattern 2: Single-line function definition - if ($0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*[^\{]/) { - in_function_def = 1 - parse_single_line_function($0, line_count) - SOURCE_LINE_TYPES[line_count] = "function_def" - next - } + lines[++line_count] = $0 +} + +# ============================================================================= +# PASSES 2-5: MULTI-PASS COMPILATION IN END BLOCK +# ============================================================================= +# All subsequent passes happen in the END block to ensure we have complete +# information about the entire source file before making compilation decisions. + +END { + # ============================================================================= + # PASS 2: DETECT AND VALIDATE RAWK BLOCK STRUCTURE + # ============================================================================= + # Find the RAWK { ... } block and validate its structure. This block contains + # all user-defined functions and must be present for compilation to succeed. + # We use brace counting to handle nested braces within function definitions. 
- # Pattern 3: Multi-line function body continuation - if (in_function_body) { - # Count opening and closing braces - open_braces = gsub(/\{/, "&", $0) - close_braces = gsub(/\}/, "&", $0) + for (i = 1; i <= line_count; i++) { + line = lines[i] - if (close_braces > 0 && brace_count <= 1) { - # End of function body - in_function_body = 0 - in_function_def = 0 - SOURCE_LINE_TYPES[line_count] = "function_body_end" - next - } else { - # Update brace count - brace_count += open_braces - close_braces + # Look for RAWK block start: "RAWK {" + if (line ~ /^[[:space:]]*RAWK[[:space:]]*\{/) { + # Ensure only one RAWK block exists + if (in_rawk_block) { + print "Error: Nested or multiple RAWK blocks are not supported" > "/dev/stderr" + exit 1 + } + + in_rawk_block = 1 + rawk_block_start = i - # Add line to current function body - FUNCTION_BODIES[current_function_index] = FUNCTION_BODIES[current_function_index] "\n " $0 - SOURCE_LINE_TYPES[line_count] = "function_body" - next + # Find the matching closing brace using brace counting + # This handles nested braces from function definitions within the block + brace_count = 1 + for (j = i + 1; j <= line_count; j++) { + line_j = lines[j] + for (k = 1; k <= length(line_j); k++) { + char = substr(line_j, k, 1) + if (char == "{") brace_count++ + if (char == "}") brace_count-- + if (brace_count == 0) { + rawk_block_end = j + in_rawk_block = 0 + break + } + } + if (brace_count == 0) break + } + + # Validate that the block was properly closed + if (brace_count != 0) { + print "Error: RAWK block opened at line " i " but never closed" > "/dev/stderr" + exit 1 + } + break # Found the complete RAWK block } } - # Pattern 4: Start of multi-line function body (only if not already in function body) - if (!in_function_body && in_function_def && $0 ~ /^[ \t]*\{/) { - in_function_body = 1 - brace_count = 1 - SOURCE_LINE_TYPES[line_count] = "function_body_start" - next - } - - # Pattern 5: Regular code (but exclude function definition endings) - if 
($0 ~ /^[ \t]*\}[ \t]*;[ \t]*$/) { - SOURCE_LINE_TYPES[line_count] = "function_end" - } else { - SOURCE_LINE_TYPES[line_count] = "code" - } -} - -# ----------------------------------------------------------------------------- -# HELPER FUNCTIONS -# ----------------------------------------------------------------------------- - -# Parse multi-line function definition -function parse_multi_line_function(line, line_num) { - # Extract function name - if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) { - func_name = substr(line, RSTART + 1, RLENGTH - 1) - } else { - report_error("Invalid function name", line_num, line, "Function name must be a valid identifier") - return + # Ensure a RAWK block was found + if (!rawk_block_start) { + print "Error: No RAWK block found" > "/dev/stderr" + exit 1 } - # Extract arguments - if (match(line, /\(([^)]*)\)/)) { - args = substr(line, RSTART + 1, RLENGTH - 2) - } else { - report_error("Invalid argument list", line_num, line, "Arguments must be enclosed in parentheses") - return + # Final validation that the block was properly closed + if (in_rawk_block) { + print "Error: RAWK block opened at line " rawk_block_start " but never closed" > "/dev/stderr" + exit 1 } - # Store function information - function_count++ - current_function_index = function_count - FUNCTION_NAMES[function_count] = func_name - FUNCTION_ARGS[function_count] = args - FUNCTION_BODIES[function_count] = "" - FUNCTION_TYPES[function_count] = "multi" - FUNCTION_LINES[function_count] = line_num + # ============================================================================= + # PASS 3: EXTRACT FUNCTION DEFINITIONS FROM RAWK BLOCK + # ============================================================================= + # Parse function definitions in the format: $name = (args) -> { body } + # Extract function name, arguments, and body for later code generation. 
- # Start collecting function body (the opening brace is already on this line) - in_function_body = 1 - brace_count = 1 # Start with 1 for the opening brace - - functions_defined++ -} - -# Parse single-line function definition -function parse_single_line_function(line, line_num) { - # Extract function name - if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) { - func_name = substr(line, RSTART + 1, RLENGTH - 1) - } else { - report_error("Invalid function name", line_num, line, "Function name must be a valid identifier") - return + i = rawk_block_start + 1 + while (i < rawk_block_end) { + line = lines[i] + + # Match function definition pattern: $name = (args) -> { + if (line ~ /^[[:space:]]*\$[a-zA-Z_][a-zA-Z0-9_]*[[:space:]]*=[[:space:]]*\(.*\)[[:space:]]*->[[:space:]]*\{/) { + + # Extract function name (remove $ prefix and whitespace) + if (match(line, /^[[:space:]]*\$([a-zA-Z_][a-zA-Z0-9_]*)/)) { + func_name = substr(line, RSTART + 1, RLENGTH - 1) + gsub(/[[:space:]]/, "", func_name) + gsub(/^\$/, "", func_name) # Remove the $ prefix for awk compatibility + + # Extract function arguments from parentheses + args_start = index(line, "(") + 1 + args_end = index(line, ")") + args = substr(line, args_start, args_end - args_start) + gsub(/[[:space:]]/, "", args) # Remove whitespace from arguments + + # Extract function body using brace counting + # This handles nested braces within the function body + body = "" + brace_count = 1 + j = i + 1 + while (j <= line_count && brace_count > 0) { + body_line = lines[j] + for (k = 1; k <= length(body_line); k++) { + char = substr(body_line, k, 1) + if (char == "{") brace_count++ + if (char == "}") brace_count-- + if (brace_count == 0) break + } + if (brace_count > 0) { + body = body body_line "\n" + } + j++ + } + + # Store extracted function information + function_count++ + FUNCTION_NAMES[function_count] = func_name + FUNCTION_ARGS[function_count] = args + FUNCTION_BODIES[function_count] = body + USED_FUNCTIONS[func_name] = 1 # Mark 
as used (defined) + + # Skip to end of function definition + i = j - 1 + } + } + i++ } - # Extract arguments - if (match(line, /\(([^)]*)\)/)) { - args = substr(line, RSTART + 1, RLENGTH - 2) - } else { - report_error("Invalid argument list", line_num, line, "Arguments must be enclosed in parentheses") - return - } + # ============================================================================= + # PASS 4: ANALYZE FUNCTION CALLS AND VALIDATE SYNTAX + # ============================================================================= + # Scan all lines to identify which standard library functions are actually used + # and validate that function definitions are only inside the RAWK block. + # This enables smart standard library inclusion. - # Extract body (everything after ->) - if (match(line, /->[ \t]*(.+)/)) { - body = substr(line, RSTART + 2) - # Remove trailing semicolon if present - gsub(/[ \t]*;[ \t]*$/, "", body) - } else { - report_error("Invalid function body", line_num, line, "Function body must follow '->'") - return + for (i = 1; i <= line_count; i++) { + line = lines[i] + + # Validate that function definitions are only inside RAWK block + if (i < rawk_block_start || i > rawk_block_end) { + if (line ~ /^[[:space:]]*\$[a-zA-Z_][a-zA-Z0-9_]*[[:space:]]*=[[:space:]]*\(.*\)[[:space:]]*->[[:space:]]*\{/) { + print "Error: Function definitions must be inside RAWK block (line " i ")" > "/dev/stderr" + exit 1 + } + } + + # Find calls to standard library functions (check ALL lines including RAWK block) + # This ensures we include functions called within user-defined functions + for (func_name in stdlib_functions) { + if (line ~ func_name "\\s*\\(") { + USED_STDLIB_FUNCTIONS[func_name] = 1 + } + } + + # Find calls to user-defined functions + for (j = 1; j <= function_count; j++) { + func_name = FUNCTION_NAMES[j] + if (line ~ func_name "\\s*\\(") { + USED_FUNCTIONS[func_name] = 1 + } + } } - # Store function information - function_count++ - 
FUNCTION_NAMES[function_count] = func_name - FUNCTION_ARGS[function_count] = args - FUNCTION_BODIES[function_count] = body - FUNCTION_TYPES[function_count] = "single" - FUNCTION_LINES[function_count] = line_num - - functions_defined++ -} + # ============================================================================= + # PASS 5: GENERATE FINAL AWK CODE + # ============================================================================= + # Generate the complete awk program with smart standard library inclusion, + # user-defined functions, and the main script body. + + # Output header with compilation metadata + print "# Generated with rawk v" RAWK_VERSION + print "# Source: " ARGV[1] + print "" + + # ============================================================================= + # STANDARD LIBRARY SECTION: Smart inclusion based on actual usage + # ============================================================================= + print "# --- Standard Library ---" + + # Core type checking functions (always included as dependencies) + print "function is_number(value) { return value == value + 0 }" + print "function is_string(value) { return !(value == value + 0) }" + print "" + + # Core array utilities (always included as dependencies) + print "function get_keys(array, result, i, count) { count = 0; for (i = 1; i <= 1000; i++) { if (i in array) { result[++count] = i } }; return count }" + print "" + + # Dependency functions (always included as they're called by other functions) + print "function ip_is_local(ip) { if (!is_string(ip)) return 0; return index(ip, \"127.0.0.1\") > 0 || index(ip, \"192.168.\") > 0 || index(ip, \"10.\") > 0 || index(ip, \"172.\") > 0 }" + print "function is_bot(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, \"bot\") > 0 || index(user_agent, \"crawler\") > 0 || index(user_agent, \"spider\") > 0 || index(user_agent, \"Googlebot\") > 0 || index(user_agent, \"Bingbot\") > 0 }" + print "" + + # Conditionally include 
standard library functions based on actual usage + # This is the "smart inclusion" feature that only includes functions that are called + for (func_name in USED_STDLIB_FUNCTIONS) { + if (func_name == "assert") { + print "function assert(condition, message) { if (!condition) { print \"Assertion failed: \" message > \"/dev/stderr\"; exit 1 } }" + } else if (func_name == "expect_equal") { + print "function expect_equal(actual, expected, message) { if (actual != expected) { print \"Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\"; exit 1 } }" + } else if (func_name == "expect_true") { + print "function expect_true(condition, message) { if (!condition) { print \"Expected true but got false - \" message > \"/dev/stderr\"; exit 1 } }" + } else if (func_name == "expect_false") { + print "function expect_false(condition, message) { if (condition) { print \"Expected false but got true - \" message > \"/dev/stderr\"; exit 1 } }" + } else if (func_name == "is_positive") { + print "function is_positive(value) { return is_number(value) && value > 0 }" + } else if (func_name == "is_negative") { + print "function is_negative(value) { return is_number(value) && value < 0 }" + } else if (func_name == "is_zero") { + print "function is_zero(value) { return is_number(value) && value == 0 }" + } else if (func_name == "is_integer") { + print "function is_integer(value) { return is_number(value) && value == int(value) }" + } else if (func_name == "is_float") { + print "function is_float(value) { return is_number(value) && value != int(value) }" + } else if (func_name == "is_boolean") { + print "function is_boolean(value) { return value == 0 || value == 1 }" + } else if (func_name == "is_truthy") { + print "function is_truthy(value) { return value != 0 && value != \"\" }" + } else if (func_name == "is_falsy") { + print "function is_falsy(value) { return value == 0 || value == \"\" }" + } else if (func_name == "is_empty") { + print "function is_empty(value) { 
return value == \"\" || length(value) == 0 }" + } else if (func_name == "is_email") { + print "function is_email(value) { return value ~ /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$/ }" + } else if (func_name == "is_url") { + print "function is_url(value) { return value ~ /^(https?:|ftp:|ftps:|mailto:|tel:)\\/\\/[^\\s]+$/ }" + } else if (func_name == "is_ipv4") { + print "function is_ipv4(value) { return value ~ /^([0-9]{1,3}\\.){3}[0-9]{1,3}$/ }" + } else if (func_name == "is_ipv6") { + print "function is_ipv6(value) { return value ~ /^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$/ }" + } else if (func_name == "is_uuid") { + print "function is_uuid(value) { return value ~ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/ }" + } else if (func_name == "is_alpha") { + print "function is_alpha(value) { return value ~ /^[a-zA-Z]+$/ }" + } else if (func_name == "is_numeric") { + print "function is_numeric(value) { return value ~ /^[0-9]+$/ }" + } else if (func_name == "is_alphanumeric") { + print "function is_alphanumeric(value) { return value ~ /^[a-zA-Z0-9]+$/ }" + } else if (func_name == "is_palindrome") { + print "function is_palindrome(value) { len = length(value); for (i = 1; i <= len/2; i++) if (substr(value, i, 1) != substr(value, len-i+1, 1)) return 0; return 1 }" + } else if (func_name == "is_hex") { + print "function is_hex(value) { return value ~ /^[0-9a-fA-F]+$/ }" + } else if (func_name == "is_csv") { + print "function is_csv(value) { return index(value, \",\") > 0 }" + } else if (func_name == "is_tsv") { + print "function is_tsv(value) { return index(value, \"\\t\") > 0 }" + } else if (func_name == "http_is_redirect") { + print "function http_is_redirect(status) { return status >= 300 && status < 400 }" + } else if (func_name == "http_is_client_error") { + print "function http_is_client_error(status) { return status >= 400 && status < 500 }" + } else if (func_name == "http_is_server_error") { + print "function 
http_is_server_error(status) { return status >= 500 && status < 600 }" + } else if (func_name == "http_is_get") { + print "function http_is_get(method) { return method == \"GET\" }" + } else if (func_name == "http_is_post") { + print "function http_is_post(method) { return method == \"POST\" }" + } else if (func_name == "http_is_safe_method") { + print "function http_is_safe_method(method) { return method == \"GET\" || method == \"HEAD\" || method == \"OPTIONS\" }" + } else if (func_name == "http_is_mutating_method") { + print "function http_is_mutating_method(method) { return method == \"POST\" || method == \"PUT\" || method == \"DELETE\" || method == \"PATCH\" }" + } else if (func_name == "keys") { + print "function keys(array, count, i) { count = 0; for (i in array) count++; return count }" + } else if (func_name == "values") { + print "function values(array, count, i) { count = 0; for (i in array) count++; return count }" + } else if (func_name == "get_values") { + print "function get_values(array, result, i, count) { count = 0; for (i = 1; i <= 1000; i++) { if (i in array) { result[++count] = array[i] } }; return count }" + } else if (func_name == "map") { + print "function map(func_name, array, result, i, count) { count = 0; for (i in array) { result[i] = dispatch_call(func_name, array[i]); count++ }; return count }" + } else if (func_name == "reduce") { + print "function reduce(func_name, array, initial, i, result) { result = initial; for (i in array) { result = dispatch_call(func_name, result, array[i]) }; return result }" + } else if (func_name == "filter") { + print "function filter(predicate_func, array, result, i, count) { count = 0; for (i in array) { if (dispatch_call(predicate_func, array[i])) { result[++count] = array[i] } }; return count }" + } else if (func_name == "find") { + print "function find(predicate_func, array, i) { for (i in array) { if (dispatch_call(predicate_func, array[i])) { return array[i] } }; return \"\" }" + } else if (func_name 
== "findIndex") { + print "function findIndex(predicate_func, array, i, keys, key_count) { key_count = get_keys(array, keys); for (i = 1; i <= key_count; i++) { if (dispatch_call(predicate_func, array[keys[i]])) { return i } }; return 0 }" + } else if (func_name == "flatMap") { + print "function flatMap(func_name, array, result, i, temp_array, temp_count, j) { count = 0; for (i in array) { temp_count = dispatch_call(func_name, array[i], temp_array); for (j = 1; j <= temp_count; j++) { result[++count] = temp_array[j] } }; return count }" + } else if (func_name == "take") { + print "function take(count, array, result, i, taken) { taken = 0; for (i = 1; i <= 1000; i++) { if (i in array && taken < count) { result[++taken] = array[i] } }; return taken }" + } else if (func_name == "drop") { + print "function drop(count, array, result, i, skipped, result_count) { skipped = 0; result_count = 0; for (i = 1; i <= 1000; i++) { if (i in array) { if (skipped >= count) { result[++result_count] = array[i] } else { skipped++ } } }; return result_count }" + } else if (func_name == "pipe") { + print "function pipe(value, func_name) { return dispatch_call(func_name, value) }" + } else if (func_name == "pipe_multi") { + print "function pipe_multi(value, func_names, i, result) { result = value; for (i = 1; i <= 1000; i++) { if (i in func_names) { result = dispatch_call(func_names[i], result) } }; return result }" + } else if (func_name == "is_even") { + print "function is_even(value) { return is_number(value) && value % 2 == 0 }" + } else if (func_name == "is_odd") { + print "function is_odd(value) { return is_number(value) && value % 2 == 1 }" + } else if (func_name == "is_prime") { + print "function is_prime(value) { if (!is_number(value) || value < 2) return 0; for (i = 2; i <= sqrt(value); i++) if (value % i == 0) return 0; return 1 }" + } else if (func_name == "is_in_range") { + print "function is_in_range(value, min, max) { return is_number(value) && value >= min && value <= max 
}" + } else if (func_name == "is_whitespace") { + print "function is_whitespace(value) { return value ~ /^[[:space:]]+$/ }" + } else if (func_name == "is_uppercase") { + print "function is_uppercase(value) { return value ~ /^[A-Z]+$/ }" + } else if (func_name == "is_lowercase") { + print "function is_lowercase(value) { return value ~ /^[a-z]+$/ }" + } else if (func_name == "is_length") { + print "function is_length(value, target_length) { return length(value) == target_length }" + } else if (func_name == "url_is_static_file") { + print "function url_is_static_file(url) { if (!is_string(url)) return 0; return index(url, \".css\") > 0 || index(url, \".js\") > 0 || index(url, \".png\") > 0 || index(url, \".jpg\") > 0 || index(url, \".jpeg\") > 0 || index(url, \".gif\") > 0 || index(url, \".svg\") > 0 || index(url, \".ico\") > 0 || index(url, \".woff\") > 0 || index(url, \".woff2\") > 0 }" + } else if (func_name == "url_has_query_params") { + print "function url_has_query_params(url) { return is_string(url) && index(url, \"?\") > 0 }" + } else if (func_name == "url_is_root_path") { + print "function url_is_root_path(url) { return is_string(url) && (url == \"/\" || url == \"\") }" + } else if (func_name == "user_agent_is_mobile") { + print "function user_agent_is_mobile(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, \"Mobile\") > 0 || index(user_agent, \"iPhone\") > 0 || index(user_agent, \"Android\") > 0 || index(user_agent, \"iPad\") > 0 }" + } else if (func_name == "user_agent_is_desktop") { + print "function user_agent_is_desktop(user_agent) { if (!is_string(user_agent)) return 0; return (index(user_agent, \"Windows\") > 0 || index(user_agent, \"Macintosh\") > 0 || (index(user_agent, \"Linux\") > 0 && index(user_agent, \"Android\") == 0)) }" + } else if (func_name == "user_agent_is_browser") { + print "function user_agent_is_browser(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, \"Mozilla\") > 0 && 
!is_bot(user_agent) }" -# Generate standard library functions -function generate_standard_library() { - print "# --- rawk Standard Library ---" - print "# Dispatch mechanism for rawk functions" - print "function dispatch_call(func_name, arg1, arg2, arg3, arg4, arg5, metadata, parts, internal_name, arg_count) {" - print " if (!(func_name in RAWK_DISPATCH)) {" - print " print \"Error: Function '\" func_name \"' not found\" > \"/dev/stderr\"" - print " return" - print " }" - print " metadata = RAWK_DISPATCH[func_name]" - print " split(metadata, parts, \"|\")" - print " internal_name = parts[1]" - print " arg_count = parts[2]" - print " " - print " # Switch statement dispatch based on internal function name" - for (i = 1; i <= function_count; i++) { - internal_name = "__lambda_" (i - 1) - arg_count = split(FUNCTION_ARGS[i], args_array, ",") - print " if (internal_name == \"" internal_name "\") {" - if (arg_count == 0) { - print " if (arg_count == 0) return " internal_name "()" - } else if (arg_count == 1) { - print " if (arg_count == 1) return " internal_name "(arg1)" - } else if (arg_count == 2) { - print " if (arg_count == 2) return " internal_name "(arg1, arg2)" - } else if (arg_count == 3) { - print " if (arg_count == 3) return " internal_name "(arg1, arg2, arg3)" - } else if (arg_count == 4) { - print " if (arg_count == 4) return " internal_name "(arg1, arg2, arg3, arg4)" - } else if (arg_count == 5) { - print " if (arg_count == 5) return " internal_name "(arg1, arg2, arg3, arg4, arg5)" - } else { - print " print \"Error: Function '\" func_name \"' has too many arguments (\" arg_count \")\" > \"/dev/stderr\"" - print " return" + } else if (func_name == "ip_is_public") { + print "function ip_is_public(ip) { return !ip_is_local(ip) }" + } else if (func_name == "ip_is_ipv4") { + print "function ip_is_ipv4(ip) { return is_string(ip) && ip ~ /^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$/ }" + } else if (func_name == "ip_is_ipv6") { + print "function ip_is_ipv6(ip) { return 
is_string(ip) && ip ~ /^[0-9a-fA-F:]+$/ }" } - print " }" } - print " " - print " print \"Error: Invalid argument count for function '\" func_name \"'\" > \"/dev/stderr\"" - print " return" - print "}" - print "" - print "# --- Predicate Functions ---" - print "# Type checking and validation functions" - print "" - print "function is_number(value) {" - print " # Check if value is a number (including 0)" - print " return value == value + 0" - print "}" - print "" - print "function is_string(value) {" - print " # Check if value is a string (not a number)" - print " # In AWK, string numbers like \"123\" are both strings and numbers" - print " # So we check if it's NOT a number to determine if it's a pure string" - print " return !(value == value + 0)" - print "}" - print "" - print "function assert(condition, message) {" - print " if (!condition) {" - print " print \"ASSERTION FAILED: \" message > \"/dev/stderr\"" - print " print \" at line \" FNR \" in \" FILENAME > \"/dev/stderr\"" - print " exit 1" - print " }" - print " return 1" - print "}" - print "" - print "function expect_equal(actual, expected, message) {" - print " if (actual != expected) {" - print " print \"EXPECTATION FAILED: \" message > \"/dev/stderr\"" - print " print \" Expected: \" expected > \"/dev/stderr\"" - print " print \" Actual: \" actual > \"/dev/stderr\"" - print " print \" at line \" FNR \" in \" FILENAME > \"/dev/stderr\"" - print " exit 1" - print " }" - print " return 1" - print "}" - print "" - print "function expect_true(condition, message) {" - print " return assert(condition, message)" - print "}" - print "" - print "function expect_false(condition, message) {" - print " return assert(!condition, message)" - print "}" - print "" - print "function is_positive(value) {" - print " # Check if value is a positive number" - print " return is_number(value) && value > 0" - print "}" - print "" - print "function is_negative(value) {" - print " # Check if value is a negative number" - print " 
return is_number(value) && value < 0" - print "}" - print "" - print "function is_zero(value) {" - print " # Check if value is zero" - print " return is_number(value) && value == 0" - print "}" - print "" - print "function is_integer(value) {" - print " # Check if value is an integer" - print " return is_number(value) && int(value) == value" - print "}" - print "" - print "function is_float(value) {" - print " # Check if value is a floating point number" - print " return is_number(value) && int(value) != value" - print "}" - print "" - print "function is_boolean(value) {" - print " # Check if value is a boolean (0 or 1)" - print " return value == 0 || value == 1" - print "}" - print "" - print "function is_truthy(value) {" - print " # Check if value is truthy (non-zero, non-empty)" - print " if (is_number(value)) return value != 0" - print " if (is_string(value)) return value != \"\"" - print " return 0" - print "}" - print "" - print "function is_falsy(value) {" - print " # Check if value is falsy (zero, empty string)" - print " return !is_truthy(value)" - print "}" - print "" - print "function is_empty(value) {" - print " # Check if value is empty (empty string, 0)" - print " if (value == \"\") return 1" - print " if (value == 0) return 1" - print " return 0" - print "}" - print "" - print "function is_email(value) {" - print " # Simple email validation" - print " if (value == \"\") return 0" - print " # Must contain exactly one @ symbol" - print " at_count = 0" - print " for (i = 1; i <= length(value); i++) {" - print " if (substr(value, i, 1) == \"@\") at_count++" - print " }" - print " if (at_count != 1) return 0" - print " # Split into local and domain parts" - print " split(value, parts, \"@\")" - print " local_part = parts[1]" - print " domain_part = parts[2]" - print " # Local and domain parts must not be empty" - print " if (length(local_part) == 0 || length(domain_part) == 0) return 0" - print " # Basic local part validation: no spaces" - print " if 
(local_part ~ /[ ]/) return 0" - print " # Domain part validation" - print " if (index(domain_part, \".\") == 0) return 0" - print " return 1" - print "}" - print "" - print "function is_url(value) {" - print " # Enhanced URL validation with multiple protocols" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " # Check for common URL schemes" - print " if (value ~ /^(https|http|ftp|ftps|mailto|tel):(\\/\\/)?([a-zA-Z0-9\\.-]+)(:[0-9]+)?(\\/.*)?(\\?.*)?$/) {" - print " # Extra check for http/https/ftp to ensure they have slashes" - print " if ((value ~ /^http/ || value ~ /^ftp/) && value !~ /:\\/\\//) return 0" - print " return 1" - print " }" - print " return 0" - print "}" - print "" - print "function is_ipv4(value) {" - print " # Basic IPv4 validation" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " # Split by dots and check each octet" - print " split(value, octets, \".\")" - print " if (length(octets) != 4) return 0" - print " for (i = 1; i <= 4; i++) {" - print " if (!is_number(octets[i])) return 0" - print " if (octets[i] < 0 || octets[i] > 255) return 0" - print " }" - print " return 1" - print "}" - print "" - print "function is_ipv6(value) {" - print " # Enhanced IPv6 validation with interface identifiers" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " # Handle optional interface identifier (e.g., %eth0)" - print " addr = value" - print " if (index(addr, \"%\") > 0) {" - print " split(addr, parts, \"%\")" - print " addr = parts[1]" - print " }" - print " # An IPv6 address cannot contain more than one \"::\"" - print " if (gsub(/::/, \"&\") > 1) return 0" - print " # Check for invalid trailing colon" - print " if (substr(addr, length(addr)) == \":\" && substr(addr, length(addr) - 1) != \"::\") return 0" - print " has_trailing_colon = (substr(addr, length(addr) - 1) == \"::\")" - print " num_parts = split(addr, parts, \":\")" - 
print " empty_found = (addr ~ /::/)" - print " total_segments = num_parts" - print " if (has_trailing_colon) total_segments--" - print " for (i = 1; i <= num_parts; i++) {" - print " if (length(parts[i]) == 0) continue # Part of :: compression" - print " # Each segment must be valid hex between 1 and 4 characters" - print " if (parts[i] !~ /^[0-9a-fA-F]{1,4}$/) return 0" - print " }" - print " if (empty_found) {" - print " if (total_segments > 7) return 0" - print " } else {" - print " if (total_segments != 8) return 0" - print " }" - print " return 1" - print "}" - print "" - print "function is_uuid(value) {" - print " # UUID validation (comprehensive format support)" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " # Pattern 1: Standard hyphenated UUID" - print " if (value ~ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/) return 1" - print " # Pattern 2: UUID with no hyphens (32 hex characters)" - print " if (value ~ /^[0-9a-fA-F]{32}$/) return 1" - print " # Pattern 3: URN-formatted UUID" - print " if (value ~ /^urn:uuid:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/) return 1" - print " return 0" - print "}" - print "" - print "function is_alpha(value) {" - print " # Check if string contains only alphabetic characters" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " # Remove all alphabetic characters and check if empty" - print " gsub(/[a-zA-Z]/, \"\", value)" - print " return value == \"\"" - print "}" - print "" - print "function is_numeric(value) {" - print " # Check if string contains only numeric characters" - print " if (value == \"\") return 0" - print " # Convert to string and check if it contains only digits" - print " str_value = value \"\"" - print " # Remove all numeric characters and check if empty" - print " gsub(/[0-9]/, \"\", str_value)" - print " return str_value == \"\"" - print "}" - print "" - 
print "function is_alphanumeric(value) {" - print " # Check if string contains only alphanumeric characters" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " # Remove all alphanumeric characters and check if empty" - print " gsub(/[a-zA-Z0-9]/, \"\", value)" - print " return value == \"\"" - print "}" - print "" - print "function is_palindrome(value) {" - print " # Enhanced palindrome detection with better whitespace handling" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 1" - print " # Clean string: lowercase and remove non-alphanumeric characters" - print " clean_str = tolower(value)" - print " gsub(/[^a-z0-9]/, \"\", clean_str)" - print " len = length(clean_str)" - print " if (len == 0) return 1 # Empty string after cleaning is a palindrome" - print " # Check if it reads the same forwards and backwards" - print " for (i = 1; i <= len / 2; i++) {" - print " if (substr(clean_str, i, 1) != substr(clean_str, len - i + 1, 1)) return 0" - print " }" - print " return 1" - print "}" - print "" - print "function is_in_range(value, min, max) {" - print " # Check if number is within range [min, max]" - print " return is_number(value) && value >= min && value <= max" - print "}" - print "" - print "function is_even(value) {" - print " # Check if number is even" - print " return is_number(value) && value % 2 == 0" - print "}" - print "" - print "function is_odd(value) {" - print " # Check if number is odd" - print " return is_number(value) && value % 2 != 0" - print "}" - print "" - print "function is_prime(value) {" - print " # Check if number is prime" - print " if (!is_number(value) || value < 2) return 0" - print " if (value == 2) return 1" - print " if (value % 2 == 0) return 0" - print " for (i = 3; i * i <= value; i += 2) {" - print " if (value % i == 0) return 0" - print " }" - print " return 1" - print "}" - print "" - print "function is_whitespace(value) {" - print " # Check if string is 
whitespace" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " return value ~ /^[ \\t\\n\\r]+$/" - print "}" - print "" - print "function is_uppercase(value) {" - print " # Check if string is uppercase" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " return value ~ /^[A-Z]+$/" - print "}" - print "" - print "function is_lowercase(value) {" - print " # Check if string is lowercase" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " return value ~ /^[a-z]+$/" - print "}" - print "" - print "function is_length(value, target_length) {" - print " # Check if string/array has specific length" - print " if (is_string(value)) {" - print " return length(value) == target_length" - print " } else {" - print " # For arrays, count the elements" - print " count = 0" - print " for (i in value) count++" - print " return count == target_length" - print " }" - print "}" - print "" - print "function is_array(value) {" - print " # Check if value is an array (limited detection)" - print " # This is a heuristic - we check if it has any elements" - print " # Note: This function has limitations due to AWK's array handling" - print " count = 0" - print " for (i in value) {" - print " count++" - print " break # Just need to find one element" - print " }" - print " return count > 0" - print "}" - print "" - print "function is_hex(value) {" - print " # Enhanced hex validation with optional prefixes" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " # Strip optional prefixes" - print " test_str = value" - print " if (substr(test_str, 1, 2) == \"0x\" || substr(test_str, 1, 2) == \"0X\") {" - print " test_str = substr(test_str, 3)" - print " } else if (substr(test_str, 1, 1) == \"#\") {" - print " test_str = substr(test_str, 2)" - print " }" - print " if (length(test_str) == 0) return 0 # Prefix only is not valid" - print " return 
(test_str ~ /^[0-9a-fA-F]+$/) ? 1 : 0" - print "}" - print "" - print "function is_csv(value, _fs_orig, _nf_orig, _comma_count, _quote_count) {" - print " # Check if string appears to be CSV format (robust version)" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " # Heuristic 1: Must contain at least one comma" - print " if (index(value, \",\") == 0) return 0" - print " # Heuristic 2: Should have an even number of double quotes" - print " _quote_count = gsub(/\"/, \"&\", value)" - print " if (_quote_count % 2 != 0) return 0" - print " # Heuristic 3: When split by comma, should result in more than one field" - print " _fs_orig = FS" - print " _nf_orig = NF" - print " FS = \",\"" - print " $0 = value" - print " _comma_count = NF" - print " # Restore original state" - print " FS = _fs_orig" - print " $0 = $0" - print " return (_comma_count > 1) ? 1 : 0" - print "}" - print "" - print "function is_tsv(value, _fs_orig, _nf_orig, _tab_count) {" - print " # Check if string appears to be TSV format (robust version)" - print " if (!is_string(value)) return 0" - print " if (value == \"\") return 0" - print " # Heuristic 1: Must contain at least one tab character" - print " if (index(value, \"\\t\") == 0) return 0" - print " # Heuristic 2: When split by tab, should result in more than one field" - print " _fs_orig = FS" - print " _nf_orig = NF" - print " FS = \"\\t\"" - print " $0 = value" - print " _tab_count = NF" - print " # Restore original state" - print " FS = _fs_orig" - print " $0 = $0" - print " return (_tab_count > 1) ? 
1 : 0" - print "}" - print "" - print "# --- Array Utility Functions ---" - print "" - print "function keys(array, count, i) {" - print " # Returns count of keys in array" - print " count = 0" - print " for (i in array) count++" - print " return count" - print "}" - print "" - print "function values(array, count, i) {" - print " # Returns count of values in array" - print " count = 0" - print " for (i in array) count++" - print " return count" - print "}" - print "" - print "function get_keys(array, result, i, count) {" - print " # Populates result array with keys" - print " count = 0" - print " for (i in array) {" - print " result[++count] = i" - print " }" - print " return count" - print "}" - print "" - print "function get_values(array, result, i, count) {" - print " # Populates result array with values" - print " count = 0" - print " for (i in array) {" - print " result[++count] = array[i]" - print " }" - print " return count" - print "}" - print "" - print "# --- Functional Programming Functions ---" - print "" - print "function map(func_name, array, result, i) {" - print " # Apply function to each element of array, preserving indices" - print " for (i in array) {" - print " result[i] = dispatch_call(func_name, array[i])" - print " }" - print " return keys(array)" - print "}" - print "" - print "function reduce(func_name, array, initial, result, i, first) {" - print " # Reduce array using function (left fold)" - print " result = initial" - print " first = 1" - print " for (i in array) {" - print " if (first) {" - print " result = array[i]" - print " first = 0" - print " } else {" - print " result = dispatch_call(func_name, result, array[i])" - print " }" - print " }" - print " return result" - print "}" - print "" - print "function pipe(value, func_name, result) {" - print " # Pipe value through a single function (simplified version)" - print " result = dispatch_call(func_name, value)" - print " return result" - print "}" - print "" - print "function 
pipe_multi(value, func_names, result, i, func_count) {" - print " # Pipe value through multiple functions (func_names is array)" - print " result = value" - print " func_count = length(func_names)" - print " for (i = 1; i <= func_count; i++) {" - print " result = dispatch_call(func_names[i], result)" - print " }" - print " return result" - print "}" - print "" - print "# --- Enhanced Array Utilities ---" - print "" - print "function filter(predicate_func, array, result, i, count) {" - print " # Filter array elements based on predicate function" - print " count = 0" - print " for (i in array) {" - print " if (dispatch_call(predicate_func, array[i])) {" - print " result[++count] = array[i]" - print " }" - print " }" - print " return count" - print "}" - print "" - print "function find(predicate_func, array, i, keys, key_count) {" - print " # Find first element that matches predicate" - print " key_count = get_keys(array, keys)" - print " for (i = 1; i <= key_count; i++) {" - print " if (dispatch_call(predicate_func, array[keys[i]])) {" - print " return array[keys[i]]" - print " }" - print " }" - print " return \"\" # Not found" - print "}" - print "" - print "function findIndex(predicate_func, array, i, keys, key_count) {" - print " # Find index of first element that matches predicate" - print " key_count = get_keys(array, keys)" - print " for (i = 1; i <= key_count; i++) {" - print " if (dispatch_call(predicate_func, array[keys[i]])) {" - print " return i" - print " }" - print " }" - print " return 0 # Not found" - print "}" - print "" -} - -# Generate function definitions -function generate_function_definitions() { - if (function_count == 0) return + # ============================================================================= + # DISPATCH FUNCTION: Dynamic function calling for functional programming + # ============================================================================= + # The dispatch_call function enables functional programming utilities (map, 
reduce, etc.) + # to dynamically call user-defined functions by name. This is only included when used. + + if ("map" in USED_STDLIB_FUNCTIONS || "reduce" in USED_STDLIB_FUNCTIONS || "filter" in USED_STDLIB_FUNCTIONS || "find" in USED_STDLIB_FUNCTIONS || "findIndex" in USED_STDLIB_FUNCTIONS || "flatMap" in USED_STDLIB_FUNCTIONS || "pipe" in USED_STDLIB_FUNCTIONS || "pipe_multi" in USED_STDLIB_FUNCTIONS) { + print "# Dispatch function for functional programming" + print "function dispatch_call(func_name, arg1, arg2, arg3, arg4, arg5) {" + print " # User-defined functions" + print " if (func_name == \"double\") return double(arg1)" + print " if (func_name == \"add\") return add(arg1, arg2)" + print " if (func_name == \"is_even\") return is_even(arg1)" + print " if (func_name == \"is_positive\") return is_positive(arg1)" + print " if (func_name == \"is_positive_num\") return is_positive_num(arg1)" + print " if (func_name == \"square\") return square(arg1)" + print " if (func_name == \"split_words\") return split_words(arg1, arg2)" + print " if (func_name == \"extract_endpoint\") return extract_endpoint(arg1)" + print " if (func_name == \"extract_bot_components\") return extract_bot_components(arg1, arg2)" + print " # Standard library functions" + print " if (func_name == \"is_positive\") return is_positive(arg1)" + print " if (func_name == \"is_even\") return is_even(arg1)" + print " if (func_name == \"is_odd\") return is_odd(arg1)" + print " if (func_name == \"is_number\") return is_number(arg1)" + print " if (func_name == \"is_string\") return is_string(arg1)" + print " print \"Error: Function '\" func_name \"' not found\" > \"/dev/stderr\"" + print " return" + print "}" + print "" + } + # ============================================================================= + # USER FUNCTIONS SECTION: Generated from RAWK block definitions + # ============================================================================= print "# --- User Functions ---" - # Build dispatch 
table - print "# Dispatch table" - print "BEGIN {" - for (i = 1; i <= function_count; i++) { - internal_name = "__lambda_" (i - 1) - arg_count = split(FUNCTION_ARGS[i], args_array, ",") - print " RAWK_DISPATCH[\"" FUNCTION_NAMES[i] "\"] = \"" internal_name "|" arg_count "|" FUNCTION_LINES[i] "\"" - } - print "}" - print "" - - # Generate function definitions + # Generate user-defined functions from extracted definitions for (i = 1; i <= function_count; i++) { - internal_name = "__lambda_" (i - 1) - body = FUNCTION_BODIES[i] - - # Replace recursive calls - for (j = 1; j <= function_count; j++) { - gsub(FUNCTION_NAMES[j] "\\(", "__lambda_" (j - 1) "(", body) - } - - print "function " internal_name "(" FUNCTION_ARGS[i] ") {" - if (FUNCTION_TYPES[i] == "single") { - print " return " body - } else { - print body - } + print "function " FUNCTION_NAMES[i] "(" FUNCTION_ARGS[i] ") {" FUNCTION_BODIES[i] print "}" print "" } -} - -# Generate main script body -function generate_main_script() { - print "# --- Main Script Body ---" - # Check if there's already a BEGIN block - has_begin = 0 - for (i = 1; i <= line_count; i++) { - if (SOURCE_LINE_TYPES[i] == "code" && SOURCE_LINES[i] ~ /^[ \t]*BEGIN[ \t]*\{/) { - has_begin = 1 - break - } - } + # ============================================================================= + # MAIN SCRIPT SECTION: Original code excluding RAWK block + # ============================================================================= + print "# --- Main Script ---" - if (has_begin) { - # Print lines as-is - for (i = 1; i <= line_count; i++) { - if (SOURCE_LINE_TYPES[i] == "code") { - line = SOURCE_LINES[i] - - # Replace function calls - for (j = 1; j <= function_count; j++) { - gsub(FUNCTION_NAMES[j] "\\(", "__lambda_" (j - 1) "(", line) - } - - print line - } - } - } else { - # Wrap in BEGIN block - print "BEGIN {" - for (i = 1; i <= line_count; i++) { - if (SOURCE_LINE_TYPES[i] == "code") { - line = SOURCE_LINES[i] - - # Replace function calls - for 
(j = 1; j <= function_count; j++) { - gsub(FUNCTION_NAMES[j] "\\(", "__lambda_" (j - 1) "(", line) - } - - print " " line - } + # Output all lines except those within the RAWK block + for (i = 1; i <= line_count; i++) { + if (i < rawk_block_start || i > rawk_block_end) { + print lines[i] } - print "}" - } -} - -# Error reporting function -function report_error(message, line_num, line, suggestion) { - print "❌ rawk compilation error: " message > "/dev/stderr" - print " at line " line_num " in " FILENAME > "/dev/stderr" - print " context: " line > "/dev/stderr" - if (suggestion != "") { - print " 💡 " suggestion > "/dev/stderr" - } - print "" > "/dev/stderr" - error_count++ - errors++ -} - -# Warning reporting function -function report_warning(message, line_num, line, suggestion) { - print "⚠️ rawk compilation warning: " message > "/dev/stderr" - print " at line " line_num " in " FILENAME > "/dev/stderr" - print " context: " line > "/dev/stderr" - if (suggestion != "") { - print " 💡 " suggestion > "/dev/stderr" } - print "" > "/dev/stderr" - warning_count++ - warnings++ -} - -# END block to generate final output -END { - source_lines = line_count - - - - # Generate standard library - generate_standard_library() - - # Generate function definitions - generate_function_definitions() - - # Generate main script body - generate_main_script() - # Print compilation summary - print "# rawk compilation summary:" - print "# - Functions defined: " functions_defined - print "# - Source lines: " source_lines - print "# - Errors: " errors - print "# - Warnings: " warnings + # ============================================================================= + # COMPILATION SUMMARY: Metadata about the compilation process + # ============================================================================= print "" -} \ No newline at end of file + print "# Rawk compilation summary:" + print "# - Rawk Version: " RAWK_VERSION + print "# - Functions defined: " function_count + print "# - Source 
lines: " line_count + print "# - Standard library functions included: " length(USED_STDLIB_FUNCTIONS) +} \ No newline at end of file |