diff options
Diffstat (limited to 'awk/rawk/scratch/tests_old/example_output.awk')
-rw-r--r-- | awk/rawk/scratch/tests_old/example_output.awk | 232 |
1 files changed, 232 insertions, 0 deletions
diff --git a/awk/rawk/scratch/tests_old/example_output.awk b/awk/rawk/scratch/tests_old/example_output.awk new file mode 100644 index 0000000..d0bff1d --- /dev/null +++ b/awk/rawk/scratch/tests_old/example_output.awk @@ -0,0 +1,232 @@ +# Generated by rawk v2.0.0 +# Source: example.rawk + +# --- Standard Library --- +function is_number(value) { return value == value + 0 } +function is_string(value) { return !(value == value + 0) } + +function get_keys(array, result, i, count) { count = 0; for (i in array) { result[++count] = i }; return count } + +function ip_is_local(ip) { if (!is_string(ip)) return 0; return index(ip, "127.0.0.1") > 0 || index(ip, "192.168.") > 0 || index(ip, "10.") > 0 || index(ip, "172.") > 0 } +function is_bot(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, "bot") > 0 || index(user_agent, "crawler") > 0 || index(user_agent, "spider") > 0 || index(user_agent, "Googlebot") > 0 || index(user_agent, "Bingbot") > 0 } + +function flatMap(func_name, array, result, i, temp_array, temp_count, j) { count = 0; for (i in array) { temp_count = dispatch_call(func_name, array[i], temp_array); for (j = 1; j <= temp_count; j++) { result[++count] = temp_array[j] } }; return count } +function user_agent_is_desktop(user_agent) { if (!is_string(user_agent)) return 0; return (index(user_agent, "Windows") > 0 || index(user_agent, "Macintosh") > 0 || (index(user_agent, "Linux") > 0 && index(user_agent, "Android") == 0)) } +function map(func_name, array, result, i, count) { count = 0; for (i in array) { result[++count] = dispatch_call(func_name, array[i]) }; return count } +function http_is_server_error(status) { return status >= 500 && status < 600 } +function http_is_client_error(status) { return status >= 400 && status < 500 } +function http_is_mutating_method(method) { return method == "POST" || method == "PUT" || method == "DELETE" || method == "PATCH" } +function url_is_static_file(url) { if (!is_string(url)) return 0; return index(url, ".css") > 0 || index(url, ".js") > 0 || index(url, ".png") > 0 || index(url, ".jpg") > 0 || index(url, ".jpeg") > 0 || index(url, ".gif") > 0 || index(url, ".svg") > 0 || index(url, ".ico") > 0 || index(url, ".woff") > 0 || index(url, ".woff2") > 0 } +function take(count, array, result, i, taken) { taken = 0; for (i in array) { if (taken < count) { result[++taken] = array[i] } }; return taken } +function ip_is_public(ip) { return !ip_is_local(ip) } +function user_agent_is_mobile(user_agent) { if (!is_string(user_agent)) return 0; return index(user_agent, "Mobile") > 0 || index(user_agent, "iPhone") > 0 || index(user_agent, "Android") > 0 || index(user_agent, "iPad") > 0 } +# Dispatch function for functional programming +function dispatch_call(func_name, arg1, arg2, arg3, arg4, arg5) { + # User-defined functions + if (func_name == "double") return double(arg1) + if (func_name == "add") return add(arg1, arg2) + if (func_name == "is_even") return is_even(arg1) + if (func_name == "is_positive") return is_positive(arg1) + if (func_name == "is_positive_num") return is_positive_num(arg1) + if (func_name == "square") return square(arg1) + if (func_name == "split_words") return split_words(arg1, arg2) + if (func_name == "extract_endpoint") return extract_endpoint(arg1) + if (func_name == "extract_bot_components") return extract_bot_components(arg1, arg2) + # Standard library functions + if (func_name == "is_positive") return is_positive(arg1) + if (func_name == "is_even") return is_even(arg1) + if (func_name == "is_odd") return is_odd(arg1) + if (func_name == "is_number") return is_number(arg1) + if (func_name == "is_string") return is_string(arg1) + print "Error: Function '" func_name "' not found" > "/dev/stderr" + return +} + + +# --- User Functions --- +function extract_method(request) { split(request, parts, " ") + return parts[1] + +} + +function extract_url(request) { split(request, parts, " ") + return parts[2] + +} + +function format_error_report(ip,status,url,user_agent) { return ip " - " status " - " url " (" user_agent ")" + +} + +function format_success_report(ip,method,url,bytes) { return ip " - " method " " url " (" bytes " bytes)" + +} + +function is_success(status) { return status >= 200 && status < 300 + +} + +function is_api_request(url) { return index(url, "/api/") > 0 + +} + +function is_large_request(bytes) { return bytes > 1048576 # 1MB + +} + +function extract_endpoint(url) { return url + +} + +function extract_bot_components(user_agent,result) { split(user_agent, result, " ") + return length(result) + +} + +# --- Main Script --- + # Main processing pipeline + BEGIN { + print "Apache Log Analysis Report" + print "=============================" + print "" + } + + + # Process each log line + { + # Parse Apache log format: IP - - [timestamp] "method url status" bytes "referer" "user-agent" + # Note that we use a series of simpler regex matches, rather than trying to do it all at once + if (match($0, /^([0-9.]+)/)) { + ip = substr($0, RSTART, RLENGTH) + + # Extract request (method url protocol) + if (match($0, /"([^"]+)"/)) { + request = substr($0, RSTART + 1, RLENGTH - 2) + # Extract method and URL from request + method = extract_method(request) + url = extract_url(request) + } + + # Extract status code (number after the request) + if (match($0, /" ([0-9]+) /)) { + status = substr($0, RSTART + 1, RLENGTH - 2) + # Remove leading/trailing spaces + gsub(/^[ \t]+|[ \t]+$/, "", status) + } + + # Extract bytes (number after request) + if (match($0, /" ([0-9]+) /)) { + bytes = substr($0, RSTART + 1, RLENGTH - 2) + } + + # Extract user agent (last quoted field) + if (match($0, /"([^"]*)"$/)) { + user_agent = substr($0, RSTART + 1, RLENGTH - 2) + } + + # Store for analysis + request_count++ + + # Real-time processing using some standard library predicates + if (http_is_server_error(status)) { + server_error_count++ + error_report = format_error_report(ip, status, url, user_agent) + print "SERVER ERROR: " error_report + } else if (http_is_client_error(status)) { + client_error_count++ + error_report = format_error_report(ip, status, url, user_agent) + print "CLIENT ERROR: " error_report + } else if (is_success(status)) { + success_count++ + success_report = format_success_report(ip, method, url, bytes) + print "✓ " success_report + } + + # Track different types of requests + if (is_api_request(url)) { + api_count++ + api_urls[api_count] = url + } + + if (url_is_static_file(url)) { + static_count++ + static_urls[static_count] = url + } + + if (http_is_mutating_method(method)) { + mutation_count++ + if (ip_is_public(ip)) { + print "EXTERNAL MUTATION: " ip " " method " " url + } + } + + # Track user types + if (is_bot(user_agent)) { + bot_count++ + bot_agents[bot_count] = user_agent + } else if (user_agent_is_mobile(user_agent)) { + mobile_count++ + } else if (user_agent_is_desktop(user_agent)) { + desktop_count++ + } + + # Track large requests + if (is_large_request(bytes)) { + large_count++ + large_urls[large_count] = url + } + } + } + + END { + print "" + print "Summary Statistics" + print "====================" + print "Total Requests:", request_count + print "Successful:", success_count + print "Client Errors:", client_error_count + print "Server Errors:", server_error_count + print "Total Errors:", client_error_count + server_error_count + print "Error Rate:", sprintf("%.2f%%", ((client_error_count + server_error_count) / request_count) * 100) + print "API Requests:", api_count + print "Static Files:", static_count + print "Mutating Requests:", mutation_count + print "Mobile Users:", mobile_count + print "Desktop Users:", desktop_count + print "Bot Requests:", bot_count + print "Large Requests (>1MB):", large_count + + # Some functional patterns at play, map, flatMap, and take. + if (api_count > 0) { + print "" + print "API Usage Analysis" + print "====================" + + # Use map to extract API endpoints + endpoint_count = map("extract_endpoint", api_urls, endpoints) + print "API Endpoints found:", endpoint_count + } + + if (bot_count > 0) { + print "" + print "Bot Activity Analysis" + print "========================" + + # Use flatMap to extract bot user agent components + bot_components_count = flatMap("extract_bot_components", bot_agents, bot_components) + print "Bot components analyzed:", bot_components_count + + # Use take to show top 3 bot components + top_components_count = take(3, bot_components, top_components) + print "Top bot components:", top_components_count + } + + print "" + print "End analysis" + } + +# Rawk compilation summary: +# - Rawk Version: 2.0.0 +# - Functions defined: 9 +# - Source lines: 182 +# - Standard library functions included: 11 |