# Main processing pipeline
#
# Reads Apache access-log lines, prints a one-line report for every request
# that is classified as a server error, client error or success, and prints
# summary statistics once all input has been consumed.
#
# Predicates such as http_is_server_error(), url_is_static_file(),
# ip_is_public() and the map()/flatMap()/take() helpers are not defined in
# this file; they are expected to come from the rawk standard library.
BEGIN {
    print "Apache Log Analysis Report"
    print "============================="
    print ""

    # Start every counter at 0 so the summary prints "0" instead of an
    # empty string for categories that never occur in the input.
    request_count = 0
    success_count = 0
    client_error_count = 0
    server_error_count = 0
    api_count = 0
    static_count = 0
    mutation_count = 0
    bot_count = 0
    mobile_count = 0
    desktop_count = 0
    large_count = 0
}

RAWK {
    # Helper functions for parsing and analysis

    # Returns the first space-separated word of the request line (the method).
    $extract_method = (request) -> {
        split(request, parts, " ")
        return parts[1]
    };

    # Returns the second space-separated word of the request line (the URL).
    $extract_url = (request) -> {
        split(request, parts, " ")
        return parts[2]
    };

    # Builds the text printed after "SERVER ERROR: " / "CLIENT ERROR: ".
    $format_error_report = (ip, status, url, user_agent) -> {
        return ip " - " status " - " url " (" user_agent ")"
    };

    # Builds the text printed for a successful request.
    $format_success_report = (ip, method, url, bytes) -> {
        return ip " - " method " " url " (" bytes " bytes)"
    };

    # True for 2xx status codes. Callers must pass a numeric status.
    $is_success = (status) -> {
        return status >= 200 && status < 300
    };

    # True when the URL contains "/api/" anywhere (index() is 1-based, so a
    # match at the very start of the URL still yields a value > 0).
    $is_api_request = (url) -> {
        return index(url, "/api/") > 0
    };

    # True when the response is larger than 1 MB. Callers must pass a
    # numeric byte count.
    $is_large_request = (bytes) -> {
        return bytes > 1048576  # 1MB
    };

    # Functional programming examples

    # Identity mapping used with map() in the END block.
    $extract_endpoint = (url) -> {
        return url
    };

    # Splits a user-agent string on spaces into `result` and returns the
    # number of pieces; used with flatMap() in the END block.
    $extract_bot_components = (user_agent, result) -> {
        split(user_agent, result, " ")
        return length(result)
    };
}

# Process each log line
{
    # Parse Apache log format:
    #   IP - - [timestamp] "method url protocol" status bytes "referer" "user-agent"
    # Note that we use a series of simpler regex matches, rather than trying
    # to do it all at once.
    if (match($0, /^([0-9.]+)/)) {
        ip = substr($0, RSTART, RLENGTH)

        # Reset per-record fields so a line that lacks one of them does not
        # silently inherit the value parsed from an earlier line.
        request = ""
        method = ""
        url = ""
        status = 0
        bytes = 0
        user_agent = ""

        # Extract request (method url protocol): the first quoted field.
        if (match($0, /"([^"]+)"/)) {
            request = substr($0, RSTART + 1, RLENGTH - 2)

            # Extract method and URL from request
            method = extract_method(request)
            url = extract_url(request)
        }

        # Extract status code (number after the request). The match covers
        # `" NNN `; skip the quote and leading space and drop the trailing
        # space. "+ 0" makes the value numeric: substr() yields a string,
        # and comparing a string with a number compares them as text.
        if (match($0, /" ([0-9]+) /)) {
            status = substr($0, RSTART + 2, RLENGTH - 3) + 0
        }

        # Extract bytes: the field that follows the status code. Apache
        # writes "-" when no body was sent; "+ 0" turns that into 0 and
        # makes real sizes numeric for the > 1MB comparison.
        if (match($0, /" [0-9]+ ([0-9]+|-)/)) {
            split(substr($0, RSTART + 2, RLENGTH - 2), status_and_bytes, " ")
            bytes = status_and_bytes[2] + 0
        }

        # Extract user agent (last quoted field)
        if (match($0, /"([^"]*)"$/)) {
            user_agent = substr($0, RSTART + 1, RLENGTH - 2)
        }

        # Store for analysis
        request_count++

        # Real-time processing using some standard library predicates
        if (http_is_server_error(status)) {
            server_error_count++
            error_report = format_error_report(ip, status, url, user_agent)
            print "SERVER ERROR: " error_report
        } else if (http_is_client_error(status)) {
            client_error_count++
            error_report = format_error_report(ip, status, url, user_agent)
            print "CLIENT ERROR: " error_report
        } else if (is_success(status)) {
            success_count++
            success_report = format_success_report(ip, method, url, bytes)
            print "✓ " success_report
        }

        # Track different types of requests
        if (is_api_request(url)) {
            api_count++
            api_urls[api_count] = url
        }

        if (url_is_static_file(url)) {
            static_count++
            static_urls[static_count] = url
        }

        if (http_is_mutating_method(method)) {
            mutation_count++
            if (ip_is_public(ip)) {
                print "EXTERNAL MUTATION: " ip " " method " " url
            }
        }

        # Track user types
        # NOTE(review): is_bot() is not defined in this file - confirm it is
        # supplied by the rawk standard library under this exact name.
        if (is_bot(user_agent)) {
            bot_count++
            bot_agents[bot_count] = user_agent
        } else if (user_agent_is_mobile(user_agent)) {
            mobile_count++
        } else if (user_agent_is_desktop(user_agent)) {
            desktop_count++
        }

        # Track large requests
        if (is_large_request(bytes)) {
            large_count++
            large_urls[large_count] = url
        }
    }
}

END {
    total_errors = client_error_count + server_error_count

    # Guard the division: with no parsed requests, dividing by
    # request_count would abort the program with a division-by-zero error.
    error_rate = 0
    if (request_count > 0) {
        error_rate = (total_errors / request_count) * 100
    }

    print ""
    print "Summary Statistics"
    print "===================="
    print "Total Requests:", request_count
    print "Successful:", success_count
    print "Client Errors:", client_error_count
    print "Server Errors:", server_error_count
    print "Total Errors:", total_errors
    print "Error Rate:", sprintf("%.2f%%", error_rate)
    print "API Requests:", api_count
    print "Static Files:", static_count
    print "Mutating Requests:", mutation_count
    print "Mobile Users:", mobile_count
    print "Desktop Users:", desktop_count
    print "Bot Requests:", bot_count
    print "Large Requests (>1MB):", large_count

    # Some functional patterns at play, map, flatMap, and take.
    if (api_count > 0) {
        print ""
        print "API Usage Analysis"
        print "===================="

        # Use map to extract API endpoints
        endpoint_count = map("extract_endpoint", api_urls, endpoints)
        print "API Endpoints found:", endpoint_count
    }

    if (bot_count > 0) {
        print ""
        print "Bot Activity Analysis"
        print "========================"

        # Use flatMap to extract bot user agent components
        bot_components_count = flatMap("extract_bot_components", bot_agents, bot_components)
        print "Bot components analyzed:", bot_components_count

        # Use take to show top 3 bot components
        top_components_count = take(3, bot_components, top_components)
        print "Top bot components:", top_components_count
    }

    print ""
    print "End analysis"
}