# Log parsing with rawk
# This demonstrates processing common log formats like Apache, syslog, etc.
#
# Input : a mixed stream of Apache access-log lines and syslog lines.
# Output: one classified line per recognised record, followed by a summary
#         report (request statistics, request types, log sources) in END.

# Function to parse Apache log entries
#
# Builds a one-line summary of a request based on its HTTP status:
#   status >= 400 -> "ERROR: ..."     (includes the client ip)
#   status >= 300 -> "REDIRECT: ..."  (includes the client ip)
#   otherwise     -> "SUCCESS: ..."   (includes the byte count)
# date, referer and user_agent are accepted but not used in the summary.
$parse_apache_log = (ip, date, method, url, status, bytes, referer, user_agent) -> {
    if (status >= 400) {
        return "ERROR: " status " - " method " " url " from " ip
    } else if (status >= 300) {
        return "REDIRECT: " status " - " method " " url " from " ip
    } else {
        return "SUCCESS: " status " - " method " " url " (" bytes " bytes)"
    }
};

# Function to parse syslog entries
#
# Classifies a message as ERROR, WARNING or INFO by looking for the words
# error / warning anywhere in the message text. The search is done on a
# lower-cased copy so mixed-case spellings such as Error or Warning are
# classified as well. The original message is returned unmodified.
# timestamp and host are accepted but not used in the summary.
$parse_syslog = (timestamp, host, program, message) -> {
    if (index(tolower(message), "error") > 0) {
        return "ERROR: " program " - " message
    } else if (index(tolower(message), "warning") > 0) {
        return "WARNING: " program " - " message
    } else {
        return "INFO: " program " - " message
    }
};

# Function to categorize requests
#
# Returns one of IMAGE_REQUEST, FORM_SUBMISSION, STYLESHEET, JAVASCRIPT or
# PAGE_REQUEST. File types are decided by the extension at the END of the
# path, with any query string removed first, so that for example
# /api/data.json is not mistaken for a .js file and /app.js?v=2 still is one.
# url is a by-value scalar parameter, so trimming it here does not affect
# the caller. status is accepted but not used.
$categorize_request = (method, url, status) -> {
    if (index(url, "?") > 0) {
        url = substr(url, 1, index(url, "?") - 1)
    }
    if (method == "GET" && substr(url, length(url) - 3) == ".jpg") {
        return "IMAGE_REQUEST"
    } else if (method == "POST") {
        return "FORM_SUBMISSION"
    } else if (method == "GET" && substr(url, length(url) - 3) == ".css") {
        return "STYLESHEET"
    } else if (method == "GET" && substr(url, length(url) - 2) == ".js") {
        return "JAVASCRIPT"
    } else {
        return "PAGE_REQUEST"
    }
};

# Function to calculate request statistics
#
# data   : array of HTTP status codes (any index set).
# result : array filled in with the keys total, errors, redirects and
#          success_rate (percentage of entries with status below 300).
# Returns the number of entries in data.
# The remaining parameters (i, total, count, errors, redirects) are awk-style
# local variables and are not passed by callers. count is unused and is kept
# only so the parameter list stays unchanged.
$calculate_request_stats = (data, result, i, total, count, errors, redirects) -> {
    total = 0
    errors = 0
    redirects = 0

    for (i in data) {
        total++
        if (data[i] >= 400) {
            errors++
        } else if (data[i] >= 300) {
            redirects++
        }
    }

    result["total"] = total
    result["errors"] = errors
    result["redirects"] = redirects
    result["success_rate"] = (total > 0) ? ((total - errors - redirects) / total) * 100 : 0

    return total
};

BEGIN {
    print "=== Log Parser Report ==="
    print ""
}

# Process Apache log entries (simplified format)
# Field positions assume the Common/Combined Log Format layout:
#   ip ident user [date zone] "METHOD url protocol" status bytes ...
/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ {
    ip = $1
    date = $4 " " $5
    method = $6
    # The request line is logged as a quoted string, so field 6 starts with
    # the opening double quote (octal 042). Strip it, otherwise the method
    # never compares equal to GET or POST in categorize_request.
    gsub(/\042/, "", method)
    url = $7
    status = $9
    bytes = $10

    result = parse_apache_log(ip, date, method, url, status, bytes, "", "")
    print "APACHE: " result

    # Store for statistics
    request_count++
    status_codes[request_count] = status
    request_types[request_count] = categorize_request(method, url, status)
}

# Process syslog entries
# The month is spelled out as three characters instead of using an interval
# expression, which not every awk supports. One or more spaces are allowed
# before the day because syslog pads single-digit days with an extra space.
/^[A-Z][a-z][a-z] +[0-9]+ [0-9:]+/ {
    timestamp = $1 " " $2 " " $3
    host = $4

    # Remove trailing colon, but only when there actually is one
    program = $5
    sub(/:$/, "", program)

    # The message is everything after the first five fields. Removing the
    # fields one by one avoids searching for the text of field 6, which could
    # also occur earlier in the line (for example inside the host name).
    message = $0
    for (skip = 1; skip <= 5; skip++) {
        sub(/^[ \t]*[^ \t]+[ \t]*/, "", message)
    }

    result = parse_syslog(timestamp, host, program, message)
    print "SYSLOG: " result

    # Store for statistics
    log_count++
    log_programs[log_count] = program
}

END {
    print ""
    print "=== Request Statistics ==="

    if (request_count > 0) {
        calculate_request_stats(status_codes, request_stats)
        print "Total requests: " request_stats["total"]
        print "Error rate: " request_stats["errors"] " (" (request_stats["errors"] / request_stats["total"] * 100) "%)"
        print "Success rate: " request_stats["success_rate"] "%"
        print "Redirects: " request_stats["redirects"]
    }

    print ""
    print "=== Request Types ==="
    for (i = 1; i <= request_count; i++) {
        type = request_types[i]
        type_count[type]++
    }

    for (type in type_count) {
        print type ": " type_count[type] " requests"
    }

    print ""
    print "=== Log Sources ==="
    for (i = 1; i <= log_count; i++) {
        program = log_programs[i]
        program_count[program]++
    }

    for (program in program_count) {
        print program ": " program_count[program] " entries"
    }

    print ""
    print "=== Report Complete ==="
}