diff options
Diffstat (limited to 'awk/rawk/tests/real_world/test_log_parser.rawk')
-rw-r--r-- | awk/rawk/tests/real_world/test_log_parser.rawk | 139 |
1 files changed, 0 insertions, 139 deletions
diff --git a/awk/rawk/tests/real_world/test_log_parser.rawk b/awk/rawk/tests/real_world/test_log_parser.rawk deleted file mode 100644 index 1abdbaf..0000000 --- a/awk/rawk/tests/real_world/test_log_parser.rawk +++ /dev/null @@ -1,139 +0,0 @@ -# Log parsing with rawk -# This demonstrates processing common log formats like Apache, syslog, etc. - -# Function to parse Apache log entries -$parse_apache_log = (ip, date, method, url, status, bytes, referer, user_agent) -> { - if (status >= 400) { - return "ERROR: " status " - " method " " url " from " ip - } else if (status >= 300) { - return "REDIRECT: " status " - " method " " url " from " ip - } else { - return "SUCCESS: " status " - " method " " url " (" bytes " bytes)" - } -}; - -# Function to parse syslog entries -$parse_syslog = (timestamp, host, program, message) -> { - if (index(message, "error") > 0 || index(message, "ERROR") > 0) { - return "ERROR: " program " - " message - } else if (index(message, "warning") > 0 || index(message, "WARNING") > 0) { - return "WARNING: " program " - " message - } else { - return "INFO: " program " - " message - } -}; - -# Function to categorize requests -$categorize_request = (method, url, status) -> { - if (method == "GET" && index(url, ".jpg") > 0) { - return "IMAGE_REQUEST" - } else if (method == "POST") { - return "FORM_SUBMISSION" - } else if (method == "GET" && index(url, ".css") > 0) { - return "STYLESHEET" - } else if (method == "GET" && index(url, ".js") > 0) { - return "JAVASCRIPT" - } else { - return "PAGE_REQUEST" - } -}; - -# Function to calculate request statistics -$calculate_request_stats = (data, result, i, total, count, errors, redirects) -> { - total = 0 - count = 0 - errors = 0 - redirects = 0 - - for (i in data) { - total++ - if (data[i] >= 400) { - errors++ - } else if (data[i] >= 300) { - redirects++ - } - } - - result["total"] = total - result["errors"] = errors - result["redirects"] = redirects - result["success_rate"] = total > 0 ? ((total - errors - redirects) / total) * 100 : 0 - - return total -}; - -BEGIN { - print "=== Log Parser Report ===" - print "" -} - -# Process Apache log entries (simplified format) -/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ { - ip = $1 - date = $4 " " $5 - method = $6 - url = $7 - status = $9 - bytes = $10 - - result = parse_apache_log(ip, date, method, url, status, bytes, "", "") - print "APACHE: " result - - # Store for statistics - request_count++ - status_codes[request_count] = status - request_types[request_count] = categorize_request(method, url, status) -} - -# Process syslog entries -/^[A-Z][a-z]{2} [0-9]+ [0-9:]+/ { - timestamp = $1 " " $2 " " $3 - host = $4 - program = substr($5, 1, length($5) - 1) # Remove trailing colon - message = substr($0, index($0, $6)) - - result = parse_syslog(timestamp, host, program, message) - print "SYSLOG: " result - - # Store for statistics - log_count++ - log_programs[log_count] = program -} - -END { - print "" - print "=== Request Statistics ===" - - if (request_count > 0) { - calculate_request_stats(status_codes, request_stats) - print "Total requests: " request_stats["total"] - print "Error rate: " request_stats["errors"] " (" (request_stats["errors"] / request_stats["total"] * 100) "%)" - print "Success rate: " request_stats["success_rate"] "%" - print "Redirects: " request_stats["redirects"] - } - - print "" - print "=== Request Types ===" - for (i = 1; i <= request_count; i++) { - type = request_types[i] - type_count[type]++ - } - - for (type in type_count) { - print type ": " type_count[type] " requests" - } - - print "" - print "=== Log Sources ===" - for (i = 1; i <= log_count; i++) { - program = log_programs[i] - program_count[program]++ - } - - for (program in program_count) { - print program ": " program_count[program] " entries" - } - - print "" - print "=== Report Complete ===" -} \ No newline at end of file |