about summary refs log tree commit diff stats
path: root/awk/rawk/scratch/tests_old/real_world/test_log_parser.rawk
blob: 1abdbafb688782e9c5c606a80da0ee69ea10da92 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Log parsing with rawk
# This demonstrates processing common log formats like Apache, syslog, etc.

# Summarize one Apache access-log entry as a single line of text.
#   ip, method, url, status, bytes - values interpolated into the summary
#   date, referer, user_agent      - accepted but not used by this function
# Returns a string prefixed with ERROR (status >= 400), REDIRECT
# (status >= 300) or SUCCESS (anything else). Only the SUCCESS form
# reports the byte count; the other two report the client address.
$parse_apache_log = (ip, date, method, url, status, bytes, referer, user_agent) -> {
    # Checks run from the highest threshold down, so the first hit wins
    if (status >= 400) {
        return "ERROR: " status " - " method " " url " from " ip
    }
    if (status >= 300) {
        return "REDIRECT: " status " - " method " " url " from " ip
    }
    return "SUCCESS: " status " - " method " " url " (" bytes " bytes)"
};

# Classify one syslog entry by keywords found in its message text.
#   program, message - interpolated into the returned summary
#   timestamp, host  - accepted but not used by this function
# Returns "ERROR: ...", "WARNING: ..." or "INFO: ..." followed by
# the program name and the unmodified message.
# The keyword search is case-insensitive, so mixed-case spellings such
# as Error or Warning are classified the same way as the all-lowercase
# and all-uppercase forms. An error keyword takes precedence over a
# warning keyword when both appear.
$parse_syslog = (timestamp, host, program, message) -> {
    # Only the search is done on a lowercased copy; the message itself
    # is returned exactly as it was received
    if (index(tolower(message), "error") > 0) {
        return "ERROR: " program " - " message
    }
    if (index(tolower(message), "warning") > 0) {
        return "WARNING: " program " - " message
    }
    return "INFO: " program " - " message
};

# Assign a coarse category to a request from its method and URL.
#   method - request method; only GET and POST are distinguished
#   url    - request target, optionally followed by a query or fragment
#   status - accepted but not used by this function
# Returns FORM_SUBMISSION for any POST; IMAGE_REQUEST, STYLESHEET or
# JAVASCRIPT for a GET whose path ends in .jpg, .css or .js; and
# PAGE_REQUEST for everything else.
# The extension must sit at the end of the path, directly before a
# query (?) or fragment (#) or at the end of the string. A plain
# substring test would also classify /data.json or /page.jsp as
# JAVASCRIPT.
$categorize_request = (method, url, status) -> {
    if (method == "POST") {
        return "FORM_SUBMISSION"
    }
    if (method == "GET") {
        if (url ~ /\.jpg([?#]|$)/) {
            return "IMAGE_REQUEST"
        }
        if (url ~ /\.css([?#]|$)/) {
            return "STYLESHEET"
        }
        if (url ~ /\.js([?#]|$)/) {
            return "JAVASCRIPT"
        }
    }
    return "PAGE_REQUEST"
};

# Tally a collection of HTTP status codes into summary statistics.
#   data   - array of status codes; every element is visited, and the
#            keys themselves are not interpreted
#   result - array that receives the entries total, errors, redirects
#            and success_rate (a percentage, 0 when data is empty)
# The remaining parameters are scratch variables in the usual awk
# style; callers pass only data and result. count is zeroed but never
# read afterwards.
# Returns the number of elements in data.
$calculate_request_stats = (data, result, i, total, count, errors, redirects) -> {
    redirects = 0
    errors = 0
    count = 0
    total = 0

    for (i in data) {
        total += 1
        # A code of 400 or above is an error and is not also a redirect
        if (data[i] >= 400) {
            errors += 1
            continue
        }
        if (data[i] >= 300) {
            redirects += 1
        }
    }

    result["total"] = total
    result["errors"] = errors
    result["redirects"] = redirects

    # Guard the division so an empty data set reports 0 percent
    if (total > 0) {
        result["success_rate"] = ((total - errors - redirects) / total) * 100
    } else {
        result["success_rate"] = 0
    }

    return total
};

BEGIN {
    # Report banner followed by one empty line; appending ORS to the
    # banner emits the same bytes as a second print of an empty string
    print "=== Log Parser Report ===" ORS
}

# Process Apache log entries (simplified format)
# Runs for every line that starts with a dotted-quad address. Fields are
# read by position using default whitespace splitting: $1 client address,
# $4 and $5 the timestamp, $6 method, $7 URL, $9 status, $10 byte count.
# Each entry is printed as a one-line summary and its status code and
# request category are stored, keyed by a running counter, for the
# END report.
/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ {
    ip = $1
    date = $4 " " $5
    method = $6
    # Apache logs the request line as a quoted string, so the method
    # field carries a leading double-quote character (octal 042).
    # Strip it, otherwise comparisons against GET and POST never match.
    if (substr(method, 1, 1) == "\042") {
        method = substr(method, 2)
    }
    url = $7
    status = $9
    bytes = $10
    
    result = parse_apache_log(ip, date, method, url, status, bytes, "", "")
    print "APACHE: " result
    
    # Store for statistics
    request_count++
    status_codes[request_count] = status
    request_types[request_count] = categorize_request(method, url, status)
}

# Process syslog entries
# Runs for every line that starts with a month abbreviation, a day of
# month and a clock time. The pattern spells out the two lowercase
# letters instead of using an interval expression, which some awk
# implementations treat literally, and it accepts more than one space
# before the day so that space-padded single-digit days still match.
# Fields by position: $1 to $3 timestamp, $4 host, $5 program tag,
# remainder of the line is the message.
/^[A-Z][a-z][a-z] +[0-9]+ [0-9:]+/ {
    timestamp = $1 " " $2 " " $3
    host = $4

    # Remove the trailing colon only when one is actually present
    program = $5
    sub(/:$/, "", program)

    # The message is whatever follows the first five fields. Peeling
    # the fields off the front keeps the original spacing of the
    # message and cannot be fooled by the text of the sixth field also
    # appearing earlier in the line.
    message = $0
    for (skipped = 0; skipped < 5; skipped++) {
        sub(/^[ \t]*[^ \t]+/, "", message)
    }
    sub(/^[ \t]+/, "", message)
    
    result = parse_syslog(timestamp, host, program, message)
    print "SYSLOG: " result
    
    # Store for statistics
    log_count++
    log_programs[log_count] = program
}

END {
    # Final report: request statistics, request categories, log sources.
    print ""
    print "=== Request Statistics ==="

    # The statistics lines are skipped entirely when no request was seen,
    # which also keeps the percentage division away from a zero total
    if (request_count > 0) {
        calculate_request_stats(status_codes, request_stats)
        print "Total requests: " request_stats["total"]
        print "Error rate: " request_stats["errors"] \
              " (" (request_stats["errors"] / request_stats["total"] * 100) "%)"
        print "Success rate: " request_stats["success_rate"] "%"
        print "Redirects: " request_stats["redirects"]
    }

    print ""
    print "=== Request Types ==="

    # Count how many stored requests fall into each category
    i = 1
    while (i <= request_count) {
        type_count[request_types[i]]++
        i++
    }
    for (type in type_count) {
        print type ": " type_count[type] " requests"
    }

    print ""
    print "=== Log Sources ==="

    # Count how many syslog entries each program produced
    i = 1
    while (i <= log_count) {
        program_count[log_programs[i]]++
        i++
    }
    for (program in program_count) {
        print program ": " program_count[program] " entries"
    }

    print ""
    print "=== Report Complete ==="
}