1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
# Generated by rawk v2.0.0
# Source: example.rawk
# --- Standard Library ---
# is_number(value): returns 1 when value compares equal to value + 0, else 0.
# The comparison follows awk's usual rules, so a string operand is compared
# as text against the string form of its numeric value.
function is_number(value) {
    if (value == value + 0) {
        return 1
    }
    return 0
}
# is_string(value): returns 1 when value does NOT compare equal to value + 0,
# else 0. It is the logical negation of the numeric self-comparison test.
function is_string(value) {
    if (value == value + 0) {
        return 0
    }
    return 1
}
# get_keys(array, result): copies every index of `array` into `result`
# under the keys 1..N and returns N.
#   array  - source array whose indices are collected
#   result - output array; existing entries are not cleared first
# The order follows awk's `for (key in array)` traversal, which is unspecified.
# `key` and `n` are local scratch variables; callers pass only two arguments.
function get_keys(array, result, key, n) {
    n = 0
    for (key in array) {
        n++
        result[n] = key
    }
    return n
}
# ip_is_local(ip): returns 1 when `ip` is an IPv4 loopback or private address,
# else 0.
#   ip - dotted-quad address string; values that is_string() rejects yield 0
# Prefixes are anchored at the start of the string so that public addresses
# which merely contain "10." or "172." (e.g. "210.1.2.3", "172.217.0.1") are
# no longer misclassified. Ranges covered: 127.0.0.0/8, 10.0.0.0/8,
# 192.168.0.0/16 and 172.16.0.0/12.
function ip_is_local(ip) {
    if (!is_string(ip)) return 0
    if (ip ~ /^127\./) return 1
    if (ip ~ /^10\./) return 1
    if (ip ~ /^192\.168\./) return 1
    # Only the second octets 16..31 of 172.x.x.x are private.
    if (ip ~ /^172\.(1[6-9]|2[0-9]|3[01])\./) return 1
    return 0
}
# is_bot(user_agent): returns 1 when the user-agent string contains one of the
# markers "bot", "crawler" or "spider" in any letter case, else 0.
#   user_agent - user-agent string; values that is_string() rejects yield 0
# Matching is case-insensitive so agents such as "YandexBot" or "AhrefsBot"
# are detected. "Googlebot" and "Bingbot" need no separate check because both
# contain "bot". `lowered` is a local scratch variable.
function is_bot(user_agent, lowered) {
    if (!is_string(user_agent)) return 0
    lowered = tolower(user_agent)
    return index(lowered, "bot") > 0 || index(lowered, "crawler") > 0 || index(lowered, "spider") > 0
}
# flatMap(func_name, array, result): for every element of `array`, calls
# dispatch_call(func_name, element, temp_array), which is expected to fill
# temp_array[1..k] and return k. Those k values are appended to `result`
# under consecutive integer keys starting at 1.
#   func_name - name understood by dispatch_call
#   array     - input array (traversal order is awk's unspecified `in` order)
#   result    - output array; existing entries are not cleared first
# Returns the total number of values appended.
# i, temp_array, temp_count, j and count are local scratch variables. `count`
# is declared here so the function no longer overwrites a global `count`.
function flatMap(func_name, array, result, i, temp_array, temp_count, j, count) {
    count = 0
    for (i in array) {
        temp_count = dispatch_call(func_name, array[i], temp_array)
        # Copy only the first temp_count entries, so stale entries left from
        # a previous iteration are never read.
        for (j = 1; j <= temp_count; j++) {
            result[++count] = temp_array[j]
        }
    }
    return count
}
# user_agent_is_desktop(user_agent): returns 1 when the user-agent string
# contains "Windows" or "Macintosh", or contains "Linux" without "Android".
# Returns 0 otherwise, including when is_string() rejects the value.
function user_agent_is_desktop(user_agent) {
    if (!is_string(user_agent)) {
        return 0
    }
    if (index(user_agent, "Windows") > 0) {
        return 1
    }
    if (index(user_agent, "Macintosh") > 0) {
        return 1
    }
    # "Linux" also appears in Android agents, so those are excluded here.
    if (index(user_agent, "Linux") > 0 && index(user_agent, "Android") == 0) {
        return 1
    }
    return 0
}
# map(func_name, array, result): calls dispatch_call(func_name, element) for
# every element of `array` and stores each return value in `result` under the
# keys 1..N. Returns N.
#   func_name - name understood by dispatch_call
#   array     - input array (traversal order is awk's unspecified `in` order)
#   result    - output array; existing entries are not cleared first
# `key` and `n` are local scratch variables.
function map(func_name, array, result, key, n) {
    n = 0
    for (key in array) {
        n++
        result[n] = dispatch_call(func_name, array[key])
    }
    return n
}
# http_is_server_error(status): returns 1 when status is in [500, 600), else 0.
#   status - HTTP status code, as a number or a string of digits
# The value is coerced to a number first. Without that, a string status (such
# as one produced by substr) would be compared lexicographically, and a value
# like "55" would wrongly count as a server error.
function http_is_server_error(status) {
    status += 0
    return status >= 500 && status < 600
}
# http_is_client_error(status): returns 1 when status is in [400, 500), else 0.
#   status - HTTP status code, as a number or a string of digits
# The value is coerced to a number first. Without that, a string status (such
# as one produced by substr) would be compared lexicographically, and a value
# like "45" would wrongly count as a client error.
function http_is_client_error(status) {
    status += 0
    return status >= 400 && status < 500
}
# http_is_mutating_method(method): returns 1 when method is exactly "POST",
# "PUT", "DELETE" or "PATCH" (case-sensitive), else 0.
function http_is_mutating_method(method) {
    if (method == "POST") {
        return 1
    }
    if (method == "PUT") {
        return 1
    }
    if (method == "DELETE") {
        return 1
    }
    if (method == "PATCH") {
        return 1
    }
    return 0
}
# url_is_static_file(url): returns 1 when the path part of `url` ends in one of
# the static-asset extensions .css .js .png .jpg .jpeg .gif .svg .ico .woff
# .woff2, else 0.
#   url - request URL; values that is_string() rejects yield 0
# The extension must end the path (query string and fragment are stripped
# first), so "/data.json" or "/page.jsp" no longer match ".js", while
# "/app.js?v=2" still does. `path` is a local scratch variable.
function url_is_static_file(url, path) {
    if (!is_string(url)) return 0
    path = url
    # Drop "?query" and "#fragment" so only the path is inspected.
    sub(/[?#].*$/, "", path)
    if (path ~ /\.(css|js|png|jpe?g|gif|svg|ico|woff2?)$/) return 1
    return 0
}
# take(count, array, result): copies up to `count` elements of `array` into
# `result` under the keys 1..N and returns N (N <= count).
#   count  - maximum number of elements to copy
#   array  - input array (traversal order is awk's unspecified `in` order)
#   result - output array; existing entries are not cleared first
# The loop stops as soon as the quota is reached instead of walking the rest
# of the array. `i` and `taken` are local scratch variables.
function take(count, array, result, i, taken) {
    taken = 0
    for (i in array) {
        if (taken >= count) {
            break
        }
        result[++taken] = array[i]
    }
    return taken
}
# ip_is_public(ip): logical negation of ip_is_local(ip). Returns 1 when
# ip_is_local reports 0, and 0 otherwise.
function ip_is_public(ip) {
    if (ip_is_local(ip)) {
        return 0
    }
    return 1
}
# user_agent_is_mobile(user_agent): returns 1 when the user-agent string
# contains "Mobile", "iPhone", "Android" or "iPad" (case-sensitive).
# Returns 0 otherwise, including when is_string() rejects the value.
function user_agent_is_mobile(user_agent) {
    if (!is_string(user_agent)) {
        return 0
    }
    if (index(user_agent, "Mobile") > 0) {
        return 1
    }
    if (index(user_agent, "iPhone") > 0) {
        return 1
    }
    if (index(user_agent, "Android") > 0) {
        return 1
    }
    if (index(user_agent, "iPad") > 0) {
        return 1
    }
    return 0
}
# Dispatch function for functional programming.
# dispatch_call(func_name, arg1..arg5): calls the function whose name equals
# func_name and returns its result. awk has no portable indirect call, so the
# name is resolved with an explicit if-chain.
#   func_name  - name of the function to call
#   arg1..arg5 - arguments forwarded to the target (only as many as it takes)
# An unknown name prints an error to /dev/stderr and returns an empty value.
#
# Only functions that are actually defined in this file are dispatched.
# Entries for functions with no definition here (double, add, is_even,
# is_positive, is_positive_num, square, split_words, is_odd) were removed:
# some awk implementations refuse to load a program that calls an undefined
# function. The duplicated is_positive / is_even branches were unreachable.
function dispatch_call(func_name, arg1, arg2, arg3, arg4, arg5) {
    # User-defined functions
    if (func_name == "extract_endpoint") return extract_endpoint(arg1)
    if (func_name == "extract_bot_components") return extract_bot_components(arg1, arg2)
    # Standard library functions
    if (func_name == "is_number") return is_number(arg1)
    if (func_name == "is_string") return is_string(arg1)
    print "Error: Function '" func_name "' not found" > "/dev/stderr"
    return
}
# --- User Functions ---
# extract_method(request): returns the first space-separated token of
# `request` (the HTTP method in a "METHOD URL PROTOCOL" request line), or an
# empty string when there is none.
# `parts` is declared as a local so the split no longer overwrites a global
# array of the same name.
function extract_method(request, parts) {
    split(request, parts, " ")
    return parts[1]
}
# extract_url(request): returns the second space-separated token of `request`
# (the URL in a "METHOD URL PROTOCOL" request line), or an empty string when
# there are fewer than two tokens.
# `parts` is declared as a local so the split no longer overwrites a global
# array of the same name.
function extract_url(request, parts) {
    split(request, parts, " ")
    return parts[2]
}
# format_error_report(ip, status, url, user_agent): builds the one-line error
# description "<ip> - <status> - <url> (<user_agent>)" and returns it.
function format_error_report(ip, status, url, user_agent) {
    return (ip " - " status) (" - " url) (" (" user_agent ")")
}
# format_success_report(ip, method, url, bytes): builds the one-line success
# description "<ip> - <method> <url> (<bytes> bytes)" and returns it.
function format_success_report(ip, method, url, bytes) {
    return (ip " - " method) (" " url) (" (" bytes " bytes)")
}
# is_success(status): returns 1 when status is in [200, 300), else 0.
#   status - HTTP status code, as a number or a string of digits
# The value is coerced to a number first. Without that, a string status (such
# as one produced by substr) would be compared lexicographically, and a value
# like "25" would wrongly count as a success.
function is_success(status) {
    status += 0
    return status >= 200 && status < 300
}
# is_api_request(url): returns 1 when `url` contains the substring "/api/"
# at any position, else 0.
function is_api_request(url) {
    if (index(url, "/api/") == 0) {
        return 0
    }
    return 1
}
# is_large_request(bytes): returns 1 when bytes exceeds 1048576 (1 MB), else 0.
#   bytes - response size, as a number or a string of digits
# The value is coerced to a number first. Without that, a string value would
# be compared lexicographically, and "999" would sort above "1048576" and be
# reported as large.
function is_large_request(bytes) {
    bytes += 0
    return bytes > 1048576 # 1MB
}
# extract_endpoint(url): returns `url` unchanged. It is an identity mapping,
# used as the callback name passed to map().
function extract_endpoint(url) {
    return (url)
}
# extract_bot_components(user_agent, result): splits `user_agent` on single
# spaces into result[1..N] and returns N.
#   user_agent - user-agent string to split
#   result     - output array; split() clears it before filling
# The count is the return value of split() itself, which avoids calling
# length() on an array (not supported by every awk implementation).
function extract_bot_components(user_agent, result) {
    return split(user_agent, result, " ")
}
# --- Main Script ---
# Main processing pipeline
BEGIN {
    # Report banner, printed once before any input is read.
    print "Apache Log Analysis Report"
    print "============================="
    # Blank line separating the banner from the per-request output.
    print ""
}
# Process each log line.
# Expected shape (Apache combined log format):
#   IP - - [timestamp] "METHOD URL PROTOCOL" STATUS BYTES "referer" "user-agent"
# Fields are pulled out with a series of small regex matches rather than one
# large pattern. Lines that do not start with an IPv4-looking token are skipped.
{
    if (match($0, /^([0-9.]+)/)) {
        ip = substr($0, RSTART, RLENGTH)

        # Reset the per-record fields so a line that lacks one of them never
        # inherits the value parsed from an earlier line.
        request = ""
        method = ""
        url = ""
        status = ""
        bytes = 0
        user_agent = ""

        # Request line: the first double-quoted field ("METHOD URL PROTOCOL").
        if (match($0, /"([^"]+)"/)) {
            request = substr($0, RSTART + 1, RLENGTH - 2)
            method = extract_method(request)
            url = extract_url(request)
        }

        # Status code: the number right after the request's closing quote.
        # The match covers `" NNN `, so skip the quote and space and drop the
        # trailing space; "+ 0" makes the value numeric.
        if (match($0, /" ([0-9]+) /)) {
            status = substr($0, RSTART + 2, RLENGTH - 3) + 0

            # Response size: the number that immediately follows the status.
            # A "-" (no body sent) leaves bytes at 0.
            after_status = substr($0, RSTART + RLENGTH)
            if (match(after_status, /^[0-9]+/)) {
                bytes = substr(after_status, 1, RLENGTH) + 0
            }
        }

        # User agent: the last quoted field on the line.
        if (match($0, /"([^"]*)"$/)) {
            user_agent = substr($0, RSTART + 1, RLENGTH - 2)
        }

        # Store for analysis
        request_count++

        # Real-time processing using some standard library predicates
        if (http_is_server_error(status)) {
            server_error_count++
            error_report = format_error_report(ip, status, url, user_agent)
            print "SERVER ERROR: " error_report
        } else if (http_is_client_error(status)) {
            client_error_count++
            error_report = format_error_report(ip, status, url, user_agent)
            print "CLIENT ERROR: " error_report
        } else if (is_success(status)) {
            success_count++
            success_report = format_success_report(ip, method, url, bytes)
            print "✓ " success_report
        }

        # Track different types of requests
        if (is_api_request(url)) {
            api_count++
            api_urls[api_count] = url
        }
        if (url_is_static_file(url)) {
            static_count++
            static_urls[static_count] = url
        }
        if (http_is_mutating_method(method)) {
            mutation_count++
            # Mutations coming from a non-local address are called out.
            if (ip_is_public(ip)) {
                print "EXTERNAL MUTATION: " ip " " method " " url
            }
        }

        # Track user types (bot takes precedence over mobile, then desktop)
        if (is_bot(user_agent)) {
            bot_count++
            bot_agents[bot_count] = user_agent
        } else if (user_agent_is_mobile(user_agent)) {
            mobile_count++
        } else if (user_agent_is_desktop(user_agent)) {
            desktop_count++
        }

        # Track large requests
        if (is_large_request(bytes)) {
            large_count++
            large_urls[large_count] = url
        }
    }
}
END {
    # Final report: summary counters, then optional API and bot sections.
    total_errors = client_error_count + server_error_count

    # Guard the division: when no request line was parsed (empty or
    # unparseable input) request_count is 0 and dividing would abort the
    # script before the report is printed.
    error_rate = 0
    if (request_count > 0) {
        error_rate = (total_errors / request_count) * 100
    }

    # "+ 0" forces counters that were never incremented to print as 0
    # instead of an empty string.
    print ""
    print "Summary Statistics"
    print "===================="
    print "Total Requests:", request_count + 0
    print "Successful:", success_count + 0
    print "Client Errors:", client_error_count + 0
    print "Server Errors:", server_error_count + 0
    print "Total Errors:", total_errors
    print "Error Rate:", sprintf("%.2f%%", error_rate)
    print "API Requests:", api_count + 0
    print "Static Files:", static_count + 0
    print "Mutating Requests:", mutation_count + 0
    print "Mobile Users:", mobile_count + 0
    print "Desktop Users:", desktop_count + 0
    print "Bot Requests:", bot_count + 0
    print "Large Requests (>1MB):", large_count + 0

    # Some functional patterns at play, map, flatMap, and take.
    if (api_count > 0) {
        print ""
        print "API Usage Analysis"
        print "===================="
        # Use map to extract API endpoints
        endpoint_count = map("extract_endpoint", api_urls, endpoints)
        print "API Endpoints found:", endpoint_count
    }

    if (bot_count > 0) {
        print ""
        print "Bot Activity Analysis"
        print "========================"
        # Use flatMap to extract bot user agent components
        bot_components_count = flatMap("extract_bot_components", bot_agents, bot_components)
        print "Bot components analyzed:", bot_components_count
        # Use take to pick up to 3 bot components; only the count is printed
        top_components_count = take(3, bot_components, top_components)
        print "Top bot components:", top_components_count
    }

    print ""
    print "End analysis"
}
# Rawk compilation summary:
# - Rawk Version: 2.0.0
# - Functions defined: 9
# - Source lines: 182
# - Standard library functions included: 11
|