diff options
Diffstat (limited to 'awk/rawk/tests/real_world')
-rw-r--r-- | awk/rawk/tests/real_world/README.md | 130 | ||||
-rw-r--r-- | awk/rawk/tests/real_world/test_csv_processor.rawk | 143 | ||||
-rw-r--r-- | awk/rawk/tests/real_world/test_data_processing.rawk | 75 | ||||
-rw-r--r-- | awk/rawk/tests/real_world/test_log_parser.rawk | 139 | ||||
-rw-r--r-- | awk/rawk/tests/real_world/test_mixed.rawk | 27 | ||||
-rw-r--r-- | awk/rawk/tests/real_world/test_system_monitor.rawk | 157 |
6 files changed, 0 insertions, 671 deletions
diff --git a/awk/rawk/tests/real_world/README.md b/awk/rawk/tests/real_world/README.md deleted file mode 100644 index c4ba349..0000000 --- a/awk/rawk/tests/real_world/README.md +++ /dev/null @@ -1,130 +0,0 @@ -# Real-World Examples - -This directory contains practical examples that demonstrate rawk's utility for common data processing tasks. - -## Test Files - -### `test_system_monitor.rawk` - System Monitoring -Processes output from common system commands: -- **df**: Disk usage monitoring with warnings -- **ps**: Process resource analysis -- **ls -l**: File categorization and statistics - -**Features:** -- Disk usage alerts (WARNING/CRITICAL thresholds) -- Process resource monitoring (CPU/MEM usage) -- File type categorization (DIRECTORY/EXECUTABLE/LARGE/SMALL) -- Statistical summaries - -**Run with:** -```bash -awk -f ../../rawk.awk test_system_monitor.rawk | awk -f - ../data/test_data.txt -``` - -**Sample Output:** -``` -DISK: WARNING: /dev/sdb1 (/home) is 90% full -PROCESS: HIGH CPU: stress (PID: 3456, 25.7% CPU) -FILE: EXECUTABLE: executable.sh (2048 bytes) -``` - -### `test_log_parser.rawk` - Log Parsing -Processes common log formats: -- **Apache logs**: Web server access logs -- **Syslog**: System log entries - -**Features:** -- HTTP status code categorization (SUCCESS/ERROR/REDIRECT) -- Log level detection (INFO/WARNING/ERROR) -- Request type classification -- Error rate calculation - -**Run with:** -```bash -awk -f ../../rawk.awk test_log_parser.rawk | awk -f - ../data/test_logs.txt -``` - -**Sample Output:** -``` -APACHE: ERROR: 404 - GET /nonexistent.html from 192.168.1.104 -SYSLOG: ERROR: kernel - ERROR: Out of memory -``` - -### `test_csv_processor.rawk` - CSV Data Processing -Processes CSV files with validation: -- **Email validation**: Basic email format checking -- **Age categorization**: Group employees by age -- **Salary statistics**: Calculate averages and ranges -- **Department analysis**: Employee distribution - -**Features:** -- Data validation and categorization -- Statistical analysis -- Report generation -- Error detection - -**Run with:** -```bash -awk -f ../../rawk.awk test_csv_processor.rawk | awk -f - ../data/test_employees.csv -``` - -**Sample Output:** -``` -EMPLOYEE: John Smith (ADULT, Engineering) - VALID email, $65000 -Average salary: $73916.7 -Email validity rate: 100% -``` - -### `test_data_processing.rawk` - General Data Processing -General data processing scenarios: -- Array filtering and manipulation -- Data aggregation -- Formatting and reporting - -**Run with:** -```bash -awk -f ../../rawk.awk test_data_processing.rawk | awk -f - -``` - -### `test_mixed.rawk` - Mixed awk/rawk Code -Demonstrates mixing rawk functions with regular awk code: -- Line-by-line processing -- Integration with awk patterns -- Combined functionality - -**Run with:** -```bash -awk -f ../../rawk.awk test_mixed.rawk | awk -f - ../data/test_input.txt -``` - -## Use Cases - -These examples demonstrate rawk's practical applications: - -### System Administration -- Monitor disk usage and alert on thresholds -- Track process resource consumption -- Analyze file system contents - -### Web Server Management -- Parse and analyze web server logs -- Monitor error rates and traffic patterns -- Identify problematic requests - -### Data Analysis -- Process CSV files with validation -- Generate business intelligence reports -- Analyze employee or customer data - -### Log Analysis -- Parse various log formats -- Identify system issues -- Generate operational reports - -## Data Files - -The examples use sample data files in the `../data/` directory: -- `test_data.txt`: Simulated system command outputs -- `test_logs.txt`: Sample Apache and syslog entries -- `test_employees.csv`: Sample employee data -- `test_input.txt`: Simple input data for mixed tests \ No newline at end of file diff --git a/awk/rawk/tests/real_world/test_csv_processor.rawk b/awk/rawk/tests/real_world/test_csv_processor.rawk deleted file mode 100644 index 5aa14b5..0000000 --- a/awk/rawk/tests/real_world/test_csv_processor.rawk +++ /dev/null @@ -1,143 +0,0 @@ -# CSV data processing with rawk -# This demonstrates processing CSV files with headers - -# Function to validate email format -$is_valid_email = (email) -> { - # Simple email validation: contains @ and . after @ - at_pos = index(email, "@") - if (at_pos == 0) return 0 - - # Check if there's a dot after the @ symbol - dot_pos = index(substr(email, at_pos + 1), ".") - return dot_pos > 0 -}; - -# Function to categorize age groups -$categorize_age = (age) -> { - if (age < 18) { - return "MINOR" - } else if (age < 30) { - return "YOUNG_ADULT" - } else if (age < 50) { - return "ADULT" - } else if (age < 65) { - return "MIDDLE_AGED" - } else { - return "SENIOR" - } -}; - -# Function to calculate salary statistics -$calculate_salary_stats = (data, result, i, total, count, max, min) -> { - total = 0 - count = 0 - max = 0 - min = 0 - first = 1 - - for (i in data) { - total += data[i] - count++ - if (first || data[i] > max) { - max = data[i] - } - if (first || data[i] < min) { - min = data[i] - } - first = 0 - } - - result["total"] = total - result["count"] = count - result["average"] = count > 0 ? total / count : 0 - result["max"] = max - result["min"] = min - - return count -}; - -# Function to format employee record -$format_employee = (name, email, age, salary, department) -> { - age_group = categorize_age(age) - email_status = is_valid_email(email) ? "VALID" : "INVALID" - - return name " (" age_group ", " department ") - " email_status " email, $" salary -}; - -BEGIN { - FS = "," # Set field separator to comma - print "=== CSV Data Processor ===" - print "" - header_processed = 0 -} - -# Skip header line -NR == 1 { - print "Processing CSV with columns: " $0 - print "" - next -} - -# Process data rows -{ - if (NF >= 5) { - name = $1 - email = $2 - age = $3 - salary = $4 - department = $5 - - result = format_employee(name, email, age, salary, department) - print "EMPLOYEE: " result - - # Store for statistics - employee_count++ - ages[employee_count] = age - salaries[employee_count] = salary - departments[employee_count] = department - age_groups[employee_count] = categorize_age(age) - - # Track department counts - dept_count[department]++ - - # Track age group counts - age_group_count[categorize_age(age)]++ - - # Track email validity - if (is_valid_email(email)) { - valid_emails++ - } else { - invalid_emails++ - } - } -} - -END { - print "" - print "=== Employee Statistics ===" - - if (employee_count > 0) { - calculate_salary_stats(salaries, salary_stats) - print "Total employees: " employee_count - print "Average salary: $" salary_stats["average"] - print "Salary range: $" salary_stats["min"] " - $" salary_stats["max"] - print "Valid emails: " valid_emails - print "Invalid emails: " invalid_emails - print "Email validity rate: " (valid_emails / employee_count * 100) "%" - } - - print "" - print "=== Department Distribution ===" - for (dept in dept_count) { - print dept ": " dept_count[dept] " employees" - } - - print "" - print "=== Age Group Distribution ===" - for (group in age_group_count) { - print group ": " age_group_count[group] " employees" - } - - print "" - print "=== Report Complete ===" -} \ No newline at end of file diff --git a/awk/rawk/tests/real_world/test_data_processing.rawk b/awk/rawk/tests/real_world/test_data_processing.rawk deleted file mode 100644 index dba1a0b..0000000 --- a/awk/rawk/tests/real_world/test_data_processing.rawk +++ /dev/null @@ -1,75 +0,0 @@ -# Test data processing scenarios -$filter_positive = (arr, result, i, count) -> { - count = 0 - for (i in arr) { - if (arr[i] > 0) { - result[++count] = arr[i] - } - } - return result -}; - -$sum_array = (arr, sum, i) -> { - sum = 0 - for (i in arr) { - sum += arr[i] - } - return sum -}; - -$average_array = (arr, sum, count, i) -> { - sum = 0 - count = 0 - for (i in arr) { - sum += arr[i] - count++ - } - return count > 0 ? sum / count : 0 -}; - -$find_max = (arr, max, i, first) -> { - first = 1 - for (i in arr) { - if (first || arr[i] > max) { - max = arr[i] - first = 0 - } - } - return max -}; - -$format_data = (name, age, city) -> { - return "Name: " name ", Age: " age ", City: " city -}; - -# Test data processing -BEGIN { - print "=== Testing Data Processing ===" - - # Test array operations - data[1] = 10 - data[2] = -5 - data[3] = 20 - data[4] = -3 - data[5] = 15 - - print "Original data:", data[1], data[2], data[3], data[4], data[5] - - # Test filtering - positive_nums = filter_positive(data) - print "Positive numbers:", positive_nums[1], positive_nums[2], positive_nums[3] - - # Test sum and average - total = sum_array(data) - avg = average_array(data) - print "Sum:", total - print "Average:", avg - - # Test finding maximum - max_val = find_max(data) - print "Maximum:", max_val - - # Test data formatting - formatted = format_data("Alice", 30, "New York") - print "Formatted:", formatted -} \ No newline at end of file diff --git a/awk/rawk/tests/real_world/test_log_parser.rawk b/awk/rawk/tests/real_world/test_log_parser.rawk deleted file mode 100644 index 1abdbaf..0000000 --- a/awk/rawk/tests/real_world/test_log_parser.rawk +++ /dev/null @@ -1,139 +0,0 @@ -# Log parsing with rawk -# This demonstrates processing common log formats like Apache, syslog, etc. - -# Function to parse Apache log entries -$parse_apache_log = (ip, date, method, url, status, bytes, referer, user_agent) -> { - if (status >= 400) { - return "ERROR: " status " - " method " " url " from " ip - } else if (status >= 300) { - return "REDIRECT: " status " - " method " " url " from " ip - } else { - return "SUCCESS: " status " - " method " " url " (" bytes " bytes)" - } -}; - -# Function to parse syslog entries -$parse_syslog = (timestamp, host, program, message) -> { - if (index(message, "error") > 0 || index(message, "ERROR") > 0) { - return "ERROR: " program " - " message - } else if (index(message, "warning") > 0 || index(message, "WARNING") > 0) { - return "WARNING: " program " - " message - } else { - return "INFO: " program " - " message - } -}; - -# Function to categorize requests -$categorize_request = (method, url, status) -> { - if (method == "GET" && index(url, ".jpg") > 0) { - return "IMAGE_REQUEST" - } else if (method == "POST") { - return "FORM_SUBMISSION" - } else if (method == "GET" && index(url, ".css") > 0) { - return "STYLESHEET" - } else if (method == "GET" && index(url, ".js") > 0) { - return "JAVASCRIPT" - } else { - return "PAGE_REQUEST" - } -}; - -# Function to calculate request statistics -$calculate_request_stats = (data, result, i, total, count, errors, redirects) -> { - total = 0 - count = 0 - errors = 0 - redirects = 0 - - for (i in data) { - total++ - if (data[i] >= 400) { - errors++ - } else if (data[i] >= 300) { - redirects++ - } - } - - result["total"] = total - result["errors"] = errors - result["redirects"] = redirects - result["success_rate"] = total > 0 ? ((total - errors - redirects) / total) * 100 : 0 - - return total -}; - -BEGIN { - print "=== Log Parser Report ===" - print "" -} - -# Process Apache log entries (simplified format) -/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ { - ip = $1 - date = $4 " " $5 - method = $6 - url = $7 - status = $9 - bytes = $10 - - result = parse_apache_log(ip, date, method, url, status, bytes, "", "") - print "APACHE: " result - - # Store for statistics - request_count++ - status_codes[request_count] = status - request_types[request_count] = categorize_request(method, url, status) -} - -# Process syslog entries -/^[A-Z][a-z]{2} [0-9]+ [0-9:]+/ { - timestamp = $1 " " $2 " " $3 - host = $4 - program = substr($5, 1, length($5) - 1) # Remove trailing colon - message = substr($0, index($0, $6)) - - result = parse_syslog(timestamp, host, program, message) - print "SYSLOG: " result - - # Store for statistics - log_count++ - log_programs[log_count] = program -} - -END { - print "" - print "=== Request Statistics ===" - - if (request_count > 0) { - calculate_request_stats(status_codes, request_stats) - print "Total requests: " request_stats["total"] - print "Error rate: " request_stats["errors"] " (" (request_stats["errors"] / request_stats["total"] * 100) "%)" - print "Success rate: " request_stats["success_rate"] "%" - print "Redirects: " request_stats["redirects"] - } - - print "" - print "=== Request Types ===" - for (i = 1; i <= request_count; i++) { - type = request_types[i] - type_count[type]++ - } - - for (type in type_count) { - print type ": " type_count[type] " requests" - } - - print "" - print "=== Log Sources ===" - for (i = 1; i <= log_count; i++) { - program = log_programs[i] - program_count[program]++ - } - - for (program in program_count) { - print program ": " program_count[program] " entries" - } - - print "" - print "=== Report Complete ===" -} \ No newline at end of file diff --git a/awk/rawk/tests/real_world/test_mixed.rawk b/awk/rawk/tests/real_world/test_mixed.rawk deleted file mode 100644 index 50cb6bb..0000000 --- a/awk/rawk/tests/real_world/test_mixed.rawk +++ /dev/null @@ -1,27 +0,0 @@ -# Mixed rawk and awk code -$increment = (x) -> x + 1; -$format_line = (line_num, text) -> "Line " line_num ": " text; - -# Regular awk code mixed in -BEGIN { - print "=== Mixed rawk and awk test ===" -} - -# Process each input line -{ - # Use rawk functions - incremented_line = increment(NR) - formatted = format_line(NR, $0) - - # Regular awk processing - if (length($0) > 10) { - print formatted " (long line)" - } else { - print formatted " (short line)" - } -} - -END { - print "=== End of processing ===" - print "Total lines processed:", NR -} \ No newline at end of file diff --git a/awk/rawk/tests/real_world/test_system_monitor.rawk b/awk/rawk/tests/real_world/test_system_monitor.rawk deleted file mode 100644 index 1e1ef1a..0000000 --- a/awk/rawk/tests/real_world/test_system_monitor.rawk +++ /dev/null @@ -1,157 +0,0 @@ -# System monitoring with rawk -# This demonstrates processing real command outputs like df, ps, ls - -# Function to analyze disk usage -$analyze_disk = (filesystem, size, used, avail, percent, mount) -> { - if (percent > 90) { - return "CRITICAL: " filesystem " (" mount ") is " percent "% full!" - } else if (percent > 80) { - return "WARNING: " filesystem " (" mount ") is " percent "% full" - } else if (percent > 60) { - return "NOTICE: " filesystem " (" mount ") is " percent "% full" - } else { - return "OK: " filesystem " (" mount ") has " avail " blocks free" - } -}; - -# Function to analyze process resource usage -$analyze_process = (pid, user, cpu, mem, command) -> { - if (cpu > 20) { - return "HIGH CPU: " command " (PID: " pid ", " cpu "% CPU)" - } else if (mem > 10) { - return "HIGH MEM: " command " (PID: " pid ", " mem "% MEM)" - } else { - return "NORMAL: " command " (PID: " pid ")" - } -}; - -# Function to categorize files -$categorize_file = (permissions, size, name) -> { - if (substr(permissions, 1, 1) == "d") { - return "DIRECTORY: " name " (" size " bytes)" - } else if (substr(permissions, 4, 1) == "x") { - return "EXECUTABLE: " name " (" size " bytes)" - } else if (size > 1000) { - return "LARGE FILE: " name " (" size " bytes)" - } else { - return "SMALL FILE: " name " (" size " bytes)" - } -}; - -# Function to calculate statistics -$calculate_stats = (data, result, i, total, count, max, min) -> { - total = 0 - count = 0 - max = 0 - min = 0 - first = 1 - - for (i in data) { - total += data[i] - count++ - if (first || data[i] > max) { - max = data[i] - } - if (first || data[i] < min) { - min = data[i] - } - first = 0 - } - - result["total"] = total - result["count"] = count - result["average"] = count > 0 ? total / count : 0 - result["max"] = max - result["min"] = min - - return count -}; - -BEGIN { - print "=== System Monitor Report ===" - print "" -} - -# Process df output (disk usage) -/^\/dev\// { - filesystem = $1 - size = $2 - used = $3 - avail = $4 - percent = $5 - mount = $6 - - result = analyze_disk(filesystem, size, used, avail, percent, mount) - print "DISK: " result - - # Store for statistics - disk_count++ - disk_usage[disk_count] = percent -} - -# Process ps output (process information) -/^[0-9]+\t/ { - pid = $1 - user = $2 - cpu = $3 - mem = $4 - command = $11 - - result = analyze_process(pid, user, cpu, mem, command) - print "PROCESS: " result - - # Store for statistics - process_count++ - cpu_usage[process_count] = cpu - mem_usage[process_count] = mem -} - -# Process ls output (file information) -/^[d-][rwx-]{9}\t/ { - permissions = $1 - size = $5 - name = $9 - - result = categorize_file(permissions, size, name) - print "FILE: " result - - # Store for statistics - file_count++ - file_sizes[file_count] = size -} - -END { - print "" - print "=== Summary Statistics ===" - - # Disk usage statistics - if (disk_count > 0) { - calculate_stats(disk_usage, disk_stats) - print "Disk Usage:" - print " Average: " disk_stats["average"] "%" - print " Maximum: " disk_stats["max"] "%" - print " Minimum: " disk_stats["min"] "%" - } - - # CPU usage statistics - if (process_count > 0) { - calculate_stats(cpu_usage, cpu_stats) - print "CPU Usage:" - print " Average: " cpu_stats["average"] "%" - print " Maximum: " cpu_stats["max"] "%" - print " Total processes: " process_count - } - - # File size statistics - if (file_count > 0) { - calculate_stats(file_sizes, file_stats) - print "File Sizes:" - print " Total size: " file_stats["total"] " bytes" - print " Average size: " file_stats["average"] " bytes" - print " Largest file: " file_stats["max"] " bytes" - print " Total files: " file_count - } - - print "" - print "=== Report Complete ===" -} \ No newline at end of file |