Diffstat (limited to 'awk/rawk/tests/real_world')
-rw-r--r--  awk/rawk/tests/real_world/README.md                  | 130
-rw-r--r--  awk/rawk/tests/real_world/test_csv_processor.rawk    | 143
-rw-r--r--  awk/rawk/tests/real_world/test_data_processing.rawk  |  75
-rw-r--r--  awk/rawk/tests/real_world/test_log_parser.rawk       | 139
-rw-r--r--  awk/rawk/tests/real_world/test_mixed.rawk            |  27
-rw-r--r--  awk/rawk/tests/real_world/test_system_monitor.rawk   | 157
6 files changed, 0 insertions, 671 deletions
diff --git a/awk/rawk/tests/real_world/README.md b/awk/rawk/tests/real_world/README.md
deleted file mode 100644
index c4ba349..0000000
--- a/awk/rawk/tests/real_world/README.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# Real-World Examples
-
-This directory contains practical examples that demonstrate rawk's utility for common data processing tasks.
-
-## Test Files
-
-### `test_system_monitor.rawk` - System Monitoring
-Processes output from common system commands:
-- **df**: Disk usage monitoring with warnings
-- **ps**: Process resource analysis
-- **ls -l**: File categorization and statistics
-
-**Features:**
-- Disk usage alerts (WARNING/CRITICAL thresholds)
-- Process resource monitoring (CPU/MEM usage)
-- File type categorization (DIRECTORY/EXECUTABLE/LARGE/SMALL)
-- Statistical summaries
-
-**Run with:**
-```bash
-awk -f ../../rawk.awk test_system_monitor.rawk | awk -f - ../data/test_data.txt
-```
-
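-The first `awk` invocation runs the rawk translator (`rawk.awk`) over the `.rawk` source and writes the generated awk program to standard output; the second `awk` reads that program from standard input (`-f -`) and applies it to the data file. To inspect the generated program, the pipeline can be split into two steps (the intermediate file name below is arbitrary):
-
-```bash
-# Save the generated awk program (file name is arbitrary), then run it against the sample data
-awk -f ../../rawk.awk test_system_monitor.rawk > system_monitor.awk
-awk -f system_monitor.awk ../data/test_data.txt
-```
-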
-**Sample Output:**
-```
-DISK: WARNING: /dev/sdb1 (/home) is 90% full
-PROCESS: HIGH CPU: stress (PID: 3456, 25.7% CPU)
-FILE: EXECUTABLE: executable.sh (2048 bytes)
-```
-
-### `test_log_parser.rawk` - Log Parsing
-Processes common log formats:
-- **Apache logs**: Web server access logs
-- **Syslog**: System log entries
-
-**Features:**
-- HTTP status code categorization (SUCCESS/ERROR/REDIRECT)
-- Log level detection (INFO/WARNING/ERROR)
-- Request type classification
-- Error rate calculation
-
-**Run with:**
-```bash
-awk -f ../../rawk.awk test_log_parser.rawk | awk -f - ../data/test_logs.txt
-```
-
-**Sample Output:**
-```
-APACHE: ERROR: 404 - GET /nonexistent.html from 192.168.1.104
-SYSLOG: ERROR: kernel - ERROR: Out of memory
-```
-
-### `test_csv_processor.rawk` - CSV Data Processing
-Processes CSV files with validation:
-- **Email validation**: Basic email format checking
-- **Age categorization**: Group employees by age
-- **Salary statistics**: Calculate averages and ranges
-- **Department analysis**: Employee distribution
-
-**Features:**
-- Data validation and categorization
-- Statistical analysis
-- Report generation
-- Error detection
-
-**Run with:**
-```bash
-awk -f ../../rawk.awk test_csv_processor.rawk | awk -f - ../data/test_employees.csv
-```
-
-**Sample Output:**
-```
-EMPLOYEE: John Smith (ADULT, Engineering) - VALID email, $65000
-Average salary: $73916.7
-Email validity rate: 100%
-```
-
-### `test_data_processing.rawk` - General Data Processing
-Covers general data-processing scenarios that run entirely from a `BEGIN` block:
-- Array filtering and manipulation
-- Data aggregation
-- Formatting and reporting
-
-**Run with:**
-```bash
-awk -f ../../rawk.awk test_data_processing.rawk | awk -f -
-```
-
-### `test_mixed.rawk` - Mixed awk/rawk Code
-Demonstrates mixing rawk functions with regular awk code:
-- Line-by-line processing
-- Integration with awk patterns
-- Combined functionality
-
-**Run with:**
-```bash
-awk -f ../../rawk.awk test_mixed.rawk | awk -f - ../data/test_input.txt
-```
-
-## Use Cases
-
-These examples demonstrate rawk's practical applications:
-
-### System Administration
-- Monitor disk usage and alert on thresholds
-- Track process resource consumption
-- Analyze file system contents
-
-### Web Server Management
-- Parse and analyze web server logs
-- Monitor error rates and traffic patterns
-- Identify problematic requests
-
-### Data Analysis
-- Process CSV files with validation
-- Generate business intelligence reports
-- Analyze employee or customer data
-
-### Log Analysis
-- Parse various log formats
-- Identify system issues
-- Generate operational reports
-
-## Data Files
-
-The examples use sample data files in the `../data/` directory:
-- `test_data.txt`: Simulated system command outputs
-- `test_logs.txt`: Sample Apache and syslog entries
-- `test_employees.csv`: Sample employee data
-- `test_input.txt`: Simple input data for mixed tests 
\ No newline at end of file
diff --git a/awk/rawk/tests/real_world/test_csv_processor.rawk b/awk/rawk/tests/real_world/test_csv_processor.rawk
deleted file mode 100644
index 5aa14b5..0000000
--- a/awk/rawk/tests/real_world/test_csv_processor.rawk
+++ /dev/null
@@ -1,143 +0,0 @@
-# CSV data processing with rawk
-# This demonstrates processing CSV files with headers
-
-# Function to validate email format
-$is_valid_email = (email) -> {
-    # Simple email validation: contains @ and . after @
-    at_pos = index(email, "@")
-    if (at_pos == 0) return 0
-    
-    # Check if there's a dot after the @ symbol
-    dot_pos = index(substr(email, at_pos + 1), ".")
-    return dot_pos > 0
-};
-
-# Function to categorize age groups
-$categorize_age = (age) -> {
-    if (age < 18) {
-        return "MINOR"
-    } else if (age < 30) {
-        return "YOUNG_ADULT"
-    } else if (age < 50) {
-        return "ADULT"
-    } else if (age < 65) {
-        return "MIDDLE_AGED"
-    } else {
-        return "SENIOR"
-    }
-};
-
-# Function to calculate salary statistics
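-# Extra parameters (i, total, ...) act as locals, following the usual awk convention.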
-$calculate_salary_stats = (data, result, i, total, count, max, min, first) -> {
-    total = 0
-    count = 0
-    max = 0
-    min = 0
-    first = 1
-    
-    for (i in data) {
-        total += data[i]
-        count++
-        if (first || data[i] > max) {
-            max = data[i]
-        }
-        if (first || data[i] < min) {
-            min = data[i]
-        }
-        first = 0
-    }
-    
-    result["total"] = total
-    result["count"] = count
-    result["average"] = count > 0 ? total / count : 0
-    result["max"] = max
-    result["min"] = min
-    
-    return count
-};
-
-# Function to format employee record
-$format_employee = (name, email, age, salary, department, age_group, email_status) -> {
-    age_group = categorize_age(age)
-    email_status = is_valid_email(email) ? "VALID" : "INVALID"
-    
-    return name " (" age_group ", " department ") - " email_status " email, $" salary
-};
-
-BEGIN {
-    FS = ","  # Set field separator to comma
-    print "=== CSV Data Processor ==="
-    print ""
-    header_processed = 0
-}
-
-# Skip header line
-NR == 1 {
-    print "Processing CSV with columns: " $0
-    print ""
-    next
-}
-
-# Process data rows
-{
-    if (NF >= 5) {
-        name = $1
-        email = $2
-        age = $3
-        salary = $4
-        department = $5
-        
-        result = format_employee(name, email, age, salary, department)
-        print "EMPLOYEE: " result
-        
-        # Store for statistics
-        employee_count++
-        ages[employee_count] = age
-        salaries[employee_count] = salary
-        departments[employee_count] = department
-        age_groups[employee_count] = categorize_age(age)
-        
-        # Track department counts
-        dept_count[department]++
-        
-        # Track age group counts
-        age_group_count[categorize_age(age)]++
-        
-        # Track email validity
-        if (is_valid_email(email)) {
-            valid_emails++
-        } else {
-            invalid_emails++
-        }
-    }
-}
-
-END {
-    print ""
-    print "=== Employee Statistics ==="
-    
-    if (employee_count > 0) {
-        calculate_salary_stats(salaries, salary_stats)
-        print "Total employees: " employee_count
-        print "Average salary: $" salary_stats["average"]
-        print "Salary range: $" salary_stats["min"] " - $" salary_stats["max"]
-        print "Valid emails: " valid_emails
-        print "Invalid emails: " invalid_emails
-        print "Email validity rate: " (valid_emails / employee_count * 100) "%"
-    }
-    
-    print ""
-    print "=== Department Distribution ==="
-    for (dept in dept_count) {
-        print dept ": " dept_count[dept] " employees"
-    }
-    
-    print ""
-    print "=== Age Group Distribution ==="
-    for (group in age_group_count) {
-        print group ": " age_group_count[group] " employees"
-    }
-    
-    print ""
-    print "=== Report Complete ==="
-} 
\ No newline at end of file
diff --git a/awk/rawk/tests/real_world/test_data_processing.rawk b/awk/rawk/tests/real_world/test_data_processing.rawk
deleted file mode 100644
index dba1a0b..0000000
--- a/awk/rawk/tests/real_world/test_data_processing.rawk
+++ /dev/null
@@ -1,75 +0,0 @@
-# Test data processing scenarios
-$filter_positive = (arr, result, i, count) -> {
-    count = 0
-    for (i in arr) {
-        if (arr[i] > 0) {
-            result[++count] = arr[i]
-        }
-    }
-    return result
-};
-
-$sum_array = (arr, sum, i) -> {
-    sum = 0
-    for (i in arr) {
-        sum += arr[i]
-    }
-    return sum
-};
-
-$average_array = (arr, sum, count, i) -> {
-    sum = 0
-    count = 0
-    for (i in arr) {
-        sum += arr[i]
-        count++
-    }
-    return count > 0 ? sum / count : 0
-};
-
-$find_max = (arr, max, i, first) -> {
-    first = 1
-    for (i in arr) {
-        if (first || arr[i] > max) {
-            max = arr[i]
-            first = 0
-        }
-    }
-    return max
-};
-
-$format_data = (name, age, city) -> {
-    return "Name: " name ", Age: " age ", City: " city
-};
-
-# Test data processing
-BEGIN {
-    print "=== Testing Data Processing ==="
-    
-    # Test array operations
-    data[1] = 10
-    data[2] = -5
-    data[3] = 20
-    data[4] = -3
-    data[5] = 15
-    
-    print "Original data:", data[1], data[2], data[3], data[4], data[5]
-    
-    # Test filtering
-    positive_nums = filter_positive(data)
-    print "Positive numbers:", positive_nums[1], positive_nums[2], positive_nums[3]
-    
-    # Test sum and average
-    total = sum_array(data)
-    avg = average_array(data)
-    print "Sum:", total
-    print "Average:", avg
-    
-    # Test finding maximum
-    max_val = find_max(data)
-    print "Maximum:", max_val
-    
-    # Test data formatting
-    formatted = format_data("Alice", 30, "New York")
-    print "Formatted:", formatted
-} 
\ No newline at end of file
diff --git a/awk/rawk/tests/real_world/test_log_parser.rawk b/awk/rawk/tests/real_world/test_log_parser.rawk
deleted file mode 100644
index 1abdbaf..0000000
--- a/awk/rawk/tests/real_world/test_log_parser.rawk
+++ /dev/null
@@ -1,139 +0,0 @@
-# Log parsing with rawk
-# This demonstrates processing common log formats like Apache, syslog, etc.
-
-# Function to parse Apache log entries
-$parse_apache_log = (ip, date, method, url, status, bytes, referer, user_agent) -> {
-    if (status >= 400) {
-        return "ERROR: " status " - " method " " url " from " ip
-    } else if (status >= 300) {
-        return "REDIRECT: " status " - " method " " url " from " ip
-    } else {
-        return "SUCCESS: " status " - " method " " url " (" bytes " bytes)"
-    }
-};
-
-# Function to parse syslog entries
-$parse_syslog = (timestamp, host, program, message) -> {
-    if (index(message, "error") > 0 || index(message, "ERROR") > 0) {
-        return "ERROR: " program " - " message
-    } else if (index(message, "warning") > 0 || index(message, "WARNING") > 0) {
-        return "WARNING: " program " - " message
-    } else {
-        return "INFO: " program " - " message
-    }
-};
-
-# Function to categorize requests
-$categorize_request = (method, url, status) -> {
-    if (method == "GET" && index(url, ".jpg") > 0) {
-        return "IMAGE_REQUEST"
-    } else if (method == "POST") {
-        return "FORM_SUBMISSION"
-    } else if (method == "GET" && index(url, ".css") > 0) {
-        return "STYLESHEET"
-    } else if (method == "GET" && index(url, ".js") > 0) {
-        return "JAVASCRIPT"
-    } else {
-        return "PAGE_REQUEST"
-    }
-};
-
-# Function to calculate request statistics
-$calculate_request_stats = (data, result, i, total, count, errors, redirects) -> {
-    total = 0
-    count = 0
-    errors = 0
-    redirects = 0
-    
-    for (i in data) {
-        total++
-        if (data[i] >= 400) {
-            errors++
-        } else if (data[i] >= 300) {
-            redirects++
-        }
-    }
-    
-    result["total"] = total
-    result["errors"] = errors
-    result["redirects"] = redirects
-    result["success_rate"] = total > 0 ? ((total - errors - redirects) / total) * 100 : 0
-    
-    return total
-};
-
-BEGIN {
-    print "=== Log Parser Report ==="
-    print ""
-}
-
-# Process Apache log entries (simplified format)
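-# Assumed field layout (simplified common log): ip ident user [date tz] method url protocol status bytes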
-/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ {
-    ip = $1
-    date = $4 " " $5
-    method = $6
-    url = $7
-    status = $9
-    bytes = $10
-    
-    result = parse_apache_log(ip, date, method, url, status, bytes, "", "")
-    print "APACHE: " result
-    
-    # Store for statistics
-    request_count++
-    status_codes[request_count] = status
-    request_types[request_count] = categorize_request(method, url, status)
-}
-
-# Process syslog entries
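-# Assumed field layout (traditional syslog): "Mon DD HH:MM:SS host program: message ..."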
-/^[A-Z][a-z]{2} [0-9]+ [0-9:]+/ {
-    timestamp = $1 " " $2 " " $3
-    host = $4
-    program = substr($5, 1, length($5) - 1)  # Remove trailing colon
-    message = substr($0, index($0, $6))
-    
-    result = parse_syslog(timestamp, host, program, message)
-    print "SYSLOG: " result
-    
-    # Store for statistics
-    log_count++
-    log_programs[log_count] = program
-}
-
-END {
-    print ""
-    print "=== Request Statistics ==="
-    
-    if (request_count > 0) {
-        calculate_request_stats(status_codes, request_stats)
-        print "Total requests: " request_stats["total"]
-        print "Error rate: " (request_stats["errors"] / request_stats["total"] * 100) "% (" request_stats["errors"] " of " request_stats["total"] " requests)"
-        print "Success rate: " request_stats["success_rate"] "%"
-        print "Redirects: " request_stats["redirects"]
-    }
-    
-    print ""
-    print "=== Request Types ==="
-    for (i = 1; i <= request_count; i++) {
-        type = request_types[i]
-        type_count[type]++
-    }
-    
-    for (type in type_count) {
-        print type ": " type_count[type] " requests"
-    }
-    
-    print ""
-    print "=== Log Sources ==="
-    for (i = 1; i <= log_count; i++) {
-        program = log_programs[i]
-        program_count[program]++
-    }
-    
-    for (program in program_count) {
-        print program ": " program_count[program] " entries"
-    }
-    
-    print ""
-    print "=== Report Complete ==="
-} 
\ No newline at end of file
diff --git a/awk/rawk/tests/real_world/test_mixed.rawk b/awk/rawk/tests/real_world/test_mixed.rawk
deleted file mode 100644
index 50cb6bb..0000000
--- a/awk/rawk/tests/real_world/test_mixed.rawk
+++ /dev/null
@@ -1,27 +0,0 @@
-# Mixed rawk and awk code
-$increment = (x) -> x + 1;
-$format_line = (line_num, text) -> "Line " line_num ": " text;
-
-# Regular awk code mixed in
-BEGIN {
-    print "=== Mixed rawk and awk test ==="
-}
-
-# Process each input line
-{
-    # Use rawk functions
-    incremented_line = increment(NR)
-    formatted = format_line(NR, $0)
-    
-    # Regular awk processing
-    if (length($0) > 10) {
-        print formatted " (long line)"
-    } else {
-        print formatted " (short line)"
-    }
-}
-
-END {
-    print "=== End of processing ==="
-    print "Total lines processed:", NR
-} 
\ No newline at end of file
diff --git a/awk/rawk/tests/real_world/test_system_monitor.rawk b/awk/rawk/tests/real_world/test_system_monitor.rawk
deleted file mode 100644
index 1e1ef1a..0000000
--- a/awk/rawk/tests/real_world/test_system_monitor.rawk
+++ /dev/null
@@ -1,157 +0,0 @@
-# System monitoring with rawk
-# This demonstrates processing real command outputs like df, ps, ls
-
-# Function to analyze disk usage
-$analyze_disk = (filesystem, size, used, avail, percent, mount) -> {
-    if (percent > 90) {
-        return "CRITICAL: " filesystem " (" mount ") is " percent "% full!"
-    } else if (percent > 80) {
-        return "WARNING: " filesystem " (" mount ") is " percent "% full"
-    } else if (percent > 60) {
-        return "NOTICE: " filesystem " (" mount ") is " percent "% full"
-    } else {
-        return "OK: " filesystem " (" mount ") has " avail " blocks free"
-    }
-};
-
-# Function to analyze process resource usage
-$analyze_process = (pid, user, cpu, mem, command) -> {
-    if (cpu > 20) {
-        return "HIGH CPU: " command " (PID: " pid ", " cpu "% CPU)"
-    } else if (mem > 10) {
-        return "HIGH MEM: " command " (PID: " pid ", " mem "% MEM)"
-    } else {
-        return "NORMAL: " command " (PID: " pid ")"
-    }
-};
-
-# Function to categorize files
-$categorize_file = (permissions, size, name) -> {
-    if (substr(permissions, 1, 1) == "d") {
-        return "DIRECTORY: " name " (" size " bytes)"
-    } else if (substr(permissions, 4, 1) == "x") {
-        return "EXECUTABLE: " name " (" size " bytes)"
-    } else if (size > 1000) {
-        return "LARGE FILE: " name " (" size " bytes)"
-    } else {
-        return "SMALL FILE: " name " (" size " bytes)"
-    }
-};
-
-# Function to calculate statistics
-$calculate_stats = (data, result, i, total, count, max, min, first) -> {
-    total = 0
-    count = 0
-    max = 0
-    min = 0
-    first = 1
-    
-    for (i in data) {
-        total += data[i]
-        count++
-        if (first || data[i] > max) {
-            max = data[i]
-        }
-        if (first || data[i] < min) {
-            min = data[i]
-        }
-        first = 0
-    }
-    
-    result["total"] = total
-    result["count"] = count
-    result["average"] = count > 0 ? total / count : 0
-    result["max"] = max
-    result["min"] = min
-    
-    return count
-};
-
-BEGIN {
-    print "=== System Monitor Report ==="
-    print ""
-}
-
-# Process df output (disk usage)
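-# Assumed df column order: filesystem, blocks, used, available, capacity, mount point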
-/^\/dev\// {
-    filesystem = $1
-    size = $2
-    used = $3
-    avail = $4
-    percent = $5
-    sub(/%$/, "", percent)  # strip a trailing "%" (as printed by real df) so comparisons and formatting stay clean
-    mount = $6
-    
-    result = analyze_disk(filesystem, size, used, avail, percent, mount)
-    print "DISK: " result
-    
-    # Store for statistics
-    disk_count++
-    disk_usage[disk_count] = percent
-}
-
-# Process ps output (process information)
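-# Assumed tab-separated layout: pid, user, %cpu, %mem, ... with the command name in field 11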
-/^[0-9]+\t/ {
-    pid = $1
-    user = $2
-    cpu = $3
-    mem = $4
-    command = $11
-    
-    result = analyze_process(pid, user, cpu, mem, command)
-    print "PROCESS: " result
-    
-    # Store for statistics
-    process_count++
-    cpu_usage[process_count] = cpu
-    mem_usage[process_count] = mem
-}
-
-# Process ls output (file information)
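-# Assumed tab-separated "ls -l" layout: permissions, links, owner, group, size, month, day, time, name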
-/^[d-][rwx-]{9}\t/ {
-    permissions = $1
-    size = $5
-    name = $9
-    
-    result = categorize_file(permissions, size, name)
-    print "FILE: " result
-    
-    # Store for statistics
-    file_count++
-    file_sizes[file_count] = size
-}
-
-END {
-    print ""
-    print "=== Summary Statistics ==="
-    
-    # Disk usage statistics
-    if (disk_count > 0) {
-        calculate_stats(disk_usage, disk_stats)
-        print "Disk Usage:"
-        print "  Average: " disk_stats["average"] "%"
-        print "  Maximum: " disk_stats["max"] "%"
-        print "  Minimum: " disk_stats["min"] "%"
-    }
-    
-    # CPU usage statistics
-    if (process_count > 0) {
-        calculate_stats(cpu_usage, cpu_stats)
-        print "CPU Usage:"
-        print "  Average: " cpu_stats["average"] "%"
-        print "  Maximum: " cpu_stats["max"] "%"
-        print "  Total processes: " process_count
-    }
-    
-    # File size statistics
-    if (file_count > 0) {
-        calculate_stats(file_sizes, file_stats)
-        print "File Sizes:"
-        print "  Total size: " file_stats["total"] " bytes"
-        print "  Average size: " file_stats["average"] " bytes"
-        print "  Largest file: " file_stats["max"] " bytes"
-        print "  Total files: " file_count
-    }
-    
-    print ""
-    print "=== Report Complete ==="
-} 
\ No newline at end of file