diff options
Diffstat (limited to 'awk')
-rw-r--r-- | awk/rawk/README.md | 434 | ||||
-rw-r--r-- | awk/rawk/rawk.awk | 34 | ||||
-rw-r--r-- | awk/rawk/tests/simple_stdlib_test.rawk | 2 | ||||
-rw-r--r-- | awk/rawk/tests/test_errors.rawk | 2 | ||||
-rw-r--r-- | awk/rawk/tests/test_functional.rawk | 2 | ||||
-rwxr-xr-x | awk/rawk/tests/test_runner.sh | 18 |
6 files changed, 106 insertions, 386 deletions
diff --git a/awk/rawk/README.md b/awk/rawk/README.md index 961d1ba..d68217a 100644 --- a/awk/rawk/README.md +++ b/awk/rawk/README.md @@ -1,41 +1,12 @@ -# rawk v2.0.0 - A Functional Programming Language for awk +# rawk +## Make awk rawk. -**rawk** is a modern, functional-style language dialect that compiles to highly portable, standard `awk`. It provides a more expressive syntax for writing awk programs while maintaining full compatibility with existing awk code. +Rawk helps to bring some modern developer comforts to awk while maintaining awk's portability and inbuilt goodness. -## ๐ What's New in v2.0.0 - -- **Block-based syntax**: Functions are defined within `RAWK { ... }` blocks -- **Smart standard library**: Only includes functions actually used in your code -- **Multi-pass compiler**: Robust parsing with better error handling -- **Enhanced validation**: Clear error messages and comprehensive testing -- **Full standard library**: 50+ built-in functions for functional programming - -## Features - -- **Functional Programming**: Define functions with a clean, modern syntax -- **Portable**: Compiles to standard awk that runs on any implementation -- **Mixed Code**: Seamlessly mix rawk functions with regular awk code -- **Smart Standard Library**: Only includes functions you actually use -- **Comprehensive Testing**: Built-in testing framework with assertions -- **Error Handling**: Clear, actionable error messages - -## Quick Start - -### Installation - -No installation required! Just download `rawk_block_based.awk` and you're ready to go. - -### Basic Usage - -1. **Run the test suite** (recommended): -```bash -cd tests && ./fixed_test_runner.sh -``` - -2. **Create a simple program** (`hello.rawk`): +## Create a rawk file (`example.rawk`): ```rawk BEGIN { - print "Hello from rawk v2.0.0!" + print "Hello from rawk!" } RAWK { @@ -55,386 +26,125 @@ RAWK { } ``` -3. 
**Compile and run**: -```bash -# Two-stage compilation (recommended) -awk -f rawk_block_based.awk hello.rawk > hello.awk -echo "test" | awk -f hello.awk +A `.awk` file should, generally, be a totally valid `.rawk` file. Just like any valid JavaScript is valid TypeScript, likewise with awk and rawk. -# One-stage compilation and execution -echo "test" | awk -f rawk_block_based.awk hello.rawk | awk -f - -``` +Rawk introduces a new semantic block to awk, so that you can write special forms within the `RAWK {...}` block. -## Language Syntax +## Compile and run: +```bash +# Compile to awk +awk -f rawk.awk example.rawk > example.awk -### Block-Based Structure +# Run the compiled program +echo "test" | awk -f example.awk -All rawk functions must be defined within a `RAWK { ... }` block: +# Or compile and run in one line +echo "test" | awk -f rawk.awk example.rawk | awk -f - +``` -```rawk -BEGIN { - print "Initialization" -} +## How to run the example: +```bash +# Compile the example file +awk -f rawk.awk example.rawk > example_output.awk -RAWK { - $add = (x, y) -> { - return x + y; - }; - - $multiply = (a, b) -> { - return a * b; - }; - - $greet = (name) -> { - return "Hello, " name "!"; - }; -} +# Run with sample log data +awk -f example_output.awk sample.log -{ - result = add(5, 3); - print result; - print greet("World"); -} +# Or run with just a few lines +head -10 sample.log | awk -f example_output.awk + +# Or compile and run without outputting an awk file to disk +awk -f rawk.awk example.rawk | awk -f - sample.log ``` +## Syntax + ### Function Definitions +All functions go inside an `RAWK { ... }` block. 
-**Multi-line functions** (braces required): ```rawk RAWK { - $calculate_area = (width, height) -> { - area = width * height; - return area; - }; - - $factorial = (n) -> { - if (n <= 1) { - return 1; - } else { - return n * factorial(n - 1); - } + $function_name = (param1, param2) -> { + return param1 + param2; }; } ``` ### Function Calls +Call rawk functions from anywhere in the code, -Functions can be called anywhere in regular awk code: ```rawk { - result = add(2, 3); # Returns 5 - greeting = greet("Alice"); # Returns "Hello, Alice!" - area = calculate_area(4, 5); # Returns 20 + result = add(5, 3); + print result; } ``` -### Mixed awk/rawk Code +### Mixed Code +Mix and match awk and rawk code, -Regular awk code works seamlessly with rawk functions: ```rawk -BEGIN { - print "Starting processing..." - FS = "," # Set field separator -} +BEGIN { FS = "," } RAWK { - $process_line = (line) -> { - if (line ~ /^#/) return ""; # Skip comments - return "Processed: " line; - }; - - $validate_data = (field1, field2) -> { - if (field1 == "" || field2 == "") return 0; - return 1; + $process = (field) -> { + return "Processed: " field; }; } -/^[^#]/ { # Process non-comment lines - if (validate_data($1, $2)) { - result = process_line($0); - print result; +{ + if ($1 != "") { + print process($1); } } - -END { - print "Processing complete." 
-} ``` -## Smart Standard Library - -rawk v2.0.0 includes a smart standard library that only includes functions you actually use: - -### Testing Functions (Always Included) -- `assert(condition, message)`: Asserts a condition is true -- `expect_equal(actual, expected, message)`: Asserts actual equals expected -- `expect_true(condition, message)`: Asserts condition is true -- `expect_false(condition, message)`: Asserts condition is false - -### Type Checking (Always Included) -- `is_number(value)`: Check if value is a number -- `is_string(value)`: Check if value is a string -- `get_keys(array, result)`: Get array keys (dependency) - -### Conditional Functions (Only if Used) -- `is_positive(value)`: Check if number is positive -- `is_negative(value)`: Check if number is negative -- `is_zero(value)`: Check if number is zero -- `is_integer(value)`: Check if number is integer -- `is_float(value)`: Check if number is float -- `is_boolean(value)`: Check if value is boolean -- `is_truthy(value)`: Check if value is truthy -- `is_falsy(value)`: Check if value is falsy -- `is_empty(value)`: Check if value is empty - -### Validation Functions (Only if Used) -- `is_email(value)`: Email validation -- `is_url(value)`: URL validation with multiple protocols -- `is_ipv4(value)`: IPv4 validation -- `is_ipv6(value)`: IPv6 validation -- `is_uuid(value)`: UUID validation -- `is_alpha(value)`: Alphabetic string validation -- `is_numeric(value)`: Numeric string validation -- `is_alphanumeric(value)`: Alphanumeric string validation -- `is_palindrome(value)`: Palindrome detection -- `is_hex(value)`: Hexadecimal validation -- `is_csv(value)`: CSV format detection -- `is_tsv(value)`: TSV format detection - -### HTTP Predicates (Only if Used) -- `http_is_redirect(status)`: Check if HTTP status is redirect -- `http_is_client_error(status)`: Check if HTTP status is client error -- `http_is_server_error(status)`: Check if HTTP status is server error -- `http_is_get(method)`: Check if HTTP method 
is GET -- `http_is_post(method)`: Check if HTTP method is POST -- `http_is_safe_method(method)`: Check if HTTP method is safe -- `http_is_mutating_method(method)`: Check if HTTP method can mutate state - -### Array Utilities (Only if Used) -- `keys(array)`: Count of array keys -- `values(array)`: Count of array values -- `get_values(array, result)`: Get array values -- `map(func_name, array, result)`: Apply function to each element -- `reduce(func_name, array, initial)`: Reduce array using function -- `filter(predicate_func, array, result)`: Filter array elements -- `find(predicate_func, array)`: Find first matching element -- `findIndex(predicate_func, array)`: Find index of first matching element -- `flatMap(func_name, array, result)`: Apply function and flatten -- `take(count, array, result)`: Take first n elements -- `drop(count, array, result)`: Drop first n elements -- `pipe(value, func_name)`: Pipe value through function -- `pipe_multi(value, func_names)`: Pipe value through multiple functions - -## Examples - -### Basic Functionality -```rawk -BEGIN { - print "=== Basic Functionality Test ===" -} +## Standard Library +Rawk boasts a rather large standard library. 
-RAWK { - $add = (x, y) -> { - return x + y; - }; - - $multiply = (a, b) -> { - return a * b; - }; - - $greet = (name) -> { - return "Hello, " name "!"; - }; -} - -{ - # Test basic arithmetic - result1 = add(5, 3); - expect_equal(result1, 8, "add(5, 3) should return 8"); - - result2 = multiply(4, 7); - expect_equal(result2, 28, "multiply(4, 7) should return 28"); - - # Test string functions - greeting = greet("World"); - expect_equal(greeting, "Hello, World!", "greet('World') should return 'Hello, World!'"); - - print "All basic tests passed!"; - exit 0; -} +### Testing +```rawk +expect_equal(add(2, 3), 5, "Addition should work"); +expect_true(is_positive(5), "5 should be positive"); ``` -### Standard Library Usage +### Type Checking Predicates ```rawk -BEGIN { - print "=== Standard Library Test ===" -} - -RAWK { - $validate_email = (email) -> { - return is_email(email); - }; - - $validate_url = (url) -> { - return is_url(url); - }; - - $process_data = (data) -> { - if (is_csv(data)) { - return "CSV data detected"; - } else if (is_hex(data)) { - return "Hex data detected"; - } else { - return "Unknown format"; - } - }; -} - -{ - # Test email validation - expect_true(validate_email("user@example.com"), "Valid email should pass"); - expect_false(validate_email("invalid-email"), "Invalid email should fail"); - - # Test URL validation - expect_true(validate_url("https://example.com"), "Valid URL should pass"); - expect_false(validate_url("not-a-url"), "Invalid URL should fail"); - - # Test data format detection - expect_equal(process_data("name,age,city"), "CSV data detected", "CSV detection should work"); - expect_equal(process_data("FF00AA"), "Hex data detected", "Hex detection should work"); - - print "All standard library tests passed!"; - exit 0; -} +if (is_number(value)) { ... } +if (is_string(value)) { ... 
} ``` -### Smart Standard Library Demo +### Various Validation Predicates ```rawk -BEGIN { - print "=== Smart Standard Library Demo ===" - print "This program only uses is_email and is_number" - print "Only these functions will be included in the output" -} - -RAWK { - $validate_email = (email) -> { - return is_email(email); - }; - - $check_number = (num) -> { - return is_number(num); - }; -} - -{ - # Only use is_email and is_number from standard library - expect_true(validate_email("test@example.com"), "Valid email should pass"); - expect_false(validate_email("invalid"), "Invalid email should fail"); - - expect_true(check_number(42), "Number should pass"); - expect_false(check_number("abc"), "String should fail"); - - print "Smart standard library test passed!"; - print "Only is_email and is_number were included in output"; - exit 0; -} +if (is_email(email)) { ... } +if (is_url(url)) { ... } ``` -## Compilation Process - -1. **Pass 1**: Collect all source lines into memory -2. **Pass 2**: Detect and validate RAWK block(s) -3. **Pass 3**: Extract functions from RAWK block(s) -4. **Pass 4**: Analyze function calls and dependencies -5. 
**Pass 5**: Generate output with smart standard library inclusion +### Functional Programming Patterns +```rawk +# Transform array elements +count = map("double", numbers, doubled); -## Error Handling +# Filter array elements +count = filter("is_positive", numbers, positive); -The compiler provides helpful error messages for: -- **Missing RAWK block**: "Error: No RAWK block found" -- **Nested RAWK blocks**: "Error: Nested or multiple RAWK blocks are not supported" -- **Unclosed RAWK block**: "Error: RAWK block opened at line X but never closed" -- **Invalid function syntax**: Detailed error messages with suggestions +# Reduce array to single value +sum = reduce("add", numbers); +``` ## Testing -Run the comprehensive test suite: +Run the test suite, + ```bash -cd tests && ./fixed_test_runner.sh +cd tests && ./test_runner.sh ``` -The test suite includes: -- Basic functionality tests -- Standard library tests -- Functional programming tests -- Error handling tests -- Smart standard library tests - -## Performance and Best Practices - -### Smart Standard Library Benefits -- **Reduced output size**: Only includes functions you use -- **Faster compilation**: Less code to process -- **Cleaner output**: Easier to read and debug -- **Better maintainability**: Clear dependencies - -### Best Practices -1. **Use RAWK blocks**: Always define functions within RAWK blocks -2. **Test your code**: Use the built-in testing framework -3. **Validate inputs**: Use standard library validation functions -4. **Keep functions simple**: Single responsibility principle -5. **Use descriptive names**: Make function purposes clear - -## Migration from v1.x - -To migrate from rawk v1.x to v2.0.0: - -1. **Wrap functions in RAWK blocks**: - ```rawk - # Old v1.x syntax - $add = (x, y) -> x + y; - - # New v2.0.0 syntax - RAWK { - $add = (x, y) -> { - return x + y; - }; - } - ``` - -2. 
**Add braces to all functions**: - ```rawk - # Old v1.x syntax - $greet = (name) -> "Hello, " name; - - # New v2.0.0 syntax - RAWK { - $greet = (name) -> { - return "Hello, " name; - }; - } - ``` - -3. **Update test expectations**: Some array utility functions may behave differently - -## Limitations - -- **Function Names**: Must be valid awk identifiers (letters, digits, underscores) -- **Array Returns**: Functions cannot return arrays (use pass-by-reference instead) -- **Array Order**: AWK doesn't guarantee array iteration order -- **Function Count**: No practical limit, but large numbers may impact performance - -## Contributing - -1. Add test cases for new features -2. Ensure compatibility with standard awk -3. Update documentation for new functionality -4. Test on multiple awk implementations +## Requirements -## License - -This project is open source. Feel free to use, modify, and distribute as needed. +- Any awk implementation (gawk, mawk, nawk, etc.) +- No additional dependencies, strives to work with any POSIX awk -## Acknowledgments +## License -Inspired by the need for a more expressive syntax for awk programming while maintaining the portability and simplicity that makes awk so powerful. \ No newline at end of file +Public Domain \ No newline at end of file diff --git a/awk/rawk/rawk.awk b/awk/rawk/rawk.awk index 8180a7b..c4e2ff1 100644 --- a/awk/rawk/rawk.awk +++ b/awk/rawk/rawk.awk @@ -1,11 +1,19 @@ #!/usr/bin/awk -f + +# rawk.awk + +# Author: @eli_oat +# License: Public Domain +# Let's make awk rawk + # ============================================================================= -# rawk v2.0.0 - Multi-pass block-based compiler +# Multi-pass compiler # ============================================================================= # -# This compiler transforms rawk code into standard awk with smart standard -# library inclusion. It uses a multi-pass approach to overcome AWK's variable -# scoping limitations and ensure deterministic compilation. 
+# This compiler transforms rawk code into standard awk and smartly includes only +# those standard library functions you've actually used. It uses a multi-pass +# approach to overcome awk's variable scoping limitations and ensure +# deterministic compilation. # # COMPILATION PROCESS: # Pass 1: Collect all input lines into memory @@ -26,8 +34,10 @@ BEGIN { # INITIALIZATION: Set up data structures for multi-pass compilation # ============================================================================= + RAWK_VERSION = "0.0.1" + # Arrays to store compilation state - delete lines # All input lines (Pass 1) + delete lines # All input lines (Pass 1) delete FUNCTION_NAMES # User-defined function names (Pass 3) delete FUNCTION_ARGS # User-defined function arguments (Pass 3) delete FUNCTION_BODIES # User-defined function bodies (Pass 3) @@ -307,7 +317,7 @@ END { # user-defined functions, and the main script body. # Output header with compilation metadata - print "# Generated by rawk v2.0.0" + print "# Generated with rawk v" RAWK_VERSION print "# Source: " ARGV[1] print "" @@ -334,13 +344,13 @@ END { # This is the "smart inclusion" feature that only includes functions that are called for (func_name in USED_STDLIB_FUNCTIONS) { if (func_name == "assert") { - print "function assert(condition, message) { if (!condition) { print \"โ Assertion failed: \" message > \"/dev/stderr\"; exit 1 } }" + print "function assert(condition, message) { if (!condition) { print \"Assertion failed: \" message > \"/dev/stderr\"; exit 1 } }" } else if (func_name == "expect_equal") { - print "function expect_equal(actual, expected, message) { if (actual != expected) { print \"โ Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\"; exit 1 } }" + print "function expect_equal(actual, expected, message) { if (actual != expected) { print \"Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\"; exit 1 } }" } else if (func_name == "expect_true") { - print 
"function expect_true(condition, message) { if (!condition) { print \"โ Expected true but got false - \" message > \"/dev/stderr\"; exit 1 } }" + print "function expect_true(condition, message) { if (!condition) { print \"Expected true but got false - \" message > \"/dev/stderr\"; exit 1 } }" } else if (func_name == "expect_false") { - print "function expect_false(condition, message) { if (condition) { print \"โ Expected false but got true - \" message > \"/dev/stderr\"; exit 1 } }" + print "function expect_false(condition, message) { if (condition) { print \"Expected false but got true - \" message > \"/dev/stderr\"; exit 1 } }" } else if (func_name == "is_positive") { print "function is_positive(value) { return is_number(value) && value > 0 }" } else if (func_name == "is_negative") { @@ -465,7 +475,7 @@ END { # DISPATCH FUNCTION: Dynamic function calling for functional programming # ============================================================================= # The dispatch_call function enables functional programming utilities (map, reduce, etc.) - # to dynamically call user-defined functions by name. This is only included when needed. + # to dynamically call user-defined functions by name. This is only included when used. 
if ("map" in USED_STDLIB_FUNCTIONS || "reduce" in USED_STDLIB_FUNCTIONS || "filter" in USED_STDLIB_FUNCTIONS || "find" in USED_STDLIB_FUNCTIONS || "findIndex" in USED_STDLIB_FUNCTIONS || "flatMap" in USED_STDLIB_FUNCTIONS || "pipe" in USED_STDLIB_FUNCTIONS || "pipe_multi" in USED_STDLIB_FUNCTIONS) { print "# Dispatch function for functional programming" @@ -521,7 +531,7 @@ END { # ============================================================================= print "" print "# Rawk compilation summary:" - print "# - Rawk Version: 2.0.0" + print "# - Rawk Version: " RAWK_VERSION print "# - Functions defined: " function_count print "# - Source lines: " line_count print "# - Standard library functions included: " length(USED_STDLIB_FUNCTIONS) diff --git a/awk/rawk/tests/simple_stdlib_test.rawk b/awk/rawk/tests/simple_stdlib_test.rawk index 7245342..0a726df 100644 --- a/awk/rawk/tests/simple_stdlib_test.rawk +++ b/awk/rawk/tests/simple_stdlib_test.rawk @@ -14,7 +14,7 @@ RAWK { expect_true(is_string("hello"), "hello should be a string"); expect_false(is_number("abc"), "abc should not be a number"); - # Test our custom function + # Test the custom function expect_true(test_function(5), "5 should pass our test"); expect_false(test_function(-3), "-3 should fail our test"); expect_false(test_function("text"), "text should fail our test"); diff --git a/awk/rawk/tests/test_errors.rawk b/awk/rawk/tests/test_errors.rawk index 233ba33..2376822 100644 --- a/awk/rawk/tests/test_errors.rawk +++ b/awk/rawk/tests/test_errors.rawk @@ -1,4 +1,4 @@ -# This test file should fail compilation due to missing RAWK block +# This test file should fail compilation because it is missing a RAWK block BEGIN { print "This should fail because there's no RAWK block" } diff --git a/awk/rawk/tests/test_functional.rawk b/awk/rawk/tests/test_functional.rawk index 9cf1b77..41020a3 100644 --- a/awk/rawk/tests/test_functional.rawk +++ b/awk/rawk/tests/test_functional.rawk @@ -45,7 +45,7 @@ RAWK { texts[1] = 
"hello world"; texts[2] = "functional programming"; - texts[3] = "awk is awesome"; + texts[3] = "awk is rad"; # Test map function doubled_count = map("double", numbers, doubled); diff --git a/awk/rawk/tests/test_runner.sh b/awk/rawk/tests/test_runner.sh index 18f3fa2..d0b316d 100755 --- a/awk/rawk/tests/test_runner.sh +++ b/awk/rawk/tests/test_runner.sh @@ -1,6 +1,6 @@ #!/bin/bash -echo "๐งช Fixed rawk v2.0.0 Test Runner" +echo "a rawking test runner" echo "==================================" # Colors for output @@ -64,37 +64,37 @@ run_error_test() { # Run all tests echo "" -echo "๐ Running basic functionality tests..." +echo "Running basic functionality tests..." run_test "test_basic.rawk" "Basic Functionality" echo "" -echo "๐ Running simple standard library tests..." +echo "Running simple standard library tests..." run_test "simple_stdlib_test.rawk" "Simple Standard Library" echo "" -echo "๐ง Running full standard library tests..." +echo "Running full standard library tests..." run_test "test_stdlib.rawk" "Full Standard Library" echo "" -echo "๐ง Running functional programming tests..." +echo "Running functional programming tests..." run_test "test_functional.rawk" "Functional Programming" echo "" -echo "โ Running error handling tests..." +echo "Running error handling tests..." run_error_test "test_errors.rawk" "Error Handling" # Summary echo "" echo "==================================" -echo "๐ Test Summary:" +echo "Test Summary:" echo " Total tests: $TOTAL" echo -e " ${GREEN}Passed: $PASSED${NC}" echo -e " ${RED}Failed: $FAILED${NC}" if [ $FAILED -eq 0 ]; then - echo -e "\n${GREEN}๐ All tests passed!${NC}" + echo -e "\n${GREEN}All tests passed!${NC}" exit 0 else - echo -e "\n${RED}๐ฅ Some tests failed!${NC}" + echo -e "\n${RED}Some tests failed!${NC}" exit 1 fi \ No newline at end of file |