diff options
Diffstat (limited to 'awk/rawk/ENHANCED_UTILITIES_PLAN.md')
-rw-r--r-- | awk/rawk/ENHANCED_UTILITIES_PLAN.md | 380 |
1 file changed, 0 insertions, 380 deletions
diff --git a/awk/rawk/ENHANCED_UTILITIES_PLAN.md b/awk/rawk/ENHANCED_UTILITIES_PLAN.md deleted file mode 100644 index 816ceb5..0000000 --- a/awk/rawk/ENHANCED_UTILITIES_PLAN.md +++ /dev/null @@ -1,380 +0,0 @@ -# Enhanced Functional Programming Utilities Implementation Plan - -## Overview - -This document outlines the plan to implement additional functional programming utilities in the `rawk` language, building upon the successful implementation of `map()`, `reduce()`, and `pipe()` functions. - -## Current State - -### ✅ Implemented Functions -- `map(func_name, array, result)`: Apply function to each element -- `reduce(func_name, array, initial)`: Reduce array with function -- `pipe(value, func_name)`: Single function pipeline -- `pipe_multi(value, func_names)`: Multiple function pipeline -- `dispatch_call(func_name, arg1, arg2, ...)`: Dynamic function dispatch - -### ✅ Array Utilities -- `keys(array)`: Count of array keys -- `values(array)`: Count of array values -- `get_keys(array, result)`: Get array keys -- `get_values(array, result)`: Get array values - -## Enhanced Utilities to Implement - -### 1. 
`filter()` - Array Filtering - -**Function Name Analysis:** -- **`filter`**: ✅ **RECOMMENDED** - Standard, well-known, descriptive -- `select`: ❌ Too generic, could be confused with SQL -- `where`: ❌ SQL-like, not functional programming standard -- `keep`: ❌ Less common, unclear intent - -**Implementation:** -```awk -function filter(predicate_func, array, result, i) { - # Filter array elements based on predicate function - for (i in array) { - if (dispatch_call(predicate_func, array[i])) { - result[i] = array[i] - } - } - return keys(result) -} -``` - -**Usage Examples:** -```rawk -$is_positive = (x) -> x > 0; -$is_even = (x) -> x % 2 == 0; -$is_valid_email = (email) -> is_email(email); - -# Filter positive numbers -positive_count = filter("is_positive", numbers, positive_numbers) - -# Filter even numbers -even_count = filter("is_even", numbers, even_numbers) - -# Filter valid emails -valid_emails_count = filter("is_valid_email", emails, valid_emails) -``` - -### 2. `flatMap()` - Flatten and Map - -**Function Name Analysis:** -- **`flatMap`**: ✅ **RECOMMENDED** - Standard functional programming term -- `chain`: ❌ Less descriptive, could mean different things -- `flatten_map`: ❌ Too verbose -- `map_flatten`: ❌ Less common ordering - -**Implementation:** -```awk -function flatMap(func_name, array, result, i, temp_array, temp_count, j) { - # Apply function to each element and flatten the result - for (i in array) { - temp_count = dispatch_call(func_name, array[i], temp_array) - for (j = 1; j <= temp_count; j++) { - result[keys(result) + 1] = temp_array[j] - } - } - return keys(result) -} -``` - -**Usage Examples:** -```rawk -$split_words = (text, result) -> { - split(text, result, " ") - return length(result) -}; - -$get_tags = (item, result) -> { - split(item["tags"], result, ",") - return length(result) -}; - -# Flatten nested arrays -texts[1] = "hello world" -texts[2] = "functional programming" -texts[3] = "awk is awesome" - -words_count = flatMap("split_words", 
texts, all_words) -# Result: ["hello", "world", "functional", "programming", "awk", "is", "awesome"] - -# Extract and flatten tags from items -tags_count = flatMap("get_tags", items, all_tags) -``` - -### 3. `zip()` - Array Combination - -**Function Name Analysis:** -- **`zip`**: ✅ **RECOMMENDED** - Standard, well-known, descriptive -- `combine`: ❌ Too generic, could mean different operations -- `pair`: ❌ Less descriptive, doesn't imply multiple elements -- `interleave`: ❌ Less common, more complex than needed - -**Implementation:** -```awk -function zip(array1, array2, result, i, keys1, keys2, count1, count2) { - # Zip two arrays together - count1 = get_keys(array1, keys1) - count2 = get_keys(array2, keys2) - - for (i = 1; i <= count1 && i <= count2; i++) { - result[i]["first"] = array1[keys1[i]] - result[i]["second"] = array2[keys2[i]] - } - return (count1 < count2) ? count1 : count2 -} -``` - -**Usage Examples:** -```rawk -# Zip names and ages -names[1] = "Alice" -names[2] = "Bob" -names[3] = "Charlie" - -ages[1] = 25 -ages[2] = 30 -ages[3] = 35 - -pairs_count = zip(names, ages, name_age_pairs) -# Result: [{"first": "Alice", "second": 25}, {"first": "Bob", "second": 30}, ...] - -# Zip with different array lengths (truncates to shorter) -short_array[1] = "a" -short_array[2] = "b" - -long_array[1] = 1 -long_array[2] = 2 -long_array[3] = 3 -long_array[4] = 4 - -zipped_count = zip(short_array, long_array, pairs) -# Result: [{"first": "a", "second": 1}, {"first": "b", "second": 2}] -``` - -### 4. 
`zipWith()` - Array Combination with Function - -**Function Name Analysis:** -- **`zipWith`**: ✅ **RECOMMENDED** - Standard functional programming term -- `zip_apply`: ❌ Less common -- `combine_with`: ❌ Too verbose -- `zip_map`: ❌ Could be confused with flatMap - -**Implementation:** -```awk -function zipWith(func_name, array1, array2, result, i, keys1, keys2, count1, count2) { - # Zip two arrays and apply function to each pair - count1 = get_keys(array1, keys1) - count2 = get_keys(array2, keys2) - - for (i = 1; i <= count1 && i <= count2; i++) { - result[i] = dispatch_call(func_name, array1[keys1[i]], array2[keys2[i]]) - } - return (count1 < count2) ? count1 : count2 -} -``` - -**Usage Examples:** -```rawk -$add = (x, y) -> x + y; -$concat = (str1, str2) -> str1 " " str2; -$create_user = (name, age) -> name " (" age ")"; - -# Zip with addition -numbers1[1] = 1 -numbers1[2] = 2 -numbers1[3] = 3 - -numbers2[1] = 10 -numbers2[2] = 20 -numbers2[3] = 30 - -sums_count = zipWith("add", numbers1, numbers2, sums) -# Result: [11, 22, 33] - -# Zip with string concatenation -first_names[1] = "John" -first_names[2] = "Jane" - -last_names[1] = "Doe" -last_names[2] = "Smith" - -full_names_count = zipWith("concat", first_names, last_names, full_names) -# Result: ["John Doe", "Jane Smith"] -``` - -### 5. `take()` and `drop()` - Array Slicing - -**Function Name Analysis:** -- **`take`**: ✅ **RECOMMENDED** - Standard functional programming term -- **`drop`**: ✅ **RECOMMENDED** - Standard functional programming term -- `head`/`tail`: ❌ Less descriptive for arbitrary lengths -- `slice`/`skip`: ❌ Less common in functional programming - -**Implementation:** -```awk -function take(count, array, result, i, keys, key_count) { - # Take first n elements from array - key_count = get_keys(array, keys) - for (i = 1; i <= count && i <= key_count; i++) { - result[i] = array[keys[i]] - } - return (count < key_count) ? 
count : key_count -} - -function drop(count, array, result, i, keys, key_count) { - # Drop first n elements from array - key_count = get_keys(array, keys) - for (i = count + 1; i <= key_count; i++) { - result[i - count] = array[keys[i]] - } - return (count < key_count) ? key_count - count : 0 -} -``` - -**Usage Examples:** -```rawk -# Take first 3 elements -numbers[1] = 1 -numbers[2] = 2 -numbers[3] = 3 -numbers[4] = 4 -numbers[5] = 5 - -first_three_count = take(3, numbers, first_three) -# Result: [1, 2, 3] - -# Drop first 2 elements -remaining_count = drop(2, numbers, remaining) -# Result: [3, 4, 5] -``` - -### 6. `find()` and `findIndex()` - Array Searching - -**Function Name Analysis:** -- **`find`**: ✅ **RECOMMENDED** - Standard, well-known -- **`findIndex`**: ✅ **RECOMMENDED** - Standard, well-known -- `search`/`searchIndex`: ❌ Less common in functional programming -- `locate`/`locateIndex`: ❌ Less common - -**Implementation:** -```awk -function find(predicate_func, array, i, keys, key_count) { - # Find first element that matches predicate - key_count = get_keys(array, keys) - for (i = 1; i <= key_count; i++) { - if (dispatch_call(predicate_func, array[keys[i]])) { - return array[keys[i]] - } - } - return "" # Not found -} - -function findIndex(predicate_func, array, i, keys, key_count) { - # Find index of first element that matches predicate - key_count = get_keys(array, keys) - for (i = 1; i <= key_count; i++) { - if (dispatch_call(predicate_func, array[keys[i]])) { - return i - } - } - return 0 # Not found -} -``` - -**Usage Examples:** -```rawk -$is_positive = (x) -> x > 0; -$is_even = (x) -> x % 2 == 0; -$has_error = (log) -> index(log, "ERROR") > 0; - -# Find first positive number -first_positive = find("is_positive", numbers) - -# Find index of first even number -first_even_index = findIndex("is_even", numbers) - -# Find first error log -first_error = find("has_error", logs) -``` - -## Implementation Strategy - -### Phase 1: Core Filtering and Searching 
-1. **`filter()`** - Most commonly used, fundamental utility -2. **`find()`** and **`findIndex()`** - Essential for data processing - -### Phase 2: Array Transformation -3. **`flatMap()`** - Advanced array transformation -4. **`take()`** and **`drop()`** - Array slicing utilities - -### Phase 3: Array Combination -5. **`zip()`** - Basic array combination -6. **`zipWith()`** - Advanced array combination with functions - -## Testing Strategy - -### Unit Tests -- Test each function with various array types -- Test edge cases (empty arrays, single elements) -- Test error conditions and invalid inputs - -### Integration Tests -- Test combinations of functions (e.g., `filter` + `map`) -- Test with real-world data processing scenarios -- Test performance with large arrays - -### Example Test Cases -```rawk -# Test filter function -$is_positive = (x) -> x > 0; -numbers[1] = -1 -numbers[2] = 0 -numbers[3] = 1 -numbers[4] = -5 -numbers[5] = 10 - -positive_count = filter("is_positive", numbers, positive_numbers) -expect_equal(positive_count, 2, "Should find 2 positive numbers") -expect_equal(positive_numbers[1], 1, "First positive should be 1") -expect_equal(positive_numbers[2], 10, "Second positive should be 10") - -# Test zip function -names[1] = "Alice" -names[2] = "Bob" -ages[1] = 25 -ages[2] = 30 - -pairs_count = zip(names, ages, pairs) -expect_equal(pairs_count, 2, "Should create 2 pairs") -expect_equal(pairs[1]["first"], "Alice", "First pair first should be Alice") -expect_equal(pairs[1]["second"], 25, "First pair second should be 25") -``` - -## Success Criteria - -- [ ] All functions work correctly with various array types -- [ ] Functions handle edge cases gracefully -- [ ] Performance is acceptable for typical use cases -- [ ] All existing tests continue to pass -- [ ] Documentation is updated with examples -- [ ] Integration with existing functional programming features - -## Future Enhancements - -### Phase 4: Advanced Utilities -- **`groupBy()`** - Group array 
elements by key function -- **`partition()`** - Split array into two based on predicate -- **`sortBy()`** - Sort array using comparison function -- **`unique()`** - Remove duplicate elements - -### Phase 5: Performance Optimizations -- **Lazy evaluation** for large arrays -- **Stream processing** for memory efficiency -- **Parallel processing** where possible - -## Conclusion - -This plan provides a clear path to implementing comprehensive functional programming utilities in rawk. The chosen function names are standard, well-known, and descriptive, ensuring they will be familiar to developers with functional programming experience while remaining accessible to AWK users. \ No newline at end of file |