about summary refs log tree commit diff stats
path: root/awk/rawk/scratch/rawk_new.awk
diff options
context:
space:
mode:
Diffstat (limited to 'awk/rawk/scratch/rawk_new.awk')
-rw-r--r--awk/rawk/scratch/rawk_new.awk216
1 files changed, 216 insertions, 0 deletions
diff --git a/awk/rawk/scratch/rawk_new.awk b/awk/rawk/scratch/rawk_new.awk
new file mode 100644
index 0000000..c1f9b39
--- /dev/null
+++ b/awk/rawk/scratch/rawk_new.awk
@@ -0,0 +1,216 @@
+#!/usr/bin/env awk -f
+
+# rawk.awk - Clean Implementation
+# Author: @eli_oat
+# License: Public Domain
+# Version: 0.1.0
+
+# This script translates .rawk files into standard AWK code.
+# It uses a stateful parser to handle function definitions cleanly.
+
+# USAGE:
+#   awk -f rawk_new.awk input.rawk | awk -f -
+#   awk -f rawk_new.awk input.rawk > output.awk
+
+# -----------------------------------------------------------------------------
+# VARIABLES
+# -----------------------------------------------------------------------------
+
+# State tracking
+in_function = 0      # Are we inside a function definition?
+brace_count = 0      # Brace counter for function bodies
+line_count = 0       # Total lines processed
+
+# Function tracking
+function_count = 0
+
+# Main script lines (non-function code)
+main_script_count = 0
+
+# Validation
+validation_errors = 0
+
+# -----------------------------------------------------------------------------
+# MAIN PARSING LOGIC
+# -----------------------------------------------------------------------------
+
+{
+    line_count++
+    
+    # Skip comments and empty lines
+    if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) {
+        next
+    }
+    
+    # Check for function definition start
+    if (!in_function && $0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) {
+        print "# DEBUG: Matched function definition: " $0 > "/dev/stderr"
+        # Start of function definition
+        in_function = 1
+        brace_count = 1
+        
+        # Parse function header
+        parse_function_header($0)
+        next
+    } else if (!in_function && $0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) {
+        print "# DEBUG: Function definition without braces: " $0 > "/dev/stderr"
+    }
+    
+    # If we're inside a function, collect the body
+    if (in_function) {
+        # Count braces
+        open_braces = gsub(/\{/, "&", $0)
+        close_braces = gsub(/\}/, "&", $0)
+        brace_count += open_braces - close_braces
+        
+        # Add line to function body (skip the opening brace line)
+        if (!($0 ~ /^[ \t]*\{/)) {
+            FUNCTION_BODIES[function_count] = FUNCTION_BODIES[function_count] "\n    " $0
+        }
+        
+        # Check if function body is complete
+        if (brace_count == 0) {
+            in_function = 0
+        }
+        next
+    }
+    
+    # Regular code - add to main script
+    main_script_count++
+    MAIN_SCRIPT[main_script_count] = $0
+    
+    # Always skip to prevent AWK from printing input lines
+    next
+}
+
+# -----------------------------------------------------------------------------
+# HELPER FUNCTIONS
+# -----------------------------------------------------------------------------
+
+function parse_function_header(line) {
+    print "# DEBUG: parse_function_header called with: " line > "/dev/stderr"
+    
+    # Extract function name
+    if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) {
+        func_name = substr(line, RSTART + 1, RLENGTH - 1)
+        print "# DEBUG: Function name: " func_name > "/dev/stderr"
+    } else {
+        report_error("Invalid function name", line_count, line)
+        return
+    }
+    
+    # Extract arguments
+    if (match(line, /\(([^)]*)\)/)) {
+        args = substr(line, RSTART + 1, RLENGTH - 2)
+        print "# DEBUG: Arguments: " args > "/dev/stderr"
+    } else {
+        report_error("Invalid argument list", line_count, line)
+        return
+    }
+    
+    # Store function information
+    function_count++
+    FUNCTION_NAMES[function_count] = func_name
+    FUNCTION_ARGS[function_count] = args
+    FUNCTION_BODIES[function_count] = ""
+    
+    print "# DEBUG: function_count after increment: " function_count > "/dev/stderr"
+}
+
+function report_error(message, line_num, line) {
+    print "❌ " message > "/dev/stderr"
+    print "   at line " line_num " in " FILENAME > "/dev/stderr"
+    print "   context: " line > "/dev/stderr"
+    print "" > "/dev/stderr"
+    validation_errors++
+}
+
+# -----------------------------------------------------------------------------
+# CODE GENERATION
+# -----------------------------------------------------------------------------
+
+END {
+    # Check for validation errors
+    if (validation_errors > 0) {
+        print "❌ Compilation failed with " validation_errors " error(s)" > "/dev/stderr"
+        exit 1
+    }
+    
+    # Generate standard library
+    generate_standard_library()
+    
+    # Generate function definitions
+    generate_functions()
+    
+    # Generate main script
+    generate_main_script()
+    
+    # Add metadata
+    print "# Generated by rawk v0.1.0"
+    print "# Functions: " function_count
+    print "# Lines: " line_count
+}
+
+function generate_standard_library() {
+    print "# --- Standard Library ---"
+    print ""
+    
+    # Add basic testing functions
+    print "function assert(condition, message) {"
+    print "    if (!condition) {"
+    print "        print \"❌ Assertion failed: \" message > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+    
+    print "function expect_equal(actual, expected, message) {"
+    print "    if (actual != expected) {"
+    print "        print \"❌ Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\""
+    print "        exit 1"
+    print "    }"
+    print "}"
+    print ""
+}
+
+function generate_functions() {
+    print "# DEBUG: generate_functions called, function_count = " function_count > "/dev/stderr"
+    if (function_count == 0) return
+    
+    print "# --- User Functions ---"
+    print ""
+    
+    for (i = 1; i <= function_count; i++) {
+        print "# DEBUG: Generating function " i ": " FUNCTION_NAMES[i] > "/dev/stderr"
+        print "function " FUNCTION_NAMES[i] "(" FUNCTION_ARGS[i] ") {" FUNCTION_BODIES[i]
+        print "}"
+        print ""
+    }
+}
+
+function generate_main_script() {
+    print "# --- Main Script ---"
+    
+    # Check if there's already a BEGIN block
+    has_begin = 0
+    for (i = 1; i <= main_script_count; i++) {
+        if (MAIN_SCRIPT[i] ~ /^[ \t]*BEGIN[ \t]*\{/) {
+            has_begin = 1
+            break
+        }
+    }
+    
+    if (has_begin) {
+        # Print lines as-is
+        for (i = 1; i <= main_script_count; i++) {
+            print MAIN_SCRIPT[i]
+        }
+    } else {
+        # Wrap in BEGIN block
+        print "BEGIN {"
+        for (i = 1; i <= main_script_count; i++) {
+            print "    " MAIN_SCRIPT[i]
+        }
+        print "}"
+    }
+} 
\ No newline at end of file