diff options
Diffstat (limited to 'awk/rawk/scratch/rawk_new.awk')
-rw-r--r-- | awk/rawk/scratch/rawk_new.awk | 216 |
1 files changed, 216 insertions, 0 deletions
diff --git a/awk/rawk/scratch/rawk_new.awk b/awk/rawk/scratch/rawk_new.awk new file mode 100644 index 0000000..c1f9b39 --- /dev/null +++ b/awk/rawk/scratch/rawk_new.awk @@ -0,0 +1,216 @@ +#!/usr/bin/env awk -f + +# rawk.awk - Clean Implementation +# Author: @eli_oat +# License: Public Domain +# Version: 0.1.0 + +# This script translates .rawk files into standard AWK code. +# It uses a stateful parser to handle function definitions cleanly. + +# USAGE: +# awk -f rawk_new.awk input.rawk | awk -f - +# awk -f rawk_new.awk input.rawk > output.awk + +# ----------------------------------------------------------------------------- +# VARIABLES +# ----------------------------------------------------------------------------- + +# State tracking +in_function = 0 # Are we inside a function definition? +brace_count = 0 # Brace counter for function bodies +line_count = 0 # Total lines processed + +# Function tracking +function_count = 0 + +# Main script lines (non-function code) +main_script_count = 0 + +# Validation +validation_errors = 0 + +# ----------------------------------------------------------------------------- +# MAIN PARSING LOGIC +# ----------------------------------------------------------------------------- + +{ + line_count++ + + # Skip comments and empty lines + if ($0 ~ /^[ \t]*#/ || $0 ~ /^[ \t]*$/) { + next + } + + # Check for function definition start + if (!in_function && $0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->[ \t]*\{/) { + print "# DEBUG: Matched function definition: " $0 > "/dev/stderr" + # Start of function definition + in_function = 1 + brace_count = 1 + + # Parse function header + parse_function_header($0) + next + } else if (!in_function && $0 ~ /^[ \t]*\$[a-zA-Z_][a-zA-Z0-9_]*[ \t]*=[ \t]*\([^)]*\)[ \t]*->/) { + print "# DEBUG: Function definition without braces: " $0 > "/dev/stderr" + } + + # If we're inside a function, collect the body + if (in_function) { + # Count braces + open_braces = gsub(/\{/, "&", $0) + close_braces = gsub(/\}/, "&", $0) + brace_count += open_braces - close_braces + + # Add line to function body (skip the opening brace line) + if (!($0 ~ /^[ \t]*\{/)) { + FUNCTION_BODIES[function_count] = FUNCTION_BODIES[function_count] "\n " $0 + } + + # Check if function body is complete + if (brace_count == 0) { + in_function = 0 + } + next + } + + # Regular code - add to main script + main_script_count++ + MAIN_SCRIPT[main_script_count] = $0 + + # Always skip to prevent AWK from printing input lines + next +} + +# ----------------------------------------------------------------------------- +# HELPER FUNCTIONS +# ----------------------------------------------------------------------------- + +function parse_function_header(line) { + print "# DEBUG: parse_function_header called with: " line > "/dev/stderr" + + # Extract function name + if (match(line, /\$([a-zA-Z_][a-zA-Z0-9_]*)/)) { + func_name = substr(line, RSTART + 1, RLENGTH - 1) + print "# DEBUG: Function name: " func_name > "/dev/stderr" + } else { + report_error("Invalid function name", line_count, line) + return + } + + # Extract arguments + if (match(line, /\(([^)]*)\)/)) { + args = substr(line, RSTART + 1, RLENGTH - 2) + print "# DEBUG: Arguments: " args > "/dev/stderr" + } else { + report_error("Invalid argument list", line_count, line) + return + } + + # Store function information + function_count++ + FUNCTION_NAMES[function_count] = func_name + FUNCTION_ARGS[function_count] = args + FUNCTION_BODIES[function_count] = "" + + print "# DEBUG: function_count after increment: " function_count > "/dev/stderr" +} + +function report_error(message, line_num, line) { + print "❌ " message > "/dev/stderr" + print " at line " line_num " in " FILENAME > "/dev/stderr" + print " context: " line > "/dev/stderr" + print "" > "/dev/stderr" + validation_errors++ +} + +# ----------------------------------------------------------------------------- +# CODE GENERATION +# ----------------------------------------------------------------------------- + +END { + # Check for validation errors + if (validation_errors > 0) { + print "❌ Compilation failed with " validation_errors " error(s)" > "/dev/stderr" + exit 1 + } + + # Generate standard library + generate_standard_library() + + # Generate function definitions + generate_functions() + + # Generate main script + generate_main_script() + + # Add metadata + print "# Generated by rawk v0.1.0" + print "# Functions: " function_count + print "# Lines: " line_count +} + +function generate_standard_library() { + print "# --- Standard Library ---" + print "" + + # Add basic testing functions + print "function assert(condition, message) {" + print " if (!condition) {" + print " print \"❌ Assertion failed: \" message > \"/dev/stderr\"" + print " exit 1" + print " }" + print "}" + print "" + + print "function expect_equal(actual, expected, message) {" + print " if (actual != expected) {" + print " print \"❌ Expected \" expected \" but got \" actual \" - \" message > \"/dev/stderr\"" + print " exit 1" + print " }" + print "}" + print "" +} + +function generate_functions() { + print "# DEBUG: generate_functions called, function_count = " function_count > "/dev/stderr" + if (function_count == 0) return + + print "# --- User Functions ---" + print "" + + for (i = 1; i <= function_count; i++) { + print "# DEBUG: Generating function " i ": " FUNCTION_NAMES[i] > "/dev/stderr" + print "function " FUNCTION_NAMES[i] "(" FUNCTION_ARGS[i] ") {" FUNCTION_BODIES[i] + print "}" + print "" + } +} + +function generate_main_script() { + print "# --- Main Script ---" + + # Check if there's already a BEGIN block + has_begin = 0 + for (i = 1; i <= main_script_count; i++) { + if (MAIN_SCRIPT[i] ~ /^[ \t]*BEGIN[ \t]*\{/) { + has_begin = 1 + break + } + } + + if (has_begin) { + # Print lines as-is + for (i = 1; i <= main_script_count; i++) { + print MAIN_SCRIPT[i] + } + } else { + # Wrap in BEGIN block + print "BEGIN {" + for (i = 1; i <= main_script_count; i++) { + print " " MAIN_SCRIPT[i] + } + print "}" + } +} \ No newline at end of file |