diff options
Diffstat (limited to 'bash/talk-to-computer/quality_guard.sh')
-rwxr-xr-x | bash/talk-to-computer/quality_guard.sh | 366 |
1 files changed, 366 insertions, 0 deletions
#!/bin/bash
#
# Quality Guard - System-Wide Output Quality Protection
# (bash/talk-to-computer/quality_guard.sh, mode 100755)
#
# This module provides comprehensive quality monitoring for all AI thinking
# mechanisms to prevent output degradation, nonsense, and repetitive responses.
#
# Usage (after sourcing this file):
#   guard_output_quality RESPONSE CONTEXT MECHANISM MODEL
#
# Requires: awk, grep, sort, tr, wc. `ollama` is only needed when a correction
# is attempted.
#
# NOTE: strict mode (`set -euo pipefail`) is deliberately NOT enabled here: the
# functions are exported for use by other scripts, and shell options set in a
# sourced file would leak into the caller.

# --- Configuration ---
MIN_RESPONSE_LENGTH=30       # words; anything shorter gets the lowest length score
MAX_REPETITION_RATIO=0.4     # unique/total word ratio below this = heavy repetition
MAX_NONSENSE_SCORE=0.6       # degradation score above this triggers correction
DEGRADATION_THRESHOLD=0.65   # quality score below this triggers correction
MAX_CORRECTION_ATTEMPTS=2    # how many times the model is re-prompted
FALLBACK_ENABLED=true        # "true" => attempt correction / canned fallback

# The functions below are exported with `export -f`; they read these settings,
# so the settings must be visible in child shells as well.
export MIN_RESPONSE_LENGTH MAX_REPETITION_RATIO MAX_NONSENSE_SCORE
export DEGRADATION_THRESHOLD MAX_CORRECTION_ATTEMPTS FALLBACK_ENABLED

# --- Internal helpers ---
# Floating-point work is done with awk (already a dependency of this file)
# rather than bc, so a missing bc can no longer silently turn every score into
# a hard-coded fallback value.

# Succeeds when $1 < $2 (numeric, floating point).
_qg_lt() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) < (b + 0)) }'
}

# Succeeds when $1 > $2 (numeric, floating point).
_qg_gt() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
}

# Prints $1 + $2 with two decimals.
_qg_add() {
  awk -v a="$1" -v b="$2" 'BEGIN { printf "%.2f\n", a + b }'
}

# Prints the number of whitespace-separated words in $1 (no padding).
_qg_word_count() {
  wc -w <<<"$1" | tr -d '[:space:]'
}

# Succeeds when $1 contains the same alphabetic word (case-insensitive)
# four or more times in a row.
_qg_has_word_run() {
  tr -s '[:space:]' '\n' <<<"$1" | awk '
    {
      w = tolower($0)
      # Only alphabetic tokens count, so table rules ("- - - -") or numeric
      # rows ("0 0 0 0") are not mistaken for repetition.
      if (w ~ /[a-z]/ && w == prev) {
        run++
      } else {
        run = 1
        prev = w
      }
      if (run >= 4) { found = 1; exit }
    }
    END { exit !found }'
}

# --- Quality Assessment Functions ---

#######################################
# Compute the overall weighted quality score of a response.
# Weights: length 0.15, coherence 0.25, repetition 0.2, relevance 0.25,
# structure 0.15.
# Arguments: $1 - response text
#            $2 - original request/context (forwarded to assess_relevance)
#            $3 - mechanism name (e.g. puzzle, socratic, ...)
# Outputs:   score with two decimals (e.g. 0.89) on stdout
#######################################
assess_quality() {
  local response=$1 context=$2 mechanism=$3
  local length_score coherence_score repetition_score relevance_score
  local structure_score

  length_score=$(assess_length "$response")
  coherence_score=$(assess_coherence "$response")
  repetition_score=$(assess_repetition "$response")
  relevance_score=$(assess_relevance "$response" "$context" "$mechanism")
  structure_score=$(assess_structure "$response")

  awk -v len="$length_score" -v coh="$coherence_score" \
    -v rep="$repetition_score" -v rel="$relevance_score" \
    -v str="$structure_score" \
    'BEGIN {
      printf "%.2f\n", len * 0.15 + coh * 0.25 + rep * 0.2 + rel * 0.25 + str * 0.15
    }'
}

#######################################
# Score a response by its word count.
# Globals:   MIN_RESPONSE_LENGTH (read)
# Arguments: $1 - response text
# Outputs:   0.2 (< MIN_RESPONSE_LENGTH words), 0.6 (< 80), 0.9 (< 200),
#            0.8 (< 500), otherwise 0.7
#######################################
assess_length() {
  local response=$1
  local word_count
  word_count=$(_qg_word_count "$response")

  if (( word_count < MIN_RESPONSE_LENGTH )); then
    echo "0.2"
  elif (( word_count < 80 )); then
    echo "0.6"
  elif (( word_count < 200 )); then
    echo "0.9"
  elif (( word_count < 500 )); then
    echo "0.8"
  else
    echo "0.7"
  fi
}

#######################################
# Score sentence structure. The text is split on '.' and newlines; blank
# fragments are dropped and the average fragment length is examined.
# Arguments: $1 - response text
# Outputs:   0.3 (average > 300 chars), 0.4 (average < 15 chars),
#            0.5 (fewer than 2 fragments, including none at all), else 0.8
#######################################
assess_coherence() {
  local response=$1

  tr '.' '\n' <<<"$response" \
    | grep -v '^[[:space:]]*$' \
    | awk '
        { total += length($0) }
        END {
          # No fragments at all: report the "too few sentences" score
          # explicitly instead of dividing by zero.
          if (NR == 0) { print "0.5"; exit }
          avg = total / NR
          if (avg > 300)     print "0.3"
          else if (avg < 15) print "0.4"
          else if (NR < 2)   print "0.5"
          else               print "0.8"
        }'
}

#######################################
# Score a response by its ratio of unique words to total words.
# Words are split on any whitespace for both counts, so the ratio can never
# exceed 1 and is not skewed by tabs, newlines or runs of spaces.
# Globals:   MAX_REPETITION_RATIO (read)
# Arguments: $1 - response text
# Outputs:   0.0 (no words), 0.1 (ratio < MAX_REPETITION_RATIO),
#            0.5 (ratio < 0.6), else 0.9
#######################################
assess_repetition() {
  local response=$1
  local total_words unique_words

  total_words=$(_qg_word_count "$response")
  if (( total_words == 0 )); then
    echo "0.0"
    return 0
  fi

  unique_words=$(tr -s '[:space:]' '\n' <<<"$response" \
    | grep -v '^$' | sort -u | wc -l | tr -d '[:space:]')

  awk -v u="$unique_words" -v t="$total_words" -v max="$MAX_REPETITION_RATIO" \
    'BEGIN {
      ratio = u / t
      if (ratio < max + 0)  print "0.1"
      else if (ratio < 0.6) print "0.5"
      else                  print "0.9"
    }'
}

#######################################
# Score relevance by looking for mechanism-specific keywords
# (case-insensitive substring match).
# Arguments: $1 - response text
#            $2 - context (accepted for interface compatibility; not used)
#            $3 - mechanism name
# Outputs:   0.9 if a keyword is present, 0.6 if not, 0.7 for an
#            unrecognised mechanism
#######################################
assess_relevance() {
  local response=$1 mechanism=$3
  local keywords

  case "$mechanism" in
    puzzle)      keywords='algorithm|code|implement|function|solution' ;;
    socratic)    keywords='question|analyze|investigate|examine|why|how' ;;
    exploration) keywords='compare|alternative|option|approach|strategy' ;;
    consensus)   keywords='perspective|view|opinion|agree|disagree|multiple' ;;
    critique)    keywords='improve|enhance|fix|refine|better|optimize' ;;
    synthesis)   keywords='combine|integrate|merge|unify|synthesize' ;;
    peer_review) keywords='review|feedback|suggest|advice|collaborate' ;;
    *)
      echo "0.7"
      return 0
      ;;
  esac

  if grep -q -i -E -- "$keywords" <<<"$response"; then
    echo "0.9"
  else
    echo "0.6"
  fi
}

#######################################
# Score formatting: base 0.5, +0.2 if there is a blank line (paragraphs),
# +0.15 if a line starts with "-" (list), +0.15 if a line starts with a digit.
# Arguments: $1 - response text
# Outputs:   score with two decimals (0.50 .. 1.00)
#######################################
assess_structure() {
  local response=$1
  local blank_lines list_lines numbered_lines

  # grep -c prints 0 (and exits 1) when nothing matches; only the count is used.
  blank_lines=$(grep -c '^[[:space:]]*$' <<<"$response")
  list_lines=$(grep -c '^[[:space:]]*[-]' <<<"$response")
  numbered_lines=$(grep -c '^[[:space:]]*[0-9]' <<<"$response")

  awk -v p="${blank_lines:-0}" -v l="${list_lines:-0}" -v n="${numbered_lines:-0}" \
    'BEGIN {
      score = 0.5
      if (p > 0) score += 0.2
      if (l > 0) score += 0.15
      if (n > 0) score += 0.15
      printf "%.2f\n", score
    }'
}

# --- Degradation Detection ---

#######################################
# Accumulate a degradation score from several red-flag patterns:
#   +0.9 known filler/nonsense strings
#   +0.8 the same word repeated four or more times in a row
#   +0.4 more than half of the lines consist solely of punctuation
#   +0.5 fewer than 15 words
#   +0.6 any letter repeated five or more times in a row
# Arguments: $1 - response text
# Outputs:   "0" when nothing is flagged, otherwise the sum with two decimals
#######################################
detect_degradation_patterns() {
  local response=$1
  local degradation_score=0
  local punct_only_lines total_lines word_count

  # Nonsense / filler text.
  if grep -q -i -E 'lorem ipsum|asdf|qwerty|random text|test test test' \
    <<<"$response"; then
    degradation_score=$(_qg_add "$degradation_score" 0.9)
  fi

  # Excessive repetition: a real check for consecutive repeated words.
  if _qg_has_word_run "$response"; then
    degradation_score=$(_qg_add "$degradation_score" 0.8)
  fi

  # Incoherent punctuation. At least one punctuation character is required,
  # so ordinary blank lines between paragraphs are not counted.
  punct_only_lines=$(grep -c '^[[:space:]]*[[:punct:]][[:punct:][:space:]]*$' \
    <<<"$response")
  total_lines=$(wc -l <<<"$response" | tr -d '[:space:]')
  if (( total_lines > 0 && punct_only_lines * 100 / total_lines > 50 )); then
    degradation_score=$(_qg_add "$degradation_score" 0.4)
  fi

  # Extremely short responses.
  word_count=$(_qg_word_count "$response")
  if (( word_count < 15 )); then
    degradation_score=$(_qg_add "$degradation_score" 0.5)
  fi

  # Gibberish: any single letter five or more times in a row.
  if grep -q '\([[:alpha:]]\)\1\{4,\}' <<<"$response"; then
    degradation_score=$(_qg_add "$degradation_score" 0.6)
  fi

  echo "$degradation_score"
}

# --- Correction Mechanisms ---

#######################################
# Re-prompt the model for a fresh answer, up to MAX_CORRECTION_ATTEMPTS times.
# Globals:   MAX_CORRECTION_ATTEMPTS, DEGRADATION_THRESHOLD,
#            MAX_NONSENSE_SCORE (read)
# Arguments: $1 - the degraded response (accepted for interface
#                 compatibility; it is not included in the prompt)
#            $2 - original request/context
#            $3 - mechanism name
#            $4 - ollama model name
# Outputs:   corrected response, or the canned fallback, on stdout;
#            progress messages on stderr
# Returns:   0 if a corrected response passed the checks, 1 if the fallback
#            response was used
#######################################
correct_degraded_output() {
  local context=$2 mechanism=$3 model=$4
  local max_attempts=${MAX_CORRECTION_ATTEMPTS:-2}
  local attempt corrected_response correction_quality degradation_score

  # The prompt does not change between attempts, so build it once.
  local correction_prompt="The previous response was degraded or nonsensical. Please provide a clear, coherent response to:

ORIGINAL REQUEST: $context

RESPONSE TYPE: $mechanism

Please ensure your response is:
- Relevant and focused on the request
- Well-structured with proper paragraphs and formatting
- Free of repetition, nonsense, or gibberish
- Appropriate length (at least 50 words)
- Clear and understandable

Provide a fresh, high-quality response:"

  # Without ollama no attempt can succeed; skip straight to the fallback.
  if ! command -v ollama >/dev/null 2>&1; then
    echo "❌ ollama not found; cannot attempt correction. Using fallback response." >&2
    generate_fallback_response "$mechanism" "$context"
    return 1
  fi

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    echo "🔄 Correction attempt $attempt/$max_attempts..." >&2

    # Declaration is separate from assignment so the exit status is visible.
    if ! corrected_response=$(ollama run "$model" "$correction_prompt"); then
      echo "⚠️ ollama run failed on attempt $attempt" >&2
      continue
    fi

    correction_quality=$(assess_quality "$corrected_response" "$context" "$mechanism")
    degradation_score=$(detect_degradation_patterns "$corrected_response")

    echo "Correction quality: $correction_quality, Degradation: $degradation_score" >&2

    if _qg_gt "$correction_quality" "$DEGRADATION_THRESHOLD" \
      && _qg_lt "$degradation_score" "$MAX_NONSENSE_SCORE"; then
      echo "✅ Output corrected successfully (quality: $correction_quality)" >&2
      printf '%s\n' "$corrected_response"
      return 0
    fi
  done

  echo "❌ All correction attempts failed. Using fallback response." >&2
  generate_fallback_response "$mechanism" "$context"
  return 1
}

#######################################
# Print a canned apology appropriate to the mechanism.
# Arguments: $1 - mechanism name
#            $2 - context (accepted for interface compatibility; not used)
# Outputs:   fallback message on stdout
#######################################
generate_fallback_response() {
  local mechanism=$1

  case "$mechanism" in
    puzzle)
      echo "I apologize, but I'm experiencing difficulties providing a proper response to your puzzle or coding challenge. Please try rephrasing your question or ask for a different type of assistance. You might also want to try breaking down your request into smaller, more specific questions."
      ;;
    socratic)
      echo "I'm unable to provide the deep analysis you're looking for at this time. Please try asking your question again with more specific details, or consider rephrasing it in a different way."
      ;;
    exploration)
      echo "I'm having trouble exploring alternatives and strategies for your request. Please try asking your question again or provide more context about what you're looking to explore."
      ;;
    consensus)
      echo "I cannot provide multiple perspectives or consensus-building guidance currently. Please try rephrasing your request or ask for a different type of assistance."
      ;;
    critique)
      echo "I'm unable to provide improvement suggestions or critique at this time. Please try asking your question again or request a different approach."
      ;;
    synthesis)
      echo "I cannot synthesize or combine approaches currently. Please try rephrasing your request or ask for a different form of assistance."
      ;;
    peer_review)
      echo "I'm having trouble providing collaborative feedback or review. Please try asking your question again or request a different type of help."
      ;;
    *)
      echo "I'm experiencing difficulties providing a proper response. Please try rephrasing your question or ask for a different type of assistance."
      ;;
  esac
}

# --- Main Quality Guard Function ---

#######################################
# Main entry point: pass a response through unchanged if it is good enough,
# otherwise try to correct it (or fall back to a canned message).
# Globals:   DEGRADATION_THRESHOLD, MAX_NONSENSE_SCORE, FALLBACK_ENABLED (read)
# Arguments: $1 - response text
#            $2 - original request/context
#            $3 - mechanism name
#            $4 - ollama model name used for corrections
# Outputs:   the final response on stdout; diagnostics on stderr
# Returns:   status of correct_degraded_output when a correction is attempted
#            (1 means the canned fallback was used), otherwise 0
#######################################
guard_output_quality() {
  local response=$1 context=$2 mechanism=$3 model=$4
  local quality_score degradation_score

  quality_score=$(assess_quality "$response" "$context" "$mechanism")
  degradation_score=$(detect_degradation_patterns "$response")

  echo "Quality Score: $quality_score, Degradation Score: $degradation_score" >&2

  if _qg_lt "$quality_score" "$DEGRADATION_THRESHOLD" \
    || _qg_gt "$degradation_score" "$MAX_NONSENSE_SCORE"; then
    echo "⚠️ Output quality below threshold. Initiating correction..." >&2

    if [[ "$FALLBACK_ENABLED" == "true" ]]; then
      correct_degraded_output "$response" "$context" "$mechanism" "$model"
    else
      echo "❌ Quality below threshold but fallback disabled." >&2
      printf '%s\n' "$response"
    fi
  else
    echo "✅ Response quality acceptable (score: $quality_score)" >&2
    printf '%s\n' "$response"
  fi
}

# --- Utility Functions ---

#######################################
# Map a mechanism script path to its mechanism name.
# Arguments: $1 - path to the mechanism script
# Outputs:   mechanism name ("peer-review" becomes "peer_review"), or
#            "unknown" if the script's basename is not recognised
#######################################
get_mechanism_name() {
  local script_path=$1
  local script_name
  script_name=$(basename "$script_path")

  case "$script_name" in
    puzzle | socratic | exploration | consensus | critique | synthesis)
      echo "$script_name"
      ;;
    peer-review)
      echo "peer_review"
      ;;
    *)
      echo "unknown"
      ;;
  esac
}

# Export functions for use by other scripts. Every function reachable from the
# public entry points is exported (including the scorers and internal helpers);
# otherwise assess_quality / guard_output_quality fail in a child bash.
export -f _qg_lt _qg_gt _qg_add _qg_word_count _qg_has_word_run
export -f assess_length assess_coherence assess_repetition
export -f assess_relevance assess_structure
export -f assess_quality
export -f detect_degradation_patterns
export -f guard_output_quality
export -f get_mechanism_name
export -f correct_degraded_output
export -f generate_fallback_response