#!/bin/bash
# File: bash/talk-to-computer/quality_guard.sh

# Quality Guard - System-Wide Output Quality Protection
# This module provides comprehensive quality monitoring for all AI thinking mechanisms
# to prevent output degradation, nonsense, and repetitive responses.

# --- Configuration ---
# Word-count floor: assess_length scores any response with fewer words at 0.2.
MIN_RESPONSE_LENGTH=30
# NOTE: despite the name, assess_repetition compares this against the
# unique-word ratio (unique words / total words); ratios BELOW it score 0.1.
MAX_REPETITION_RATIO=0.4
# Ceiling for the score from detect_degradation_patterns: a response above it
# is sent for correction, and a correction must come in below it to be accepted.
MAX_NONSENSE_SCORE=0.6
# Floor for the score from assess_quality: a response below it is sent for
# correction, and a correction must score above it to be accepted.
DEGRADATION_THRESHOLD=0.65
# Number of regeneration attempts correct_degraded_output makes before it
# gives up and emits the canned fallback text.
MAX_CORRECTION_ATTEMPTS=2
# Gates the whole correction step in guard_output_quality: unless this is the
# string "true", a flagged response is returned unchanged.
FALLBACK_ENABLED=true

# --- Quality Assessment Functions ---

# Compute the overall quality score of a response as a weighted average of
# the five assess_* sub-scores.
#
# Arguments:
#   $1 - response text to score
#   $2 - original request/context (forwarded to assess_relevance)
#   $3 - mechanism name (forwarded to assess_relevance)
# Outputs:
#   The weighted score on stdout with two decimals and a leading zero,
#   e.g. "0.72". Falls back to "0.5" if the score cannot be computed.
#
# Weights: length 0.15, coherence 0.25, repetition 0.20, relevance 0.25,
# structure 0.15.
assess_quality() {
    local response="$1"
    local context="$2"
    local mechanism="$3"

    # Declared separately from the assignments so `local` does not mask the
    # exit status of the helpers.
    local length_score coherence_score repetition_score relevance_score structure_score
    length_score=$(assess_length "$response")
    coherence_score=$(assess_coherence "$response")
    repetition_score=$(assess_repetition "$response")
    relevance_score=$(assess_relevance "$response" "$context" "$mechanism")
    structure_score=$(assess_structure "$response")

    # The sum is computed in floating point and rounded once at the end, so
    # individual weighted terms are not truncated to two decimals first.
    # A sub-score that came back empty counts as a neutral 0.5 instead of
    # breaking the whole expression. LC_ALL=C pins "." as the decimal point.
    local overall_score
    overall_score=$(LC_ALL=C awk \
        -v len="${length_score:-0.5}" \
        -v coh="${coherence_score:-0.5}" \
        -v rep="${repetition_score:-0.5}" \
        -v rel="${relevance_score:-0.5}" \
        -v str="${structure_score:-0.5}" \
        'BEGIN { printf "%.2f\n", len * 0.15 + coh * 0.25 + rep * 0.2 + rel * 0.25 + str * 0.15 }' \
        2>/dev/null) || overall_score="0.5"

    echo "${overall_score:-0.5}"
}

# Score a response by its word count.
#
# Arguments:
#   $1 - response text
# Outputs:
#   "0.2" below MIN_RESPONSE_LENGTH words, "0.6" below 80, "0.9" below 200,
#   "0.8" below 500, otherwise "0.7".
assess_length() {
    local response="$1"
    local words
    words=$(wc -w <<< "$response")

    # Upper bounds (exclusive) and the score for the first bound that the
    # word count falls under; checked in order.
    local -a bounds=("$MIN_RESPONSE_LENGTH" 80 200 500)
    local -a scores=("0.2" "0.6" "0.9" "0.8")
    local i

    for i in "${!bounds[@]}"; do
        if [ "$words" -lt "${bounds[i]}" ]; then
            echo "${scores[i]}"
            return 0
        fi
    done

    # 500 words or more
    echo "0.7"
}

# Score coherence from a rough sentence-length heuristic.
#
# The response is split on "." (and on existing line breaks); blank pieces
# are dropped and the remaining pieces are treated as sentences.
#
# Arguments:
#   $1 - response text
# Outputs:
#   "0.3" if the average sentence is longer than 300 characters,
#   "0.4" if it is shorter than 15 characters,
#   "0.5" if there are fewer than two sentences,
#   "0.8" otherwise.
assess_coherence() {
    local response="$1"

    # One pass: count the sentences and sum their lengths, then classify
    # inside awk so no separate numeric-comparison tool is needed.
    printf '%s\n' "$response" \
        | tr '.' '\n' \
        | grep -v '^[[:space:]]*$' \
        | awk '
            { count++; chars += length($0) }
            END {
                # With no sentences at all, assume a neutral average of 50 so
                # the result is decided by the sentence-count rule.
                avg = (count > 0) ? chars / count : 50
                if (avg > 300)      print "0.3"
                else if (avg < 15)  print "0.4"
                else if (count < 2) print "0.5"
                else                print "0.8"
            }'
}

# Score lexical variety as the ratio of unique words to total words.
#
# Words are whitespace-delimited tokens (spaces, tabs and newlines all
# separate words), compared case-sensitively and with punctuation attached.
#
# Arguments:
#   $1 - response text
# Globals:
#   MAX_REPETITION_RATIO (read) - unique/total ratio below which the response
#                                 counts as highly repetitive (default 0.4)
# Outputs:
#   "0.0" for a response with no words,
#   "0.1" if unique/total < MAX_REPETITION_RATIO,
#   "0.5" if unique/total < 0.6,
#   "0.9" otherwise.
assess_repetition() {
    local response="$1"
    local max_ratio="${MAX_REPETITION_RATIO:-0.4}"

    # Normalising every whitespace run to a single newline makes the unique
    # count and the total count come from the same token stream, so tabs,
    # line breaks and repeated spaces cannot skew the ratio.
    printf '%s\n' "$response" \
        | tr -s '[:space:]' '\n' \
        | LC_ALL=C awk -v max_ratio="$max_ratio" '
            length($0) > 0 {
                total++
                if (!($0 in seen)) { seen[$0] = 1; unique++ }
            }
            END {
                if (total == 0) { print "0.0"; exit }
                ratio = unique / total
                if (ratio < max_ratio + 0) print "0.1"
                else if (ratio < 0.6)      print "0.5"
                else                       print "0.9"
            }'
}

# Score how well a response fits the mechanism that produced it, by looking
# for mechanism-specific keywords (case-insensitive, substring match).
#
# Arguments:
#   $1 - response text
#   $2 - original request/context (accepted but not used by the check)
#   $3 - mechanism name
# Outputs:
#   "0.9" if any keyword for the mechanism occurs in the response,
#   "0.6" if none does,
#   "0.7" for a mechanism with no keyword list.
assess_relevance() {
    local response="$1"
    local mechanism="$3"
    local keywords

    # Pick the keyword alternation for the mechanism; unknown mechanisms get
    # the neutral score straight away.
    case "$mechanism" in
        "puzzle")
            keywords="algorithm\|code\|implement\|function\|solution"
            ;;
        "socratic")
            keywords="question\|analyze\|investigate\|examine\|why\|how"
            ;;
        "exploration")
            keywords="compare\|alternative\|option\|approach\|strategy"
            ;;
        "consensus")
            keywords="perspective\|view\|opinion\|agree\|disagree\|multiple"
            ;;
        "critique")
            keywords="improve\|enhance\|fix\|refine\|better\|optimize"
            ;;
        "synthesis")
            keywords="combine\|integrate\|merge\|unify\|synthesize"
            ;;
        "peer_review")
            keywords="review\|feedback\|suggest\|advice\|collaborate"
            ;;
        *)
            echo "0.7"
            return 0
            ;;
    esac

    if grep -q -i "$keywords" <<< "$response"; then
        echo "0.9"
    else
        echo "0.6"
    fi
}

# Score the visible structure of a response.
#
# Starts from 0.5 and adds:
#   +0.20 if the response contains a blank line (paragraph break),
#   +0.15 if any line starts with "-" (list item),
#   +0.15 if any line starts with a digit (numbered item).
# Leading whitespace on a line is ignored for the list/number checks.
#
# Arguments:
#   $1 - response text
# Outputs:
#   The score on stdout, always with a leading zero: one of
#   "0.5", "0.65", "0.7", "0.8", "0.85", "1.0".
assess_structure() {
    local response="$1"
    # Accumulate in hundredths so plain shell integer arithmetic is enough
    # and the output format is the same for every combination of bonuses.
    local hundredths=50

    # A response with no visible characters has no structure to reward; in
    # particular the newline added when feeding it to grep must not count as
    # a paragraph break.
    if [[ "$response" =~ [^[:space:]] ]]; then
        if grep -q '^[[:space:]]*$' <<< "$response"; then
            hundredths=$((hundredths + 20))
        fi
        if grep -q '^[[:space:]]*[-]' <<< "$response"; then
            hundredths=$((hundredths + 15))
        fi
        if grep -q '^[[:space:]]*[0-9]' <<< "$response"; then
            hundredths=$((hundredths + 15))
        fi
    fi

    local structure_score
    printf -v structure_score '%d.%02d' $((hundredths / 100)) $((hundredths % 100))
    # Drop one trailing zero so the base case keeps its "0.5" spelling.
    echo "${structure_score%0}"
}

# --- Degradation Detection ---

# Score a response for signs of degraded output. Higher is worse.
#
# Each check that fires adds its weight to the total:
#   +0.9  known filler text ("lorem ipsum", "asdf", "qwerty", "random text",
#         "test test test"), case-insensitive
#   +0.8  the literal phrase "the same phrase repeated multiple times"
#         (placeholder check: it does not detect repetition in general)
#   +0.4  more than half of the lines consist only of punctuation
#   +0.5  fewer than 15 words
#   +0.6  a run of five identical letters from a-e (simplified gibberish check)
#
# Arguments:
#   $1 - response text
# Outputs:
#   The total on stdout with one decimal and a leading zero, from "0.0"
#   (nothing fired) up to "3.2" (everything fired).
detect_degradation_patterns() {
    local response="$1"
    # Accumulate in tenths so integer shell arithmetic is sufficient.
    local tenths=0

    # Check for nonsense patterns
    if grep -q -i "lorem ipsum\|asdf\|qwerty\|random text\|test test test" <<< "$response"; then
        tenths=$((tenths + 9))
    fi

    # Check for excessive repetition (simplified pattern)
    if grep -q "the same phrase repeated multiple times" <<< "$response"; then
        tenths=$((tenths + 8))
    fi

    # Check for incoherent punctuation: lines made up of punctuation only.
    # At least one punctuation character is required, so blank lines used as
    # paragraph spacing are not counted.
    local punct_only_lines total_lines
    punct_only_lines=$(grep -c "^[[:space:]]*[[:punct:]][[:punct:]]*[[:space:]]*$" <<< "$response")
    total_lines=$(grep -c '' <<< "$response")
    if [ "${total_lines:-0}" -gt 0 ]; then
        local punct_ratio=$(( ${punct_only_lines:-0} * 100 / total_lines ))
        if [ "$punct_ratio" -gt 50 ]; then
            # Only flag if more than half the lines are punctuation-only
            tenths=$((tenths + 4))
        fi
    fi

    # Check for extremely short responses
    local word_count
    word_count=$(wc -w <<< "$response")
    if [ "$word_count" -lt 15 ]; then
        tenths=$((tenths + 5))
    fi

    # Check for gibberish (simplified pattern)
    if grep -q "aaaaa\|bbbbb\|ccccc\|ddddd\|eeeee" <<< "$response"; then
        tenths=$((tenths + 6))
    fi

    printf '%d.%d\n' $((tenths / 10)) $((tenths % 10))
}

# --- Correction Mechanisms ---

# Ask the model to regenerate a degraded response, retrying up to
# MAX_CORRECTION_ATTEMPTS times, and fall back to canned text if no attempt
# is good enough.
#
# Arguments:
#   $1 - the degraded response (accepted for interface compatibility; unused)
#   $2 - original request, embedded in the correction prompt
#   $3 - mechanism name, embedded in the prompt and used to pick the fallback
#   $4 - model name passed to `ollama run`
# Globals (read):
#   MAX_CORRECTION_ATTEMPTS (default 2), DEGRADATION_THRESHOLD (default 0.65),
#   MAX_NONSENSE_SCORE (default 0.6)
# Outputs:
#   stdout - the first regenerated response whose quality score is above
#            DEGRADATION_THRESHOLD and whose degradation score is below
#            MAX_NONSENSE_SCORE; otherwise the fallback text
#   stderr - progress and score messages
# Returns:
#   0 if a regenerated response was accepted, 1 if the fallback was used.
correct_degraded_output() {
    local context="$2"
    local mechanism="$3"
    local model="$4"
    local max_attempts="${MAX_CORRECTION_ATTEMPTS:-2}"
    local quality_floor="${DEGRADATION_THRESHOLD:-0.65}"
    local nonsense_ceiling="${MAX_NONSENSE_SCORE:-0.6}"
    local attempt corrected_response correction_quality degradation_score

    # The prompt is identical for every attempt, so build it once.
    local correction_prompt="The previous response was degraded or nonsensical. Please provide a clear, coherent response to:

ORIGINAL REQUEST: $context

RESPONSE TYPE: $mechanism

Please ensure your response is:
- Relevant and focused on the request
- Well-structured with proper paragraphs and formatting
- Free of repetition, nonsense, or gibberish
- Appropriate length (at least 50 words)
- Clear and understandable

Provide a fresh, high-quality response:"

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        echo "🔄 Correction attempt $attempt/$max_attempts..." >&2

        # Get corrected response; a failed or empty model call cannot pass
        # the checks below, so skip straight to the next attempt.
        corrected_response=""
        if ! corrected_response=$(ollama run "$model" "$correction_prompt") \
            || [ -z "$corrected_response" ]; then
            echo "⚠️  Model returned no usable output on attempt $attempt." >&2
            continue
        fi

        # Assess correction quality
        correction_quality=$(assess_quality "$corrected_response" "$context" "$mechanism")
        degradation_score=$(detect_degradation_patterns "$corrected_response")

        echo "Correction quality: $correction_quality, Degradation: $degradation_score" >&2

        # Accept only when both scores are valid numbers on the right side of
        # their limits; awk's exit status carries the decision.
        if LC_ALL=C awk \
            -v q="$correction_quality" -v qmin="$quality_floor" \
            -v d="$degradation_score" -v dmax="$nonsense_ceiling" '
            function num(x) { return x ~ /^-?[0-9]*\.?[0-9]+$/ }
            BEGIN {
                ok = num(q) && num(d) && (q + 0 > qmin + 0) && (d + 0 < dmax + 0)
                exit !ok
            }' 2>/dev/null; then
            echo "✅ Output corrected successfully (quality: $correction_quality)" >&2
            echo "$corrected_response"
            return 0
        fi
    done

    echo "❌ All correction attempts failed. Using fallback response." >&2
    generate_fallback_response "$mechanism" "$context"
    return 1
}

# Print the canned apology used when no acceptable response could be produced.
#
# Arguments:
#   $1 - mechanism name; selects the wording (any other value gets the
#        generic message)
#   $2 - original request (accepted but not used in the message)
# Outputs:
#   One line of fallback text on stdout.
generate_fallback_response() {
    local mechanism="$1"
    local message

    case "$mechanism" in
        "puzzle")
            message="I apologize, but I'm experiencing difficulties providing a proper response to your puzzle or coding challenge. Please try rephrasing your question or ask for a different type of assistance. You might also want to try breaking down your request into smaller, more specific questions."
            ;;
        "socratic")
            message="I'm unable to provide the deep analysis you're looking for at this time. Please try asking your question again with more specific details, or consider rephrasing it in a different way."
            ;;
        "exploration")
            message="I'm having trouble exploring alternatives and strategies for your request. Please try asking your question again or provide more context about what you're looking to explore."
            ;;
        "consensus")
            message="I cannot provide multiple perspectives or consensus-building guidance currently. Please try rephrasing your request or ask for a different type of assistance."
            ;;
        "critique")
            message="I'm unable to provide improvement suggestions or critique at this time. Please try asking your question again or request a different approach."
            ;;
        "synthesis")
            message="I cannot synthesize or combine approaches currently. Please try rephrasing your request or ask for a different form of assistance."
            ;;
        "peer_review")
            message="I'm having trouble providing collaborative feedback or review. Please try asking your question again or request a different type of help."
            ;;
        *)
            message="I'm experiencing difficulties providing a proper response. Please try rephrasing your question or ask for a different type of assistance."
            ;;
    esac

    printf '%s\n' "$message"
}

# --- Main Quality Guard Function ---

# Entry point: pass a response through unchanged when it looks healthy,
# otherwise try to replace it with a corrected one.
#
# Arguments:
#   $1 - response text to check
#   $2 - original request/context
#   $3 - mechanism name
#   $4 - model name to use for correction
# Globals (read):
#   DEGRADATION_THRESHOLD (default 0.65), MAX_NONSENSE_SCORE (default 0.6),
#   FALLBACK_ENABLED (default "true")
# Outputs:
#   stdout - the response to use: the original one, or whatever
#            correct_degraded_output prints
#   stderr - score and status messages
# Returns:
#   0 when the original response is returned; otherwise the status of
#   correct_degraded_output.
guard_output_quality() {
    local response="$1"
    local context="$2"
    local mechanism="$3"
    local model="$4"
    local quality_floor="${DEGRADATION_THRESHOLD:-0.65}"
    local nonsense_ceiling="${MAX_NONSENSE_SCORE:-0.6}"

    # Assess quality
    local quality_score degradation_score
    quality_score=$(assess_quality "$response" "$context" "$mechanism")
    degradation_score=$(detect_degradation_patterns "$response")

    echo "Quality Score: $quality_score, Degradation Score: $degradation_score" >&2

    # Correction is needed when the quality score is below its floor OR the
    # degradation score is above its ceiling. A score that is not a number is
    # ignored rather than treated as a failure, so a broken scorer lets the
    # response through instead of forcing a correction.
    local needs_correction=false
    if LC_ALL=C awk \
        -v q="$quality_score" -v qmin="$quality_floor" \
        -v d="$degradation_score" -v dmax="$nonsense_ceiling" '
        function num(x) { return x ~ /^-?[0-9]*\.?[0-9]+$/ }
        BEGIN {
            low = num(q) && num(qmin) && (q + 0 < qmin + 0)
            bad = num(d) && num(dmax) && (d + 0 > dmax + 0)
            exit !(low || bad)
        }' 2>/dev/null; then
        needs_correction=true
    fi

    if [ "$needs_correction" != "true" ]; then
        echo "✅ Response quality acceptable (score: $quality_score)" >&2
        # printf rather than echo: a response such as "-n" must be printed
        # literally, not interpreted as an option.
        printf '%s\n' "$response"
        return 0
    fi

    echo "⚠️  Output quality below threshold. Initiating correction..." >&2

    if [ "${FALLBACK_ENABLED:-true}" = "true" ]; then
        correct_degraded_output "$response" "$context" "$mechanism" "$model"
    else
        echo "❌ Quality below threshold but fallback disabled." >&2
        printf '%s\n' "$response"
    fi
}

# --- Utility Functions ---

# Map a script path to the mechanism name used by the quality checks.
#
# Arguments:
#   $1 - path to a mechanism script; only its final path component is used
# Outputs:
#   The mechanism name on stdout: the file name itself for the known
#   mechanisms, "peer_review" for a file called "peer-review", and
#   "unknown" for anything else.
get_mechanism_name() {
    local script_path="$1"
    local file_name
    file_name=$(basename "$script_path")

    case "$file_name" in
        puzzle | socratic | exploration | consensus | critique | synthesis)
            # File name and mechanism name coincide.
            printf '%s\n' "$file_name"
            ;;
        peer-review)
            # The script uses a hyphen; the mechanism name uses an underscore.
            printf '%s\n' "peer_review"
            ;;
        *)
            printf '%s\n' "unknown"
            ;;
    esac
}

# Export functions for use by other scripts.
#
# `export -f` makes a function available to child bash processes, but a child
# receives only what was exported. assess_quality calls the five assess_*
# helpers, and several functions read the configuration variables, so those
# are exported as well; otherwise the exported entry points fail in a child
# shell with "command not found" or empty thresholds.
export -f assess_quality
export -f assess_length
export -f assess_coherence
export -f assess_repetition
export -f assess_relevance
export -f assess_structure
export -f detect_degradation_patterns
export -f guard_output_quality
export -f get_mechanism_name
export -f correct_degraded_output
export -f generate_fallback_response
export MIN_RESPONSE_LENGTH MAX_REPETITION_RATIO MAX_NONSENSE_SCORE \
    DEGRADATION_THRESHOLD MAX_CORRECTION_ATTEMPTS FALLBACK_ENABLED