diff options
Diffstat (limited to 'bash/talk-to-computer/classifier.sh')
-rwxr-xr-x | bash/talk-to-computer/classifier.sh | 281 |
1 files changed, 281 insertions, 0 deletions
#!/bin/bash

# Advanced Prompt Classification System
# Multi-layer approach combining semantic analysis, pattern matching, and confidence scoring
#
# File: bash/talk-to-computer/classifier.sh
#
# This file is meant to be sourced. It defines and exports the classifier
# functions and deliberately does NOT enable `set -e` / `set -u`, because those
# options would leak into the sourcing shell.
#
# Public functions:
#   classify_prompt PROMPT [USE_SEMANTIC]  -> prints "MECHANISM:CONFIDENCE"
#   analyze_intent_patterns PROMPT         -> prints "MECHANISM:CONFIDENCE"
#   analyze_complexity PROMPT              -> prints an integer score
#   classify_semantic PROMPT               -> prints the raw model reply

# Get the directory where this script is located
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/logging.sh"

# Keep warnings working even when logging.sh could not be loaded or does not
# define log_warning; otherwise the invalid-reply path below would fail with
# "command not found".
if ! declare -F log_warning >/dev/null 2>&1; then
  log_warning() {
    printf 'WARNING: %s\n' "$*" >&2
  }
fi

# --- Classification Configuration ---
CLASSIFIER_MODEL="gemma3n:e2b" # Lightweight model for classification
CONFIDENCE_THRESHOLD=0.7

# --- Fixed-point helpers ---
# Confidences are compared as integers scaled by 10000 so that no external
# calculator is required and comparisons are exact (no binary floating point).

#######################################
# Convert a non-negative decimal string ("0.85", ".6", "1", "1.0") into an
# integer scaled by 10000. Digits beyond the fourth decimal are truncated.
# Arguments: $1 - decimal string
# Outputs:   the scaled integer on stdout
# Returns:   0 on success, 1 if $1 is not a plain non-negative decimal
#######################################
_classifier_to_fixed() {
  local value="$1"
  local number_re='^([0-9]*)\.?([0-9]*)$'
  [[ "$value" == *[0-9]* && "$value" =~ $number_re ]] || return 1
  local whole="${BASH_REMATCH[1]:-0}"
  local frac="${BASH_REMATCH[2]}0000"
  # 10# forces base 10 so values such as "08" are not parsed as octal.
  printf '%d\n' "$((10#$whole * 10000 + 10#${frac:0:4}))"
}

#######################################
# Format an integer scaled by 10000 as a decimal with two fraction digits.
# Arguments: $1 - scaled integer
# Outputs:   e.g. "0.57" for 5700
#######################################
_classifier_from_fixed() {
  printf '%d.%02d\n' "$(($1 / 10000))" "$((($1 % 10000) / 100))"
}

# --- Semantic Classification ---
#######################################
# Ask the classifier model which mechanism fits the prompt.
# Globals:   CLASSIFIER_MODEL (read; falls back to "gemma3n:e2b" when unset,
#            e.g. in a child shell that only inherited the exported functions)
# Arguments: $1 - the user prompt
# Outputs:   the raw model reply on stdout (expected "MECHANISM:CONFIDENCE");
#            nothing when the ollama call fails
# Returns:   0 on success, 1 when the ollama call fails
#######################################
classify_semantic() {
  local prompt="$1"

  # NOTE(review): PUZZLE is produced by analyze_intent_patterns but is not
  # offered to the model here, so the model can never agree with a PUZZLE
  # pattern result. Confirm whether that is intentional.
  local classification_prompt="You are a prompt classifier. Analyze this prompt and determine which AI thinking mechanism would be most appropriate.

PROMPT: \"$prompt\"

AVAILABLE MECHANISMS:
- DIRECT: Simple questions, factual queries, straightforward requests
- CONSENSUS: Multiple perspectives needed, voting, agreement/disagreement
- SYNTHESIS: Combining approaches, integration, unification
- EXPLORATION: Comparing alternatives, strategic planning, option analysis
- SOCRATIC: Deep analysis, questioning assumptions, thorough investigation
- CRITIQUE: Improvement suggestions, refinement, enhancement
- PEER_REVIEW: Collaborative feedback, review processes, advice

Respond with ONLY the mechanism name and confidence (0.0-1.0):
Format: MECHANISM_NAME:CONFIDENCE

Example: EXPLORATION:0.85"

  local result
  # stderr is discarded on purpose: ollama prints progress output there.
  result=$(ollama run "${CLASSIFIER_MODEL:-gemma3n:e2b}" "$classification_prompt" 2>/dev/null) || return 1
  printf '%s\n' "$result"
}

# --- Intent Pattern Analysis ---
#######################################
# Score the prompt against keyword patterns and pick the best mechanism.
# Matching is case-insensitive and ignores leading whitespace.
# Arguments: $1 - the user prompt
# Outputs:   "MECHANISM:CONFIDENCE" on stdout, where CONFIDENCE is the winning
#            score divided by the sum of all scores, truncated to two decimals
#            ("0.0" when nothing matched, in which case MECHANISM is DIRECT)
#######################################
analyze_intent_patterns() {
  local prompt="$1"

  # Normalise once so that "What ...", "Compare ..." etc. hit the lowercase
  # patterns. tr is used instead of ${var,,} to stay compatible with bash 3.2.
  local text
  text=$(printf '%s' "$prompt" | tr '[:upper:]' '[:lower:]')
  # Strip leading whitespace so the ^-anchored question patterns can match.
  text="${text#"${text%%[![:space:]]*}"}"

  # Initialize scores using individual variables (more portable)
  local direct_score=0
  local consensus_score=0
  local synthesis_score=0
  local exploration_score=0
  local socratic_score=0
  local critique_score=0
  local peer_review_score=0
  local puzzle_score=0

  # Patterns are kept in a variable so the regex needs no shell escaping.
  local re

  # Question type patterns
  re='^(what|when|where|who|which|how much|how many)'
  [[ "$text" =~ $re ]] && direct_score=$((direct_score + 3))

  re='^(why|how|explain)'
  [[ "$text" =~ $re ]] && socratic_score=$((socratic_score + 2))

  # Action-oriented patterns
  re='(compare|contrast|evaluate|assess)'
  [[ "$text" =~ $re ]] && exploration_score=$((exploration_score + 3))

  re='(improve|enhance|fix|refine|optimize|better)'
  [[ "$text" =~ $re ]] && critique_score=$((critique_score + 3))

  re='(combine|merge|integrate|synthesize|unify)'
  [[ "$text" =~ $re ]] && synthesis_score=$((synthesis_score + 3))

  re='(review|feedback|opinion|thoughts|suggest)'
  [[ "$text" =~ $re ]] && peer_review_score=$((peer_review_score + 2))

  re='(consensus|vote|agree|disagree|multiple.*view)'
  [[ "$text" =~ $re ]] && consensus_score=$((consensus_score + 3))

  # Context patterns
  re='(strategy|strategic|plan|approach|option|alternative)'
  [[ "$text" =~ $re ]] && exploration_score=$((exploration_score + 2))

  re='(analyze|analysis|examine|investigate|deep|thorough)'
  [[ "$text" =~ $re ]] && socratic_score=$((socratic_score + 2))

  # Puzzle and coding patterns
  re='(puzzle|solve|algorithm|code|programming|implement|sort|search|optimize|data.*structure)'
  [[ "$text" =~ $re ]] && puzzle_score=$((puzzle_score + 3))

  re='(challenge|problem|question|task|assignment|exercise)'
  [[ "$text" =~ $re ]] && puzzle_score=$((puzzle_score + 2))

  # Lil-specific patterns - highest priority for puzzle mechanism.
  # "lil" must be a whole word; a bare substring test would also fire on
  # words such as "lilac". Explicit character classes are used because \b is
  # not portable across regex libraries.
  re='(^|[^[:alnum:]_])lil([^[:alnum:]_]|$)'
  [[ "$text" =~ $re ]] && puzzle_score=$((puzzle_score + 5))

  # Find highest scoring mechanism. Order matters: on a tie the earlier
  # mechanism wins because the comparison is strictly greater-than.
  local -a names=(DIRECT CONSENSUS SYNTHESIS EXPLORATION SOCRATIC CRITIQUE PEER_REVIEW PUZZLE)
  local -a scores=(
    "$direct_score" "$consensus_score" "$synthesis_score" "$exploration_score"
    "$socratic_score" "$critique_score" "$peer_review_score" "$puzzle_score"
  )
  local max_score=0
  local total_score=0
  local best_mechanism="DIRECT"
  local i
  for i in "${!names[@]}"; do
    total_score=$((total_score + scores[i]))
    if ((scores[i] > max_score)); then
      max_score=${scores[i]}
      best_mechanism=${names[i]}
    fi
  done

  # Calculate confidence based on score distribution (integer math, truncated
  # to two decimals).
  local confidence="0.0"
  if ((total_score > 0)); then
    local hundredths=$((max_score * 100 / total_score))
    printf -v confidence '%d.%02d' "$((hundredths / 100))" "$((hundredths % 100))"
  fi

  echo "$best_mechanism:$confidence"
}

# --- Complexity Analysis ---
#######################################
# Estimate prompt complexity from its length and structure.
# Arguments: $1 - the user prompt
# Outputs:   integer score on stdout:
#              +3 for more than 50 words, +2 for more than 20, -2 for 5 or fewer
#              +2 when there is more than one question mark
#              +1 when there are more than 3 sentences
#######################################
analyze_complexity() {
  local prompt="$1"

  local word_count
  # $(( )) normalises the padded output some wc implementations produce.
  word_count=$(($(printf '%s\n' "$prompt" | wc -w)))

  # Count non-empty segments between sentence terminators. Counting raw lines
  # after splitting on "." would report one sentence too many for any prompt
  # that ends with a period.
  local sentence_count
  sentence_count=$(printf '%s\n' "$prompt" | tr '.!?' '\n\n\n' | grep -c '[^[:space:]]')

  # Number of "?" characters: drop everything else and measure what is left.
  local only_questions="${prompt//[^?]/}"
  local question_count=${#only_questions}

  # Simple heuristics for complexity
  local complexity_score=0

  # Word count factor
  if ((word_count > 50)); then
    complexity_score=$((complexity_score + 3))
  elif ((word_count > 20)); then
    complexity_score=$((complexity_score + 2))
  elif ((word_count <= 5)); then
    complexity_score=$((complexity_score - 2))
  fi

  # Multiple questions suggest complexity
  if ((question_count > 1)); then
    complexity_score=$((complexity_score + 2))
  fi

  # Multiple sentences suggest complexity
  if ((sentence_count > 3)); then
    complexity_score=$((complexity_score + 1))
  fi

  echo "$complexity_score"
}

# --- Confidence Weighted Classification ---
#######################################
# Classify a prompt by combining pattern analysis, a complexity adjustment
# and (optionally) the model-based semantic classification.
# Globals:   CONFIDENCE_THRESHOLD (read; falls back to 0.7 when unset)
# Arguments: $1 - the user prompt
#            $2 - "true" (default) to consult the model when the ollama
#                 command is available; any other value skips it
# Outputs:   "MECHANISM:CONFIDENCE" on stdout; progress and diagnostics go
#            to stderr so callers can capture stdout safely
#######################################
classify_prompt() {
  local prompt="$1"
  local use_semantic="${2:-true}"
  local threshold="${CONFIDENCE_THRESHOLD:-0.7}"

  echo "=== Advanced Prompt Classification ===" >&2
  echo "Analyzing: \"$prompt\"" >&2
  echo >&2

  # Get pattern-based classification
  local pattern_result pattern_mechanism pattern_confidence
  pattern_result=$(analyze_intent_patterns "$prompt")
  pattern_mechanism="${pattern_result%%:*}"
  pattern_confidence="${pattern_result#*:}"

  echo "Pattern Analysis: $pattern_mechanism (confidence: $pattern_confidence)" >&2

  # Get complexity score
  local complexity
  complexity=$(analyze_complexity "$prompt")
  echo "Complexity Score: $complexity" >&2

  # Apply complexity adjustments
  if ((complexity < 0)) && [[ "$pattern_mechanism" != "DIRECT" ]]; then
    echo "Low complexity detected - suggesting DIRECT" >&2
    pattern_mechanism="DIRECT"
    pattern_confidence="0.8"
  elif ((complexity > 4)); then
    echo "High complexity detected - boosting complex mechanisms" >&2
    if [[ "$pattern_mechanism" == "DIRECT" ]]; then
      pattern_mechanism="SOCRATIC"
      pattern_confidence="0.7"
    fi
  fi

  local final_mechanism="$pattern_mechanism"
  local final_confidence="$pattern_confidence"

  # Use semantic classification if available and requested
  if [[ "$use_semantic" == "true" ]] && command -v ollama >/dev/null 2>&1; then
    echo "Running semantic analysis..." >&2

    # Clean up the result: drop blanks and carriage returns, then keep the
    # first non-empty line so a leading blank line does not hide the answer.
    local semantic_result
    semantic_result=$(classify_semantic "$prompt" | tr -d ' \t\r' | sed -n '/./{p;q;}')

    local semantic_mechanism=""
    local semantic_confidence=""
    local semantic_fixed=""
    local result_re='^([A-Z_]+):([0-9]*\.?[0-9]+)$'
    if [[ "$semantic_result" =~ $result_re ]]; then
      # Accept only mechanisms this classifier knows about, so an invented
      # name from the model is never handed to the caller.
      case "${BASH_REMATCH[1]}" in
        DIRECT | CONSENSUS | SYNTHESIS | EXPLORATION | SOCRATIC | CRITIQUE | PEER_REVIEW | PUZZLE)
          semantic_mechanism="${BASH_REMATCH[1]}"
          semantic_confidence="${BASH_REMATCH[2]}"
          ;;
      esac
    fi

    # A confidence above 1.0 (e.g. "85") is treated as an invalid reply.
    if [[ -n "$semantic_mechanism" ]] \
      && semantic_fixed=$(_classifier_to_fixed "$semantic_confidence") \
      && ((semantic_fixed <= 10000)); then
      echo "Semantic Analysis: $semantic_mechanism (confidence: $semantic_confidence)" >&2

      local pattern_fixed
      pattern_fixed=$(_classifier_to_fixed "$pattern_confidence") || pattern_fixed=0

      # If both agree, boost confidence (capped at 1.0)
      if [[ "$pattern_mechanism" == "$semantic_mechanism" ]]; then
        local boosted=$((pattern_fixed + 2000))
        if ((boosted > 10000)); then
          final_confidence="1.0"
        else
          final_confidence=$(_classifier_from_fixed "$boosted")
        fi
        echo "Pattern and semantic agree - boosting confidence to $final_confidence" >&2
      # If semantic has clearly higher confidence (margin 0.1), use it
      elif ((semantic_fixed > pattern_fixed + 1000)); then
        final_mechanism="$semantic_mechanism"
        final_confidence="$semantic_confidence"
        echo "Using semantic result due to higher confidence" >&2
      fi
    else
      log_warning "Semantic classification failed or returned invalid format: $semantic_result"
    fi
  fi

  # Final confidence check
  local threshold_fixed final_fixed
  threshold_fixed=$(_classifier_to_fixed "$threshold") || threshold_fixed=7000
  # An unparseable confidence is left alone rather than forced to DIRECT.
  final_fixed=$(_classifier_to_fixed "$final_confidence") || final_fixed=$threshold_fixed
  if ((final_fixed < threshold_fixed)); then
    echo "Low confidence ($final_confidence < $threshold) - defaulting to DIRECT" >&2
    final_mechanism="DIRECT"
    final_confidence="0.5"
  fi

  echo >&2
  echo "=== Final Classification ===" >&2
  echo "Mechanism: $final_mechanism" >&2
  echo "Confidence: $final_confidence" >&2
  echo "============================" >&2

  echo "$final_mechanism:$final_confidence"
}

# --- Export Functions ---
export -f classify_prompt
export -f analyze_intent_patterns
export -f analyze_complexity
export -f classify_semantic
# Helpers are exported too, because classify_prompt calls them in child shells.
export -f _classifier_to_fixed
export -f _classifier_from_fixed