diff options
Diffstat (limited to 'bash/talk-to-computer/rag_integration.sh')
-rw-r--r-- | bash/talk-to-computer/rag_integration.sh | 336 |
1 files changed, 336 insertions, 0 deletions
#!/bin/bash

# RAG Integration Module
# This module provides functions for thinking mechanisms to intelligently
# query the RAG corpus and integrate relevant context into their prompts.
#
# The file is meant to be sourced; every public function is exported at the
# bottom. It reads the following settings, which this file does not define
# (presumably rag_config.sh provides them -- confirm there):
#   RAG_ENABLED             "true" turns RAG lookups on
#   RAG_MAX_CONTEXT_LENGTH  default maximum context length, in characters
#   CORPUS_DIR              root directory of the corpus
#   CORPUS_REGISTRY         path of the corpus registry file
#
# Strict mode (set -euo pipefail) is deliberately NOT enabled here: the
# options would leak into whichever script sources this module.

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/rag_config.sh"

# --- RAG Query Functions ---

#######################################
# Main entry point for mechanisms to query the RAG system.
# Globals:   RAG_MAX_CONTEXT_LENGTH (read, default for $3)
# Arguments: $1 - user prompt
#            $2 - mechanism name (puzzle, socratic, exploration, critique, ...)
#            $3 - optional maximum context length in characters
# Outputs:   "RAG_CONTEXT_AVAILABLE: <context>" when context was found,
#            otherwise the sentinel "RAG_CONTEXT_AVAILABLE: NONE"
# Returns:   0 when context was found, 1 otherwise
#######################################
query_rag_context() {
  local prompt="$1"
  local mechanism="$2"
  local max_context="${3:-${RAG_MAX_CONTEXT_LENGTH:-}}"
  local corpus_results=""

  if should_use_rag "$prompt" "$mechanism"; then
    # The lookup's own status is irrelevant here; only its output matters.
    corpus_results=$(get_relevant_context "$prompt" "$mechanism" "$max_context") \
      || true

    if [[ -n "$corpus_results" ]]; then
      printf '%s\n' "RAG_CONTEXT_AVAILABLE: $corpus_results"
      return 0
    fi
  fi

  printf '%s\n' "RAG_CONTEXT_AVAILABLE: NONE"
  return 1
}

#######################################
# Decide whether RAG should be used for this prompt/mechanism combination.
# Globals:   RAG_ENABLED (read)
# Arguments: $1 - user prompt
#            $2 - mechanism name
# Returns:   0 when RAG should be used, 1 otherwise
#######################################
should_use_rag() {
  local prompt="$1"
  local mechanism="$2"
  local trigger_pattern=""

  # Skip RAG if disabled (or if the setting is missing altogether).
  if [[ "${RAG_ENABLED:-}" != "true" ]]; then
    return 1
  fi

  case "$mechanism" in
    "puzzle")
      # Always use RAG for the puzzle mechanism (coding/programming).
      return 0
      ;;
    "socratic")
      # Technical or complex topics.
      trigger_pattern='technical|complex|advanced|algorithm|programming|science'
      ;;
    "exploration")
      # Specific technical domains.
      trigger_pattern='technology|framework|methodology|architecture'
      ;;
    "critique")
      # Domain-specific improvement requests.
      trigger_pattern='improve|optimize|enhance|refactor'
      ;;
    *)
      # Default: don't use RAG unless explicitly triggered.
      return 1
      ;;
  esac

  # A here-string (instead of echo | grep) keeps prompts such as "-n" intact.
  if grep -q -i -E -e "$trigger_pattern" <<<"$prompt"; then
    return 0
  fi
  return 1
}

#######################################
# Collect corpus context relevant to a prompt.
# Arguments: $1 - user prompt
#            $2 - mechanism name
#            $3 - maximum context length in characters; truncation is
#                 skipped when this is empty or not a non-negative integer
# Outputs:   one "=== Context for '<term>' ===" section per search term that
#            produced results, separated by real newlines
# Returns:   1 when no search terms could be extracted, 0 otherwise (even
#            when no term produced any context; the output is then empty)
#######################################
get_relevant_context() {
  local prompt="$1"
  local mechanism="$2"
  local max_context="$3"
  local search_terms term corpus_path term_context
  local context=""
  local nl=$'\n'

  search_terms=$(extract_search_terms "$prompt" "$mechanism")
  if [[ -z "$search_terms" ]]; then
    return 1
  fi

  # extract_search_terms prints one term per line.
  while IFS= read -r term; do
    [[ -n "$term" ]] || continue

    corpus_path=$(find_relevant_corpus "$term" "$mechanism")
    if [[ -n "$corpus_path" && -d "$corpus_path" ]]; then
      term_context=$(search_corpus_term "$term" "$corpus_path" "$max_context") \
        || term_context=""

      if [[ -n "$term_context" ]]; then
        # Real newlines: a "\n" inside double quotes stays a literal
        # backslash-n and would end up verbatim in the prompt.
        context+="${nl}=== Context for '${term}' ===${nl}${term_context}${nl}"
      fi
    fi
  done <<<"$search_terms"

  # Trim context if too long. Length and cut are both measured in characters.
  if [[ "$max_context" =~ ^[0-9]+$ ]] \
    && (( ${#context} > 10#$max_context )); then
    context="${context:0:$((10#$max_context))}...${nl}[Content truncated for length]"
  fi

  printf '%s\n' "$context"
}

#######################################
# Print the distinct matches of an extended regex found in a text.
# Arguments: $1 - text to scan
#            $2 - extended regular expression (matched case-insensitively)
#            $3 - maximum number of terms to print
# Outputs:   up to $3 matches, one per line, first-seen order, duplicates
#            (compared case-insensitively) removed
#######################################
_rag_extract_terms() {
  local text="$1"
  local pattern="$2"
  local limit="$3"

  # awk reads all of grep's output (no early exit), so the pipeline cannot
  # die from SIGPIPE when the caller runs with pipefail.
  grep -o -i -E -e "$pattern" <<<"$text" \
    | awk -v limit="$limit" '
        {
          key = tolower($0)
          if (!(key in seen) && count < limit) {
            seen[key] = 1
            count++
            print
          }
        }'
}

#######################################
# Extract search terms from a prompt based on the mechanism.
# Arguments: $1 - user prompt
#            $2 - mechanism name
# Outputs:   distinct search terms, one per line (at most 5 for "puzzle",
#            at most 3 otherwise)
#######################################
extract_search_terms() {
  local prompt="$1"
  local mechanism="$2"

  case "$mechanism" in
    "puzzle")
      # Programming-related terms.
      _rag_extract_terms "$prompt" \
        '\b(algorithm|function|variable|class|method|programming|code|implement|solve)\w*' 5
      ;;
    "socratic")
      # Technical concepts.
      _rag_extract_terms "$prompt" \
        '\b(concept|principle|theory|approach|methodology|framework)\w*' 3
      ;;
    "exploration")
      # Comparison terms.
      _rag_extract_terms "$prompt" \
        '\b(compare|versus|alternative|option|approach|strategy)\w*' 3
      ;;
    "critique")
      # Improvement terms.
      _rag_extract_terms "$prompt" \
        '\b(improve|optimize|enhance|fix|refactor|performance|quality)\w*' 3
      ;;
    *)
      # Generic extraction: any word of five or more characters.
      _rag_extract_terms "$prompt" '\b\w{5,}\b' 3
      ;;
  esac
}

#######################################
# Map a search term to the corpus directory that should be searched.
# Globals:   CORPUS_DIR (read)
# Arguments: $1 - search term
#            $2 - mechanism name
# Outputs:   path of the chosen corpus directory (its existence is NOT
#            checked here)
# Returns:   0
#######################################
find_relevant_corpus() {
  local search_term="$1"
  local mechanism="$2"
  local corpus_root="${CORPUS_DIR:-}"

  # Try the mechanism-specific mapping first.
  case "$mechanism" in
    "puzzle")
      if grep -q -i -E -e 'lil|programming|algorithm' <<<"$search_term"; then
        printf '%s\n' "$corpus_root/programming"
        return 0
      fi
      ;;
    "socratic")
      if grep -q -i -E -e 'science|physics|chemistry|biology' <<<"$search_term"; then
        printf '%s\n' "$corpus_root/science"
        return 0
      fi
      ;;
  esac

  # Otherwise pick a corpus from the term alone.
  if grep -q -i -E -e 'programming|code|algorithm|function' <<<"$search_term"; then
    printf '%s\n' "$corpus_root/programming"
  elif grep -q -i -E -e 'science|physics|chemistry|biology' <<<"$search_term"; then
    printf '%s\n' "$corpus_root/science"
  elif grep -q -i -E -e 'literature|book|author|writing' <<<"$search_term"; then
    printf '%s\n' "$corpus_root/literature"
  else
    # Default to the general corpus.
    printf '%s\n' "$corpus_root/general"
  fi
}

#######################################
# Search a corpus directory for a term and print the matching excerpts.
# Arguments: $1 - search term (interpreted by grep as a basic regex,
#                 matched case-insensitively)
#            $2 - corpus directory to search recursively
#            $3 - maximum context length (accepted for interface
#                 compatibility; not used by this function)
# Outputs:   at most 20 lines of grep output (2 lines of leading and 5 lines
#            of trailing context per match) from *.txt, *.md, *.html files
# Returns:   0 when something was found, 1 otherwise
#######################################
search_corpus_term() {
  local search_term="$1"
  local corpus_path="$2"
  # shellcheck disable=SC2034  # kept so the positional interface is stable
  local max_context="$3"
  local results

  # An empty pattern would match every line of every file.
  if [[ -z "$search_term" ]]; then
    return 1
  fi

  # grep errors (unreadable files, missing directory) are intentionally
  # silenced: a failed lookup simply yields no context.
  results=$(grep -r -i -A 5 -B 2 \
    --include="*.txt" --include="*.md" --include="*.html" \
    -e "$search_term" -- "$corpus_path" 2>/dev/null | head -n 20)

  if [[ -n "$results" ]]; then
    printf '%s\n' "$results"
    return 0
  fi

  return 1
}

# --- Context Integration Functions ---

#######################################
# Integrate RAG context into a prompt.
# Arguments: $1 - original prompt
#            $2 - result of query_rag_context (sentinel prefix included)
#            $3 - mechanism name; selects the wording of the wrapper text
# Outputs:   the original prompt unchanged when there is no context,
#            otherwise a context-aware prompt
# Returns:   0
#######################################
integrate_rag_context() {
  local original_prompt="$1"
  local rag_context="$2"
  local mechanism="$3"
  local context_content

  if [[ -z "$rag_context" || "$rag_context" == "RAG_CONTEXT_AVAILABLE: NONE" ]]; then
    printf '%s\n' "$original_prompt"
    return 0
  fi

  # Strip only the leading sentinel; corpus text that happens to contain the
  # same marker further down must stay untouched.
  context_content="${rag_context#RAG_CONTEXT_AVAILABLE: }"

  # Create a context-aware prompt based on the mechanism.
  case "$mechanism" in
    "puzzle")
      cat <<EOF
I have access to relevant programming knowledge that may help answer this question:

$context_content

Original Question: $original_prompt

Please use the above context to provide a more accurate and helpful response. If the context is relevant, incorporate it naturally into your answer. If it's not directly relevant, you can ignore it and answer based on your general knowledge.
EOF
      ;;
    "socratic")
      cat <<EOF
Relevant context from knowledge base:

$context_content

Question for analysis: $original_prompt

Consider the above context when formulating your response. Use it to provide deeper insights and more accurate analysis if relevant.
EOF
      ;;
    "exploration")
      cat <<EOF
Additional context that may be relevant:

$context_content

Exploration topic: $original_prompt

Use the provided context to enrich your analysis and provide more comprehensive alternatives if applicable.
EOF
      ;;
    *)
      cat <<EOF
Context from knowledge base:

$context_content

$original_prompt

You may use the above context to enhance your response if it's relevant to the question.
EOF
      ;;
  esac
}

# --- Utility Functions ---

#######################################
# Check if the corpus is available and functional.
# Globals:   CORPUS_DIR, CORPUS_REGISTRY (read)
# Outputs:   a health report on stdout
# Returns:   0 when healthy, 1 when any issue was found
#######################################
check_corpus_health() {
  local issues=()
  local issue
  local line_count

  # Check if the corpus directory exists.
  if [[ ! -d "${CORPUS_DIR:-}" ]]; then
    issues+=("Corpus directory not found: ${CORPUS_DIR:-}")
  fi

  if [[ ! -f "${CORPUS_REGISTRY:-}" ]]; then
    issues+=("Corpus registry not found: ${CORPUS_REGISTRY:-}")
  else
    # A registry of three lines or fewer is reported as empty.
    # Arithmetic comparison tolerates the padding some wc builds emit.
    line_count=$(wc -l <"$CORPUS_REGISTRY")
    if (( line_count <= 3 )); then
      issues+=("Corpus registry appears to be empty")
    fi
  fi

  # Report issues.
  if (( ${#issues[@]} > 0 )); then
    echo "❌ RAG Integration Issues Found:"
    for issue in "${issues[@]}"; do
      printf '%s\n' " - $issue"
    done
    return 1
  fi

  echo "✅ RAG Integration is healthy"
  return 0
}

#######################################
# Print RAG statistics.
# Globals:   CORPUS_DIR, CORPUS_REGISTRY, RAG_ENABLED,
#            RAG_MAX_CONTEXT_LENGTH (all read)
# Outputs:   topic count (registry lines containing "|"), corpus file count
#            (*.txt, *.md, *.html), enabled state and maximum context length
# Returns:   1 when the registry file is missing, 0 otherwise
#######################################
get_rag_stats() {
  local topic_count file_count
  local status="Disabled"

  if [[ ! -f "${CORPUS_REGISTRY:-}" ]]; then
    echo "❌ No corpus registry found"
    return 1
  fi

  # grep -c prints 0 but exits 1 when nothing matches; that is not an error.
  topic_count=$(grep -c -F -e '|' -- "$CORPUS_REGISTRY") || true
  file_count=$(find "${CORPUS_DIR:-}" -type f \
    \( -name "*.txt" -o -name "*.md" -o -name "*.html" \) 2>/dev/null | wc -l)
  # Normalise away the whitespace padding some wc implementations add.
  file_count=$(( file_count ))

  if [[ "${RAG_ENABLED:-}" == "true" ]]; then
    status="Enabled"
  fi

  echo "📊 RAG System Statistics:"
  echo " Topics: ${topic_count:-0}"
  echo " Files: $file_count"
  echo " Status: $status"
  echo " Max Context: ${RAG_MAX_CONTEXT_LENGTH:-} characters"
}

# --- Integration Helper ---

#######################################
# Helper for mechanisms to easily use RAG.
# Arguments: $1 - user prompt
#            $2 - mechanism name
# Outputs:   one status line followed by the prompt to use (context-enriched
#            when RAG context was found, the original prompt otherwise)
# Returns:   0 when context was integrated, 1 otherwise
# NOTE(review): the status line goes to stdout, so a caller capturing the
# output receives it in front of the prompt. Kept as-is because existing
# callers may rely on it -- confirm before moving it to stderr.
#######################################
use_rag_if_available() {
  local prompt="$1"
  local mechanism="$2"
  local rag_result

  # The outcome is encoded in the sentinel, so the exit status is ignored
  # (and must not abort a caller running under set -e).
  rag_result=$(query_rag_context "$prompt" "$mechanism") || true

  # Compare against the exact sentinel: corpus text may legitimately contain
  # a line ending in "NONE".
  if [[ "$rag_result" == "RAG_CONTEXT_AVAILABLE: "* \
    && "$rag_result" != "RAG_CONTEXT_AVAILABLE: NONE" ]]; then
    echo "RAG context found - integrating into prompt"
    integrate_rag_context "$prompt" "$rag_result" "$mechanism"
    return 0
  fi

  echo "No RAG context available - using original prompt"
  printf '%s\n' "$prompt"
  return 1
}

# Export functions for use by other scripts
export -f query_rag_context should_use_rag get_relevant_context
export -f _rag_extract_terms
export -f extract_search_terms find_relevant_corpus search_corpus_term
export -f integrate_rag_context check_corpus_health get_rag_stats
export -f use_rag_if_available