#!/bin/bash
#
# RAG Integration Module
#
# Provides functions that thinking mechanisms use to query the RAG corpus and
# integrate relevant context into their prompts.
#
# This file is meant to be sourced. It deliberately does not enable
# `set -euo pipefail`, because that would change the options of the sourcing
# shell.
#
# Globals read by the functions below (this module does not define them; they
# are presumably set by rag_config.sh - confirm there):
#   RAG_ENABLED             "true" enables RAG lookups
#   RAG_MAX_CONTEXT_LENGTH  default maximum context length, in characters
#   CORPUS_DIR              root directory holding the corpus sub-directories
#   CORPUS_REGISTRY         path of the corpus registry file

# NOTE(review): SCRIPT_DIR is a very common variable name; sourcing this file
# overwrites any SCRIPT_DIR the caller already had.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/rag_config.sh"

# --- RAG Query Functions ---

#######################################
# Main entry point for mechanisms to query the RAG system.
# Globals:   RAG_MAX_CONTEXT_LENGTH (read, default for $3)
# Arguments: $1 - prompt text
#            $2 - mechanism name (puzzle, socratic, exploration, critique, ...)
#            $3 - optional maximum context length in characters
# Outputs:   "RAG_CONTEXT_AVAILABLE: <context>" when context was found,
#            otherwise "RAG_CONTEXT_AVAILABLE: NONE"
# Returns:   0 when context was found, 1 otherwise
#######################################
query_rag_context() {
  local prompt="$1"
  local mechanism="$2"
  local max_context="${3:-${RAG_MAX_CONTEXT_LENGTH:-}}"
  local corpus_results

  if should_use_rag "$prompt" "$mechanism"; then
    corpus_results=$(get_relevant_context "$prompt" "$mechanism" "$max_context")
    if [[ -n "$corpus_results" ]]; then
      printf '%s\n' "RAG_CONTEXT_AVAILABLE: $corpus_results"
      return 0
    fi
  fi

  printf '%s\n' "RAG_CONTEXT_AVAILABLE: NONE"
  return 1
}

#######################################
# Decide whether RAG should be used for this prompt/mechanism combination.
# Globals:   RAG_ENABLED (read)
# Arguments: $1 - prompt text
#            $2 - mechanism name
# Returns:   0 when RAG should be used, 1 otherwise
#######################################
should_use_rag() {
  local prompt="$1"
  local mechanism="$2"
  local trigger_pattern

  # Skip RAG entirely when it is disabled (or the flag is unset).
  if [[ "${RAG_ENABLED:-}" != "true" ]]; then
    return 1
  fi

  case "$mechanism" in
    puzzle)
      # The puzzle mechanism (coding/programming) always uses RAG.
      return 0
      ;;
    socratic)
      # Technical or complex topics.
      trigger_pattern='technical|complex|advanced|algorithm|programming|science'
      ;;
    exploration)
      # Specific technical domains.
      trigger_pattern='technology|framework|methodology|architecture'
      ;;
    critique)
      # Domain-specific improvement requests.
      trigger_pattern='improve|optimize|enhance|refactor'
      ;;
    *)
      # Default: do not use RAG unless explicitly triggered.
      return 1
      ;;
  esac

  # A here-string is used instead of `echo "$prompt" |` so that prompts such
  # as "-n" or "-e" are matched as text rather than eaten as echo options.
  if grep -qiE -e "$trigger_pattern" <<< "$prompt"; then
    return 0
  fi
  return 1
}

#######################################
# Collect relevant corpus context for the given prompt and mechanism.
# Arguments: $1 - prompt text
#            $2 - mechanism name
#            $3 - maximum context length in characters; truncation is skipped
#                 when this is empty or not a non-negative integer
# Outputs:   one "=== Context for '<term>' ===" section per search term that
#            produced matches, separated by real newlines
# Returns:   1 when no search terms could be extracted, 0 otherwise
#######################################
get_relevant_context() {
  local prompt="$1"
  local mechanism="$2"
  local max_context="$3"
  local search_terms term corpus_path term_context limit
  local context=""

  search_terms=$(extract_search_terms "$prompt" "$mechanism")
  if [[ -z "$search_terms" ]]; then
    return 1
  fi

  # extract_search_terms emits one term per line; read them line by line so
  # no word splitting or glob expansion is applied to the terms.
  while IFS= read -r term; do
    [[ -n "$term" ]] || continue
    corpus_path=$(find_relevant_corpus "$term" "$mechanism")
    if [[ -n "$corpus_path" && -d "$corpus_path" ]]; then
      term_context=$(search_corpus_term "$term" "$corpus_path" "$max_context")
      if [[ -n "$term_context" ]]; then
        # $'\n' yields real newlines; a plain "\n" inside double quotes would
        # be printed literally as backslash-n.
        context+=$'\n'"=== Context for '${term}' ==="$'\n'"${term_context}"$'\n'
      fi
    fi
  done <<< "$search_terms"

  # Trim the context if it is too long. The length check and the cut both
  # count characters, so multi-byte characters are never split.
  if [[ "$max_context" =~ ^[0-9]+$ ]]; then
    limit=$((10#$max_context)) # force base 10 so "0800" is not read as octal
    if (( ${#context} > limit )); then
      context="${context:0:limit}"
      context+="..."$'\n'"[Content truncated for length]"
    fi
  fi

  printf '%s\n' "$context"
}

#######################################
# Extract search terms from a prompt, using a mechanism-specific keyword list.
# Arguments: $1 - prompt text
#            $2 - mechanism name
# Outputs:   matching words, one per line (at most 5 for puzzle, 3 otherwise)
#######################################
extract_search_terms() {
  local prompt="$1"
  local mechanism="$2"
  local pattern
  local limit=3

  case "$mechanism" in
    puzzle)
      # Programming-related terms.
      pattern='\b(algorithm|function|variable|class|method|programming|code|implement|solve)\w*'
      limit=5
      ;;
    socratic)
      # Technical concepts.
      pattern='\b(concept|principle|theory|approach|methodology|framework)\w*'
      ;;
    exploration)
      # Comparison terms.
      pattern='\b(compare|versus|alternative|option|approach|strategy)\w*'
      ;;
    critique)
      # Improvement terms.
      pattern='\b(improve|optimize|enhance|fix|refactor|performance|quality)\w*'
      ;;
    *)
      # Generic extraction: any word of five or more characters.
      pattern='\b\w{5,}\b'
      ;;
  esac

  grep -oiE -e "$pattern" <<< "$prompt" | head -n "$limit"
}

#######################################
# Map a search term to the corpus directory most likely to contain it.
# Globals:   CORPUS_DIR (read)
# Arguments: $1 - search term
#            $2 - mechanism name
# Outputs:   path of the chosen corpus directory (existence is not checked)
# Returns:   0
#######################################
find_relevant_corpus() {
  local search_term="$1"
  local mechanism="$2"
  local corpus_root="${CORPUS_DIR:-}"

  # Try the mechanism-specific corpus mapping first.
  case "$mechanism" in
    puzzle)
      if grep -qiE -e 'lil|programming|algorithm' <<< "$search_term"; then
        printf '%s\n' "${corpus_root}/programming"
        return 0
      fi
      ;;
    socratic)
      if grep -qiE -e 'science|physics|chemistry|biology' <<< "$search_term"; then
        printf '%s\n' "${corpus_root}/science"
        return 0
      fi
      ;;
  esac

  # Otherwise pick a corpus from the term alone.
  if grep -qiE -e 'programming|code|algorithm|function' <<< "$search_term"; then
    printf '%s\n' "${corpus_root}/programming"
  elif grep -qiE -e 'science|physics|chemistry|biology' <<< "$search_term"; then
    printf '%s\n' "${corpus_root}/science"
  elif grep -qiE -e 'literature|book|author|writing' <<< "$search_term"; then
    printf '%s\n' "${corpus_root}/literature"
  else
    # Default to the general corpus.
    printf '%s\n' "${corpus_root}/general"
  fi
}

#######################################
# Search a corpus directory for a term and return the surrounding lines.
# Arguments: $1 - search term (interpreted by grep as a basic regex,
#                 matched case-insensitively)
#            $2 - corpus directory to search recursively
#            $3 - maximum context length; accepted for interface
#                 compatibility but not used here (output is capped at 20
#                 lines; the caller applies the character limit)
# Outputs:   up to 20 lines of grep output (2 lines before and 5 lines after
#            each match) from *.txt, *.md and *.html files
# Returns:   0 when something matched, 1 otherwise
#######################################
search_corpus_term() {
  local search_term="$1"
  local corpus_path="$2"
  local results

  # An empty pattern would match every line of every file.
  if [[ -z "$search_term" ]]; then
    return 1
  fi

  # -e keeps a term that starts with "-" from being parsed as an option.
  # grep errors (unreadable files, missing directory) are intentionally
  # silenced: this is a best-effort lookup.
  results=$(grep -r -i -A 5 -B 2 \
    --include="*.txt" --include="*.md" --include="*.html" \
    -e "$search_term" -- "$corpus_path" 2>/dev/null | head -n 20)

  if [[ -n "$results" ]]; then
    printf '%s\n' "$results"
    return 0
  fi

  return 1
}

# --- Context Integration Functions ---

#######################################
# Integrate RAG context into a prompt.
# Arguments: $1 - original prompt
#            $2 - output of query_rag_context
#                 ("RAG_CONTEXT_AVAILABLE: <context>" or "...: NONE")
#            $3 - mechanism name (selects the wording of the wrapper text)
# Outputs:   the context-aware prompt, or the original prompt unchanged when
#            there is no context
# Returns:   0
#######################################
integrate_rag_context() {
  local original_prompt="$1"
  local rag_context="$2"
  local mechanism="$3"
  local context_content

  if [[ -z "$rag_context" || "$rag_context" == "RAG_CONTEXT_AVAILABLE: NONE" ]]; then
    printf '%s\n' "$original_prompt"
    return 0
  fi

  # Strip only the leading marker; corpus text that happens to contain the
  # marker at the start of a later line is left untouched.
  context_content="${rag_context#RAG_CONTEXT_AVAILABLE: }"

  # Create a context-aware prompt based on the mechanism.
  case "$mechanism" in
    puzzle)
      cat << EOF
I have access to relevant programming knowledge that may help answer this question:

$context_content

Original Question: $original_prompt

Please use the above context to provide a more accurate and helpful response. If the context is relevant, incorporate it naturally into your answer. If it's not directly relevant, you can ignore it and answer based on your general knowledge.
EOF
      ;;
    socratic)
      cat << EOF
Relevant context from knowledge base:

$context_content

Question for analysis: $original_prompt

Consider the above context when formulating your response. Use it to provide deeper insights and more accurate analysis if relevant.
EOF
      ;;
    exploration)
      cat << EOF
Additional context that may be relevant:

$context_content

Exploration topic: $original_prompt

Use the provided context to enrich your analysis and provide more comprehensive alternatives if applicable.
EOF
      ;;
    *)
      cat << EOF
Context from knowledge base:

$context_content

$original_prompt

You may use the above context to enhance your response if it's relevant to the question.
EOF
      ;;
  esac
}

# --- Utility Functions ---

#######################################
# Check whether the corpus is available and functional.
# Globals:   CORPUS_DIR, CORPUS_REGISTRY (read)
# Outputs:   a health report on stdout
# Returns:   0 when healthy, 1 when at least one issue was found
#######################################
check_corpus_health() {
  local issues=()
  local issue line_count
  local corpus_root="${CORPUS_DIR:-}"
  local registry="${CORPUS_REGISTRY:-}"

  # Check if the corpus directory exists.
  if [[ ! -d "$corpus_root" ]]; then
    issues+=("Corpus directory not found: $corpus_root")
  fi

  if [[ ! -f "$registry" ]]; then
    # Check if the registry exists.
    issues+=("Corpus registry not found: $registry")
  else
    # A registry of three lines or fewer is treated as empty.
    line_count=$(wc -l < "$registry")
    if (( line_count <= 3 )); then
      issues+=("Corpus registry appears to be empty")
    fi
  fi

  # Report issues.
  if (( ${#issues[@]} > 0 )); then
    echo "❌ RAG Integration Issues Found:"
    for issue in "${issues[@]}"; do
      printf '%s\n' " - $issue"
    done
    return 1
  fi

  echo "✅ RAG Integration is healthy"
  return 0
}

#######################################
# Print RAG statistics.
# Globals:   CORPUS_DIR, CORPUS_REGISTRY, RAG_ENABLED,
#            RAG_MAX_CONTEXT_LENGTH (read)
# Outputs:   a statistics report on stdout
# Returns:   1 when the registry file is missing, 0 otherwise
#######################################
get_rag_stats() {
  local registry="${CORPUS_REGISTRY:-}"
  local topic_count file_count
  local status="Disabled"

  if [[ ! -f "$registry" ]]; then
    echo "❌ No corpus registry found"
    return 1
  fi

  # Topics are counted as registry lines containing a "|" separator.
  topic_count=$(grep -cF -e "|" -- "$registry")

  # Arithmetic expansion strips the padding some wc implementations emit.
  file_count=$(( $(find "${CORPUS_DIR:-}" -type f \
    \( -name "*.txt" -o -name "*.md" -o -name "*.html" \) 2>/dev/null | wc -l) ))

  if [[ "${RAG_ENABLED:-}" == "true" ]]; then
    status="Enabled"
  fi

  echo "📊 RAG System Statistics:"
  echo " Topics: $topic_count"
  echo " Files: $file_count"
  echo " Status: $status"
  echo " Max Context: ${RAG_MAX_CONTEXT_LENGTH:-} characters"
}

# --- Integration Helper ---

#######################################
# Helper for mechanisms to easily use RAG.
# Arguments: $1 - prompt text
#            $2 - mechanism name
# Outputs:   a one-line status message followed by the (possibly enriched)
#            prompt, both on stdout
# Returns:   0 when RAG context was integrated, 1 when the original prompt is
#            returned unchanged
#######################################
use_rag_if_available() {
  local prompt="$1"
  local mechanism="$2"
  local rag_result
  local -r marker="RAG_CONTEXT_AVAILABLE: "

  rag_result=$(query_rag_context "$prompt" "$mechanism")

  # Compare against the exact sentinel. Testing only for a trailing "NONE"
  # would discard real context whose last line happens to end in "NONE".
  if [[ "$rag_result" == "$marker"* && "$rag_result" != "${marker}NONE" ]]; then
    # NOTE(review): this status line shares stdout with the prompt, so callers
    # that capture the output also capture it. Kept for compatibility;
    # consider moving it to stderr once callers are confirmed.
    echo "RAG context found - integrating into prompt"
    integrate_rag_context "$prompt" "$rag_result" "$mechanism"
    return 0
  fi

  echo "No RAG context available - using original prompt"
  printf '%s\n' "$prompt"
  return 1
}

# Export functions for use by other scripts
export -f query_rag_context should_use_rag get_relevant_context
export -f extract_search_terms find_relevant_corpus search_corpus_term
export -f integrate_rag_context check_corpus_health get_rag_stats
export -f use_rag_if_available