about summary refs log tree commit diff stats
path: root/bash/talk-to-computer/rag_search.sh
diff options
context:
space:
mode:
Diffstat (limited to 'bash/talk-to-computer/rag_search.sh')
-rwxr-xr-xbash/talk-to-computer/rag_search.sh187
1 files changed, 187 insertions, 0 deletions
diff --git a/bash/talk-to-computer/rag_search.sh b/bash/talk-to-computer/rag_search.sh
new file mode 100755
index 0000000..dfcbc91
--- /dev/null
+++ b/bash/talk-to-computer/rag_search.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+
+# RAG Search Utility - Search the knowledge corpus
+# This script demonstrates how to search the corpus using efficient Unix tools
+
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)  # absolute directory holding this script
+. "${SCRIPT_DIR}/rag_config.sh"  # run the neighbouring rag_config.sh inside this shell
+
+# --- Utility Functions ---
+
+# Get corpus path for a topic (standalone version): print field 2 of the first '|'-separated
+# registry line whose field 1 ends with $1. $1 is escaped, so '.', '*', '[' match only themselves.
+get_corpus_path() {
+    local topic_re; topic_re=$(printf '%s' "$1" | sed 's/[[\.*^$]/\\&/g')
+    [ -f "$CORPUS_REGISTRY" ] || return 0  # no registry file: print nothing, still succeed
+    grep "^[^|]*${topic_re}|" "$CORPUS_REGISTRY" | head -1 | cut -d'|' -f2
+}
+
+# Check if corpus exists for a topic: status 0 when a registry line's first field ends with literal $1.
+corpus_exists() {
+    local topic_re; topic_re=$(printf '%s' "$1" | sed 's/[[\.*^$]/\\&/g')  # escape regex operators
+    grep -q "^[^|]*${topic_re}|" "$CORPUS_REGISTRY" 2>/dev/null
+}
+
+# --- Search Functions ---
+
+# Search corpus for keywords
+search_corpus() {
+    local query="$1"
+    local topic="${2:-}"
+
+    echo "🔍 Searching corpus for: '$query'"
+    if [ -n "$topic" ]; then
+        echo "📂 Limited to topic: $topic"
+    fi
+    echo "----------------------------------------"
+
+    # Build search command
+    if [ -n "$topic" ]; then
+        local corpus_path=$(get_corpus_path "$topic")
+        if [ -n "$corpus_path" ]; then
+            # Search specific topic directory
+            grep -r -i "$query" "$corpus_path" --include="*.txt" --include="*.md" --include="*.html"
+        else
+            echo "❌ Topic not found: $topic"
+            return 1
+        fi
+    else
+        # Search entire corpus
+        grep -r -i "$query" "$CORPUS_DIR" --include="*.txt" --include="*.md" --include="*.html"
+    fi | head -10 | while IFS=: read -r file line content; do
+        local filename=$(basename "$file")
+        local topic_name=$(basename "$(dirname "$file")")
+        echo "📄 $topic_name/$filename (line $line):"
+        echo "   $content"
+        echo ""
+    done
+}
+
+# Get context around search results
+get_context() {
+    local query="$1"
+    local topic="$2"
+    local context_lines="${3:-$SEARCH_CONTEXT_LINES}"
+
+    echo "📖 Getting context for: '$query'"
+    echo "----------------------------------------"
+
+    if [ -n "$topic" ]; then
+        local corpus_path=$(get_corpus_path "$topic")
+        if [ -n "$corpus_path" ]; then
+            grep -r -i -A "$context_lines" -B "$context_lines" "$query" "$corpus_path"
+        else
+            echo "❌ Topic not found: $topic"
+            return 1
+        fi
+    else
+        grep -r -i -A "$context_lines" -B "$context_lines" "$query" "$CORPUS_DIR"
+    fi
+}
+
+# Extract relevant sections from files
+extract_sections() {
+    local query="$1"
+    local topic="$2"
+
+    echo "📋 Extracting relevant sections for: '$query'"
+    echo "----------------------------------------"
+
+    # Find files containing the query
+    local files
+    if [ -n "$topic" ]; then
+        local corpus_path=$(get_corpus_path "$topic")
+        files=$(grep -r -l -i "$query" "$corpus_path" 2>/dev/null)
+    else
+        files=$(grep -r -l -i "$query" "$CORPUS_DIR" 2>/dev/null)
+    fi
+
+    if [ -z "$files" ]; then
+        echo "❌ No files found containing: $query"
+        return 1
+    fi
+
+    echo "$files" | while read -r file; do
+        local filename=$(basename "$file")
+        echo "📄 Processing: $filename"
+        echo "----------------------------------------"
+
+        # Extract relevant sections (headers and surrounding content)
+        awk -v query="$query" '
+        BEGIN { in_section = 0; section_content = "" }
+
+        # Check if line contains query (case insensitive)
+        tolower($0) ~ tolower(query) {
+            if (in_section == 0) {
+                print "RELEVANT SECTION:"
+                in_section = 1
+            }
+        }
+
+        # If we found a header before the match, include it
+        /^#/ && in_section == 0 {
+            section_content = $0
+        }
+
+        # Print content when we have a match
+        in_section == 1 {
+            print
+            if (length($0) == 0) {
+                in_section = 0
+                section_content = ""
+                print ""
+            }
+        }
+        ' "$file"
+
+        echo "----------------------------------------"
+    done
+}
+
+# --- Main Command Interface ---
+
+# Report a missing <query> on stderr and stop with status 1, so callers that capture
+# stdout never mistake the usage text for search results. $1 is the command synopsis.
+usage_error() {
+    echo "❌ Usage: $0 $1" >&2
+    exit 1
+}
+
+# Dispatch on the first argument. No argument, "help", or an unrecognised command
+# prints the help text; the search commands require a non-empty <query> in $2.
+case "${1:-help}" in
+    "search")
+        [ -n "$2" ] || usage_error "search <query> [topic]"
+        search_corpus "$2" "$3"
+        ;;
+    "context")
+        [ -n "$2" ] || usage_error "context <query> [topic] [lines]"
+        get_context "$2" "$3" "$4"
+        ;;
+    "extract")
+        [ -n "$2" ] || usage_error "extract <query> [topic]"
+        extract_sections "$2" "$3"
+        ;;
+    "stats")
+        get_corpus_stats  # not defined in this file; presumably provided by rag_config.sh
+        ;;
+    "help"|*)
+        echo "🔍 RAG Search Utility"
+        echo "Search and extract information from the knowledge corpus"
+        echo ""
+        echo "Usage: $0 <command> [arguments]"
+        echo ""
+        echo "Commands:"
+        echo "  search <query> [topic]    Search for exact matches"
+        echo "  context <query> [topic] [lines]  Get context around matches"
+        echo "  extract <query> [topic]  Extract relevant sections"
+        echo "  stats                   Show corpus statistics"
+        echo "  help                    Show this help message"
+        echo ""
+        echo "Examples:"
+        echo "  $0 search 'quantum physics'"
+        echo "  $0 search 'lil programming' programming"
+        echo "  $0 context 'force' physics"
+        echo "  $0 extract 'variables' programming"
+        ;;
+esac