diff options
Diffstat (limited to 'bash/talk-to-computer/rag_search.sh')
-rwxr-xr-x | bash/talk-to-computer/rag_search.sh | 187 |
1 files changed, 187 insertions, 0 deletions
#!/bin/bash
#
# RAG Search Utility - search the knowledge corpus with standard Unix tools.
# File: bash/talk-to-computer/rag_search.sh
#
# Usage: rag_search.sh <search|context|extract|stats|help> [arguments]
#
# Globals read by the functions below (this script does not define them;
# they are expected to come from rag_config.sh or the environment):
#   CORPUS_DIR            - root directory searched when no topic is given
#   CORPUS_REGISTRY       - '|'-delimited file; the first field is matched
#                           against the topic, the second is its corpus path
#   SEARCH_CONTEXT_LINES  - default number of context lines for `context`

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

if [[ -r "${SCRIPT_DIR}/rag_config.sh" ]]; then
  # shellcheck source=/dev/null
  source "${SCRIPT_DIR}/rag_config.sh"
else
  printf 'rag_search: warning: %s not found; relying on environment\n' \
    "${SCRIPT_DIR}/rag_config.sh" >&2
fi

# --- Utility Functions ---

# Look a topic up in the registry.
# A line matches when it has at least one '|' and its first field ends with
# the topic. The comparison is literal, so regex metacharacters in the topic
# cannot cause false matches or pattern errors.
# Arguments: $1 - topic
# Outputs:   second field of the first matching line
# Returns:   0 if a line matched, 1 otherwise (or if the registry is missing)
_registry_lookup() {
  [[ -f "${CORPUS_REGISTRY:-}" ]] || return 1
  # The topic travels through the environment: `awk -v` would process
  # backslash escapes in it.
  RAG_TOPIC="$1" awk -F'|' '
    BEGIN { topic = ENVIRON["RAG_TOPIC"]; n = length(topic) }
    NF > 1 && length($1) >= n && substr($1, length($1) - n + 1) == topic {
      print $2
      found = 1
      exit
    }
    END { exit !found }
  ' < "$CORPUS_REGISTRY"
}

# Get corpus path for a topic (standalone version).
# Arguments: $1 - topic
# Outputs:   the corpus path, or nothing when the topic is unknown
# Returns:   always 0; callers test the output for emptiness
get_corpus_path() {
  local topic="$1"
  _registry_lookup "$topic"
  return 0
}

# Check if corpus exists for a topic.
# Arguments: $1 - topic
# Returns:   0 if the registry has an entry for the topic, non-zero otherwise
corpus_exists() {
  local topic="$1"
  _registry_lookup "$topic" > /dev/null
}

# Decide which directory a command should search.
# Arguments: $1 - topic (may be empty, meaning the whole corpus)
#            $2 - name of the caller's variable that receives the directory
# Outputs:   a user-facing error message on failure
# Returns:   0 on success, 1 if the topic is unknown or CORPUS_DIR is unset
_resolve_search_root() {
  local topic="$1"
  local path

  if [[ -n "$topic" ]]; then
    path=$(get_corpus_path "$topic")
    if [[ -z "$path" ]]; then
      echo "❌ Topic not found: $topic"
      return 1
    fi
  else
    path="${CORPUS_DIR:-}"
    if [[ -z "$path" ]]; then
      echo "❌ CORPUS_DIR is not set"
      return 1
    fi
  fi

  # Assigns into the caller's variable; bash scoping is dynamic.
  printf -v "$2" '%s' "$path"
}

# --- Search Functions ---

# Search corpus for keywords.
# The query is a case-insensitive grep pattern; only *.txt, *.md and *.html
# files are searched and at most the first 10 matching lines are shown.
# Arguments: $1 - query
#            $2 - topic (optional)
# Returns:   1 if the search root cannot be resolved, 0 otherwise
search_corpus() {
  local query="$1"
  local topic="${2:-}"
  local search_root
  local file line content

  echo "🔍 Searching corpus for: '$query'"
  if [[ -n "$topic" ]]; then
    echo "📂 Limited to topic: $topic"
  fi
  echo "----------------------------------------"

  # Resolved before the pipeline starts: inside a pipeline stage a `return`
  # only leaves the subshell and the error text would be parsed as a hit.
  _resolve_search_root "$topic" search_root || return 1

  # -n and -H guarantee the "file:line:content" shape the loop parses.
  # File names that themselves contain ':' are not handled.
  grep -r -i -n -H \
    --include="*.txt" --include="*.md" --include="*.html" \
    -e "$query" -- "$search_root" \
    | head -10 \
    | while IFS=: read -r file line content; do
        echo "📄 $(basename "$(dirname "$file")")/$(basename "$file") (line $line):"
        echo "   $content"
        echo ""
      done
}

# Get context around search results.
# Arguments: $1 - query (case-insensitive grep pattern)
#            $2 - topic (optional)
#            $3 - context lines before and after each match (optional;
#                 defaults to SEARCH_CONTEXT_LINES, or 3 if that is unset)
# Returns:   1 on invalid arguments or unknown topic, else grep's status
get_context() {
  local query="$1"
  local topic="${2:-}"
  local context_lines="${3:-${SEARCH_CONTEXT_LINES:-3}}"
  local search_root

  echo "📖 Getting context for: '$query'"
  echo "----------------------------------------"

  if [[ ! "$context_lines" =~ ^[0-9]+$ ]]; then
    echo "❌ Context lines must be a non-negative integer: $context_lines"
    return 1
  fi

  _resolve_search_root "$topic" search_root || return 1

  # -C n is equivalent to -A n -B n.
  grep -r -i -C "$context_lines" -e "$query" -- "$search_root"
}

# Extract relevant sections from files.
# For every file containing the query, prints each "section": the nearest
# preceding '#' header line (if any), then everything from the first
# matching line up to and including the next blank line.
# Arguments: $1 - query (grep pattern for file selection, awk regex for
#                 section detection; both case-insensitive)
#            $2 - topic (optional)
# Returns:   1 if the topic is unknown or no file matches, 0 otherwise
extract_sections() {
  local query="$1"
  local topic="${2:-}"
  local search_root files file

  echo "📋 Extracting relevant sections for: '$query'"
  echo "----------------------------------------"

  _resolve_search_root "$topic" search_root || return 1

  # Find files containing the query. stderr is dropped on purpose so that
  # unreadable files do not clutter the report.
  files=$(grep -r -l -i -e "$query" -- "$search_root" 2> /dev/null)

  if [[ -z "$files" ]]; then
    echo "❌ No files found containing: $query"
    return 1
  fi

  while IFS= read -r file; do
    echo "📄 Processing: $(basename "$file")"
    echo "----------------------------------------"

    # The query travels through the environment: `awk -v` would process
    # backslash escapes in it. The file arrives on stdin so awk never
    # mistakes a name containing '=' for a variable assignment.
    RAG_QUERY="$query" awk '
      BEGIN { pattern = tolower(ENVIRON["RAG_QUERY"]); in_section = 0; header = "" }
      {
        is_header = ($0 ~ /^#/)

        # A matching line outside a section opens a new one.
        if (in_section == 0 && tolower($0) ~ pattern) {
          print "RELEVANT SECTION:"
          # Include the header seen before the match, unless the matching
          # line is itself a header.
          if (header != "" && !is_header) {
            print header
          }
          in_section = 1
        }

        if (is_header) {
          header = $0
        }

        # Print content while inside a section; a blank line closes it.
        if (in_section == 1) {
          print
          if (length($0) == 0) {
            in_section = 0
            print ""
          }
        }
      }
    ' < "$file"

    echo "----------------------------------------"
  done <<< "$files"
}

# --- Main Command Interface ---

# Dispatch the command given on the command line.
# Arguments: $1 - command (defaults to "help"), $2.. - command arguments
# Returns:   the command's status; 1 on a usage error
rag_search_main() {
  case "${1:-help}" in
    "search")
      if [[ -n "${2:-}" ]]; then
        search_corpus "$2" "${3:-}"
      else
        echo "❌ Usage: $0 search <query> [topic]"
        return 1
      fi
      ;;
    "context")
      if [[ -n "${2:-}" ]]; then
        get_context "$2" "${3:-}" "${4:-}"
      else
        echo "❌ Usage: $0 context <query> [topic] [lines]"
        return 1
      fi
      ;;
    "extract")
      if [[ -n "${2:-}" ]]; then
        extract_sections "$2" "${3:-}"
      else
        echo "❌ Usage: $0 extract <query> [topic]"
        return 1
      fi
      ;;
    "stats")
      # get_corpus_stats is not defined in this file; it is expected to be
      # provided by the sourced configuration.
      if declare -F get_corpus_stats > /dev/null; then
        get_corpus_stats
      else
        echo "❌ get_corpus_stats is not available (rag_config.sh not loaded?)"
        return 1
      fi
      ;;
    "help" | *)
      echo "🔍 RAG Search Utility"
      echo "Search and extract information from the knowledge corpus"
      echo ""
      echo "Usage: $0 <command> [arguments]"
      echo ""
      echo "Commands:"
      echo "  search <query> [topic]     Search for exact matches"
      echo "  context <query> [topic]    Get context around matches"
      echo "  extract <query> [topic]    Extract relevant sections"
      echo "  stats                      Show corpus statistics"
      echo "  help                       Show this help message"
      echo ""
      echo "Examples:"
      echo "  $0 search 'quantum physics'"
      echo "  $0 search 'lil programming' programming"
      echo "  $0 context 'force' physics"
      echo "  $0 extract 'variables' programming"
      ;;
  esac
}

rag_search_main "$@"