#!/bin/bash
#
# RAG Search Utility - search the knowledge corpus.
#
# Demonstrates how to search the corpus with standard Unix tools
# (grep, awk, head).
#
# Usage: see the "help" command at the bottom of this file.
#
# Globals read by this script (none of them is assigned here; they are
# presumably defined by rag_config.sh -- verify against that file):
#   CORPUS_DIR            root directory searched when no topic is given
#   CORPUS_REGISTRY       '|'-separated registry file; the code reads field 1
#                         as the topic key and field 2 as the corpus path
#   SEARCH_CONTEXT_LINES  default number of context lines for "context"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/rag_config.sh"

# --- Utility Functions ---

#######################################
# Print the first registry line whose first '|'-separated field ends with
# the given topic. The topic is compared as a literal string, so regex
# metacharacters in it (".", "*", "+", ...) have no special meaning.
# Globals:   CORPUS_REGISTRY (read)
# Arguments: $1 - topic name
# Outputs:   the matching registry line on stdout
# Returns:   0 if an entry was found; 1 if not, or if the registry file
#            does not exist
#######################################
_registry_entry() {
  local topic="$1"

  [[ -f "${CORPUS_REGISTRY:-}" ]] || return 1

  # The topic travels through the environment rather than `awk -v` so that
  # awk does not apply backslash-escape processing to it.
  RAG_TOPIC="$topic" awk -F'|' '
    BEGIN { t = ENVIRON["RAG_TOPIC"]; n = length(t); found = 0 }
    # NF >= 2 requires at least one "|" on the line.
    NF >= 2 && length($1) >= n && substr($1, length($1) - n + 1) == t {
      print
      found = 1
      exit
    }
    END { exit(found ? 0 : 1) }
  ' "$CORPUS_REGISTRY"
}

#######################################
# Get corpus path for a topic (standalone version).
# Globals:   CORPUS_REGISTRY (read, via _registry_entry)
# Arguments: $1 - topic name
# Outputs:   field 2 of the first matching registry line; nothing when the
#            topic is not registered or the registry file is missing
# Returns:   0 always (callers test for empty output)
#######################################
get_corpus_path() {
  local topic="$1"
  local entry

  entry=$(_registry_entry "$topic") || return 0
  printf '%s\n' "$entry" | cut -d'|' -f2
}

#######################################
# Check if corpus exists for a topic.
# Globals:   CORPUS_REGISTRY (read, via _registry_entry)
# Arguments: $1 - topic name
# Returns:   0 if the registry has an entry for the topic, 1 otherwise
#######################################
corpus_exists() {
  local topic="$1"

  _registry_entry "$topic" >/dev/null
}

#######################################
# Resolve the directory a search should run in.
# Globals:   CORPUS_DIR (read)
# Arguments: $1 - topic name; empty means "whole corpus"
# Outputs:   the directory to search on stdout
# Returns:   0 on success, 1 if a topic was given but has no registered path
#######################################
_resolve_search_root() {
  local topic="$1"
  local root

  if [[ -z "$topic" ]]; then
    printf '%s\n' "${CORPUS_DIR:-}"
    return 0
  fi

  root=$(get_corpus_path "$topic")
  [[ -n "$root" ]] || return 1
  printf '%s\n' "$root"
}

# --- Search Functions ---

#######################################
# Search corpus for keywords and print up to 10 hits.
# Only *.txt, *.md and *.html files are searched; matching is
# case-insensitive and the query is a grep regular expression.
# Globals:   CORPUS_DIR, CORPUS_REGISTRY (read)
# Arguments: $1 - query
#            $2 - optional topic restricting the search
# Outputs:   a banner, then per hit "<dir>/<file> (line N):" and the line
# Returns:   1 if the topic is unknown; otherwise the pipeline status
#######################################
search_corpus() {
  local query="$1"
  local topic="${2:-}"
  local search_root
  local file line content filename topic_name

  echo "🔍 Searching corpus for: '$query'"
  if [[ -n "$topic" ]]; then
    echo "📂 Limited to topic: $topic"
  fi
  echo "----------------------------------------"

  # Resolve the topic BEFORE starting the pipeline: inside a pipeline the
  # error text would be fed to the result parser and `return` would only
  # leave the pipeline's subshell.
  if ! search_root=$(_resolve_search_root "$topic"); then
    echo "❌ Topic not found: $topic"
    return 1
  fi

  # -n/-H make grep emit "file:line:content", which is what the loop parses.
  # `--` keeps a query that starts with "-" from being read as an option.
  # Limitation: a path containing ":" would be split in the wrong place.
  grep -r -i -n -H \
    --include="*.txt" --include="*.md" --include="*.html" \
    -- "$query" "$search_root" \
    | head -10 \
    | while IFS=: read -r file line content; do
      filename=$(basename -- "$file")
      topic_name=$(basename -- "$(dirname -- "$file")")
      echo "📄 $topic_name/$filename (line $line):"
      printf ' %s\n' "$content"
      echo ""
    done
}

#######################################
# Get context around search results.
# Globals:   CORPUS_DIR, CORPUS_REGISTRY, SEARCH_CONTEXT_LINES (read)
# Arguments: $1 - query (grep regular expression, case-insensitive)
#            $2 - optional topic restricting the search
#            $3 - optional number of lines shown before and after each match;
#                 defaults to SEARCH_CONTEXT_LINES, or 2 when that is unset
#                 (the fallback only exists so grep gets a valid number)
# Outputs:   a banner followed by raw `grep -A/-B` output
# Returns:   1 if the topic is unknown; otherwise grep's exit status
#######################################
get_context() {
  local query="$1"
  local topic="${2:-}"
  local context_lines="${3:-${SEARCH_CONTEXT_LINES:-2}}"
  local search_root

  echo "📖 Getting context for: '$query'"
  echo "----------------------------------------"

  if ! search_root=$(_resolve_search_root "$topic"); then
    echo "❌ Topic not found: $topic"
    return 1
  fi

  grep -r -i -A "$context_lines" -B "$context_lines" \
    -- "$query" "$search_root"
}

#######################################
# Extract relevant sections from files.
# For every file that contains the query, print each run of lines that
# starts at a matching line and ends at the next empty line. When a line
# starting with "#" was seen before the match, that header line is printed
# first.
# Globals:   CORPUS_DIR, CORPUS_REGISTRY (read)
# Arguments: $1 - query (case-insensitive; grep selects the files with it as
#                 a basic regex, awk then applies it as an extended regex)
#            $2 - optional topic restricting the search
# Outputs:   per file a "Processing" banner and the extracted sections
# Returns:   1 if the topic is unknown or no file contains the query
#######################################
extract_sections() {
  local query="$1"
  local topic="${2:-}"
  local search_root files file filename

  echo "📋 Extracting relevant sections for: '$query'"
  echo "----------------------------------------"

  if ! search_root=$(_resolve_search_root "$topic"); then
    echo "❌ Topic not found: $topic"
    return 1
  fi

  # Find files containing the query.
  files=$(grep -r -l -i -- "$query" "$search_root" 2>/dev/null)
  if [[ -z "$files" ]]; then
    echo "❌ No files found containing: $query"
    return 1
  fi

  # IFS= keeps leading/trailing whitespace in file names intact.
  while IFS= read -r file; do
    filename=$(basename -- "$file")
    echo "📄 Processing: $filename"
    echo "----------------------------------------"

    # The query is passed via ENVIRON (not -v) so awk does not rewrite
    # backslash sequences before the text is used as a regex.
    RAG_QUERY="$query" awk '
      BEGIN {
        needle = tolower(ENVIRON["RAG_QUERY"])
        in_section = 0
        header = ""
      }

      # Remember the most recent "#" header seen outside a section.
      /^#/ && in_section == 0 { header = $0 }

      # A matching line opens a section (case-insensitive match).
      tolower($0) ~ needle {
        if (in_section == 0) {
          print "RELEVANT SECTION:"
          # Skip the header when the matching line IS the header, since
          # that line is printed by the rule below anyway.
          if (header != "" && header != $0) {
            print header
          }
          in_section = 1
        }
      }

      # Inside a section every line is echoed; an empty line closes it.
      in_section == 1 {
        print
        if (length($0) == 0) {
          in_section = 0
          print ""
        }
      }
    ' "$file"

    echo "----------------------------------------"
  done <<< "$files"
}

# --- Main Command Interface ---

case "${1:-help}" in
  "search")
    if [[ -n "${2:-}" ]]; then
      search_corpus "$2" "${3:-}"
    else
      echo "❌ Usage: $0 search <query> [topic]" >&2
      exit 1
    fi
    ;;
  "context")
    if [[ -n "${2:-}" ]]; then
      get_context "$2" "${3:-}" "${4:-}"
    else
      echo "❌ Usage: $0 context <query> [topic] [lines]" >&2
      exit 1
    fi
    ;;
  "extract")
    if [[ -n "${2:-}" ]]; then
      extract_sections "$2" "${3:-}"
    else
      echo "❌ Usage: $0 extract <query> [topic]" >&2
      exit 1
    fi
    ;;
  "stats")
    # NOTE(review): get_corpus_stats is not defined in this file; presumably
    # it is provided by rag_config.sh -- confirm.
    get_corpus_stats
    ;;
  "help" | *)
    echo "🔍 RAG Search Utility"
    echo "Search and extract information from the knowledge corpus"
    echo ""
    echo "Usage: $0 <command> [arguments]"
    echo ""
    echo "Commands:"
    echo "  search <query> [topic]            Search for exact matches"
    echo "  context <query> [topic] [lines]   Get context around matches"
    echo "  extract <query> [topic]           Extract relevant sections"
    echo "  stats                             Show corpus statistics"
    echo "  help                              Show this help message"
    echo ""
    echo "Examples:"
    echo "  $0 search 'quantum physics'"
    echo "  $0 search 'lil programming' programming"
    echo "  $0 context 'force' physics"
    echo "  $0 extract 'variables' programming"
    ;;
esac