#!/bin/bash
# Consensus System
# This script uses multiple LLM models to achieve consensus on a response through voting.
#
# APPLICATION LOGIC:
# The consensus process uses a multi-round voting system where multiple AI models
# attempt to reach agreement on a response. The system operates through four phases
# designed to reduce bias and improve reliability:
#
# PHASE 1 - RESPONSE GENERATION:
# - Models independently generate responses to avoid identical outputs
# - Self-assessment of confidence provides internal quality indicators
# - Different model architectures may produce varied perspectives
# - Robust extraction handles formatting inconsistencies
#
# PHASE 2 - CONFIDENCE VALIDATION:
# - A randomly selected judge model provides external quality assessment
# - Random selection helps prevent bias toward any particular model
# - External validation may catch overconfident self-assessments
# - Quality control through independent review
#
# PHASE 3 - CROSS-MODEL VOTING:
# - Each model evaluates others' work, creating a peer-review system
# - Models vote across all responses; self-voting is not excluded in the current implementation
# - Collective evaluation uses different model perspectives
# - Voting process distributes decision-making across models
#
# PHASE 4 - CONSENSUS DETERMINATION:
# - >50% threshold requires majority agreement rather than plurality
# - Fallback mechanisms provide output even when consensus isn't reached
# - Transparent vote counting shows decision process
# - Caveats indicate when consensus wasn't reached
#
# CONSENSUS MODELING:
# The system applies voting principles to AI model collaboration:
# - Random judge selection helps reduce systematic bias
# - Collective decision-making may reduce individual model errors
# - Peer review provides multiple evaluation perspectives
# - Transparency shows how decisions were made
# - Iterative rounds may improve response quality
# - Error handling addresses model inconsistencies
#
# The consensus threshold (>50%) requires majority agreement,
# while random judge selection helps prevent single-model dominance.
# The system emphasizes transparency and reliability in the decision process.
# --- Model Configuration ---
# The voting pool; edit this list to change which local models participate.
MODELS=(
  "llama3:8b-instruct-q4_K_M"
  "phi3:3.8b-mini-4k-instruct-q4_K_M"
  "deepseek-r1:1.5b"
  "gemma3n:e2b"
  "dolphin3:latest"
)
# Pick the judge at random from the pool so no single model dominates
# the confidence-validation phase. Array subscripts are an arithmetic
# context, so no explicit $(( )) is needed.
JUDGE_MODEL=${MODELS[RANDOM % ${#MODELS[@]}]}
# --- Defaults ---
DEFAULT_ROUNDS=2
# --- Argument Validation ---
# With no arguments at all, print usage and bail out.
if (( $# < 1 )); then
  printf '\n\tConsensus\n'
  printf '\tThis script uses multiple LLM models to achieve consensus through voting.\n'
  printf '\n\tUsage: %s [-f <file_path>] "<your prompt>" [number_of_rounds]\n' "$0"
  printf '\n\tExample: %s -f ./input.txt "Please summarize this text file" 2\n' "$0"
  printf '\n\tIf number_of_rounds is not provided, the program will default to %s rounds.\n' "$DEFAULT_ROUNDS"
  printf '\n\t-f <file_path> (optional): Append the contents of the file to the prompt.\n'
  printf '\n\n'
  exit 1
fi
# --- Argument Parsing ---
# -f <file_path> appends the file's contents to the prompt.
FILE_PATH=""
while getopts "f:" opt; do
  case $opt in
    f)
      FILE_PATH="$OPTARG"
      ;;
    *)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))
PROMPT="$1"
# Round count: default when omitted; reject anything that is not a
# positive integer (previously garbage values flowed straight into seq).
if [ -z "$2" ]; then
  ROUNDS=$DEFAULT_ROUNDS
elif [[ "$2" =~ ^[1-9][0-9]*$ ]]; then
  ROUNDS=$2
else
  echo "Invalid number of rounds: $2 (must be a positive integer)" >&2
  exit 1
fi
# If a file path was provided, append its contents to the prompt.
if [ -n "$FILE_PATH" ]; then
  if [ ! -f "$FILE_PATH" ]; then
    echo "File not found: $FILE_PATH" >&2
    exit 1
  fi
  FILE_CONTENTS=$(cat "$FILE_PATH")
  # BUG FIX: \n inside plain double quotes is a literal backslash-n, so the
  # prompt previously contained "\n[FILE CONTENTS]\n" as raw text rather than
  # line breaks. $'\n' produces real newline characters.
  PROMPT="$PROMPT"$'\n'"[FILE CONTENTS]"$'\n'"$FILE_CONTENTS"$'\n'"[END FILE]"
fi
# --- File Initialization ---
# Session logs accumulate under ~/tmp; create the directory on first use.
mkdir -p ~/tmp
# One log file per run, keyed by timestamp so sessions never collide.
SESSION_FILE=~/tmp/consensus_$(date +%Y%m%d_%H%M%S).txt
printf 'Consensus Session Log: %s\n' "${SESSION_FILE}"
printf '%s\n' "---------------------------------"
printf 'Judge model selected: %s\n' "${JUDGE_MODEL}"
printf '%s\n' "---------------------------------"
# Record the prompt and judge selection at the top of the session log.
{
  printf 'USER PROMPT: %s\n' "${PROMPT}"
  printf 'JUDGE MODEL: %s\n' "${JUDGE_MODEL}"
  printf '\n'
} >> "${SESSION_FILE}"
printf 'Processing consensus with %d models over %s rounds...\n' "${#MODELS[@]}" "${ROUNDS}"
# --- Consensus Rounds ---
# Each round: (1) every model answers with a self-assessed confidence,
# (2) the judge model re-assesses each confidence, (3) every model votes
# for the best response, (4) a strict majority (>50% of votes) wins.
#
# BUG FIX throughout: the original logged and prompted with ${i+1}, which is
# the ${parameter+word} expansion ("1" whenever i is set), NOT arithmetic —
# every model was labeled "MODEL 1", corrupting both the log and the ballot
# shown to the voters. All occurrences now use $((i + 1)).
for round in $(seq 1 "${ROUNDS}"); do
  echo "Starting consensus round ${round} of ${ROUNDS}..."
  echo "ROUND ${round}:" >> "${SESSION_FILE}"
  echo "================" >> "${SESSION_FILE}"
  # --- Step 1: Each model generates a response with confidence ---
  echo "Step 1: Generating responses with confidence scores..."
  echo "STEP 1 - MODEL RESPONSES:" >> "${SESSION_FILE}"
  # Reset per-round state so nothing leaks between rounds.
  responses=()
  confidences=()
  model_names=()
  for i in "${!MODELS[@]}"; do
    model="${MODELS[$i]}"
    echo " Generating response from ${model}..."
    # Prompt for a response plus a self-assessed confidence tag.
    # (String continuation lines stay at column 0: they are prompt text.)
    RESPONSE_PROMPT="You are an expert assistant. Please respond to the following prompt and provide your confidence level (strictly 'low', 'medium', or 'high') at the end of your response.
PROMPT: ${PROMPT}
IMPORTANT: Format your response exactly as follows:
[RESPONSE]
Your detailed response here...
[CONFIDENCE]
low
OR
[RESPONSE]
Your detailed response here...
[CONFIDENCE]
medium
OR
[RESPONSE]
Your detailed response here...
[CONFIDENCE]
high
Make sure to include both [RESPONSE] and [CONFIDENCE] tags exactly as shown."
    response_output=$(ollama run "${model}" "${RESPONSE_PROMPT}")
    # Extract the text between the [RESPONSE] and [CONFIDENCE] markers.
    # BUG FIX: the original pipeline ended with an extra `sed '$d'`, which
    # silently deleted the final line of every well-formed response.
    response_text=$(echo "${response_output}" | sed -n '/\[RESPONSE\]/,/\[CONFIDENCE\]/p' | sed '1d;$d')
    # If extraction failed, fall back to the full output minus the confidence tail.
    if [ -z "$response_text" ]; then
      response_text=$(echo "${response_output}" | sed '/\[CONFIDENCE\]/,$d' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
    fi
    confidence=$(echo "${response_output}" | grep -A1 "\[CONFIDENCE\]" | tail -n1 | tr '[:upper:]' '[:lower:]' | xargs)
    # Secondary extraction: any line mentioning "confidence" with a level word.
    if [ -z "$confidence" ]; then
      confidence=$(echo "${response_output}" | grep -i "confidence" | tail -n1 | grep -o -i "\(low\|medium\|high\)" | head -n1)
    fi
    # Clamp to a valid confidence level.
    if [[ ! "$confidence" =~ ^(low|medium|high)$ ]]; then
      confidence="medium" # Default if invalid
    fi
    responses[$i]="${response_text}"
    confidences[$i]="${confidence}"
    model_names[$i]="${model}"
    if [ -z "${response_text}" ]; then
      echo " WARNING: Empty response extracted from ${model}" >&2
    fi
    echo "MODEL $((i + 1)) (${model}):" >> "${SESSION_FILE}"
    echo "Response: ${response_text}" >> "${SESSION_FILE}"
    echo "Confidence: ${confidence}" >> "${SESSION_FILE}"
    echo "" >> "${SESSION_FILE}"
  done
  # --- Step 2: Judge validates confidence scores ---
  echo "Step 2: Validating confidence scores..."
  echo "STEP 2 - CONFIDENCE VALIDATION:" >> "${SESSION_FILE}"
  validated_confidences=()
  for i in "${!MODELS[@]}"; do
    model="${MODELS[$i]}"
    response="${responses[$i]}"
    confidence="${confidences[$i]}"
    JUDGE_PROMPT="You are a judge evaluating confidence scores. Review this response and its claimed confidence level, then provide your own confidence assessment.
RESPONSE: ${response}
CLAIMED CONFIDENCE: ${confidence}
Based on the quality, completeness, and accuracy of this response, what is your confidence level? Respond with only: low, medium, or high"
    judge_output=$(ollama run "${JUDGE_MODEL}" "${JUDGE_PROMPT}")
    judge_confidence=$(echo "${judge_output}" | tr '[:upper:]' '[:lower:]' | grep -o -i "\(low\|medium\|high\)" | head -n1)
    # Clamp the judge's answer to a valid level as well.
    if [[ ! "$judge_confidence" =~ ^(low|medium|high)$ ]]; then
      judge_confidence="medium" # Default if invalid
    fi
    validated_confidences[$i]="${judge_confidence}"
    echo "MODEL $((i + 1)) (${model}):" >> "${SESSION_FILE}"
    echo " Claimed confidence: ${confidence}" >> "${SESSION_FILE}"
    echo " Validated confidence: ${judge_confidence}" >> "${SESSION_FILE}"
    echo "" >> "${SESSION_FILE}"
  done
  # --- Step 3: Models vote on best response ---
  echo "Step 3: Models voting on best response..."
  echo "STEP 3 - VOTING:" >> "${SESSION_FILE}"
  # Build one ballot containing every response (correctly numbered).
  voting_prompt="You are a voter in a consensus system. Below are responses from different models to the same prompt. Please vote for the BEST response by providing the model number (1-${#MODELS[@]}).
ORIGINAL PROMPT: ${PROMPT}
RESPONSES:"
  for i in "${!MODELS[@]}"; do
    voting_prompt="${voting_prompt}
MODEL $((i + 1)) (${model_names[$i]}):
${responses[$i]}
Validated Confidence: ${validated_confidences[$i]}"
  done
  voting_prompt="${voting_prompt}
Please vote by responding with only the model number (1-${#MODELS[@]}) that you think provided the best response."
  votes=()
  vote_counts=()
  for i in "${!MODELS[@]}"; do
    vote_counts[$i]=0
  done
  # NOTE(review): each model also sees — and may vote for — its own response;
  # self-voting is not excluded, and an unparseable vote defaults to self.
  for i in "${!MODELS[@]}"; do
    model="${MODELS[$i]}"
    echo " Getting vote from ${model}..."
    vote_output=$(ollama run "${model}" "${voting_prompt}")
    # First integer anywhere in the reply is taken as the vote.
    vote=$(echo "${vote_output}" | grep -o '[0-9]\+' | head -1)
    if [[ "$vote" =~ ^[0-9]+$ ]] && [ "$vote" -ge 1 ] && [ "$vote" -le "${#MODELS[@]}" ]; then
      votes[$i]=$((vote - 1)) # Convert to 0-based index
      vote_counts[vote - 1]=$((vote_counts[vote - 1] + 1))
    else
      votes[$i]=$i # Default to voting for self if invalid
      vote_counts[$i]=$((vote_counts[$i] + 1))
    fi
    echo "MODEL $((i + 1)) (${model}) voted for MODEL $((votes[$i] + 1))" >> "${SESSION_FILE}"
  done
  # --- Step 4: Determine consensus ---
  echo "Step 4: Determining consensus..."
  echo "STEP 4 - CONSENSUS DETERMINATION:" >> "${SESSION_FILE}"
  # Find the response with the most votes (first max wins ties).
  max_votes=0
  winning_model=-1
  for i in "${!MODELS[@]}"; do
    if [ "${vote_counts[$i]}" -gt "$max_votes" ]; then
      max_votes="${vote_counts[$i]}"
      winning_model=$i
    fi
  done
  # Consensus requires a strict majority: more than half of all votes.
  total_votes=${#MODELS[@]}
  consensus_threshold=$((total_votes / 2 + 1))
  if [ "$max_votes" -ge "$consensus_threshold" ]; then
    consensus_reached=true
    consensus_message="CONSENSUS REACHED: Model $((winning_model + 1)) (${model_names[$winning_model]}) won with ${max_votes}/${total_votes} votes"
  else
    consensus_reached=false
    consensus_message="NO CONSENSUS: Model $((winning_model + 1)) (${model_names[$winning_model]}) had highest votes (${max_votes}/${total_votes}) but consensus threshold is ${consensus_threshold}"
  fi
  echo "Vote counts:" >> "${SESSION_FILE}"
  for i in "${!MODELS[@]}"; do
    echo " Model $((i + 1)) (${model_names[$i]}): ${vote_counts[$i]} votes" >> "${SESSION_FILE}"
  done
  echo "" >> "${SESSION_FILE}"
  echo "${consensus_message}" >> "${SESSION_FILE}"
  echo "" >> "${SESSION_FILE}"
  # Carry the winning response into the next round / final output.
  if [ "$winning_model" -ge 0 ]; then
    CURRENT_RESPONSE="${responses[$winning_model]}"
    CURRENT_CONFIDENCE="${validated_confidences[$winning_model]}"
    CURRENT_MODEL="${model_names[$winning_model]}"
    # Fallback: if the winning response is empty, use the first non-empty one.
    if [ -z "$CURRENT_RESPONSE" ]; then
      for i in "${!responses[@]}"; do
        if [ -n "${responses[$i]}" ]; then
          CURRENT_RESPONSE="${responses[$i]}"
          CURRENT_CONFIDENCE="${validated_confidences[$i]}"
          CURRENT_MODEL="${model_names[$i]}"
          echo " Using fallback response from ${CURRENT_MODEL}" >&2
          break
        fi
      done
    fi
  fi
  echo "Round ${round} complete: ${consensus_message}"
  echo "" >> "${SESSION_FILE}"
done
# --- Final Output ---
# Print the winning answer to the terminal and mirror it into the log.
divider="---------------------------------"
printf '%s\n' "$divider"
printf 'Consensus process complete.\n'
printf 'Final result:\n'
printf '%s\n' "$divider"
{
  printf 'CONSENSUS SUMMARY:\n'
  printf '==================\n'
  printf 'Final Answer: %s\n' "${CURRENT_RESPONSE}"
  printf 'Model: %s\n' "${CURRENT_MODEL}"
  printf 'Confidence: %s\n' "${CURRENT_CONFIDENCE}"
  printf 'Consensus Status: %s\n' "${consensus_message}"
} >> "${SESSION_FILE}"
printf 'Final Answer:\n%s\n\n' "${CURRENT_RESPONSE}"
printf 'Model: %s\n' "${CURRENT_MODEL}"
printf 'Confidence: %s\n' "${CURRENT_CONFIDENCE}"
printf 'Consensus Status: %s\n\n' "${consensus_message}"
printf 'Full session log: %s\n' "${SESSION_FILE}"