model pass1 pass@count win_rate count SE(A) SE_x(A) SE_pred(A)
deepseek_r1_distill_llama_70b 25.8 50 21.2 11 8 6.7 4.4
deepseek_r1_distill_qwen_32b 25.5 43.3 20.9 11 8 6.8 4.2
deepseek_r1_distill_qwen_14b 24.5 33.3 20.2 11 7.9 7.4 2.7
google_gemma_3_27b_it 24.4 36.7 19.9 12 7.8 6.5 4.4
deepseek_r1_distill_llama_8b 21.5 26.7 17 13 7.5 6.8 3.2
google_gemma_3_12b_it 20.9 36.7 16.6 11 7.4 6.2 4.1
qwen3-32b 19.1 36.7 15.4 11 7.2 5.3 4.8
deepseek_r1_distill_qwen_7b 18.8 26.7 14.6 11 7.1 6.6 2.7
qwen3-14b 18.1 40 14.5 12 7 5.3 4.6
qwen3-8b 17.5 33.3 13.8 12 6.9 5.2 4.6
deepseek_r1_distill_qwen_1.5b 17.2 20 13.2 13 6.9 6.3 2.7
qwen3-4b 16.4 26.7 12.8 12 6.8 5.6 3.8
qwen2-math-72b-instruct 12 33.3 9.48 10 5.9 3.8 4.5
qwen2.5-coder-32b-instruct 11.5 30 8.95 11 5.8 3.7 4.5
qwen2-math-7b-instruct 10 23.3 7.87 6 5.5 3.7 4
google_gemma_3_4b_it 9.23 20 6.97 13 5.3 4.2 3.3
qwen3-1.7b 8.06 30 6.47 12 5 2.8 4.1
qwen2.5-coder-14b-instruct 7.78 23.3 5.83 12 4.9 3.3 3.6
qwen2-72b-instruct 3.64 26.7 2.99 11 3.4 0.61 3.4
llama-3.1-70B-instruct 3.33 3.33 2.34 13 3.3 3.3 0
qwen2-math-1.5b-instruct 3.33 10 2.61 4 3.3 1.2 3
qwen2.5-coder-7b-instruct 1.82 20 1.41 11 2.4 0 2.5
mistralai_ministral_8b_instruct_2410 1.21 13.3 0.943 11 2 0 2
qwen2.5-coder-3b-instruct 1.11 10 0.831 12 1.9 0.36 1.9
google_gemma_2_27b_it 1 6.67 0.812 10 1.8 0.46 1.8
qwen3-0.6b 0.769 10 0.596 13 1.6 0 1.6
google_gemma_2_9b_it 0.606 3.33 0.421 11 1.4 0.44 1.3
mistralai_mathstral_7b_v0.1 0.606 6.67 0.438 11 1.4 0 1.4
qwen2-1.5b-instruct 0.513 3.33 0.387 13 1.3 0.37 1.3
google_gemma_3_1b_it 0.513 3.33 0.351 13 1.3 0.37 1.3
qwen1.5-72b-chat 0.303 3.33 0.303 11 1 0 1
mistralai_mixtral_8x22b_instruct_v0.1 0.303 3.33 0.207 11 1 0 1
qwen2-7b-instruct 0.303 3.33 0.281 11 1 0 1
qwen1.5-32b-chat 0.303 3.33 0.281 11 1 0 1
mistralai_mixtral_8x7b_instruct_v0.1 0.278 3.33 0.243 12 0.96 0 0.96
google_codegemma_1.1_7b_it 0.256 3.33 0.193 13 0.92 0 0.92
llama-3.2-3B-instruct 0 0 0 18 0 0 0
llama-3.2-1B-instruct 0 0 0 21 0 0 0
llama-3.1-8B-instruct 0 0 0 15 0 0 0
google_gemma_7b_it 0 0 0 13 0 0 0
deepseek_v2_lite_chat 0 0 0 11 0 0 0
google_gemma_2b_it 0 0 0 13 0 0 0
mistralai_mistral_7b_instruct_v0.3 0 0 0 11 0 0 0
qwen1.5-0.5b-chat 0 0 0 13 0 0 0
qwen1.5-7b-chat 0 0 0 12 0 0 0
qwen2-0.5b-instruct 0 0 0 13 0 0 0
qwen1.5-14b-chat 0 0 0 12 0 0 0
qwen1.5-1.8b-chat 0 0 0 11 0 0 0
mistralai_mistral_7b_instruct_v0.2 0 0 0 11 0 0 0
mistralai_mistral_7b_instruct_v0.1 0 0 0 11 0 0 0
qwen2.5-coder-0.5b-instruct 0 0 0 13 0 0 0
qwen2.5-coder-1.5b-instruct 0 0 0 11 0 0 0