model	pass1	pass@count	win_rate	count	SE(A)	SE_x(A)	SE_pred(A)
qwen3-32b	78.5	89	40.4	11	3.2	2.7	1.8
qwen3-14b	77.8	86.6	39.9	12	3.2	2.9	1.4
qwen2.5-coder-14b-instruct	76	88.4	39	12	3.3	2.7	2
google_gemma_3_27b_it	75.6	80.5	37.7	12	3.4	3.2	1.1
qwen2.5-coder-32b-instruct	75.1	86.6	38.6	11	3.4	2.7	2
google_gemma_3_12b_it	73.2	77.4	35.9	11	3.5	3.3	1.1
qwen3-8b	69.9	87.2	35.4	12	3.6	2.9	2.1
qwen3-4b	69.7	82.9	34	12	3.6	3.2	1.6
google_gemma_2_27b_it	67.1	75	31.7	10	3.7	3.4	1.5
mistralai_mixtral_8x22b_instruct_v0.1	65.6	83.5	31.1	11	3.7	3	2.1
qwen2-math-72b-instruct	62.1	82.3	29.5	11	3.8	2.9	2.4
deepseek_r1_distill_qwen_32b	61	87.8	30.9	11	3.8	2.3	3
google_gemma_3_4b_it	60.6	68.9	28.2	13	3.8	3.6	1.3
llama-3.1-8B-instruct	57.3	57.3	26	15	3.9	3.9	0
google_gemma_2_9b_it	55.9	65.9	25.2	11	3.9	3.6	1.4
deepseek_r1_distill_qwen_14b	53	86	26.7	11	3.9	2.3	3.1
qwen3-1.7b	50.4	73.8	22.6	12	3.9	3.3	2
qwen2-7b-instruct	50	84.8	22.9	11	3.9	2.5	3
deepseek_r1_distill_llama_70b	49.6	86	24.9	11	3.9	2.2	3.2
qwen2-72b-instruct	48.2	84.1	23.3	11	3.9	2.2	3.2
qwen2.5-coder-7b-instruct	48.1	86.6	22.5	10	3.9	2.2	3.3
google_codegemma_1.1_7b_it	47.4	70.1	20	13	3.9	3.1	2.3
qwen1.5-14b-chat	45.1	75.6	19.1	12	3.9	3	2.5
qwen2.5-coder-3b-instruct	43.5	81.7	19.7	12	3.9	2.3	3.1
deepseek_v2_lite_chat	42.8	73.8	18	11	3.9	2.8	2.7
qwen1.5-32b-chat	40.9	65.2	17.5	11	3.8	3.1	2.2
mistralai_ministral_8b_instruct_2410	39.7	80.5	17.5	11	3.8	2.3	3.1
llama-3.2-3B-instruct	37.8	37.8	14.7	17	3.8	3.8	0
qwen1.5-72b-chat	37.7	62.2	15.6	11	3.8	2.9	2.4
deepseek_r1_distill_llama_8b	37	76.8	16.6	13	3.8	2.4	2.9
google_gemma_3_1b_it	36	43.3	14.5	13	3.7	3.5	1.3
mistralai_mathstral_7b_v0.1	35.9	74.4	14.9	11	3.7	2.3	2.9
qwen2.5-coder-1.5b-instruct	34.8	76.8	14.4	11	3.7	2.3	2.9
qwen1.5-7b-chat	34.1	65.2	13.4	12	3.7	2.9	2.3
mistralai_mistral_7b_instruct_v0.3	31.6	59.1	11.8	11	3.6	2.8	2.3
mistralai_mixtral_8x7b_instruct_v0.1	31.6	54.9	13	12	3.6	2.8	2.3
qwen2.5-coder-0.5b-instruct	31	68.3	12.7	13	3.6	2.3	2.8
qwen2-math-7b-instruct	29.6	56.1	12	6	3.6	2.5	2.5
deepseek_r1_distill_qwen_7b	26.9	71.3	11.9	11	3.5	2	2.9
llama-3.2-1B-instruct	25.6	25.6	9.17	12	3.4	3.4	0
mistralai_mistral_7b_instruct_v0.1	22.4	51.8	7.7	11	3.3	2.3	2.3
qwen3-0.6b	21.4	48.2	7.64	13	3.2	2.3	2.3
google_gemma_7b_it	20.3	40.2	6.93	13	3.1	2.6	1.8
qwen2-1.5b-instruct	14.1	57.9	5.9	13	2.7	1.2	2.4
google_gemma_2b_it	13.9	26.2	4.02	13	2.7	2.3	1.4
qwen2-0.5b-instruct	7.41	30.5	2.27	13	2	1.1	1.7
mistralai_mistral_7b_instruct_v0.2	6.95	28.7	2.49	10	2	0.98	1.7
qwen1.5-1.8b-chat	4.99	22.6	1.51	11	1.7	0.88	1.5
qwen2-math-1.5b-instruct	3.35	8.54	1.35	4	1.4	0.89	1.1
deepseek_r1_distill_qwen_1.5b	2.58	19.5	0.799	13	1.2	0.42	1.2
qwen1.5-0.5b-chat	1.92	7.93	0.349	13	1.1	0.77	0.74