model	pass1	pass@count	win_rate	count	SE(A)	SE_x(A)	SE_pred(A)
qwen3-14b	72.2	80	31.4	12	2	1.9	0.7
qwen2.5-coder-14b-instruct	72.1	88.2	32.9	12	2	1.6	1.2
google_gemma_3_12b_it	69.3	76.6	29.3	11	2.1	1.9	0.73
deepseek_r1_distill_llama_70b	66.3	81.2	26.7	11	2.1	1.8	1.1
qwen3-8b	66.1	76.8	27.1	12	2.1	1.9	0.88
qwen2.5-coder-7b-instruct	64.8	85.8	27.9	10	2.1	1.6	1.4
qwen3-4b	64.1	74.8	25.5	12	2.1	2	0.83
mistralai_mixtral_8x22b_instruct_v0.1	63.9	80.2	25.2	11	2.1	1.8	1.2
google_gemma_3_4b_it	60	69.8	22.6	13	2.2	2	0.81
deepseek_r1_distill_qwen_14b	57.1	77.6	21.1	11	2.2	1.8	1.3
llama-3.1-8B-instruct	56.2	56.2	19.9	15	2.2	2.2	0
qwen2.5-coder-3b-instruct	55	81.4	20.6	12	2.2	1.7	1.5
mistralai_mixtral_8x7b_instruct_v0.1	52.1	71.8	17.7	12	2.2	1.9	1.2
mistralai_ministral_8b_instruct_2410	52	73.6	17.3	11	2.2	1.8	1.3
qwen2-7b-instruct	51.1	72.6	17.2	11	2.2	1.8	1.3
llama-3.2-3B-instruct	48.8	48.8	15.7	15	2.2	2.2	0
mistralai_mathstral_7b_v0.1	48.7	75	15.7	11	2.2	1.8	1.4
qwen3-1.7b	48.1	62.6	15.7	12	2.2	2	0.99
deepseek_v2_lite_chat	44.5	67.8	13.6	11	2.2	1.8	1.3
qwen2.5-coder-1.5b-instruct	43.7	72.2	13.8	11	2.2	1.7	1.5
qwen1.5-14b-chat	40.2	62.2	11.4	12	2.2	1.8	1.2
deepseek_r1_distill_llama_8b	39.5	66.6	11.7	12	2.2	1.7	1.3
mistralai_mistral_7b_instruct_v0.3	39.2	61.6	10.7	11	2.2	1.8	1.2
deepseek_r1_distill_qwen_7b	39.2	68.2	12	11	2.2	1.6	1.4
mistralai_mistral_7b_instruct_v0.2	36.4	58	9.98	10	2.2	1.8	1.2
qwen1.5-7b-chat	34.8	56.2	9.06	12	2.1	1.8	1.2
llama-3.2-1B-instruct	32	32	8.18	11	2.1	2.1	0
qwen2.5-coder-0.5b-instruct	31.9	60.8	8.37	13	2.1	1.6	1.3
mistralai_mistral_7b_instruct_v0.1	30.5	58.6	7.54	11	2.1	1.5	1.4
qwen3-0.6b	27.6	51.6	6.97	13	2	1.6	1.2
qwen2-1.5b-instruct	23.5	56.4	5.27	13	1.9	1.3	1.4
deepseek_r1_distill_qwen_1.5b	14.6	38.8	3.2	12	1.6	1.1	1.1
qwen2-0.5b-instruct	13	38.6	2.3	13	1.5	0.97	1.1
qwen1.5-1.8b-chat	12.6	33.6	2.31	11	1.5	1	1.1
qwen1.5-0.5b-chat	4.17	17.4	0.577	13	0.89	0.54	0.71