model	pass1	pass@count	win_rate	count	SE(A)	SE_x(A)	SE_pred(A)
qwen3-14b	77.6	91.5	38	1.1e+03	3.3	2.9	1.4
qwen3-32b	77.3	93.3	37.7	1.1e+03	3.3	2.7	1.8
google_gemma_3_27b_it	75.7	78.7	36.2	7	3.3	3.2	0.93
qwen2.5-coder-32b-instruct	75	90.9	36.8	1.1e+03	3.4	2.7	2.1
qwen2.5-coder-14b-instruct	74.8	93.3	36.6	1.1e+03	3.4	2.6	2.1
google_gemma_3_12b_it	72.9	85.4	34	1.1e+03	3.5	3.3	1.1
llama-3.1-70B-instruct	70.7	88.4	33	1.1e+03	3.6	2.9	2
qwen3-8b	70.7	92.1	34.1	1.1e+03	3.6	2.9	2.1
qwen3-4b	69.7	89.6	32.3	1.1e+03	3.6	3.2	1.7
google_gemma_2_27b_it	66.4	86.6	29.8	1.1e+03	3.7	3.4	1.5
mistralai_mixtral_8x22b_instruct_v0.1	65.9	92.7	29.8	8.9e+02	3.7	2.9	2.3
deepseek_r1_distill_qwen_32b	62.7	92.7	30.5	1.1e+03	3.8	2.3	3
qwen2-math-72b-instruct	61.7	89	27.8	1.4e+02	3.8	2.8	2.5
google_gemma_3_4b_it	61	77.4	27.1	1.1e+03	3.8	3.6	1.4
google_gemma_2_9b_it	55.2	79.3	23.4	1.1e+03	3.9	3.6	1.5
llama-3.1-8B-instruct	54.8	90.2	22.9	1.1e+03	3.9	3.1	2.4
deepseek_r1_distill_qwen_14b	51.9	92.1	24.7	1.1e+03	3.9	2.3	3.1
qwen2-7b-instruct	50.1	92.7	21.6	1.1e+03	3.9	2.5	3
deepseek_r1_distill_llama_70b	49.9	93.9	23.6	1.1e+03	3.9	2.3	3.2
qwen3-1.7b	49.8	86.6	21.2	1.1e+03	3.9	3.3	2.1
qwen2-72b-instruct	48.4	93.3	22.1	1.1e+03	3.9	2.4	3.1
google_codegemma_1.1_7b_it	47.4	84.8	18.7	1.1e+03	3.9	3.2	2.3
qwen2.5-coder-7b-instruct	47.4	93.9	20.8	1.1e+03	3.9	2.3	3.2
qwen1.5-14b-chat	44.7	89.6	17.7	1.1e+03	3.9	3	2.4
qwen2.5-coder-3b-instruct	44.3	92.1	19.2	1.1e+03	3.9	2.3	3.1
llama-3.2-3B-instruct	44.2	87.2	17.4	1.1e+03	3.9	2.9	2.5
deepseek_v2_lite_chat	42.2	90.2	16.8	1.1e+03	3.9	2.8	2.7
qwen1.5-32b-chat	41.6	83.5	16.6	1.1e+03	3.8	3.2	2.2
mistralai_ministral_8b_instruct_2410	40.1	92.1	16.5	1.1e+03	3.8	2.4	3
deepseek_r1_distill_llama_8b	37.9	91.5	16.1	1.1e+03	3.8	2.3	3
qwen1.5-72b-chat	36.9	84.8	14.4	1.1e+03	3.8	3	2.3
mistralai_mathstral_7b_v0.1	36.3	92.1	14	1.1e+03	3.8	2.4	2.9
google_gemma_3_1b_it	36.1	56.1	13.8	1.1e+03	3.8	3.5	1.3
qwen2.5-coder-1.5b-instruct	35.3	92.1	13.8	1.1e+03	3.7	2.3	3
qwen1.5-7b-chat	33.2	84.1	12.2	1.1e+03	3.7	2.8	2.4
mistralai_mistral_7b_instruct_v0.3	31.7	87.2	11	1.1e+03	3.6	2.8	2.3
qwen2.5-coder-0.5b-instruct	30.7	88.4	11.8	1.1e+03	3.6	2.2	2.8
deepseek_r1_distill_qwen_7b	28.3	92.1	11.7	1.1e+03	3.5	2	2.9
llama-3.2-1B-instruct	25.9	76.8	8.72	1.1e+03	3.4	2.5	2.3
mistralai_mistral_7b_instruct_v0.1	22.4	84.1	7.24	1.1e+03	3.3	2.3	2.3
qwen3-0.6b	21.2	82.3	6.99	1.1e+03	3.2	2.3	2.2
google_gemma_7b_it	20.4	65.9	6.5	1.1e+03	3.1	2.6	1.8
qwen2-1.5b-instruct	14	85.4	5.2	1.1e+03	2.7	1.2	2.4
google_gemma_2b_it	13.7	45.7	3.6	1.1e+03	2.7	2.3	1.4
mistralai_mistral_7b_instruct_v0.2	7.76	69.5	2.45	1.1e+03	2.1	1.2	1.7
qwen2-0.5b-instruct	7.41	67.7	2.12	1.1e+03	2	1.1	1.7
qwen1.5-1.8b-chat	5.42	62.2	1.52	1.1e+03	1.8	0.93	1.5
deepseek_r1_distill_qwen_1.5b	2.32	71.3	0.722	1.1e+03	1.2	0.38	1.1
qwen1.5-0.5b-chat	1.79	23.8	0.302	1e+03	1	0.64	0.81