{"model":"google_gemma_3_27b_it","pass1":0.4009259259,"pass@count":0.6,"win_rate":0.385196994,"count":12.0,"SE(A)":0.0365288568,"SE_x(A)":0.0316817984,"SE_pred(A)":0.0181829872}
{"model":"google_gemma_2_27b_it","pass1":0.2011111111,"pass@count":0.4722222222,"win_rate":0.1901382955,"count":10.0,"SE(A)":0.0298761733,"SE_x(A)":0.0183770522,"SE_pred(A)":0.023555672}
{"model":"llama-3.1-8B-instruct","pass1":0.1825925926,"pass@count":0.2,"win_rate":0.1702635477,"count":15.0,"SE(A)":0.028795499,"SE_x(A)":0.0273182787,"SE_pred(A)":0.0091045272}
{"model":"google_gemma_3_4b_it","pass1":0.1747863248,"pass@count":0.2777777778,"win_rate":0.1635121827,"count":13.0,"SE(A)":0.0283074456,"SE_x(A)":0.0250361748,"SE_pred(A)":0.0132098987}
{"model":"google_gemma_2_9b_it","pass1":0.1681818182,"pass@count":0.3666666667,"win_rate":0.1572176437,"count":11.0,"SE(A)":0.0278783761,"SE_x(A)":0.0197909568,"SE_pred(A)":0.0196347112}
{"model":"llama-3.2-3B-instruct","pass1":0.0973856209,"pass@count":0.1055555556,"win_rate":0.0893282866,"count":17.0,"SE(A)":0.0220984743,"SE_x(A)":0.0211647532,"SE_pred(A)":0.0063557681}
{"model":"google_gemma_3_12b_it","pass1":0.0696969697,"pass@count":0.1611111111,"win_rate":0.0655593128,"count":11.0,"SE(A)":0.018979419,"SE_x(A)":0.0147920318,"SE_pred(A)":0.0118917678}
{"model":"google_codegemma_1.1_7b_it","pass1":0.0628205128,"pass@count":0.2611111111,"win_rate":0.0576279328,"count":13.0,"SE(A)":0.0180853065,"SE_x(A)":0.0115407036,"SE_pred(A)":0.0139244559}
{"model":"llama-3.2-1B-instruct","pass1":0.0402777778,"pass@count":0.0444444444,"win_rate":0.0363690505,"count":12.0,"SE(A)":0.0146544416,"SE_x(A)":0.0139095315,"SE_pred(A)":0.0046127639}
{"model":"google_gemma_7b_it","pass1":0.025,"pass@count":0.0666666667,"win_rate":0.0223500448,"count":12.0,"SE(A)":0.0116368667,"SE_x(A)":0.0086924582,"SE_pred(A)":0.0077367847}
{"model":"mistralai_mixtral_8x22b_instruct_v0.1","pass1":0.0085858586,"pass@count":0.0833333333,"win_rate":0.0079455613,"count":11.0,"SE(A)":0.0068767489,"SE_x(A)":0.0008442723,"SE_pred(A)":0.0068247256}
{"model":"google_gemma_3_1b_it","pass1":0.0069444444,"pass@count":0.0444444444,"win_rate":0.0062944811,"count":12.0,"SE(A)":0.0061896954,"SE_x(A)":0.0025057998,"SE_pred(A)":0.0056597965}
{"model":"google_gemma_2b_it","pass1":0.0064102564,"pass@count":0.0333333333,"win_rate":0.0056470923,"count":13.0,"SE(A)":0.0059484662,"SE_x(A)":0.0023889615,"SE_pred(A)":0.0054476704}
{"model":"mistralai_mathstral_7b_v0.1","pass1":0.0005050505,"pass@count":0.0055555556,"win_rate":0.0004788147,"count":11.0,"SE(A)":0.00167464,"SE_x(A)":0.0,"SE_pred(A)":0.001675063}
{"model":"qwen3-32b","pass1":0.0005050505,"pass@count":0.0055555556,"win_rate":0.0004803042,"count":11.0,"SE(A)":0.00167464,"SE_x(A)":0.0,"SE_pred(A)":0.001675063}
{"model":"qwen2.5-coder-14b-instruct","pass1":0.000462963,"pass@count":0.0055555556,"win_rate":0.0004402357,"count":12.0,"SE(A)":0.0016033795,"SE_x(A)":0.0,"SE_pred(A)":0.0016037507}
{"model":"qwen2.5-coder-3b-instruct","pass1":0.000462963,"pass@count":0.0055555556,"win_rate":0.000462963,"count":12.0,"SE(A)":0.0016033795,"SE_x(A)":0.0,"SE_pred(A)":0.0016037507}
{"model":"deepseek_r1_distill_llama_70b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":9.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"deepseek_r1_distill_qwen_1.5b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":12.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"deepseek_v2_lite_chat","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"deepseek_r1_distill_qwen_7b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"deepseek_r1_distill_qwen_32b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":9.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"deepseek_r1_distill_qwen_14b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"deepseek_r1_distill_llama_8b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":12.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"mistralai_mistral_7b_instruct_v0.2","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":10.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"mistralai_mistral_7b_instruct_v0.1","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"mistralai_ministral_8b_instruct_2410","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen1.5-1.8b-chat","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen1.5-14b-chat","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":12.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen1.5-32b-chat","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen1.5-72b-chat","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":10.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen1.5-7b-chat","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":12.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"mistralai_mistral_7b_instruct_v0.3","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"mistralai_mixtral_8x7b_instruct_v0.1","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":12.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen1.5-0.5b-chat","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":13.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2-72b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":10.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2-1.5b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":13.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2-0.5b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":13.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2-7b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2-math-7b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":6.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2.5-coder-0.5b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":13.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2-math-72b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":10.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2-math-1.5b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":4.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2.5-coder-32b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":10.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2.5-coder-1.5b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":11.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen3-0.6b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":13.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen2.5-coder-7b-instruct","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":10.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen3-1.7b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":12.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen3-14b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":12.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen3-4b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":12.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
{"model":"qwen3-8b","pass1":0.0,"pass@count":0.0,"win_rate":0.0,"count":12.0,"SE(A)":0.0,"SE_x(A)":0.0,"SE_pred(A)":0.0}
