{"model":"qwen2.5-coder-14b-instruct","pass1":0.43225,"pass@count":0.626,"win_rate":0.3090286565,"count":12.0,"SE(A)":0.0156655653,"SE_x(A)":0.0129921631,"SE_pred(A)":0.0087529216}
{"model":"qwen3-14b","pass1":0.3801666667,"pass@count":0.56,"win_rate":0.2617722865,"count":12.0,"SE(A)":0.0153505691,"SE_x(A)":0.0132814739,"SE_pred(A)":0.00769691}
{"model":"qwen2.5-coder-7b-instruct","pass1":0.3569,"pass@count":0.576,"win_rate":0.2445016403,"count":10.0,"SE(A)":0.0151499964,"SE_x(A)":0.011999081,"SE_pred(A)":0.009249024}
{"model":"google_gemma_3_12b_it","pass1":0.3275454545,"pass@count":0.424,"win_rate":0.2149478362,"count":11.0,"SE(A)":0.0148411398,"SE_x(A)":0.0137214289,"SE_pred(A)":0.005655247}
{"model":"qwen3-8b","pass1":0.3115833333,"pass@count":0.503,"win_rate":0.2034540888,"count":12.0,"SE(A)":0.0146457898,"SE_x(A)":0.0123106535,"SE_pred(A)":0.0079339126}
{"model":"qwen3-4b","pass1":0.289,"pass@count":0.473,"win_rate":0.1853129515,"count":12.0,"SE(A)":0.0143345387,"SE_x(A)":0.0119858591,"SE_pred(A)":0.0078624539}
{"model":"mistralai_ministral_8b_instruct_2410","pass1":0.2474545455,"pass@count":0.495,"win_rate":0.1526789061,"count":11.0,"SE(A)":0.013646274,"SE_x(A)":0.0102515131,"SE_pred(A)":0.0090070679}
{"model":"qwen2.5-coder-3b-instruct","pass1":0.23825,"pass@count":0.483,"win_rate":0.1457499725,"count":12.0,"SE(A)":0.0134717088,"SE_x(A)":0.0100999834,"SE_pred(A)":0.0089150027}
{"model":"qwen2-7b-instruct","pass1":0.2355454545,"pass@count":0.429,"win_rate":0.1436589233,"count":11.0,"SE(A)":0.0134187851,"SE_x(A)":0.0108280013,"SE_pred(A)":0.0079257922}
{"model":"mistralai_mathstral_7b_v0.1","pass1":0.2257272727,"pass@count":0.482,"win_rate":0.1365074294,"count":11.0,"SE(A)":0.0132202296,"SE_x(A)":0.009739886,"SE_pred(A)":0.0089391885}
{"model":"llama-3.1-8B-instruct","pass1":0.2232666667,"pass@count":0.226,"win_rate":0.1363389922,"count":15.0,"SE(A)":0.013168852,"SE_x(A)":0.0130861466,"SE_pred(A)":0.0014735768}
{"model":"google_codegemma_1.1_7b_it","pass1":0.2093076923,"pass@count":0.439,"win_rate":0.126790232,"count":13.0,"SE(A)":0.0128646019,"SE_x(A)":0.0097499787,"SE_pred(A)":0.0083926097}
{"model":"qwen1.5-14b-chat","pass1":0.2049166667,"pass@count":0.405,"win_rate":0.1229031584,"count":12.0,"SE(A)":0.0127642401,"SE_x(A)":0.0101597874,"SE_pred(A)":0.0077268716}
{"model":"qwen2-math-7b-instruct","pass1":0.2018333333,"pass@count":0.354,"win_rate":0.1218274437,"count":6.0,"SE(A)":0.0126923851,"SE_x(A)":0.0099597509,"SE_pred(A)":0.0078676553}
{"model":"google_gemma_3_4b_it","pass1":0.1954615385,"pass@count":0.294,"win_rate":0.1118828515,"count":13.0,"SE(A)":0.0125401884,"SE_x(A)":0.0112318191,"SE_pred(A)":0.0055769673}
{"model":"mistralai_mistral_7b_instruct_v0.3","pass1":0.1816363636,"pass@count":0.36,"win_rate":0.1047512526,"count":11.0,"SE(A)":0.012191989,"SE_x(A)":0.0097593337,"SE_pred(A)":0.0073075304}
{"model":"deepseek_v2_lite_chat","pass1":0.1737272727,"pass@count":0.398,"win_rate":0.1016972558,"count":11.0,"SE(A)":0.0119810729,"SE_x(A)":0.0087863902,"SE_pred(A)":0.0081452719}
{"model":"deepseek_r1_distill_qwen_14b","pass1":0.1609090909,"pass@count":0.392,"win_rate":0.0981750994,"count":11.0,"SE(A)":0.0116196969,"SE_x(A)":0.0081708512,"SE_pred(A)":0.0082616309}
{"model":"qwen3-1.7b","pass1":0.1523333333,"pass@count":0.286,"win_rate":0.0808262763,"count":12.0,"SE(A)":0.0113634453,"SE_x(A)":0.0097471368,"SE_pred(A)":0.0058413365}
{"model":"deepseek_r1_distill_qwen_7b","pass1":0.1476363636,"pass@count":0.357,"win_rate":0.0841757172,"count":11.0,"SE(A)":0.011217837,"SE_x(A)":0.0080861191,"SE_pred(A)":0.0077752521}
{"model":"qwen2.5-coder-1.5b-instruct","pass1":0.1411818182,"pass@count":0.385,"win_rate":0.0813997425,"count":11.0,"SE(A)":0.0110113356,"SE_x(A)":0.0070410784,"SE_pred(A)":0.0084659747}
{"model":"mistralai_mistral_7b_instruct_v0.2","pass1":0.1357,"pass@count":0.291,"win_rate":0.0755329181,"count":10.0,"SE(A)":0.0108298435,"SE_x(A)":0.0085014351,"SE_pred(A)":0.0067090321}
{"model":"google_gemma_2_9b_it","pass1":0.1061666667,"pass@count":0.226,"win_rate":0.055987822,"count":12.0,"SE(A)":0.0097414222,"SE_x(A)":0.0083067061,"SE_pred(A)":0.0050886088}
{"model":"llama-3.2-3B-instruct","pass1":0.1014117647,"pass@count":0.102,"win_rate":0.0518324385,"count":17.0,"SE(A)":0.0095460682,"SE_x(A)":0.009519071,"SE_pred(A)":0.0007174301}
{"model":"mistralai_mistral_7b_instruct_v0.1","pass1":0.0922727273,"pass@count":0.247,"win_rate":0.0457578285,"count":11.0,"SE(A)":0.0091519654,"SE_x(A)":0.0064845493,"SE_pred(A)":0.0064582576}
{"model":"qwen1.5-7b-chat","pass1":0.0866666667,"pass@count":0.262,"win_rate":0.0454425518,"count":12.0,"SE(A)":0.0088969408,"SE_x(A)":0.0061819222,"SE_pred(A)":0.0063983899}
{"model":"qwen3-0.6b","pass1":0.0741538462,"pass@count":0.196,"win_rate":0.0347532065,"count":13.0,"SE(A)":0.0082858345,"SE_x(A)":0.0063185666,"SE_pred(A)":0.0053601091}
{"model":"deepseek_r1_distill_llama_8b","pass1":0.0684615385,"pass@count":0.221,"win_rate":0.0371218649,"count":13.0,"SE(A)":0.0079858973,"SE_x(A)":0.0053892288,"SE_pred(A)":0.0058932817}
{"model":"qwen2-1.5b-instruct","pass1":0.0577692308,"pass@count":0.211,"win_rate":0.0268458194,"count":13.0,"SE(A)":0.0073778009,"SE_x(A)":0.0046749357,"SE_pred(A)":0.0057076197}
{"model":"google_gemma_7b_it","pass1":0.0475384615,"pass@count":0.088,"win_rate":0.0193915994,"count":13.0,"SE(A)":0.0067289343,"SE_x(A)":0.0060316764,"SE_pred(A)":0.002982857}
{"model":"llama-3.2-1B-instruct","pass1":0.0458333333,"pass@count":0.046,"win_rate":0.0215897498,"count":12.0,"SE(A)":0.0066130658,"SE_x(A)":0.0066016001,"SE_pred(A)":0.0003892495}
{"model":"qwen2.5-coder-0.5b-instruct","pass1":0.0454615385,"pass@count":0.173,"win_rate":0.0207421002,"count":13.0,"SE(A)":0.006587472,"SE_x(A)":0.0038443988,"SE_pred(A)":0.005349335}
{"model":"qwen2-math-1.5b-instruct","pass1":0.04075,"pass@count":0.106,"win_rate":0.0221038682,"count":4.0,"SE(A)":0.0062521546,"SE_x(A)":0.0034408484,"SE_pred(A)":0.0052201533}
{"model":"google_gemma_3_1b_it","pass1":0.0383076923,"pass@count":0.082,"win_rate":0.0165155313,"count":13.0,"SE(A)":0.0060696139,"SE_x(A)":0.0050364525,"SE_pred(A)":0.0033873823}
{"model":"qwen2-0.5b-instruct","pass1":0.022,"pass@count":0.097,"win_rate":0.0090596052,"count":13.0,"SE(A)":0.0046385343,"SE_x(A)":0.002736852,"SE_pred(A)":0.0037450822}
{"model":"deepseek_r1_distill_qwen_1.5b","pass1":0.0177692308,"pass@count":0.091,"win_rate":0.0097079103,"count":13.0,"SE(A)":0.0041777369,"SE_x(A)":0.0022111948,"SE_pred(A)":0.0035445878}
{"model":"qwen1.5-1.8b-chat","pass1":0.0136666667,"pass@count":0.079,"win_rate":0.0059983507,"count":12.0,"SE(A)":0.0036714968,"SE_x(A)":0.0017349905,"SE_pred(A)":0.0032356911}
{"model":"google_gemma_2b_it","pass1":0.0038461538,"pass@count":0.022,"win_rate":0.0016989426,"count":13.0,"SE(A)":0.0019573863,"SE_x(A)":0.0009597419,"SE_pred(A)":0.0017059474}
{"model":"qwen1.5-0.5b-chat","pass1":0.0035384615,"pass@count":0.024,"win_rate":0.001296567,"count":13.0,"SE(A)":0.0018777489,"SE_x(A)":0.0007073187,"SE_pred(A)":0.001739437}
