{"model":"qwen2.5-coder-32b-instruct","pass1":0.824375,"pass@count":0.9225,"win_rate":0.4442432701,"count":10.0,"SE(A)":0.0134527348,"SE_x(A)":0.0113599732,"SE_pred(A)":0.0072060449}
{"model":"qwen3-14b","pass1":0.7865625,"pass@count":0.92375,"win_rate":0.4125391174,"count":12.0,"SE(A)":0.0144862838,"SE_x(A)":0.0116110682,"SE_pred(A)":0.008662304}
{"model":"qwen2.5-coder-14b-instruct","pass1":0.7654166667,"pass@count":0.895,"win_rate":0.3945008217,"count":12.0,"SE(A)":0.0149814049,"SE_x(A)":0.0127340315,"SE_pred(A)":0.007892207}
{"model":"google_gemma_3_27b_it","pass1":0.7595833333,"pass@count":0.87,"win_rate":0.3893642969,"count":9.0,"SE(A)":0.0151086272,"SE_x(A)":0.0129809472,"SE_pred(A)":0.007730823}
{"model":"llama-3.1-70B-instruct","pass1":0.7025,"pass@count":0.7025,"win_rate":0.3457753858,"count":13.0,"SE(A)":0.0161629882,"SE_x(A)":0.0161629882,"SE_pred(A)":0.0}
{"model":"google_gemma_3_12b_it","pass1":0.6972727273,"pass@count":0.83625,"win_rate":0.3425179958,"count":11.0,"SE(A)":0.0162435938,"SE_x(A)":0.0138341154,"SE_pred(A)":0.0085130248}
{"model":"qwen3-32b","pass1":0.6946590909,"pass@count":0.84,"win_rate":0.3438566082,"count":11.0,"SE(A)":0.0162829604,"SE_x(A)":0.0136085126,"SE_pred(A)":0.0089410951}
{"model":"deepseek_r1_distill_llama_70b","pass1":0.665125,"pass@count":0.87,"win_rate":0.3233721245,"count":10.0,"SE(A)":0.0166858373,"SE_x(A)":0.012960831,"SE_pred(A)":0.0105087596}
{"model":"qwen2-72b-instruct","pass1":0.641,"pass@count":0.855,"win_rate":0.3048647719,"count":10.0,"SE(A)":0.0169602108,"SE_x(A)":0.0129586468,"SE_pred(A)":0.010941765}
{"model":"google_gemma_2_27b_it","pass1":0.627375,"pass@count":0.8175,"win_rate":0.2915844106,"count":10.0,"SE(A)":0.0170944293,"SE_x(A)":0.014056461,"SE_pred(A)":0.0097280736}
{"model":"mistralai_mixtral_8x22b_instruct_v0.1","pass1":0.6232954545,"pass@count":0.83,"win_rate":0.2882711057,"count":11.0,"SE(A)":0.0171317772,"SE_x(A)":0.013780052,"SE_pred(A)":0.0101787993}
{"model":"qwen2-math-72b-instruct","pass1":0.5945,"pass@count":0.82,"win_rate":0.2731851026,"count":10.0,"SE(A)":0.0173590664,"SE_x(A)":0.0133717039,"SE_pred(A)":0.0110695403}
{"model":"qwen3-4b","pass1":0.5738541667,"pass@count":0.775,"win_rate":0.2609948102,"count":12.0,"SE(A)":0.0174837625,"SE_x(A)":0.0143106313,"SE_pred(A)":0.0100442911}
{"model":"qwen2.5-coder-7b-instruct","pass1":0.5615909091,"pass@count":0.80625,"win_rate":0.2505452539,"count":11.0,"SE(A)":0.0175430385,"SE_x(A)":0.01384378,"SE_pred(A)":0.0107753401}
{"model":"google_gemma_3_4b_it","pass1":0.5452884615,"pass@count":0.745,"win_rate":0.2399355337,"count":13.0,"SE(A)":0.0176050048,"SE_x(A)":0.0147881777,"SE_pred(A)":0.0095522769}
{"model":"deepseek_r1_distill_qwen_14b","pass1":0.5447727273,"pass@count":0.8825,"win_rate":0.2670842257,"count":11.0,"SE(A)":0.0176066537,"SE_x(A)":0.0100096512,"SE_pred(A)":0.0144845137}
{"model":"deepseek_r1_distill_llama_8b","pass1":0.5232291667,"pass@count":0.78625,"win_rate":0.2278160528,"count":12.0,"SE(A)":0.0176585817,"SE_x(A)":0.0132814534,"SE_pred(A)":0.0116373753}
{"model":"qwen2.5-coder-3b-instruct","pass1":0.5010416667,"pass@count":0.79125,"win_rate":0.2130405793,"count":12.0,"SE(A)":0.0176776312,"SE_x(A)":0.0133817268,"SE_pred(A)":0.0115511052}
{"model":"google_gemma_2_9b_it","pass1":0.4860416667,"pass@count":0.69625,"win_rate":0.1991267643,"count":12.0,"SE(A)":0.0176707797,"SE_x(A)":0.0146936139,"SE_pred(A)":0.0098160158}
{"model":"qwen1.5-32b-chat","pass1":0.4831818182,"pass@count":0.755,"win_rate":0.2005298953,"count":11.0,"SE(A)":0.0176676664,"SE_x(A)":0.0135077362,"SE_pred(A)":0.011388042}
{"model":"qwen1.5-72b-chat","pass1":0.46575,"pass@count":0.75625,"win_rate":0.1962667311,"count":10.0,"SE(A)":0.0176361467,"SE_x(A)":0.012744511,"SE_pred(A)":0.0121906157}
{"model":"mistralai_mixtral_8x7b_instruct_v0.1","pass1":0.4569318182,"pass@count":0.73625,"win_rate":0.1853104529,"count":11.0,"SE(A)":0.0176119679,"SE_x(A)":0.0134780817,"SE_pred(A)":0.0113367865}
{"model":"llama-3.1-8B-instruct","pass1":0.4425,"pass@count":0.4425,"win_rate":0.1791968278,"count":15.0,"SE(A)":0.0175603869,"SE_x(A)":0.0175603869,"SE_pred(A)":0.0}
{"model":"mistralai_ministral_8b_instruct_2410","pass1":0.4423863636,"pass@count":0.74375,"win_rate":0.1768373523,"count":11.0,"SE(A)":0.0175599213,"SE_x(A)":0.0131349523,"SE_pred(A)":0.0116543496}
{"model":"mistralai_mathstral_7b_v0.1","pass1":0.4086363636,"pass@count":0.72375,"win_rate":0.157832812,"count":11.0,"SE(A)":0.0173800419,"SE_x(A)":0.0131908179,"SE_pred(A)":0.0113167213}
{"model":"qwen2-7b-instruct","pass1":0.3934090909,"pass@count":0.69375,"win_rate":0.1510828207,"count":11.0,"SE(A)":0.0172713049,"SE_x(A)":0.0129312964,"SE_pred(A)":0.0114489976}
{"model":"google_codegemma_1.1_7b_it","pass1":0.3821153846,"pass@count":0.67625,"win_rate":0.1477876431,"count":13.0,"SE(A)":0.0171793196,"SE_x(A)":0.013164882,"SE_pred(A)":0.0110369789}
{"model":"qwen1.5-14b-chat","pass1":0.3776041667,"pass@count":0.6875,"win_rate":0.1431828216,"count":12.0,"SE(A)":0.0171398388,"SE_x(A)":0.0128405476,"SE_pred(A)":0.0113531675}
{"model":"mistralai_mistral_7b_instruct_v0.3","pass1":0.3417045455,"pass@count":0.62375,"win_rate":0.1243091369,"count":11.0,"SE(A)":0.0167683686,"SE_x(A)":0.0125764392,"SE_pred(A)":0.0110910488}
{"model":"qwen3-1.7b","pass1":0.339375,"pass@count":0.74375,"win_rate":0.1482881746,"count":12.0,"SE(A)":0.0167406545,"SE_x(A)":0.0093853975,"SE_pred(A)":0.0138623168}
{"model":"qwen3-0.6b","pass1":0.3136538462,"pass@count":0.645,"win_rate":0.1198159326,"count":13.0,"SE(A)":0.0164040815,"SE_x(A)":0.0113255714,"SE_pred(A)":0.0118669845}
{"model":"deepseek_r1_distill_qwen_32b","pass1":0.305,"pass@count":0.75125,"win_rate":0.1336683608,"count":10.0,"SE(A)":0.0162778607,"SE_x(A)":0.0082812959,"SE_pred(A)":0.014013882}
{"model":"llama-3.2-3B-instruct","pass1":0.30125,"pass@count":0.30125,"win_rate":0.1047624901,"count":18.0,"SE(A)":0.016221068,"SE_x(A)":0.016221068,"SE_pred(A)":0.0}
{"model":"qwen3-8b","pass1":0.2952083333,"pass@count":0.625,"win_rate":0.1131648599,"count":12.0,"SE(A)":0.0161268554,"SE_x(A)":0.0110220892,"SE_pred(A)":0.0117723836}
{"model":"qwen2.5-coder-1.5b-instruct","pass1":0.2897916667,"pass@count":0.6825,"win_rate":0.1072538249,"count":12.0,"SE(A)":0.0160395003,"SE_x(A)":0.0102892561,"SE_pred(A)":0.0123043399}
{"model":"deepseek_v2_lite_chat","pass1":0.2805681818,"pass@count":0.59625,"win_rate":0.0999498995,"count":11.0,"SE(A)":0.0158843349,"SE_x(A)":0.0113697988,"SE_pred(A)":0.0110923295}
{"model":"google_gemma_3_1b_it","pass1":0.2751041667,"pass@count":0.52625,"win_rate":0.1034470549,"count":12.0,"SE(A)":0.0157885189,"SE_x(A)":0.0121250336,"SE_pred(A)":0.0101124127}
{"model":"qwen2-math-7b-instruct","pass1":0.2630208333,"pass@count":0.59125,"win_rate":0.0952544763,"count":12.0,"SE(A)":0.0155660237,"SE_x(A)":0.0104868449,"SE_pred(A)":0.0115033551}
{"model":"qwen1.5-7b-chat","pass1":0.261875,"pass@count":0.575,"win_rate":0.0922681959,"count":12.0,"SE(A)":0.0155441502,"SE_x(A)":0.0110408316,"SE_pred(A)":0.0109416929}
{"model":"google_gemma_7b_it","pass1":0.2553125,"pass@count":0.47125,"win_rate":0.092559834,"count":12.0,"SE(A)":0.0154162263,"SE_x(A)":0.0121428717,"SE_pred(A)":0.0094979314}
{"model":"mistralai_mistral_7b_instruct_v0.1","pass1":0.2361363636,"pass@count":0.54125,"win_rate":0.0825900006,"count":11.0,"SE(A)":0.0150156577,"SE_x(A)":0.0101137609,"SE_pred(A)":0.0110987305}
{"model":"mistralai_mistral_7b_instruct_v0.2","pass1":0.2272727273,"pass@count":0.59,"win_rate":0.0824038884,"count":11.0,"SE(A)":0.0148163691,"SE_x(A)":0.0086918077,"SE_pred(A)":0.011999053}
{"model":"qwen2-math-1.5b-instruct","pass1":0.21625,"pass@count":0.51375,"win_rate":0.07644741,"count":12.0,"SE(A)":0.0145553228,"SE_x(A)":0.0103087487,"SE_pred(A)":0.0102755594}
{"model":"qwen2.5-coder-0.5b-instruct","pass1":0.2048076923,"pass@count":0.53125,"win_rate":0.0757247755,"count":13.0,"SE(A)":0.0142680369,"SE_x(A)":0.0090200517,"SE_pred(A)":0.011055114}
{"model":"deepseek_r1_distill_qwen_7b","pass1":0.1932954545,"pass@count":0.58875,"win_rate":0.0744908268,"count":11.0,"SE(A)":0.0139612106,"SE_x(A)":0.0071051801,"SE_pred(A)":0.012017979}
{"model":"deepseek_r1_distill_qwen_1.5b","pass1":0.1411458333,"pass@count":0.51625,"win_rate":0.0494191168,"count":12.0,"SE(A)":0.0123097363,"SE_x(A)":0.0059334828,"SE_pred(A)":0.0107853322}
{"model":"google_gemma_2b_it","pass1":0.1253846154,"pass@count":0.37875,"win_rate":0.0460780239,"count":13.0,"SE(A)":0.0117080802,"SE_x(A)":0.0072886772,"SE_pred(A)":0.0091626594}
{"model":"qwen2-1.5b-instruct","pass1":0.1251041667,"pass@count":0.43375,"win_rate":0.0411704804,"count":12.0,"SE(A)":0.011696854,"SE_x(A)":0.0063767905,"SE_pred(A)":0.0098057603}
{"model":"llama-3.2-1B-instruct","pass1":0.11375,"pass@count":0.11375,"win_rate":0.037674748,"count":21.0,"SE(A)":0.0112255811,"SE_x(A)":0.0112255811,"SE_pred(A)":0.0}
{"model":"qwen1.5-0.5b-chat","pass1":0.0623076923,"pass@count":0.28625,"win_rate":0.0232064981,"count":13.0,"SE(A)":0.0085458648,"SE_x(A)":0.0042853631,"SE_pred(A)":0.0073937452}
{"model":"qwen2-0.5b-instruct","pass1":0.0183653846,"pass@count":0.1625,"win_rate":0.0066559583,"count":13.0,"SE(A)":0.0047471172,"SE_x(A)":0.0011494256,"SE_pred(A)":0.0046058596}
{"model":"qwen1.5-1.8b-chat","pass1":0.0116666667,"pass@count":0.10375,"win_rate":0.0037769865,"count":12.0,"SE(A)":0.0037964713,"SE_x(A)":0.0010067637,"SE_pred(A)":0.0036605493}
