{"model":"qwen3-32b","pass1":0.8944207792,"pass@count":0.956,"win_rate":0.4064475846,"count":11.0,"SE(A)":0.0058599488,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"google_gemma_3_27b_it","pass1":0.8922545455,"pass@count":0.9418181818,"win_rate":0.4054006998,"count":10.0,"SE(A)":0.0059125866,"SE_x(A)":0.0051904768,"SE_pred(A)":0.0028315421}
{"model":"deepseek_r1_distill_llama_70b","pass1":0.8669090909,"pass@count":0.956,"win_rate":0.3898299941,"count":10.9090909091,"SE(A)":0.0064773092,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"google_gemma_3_12b_it","pass1":0.8627768595,"pass@count":0.9370909091,"win_rate":0.3829251163,"count":12.0,"SE(A)":0.0065614009,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"llama-3.1-70B-instruct","pass1":0.8607272727,"pass@count":0.8607272727,"win_rate":0.3862264264,"count":12.0,"SE(A)":0.0066023642,"SE_x(A)":0.0066023642,"SE_pred(A)":0.0}
{"model":"qwen3-14b","pass1":0.8438347107,"pass@count":0.9341818182,"win_rate":0.3673968025,"count":11.0,"SE(A)":0.0069223667,"SE_x(A)":0.0058207383,"SE_pred(A)":0.0037467542}
{"model":"google_gemma_2_27b_it","pass1":0.8393818182,"pass@count":0.9290909091,"win_rate":0.3680911484,"count":10.0,"SE(A)":0.0070018175,"SE_x(A)":0.0057929941,"SE_pred(A)":0.0039327683}
{"model":"qwen2-72b-instruct","pass1":0.8059636364,"pass@count":0.9109090909,"win_rate":0.3398808804,"count":10.9090909091,"SE(A)":0.0075410694,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen3-8b","pass1":0.8045950413,"pass@count":0.9156363636,"win_rate":0.3391733512,"count":11.0,"SE(A)":0.0075611894,"SE_x(A)":0.0063577884,"SE_pred(A)":0.0040926902}
{"model":"deepseek_r1_distill_qwen_32b","pass1":0.8031454545,"pass@count":0.9207272727,"win_rate":0.3390002471,"count":10.9090909091,"SE(A)":0.0075823439,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen2.5-coder-32b-instruct","pass1":0.7943272727,"pass@count":0.9112727273,"win_rate":0.3321633878,"count":10.9090909091,"SE(A)":0.0077076457,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen2-math-72b-instruct","pass1":0.7904,"pass@count":0.9225454545,"win_rate":0.3322826353,"count":10.9090909091,"SE(A)":0.0077616268,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"google_gemma_2_9b_it","pass1":0.7903140496,"pass@count":0.9127272727,"win_rate":0.3342909118,"count":11.0,"SE(A)":0.0077627959,"SE_x(A)":0.0063759145,"SE_pred(A)":0.0044281729}
{"model":"deepseek_r1_distill_qwen_14b","pass1":0.7462252964,"pass@count":0.9003636364,"win_rate":0.3014641944,"count":13.0,"SE(A)":0.0082983701,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen3-4b","pass1":0.7443030303,"pass@count":0.8716363636,"win_rate":0.2980523808,"count":12.0,"SE(A)":0.0083190041,"SE_x(A)":0.0071611775,"SE_pred(A)":0.0042335996}
{"model":"google_gemma_3_4b_it","pass1":0.7180979021,"pass@count":0.9007272727,"win_rate":0.2905293415,"count":13.0,"SE(A)":0.0085797501,"SE_x(A)":0.0067850407,"SE_pred(A)":0.0052512221}
{"model":"mistralai_mixtral_8x22b_instruct_v0.1","pass1":0.7168323232,"pass@count":0.8952727273,"win_rate":0.2853956277,"count":10.0,"SE(A)":0.0085914068,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen2.5-coder-14b-instruct","pass1":0.6879008264,"pass@count":0.8803636364,"win_rate":0.2673753122,"count":11.0,"SE(A)":0.008835739,"SE_x(A)":0.0068569338,"SE_pred(A)":0.0055724987}
{"model":"qwen1.5-72b-chat","pass1":0.6529090909,"pass@count":0.8603636364,"win_rate":0.2463046214,"count":10.9090909091,"SE(A)":0.0090778213,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"mistralai_ministral_8b_instruct_2410","pass1":0.6457355372,"pass@count":0.8796363636,"win_rate":0.2417226816,"count":12.0,"SE(A)":0.009120629,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"llama-3.1-8B-instruct","pass1":0.6421818182,"pass@count":0.6421818182,"win_rate":0.2505234755,"count":15.0,"SE(A)":0.0091410031,"SE_x(A)":0.0091410031,"SE_pred(A)":0.0}
{"model":"deepseek_r1_distill_qwen_7b","pass1":0.6167957839,"pass@count":0.8410909091,"win_rate":0.2263552374,"count":13.0,"SE(A)":0.0092708486,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"mistralai_mathstral_7b_v0.1","pass1":0.6009631094,"pass@count":0.8992727273,"win_rate":0.2261769579,"count":13.0,"SE(A)":0.0093382196,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen1.5-32b-chat","pass1":0.5915252525,"pass@count":0.8207272727,"win_rate":0.2168029786,"count":10.0,"SE(A)":0.0093735242,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen2.5-coder-7b-instruct","pass1":0.5730645586,"pass@count":0.8298181818,"win_rate":0.2008060888,"count":13.0,"SE(A)":0.0094322767,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen2-7b-instruct","pass1":0.5698708827,"pass@count":0.8407272727,"win_rate":0.2031400638,"count":13.0,"SE(A)":0.009441072,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen3-1.7b","pass1":0.5464545455,"pass@count":0.7654545455,"win_rate":0.1888640496,"count":12.0,"SE(A)":0.0094933848,"SE_x(A)":0.0077938398,"SE_pred(A)":0.0054203704}
{"model":"llama-3.2-3B-instruct","pass1":0.5105454545,"pass@count":0.5105454545,"win_rate":0.1825251251,"count":18.0,"SE(A)":0.009532505,"SE_x(A)":0.009532505,"SE_pred(A)":0.0}
{"model":"qwen2-math-7b-instruct","pass1":0.5075454545,"pass@count":0.7756363636,"win_rate":0.1699270193,"count":12.0,"SE(A)":0.0095335401,"SE_x(A)":0.0074300028,"SE_pred(A)":0.0059735623}
{"model":"qwen1.5-14b-chat","pass1":0.4675041322,"pass@count":0.7778181818,"win_rate":0.1541060645,"count":11.0,"SE(A)":0.0095144678,"SE_x(A)":0.006933889,"SE_pred(A)":0.006515081}
{"model":"mistralai_mixtral_8x7b_instruct_v0.1","pass1":0.4525090909,"pass@count":0.7734545455,"win_rate":0.1543245266,"count":10.0,"SE(A)":0.0094915199,"SE_x(A)":0.006692489,"SE_pred(A)":0.0067304934}
{"model":"deepseek_r1_distill_llama_8b","pass1":0.4476363636,"pass@count":0.7672727273,"win_rate":0.145556092,"count":12.0,"SE(A)":0.0094821948,"SE_x(A)":0.0070394169,"SE_pred(A)":0.0063528441}
{"model":"google_codegemma_1.1_7b_it","pass1":0.4147412587,"pass@count":0.7647272727,"win_rate":0.1394973308,"count":13.0,"SE(A)":0.009394988,"SE_x(A)":0.0068307158,"SE_pred(A)":0.0064503581}
{"model":"deepseek_v2_lite_chat","pass1":0.4023115364,"pass@count":0.7509090909,"win_rate":0.1298301383,"count":9.9090909091,"SE(A)":0.0093508768,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen2.5-coder-3b-instruct","pass1":0.402030303,"pass@count":0.7429090909,"win_rate":0.125357105,"count":12.0,"SE(A)":0.0093498068,"SE_x(A)":0.0065720994,"SE_pred(A)":0.0066502929}
{"model":"deepseek_r1_distill_qwen_1.5b","pass1":0.3336969697,"pass@count":0.6472727273,"win_rate":0.0969679071,"count":12.0,"SE(A)":0.0089917801,"SE_x(A)":0.0068217341,"SE_pred(A)":0.0058579905}
{"model":"qwen1.5-7b-chat","pass1":0.319338843,"pass@count":0.6607272727,"win_rate":0.0927207931,"count":11.0,"SE(A)":0.0088904759,"SE_x(A)":0.0062279145,"SE_pred(A)":0.0063445759}
{"model":"qwen2-math-1.5b-instruct","pass1":0.3120606061,"pass@count":0.5618181818,"win_rate":0.0890977266,"count":12.0,"SE(A)":0.0088354407,"SE_x(A)":0.0070789897,"SE_pred(A)":0.0052870519}
{"model":"mistralai_mistral_7b_instruct_v0.3","pass1":0.2909538867,"pass@count":0.6650909091,"win_rate":0.0862968624,"count":13.0,"SE(A)":0.0086612979,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"google_gemma_3_1b_it","pass1":0.2637878788,"pass@count":0.5821818182,"win_rate":0.0753346476,"count":12.0,"SE(A)":0.0084035454,"SE_x(A)":0.0062317696,"SE_pred(A)":0.0056377853}
{"model":"mistralai_mistral_7b_instruct_v0.2","pass1":0.2463570487,"pass@count":0.6294545455,"win_rate":0.0734063755,"count":13.0,"SE(A)":0.0082167291,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen2.5-coder-1.5b-instruct","pass1":0.2309393939,"pass@count":0.5774545455,"win_rate":0.0615968971,"count":12.0,"SE(A)":0.0080364259,"SE_x(A)":0.0052946367,"SE_pred(A)":0.0060457393}
{"model":"qwen3-0.6b","pass1":0.2283636364,"pass@count":0.5225454545,"win_rate":0.0632221834,"count":13.0,"SE(A)":0.008004855,"SE_x(A)":0.0058700826,"SE_pred(A)":0.0054424107}
{"model":"llama-3.2-1B-instruct","pass1":0.1949090909,"pass@count":0.1949090909,"win_rate":0.0583325243,"count":8.0,"SE(A)":0.0075539162,"SE_x(A)":0.0075539162,"SE_pred(A)":0.0}
{"model":"google_gemma_7b_it","pass1":0.1814848485,"pass@count":0.4523636364,"win_rate":0.0530510507,"count":12.0,"SE(A)":0.0073496592,"SE_x(A)":0.0053639828,"SE_pred(A)":0.0050244581}
{"model":"mistralai_mistral_7b_instruct_v0.1","pass1":0.1767852437,"pass@count":0.5061818182,"win_rate":0.0471982287,"count":13.0,"SE(A)":0.0072746689,"SE_x(A)":null,"SE_pred(A)":null}
{"model":"qwen2-1.5b-instruct","pass1":0.1461818182,"pass@count":0.4709090909,"win_rate":0.036776428,"count":12.0,"SE(A)":0.0067369455,"SE_x(A)":0.0041765457,"SE_pred(A)":0.0052861044}
{"model":"google_gemma_2b_it","pass1":0.054969697,"pass@count":0.2229090909,"win_rate":0.0178925795,"count":12.0,"SE(A)":0.0043462849,"SE_x(A)":0.002508415,"SE_pred(A)":0.0035493727}
{"model":"qwen1.5-1.8b-chat","pass1":0.0538181818,"pass@count":0.2694545455,"win_rate":0.0138057788,"count":11.0,"SE(A)":0.0043031399,"SE_x(A)":0.0020470323,"SE_pred(A)":0.003785059}
{"model":"qwen2-0.5b-instruct","pass1":0.0505454545,"pass@count":0.2854545455,"win_rate":0.0132909072,"count":13.0,"SE(A)":0.0041774551,"SE_x(A)":0.0019737571,"SE_pred(A)":0.0036817678}
{"model":"qwen2.5-coder-0.5b-instruct","pass1":0.0488671329,"pass@count":0.2443636364,"win_rate":0.0130967385,"count":13.0,"SE(A)":0.0041111439,"SE_x(A)":0.0021460845,"SE_pred(A)":0.0035065404}
{"model":"qwen1.5-0.5b-chat","pass1":0.0220699301,"pass@count":0.176,"win_rate":0.0068963164,"count":13.0,"SE(A)":0.0028014833,"SE_x(A)":0.0009348527,"SE_pred(A)":0.0026409012}
