{"model":"qwen3-14b","pass1":0.7218333333,"pass@count":0.8,"win_rate":0.3139846251,"count":12.0,"SE(A)":0.0200394597,"SE_x(A)":0.0187771122,"SE_pred(A)":0.007}
{"model":"qwen2.5-coder-14b-instruct","pass1":0.7213333333,"pass@count":0.882,"win_rate":0.328655235,"count":12.0,"SE(A)":0.020050514,"SE_x(A)":0.0162730142,"SE_pred(A)":0.0117137578}
{"model":"google_gemma_3_12b_it","pass1":0.6930909091,"pass@count":0.766,"win_rate":0.293196908,"count":11.0,"SE(A)":0.0206259982,"SE_x(A)":0.0192980211,"SE_pred(A)":0.0072813585}
{"model":"deepseek_r1_distill_llama_70b","pass1":0.6632727273,"pass@count":0.812,"win_rate":0.2674417785,"count":11.0,"SE(A)":0.0211349008,"SE_x(A)":0.0182943716,"SE_pred(A)":0.0105830052}
{"model":"qwen3-8b","pass1":0.6611666667,"pass@count":0.768,"win_rate":0.2705070907,"count":12.0,"SE(A)":0.021167206,"SE_x(A)":0.0192658201,"SE_pred(A)":0.008768055}
{"model":"qwen2.5-coder-7b-instruct","pass1":0.6484,"pass@count":0.858,"win_rate":0.2787730454,"count":10.0,"SE(A)":0.0213531,"SE_x(A)":0.0164127116,"SE_pred(A)":0.0136593476}
{"model":"qwen3-4b","pass1":0.6413333333,"pass@count":0.748,"win_rate":0.2546569479,"count":12.0,"SE(A)":0.021448771,"SE_x(A)":0.0197864682,"SE_pred(A)":0.0082792182}
{"model":"mistralai_mixtral_8x22b_instruct_v0.1","pass1":0.6387272727,"pass@count":0.802,"win_rate":0.2516631103,"count":11.0,"SE(A)":0.0214827719,"SE_x(A)":0.0179509543,"SE_pred(A)":0.0118013867}
{"model":"google_gemma_3_4b_it","pass1":0.5995384615,"pass@count":0.698,"win_rate":0.2260346971,"count":13.0,"SE(A)":0.0219131054,"SE_x(A)":0.0203704055,"SE_pred(A)":0.0080765568}
{"model":"deepseek_r1_distill_qwen_14b","pass1":0.5705454545,"pass@count":0.776,"win_rate":0.2112776922,"count":11.0,"SE(A)":0.0221369979,"SE_x(A)":0.0178745971,"SE_pred(A)":0.0130593053}
{"model":"llama-3.1-8B-instruct","pass1":0.562,"pass@count":0.562,"win_rate":0.1991628411,"count":15.0,"SE(A)":0.0221881049,"SE_x(A)":0.0221881049,"SE_pred(A)":0.0}
{"model":"qwen2.5-coder-3b-instruct","pass1":0.5503333333,"pass@count":0.814,"win_rate":0.2063294786,"count":12.0,"SE(A)":0.0222470922,"SE_x(A)":0.0168278203,"SE_pred(A)":0.0145518925}
{"model":"mistralai_mixtral_8x7b_instruct_v0.1","pass1":0.521,"pass@count":0.718,"win_rate":0.1769818485,"count":12.0,"SE(A)":0.022340949,"SE_x(A)":0.0188590417,"SE_pred(A)":0.0119772512}
{"model":"mistralai_ministral_8b_instruct_2410","pass1":0.5201818182,"pass@count":0.736,"win_rate":0.1734620601,"count":11.0,"SE(A)":0.0223424571,"SE_x(A)":0.0182164742,"SE_pred(A)":0.0129362071}
{"model":"qwen2-7b-instruct","pass1":0.5107272727,"pass@count":0.726,"win_rate":0.1720056879,"count":11.0,"SE(A)":0.0223555329,"SE_x(A)":0.0183953264,"SE_pred(A)":0.0127036144}
{"model":"llama-3.2-3B-instruct","pass1":0.488,"pass@count":0.488,"win_rate":0.156863321,"count":15.0,"SE(A)":0.022354239,"SE_x(A)":0.022354239,"SE_pred(A)":0.0}
{"model":"mistralai_mathstral_7b_v0.1","pass1":0.4870909091,"pass@count":0.75,"win_rate":0.1573797845,"count":11.0,"SE(A)":0.022353226,"SE_x(A)":0.0176169697,"SE_pred(A)":0.013758964}
{"model":"qwen3-1.7b","pass1":0.4811666667,"pass@count":0.626,"win_rate":0.1565352964,"count":12.0,"SE(A)":0.0223448117,"SE_x(A)":0.0200087785,"SE_pred(A)":0.0099468283}
{"model":"deepseek_v2_lite_chat","pass1":0.4450909091,"pass@count":0.678,"win_rate":0.1364124596,"count":11.0,"SE(A)":0.0222254355,"SE_x(A)":0.0178419063,"SE_pred(A)":0.013252787}
{"model":"qwen2.5-coder-1.5b-instruct","pass1":0.4370909091,"pass@count":0.722,"win_rate":0.1375447228,"count":11.0,"SE(A)":0.0221829866,"SE_x(A)":0.0166354873,"SE_pred(A)":0.0146746535}
{"model":"qwen1.5-14b-chat","pass1":0.4018333333,"pass@count":0.622,"win_rate":0.1139876965,"count":12.0,"SE(A)":0.0219254786,"SE_x(A)":0.0184185168,"SE_pred(A)":0.0118947404}
{"model":"deepseek_r1_distill_llama_8b","pass1":0.395,"pass@count":0.666,"win_rate":0.1170269505,"count":12.0,"SE(A)":0.0218620676,"SE_x(A)":0.0173662421,"SE_pred(A)":0.0132801972}
{"model":"mistralai_mistral_7b_instruct_v0.3","pass1":0.3923636364,"pass@count":0.616,"win_rate":0.1067025803,"count":11.0,"SE(A)":0.0218364106,"SE_x(A)":0.0182356222,"SE_pred(A)":0.0120121151}
{"model":"deepseek_r1_distill_qwen_7b","pass1":0.3916363636,"pass@count":0.682,"win_rate":0.1200138271,"count":11.0,"SE(A)":0.0218292154,"SE_x(A)":0.0164572755,"SE_pred(A)":0.0143412945}
{"model":"mistralai_mistral_7b_instruct_v0.2","pass1":0.3638,"pass@count":0.58,"win_rate":0.0997984087,"count":10.0,"SE(A)":0.0215150905,"SE_x(A)":0.0175679382,"SE_pred(A)":0.0124204133}
{"model":"qwen1.5-7b-chat","pass1":0.3483333333,"pass@count":0.562,"win_rate":0.0905779989,"count":12.0,"SE(A)":0.0213071454,"SE_x(A)":0.0175773758,"SE_pred(A)":0.0120428528}
{"model":"llama-3.2-1B-instruct","pass1":0.32,"pass@count":0.32,"win_rate":0.0818372275,"count":11.0,"SE(A)":0.0208614477,"SE_x(A)":0.0208614477,"SE_pred(A)":0.0}
{"model":"qwen2.5-coder-0.5b-instruct","pass1":0.3186153846,"pass@count":0.608,"win_rate":0.0836904487,"count":13.0,"SE(A)":0.0208374481,"SE_x(A)":0.0158888531,"SE_pred(A)":0.013481231}
{"model":"mistralai_mistral_7b_instruct_v0.1","pass1":0.3052727273,"pass@count":0.586,"win_rate":0.0753615279,"count":11.0,"SE(A)":0.0205952077,"SE_x(A)":0.015499526,"SE_pred(A)":0.0135619789}
{"model":"qwen3-0.6b","pass1":0.2758461538,"pass@count":0.516,"win_rate":0.0697381725,"count":13.0,"SE(A)":0.0199877489,"SE_x(A)":0.0159220007,"SE_pred(A)":0.012083046}
{"model":"qwen2-1.5b-instruct","pass1":0.2349230769,"pass@count":0.564,"win_rate":0.0526585403,"count":13.0,"SE(A)":0.0189596532,"SE_x(A)":0.0130317851,"SE_pred(A)":0.0137710212}
{"model":"deepseek_r1_distill_qwen_1.5b","pass1":0.1461666667,"pass@count":0.388,"win_rate":0.0319757873,"count":12.0,"SE(A)":0.015798859,"SE_x(A)":0.0112626093,"SE_pred(A)":0.0110796018}
{"model":"qwen2-0.5b-instruct","pass1":0.1298461538,"pass@count":0.386,"win_rate":0.0230337568,"count":13.0,"SE(A)":0.0150323737,"SE_x(A)":0.009736158,"SE_pred(A)":0.0114533614}
{"model":"qwen1.5-1.8b-chat","pass1":0.1263636364,"pass@count":0.336,"win_rate":0.0231037975,"count":11.0,"SE(A)":0.0148590624,"SE_x(A)":0.0101940673,"SE_pred(A)":0.010810769}
{"model":"qwen1.5-0.5b-chat","pass1":0.0416923077,"pass@count":0.174,"win_rate":0.0057718872,"count":13.0,"SE(A)":0.0089391341,"SE_x(A)":0.0053885363,"SE_pred(A)":0.0071324466}
