| qwen3-32b |
81.4 |
85.3 |
34.3 |
2 |
0.38 |
0.33 |
0.19 |
| qwen3-14b |
81.1 |
85.2 |
34 |
3 |
0.38 |
0.34 |
0.17 |
| qwen3-8b |
78.7 |
84.6 |
32.4 |
3 |
0.4 |
0.35 |
0.2 |
| llama-3.1-70B-instruct |
77.3 |
77.3 |
32 |
4 |
0.41 |
0.41 |
0 |
| deepseek_r1_distill_llama_70b |
76.2 |
76.2 |
31.4 |
1 |
0.41 |
NaN |
NaN |
| google_gemma_3_27b_it |
75.1 |
79.1 |
30.1 |
3 |
0.42 |
0.39 |
0.16 |
| qwen2-72b-instruct |
74.4 |
81.9 |
29.9 |
2 |
0.42 |
0.33 |
0.27 |
| google_gemma_3_12b_it |
74.2 |
79.9 |
29.5 |
3 |
0.43 |
0.38 |
0.19 |
| google_gemma_2_27b_it |
74 |
74 |
29.5 |
1 |
0.43 |
NaN |
NaN |
| qwen3-4b |
73.7 |
79.6 |
29.2 |
3 |
0.43 |
0.38 |
0.2 |
| qwen2.5-coder-32b-instruct |
73.5 |
79 |
29.2 |
2 |
0.43 |
0.36 |
0.23 |
| deepseek_r1_distill_qwen_14b |
71 |
85.4 |
28.9 |
3 |
0.44 |
0.31 |
0.32 |
| google_gemma_2_9b_it |
70.1 |
80.1 |
27.2 |
3 |
0.45 |
0.37 |
0.25 |
| deepseek_r1_distill_qwen_32b |
70.1 |
70.1 |
29.1 |
1 |
0.45 |
NaN |
NaN |
| deepseek_r1_distill_qwen_7b |
68.6 |
83.4 |
27.2 |
3 |
0.45 |
0.32 |
0.32 |
| qwen2.5-coder-14b-instruct |
66.8 |
81.9 |
25.5 |
3 |
0.46 |
0.33 |
0.32 |
| qwen2-math-72b-instruct |
66.3 |
77.9 |
25.7 |
2 |
0.46 |
0.32 |
0.33 |
| qwen1.5-72b-chat |
65.7 |
75.5 |
25 |
2 |
0.46 |
0.35 |
0.3 |
| qwen1.5-32b-chat |
64.8 |
75.9 |
24.6 |
2 |
0.46 |
0.33 |
0.32 |
| google_gemma_3_4b_it |
64.1 |
74.6 |
23.4 |
4 |
0.47 |
0.4 |
0.24 |
| mistralai_mixtral_8x22b_instruct_v0.1 |
63.2 |
76.3 |
24.1 |
2 |
0.47 |
0.31 |
0.35 |
| qwen2-math-7b-instruct |
62 |
77.8 |
22.9 |
3 |
0.47 |
0.35 |
0.32 |
| deepseek_r1_distill_llama_8b |
62 |
83.8 |
24.3 |
4 |
0.47 |
0.3 |
0.36 |
| llama-3.1-8B-instruct |
60.6 |
60.6 |
22.7 |
7 |
0.48 |
0.48 |
0 |
| mistralai_ministral_8b_instruct_2410 |
59.7 |
77.6 |
21.8 |
3 |
0.48 |
0.33 |
0.34 |
| qwen2-7b-instruct |
59.2 |
77.6 |
21.6 |
3 |
0.48 |
0.33 |
0.35 |
| qwen3-1.7b |
58.6 |
69.4 |
21 |
3 |
0.48 |
0.4 |
0.26 |
| qwen2-math-1.5b-instruct |
56.8 |
73.3 |
20.2 |
3 |
0.48 |
0.36 |
0.33 |
| qwen1.5-14b-chat |
53.6 |
71.3 |
18.7 |
3 |
0.49 |
0.35 |
0.34 |
| qwen2.5-coder-7b-instruct |
51.9 |
73.8 |
18.4 |
3 |
0.49 |
0.31 |
0.38 |
| llama-3.2-3B-instruct |
49.6 |
49.6 |
17.2 |
10 |
0.49 |
0.49 |
0 |
| deepseek_r1_distill_qwen_1.5b |
49.6 |
76.8 |
18.8 |
4 |
0.49 |
0.29 |
0.39 |
| mistralai_mathstral_7b_v0.1 |
47.5 |
70.6 |
16.4 |
3 |
0.49 |
0.3 |
0.38 |
| deepseek_v2_lite_chat |
46.7 |
59.5 |
15.6 |
2 |
0.49 |
0.34 |
0.35 |
| mistralai_mixtral_8x7b_instruct_v0.1 |
44.8 |
66.7 |
15.3 |
3 |
0.48 |
0.31 |
0.37 |
| qwen2.5-coder-3b-instruct |
40.3 |
62.7 |
13.1 |
3 |
0.48 |
0.3 |
0.37 |
| qwen1.5-7b-chat |
40.2 |
61.1 |
13.1 |
3 |
0.48 |
0.32 |
0.36 |
| google_codegemma_1.1_7b_it |
33.5 |
56.6 |
10 |
4 |
0.46 |
0.31 |
0.34 |
| mistralai_mistral_7b_instruct_v0.3 |
32.4 |
54.1 |
10.4 |
3 |
0.46 |
0.28 |
0.36 |
| google_gemma_3_1b_it |
28.9 |
45.6 |
8.36 |
4 |
0.44 |
0.34 |
0.28 |
| qwen3-0.6b |
28 |
49.1 |
8.4 |
4 |
0.44 |
0.3 |
0.32 |
| mistralai_mistral_7b_instruct_v0.2 |
27.9 |
47 |
8.92 |
3 |
0.44 |
0.28 |
0.34 |
| qwen2.5-coder-1.5b-instruct |
26.5 |
47.3 |
8.12 |
3 |
0.43 |
0.25 |
0.35 |
| llama-3.2-1B-instruct |
19 |
19 |
5.62 |
12 |
0.38 |
0.38 |
0 |
| google_gemma_7b_it |
18.5 |
35.8 |
5.3 |
4 |
0.38 |
0.26 |
0.28 |
| mistralai_mistral_7b_instruct_v0.1 |
16 |
32.9 |
5.07 |
3 |
0.36 |
0.18 |
0.31 |
| qwen2-1.5b-instruct |
15.2 |
32.5 |
5.14 |
3 |
0.35 |
0.16 |
0.31 |
| qwen1.5-1.8b-chat |
11.6 |
24.5 |
4.81 |
3 |
0.31 |
0.16 |
0.27 |
| qwen2.5-coder-0.5b-instruct |
7.1 |
20.8 |
2.39 |
4 |
0.25 |
0.11 |
0.23 |
| qwen2-0.5b-instruct |
6.61 |
20.3 |
2.56 |
4 |
0.24 |
0.087 |
0.23 |
| google_gemma_2b_it |
6.2 |
15.3 |
1.81 |
4 |
0.23 |
0.14 |
0.19 |
| qwen1.5-0.5b-chat |
5.41 |
14.5 |
3.2 |
4 |
0.22 |
0.11 |
0.19 |