| qwen3-32b |
70 |
85.5 |
36.4 |
9 |
0.42 |
0.35 |
0.23 |
| qwen3-14b |
67.3 |
81.6 |
34.4 |
10 |
0.43 |
0.37 |
0.21 |
| llama-3.1-70B-instruct |
63.9 |
63.9 |
32.3 |
12 |
0.44 |
0.44 |
0 |
| qwen3-8b |
62.6 |
78.2 |
31 |
10 |
0.44 |
0.38 |
0.22 |
| qwen2-72b-instruct |
62.2 |
78.1 |
31 |
4 |
0.44 |
0.35 |
0.27 |
| qwen2.5-coder-32b-instruct |
60.5 |
80 |
29.5 |
8 |
0.45 |
0.36 |
0.26 |
| google_gemma_3_12b_it |
58.7 |
77.6 |
28.2 |
11 |
0.45 |
0.38 |
0.23 |
| deepseek_r1_distill_llama_70b |
57.7 |
75.8 |
27.6 |
9 |
0.45 |
0.37 |
0.25 |
| qwen3-4b |
57.6 |
74.4 |
27.8 |
11 |
0.45 |
0.39 |
0.23 |
| mistralai_mixtral_8x22b_instruct_v0.1 |
51.8 |
82.2 |
24 |
9 |
0.46 |
0.34 |
0.31 |
| deepseek_r1_distill_qwen_32b |
51.5 |
73.5 |
23.6 |
8 |
0.46 |
0.36 |
0.28 |
| qwen2-math-72b-instruct |
51.1 |
79.8 |
23.8 |
8 |
0.46 |
0.34 |
0.31 |
| deepseek_r1_distill_qwen_14b |
48.1 |
75.5 |
21.5 |
12 |
0.46 |
0.35 |
0.29 |
| qwen1.5-72b-chat |
47.5 |
47.5 |
21.7 |
1 |
0.46 |
NaN |
NaN |
| qwen2.5-coder-14b-instruct |
47.4 |
81.9 |
21.3 |
10 |
0.46 |
0.33 |
0.32 |
| qwen1.5-32b-chat |
46.1 |
75 |
20.6 |
8 |
0.45 |
0.34 |
0.31 |
| llama-3.1-8B-instruct |
45 |
45 |
19.9 |
15 |
0.45 |
0.45 |
0 |
| qwen2-7b-instruct |
44.2 |
78.1 |
19.5 |
12 |
0.45 |
0.34 |
0.3 |
| qwen3-1.7b |
42.9 |
68 |
19.2 |
12 |
0.45 |
0.37 |
0.26 |
| mistralai_mixtral_8x7b_instruct_v0.1 |
42.3 |
75.9 |
18.8 |
10 |
0.45 |
0.33 |
0.31 |
| google_gemma_3_4b_it |
41.5 |
66.2 |
17.9 |
13 |
0.45 |
0.38 |
0.25 |
| mistralai_ministral_8b_instruct_2410 |
39.2 |
78.4 |
16.7 |
11 |
0.45 |
0.31 |
0.32 |
| qwen1.5-14b-chat |
37.9 |
69.6 |
16 |
10 |
0.44 |
0.33 |
0.29 |
| qwen2.5-coder-7b-instruct |
37.7 |
78.4 |
16 |
12 |
0.44 |
0.3 |
0.32 |
| mistralai_mathstral_7b_v0.1 |
36.7 |
79.2 |
15.7 |
12 |
0.44 |
0.29 |
0.33 |
| deepseek_r1_distill_llama_8b |
36.1 |
68.5 |
14.7 |
12 |
0.44 |
0.32 |
0.3 |
| deepseek_r1_distill_qwen_7b |
36.1 |
68.1 |
15.1 |
12 |
0.44 |
0.33 |
0.29 |
| llama-3.2-3B-instruct |
35 |
35 |
15 |
19 |
0.43 |
0.43 |
0 |
| mistralai_mistral_7b_instruct_v0.3 |
33.7 |
71.8 |
14.3 |
12 |
0.43 |
0.31 |
0.3 |
| qwen2-math-7b-instruct |
33.1 |
72.4 |
14.6 |
12 |
0.43 |
0.3 |
0.31 |
| qwen2.5-coder-3b-instruct |
29.4 |
74.6 |
12.1 |
12 |
0.42 |
0.27 |
0.31 |
| mistralai_mistral_7b_instruct_v0.2 |
29.1 |
65.4 |
11.9 |
12 |
0.41 |
0.3 |
0.28 |
| deepseek_v2_lite_chat |
29 |
67.5 |
11.9 |
10 |
0.41 |
0.28 |
0.3 |
| qwen1.5-7b-chat |
25.1 |
62.5 |
10.1 |
10 |
0.4 |
0.26 |
0.3 |
| qwen2-math-1.5b-instruct |
25.1 |
69.3 |
11.5 |
12 |
0.4 |
0.25 |
0.31 |
| qwen3-0.6b |
23.8 |
54 |
10.7 |
13 |
0.39 |
0.29 |
0.26 |
| mistralai_mistral_7b_instruct_v0.1 |
23.8 |
67.2 |
9.96 |
12 |
0.39 |
0.25 |
0.3 |
| llama-3.2-1B-instruct |
21.5 |
21.5 |
9.44 |
21 |
0.37 |
0.37 |
0 |
| deepseek_r1_distill_qwen_1.5b |
20.5 |
59.4 |
8.21 |
12 |
0.37 |
0.22 |
0.29 |
| qwen2.5-coder-1.5b-instruct |
20.3 |
64.8 |
8.48 |
12 |
0.37 |
0.22 |
0.29 |
| qwen2-1.5b-instruct |
17.2 |
63 |
7.59 |
12 |
0.34 |
0.18 |
0.29 |
| qwen1.5-1.8b-chat |
12.4 |
45.6 |
5.71 |
10 |
0.3 |
0.16 |
0.25 |
| qwen2-0.5b-instruct |
11.7 |
55.2 |
6.16 |
13 |
0.29 |
0.13 |
0.26 |
| qwen2.5-coder-0.5b-instruct |
10.4 |
53.1 |
5.79 |
13 |
0.28 |
0.1 |
0.26 |
| qwen1.5-0.5b-chat |
10.3 |
57 |
5.83 |
13 |
0.28 |
0.098 |
0.26 |