| qwen3-32b |
82.1 |
90 |
31.3 |
10 |
0.37 |
0.33 |
0.18 |
| qwen3-14b |
81.2 |
87.1 |
30.6 |
10 |
0.38 |
0.35 |
0.15 |
| qwen3-8b |
78.9 |
87.4 |
29.1 |
10 |
0.4 |
0.35 |
0.18 |
| llama-3.1-70B-instruct |
78.3 |
78.3 |
29 |
12 |
0.4 |
0.4 |
0 |
| deepseek_r1_distill_llama_70b |
78.1 |
90.3 |
29.1 |
9 |
0.4 |
0.33 |
0.23 |
| qwen2-72b-instruct |
76.5 |
88.8 |
27.7 |
7 |
0.41 |
0.34 |
0.24 |
| google_gemma_3_27b_it |
75.1 |
81.4 |
26.5 |
10 |
0.42 |
0.39 |
0.15 |
| deepseek_r1_distill_qwen_32b |
75.1 |
91.1 |
28.6 |
10 |
0.42 |
0.31 |
0.29 |
| qwen2-math-72b-instruct |
74.9 |
88.8 |
26.6 |
10 |
0.42 |
0.35 |
0.24 |
| qwen2.5-coder-32b-instruct |
74.7 |
86.5 |
26.3 |
10 |
0.42 |
0.37 |
0.21 |
| deepseek_r1_distill_qwen_14b |
74.5 |
90.9 |
27.5 |
12 |
0.42 |
0.32 |
0.28 |
| google_gemma_2_27b_it |
74.3 |
86.5 |
26.3 |
9 |
0.43 |
0.36 |
0.22 |
| google_gemma_3_12b_it |
74.2 |
83.2 |
25.9 |
11 |
0.43 |
0.39 |
0.18 |
| qwen3-4b |
73.6 |
82.7 |
25.7 |
12 |
0.43 |
0.39 |
0.18 |
| deepseek_r1_distill_qwen_7b |
73.3 |
90.6 |
26.5 |
12 |
0.43 |
0.33 |
0.28 |
| qwen2.5-coder-14b-instruct |
70.9 |
88.7 |
24.2 |
10 |
0.44 |
0.35 |
0.27 |
| google_gemma_2_9b_it |
70.6 |
85.9 |
24.2 |
11 |
0.44 |
0.37 |
0.24 |
| mistralai_mixtral_8x22b_instruct_v0.1 |
69.6 |
90.8 |
24.2 |
10 |
0.45 |
0.33 |
0.3 |
| qwen1.5-32b-chat |
67.1 |
88.9 |
22.7 |
10 |
0.46 |
0.34 |
0.3 |
| qwen1.5-72b-chat |
66.9 |
85.6 |
22.5 |
7 |
0.46 |
0.36 |
0.28 |
| deepseek_r1_distill_llama_8b |
66.8 |
90.6 |
23.5 |
12 |
0.46 |
0.32 |
0.33 |
| qwen2-math-7b-instruct |
65.5 |
86.9 |
21.4 |
12 |
0.46 |
0.36 |
0.28 |
| mistralai_ministral_8b_instruct_2410 |
64.6 |
87.6 |
21 |
11 |
0.47 |
0.36 |
0.3 |
| google_gemma_3_4b_it |
64.3 |
79.2 |
20.4 |
13 |
0.47 |
0.41 |
0.22 |
| qwen2-7b-instruct |
64.2 |
88.8 |
20.9 |
12 |
0.47 |
0.35 |
0.31 |
| llama-3.1-8B-instruct |
64.1 |
64.1 |
21.2 |
16 |
0.47 |
0.47 |
0 |
| qwen2-math-1.5b-instruct |
61.5 |
84.2 |
19.2 |
12 |
0.47 |
0.38 |
0.28 |
| qwen2.5-coder-7b-instruct |
61.4 |
88.2 |
19.6 |
12 |
0.47 |
0.35 |
0.32 |
| qwen3-1.7b |
58.8 |
76.9 |
18.4 |
12 |
0.48 |
0.41 |
0.25 |
| mistralai_mathstral_7b_v0.1 |
57.7 |
87.3 |
17.8 |
12 |
0.48 |
0.35 |
0.33 |
| deepseek_r1_distill_qwen_1.5b |
57 |
88.3 |
19.2 |
12 |
0.48 |
0.32 |
0.36 |
| qwen1.5-14b-chat |
55.7 |
82.9 |
16.9 |
10 |
0.48 |
0.37 |
0.32 |
| llama-3.2-3B-instruct |
55.3 |
55.3 |
16.9 |
18 |
0.48 |
0.48 |
0 |
| deepseek_v2_lite_chat |
49.9 |
80.6 |
14.5 |
10 |
0.49 |
0.36 |
0.33 |
| qwen2.5-coder-3b-instruct |
49.3 |
82.5 |
14.1 |
12 |
0.49 |
0.35 |
0.34 |
| mistralai_mixtral_8x7b_instruct_v0.1 |
49.2 |
83.1 |
14.8 |
10 |
0.49 |
0.34 |
0.35 |
| qwen1.5-7b-chat |
43 |
79.1 |
12.2 |
11 |
0.48 |
0.33 |
0.35 |
| google_codegemma_1.1_7b_it |
37.5 |
71.4 |
9.58 |
13 |
0.47 |
0.35 |
0.32 |
| mistralai_mistral_7b_instruct_v0.3 |
36.3 |
76.7 |
9.99 |
12 |
0.47 |
0.31 |
0.35 |
| qwen2.5-coder-1.5b-instruct |
34.9 |
74 |
8.96 |
12 |
0.46 |
0.32 |
0.34 |
| qwen3-0.6b |
30 |
63.1 |
7.67 |
13 |
0.45 |
0.33 |
0.3 |
| mistralai_mistral_7b_instruct_v0.2 |
29.8 |
68.1 |
8.21 |
12 |
0.45 |
0.3 |
0.32 |
| google_gemma_3_1b_it |
29.6 |
56.5 |
7.25 |
12 |
0.44 |
0.35 |
0.27 |
| qwen2-1.5b-instruct |
25.8 |
70.4 |
6.56 |
12 |
0.43 |
0.26 |
0.34 |
| llama-3.2-1B-instruct |
24.3 |
24.3 |
5.75 |
22 |
0.42 |
0.42 |
0 |
| mistralai_mistral_7b_instruct_v0.1 |
23.7 |
64 |
5.77 |
12 |
0.41 |
0.26 |
0.32 |
| google_gemma_7b_it |
19.1 |
47.9 |
4.54 |
12 |
0.38 |
0.28 |
0.27 |
| qwen1.5-1.8b-chat |
15.1 |
53.6 |
4.3 |
11 |
0.35 |
0.19 |
0.3 |
| qwen2-0.5b-instruct |
11.6 |
48.6 |
2.76 |
12 |
0.31 |
0.16 |
0.27 |
| qwen2.5-coder-0.5b-instruct |
9.04 |
41.8 |
2.19 |
13 |
0.28 |
0.15 |
0.24 |
| google_gemma_2b_it |
6.26 |
23.9 |
1.45 |
12 |
0.24 |
0.15 |
0.18 |
| qwen1.5-0.5b-chat |
4.29 |
27.9 |
1.22 |
13 |
0.2 |
0.083 |
0.18 |