| qwen3-14b |
72.2 |
80 |
31.4 |
12 |
2 |
1.9 |
0.7 |
| qwen2.5-coder-14b-instruct |
72.1 |
88.2 |
32.9 |
12 |
2 |
1.6 |
1.2 |
| google_gemma_3_12b_it |
69.3 |
76.6 |
29.3 |
11 |
2.1 |
1.9 |
0.73 |
| deepseek_r1_distill_llama_70b |
66.3 |
81.2 |
26.7 |
11 |
2.1 |
1.8 |
1.1 |
| qwen3-8b |
66.1 |
76.8 |
27.1 |
12 |
2.1 |
1.9 |
0.88 |
| qwen2.5-coder-7b-instruct |
64.8 |
85.8 |
27.9 |
10 |
2.1 |
1.6 |
1.4 |
| qwen3-4b |
64.1 |
74.8 |
25.5 |
12 |
2.1 |
2 |
0.83 |
| mistralai_mixtral_8x22b_instruct_v0.1 |
63.9 |
80.2 |
25.2 |
11 |
2.1 |
1.8 |
1.2 |
| google_gemma_3_4b_it |
60 |
69.8 |
22.6 |
13 |
2.2 |
2 |
0.81 |
| deepseek_r1_distill_qwen_14b |
57.1 |
77.6 |
21.1 |
11 |
2.2 |
1.8 |
1.3 |
| llama-3.1-8B-instruct |
56.2 |
56.2 |
19.9 |
15 |
2.2 |
2.2 |
0 |
| qwen2.5-coder-3b-instruct |
55 |
81.4 |
20.6 |
12 |
2.2 |
1.7 |
1.5 |
| mistralai_mixtral_8x7b_instruct_v0.1 |
52.1 |
71.8 |
17.7 |
12 |
2.2 |
1.9 |
1.2 |
| mistralai_ministral_8b_instruct_2410 |
52 |
73.6 |
17.3 |
11 |
2.2 |
1.8 |
1.3 |
| qwen2-7b-instruct |
51.1 |
72.6 |
17.2 |
11 |
2.2 |
1.8 |
1.3 |
| llama-3.2-3B-instruct |
48.8 |
48.8 |
15.7 |
15 |
2.2 |
2.2 |
0 |
| mistralai_mathstral_7b_v0.1 |
48.7 |
75 |
15.7 |
11 |
2.2 |
1.8 |
1.4 |
| qwen3-1.7b |
48.1 |
62.6 |
15.7 |
12 |
2.2 |
2 |
0.99 |
| deepseek_v2_lite_chat |
44.5 |
67.8 |
13.6 |
11 |
2.2 |
1.8 |
1.3 |
| qwen2.5-coder-1.5b-instruct |
43.7 |
72.2 |
13.8 |
11 |
2.2 |
1.7 |
1.5 |
| qwen1.5-14b-chat |
40.2 |
62.2 |
11.4 |
12 |
2.2 |
1.8 |
1.2 |
| deepseek_r1_distill_llama_8b |
39.5 |
66.6 |
11.7 |
12 |
2.2 |
1.7 |
1.3 |
| mistralai_mistral_7b_instruct_v0.3 |
39.2 |
61.6 |
10.7 |
11 |
2.2 |
1.8 |
1.2 |
| deepseek_r1_distill_qwen_7b |
39.2 |
68.2 |
12 |
11 |
2.2 |
1.6 |
1.4 |
| mistralai_mistral_7b_instruct_v0.2 |
36.4 |
58 |
9.98 |
10 |
2.2 |
1.8 |
1.2 |
| qwen1.5-7b-chat |
34.8 |
56.2 |
9.06 |
12 |
2.1 |
1.8 |
1.2 |
| llama-3.2-1B-instruct |
32 |
32 |
8.18 |
11 |
2.1 |
2.1 |
0 |
| qwen2.5-coder-0.5b-instruct |
31.9 |
60.8 |
8.37 |
13 |
2.1 |
1.6 |
1.3 |
| mistralai_mistral_7b_instruct_v0.1 |
30.5 |
58.6 |
7.54 |
11 |
2.1 |
1.5 |
1.4 |
| qwen3-0.6b |
27.6 |
51.6 |
6.97 |
13 |
2 |
1.6 |
1.2 |
| qwen2-1.5b-instruct |
23.5 |
56.4 |
5.27 |
13 |
1.9 |
1.3 |
1.4 |
| deepseek_r1_distill_qwen_1.5b |
14.6 |
38.8 |
3.2 |
12 |
1.6 |
1.1 |
1.1 |
| qwen2-0.5b-instruct |
13 |
38.6 |
2.3 |
13 |
1.5 |
0.97 |
1.1 |
| qwen1.5-1.8b-chat |
12.6 |
33.6 |
2.31 |
11 |
1.5 |
1 |
1.1 |
| qwen1.5-0.5b-chat |
4.17 |
17.4 |
0.577 |
13 |
0.89 |
0.54 |
0.71 |