| google_gemma_3_12b_it |
27.6 |
56.9 |
22.9 |
11 |
2 |
1.4 |
1.4 |
| qwen2-72b-instruct |
25.4 |
59 |
21 |
10 |
1.9 |
1.3 |
1.4 |
| qwen3-32b |
24.9 |
58.6 |
20.6 |
10 |
1.9 |
1.3 |
1.4 |
| qwen3-14b |
24.6 |
53.4 |
20.1 |
12 |
1.9 |
1.4 |
1.3 |
| qwen2.5-coder-32b-instruct |
23.7 |
51.1 |
19.2 |
10 |
1.9 |
1.3 |
1.3 |
| qwen3-4b |
21.7 |
49.9 |
17.5 |
12 |
1.8 |
1.3 |
1.3 |
| qwen3-8b |
17.4 |
46.4 |
14 |
12 |
1.7 |
1.2 |
1.2 |
| qwen2.5-coder-14b-instruct |
16.8 |
48.5 |
13.3 |
12 |
1.6 |
1 |
1.3 |
| qwen1.5-32b-chat |
13.5 |
44.1 |
10.9 |
11 |
1.5 |
0.93 |
1.2 |
| qwen1.5-72b-chat |
13.4 |
41.4 |
10.7 |
10 |
1.5 |
0.92 |
1.2 |
| google_gemma_7b_it |
13 |
33.4 |
10.9 |
13 |
1.5 |
1.1 |
1 |
| google_gemma_2_27b_it |
12.7 |
37.7 |
10.4 |
10 |
1.5 |
0.99 |
1.1 |
| qwen2-math-72b-instruct |
11.4 |
22.9 |
9.05 |
10 |
1.4 |
1.1 |
0.85 |
| qwen2.5-coder-7b-instruct |
11.2 |
39.8 |
8.76 |
10 |
1.4 |
0.75 |
1.2 |
| google_gemma_3_4b_it |
11.1 |
38.1 |
8.85 |
13 |
1.4 |
0.86 |
1.1 |
| llama-3.1-8B-instruct |
10.5 |
10.5 |
8.59 |
15 |
1.4 |
1.4 |
0 |
| google_gemma_2_9b_it |
9.29 |
29.5 |
7.51 |
11 |
1.3 |
0.87 |
0.94 |
| qwen1.5-14b-chat |
9.17 |
38.8 |
7.42 |
12 |
1.3 |
0.69 |
1.1 |
| mistralai_mixtral_8x22b_instruct_v0.1 |
8.93 |
37.5 |
7.07 |
11 |
1.3 |
0.65 |
1.1 |
| qwen2-1.5b-instruct |
7.57 |
38.4 |
6.5 |
13 |
1.2 |
0.59 |
1 |
| google_codegemma_1.1_7b_it |
7.45 |
38.3 |
6 |
13 |
1.2 |
0.5 |
1 |
| qwen2-7b-instruct |
7.13 |
31.1 |
5.52 |
11 |
1.1 |
0.59 |
0.97 |
| deepseek_r1_distill_qwen_32b |
6.64 |
19.8 |
4.9 |
10 |
1.1 |
0.73 |
0.82 |
| mistralai_mistral_7b_instruct_v0.3 |
6.62 |
28.7 |
5.33 |
11 |
1.1 |
0.55 |
0.95 |
| qwen3-1.7b |
6.62 |
22.7 |
5.05 |
12 |
1.1 |
0.74 |
0.81 |
| qwen1.5-7b-chat |
6.57 |
34.4 |
5.29 |
12 |
1.1 |
0.5 |
0.97 |
| llama-3.2-3B-instruct |
6.41 |
6.41 |
5.07 |
17 |
1.1 |
1.1 |
0 |
| deepseek_v2_lite_chat |
6.41 |
31.5 |
5.21 |
11 |
1.1 |
0.47 |
0.97 |
| qwen2-math-7b-instruct |
6.15 |
14 |
4.61 |
6 |
1.1 |
0.76 |
0.74 |
| deepseek_r1_distill_qwen_14b |
6.02 |
19.2 |
4.48 |
11 |
1 |
0.68 |
0.79 |
| mistralai_ministral_8b_instruct_2410 |
5.63 |
31.1 |
4.41 |
11 |
1 |
0.45 |
0.91 |
| mistralai_mistral_7b_instruct_v0.1 |
5.53 |
28.2 |
4.49 |
11 |
1 |
0.45 |
0.9 |
| deepseek_r1_distill_llama_70b |
5.48 |
17.3 |
3.98 |
10 |
1 |
0.66 |
0.75 |
| mistralai_mistral_7b_instruct_v0.2 |
5.32 |
23.5 |
4.33 |
10 |
0.99 |
0.5 |
0.85 |
| qwen2.5-coder-3b-instruct |
5.16 |
29.5 |
3.92 |
12 |
0.97 |
0.42 |
0.88 |
| google_gemma_2b_it |
5.11 |
13.2 |
4.1 |
13 |
0.97 |
0.74 |
0.62 |
| mistralai_mathstral_7b_v0.1 |
5.07 |
28 |
3.98 |
11 |
0.97 |
0.38 |
0.89 |
| qwen2-0.5b-instruct |
4.9 |
27 |
4.03 |
13 |
0.95 |
0.41 |
0.86 |
| qwen1.5-1.8b-chat |
4.31 |
22.3 |
3.5 |
11 |
0.89 |
0.35 |
0.82 |
| llama-3.2-1B-instruct |
4.27 |
4.27 |
3.47 |
12 |
0.89 |
0.89 |
0 |
| google_gemma_3_1b_it |
4.22 |
19.2 |
3.15 |
12 |
0.89 |
0.51 |
0.72 |
| deepseek_r1_distill_qwen_7b |
4.17 |
12.6 |
2.95 |
11 |
0.88 |
0.59 |
0.66 |
| qwen2-math-1.5b-instruct |
4.03 |
9.13 |
2.95 |
4 |
0.87 |
0.54 |
0.67 |
| qwen1.5-0.5b-chat |
3.91 |
20.6 |
3.26 |
13 |
0.85 |
0.37 |
0.77 |
| deepseek_r1_distill_llama_8b |
3.54 |
17.1 |
2.57 |
12 |
0.81 |
0.43 |
0.69 |
| qwen3-0.6b |
2.66 |
15.5 |
1.94 |
13 |
0.71 |
0.37 |
0.61 |
| deepseek_r1_distill_qwen_1.5b |
2.25 |
10.3 |
1.57 |
12 |
0.65 |
0.35 |
0.55 |
| qwen2.5-coder-1.5b-instruct |
1.8 |
11.8 |
1.33 |
11 |
0.59 |
0.23 |
0.54 |
| qwen2.5-coder-0.5b-instruct |
1.69 |
13.2 |
1.33 |
13 |
0.57 |
0.17 |
0.54 |