| google_gemma_3_27b_it |
86.1 |
93.3 |
46.6 |
8 |
0.49 |
0.41 |
0.26 |
| qwen3-14b |
82.8 |
93.4 |
44 |
10 |
0.53 |
0.43 |
0.32 |
| google_gemma_3_12b_it |
80.1 |
92.2 |
41.9 |
11 |
0.56 |
0.46 |
0.32 |
| qwen3-4b |
78.6 |
92.1 |
40.7 |
12 |
0.58 |
0.47 |
0.34 |
| qwen3-8b |
78.2 |
92.8 |
40.5 |
10 |
0.58 |
0.46 |
0.36 |
| qwen3-32b |
76.9 |
94.2 |
40.2 |
10 |
0.6 |
0.41 |
0.43 |
| deepseek_r1_distill_llama_70b |
73.6 |
92.4 |
39.4 |
9 |
0.62 |
0.46 |
0.43 |
| google_gemma_3_4b_it |
72 |
87.9 |
36 |
13 |
0.63 |
0.53 |
0.35 |
| deepseek_r1_distill_qwen_7b |
71.8 |
95.6 |
38.8 |
12 |
0.64 |
0.37 |
0.52 |
| deepseek_r1_distill_llama_8b |
69.1 |
94.6 |
36.2 |
12 |
0.65 |
0.4 |
0.52 |
| qwen3-1.7b |
63.9 |
86.5 |
30.5 |
12 |
0.68 |
0.53 |
0.42 |
| llama-3.1-70B-instruct |
62.9 |
63.1 |
29.8 |
12 |
0.68 |
0.68 |
0.06 |
| deepseek_r1_distill_qwen_1.5b |
62.6 |
93.4 |
31.6 |
12 |
0.68 |
0.39 |
0.56 |
| deepseek_r1_distill_qwen_14b |
61.3 |
92.6 |
32.7 |
11 |
0.69 |
0.41 |
0.55 |
| qwen2-72b-instruct |
59.3 |
85.6 |
27.4 |
10 |
0.69 |
0.51 |
0.47 |
| deepseek_r1_distill_qwen_32b |
58 |
90.2 |
30.6 |
9 |
0.7 |
0.44 |
0.54 |
| qwen2.5-coder-32b-instruct |
55.1 |
86.1 |
26.1 |
10 |
0.7 |
0.5 |
0.5 |
| google_gemma_2_27b_it |
51.9 |
71.6 |
22.5 |
7 |
0.71 |
0.58 |
0.4 |
| qwen2.5-coder-14b-instruct |
49.4 |
82.3 |
21.7 |
9 |
0.71 |
0.48 |
0.52 |
| qwen2.5-coder-7b-instruct |
45.9 |
83.4 |
20.1 |
11 |
0.7 |
0.45 |
0.55 |
| google_gemma_2_9b_it |
44.9 |
67 |
18.2 |
10 |
0.7 |
0.59 |
0.39 |
| qwen1.5-72b-chat |
43.5 |
72.7 |
17.8 |
10 |
0.7 |
0.53 |
0.46 |
| qwen1.5-32b-chat |
42.2 |
72.4 |
17.1 |
10 |
0.7 |
0.52 |
0.46 |
| mistralai_mixtral_8x22b_instruct_v0.1 |
41.8 |
75.4 |
17.1 |
10 |
0.7 |
0.5 |
0.49 |
| mistralai_mathstral_7b_v0.1 |
39.7 |
74.9 |
15.9 |
11 |
0.69 |
0.49 |
0.49 |
| mistralai_ministral_8b_instruct_2410 |
38.3 |
74.3 |
15.2 |
11 |
0.69 |
0.48 |
0.49 |
| llama-3.2-3B-instruct |
37.8 |
38 |
15.1 |
19 |
0.69 |
0.68 |
0.064 |
| llama-3.1-8B-instruct |
37.5 |
37.7 |
15 |
16 |
0.68 |
0.68 |
0.064 |
| qwen3-0.6b |
37 |
70.8 |
14.8 |
13 |
0.68 |
0.5 |
0.46 |
| qwen2.5-coder-3b-instruct |
36.7 |
75.8 |
14.9 |
12 |
0.68 |
0.44 |
0.52 |
| qwen2-7b-instruct |
36.2 |
72.6 |
15 |
11 |
0.68 |
0.44 |
0.51 |
| google_gemma_3_1b_it |
33.3 |
60.3 |
13 |
12 |
0.67 |
0.53 |
0.41 |
| qwen1.5-14b-chat |
31.5 |
64 |
11.8 |
10 |
0.66 |
0.47 |
0.46 |
| qwen2.5-coder-1.5b-instruct |
27.2 |
65.9 |
10.1 |
12 |
0.63 |
0.4 |
0.48 |
| deepseek_v2_lite_chat |
25.7 |
57.3 |
9.1 |
10 |
0.62 |
0.44 |
0.44 |
| mistralai_mixtral_8x7b_instruct_v0.1 |
25.6 |
58.6 |
9.16 |
10 |
0.62 |
0.43 |
0.45 |
| google_codegemma_1.1_7b_it |
20.5 |
53.3 |
7.12 |
13 |
0.57 |
0.4 |
0.41 |
| qwen1.5-7b-chat |
20.5 |
51.5 |
7.02 |
10 |
0.57 |
0.38 |
0.43 |
| llama-3.2-1B-instruct |
18.7 |
18.9 |
6.38 |
21 |
0.55 |
0.55 |
0.051 |
| qwen2-1.5b-instruct |
15.1 |
50.1 |
5.06 |
12 |
0.51 |
0.3 |
0.41 |
| mistralai_mistral_7b_instruct_v0.3 |
13.2 |
42.7 |
4.26 |
11 |
0.48 |
0.3 |
0.37 |
| google_gemma_7b_it |
11.9 |
31.7 |
3.91 |
12 |
0.46 |
0.34 |
0.31 |
| mistralai_mistral_7b_instruct_v0.2 |
10.3 |
33.8 |
3.2 |
11 |
0.43 |
0.28 |
0.33 |
| qwen2-0.5b-instruct |
7.62 |
34.4 |
2.41 |
13 |
0.38 |
0.2 |
0.32 |
| qwen2.5-coder-0.5b-instruct |
7.22 |
34 |
2.29 |
13 |
0.37 |
0.19 |
0.32 |
| mistralai_mistral_7b_instruct_v0.1 |
7.06 |
28.8 |
2.21 |
11 |
0.36 |
0.21 |
0.3 |
| google_gemma_2b_it |
6.33 |
22.2 |
2.11 |
12 |
0.34 |
0.23 |
0.25 |
| qwen1.5-1.8b-chat |
5.22 |
25.4 |
1.67 |
10 |
0.31 |
0.15 |
0.28 |
| qwen1.5-0.5b-chat |
1.3 |
11.2 |
0.486 |
13 |
0.16 |
0.049 |
0.15 |