| model |
pass1 |
pass@count |
win_rate |
count |
SE(A) |
SE_x(A) |
SE_pred(A) |
| gpt-4-0613+cot |
73.7 |
92.2 |
42.8 |
10 |
1.6 |
1.2 |
1 |
| gpt-4-0613 |
68 |
79.4 |
37.6 |
10 |
1.6 |
1.5 |
0.75 |
| gpt-3.5-turbo-0613 |
45.7 |
69.4 |
21 |
10 |
1.8 |
1.4 |
1 |
| phind |
44.4 |
69.5 |
19.8 |
10 |
1.8 |
1.4 |
1.1 |
| gpt-3.5-turbo-0613+cot |
44.3 |
83.1 |
21.8 |
10 |
1.8 |
1.1 |
1.4 |
| codetulu-2-34b |
43.9 |
75.8 |
19.4 |
10 |
1.8 |
1.3 |
1.2 |
| deepseek-instruct-33b |
42.8 |
70.4 |
18.9 |
10 |
1.7 |
1.4 |
1.1 |
| codellama-34b+cot |
42.7 |
82.2 |
20.2 |
10 |
1.7 |
1.1 |
1.4 |
| codellama-34b |
41.1 |
74.9 |
17.7 |
10 |
1.7 |
1.3 |
1.2 |
| magicoder-ds-7b |
40.1 |
70.2 |
17.1 |
10 |
1.7 |
1.3 |
1.1 |
| deepseek-base-33b |
39.6 |
74.1 |
17.1 |
10 |
1.7 |
1.3 |
1.2 |
| wizard-34b |
38.7 |
63.4 |
16.1 |
10 |
1.7 |
1.4 |
1 |
| codellama-python-34b |
37 |
66.6 |
15.1 |
10 |
1.7 |
1.3 |
1.1 |
| deepseek-base-6.7b |
36.9 |
70.5 |
15.5 |
10 |
1.7 |
1.2 |
1.2 |
| codellama-13b+cot |
36.4 |
78.2 |
16.4 |
10 |
1.7 |
1 |
1.4 |
| codellama-13b |
35.2 |
71 |
14.3 |
10 |
1.7 |
1.2 |
1.2 |
| deepseek-instruct-6.7b |
34.7 |
59.5 |
14 |
10 |
1.7 |
1.3 |
1 |
| mixtral-8x7b |
32.8 |
68.9 |
13.2 |
10 |
1.7 |
1.2 |
1.2 |
| codellama-python-13b |
32.5 |
65.6 |
12.8 |
10 |
1.7 |
1.2 |
1.2 |
| wizard-13b |
32.2 |
58.6 |
12.5 |
10 |
1.7 |
1.3 |
1 |
| codellama-python-7b |
31.6 |
66.4 |
12.6 |
10 |
1.6 |
1.1 |
1.2 |
| codellama-7b+cot |
30 |
73.4 |
12.9 |
10 |
1.6 |
0.92 |
1.3 |
| codellama-7b |
28.4 |
63.1 |
10.6 |
10 |
1.6 |
1.1 |
1.1 |
| mistral-7b |
27.6 |
62.4 |
10.3 |
10 |
1.6 |
1.1 |
1.1 |
| starcoderbase-16b |
25.8 |
59.1 |
9.51 |
10 |
1.5 |
1.1 |
1.1 |
| phi-2 |
25.7 |
60.8 |
10.4 |
10 |
1.5 |
1 |
1.1 |
| starcoderbase-7b |
25.4 |
56.1 |
9.18 |
10 |
1.5 |
1.1 |
1.1 |
| deepseek-instruct-1.3b |
24 |
46.6 |
9.07 |
10 |
1.5 |
1.2 |
0.92 |
| deepseek-base-1.3b |
22.5 |
54.1 |
8.25 |
10 |
1.5 |
1 |
1.1 |
| phi-1.5 |
16.1 |
47.8 |
6.37 |
10 |
1.3 |
0.8 |
1 |
| phi-1 |
12.6 |
25.4 |
4.03 |
10 |
1.2 |
0.96 |
0.67 |