44 "open-data" : " None" ,
55 "pass@1" : {
66 "instruct" : null ,
7- "complete" : 41.7
7+ "complete" : 38.73
88 },
9- "prompted" : false ,
9+ "prompted" : true ,
1010 "size" : 34 ,
11- "direct_complete" : true ,
11+ "direct_complete" : false ,
1212 "lazy" : false ,
1313 "elo_mle" : 942
1414 },
15- "CodeLlama-13B-Python " : {
16- "link" : " https://huggingface.co/codellama/CodeLlama-13b-hf " ,
15+ "Meta-Llama-3-70B " : {
16+ "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B " ,
1717 "open-data" : " None" ,
1818 "pass@1" : {
1919 "instruct" : null ,
20- "complete" : 40.0
20+ "complete" : 48.98
2121 },
2222 "prompted" : false ,
23- "size" : 13 ,
24- "direct_complete" : true ,
23+ "size" : 70 ,
24+ "direct_complete" : false ,
25+ "lazy" : false ,
26+ "elo_mle" : 874
27+ },
28+ "Meta-Llama-3-70B-Instruct" : {
29+ "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" ,
30+ "open-data" : " None" ,
31+ "pass@1" : {
32+ "instruct" : null ,
33+ "complete" : 62.45
34+ },
35+ "prompted" : true ,
36+ "size" : 70 ,
37+ "direct_complete" : false ,
38+ "lazy" : false ,
39+ "elo_mle" : 874
40+ },
41+ "Meta-Llama-3.1-70B-Instruct" : {
42+ "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct" ,
43+ "open-data" : " None" ,
44+ "pass@1" : {
45+ "instruct" : null ,
46+ "complete" : 60
47+ },
48+ "prompted" : true ,
49+ "size" : 70 ,
50+ "direct_complete" : false ,
2551 "lazy" : false ,
2652 "elo_mle" : 874
2753 },
28- "CodeQwen1.5-7B " : {
29- "link" : " https://huggingface.co/Qwen/CodeQwen1.5-7B " ,
54+ "Meta-Llama-3.1-70B " : {
55+ "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B " ,
3056 "open-data" : " None" ,
3157 "pass@1" : {
3258 "instruct" : null ,
33- "complete" : 31.8
59+ "complete" : 37.56
3460 },
3561 "prompted" : false ,
62+ "size" : 70 ,
63+ "direct_complete" : false ,
64+ "lazy" : false ,
65+ "elo_mle" : 874
66+ },
67+ "Mistral-7B-Instruct-v0.3" : {
68+ "link" : " https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" ,
69+ "open-data" : " None" ,
70+ "pass@1" : {
71+ "instruct" : null ,
72+ "complete" : 43.33
73+ },
74+ "prompted" : true ,
3675 "size" : 7 ,
37- "direct_complete" : true ,
76+ "direct_complete" : false ,
3877 "lazy" : false ,
39- "elo_mle" : 1056
78+ "elo_mle" : 874
4079 },
41- "DeepSeek-Coder-33B-Base " : {
42- "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-base " ,
80+ "Mixtral-8x7B-Instruct-v0.1 " : {
81+ "link" : " https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1 " ,
4382 "open-data" : " None" ,
4483 "pass@1" : {
4584 "instruct" : null ,
46- "complete" : 33.5
85+ "complete" : 42.96
4786 },
48- "prompted" : false ,
49- "size" : 33 ,
50- "direct_complete" : true ,
87+ "prompted" : true ,
88+ "size" : 7 ,
89+ "direct_complete" : false ,
90+ "lazy" : false ,
91+ "elo_mle" : 874
92+ },
93+ "Codestral-22B-v0.1" : {
94+ "link" : " https://huggingface.co/mistralai/Codestral-22B-v0.1" ,
95+ "open-data" : " None" ,
96+ "pass@1" : {
97+ "instruct" : null ,
98+ "complete" : 47.6
99+ },
100+ "prompted" : true ,
101+ "size" : 22 ,
102+ "direct_complete" : false ,
51103 "lazy" : false ,
52- "elo_mle" : 1064
104+ "elo_mle" : 874
53105 },
54- "StarCoder2-15B " : {
55- "link" : " https://huggingface.co/bigcode/starcoder2-15b " ,
56- "open-data" : " Full " ,
106+ "Phi-3-medium-128k-instruct " : {
107+ "link" : " https://huggingface.co/microsoft/Phi-3-medium-128k-instruct " ,
108+ "open-data" : " None " ,
57109 "pass@1" : {
58110 "instruct" : null ,
59- "complete" : 28.2
111+ "complete" : 48.03
60112 },
61- "prompted" : false ,
62- "size" : 15 ,
63- "direct_complete" : true ,
113+ "prompted" : true ,
114+ "size" : 14 ,
115+ "direct_complete" : false ,
64116 "lazy" : false ,
65- "elo_mle" : 960
117+ "elo_mle" : 874
66118 },
67- "DeepSeek-Coder-6.7B-Base " : {
68- "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base " ,
119+ "Phi-3-mini-128k-instruct " : {
120+ "link" : " https://huggingface.co/microsoft/Phi-3-mini-128k-instruct " ,
69121 "open-data" : " None" ,
70122 "pass@1" : {
71123 "instruct" : null ,
72- "complete" : 28.4
124+ "complete" : 37.93
73125 },
74- "prompted" : false ,
75- "size" : 6.7 ,
76- "direct_complete" : true ,
126+ "prompted" : true ,
127+ "size" : 3.8 ,
128+ "direct_complete" : false ,
77129 "lazy" : false ,
78- "elo_mle" : 1002
130+ "elo_mle" : 874
79131 },
80- "DeepSeek-Coder-33B -Instruct" : {
81- "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct " ,
132+ "Qwen2-57B-A14B -Instruct" : {
133+ "link" : " https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct " ,
82134 "open-data" : " None" ,
83135 "pass@1" : {
84136 "instruct" : null ,
85- "complete" : 33.5
137+ "complete" : 46.34
86138 },
87139 "prompted" : true ,
88- "size" : 33 ,
140+ "size" : 57 ,
89141 "direct_complete" : false ,
90142 "lazy" : false ,
91- "elo_mle" : 1129
143+ "elo_mle" : 874
92144 },
93- "Yi-1 .5-34B " : {
94- "link" : " https://huggingface.co/01-ai/Yi-1 .5-34B " ,
145+ "CodeQwen1 .5-7B-Chat " : {
146+ "link" : " https://huggingface.co/Qwen/CodeQwen1 .5-7B-Chat " ,
95147 "open-data" : " None" ,
96148 "pass@1" : {
97149 "instruct" : null ,
98- "complete" : 34.9
150+ "complete" : 49.82
99151 },
100- "prompted" : false ,
152+ "prompted" : true ,
153+ "size" : 7 ,
154+ "direct_complete" : false ,
155+ "lazy" : false ,
156+ "elo_mle" : 874
157+ },
158+ "Yi-1.5-34B-Chat" : {
159+ "link" : " https://huggingface.co/01-ai/Yi-1.5-34B-Chat" ,
160+ "open-data" : " None" ,
161+ "pass@1" : {
162+ "instruct" : null ,
163+ "complete" : 49.39
164+ },
165+ "prompted" : true ,
101166 "size" : 34 ,
102- "direct_complete" : true ,
167+ "direct_complete" : false ,
168+ "lazy" : false ,
169+ "elo_mle" : 874
170+ },
171+ "Yi-1.5-9B-Chat" : {
172+ "link" : " https://huggingface.co/01-ai/Yi-1.5-9B-Chat" ,
173+ "open-data" : " None" ,
174+ "pass@1" : {
175+ "instruct" : null ,
176+ "complete" : 47.23
177+ },
178+ "prompted" : true ,
179+ "size" : 9 ,
180+ "direct_complete" : false ,
181+ "lazy" : false ,
182+ "elo_mle" : 874
183+ },
184+ "DeepSeek-coder-7b-instruct-v1.5" : {
185+ "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-7b-instruct-v1.5" ,
186+ "open-data" : " None" ,
187+ "pass@1" : {
188+ "instruct" : null ,
189+ "complete" : 41.21
190+ },
191+ "prompted" : true ,
192+ "size" : 7 ,
193+ "direct_complete" : false ,
103194 "lazy" : false ,
104- "elo_mle" : 978
195+ "elo_mle" : 874
105196 },
106- "OpenCodeInterpreter-DS-33B " : {
107- "link" : " https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-33B " ,
108- "open-data" : " Partial " ,
197+ "DeepSeek-coder-33b-instruct " : {
198+ "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct " ,
199+ "open-data" : " None " ,
109200 "pass@1" : {
110201 "instruct" : null ,
111- "complete" : 31.0
202+ "complete" : 36.6
112203 },
113204 "prompted" : true ,
114205 "size" : 33 ,
115- "direct_complete" : true ,
206+ "direct_complete" : false ,
116207 "lazy" : false ,
117- "elo_mle" : 1131
208+ "elo_mle" : 874
118209 },
119- "To be updated " : {
120- "link" : " " ,
210+ "DeepSeek-moe-16b-chat " : {
211+ "link" : " https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat " ,
121212 "open-data" : " None" ,
122213 "pass@1" : {
123214 "instruct" : null ,
124- "complete" : 0
215+ "complete" : 31.01
125216 },
126- "prompted" : false ,
217+ "prompted" : true ,
218+ "size" : 16.4 ,
219+ "direct_complete" : false ,
220+ "lazy" : false ,
221+ "elo_mle" : 874
222+ },
223+ "DeepSeek-Coder-V2-Lite-Instruct" : {
224+ "link" : " https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" ,
225+ "open-data" : " None" ,
226+ "pass@1" : {
227+ "instruct" : null ,
228+ "complete" : 46.51
229+ },
230+ "prompted" : true ,
231+ "size" : 16 ,
232+ "direct_complete" : false ,
233+ "lazy" : false ,
234+ "elo_mle" : 874
235+ },
236+ "InternLM2-5-20b-chat" : {
237+ "link" : " https://huggingface.co/internlm/internlm2_5-20b-chat" ,
238+ "open-data" : " None" ,
239+ "pass@1" : {
240+ "instruct" : null ,
241+ "complete" : 44.89
242+ },
243+ "prompted" : true ,
244+ "size" : 20 ,
245+ "direct_complete" : false ,
246+ "lazy" : false ,
247+ "elo_mle" : 874
248+ },
249+ "StarCoder2-15b-instruct-v0.1" : {
250+ "link" : " https://huggingface.co/bigcode/starcoder2-15b-instruct-v0.1" ,
251+ "open-data" : " None" ,
252+ "pass@1" : {
253+ "instruct" : null ,
254+ "complete" : 47.94
255+ },
256+ "prompted" : true ,
127257 "size" : 15 ,
128- "direct_complete" : true ,
258+ "direct_complete" : false ,
259+ "lazy" : false ,
260+ "elo_mle" : 874
261+ },
262+ "Claude-3-sonnet@20240229" : {
263+ "link" : " " ,
264+ "open-data" : " None" ,
265+ "pass@1" : {
266+ "instruct" : null ,
267+ "complete" : 53.97
268+ },
269+ "prompted" : true ,
270+ "size" : " None" ,
271+ "direct_complete" : false ,
272+ "lazy" : false ,
273+ "elo_mle" : 874
274+ },
275+ "GPT-4o-2024-05-13" : {
276+ "link" : " " ,
277+ "open-data" : " None" ,
278+ "pass@1" : {
279+ "instruct" : null ,
280+ "complete" : 67
281+ },
282+ "prompted" : true ,
283+ "size" : " None" ,
284+ "direct_complete" : false ,
285+ "lazy" : false ,
286+ "elo_mle" : 874
287+ },
288+ "GPT-3.5-turbo-0613" : {
289+ "link" : " " ,
290+ "open-data" : " None" ,
291+ "pass@1" : {
292+ "instruct" : null ,
293+ "complete" : 51.7
294+ },
295+ "prompted" : true ,
296+ "size" : " None" ,
297+ "direct_complete" : false ,
129298 "lazy" : false ,
130- "elo_mle" : 960
299+ "elo_mle" : 874
131300 }
132301}
0 commit comments