@@ -71,13 +71,7 @@ def test_get_cached_dataset_arff_not_cached(self):
7171 openml .datasets .functions ._get_cached_dataset_arff ,
7272 3 )
7373
74- def test_list_datasets (self ):
75- # We can only perform a smoke test here because we test on dynamic
76- # data from the internet...
77- datasets = openml .datasets .list_datasets ()
78- # 1087 as the number of datasets on openml.org
79- self .assertGreaterEqual (len (datasets ), 1087 )
80- for dataset in datasets :
74+ def _check_dataset (self , dataset ):
8175 self .assertEqual (type (dataset ), dict )
8276 self .assertGreaterEqual (len (dataset ), 2 )
8377 self .assertIn ('did' , dataset )
@@ -87,34 +81,29 @@ def test_list_datasets(self):
8781 self .assertIn (dataset ['status' ], ['in_preparation' , 'active' ,
8882 'deactivated' ])
8983
84+ def test_list_datasets (self ):
85+ # We can only perform a smoke test here because we test on dynamic
86+ # data from the internet...
87+ datasets = openml .datasets .list_datasets ()
88+ # 1087 as the number of datasets on openml.org
89+ self .assertGreaterEqual (len (datasets ), 1087 )
90+ for did in datasets :
91+ self ._check_dataset (datasets [did ])
92+
9093 def test_list_datasets_by_tag (self ):
9194 datasets = openml .datasets .list_datasets (tag = 'uci' )
9295 self .assertGreaterEqual (len (datasets ), 5 )
93- for dataset in datasets :
94- self .assertEqual (type (dataset ), dict )
95- self .assertGreaterEqual (len (dataset ), 2 )
96- self .assertIn ('did' , dataset )
97- self .assertIsInstance (dataset ['did' ], int )
98- self .assertIn ('status' , dataset )
99- self .assertTrue (is_string (dataset ['status' ]))
100- self .assertIn (dataset ['status' ], ['in_preparation' , 'active' ,
101- 'deactivated' ])
96+ for did in datasets :
97+ self ._check_dataset (datasets [did ])
10298
10399 def test_list_datasets_paginate (self ):
104100 size = 10
105101 max = 100
106102 for i in range (0 , max , size ):
107- data = openml .datasets .list_datasets (offset = i , size = size )
108- self .assertGreaterEqual (size , len (data ))
109- for dataset in data :
110- self .assertEqual (type (dataset ), dict )
111- self .assertGreaterEqual (len (dataset ), 2 )
112- self .assertIn ('did' , dataset )
113- self .assertIsInstance (dataset ['did' ], int )
114- self .assertIn ('status' , dataset )
115- self .assertTrue (is_string (dataset ['status' ]))
116- self .assertIn (dataset ['status' ], ['in_preparation' ,
117- 'active' , 'deactivated' ])
103+ datasets = openml .datasets .list_datasets (offset = i , size = size )
104+ self .assertGreaterEqual (size , len (datasets ))
105+ for did in datasets :
106+ self ._check_dataset (datasets [did ])
118107
119108 @unittest .skip ('See https://github.com/openml/openml-python/issues/149' )
120109 def test_check_datasets_active (self ):
0 commit comments