@@ -115,6 +115,9 @@ def _check_dataset(self, dataset):
115115 self .assertIsInstance (dataset ['status' ], six .string_types )
116116 self .assertIn (dataset ['status' ], ['in_preparation' , 'active' ,
117117 'deactivated' ])
def _check_datasets(self, datasets):
    """Run ``_check_dataset`` on every entry of ``datasets``.

    ``datasets`` is iterated for its keys and each key is used to index
    back into it (presumably the dict returned by
    ``openml.datasets.list_datasets`` -- confirm against callers).
    """
    for did in datasets:
        self._check_dataset(datasets[did])
118121
119122 def test_tag_untag_dataset (self ):
120123 tag = 'test_tag_%d' % random .randint (1 , 1000000 )
@@ -129,23 +132,45 @@ def test_list_datasets(self):
129132 datasets = openml .datasets .list_datasets ()
130133 # 1087 as the number of datasets on openml.org
131134 self .assertGreaterEqual (len (datasets ), 100 )
132- for did in datasets :
133- self ._check_dataset (datasets [did ])
135+ self ._check_datasets (datasets )
134136
def test_list_datasets_by_tag(self):
    # List only datasets carrying the 'study_14' tag; at least 100 are
    # expected, and every returned entry is validated by the shared helper.
    datasets = openml.datasets.list_datasets(tag='study_14')
    self.assertGreaterEqual(len(datasets), 100)
    self._check_datasets(datasets)
141+
def test_list_datasets_by_number_instances(self):
    # Filter the listing with number_instances="5..100" (presumably an
    # inclusive "min..max" range -- confirm against the server API) and
    # expect at least 4 matches, each validated by the shared helper.
    datasets = openml.datasets.list_datasets(number_instances="5..100")
    self.assertGreaterEqual(len(datasets), 4)
    self._check_datasets(datasets)
146+
def test_list_datasets_by_number_features(self):
    # Filter the listing with number_features="50..100" and expect at
    # least 8 matches, each validated by the shared helper.
    datasets = openml.datasets.list_datasets(number_features="50..100")
    self.assertGreaterEqual(len(datasets), 8)
    self._check_datasets(datasets)
151+
def test_list_datasets_by_number_classes(self):
    # Filter the listing with number_classes="5" (a single value rather
    # than a range string) and expect at least 3 matches, each validated
    # by the shared helper.
    datasets = openml.datasets.list_datasets(number_classes="5")
    self.assertGreaterEqual(len(datasets), 3)
    self._check_datasets(datasets)
156+
def test_list_datasets_by_number_missing_values(self):
    # Filter the listing with number_missing_values="5..100" and expect
    # at least 5 matches, each validated by the shared helper.
    datasets = openml.datasets.list_datasets(
        number_missing_values="5..100")
    self.assertGreaterEqual(len(datasets), 5)
    self._check_datasets(datasets)
161+
def test_list_datasets_combined_filters(self):
    # Pass a tag filter and two numeric filters in a single call and
    # expect at least one dataset to come back; every returned entry is
    # validated by the shared helper.
    datasets = openml.datasets.list_datasets(
        tag='study_14',
        number_instances="100..1000",
        number_missing_values="800..1000",
    )
    self.assertGreaterEqual(len(datasets), 1)
    self._check_datasets(datasets)
140166
def test_list_datasets_paginate(self):
    # Request consecutive pages of `size` datasets for offsets
    # 0, 10, ..., 90 and check that no page exceeds the requested size.
    size = 10
    stop = 100  # exclusive upper bound for the offsets; avoids shadowing max()
    for offset in range(0, stop, size):
        datasets = openml.datasets.list_datasets(offset=offset, size=size)
        # A page may be shorter than `size`, but never longer.
        self.assertGreaterEqual(size, len(datasets))
        self._check_datasets(datasets)
149174
150175 def test_list_datasets_empty (self ):
151176 datasets = openml .datasets .list_datasets (tag = 'NoOneWouldUseThisTagAnyway' )
0 commit comments