22import io
33import logging
44import os
5+ import six
56import sys
67
78import arff
1011import scipy .sparse
1112import xmltodict
1213
13- from .. datasets . data_feature import OpenMLDataFeature
14+ from .data_feature import OpenMLDataFeature
1415from ..exceptions import PyOpenMLError
1516
1617if sys .version_info [0 ] >= 3 :
@@ -65,10 +66,14 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
6566 self .default_target_attribute = default_target_attribute
6667 self .row_id_attribute = row_id_attribute
6768 self .ignore_attributes = None
68- if isinstance (ignore_attribute , str ):
69+ if isinstance (ignore_attribute , six . string_types ):
6970 self .ignore_attributes = [ignore_attribute ]
7071 elif isinstance (ignore_attribute , list ):
7172 self .ignore_attributes = ignore_attribute
73+ elif ignore_attribute is None :
74+ pass
75+ else :
76+ raise ValueError ('wrong data type for ignore_attribute. Should be list. ' )
7277 self .version_label = version_label
7378 self .citation = citation
7479 self .tag = tag
@@ -88,7 +93,8 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
8893 xmlfeature ['oml:data_type' ],
8994 None , #todo add nominal values (currently not in database)
9095 int (xmlfeature ['oml:number_of_missing_values' ]))
91- assert idx == feature .index , "Data features not provided in right order"
96+ if idx != feature .index :
97+ raise ValueError ('Data features not provided in right order' )
9298 self .features [feature .index ] = feature
9399
94100
@@ -313,15 +319,40 @@ def retrieve_class_labels(self, target_name='class'):
313319 return None
314320
315321
316- def get_features_by_type (self , data_type , exclude = None , exclude_ignore_attributes = True , exclude_row_id_attribute = True ):
322+ def get_features_by_type (self , data_type , exclude = None ,
323+ exclude_ignore_attributes = True ,
324+ exclude_row_id_attribute = True ):
325+ '''
326+ Returns indices of features of a given type, e.g., all nominal features.
327+ Optional parameters can exclude features by index or by role (ignore attributes, row id attribute).
328+
329+ Parameters
330+ ----------
331+ data_type : str
332+ The data type to return (e.g., nominal, numeric, date, string)
333+ exclude : list(int)
334+ Indices to exclude (and adapt the return values as if these indices
335+ are not present)
336+ exclude_ignore_attributes : bool
337+ Whether to exclude the defined ignore attributes (and adapt the
338+ return values as if these indices are not present)
339+ exclude_row_id_attribute : bool
340+ Whether to exclude the defined row id attributes (and adapt the
341+ return values as if these indices are not present)
342+
343+ Returns
344+ -------
345+ result : list
346+ a list of indices that have the specified data type
347+ '''
317348 assert data_type in OpenMLDataFeature .LEGAL_DATA_TYPES , "Illegal feature type requested"
318349 if self .ignore_attributes is not None :
319350 assert type (self .ignore_attributes ) is list , "ignore_attributes should be a list"
320351 if self .row_id_attribute is not None :
321352 assert type (self .row_id_attribute ) is str , "row id attribute should be a str"
322353 if exclude is not None :
323354 assert type (exclude ) is list , "Exclude should be a list"
324- assert all (isinstance (elem , str ) for elem in exclude ), "Exclude should be a list of strings"
355+ # assert all(isinstance(elem, str) for elem in exclude), "Exclude should be a list of strings"
325356 to_exclude = []
326357 if exclude is not None :
327358 to_exclude .extend (exclude )
0 commit comments