Skip to content

Commit 1c025db

Browse files
authored
Convert non-str column names to str when creating a dataset. (#851)
* Convert non-str column names to str when creating a dataset. * Add unit test
1 parent 433f1e7 commit 1c025db

2 files changed

Lines changed: 11 additions & 0 deletions

File tree

openml/datasets/functions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,11 @@ def attributes_arff_from_df(df):
549549
'string': 'STRING'
550550
}
551551
attributes_arff = []
552+
553+
if not all([isinstance(column_name, str) for column_name in df.columns]):
554+
logger.warning("Converting non-str column names to str.")
555+
df.columns = [str(column_name) for column_name in df.columns]
556+
552557
for column_name in df:
553558
# skipna=True does not properly infer the dtype. The NA values are
554559
# dropped before the inference instead.

tests/test_datasets/test_dataset_functions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,12 @@ def test_attributes_arff_from_df(self):
569569
self.assertEqual(attributes, [('integer', 'INTEGER'),
570570
('floating', 'REAL')])
571571

572+
def test_attributes_arff_from_df_numeric_column(self):
573+
# Test column names are automatically converted to str if needed (#819)
574+
df = pd.DataFrame({0: [1, 2, 3], 0.5: [4, 5, 6], 'target': [0, 1, 1]})
575+
attributes = attributes_arff_from_df(df)
576+
self.assertEqual(attributes, [('0', 'INTEGER'), ('0.5', 'INTEGER'), ('target', 'INTEGER')])
577+
572578
def test_attributes_arff_from_df_mixed_dtype_categories(self):
573579
# liac-arff imposed categorical attributes to be of string dtype. We
574580
# raise an error if this is not the case.

0 commit comments

Comments
 (0)