|
14 | 14 | from openml.testing import TestBase |
15 | 15 | from openml.runs.functions import _run_task_get_arffcontent, \ |
16 | 16 | _get_seeded_model, _run_exists, _extract_arfftrace, \ |
17 | | - _extract_arfftrace_attributes |
| 17 | + _extract_arfftrace_attributes, _prediction_to_row |
18 | 18 |
|
19 | 19 | from sklearn.naive_bayes import GaussianNB |
20 | 20 | from sklearn.model_selection._search import BaseSearchCV |
@@ -283,7 +283,7 @@ def test__run_exists(self): |
283 | 283 | self.assertGreater(len(run_ids), 0) |
284 | 284 |
|
285 | 285 |
|
286 | | - def test_get_seeded_model(self): |
| 286 | + def test__get_seeded_model(self): |
287 | 287 | # randomized models that are initialized without seeds, can be seeded |
288 | 288 | randomized_clfs = [ |
289 | 289 | BaggingClassifier(), |
@@ -318,7 +318,7 @@ def test_get_seeded_model(self): |
318 | 318 | self.assertIsInstance(new_params[param], int) |
319 | 319 | self.assertIsNotNone(new_params[param]) |
320 | 320 |
|
321 | | - def test_get_seeded_model_raises(self): |
| 321 | + def test__get_seeded_model_raises(self): |
322 | 322 | # the _get_seeded_model should raise exception if random_state is anything else than an int |
323 | 323 | randomized_clfs = [ |
324 | 324 | BaggingClassifier(random_state=np.random.RandomState(42)), |
@@ -377,6 +377,44 @@ def test__extract_arfftrace(self): |
377 | 377 |
|
378 | 378 | self.assertEqual(param_grid.keys(), optimized_params) |
379 | 379 |
|
def test__prediction_to_row(self):
    """Check the rows ``_prediction_to_row`` builds for one fold of task 20.

    A small imputer / variance-threshold / naive-Bayes pipeline is fitted
    on the training split of repeat 0, fold 0.  For every test sample the
    generated row must:

    * be a list with ``5 + len(task.class_labels)`` entries,
    * start with the repeat number, the fold number and the sample index,
    * continue with one float per class label, each within ``[0, 1]`` and
      together summing to (almost) 1,
    * end with two entries that are both members of ``task.class_labels``.
    """
    repeat_nr = 0
    fold_nr = 0
    clf = sklearn.pipeline.Pipeline(steps=[
        ('Imputer', Imputer(strategy='mean')),
        ('VarianceThreshold', VarianceThreshold(threshold=0.05)),
        ('Estimator', GaussianNB()),
    ])
    task = openml.tasks.get_task(20)
    train, test = task.get_train_test_split_indices(repeat_nr, fold_nr)
    X, y = task.get_X_and_y()
    clf.fit(X[train], y[train])

    test_X = X[test]
    test_y = y[test]

    probaY = clf.predict_proba(test_X)
    predY = clf.predict(test_X)

    # Loop invariants: the label list and where the per-class block sits.
    class_labels = task.class_labels
    proba_start = 3
    proba_stop = proba_start + len(class_labels)

    for idx in range(len(test_X)):
        arff_line = _prediction_to_row(repeat_nr, fold_nr, idx,
                                       class_labels[test_y[idx]],
                                       predY[idx], probaY[idx],
                                       class_labels, clf.classes_)

        self.assertIsInstance(arff_line, list)
        self.assertEqual(len(arff_line), 5 + len(class_labels))
        self.assertEqual(arff_line[0], repeat_nr)
        self.assertEqual(arff_line[1], fold_nr)
        self.assertEqual(arff_line[2], idx)

        # Accumulate under a dedicated name so the builtin ``sum`` is not
        # shadowed inside the test.
        proba_total = 0.0
        for confidence in arff_line[proba_start:proba_stop]:
            self.assertIsInstance(confidence, float)
            self.assertGreaterEqual(confidence, 0.0)
            self.assertLessEqual(confidence, 1.0)
            proba_total += confidence
        self.assertAlmostEqual(proba_total, 1.0)

        # The two trailing entries must both be known class labels.
        self.assertIn(arff_line[-1], class_labels)
        self.assertIn(arff_line[-2], class_labels)
| 417 | + |
380 | 418 |
|
381 | 419 | def test_run_with_classifiers_in_param_grid(self): |
382 | 420 | task = openml.tasks.get_task(115) |
|
0 commit comments