|
28 | 28 | from .loader_utils import count_lines, read_libsvm_msrank, retrieve |
29 | 29 |
|
30 | 30 |
|
def cifar_10(dataset_dir: Path) -> bool:
    """
    Fetch CIFAR-10 through OpenML and store a train/test split as .npy files.

    Source:
    University of Toronto
    Collected by Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton
    https://www.cs.toronto.edu/~kriz/cifar.html

    Classification task. n_classes = 10
    cifar_10 x train dataset (54000, 3072)
    cifar_10 y train dataset (54000, 1)
    cifar_10 x test  dataset (6000,  3072)
    cifar_10 y test  dataset (6000,  1)

    Four files named ``cifar_10_<part>.npy`` (x_train, x_test, y_train,
    y_test) are written into ``dataset_dir``, which is created if missing.
    Returns True once all four files have been saved.
    """
    dataset_name = 'cifar_10'
    os.makedirs(dataset_dir, exist_ok=True)

    # data_home points at dataset_dir, so the OpenML download lands next to
    # the generated .npy files.
    features, labels = fetch_openml(
        data_id=40927,
        return_X_y=True,
        as_frame=False,
        data_home=dataset_dir,
    )

    features = pd.DataFrame(features)
    # Going through a one-column DataFrame keeps the labels two-dimensional
    # (n, 1); the cast turns them into integers.
    labels = pd.DataFrame(labels).astype(int)

    logging.info(f'{dataset_name} is loaded, started parsing...')

    # train_test_split returns [x_train, x_test, y_train, y_test] in this order.
    split = train_test_split(features, labels, test_size=0.1, random_state=42)
    parts = dict(zip(('x_train', 'x_test', 'y_train', 'y_test'), split))

    for part_name, part in parts.items():
        target = os.path.join(dataset_dir, f'{dataset_name}_{part_name}.npy')
        np.save(target, part)

    logging.info(f'dataset {dataset_name} is ready.')
    return True
| 65 | + |
| 66 | + |
31 | 67 | def connect(dataset_dir: Path) -> bool: |
32 | 68 | """ |
33 | 69 | Source: |
|
0 commit comments