Skip to content

Commit 89c6b0a

Browse files
committed
ENH add get_data_home to localize the downloaded files
1 parent f3605d7 commit 89c6b0a

10 files changed

Lines changed: 106 additions & 44 deletions

tutorials/movies_3T/00_download_vim4.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,11 @@
2929
# Download
3030
# --------
3131

32-
# Update the directory variable to link to the directory containing the data.
33-
directory = '/data1/tutorials/vim-4/'
32+
# path of the data directory
33+
import os
34+
from voxelwise_tutorials.io import get_data_home
35+
directory = os.path.join(get_data_home(), "vim-4")
36+
print(directory)
3437

3538
###############################################################################
3639
# We will only use the first subject in this tutorial, but you can run the same
@@ -73,11 +76,10 @@
7376
# "TBD/stimuli/train_11.hdf",
7477
]
7578

76-
if __name__ == "__main__":
77-
78-
username = input("CRCNS username: ")
79-
password = getpass.getpass("CRCNS password: ")
79+
###############################################################################
80+
username = input("CRCNS username: ")
81+
password = getpass.getpass("CRCNS password: ")
8082

81-
for datafile in DATAFILES:
82-
local_filename = download_crcns(datafile, username, password,
83-
destination=directory)
83+
for datafile in DATAFILES:
84+
local_filename = download_crcns(datafile, username, password,
85+
destination=directory)

tutorials/movies_3T/01_plot_explainable_variance.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,17 @@
2121
###############################################################################
2222

2323
# path of the data directory
24-
directory = '/data1/tutorials/vim-4/'
24+
import os
25+
from voxelwise_tutorials.io import get_data_home
26+
directory = os.path.join(get_data_home(), "vim-4")
27+
print(directory)
2528

2629
# modify to use another subject
2730
subject = "S01"
2831

2932
###############################################################################
3033
# Compute the explainable variance
3134
# --------------------------------
32-
import os
3335
import numpy as np
3436

3537
from voxelwise_tutorials.io import load_hdf5_array

tutorials/movies_3T/02_plot_wordnet_model.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@
2828
###############################################################################
2929

3030
# path of the data directory
31-
directory = '/data1/tutorials/vim-4/'
31+
import os
32+
from voxelwise_tutorials.io import get_data_home
33+
directory = os.path.join(get_data_home(), "vim-4")
34+
print(directory)
3235

3336
# modify to use another subject
3437
subject = "S01"
@@ -38,7 +41,6 @@
3841
# -------------
3942
#
4043
# We first load the fMRI responses.
41-
import os
4244
import numpy as np
4345
from voxelwise_tutorials.io import load_hdf5_array
4446

tutorials/movies_3T/03_plot_motion_energy_model.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@
3232
###############################################################################
3333

3434
# path of the data directory
35-
directory = '/data1/tutorials/vim-4/'
35+
import os
36+
from voxelwise_tutorials.io import get_data_home
37+
directory = os.path.join(get_data_home(), "vim-4")
38+
print(directory)
3639

3740
# modify to use another subject
3841
subject = "S01"
@@ -42,7 +45,6 @@
4245
# -------------
4346
#
4447
# We first load the fMRI responses.
45-
import os
4648
import numpy as np
4749

4850
from voxelwise_tutorials.io import load_hdf5_array

tutorials/movies_3T/04_plot_banded_ridge_model.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
###############################################################################
1919

2020
# path of the data directory
21-
directory = '/data1/tutorials/vim-4/'
21+
import os
22+
from voxelwise_tutorials.io import get_data_home
23+
directory = os.path.join(get_data_home(), "vim-4")
24+
print(directory)
2225

2326
# modify to use another subject
2427
subject = "S01"
@@ -29,7 +32,6 @@
2932
#
3033
# As in the previous examples, we first load the fMRI responses, which are our
3134
# regression targets.
32-
import os
3335
import numpy as np
3436

3537
from voxelwise_tutorials.io import load_hdf5_array

tutorials/movies_3T/05_extract_motion_energy.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,10 @@
2626
###############################################################################
2727

2828
# path of the data directory
29-
directory = '/data1/tutorials/vim-4/'
29+
import os
30+
from voxelwise_tutorials.io import get_data_home
31+
directory = os.path.join(get_data_home(), "vim-4")
32+
print(directory)
3033

3134
###############################################################################
3235
# Load the stimuli images
@@ -35,7 +38,6 @@
3538
# Here the data is not loaded in memory; we only take a peek at the data shape.
3639

3740
import h5py
38-
import os
3941

4042
first_file_name = os.path.join(directory, 'stimuli', 'train_00.hdf')
4143
print(f"Content of {first_file_name}:")

tutorials/movies_4T/00_download_vim2.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,11 @@
2727
# Download
2828
# --------
2929

30-
# Update the directory variable to link to the directory containing the data.
31-
directory = '/data1/tutorials/vim-2/'
30+
# path of the data directory
31+
import os
32+
from voxelwise_tutorials.io import get_data_home
33+
directory = os.path.join(get_data_home(), "vim-2")
34+
print(directory)
3235

3336
###############################################################################
3437
# We will only use the first subject in this tutorial, but you can run the same
@@ -50,11 +53,10 @@
5053
'vim-2/docs/crcns-vim-2-data-description.pdf',
5154
]
5255

53-
if __name__ == "__main__":
54-
55-
username = input("CRCNS username: ")
56-
password = getpass.getpass("CRCNS password: ")
56+
###############################################################################
57+
username = input("CRCNS username: ")
58+
password = getpass.getpass("CRCNS password: ")
5759

58-
for datafile in DATAFILES:
59-
local_filename = download_crcns(datafile, username, password,
60-
destination=directory, unpack=True)
60+
for datafile in DATAFILES:
61+
local_filename = download_crcns(datafile, username, password,
62+
destination=directory, unpack=True)

tutorials/movies_4T/01_extract_motion_energy.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,13 @@
2525
###############################################################################
2626
# Load the stimuli images
2727
# -----------------------
28-
#
29-
# We downloaded the files in the previous script, and here we update the path
30-
# variable to link to the directory containing the data.
28+
# (We downloaded the files in the previous script.)
3129

32-
directory = '/data1/tutorials/vim-2/'
30+
# path of the data directory
31+
import os
32+
from voxelwise_tutorials.io import get_data_home
33+
directory = os.path.join(get_data_home(), "vim-2")
34+
print(directory)
3335

3436
###############################################################################
3537
# Here the data is not loaded in memory; we only take a peek at the data shape.

tutorials/movies_4T/02_plot_ridge_model.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,22 @@
2626
###############################################################################
2727
# Load the data
2828
# -------------
29-
#
29+
3030
# path of the data directory
31-
directory = '/data1/tutorials/vim-2/'
31+
import os
32+
from voxelwise_tutorials.io import get_data_home
33+
directory = os.path.join(get_data_home(), "vim-2")
34+
print(directory)
35+
36+
# modify to use another subject
37+
subject = "subject1"
3238

3339
###############################################################################
3440
# Here the data is not loaded in memory; we only take a peek at the data shape.
3541
import h5py
3642
import os.path as op
3743

38-
with h5py.File(op.join(directory, 'VoxelResponses_subject1.mat'), 'r') as f:
44+
with h5py.File(op.join(directory, f'VoxelResponses_{subject}.mat'), 'r') as f:
3945
print(f.keys()) # Show all variables
4046
for key in f.keys():
4147
print(f[key])
@@ -47,7 +53,7 @@
4753

4854
from voxelwise_tutorials.io import load_hdf5_array
4955

50-
file_name = op.join(directory, 'VoxelResponses_subject1.mat')
56+
file_name = op.join(directory, f'VoxelResponses_{subject}.mat')
5157
Y_train = load_hdf5_array(file_name, key='rt')
5258
Y_test_repeats = load_hdf5_array(file_name, key='rva')
5359

voxelwise_tutorials/io.py

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import os
2-
import os.path as op
32
import requests
43
import shutil
54

@@ -51,16 +50,17 @@ def download_crcns(datafile, username, password, destination,
5150
raise RuntimeError(response.text)
5251

5352
# remove the dataset name
54-
filename = op.join(*login_data['fn'].split('/')[1:])
55-
local_filename = op.join(destination, filename)
53+
filename = os.path.join(*login_data['fn'].split('/')[1:])
54+
local_filename = os.path.join(destination, filename)
5655

5756
# create subdirectory if necessary
58-
local_directory = op.dirname(local_filename)
59-
if not op.exists(local_directory) or not op.isdir(local_directory):
57+
local_directory = os.path.dirname(local_filename)
58+
if not os.path.exists(local_directory) or not os.path.isdir(
59+
local_directory):
6060
os.makedirs(local_directory)
6161

6262
# download the file if it does not already exist
63-
if op.exists(local_filename):
63+
if os.path.exists(local_filename):
6464
print("%s already exists." % local_filename)
6565
else:
6666
bar = ProgressBar(title=filename, max_value=content_length)
@@ -71,7 +71,7 @@ def download_crcns(datafile, username, password, destination,
7171
f.write(chunk)
7272

7373
# uncompress archives
74-
if unpack and op.splitext(local_filename)[1] in [".zip", ".gz"]:
74+
if unpack and os.path.splitext(local_filename)[1] in [".zip", ".gz"]:
7575
unpack_archive(local_filename)
7676

7777
return local_filename
@@ -86,7 +86,7 @@ def unpack_archive(archive_name):
8686
Local name of the archive.
8787
"""
8888
print('\tUnpacking')
89-
extract_dir = op.dirname(archive_name)
89+
extract_dir = os.path.dirname(archive_name)
9090
shutil.unpack_archive(archive_name, extract_dir=extract_dir)
9191

9292

@@ -169,3 +169,43 @@ def save_hdf5_dataset(file_name, dataset, mode='w'):
169169
hf.create_dataset(name, data=array, compression='gzip')
170170

171171
print("Saved %s" % file_name)
172+
173+
174+
def get_data_home(data_home=None) -> str:
175+
"""Return the path of the voxelwise tutorials data dir.
176+
177+
This folder is used by some large dataset loaders to avoid downloading the
178+
data several times. By default the data dir is set to a folder named
179+
'voxelwise_tutorials_data' in the user home folder. Alternatively, it can be set
180+
by the 'VOXELWISE_TUTORIALS_DATA' environment variable or programmatically
181+
by giving an explicit folder path. The '~' symbol is expanded to the user
182+
home folder. If the folder does not already exist, it is automatically
183+
created.
184+
185+
Parameters
186+
----------
187+
data_home : str | None
188+
The path to voxelwise tutorials data dir.
189+
"""
190+
if data_home is None:
191+
data_home = os.environ.get(
192+
'VOXELWISE_TUTORIALS_DATA',
193+
os.path.join('~', 'voxelwise_tutorials_data'))
194+
195+
data_home = os.path.expanduser(data_home)
196+
if not os.path.exists(data_home):
197+
os.makedirs(data_home)
198+
199+
return data_home
200+
201+
202+
def clear_data_home(data_home=None):
203+
"""Delete all the content of the data home cache.
204+
205+
Parameters
206+
----------
207+
data_home : str | None
208+
The path to voxelwise tutorials data dir.
209+
"""
210+
data_home = get_data_home(data_home)
211+
shutil.rmtree(data_home)

0 commit comments

Comments
 (0)