Skip to content

Commit 8986421

Browse files
committed
EXA split the wordnet example into two scripts
1 parent 3f728dd commit 8986421

17 files changed

Lines changed: 641 additions & 251 deletions

tutorials/movies_3T/00_download_vim4.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
Gallant Lab Natural Movie 3T fMRI Data. CRCNS.org.
2424
http://dx.doi.org/10.6080/TBD
2525
"""
26-
# sphinx_gallery_thumbnail_path = "_static/crcns.png"
26+
# sphinx_gallery_thumbnail_path = "static/crcns.png"
2727

2828
###############################################################################
2929
# Download

tutorials/movies_3T/02_plot_wordnet_model.py

Lines changed: 5 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,11 @@
136136
# Then we concatenate the features with multiple delays to account for the
137137
# hemodynamic response. Indeed, the BOLD signal recorded in fMRI experiments is
138138
# delayed in time with respect to the stimulus. With different delayed versions
139-
# of the features, the linear regression model will weight each
140-
# delayed feature with a different weight, to maximize the predictions.
141-
# With a sample every 2 seconds, we typically use 4 delays [1, 2, 3, 4] to
142-
# cover the most part of the hemodynamic response peak.
139+
# of the features, the linear regression model will weight each delayed feature
140+
# with a different weight, to maximize the predictions. With a sample every 2
141+
# seconds, we typically use 4 delays [1, 2, 3, 4] to cover the most part of the
142+
# hemodynamic response peak. In the next example, we further describe this
143+
# hemodynamic response estimation.
143144
from voxelwise_tutorials.delayer import Delayer
144145
delayer = Delayer(delays=[1, 2, 3, 4])
145146

@@ -299,120 +300,6 @@
299300
plot_alphas_diagnostic(best_alphas=best_alphas, alphas=alphas)
300301
plt.show()
301302

302-
###############################################################################
303-
# Compare with a model without delays
304-
# -----------------------------------
305-
#
306-
# To present an example of model comparison, we define here another model,
307-
# without feature delays (i.e. no ``Delayer``). Because the BOLD signal is
308-
# inherently slow due to the dynamics of neuro-vascular coupling, this model is
309-
# unlikely to perform well.
310-
311-
pipeline_nodelay = make_pipeline(
312-
StandardScaler(with_mean=True, with_std=False),
313-
KernelRidgeCV(
314-
alphas=alphas, cv=cv,
315-
solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
316-
n_targets_batch_refit=100)),
317-
)
318-
pipeline_nodelay
319-
320-
###############################################################################
321-
# We fit and score the model as the previous one.
322-
pipeline_nodelay.fit(X_train, Y_train)
323-
scores_nodelay = pipeline_nodelay.score(X_test, Y_test)
324-
scores_nodelay = backend.to_numpy(scores_nodelay)
325-
print("(n_voxels,) =", scores_nodelay.shape)
326-
###############################################################################
327-
# Then, we plot the comparison of model performances with a 2D histogram.
328-
# All ~70k voxels are represented in this histogram, where the diagonal
329-
# corresponds to identical performance for both models. A distribution deviating
330-
# from the diagonal means that one model has better predictive performances
331-
# than the other.
332-
333-
from voxelwise_tutorials.viz import plot_hist2d
334-
335-
ax = plot_hist2d(scores_nodelay, scores)
336-
ax.set(
337-
title='Generalization R2 scores',
338-
xlabel='model without delays',
339-
ylabel='model with delays',
340-
)
341-
plt.show()
342-
343-
###############################################################################
344-
# We see that the model with delays performs much better than the model without
345-
# delays. This can be seen in voxels with scores above 0. The distribution
346-
# of scores below zero is not very informative, since it corresponds to voxels
347-
# with poor predictive performances anyway, and it only shows which model is
348-
# overfitting the most.
349-
350-
###############################################################################
351-
# Visualize the HRF
352-
# -----------------
353-
#
354-
# We just saw that delays are necessary to model BOLD responses. Here we show
355-
# how the fitted ridge regression weights follow the hemodynamic response
356-
# function (HRF).
357-
#
358-
# Fitting a kernel ridge regression results in a set of coefficients called the
359-
# "dual" coefficients :math:`w`. These coefficients differ from the "primal"
360-
# coefficients :math:`\beta` obtained with a ridge regression, but the primal
361-
# coefficients can be computed from the dual coefficients using the training
362-
# features :math:`X`:
363-
#
364-
# .. math::
365-
#
366-
# \beta = X^\top w
367-
#
368-
# To better visualize the HRF, we will refit a model with more delays, but only
369-
# on a selection of voxels to speed up the computations.
370-
371-
# pick the 10 best voxels
372-
voxel_selection = np.argsort(scores)[-10:]
373-
374-
# define a pipeline with more delays
375-
pipeline_many_delays = make_pipeline(
376-
StandardScaler(with_mean=True, with_std=False),
377-
Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),
378-
KernelRidgeCV(
379-
alphas=alphas, cv=cv,
380-
solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
381-
n_targets_batch_refit=100)),
382-
)
383-
384-
pipeline_many_delays.fit(X_train, Y_train[:, voxel_selection])
385-
386-
# get the (primal) ridge regression coefficients
387-
primal_coef = pipeline_many_delays[-1].get_primal_coef()
388-
primal_coef = backend.to_numpy(primal_coef)
389-
390-
# get the delays
391-
delays = pipeline_many_delays.named_steps['delayer'].delays
392-
# split the ridge coefficients per delays
393-
primal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))
394-
395-
# select the feature with the largest coefficients for each voxel
396-
feature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),
397-
axis=0)
398-
primal_coef_selection = primal_coef_per_delay[:, feature_selection,
399-
np.arange(len(voxel_selection))]
400-
401-
plt.plot(delays, primal_coef_selection)
402-
plt.xlabel('Delays')
403-
plt.xticks(delays)
404-
plt.ylabel('Ridge coefficients')
405-
plt.title(f'Largest feature for the {len(voxel_selection)} best voxels')
406-
plt.axhline(0, color='k', linewidth=0.5)
407-
plt.show()
408-
409-
###############################################################################
410-
# In this dataset, the brain responses are recorded every two seconds.
411-
#
412-
# We see that the hemodynamic response function (HRF) is captured in the model
413-
# weights. In this dataset, we can limit the number of features by using only
414-
# the most informative delays, for example [1, 2, 3, 4].
415-
416303
###############################################################################
417304
# Visualize the regression coefficients
418305
# -------------------------------------
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
"""
2+
==================================
3+
Visualize the hemodynamic response
4+
==================================
5+
6+
In this example, we describe how the hemodynamic response is estimated in the
7+
previous model. We fit the same ridge model as in the previous example, and
8+
further describe the need to delay the features in time.
9+
10+
As explained in the previous example, the BOLD signal recorded in fMRI experiments
11+
is delayed in time with respect to the stimulus. With different delayed
12+
versions of the features, the linear regression model weights each delayed
13+
feature with a different weight, to maximize the predictions. With a sample
14+
every 2 seconds, we typically use 4 delays [1, 2, 3, 4] to cover the most part
15+
of the hemodynamic response peak.
16+
17+
In this example, we show the decrease in prediction performances when using no
18+
delays. We also show how to visualize the estimated hemodynamic response
19+
function (HRF) using more delays.
20+
"""
21+
# sphinx_gallery_thumbnail_number = 2
22+
###############################################################################
23+
# Path of the data directory
24+
import os
25+
from voxelwise_tutorials.io import get_data_home
26+
directory = os.path.join(get_data_home(), "vim-4")
27+
print(directory)
28+
29+
###############################################################################
30+
31+
# modify to use another subject
32+
subject = "S01"
33+
34+
###############################################################################
35+
# Load the data
36+
# -------------
37+
#
38+
# We first load the fMRI responses.
39+
import numpy as np
40+
from voxelwise_tutorials.io import load_hdf5_array
41+
42+
file_name = os.path.join(directory, "responses", f"{subject}_responses.hdf")
43+
Y_train = load_hdf5_array(file_name, key="Y_train")
44+
Y_test = load_hdf5_array(file_name, key="Y_test")
45+
46+
print("(n_samples_train, n_voxels) =", Y_train.shape)
47+
print("(n_repeats, n_samples_test, n_voxels) =", Y_test.shape)
48+
49+
###############################################################################
50+
# We average the test repeats, to remove the non-repeatable part of fMRI
51+
# responses.
52+
Y_test = Y_test.mean(0)
53+
54+
print("(n_samples_test, n_voxels) =", Y_test.shape)
55+
56+
###############################################################################
57+
# We fill potential NaN (not-a-number) values with zeros.
58+
Y_train = np.nan_to_num(Y_train)
59+
Y_test = np.nan_to_num(Y_test)
60+
61+
###############################################################################
62+
# Then, we load the semantic "wordnet" features.
63+
feature_space = "wordnet"
64+
65+
file_name = os.path.join(directory, "features", f"{feature_space}.hdf")
66+
X_train = load_hdf5_array(file_name, key="X_train")
67+
X_test = load_hdf5_array(file_name, key="X_test")
68+
69+
print("(n_samples_train, n_features) =", X_train.shape)
70+
print("(n_samples_test, n_features) =", X_test.shape)
71+
72+
###############################################################################
73+
# Define the cross-validation scheme
74+
# ----------------------------------
75+
#
76+
# We define the same leave-one-run-out cross-validation split as in the
77+
# previous example.
78+
79+
from sklearn.model_selection import check_cv
80+
from voxelwise_tutorials.utils import generate_leave_one_run_out
81+
82+
# indice of first sample of each run
83+
run_onsets = load_hdf5_array(file_name, key="run_onsets")
84+
print(run_onsets)
85+
86+
###############################################################################
87+
# We define a cross-validation splitter, compatible with ``scikit-learn`` API.
88+
n_samples_train = X_train.shape[0]
89+
cv = generate_leave_one_run_out(n_samples_train, run_onsets)
90+
cv = check_cv(cv) # copy the cross-validation splitter into a reusable list
91+
92+
###############################################################################
93+
# Define the model
94+
# ----------------
95+
#
96+
# We define the same model as in the previous example. See the previous
97+
# example for more details about the model definition.
98+
99+
from sklearn.pipeline import make_pipeline
100+
from sklearn.preprocessing import StandardScaler
101+
from voxelwise_tutorials.delayer import Delayer
102+
from himalaya.kernel_ridge import KernelRidgeCV
103+
from himalaya.backend import set_backend
104+
backend = set_backend("torch_cuda", on_error="warn")
105+
106+
X_train = X_train.astype("float32")
107+
X_test = X_test.astype("float32")
108+
109+
alphas = np.logspace(1, 20, 20)
110+
111+
pipeline = make_pipeline(
112+
StandardScaler(with_mean=True, with_std=False),
113+
Delayer(delays=[1, 2, 3, 4]),
114+
KernelRidgeCV(
115+
alphas=alphas, cv=cv,
116+
solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
117+
n_targets_batch_refit=100)),
118+
)
119+
120+
###############################################################################
121+
from sklearn import set_config
122+
set_config(display='diagram')
123+
pipeline
124+
125+
###############################################################################
126+
# Fit the model
127+
# -------------
128+
#
129+
# We fit on the train set, and score on the test set.
130+
131+
pipeline.fit(X_train, Y_train)
132+
133+
scores = pipeline.score(X_test, Y_test)
134+
scores = backend.to_numpy(scores)
135+
print("(n_voxels,) =", scores.shape)
136+
137+
###############################################################################
138+
# Compare with a model without delays
139+
# -----------------------------------
140+
#
141+
# We define here another model without feature delays (i.e. no ``Delayer``).
142+
# Because the BOLD signal is inherently slow due to the dynamics of
143+
# neuro-vascular coupling, this model is unlikely to perform well.
144+
145+
pipeline_nodelay = make_pipeline(
146+
StandardScaler(with_mean=True, with_std=False),
147+
KernelRidgeCV(
148+
alphas=alphas, cv=cv,
149+
solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
150+
n_targets_batch_refit=100)),
151+
)
152+
pipeline_nodelay
153+
154+
###############################################################################
155+
# We fit and score the model as the previous one.
156+
pipeline_nodelay.fit(X_train, Y_train)
157+
scores_nodelay = pipeline_nodelay.score(X_test, Y_test)
158+
scores_nodelay = backend.to_numpy(scores_nodelay)
159+
print("(n_voxels,) =", scores_nodelay.shape)
160+
161+
###############################################################################
162+
# Then, we plot the comparison of model performances with a 2D histogram.
163+
# All ~70k voxels are represented in this histogram, where the diagonal
164+
# corresponds to identical performance for both models. A distribution deviating
165+
# from the diagonal means that one model has better predictive performances
166+
# than the other.
167+
import matplotlib.pyplot as plt
168+
from voxelwise_tutorials.viz import plot_hist2d
169+
170+
ax = plot_hist2d(scores_nodelay, scores)
171+
ax.set(
172+
title='Generalization R2 scores',
173+
xlabel='model without delays',
174+
ylabel='model with delays',
175+
)
176+
plt.show()
177+
178+
###############################################################################
179+
# We see that the model with delays performs much better than the model without
180+
# delays. This can be seen in voxels with scores above 0. The distribution
181+
# of scores below zero is not very informative, since it corresponds to voxels
182+
# with poor predictive performances anyway, and it only shows which model is
183+
# overfitting the most.
184+
185+
###############################################################################
186+
# Visualize the HRF
187+
# -----------------
188+
#
189+
# We just saw that delays are necessary to model BOLD responses. Here we show
190+
# how the fitted ridge regression weights follow the hemodynamic response
191+
# function (HRF).
192+
#
193+
# Fitting a kernel ridge regression results in a set of coefficients called the
194+
# "dual" coefficients :math:`w`. These coefficients differ from the "primal"
195+
# coefficients :math:`\beta` obtained with a ridge regression, but the primal
196+
# coefficients can be computed from the dual coefficients using the training
197+
# features :math:`X`:
198+
#
199+
# .. math::
200+
#
201+
# \beta = X^\top w
202+
#
203+
# To better visualize the HRF, we will refit a model with more delays, but only
204+
# on a selection of voxels to speed up the computations.
205+
206+
# pick the 10 best voxels
207+
voxel_selection = np.argsort(scores)[-10:]
208+
209+
# define a pipeline with more delays
210+
pipeline_many_delays = make_pipeline(
211+
StandardScaler(with_mean=True, with_std=False),
212+
Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),
213+
KernelRidgeCV(
214+
alphas=alphas, cv=cv,
215+
solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
216+
n_targets_batch_refit=100)),
217+
)
218+
219+
pipeline_many_delays.fit(X_train, Y_train[:, voxel_selection])
220+
221+
# get the (primal) ridge regression coefficients
222+
primal_coef = pipeline_many_delays[-1].get_primal_coef()
223+
primal_coef = backend.to_numpy(primal_coef)
224+
225+
# get the delays
226+
delays = pipeline_many_delays.named_steps['delayer'].delays
227+
# split the ridge coefficients per delays
228+
primal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))
229+
230+
# select the feature with the largest coefficients for each voxel
231+
feature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),
232+
axis=0)
233+
primal_coef_selection = primal_coef_per_delay[:, feature_selection,
234+
np.arange(len(voxel_selection))]
235+
236+
plt.plot(delays, primal_coef_selection)
237+
plt.xlabel('Delays')
238+
plt.xticks(delays)
239+
plt.ylabel('Ridge coefficients')
240+
plt.title(f'Largest feature for the {len(voxel_selection)} best voxels')
241+
plt.axhline(0, color='k', linewidth=0.5)
242+
plt.show()
243+
244+
###############################################################################
245+
# We see that the hemodynamic response function (HRF) is captured in the model
246+
# weights. Note that in this dataset, the brain responses are recorded every
247+
# two seconds.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)