Skip to content

Commit c4f5d64

Browse files
committed
EXA add visualization of the HRF
1 parent 0fd6f19 commit c4f5d64

6 files changed

Lines changed: 128 additions & 37 deletions

File tree

doc/index.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ Tutorials
99
---------
1010
.. toctree::
1111
:includehidden:
12-
:maxdepth: 3
12+
:maxdepth: 2
13+
1314

1415
auto_examples/index
1516

@@ -18,5 +19,5 @@ Voxelwise package
1819
.. toctree::
1920
:maxdepth: 2
2021

21-
quickstart
22+
voxelwise
2223

File renamed without changes.

tutorials/movies_3T/01_plot_explainable_variance.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
all repeats. This shared component can be estimated by taking the mean over
1818
repeats of the same experiment.
1919
"""
20-
20+
# sphinx_gallery_thumbnail_number = 2
2121
###############################################################################
2222

2323
# path of the data directory
@@ -41,19 +41,30 @@
4141

4242
###############################################################################
4343
# Then, we compute the explainable variance per voxel.
44-
# The variance of the signal is estimated by taking the average variance over
45-
# repeats (``mean_var``). The variance of the component shared across repeats
44+
# The variance of the signal is estimated by taking the variance over
45+
# time (``var``). The variance of the component shared across repeats
4646
# is estimated by taking the variance of the average response (``var_mean``).
4747
# Then, we can compute the explainable variance by dividing the two quantities.
4848

49-
mean_var = np.mean(np.var(Y_test, axis=1), axis=0)
49+
var = np.var(Y_test.reshape(-1, Y_test.shape[-1]), axis=0)
5050
var_mean = np.var(np.mean(Y_test, axis=0), axis=0)
51-
explainable_variance = var_mean / mean_var
51+
explainable_variance = var_mean / var
52+
53+
###############################################################################
54+
# Plot the distribution of explainable variance over voxels.
55+
import matplotlib.pyplot as plt
56+
57+
plt.hist(explainable_variance, bins=np.linspace(0, 1, 100), log=True,
58+
histtype='step')
59+
plt.xlabel("Explainable variance")
60+
plt.ylabel("Number of voxels")
61+
plt.title('Histogram of explainable variance')
62+
plt.grid('on')
63+
plt.show()
5264

5365
###############################################################################
5466
# Map to subject flatmap
5567
# ----------------------
56-
import matplotlib.pyplot as plt
5768
from voxelwise.viz import plot_flatmap_from_mapper
5869

5970
mapper_file = os.path.join(directory, 'mappers', f'{subject}_mappers.hdf')

tutorials/movies_3T/02_plot_wordnet_model.py

Lines changed: 106 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
set, comparing the model predictions with the corresponding ground-truth fMRI
2020
responses.
2121
22-
The banded ridge model is fitted with the package
22+
The ridge model is fitted with the package
2323
`himalaya <https://github.com/gallantlab/himalaya>`_.
2424
"""
2525
###############################################################################
@@ -95,15 +95,32 @@
9595
# ----------------
9696
#
9797
# Now, let's define the model pipeline.
98-
99-
from sklearn.pipeline import make_pipeline
98+
#
99+
# We first center the features, since we will not use an intercept.
100100
from sklearn.preprocessing import StandardScaler
101101

102-
# display the scikit-learn pipeline with an HTML diagram
103-
from sklearn import set_config
104-
set_config(display='diagram')
102+
scaler = StandardScaler(with_mean=True, with_std=False)
103+
104+
###############################################################################
105+
# Then we concatenate the features with multiple delays, to account for the
106+
# hemodynamic response. The linear regression model will then weight each
107+
# delayed feature with a different weight, to build a predictive model.
108+
#
109+
# With a sample every 2 seconds, we use 4 delays [1, 2, 3, 4] to cover the
110+
# main part of the hemodynamic response peak.
111+
from voxelwise.delayer import Delayer
112+
113+
delayer = Delayer(delays=[1, 2, 3, 4])
105114

106115
###############################################################################
116+
# The model we use is a ridge regression. When the number of features is
117+
# larger than the number of samples, it is more efficient to solve a ridge
118+
# regression using the (equivalent) dual formulation, kernel ridge regression
119+
# with a linear kernel.
120+
# Here, we have 3600 training samples, and 1705 * 4 = 6820 features (we
121+
# multiply by 4 since we use 4 time delays), therefore we use kernel ridge
122+
# regression.
123+
107124
# With one target, we could directly use the pipeline in scikit-learn's
108125
# GridSearchCV, to select the optimal hyperparameters over cross-validation.
109126
# However, GridSearchCV can only optimize one score. Thus, in the multiple
@@ -113,13 +130,15 @@
113130
from himalaya.kernel_ridge import KernelRidgeCV
114131

115132
###############################################################################
116-
# We first concatenate the features with multiple delays, to account for the
117-
# hemodynamic response. The linear regression model will then weight each
118-
# delayed feature with a different weight, to build a predictive model.
119-
#
120-
# With a sample every 2 seconds, we use 4 delays [1, 2, 3, 4] to cover the
121-
# most part of the hemodynamic response peak.
122-
from voxelwise.delayer import Delayer
133+
# The scale of the regularization hyperparameter alpha is unknown, so we use
134+
# a large logarithmic range, and we will check after the fit that best
135+
# hyperparameters are not all on one range edge.
136+
alphas = np.logspace(1, 20, 20)
137+
138+
kernel_ridge_cv = KernelRidgeCV(
139+
alphas=alphas, cv=cv,
140+
solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
141+
n_targets_batch_refit=100))
123142

124143
###############################################################################
125144
# We set himalaya's backend to "torch_cuda" to fit the model using GPU.
@@ -133,24 +152,25 @@
133152
backend = set_backend("torch_cuda")
134153

135154
###############################################################################
136-
# The scale of the regularization hyperparameter alpha is unknown, so we use
137-
# a large logarithmic range, and we will check after the fit that best
138-
# hyperparameters are not all on one range edge.
139-
alphas = np.logspace(1, 20, 20)
140-
141-
###############################################################################
155+
# We use scikit-learn Pipeline to link the different steps together.
142156
# The scikit-learn Pipeline can be used as a regular estimator, calling
143-
# pipeline.fit, pipeline.predict, etc.
157+
# `pipeline.fit`, `pipeline.predict`, etc.
144158
# Using a pipeline can be useful to clarify the different steps, avoid
145159
# cross-validation mistakes, or automatically cache intermediate results.
160+
from sklearn.pipeline import make_pipeline
161+
162+
# define the pipeline
146163
pipeline = make_pipeline(
147-
StandardScaler(with_mean=True, with_std=False),
148-
Delayer(delays=[1, 2, 3, 4]),
149-
KernelRidgeCV(
150-
alphas=alphas, cv=cv,
151-
solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
152-
n_targets_batch_refit=100)),
164+
scaler,
165+
delayer,
166+
kernel_ridge_cv,
153167
)
168+
169+
###############################################################################
170+
# We can display the scikit-learn pipeline with an HTML diagram.
171+
from sklearn import set_config
172+
set_config(display='diagram')
173+
154174
pipeline
155175

156176
###############################################################################
@@ -164,6 +184,7 @@
164184
pipeline.fit(X_train, Y_train)
165185

166186
scores = pipeline.score(X_test, Y_test)
187+
167188
###############################################################################
168189
# Since we performed the KernelRidgeCV on GPU, scores are returned as
169190
# torch.Tensor on GPU. Thus, we need to move them into numpy arrays on CPU, to
@@ -232,7 +253,7 @@
232253
# -----------------------------------
233254
#
234255
# To present an example of model comparison, we define here another model,
235-
# without feature delays (i.e. no Delayer). This model is unlikely to perform
256+
# without feature delays (i.e. no `Delayer`). This model is unlikely to perform
236257
# well, since fMRI responses are delayed in time with respect to the stimulus.
237258

238259
pipeline_nodelay = make_pipeline(
@@ -261,3 +282,61 @@
261282
ax.set(title='Generalization R2 scores', xlabel='model without delays',
262283
ylabel='model with delays')
263284
plt.show()
285+
286+
###############################################################################
287+
# Visualize the HRF
288+
# -----------------
289+
#
290+
# Here we will visualize the hemodynamic response function (HRF), as captured
291+
# in the ridge regression weights.
292+
#
293+
# Fitting a kernel ridge regression results in a set of coefficients called the
294+
# "dual" coefficients. These coefficients are different from the "primal"
295+
# coefficients obtained with a ridge regression, but the primal coefficients
296+
# can be computed from the dual coefficients using the training features.
297+
#
298+
# To better visualize the HRF, we will refit a model with more delays, but only
299+
# on a selection of voxels to speed up the computations.
300+
301+
# pick the 10 best voxels
302+
voxel_selection = np.argsort(scores)[-10:]
303+
304+
# define a pipeline with more delays
305+
pipeline_many_delays = make_pipeline(
306+
StandardScaler(with_mean=True, with_std=False),
307+
Delayer(delays=np.arange(7)),
308+
KernelRidgeCV(
309+
alphas=alphas, cv=cv,
310+
solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
311+
n_targets_batch_refit=100)),
312+
)
313+
314+
pipeline_many_delays.fit(X_train, Y_train[:, voxel_selection])
315+
316+
# get the (primal) ridge regression coefficients
317+
primal_coef = pipeline_many_delays[-1].get_primal_coef()
318+
primal_coef = backend.to_numpy(primal_coef)
319+
320+
# get the delays
321+
delays = pipeline_many_delays.named_steps['delayer'].delays
322+
# split the ridge coefficients per delays
323+
primal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))
324+
325+
# select the feature with the largest coefficients for each voxel
326+
feature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),
327+
axis=0)
328+
primal_coef_selection = primal_coef_per_delay[:, feature_selection,
329+
np.arange(len(voxel_selection))]
330+
331+
plt.plot(delays, primal_coef_selection)
332+
plt.xlabel('Delays')
333+
plt.xticks(delays)
334+
plt.ylabel('Ridge coefficients')
335+
plt.title(f'Largest feature for the {len(voxel_selection)} best voxels')
336+
plt.axhline(0, color='k', linewidth=0.5)
337+
plt.show()
338+
339+
###############################################################################
340+
# We see that the hemodynamic response function (HRF) is captured in the model
341+
# weights. In practice, we can limit the number of features by using only
342+
# the most informative delays, for example [1, 2, 3, 4].

tutorials/movies_3T/03_plot_motion_energy_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
set, comparing the model predictions with the corresponding ground-truth fMRI
2525
responses.
2626
27-
The banded ridge model is fitted with the package
27+
The ridge model is fitted with the package
2828
`himalaya <https://github.com/gallantlab/himalaya>`_.
2929
"""
3030
###############################################################################

tutorials/movies_4T/02_plot_ridge_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
set, comparing the model predictions with the corresponding ground-truth fMRI
2020
responses.
2121
22-
The banded ridge model is fitted with the package
22+
The ridge model is fitted with the package
2323
`himalaya <https://github.com/gallantlab/himalaya>`_.
2424
"""
2525
# sphinx_gallery_thumbnail_number = 2

0 commit comments

Comments (0)