"""
==================================
Visualize the hemodynamic response
==================================

In this example, we describe how the hemodynamic response is estimated in the
previous model. We fit the same ridge model as in the previous example, and
further describe the need to delay the features in time.

As explained in the previous example, the BOLD signal recorded in fMRI
experiments is delayed in time with respect to the stimulus. With different
delayed versions of the features, the linear regression model can assign a
different weight to each delay, to maximize the predictions. With a sample
every 2 seconds, we typically use 4 delays [1, 2, 3, 4] (i.e. 2 to 8 seconds)
to cover most of the hemodynamic response peak.

In this example, we show the decrease in prediction performance when using no
delays. We also show how to visualize the estimated hemodynamic response
function (HRF) using more delays.
"""
# sphinx_gallery_thumbnail_number = 2
###############################################################################
# Path of the data directory
import os
from voxelwise_tutorials.io import get_data_home
directory = os.path.join(get_data_home(), "vim-4")
print(directory)

###############################################################################

# modify to use another subject
subject = "S01"

###############################################################################
# Load the data
# -------------
#
# We first load the fMRI responses.
import numpy as np
from voxelwise_tutorials.io import load_hdf5_array

file_name = os.path.join(directory, "responses", f"{subject}_responses.hdf")
Y_train = load_hdf5_array(file_name, key="Y_train")
Y_test = load_hdf5_array(file_name, key="Y_test")

print("(n_samples_train, n_voxels) =", Y_train.shape)
print("(n_repeats, n_samples_test, n_voxels) =", Y_test.shape)

###############################################################################
# We average the test repeats, to remove the non-repeatable part of fMRI
# responses.
Y_test = Y_test.mean(0)

print("(n_samples_test, n_voxels) =", Y_test.shape)

###############################################################################
# We fill potential NaN (not-a-number) values with zeros.
Y_train = np.nan_to_num(Y_train)
Y_test = np.nan_to_num(Y_test)

###############################################################################
# Then, we load the semantic "wordnet" features.
feature_space = "wordnet"

file_name = os.path.join(directory, "features", f"{feature_space}.hdf")
X_train = load_hdf5_array(file_name, key="X_train")
X_test = load_hdf5_array(file_name, key="X_test")

print("(n_samples_train, n_features) =", X_train.shape)
print("(n_samples_test, n_features) =", X_test.shape)

###############################################################################
# Define the cross-validation scheme
# ----------------------------------
#
# We define the same leave-one-run-out cross-validation split as in the
# previous example.

from sklearn.model_selection import check_cv
from voxelwise_tutorials.utils import generate_leave_one_run_out

# index of the first sample of each run
run_onsets = load_hdf5_array(file_name, key="run_onsets")
print(run_onsets)

###############################################################################
# We define a cross-validation splitter, compatible with the ``scikit-learn``
# API.
n_samples_train = X_train.shape[0]
cv = generate_leave_one_run_out(n_samples_train, run_onsets)
cv = check_cv(cv)  # copy the cross-validation splitter into a reusable list
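
###############################################################################
# As a quick sanity check, we can count the number of cross-validation splits,
# which should match the number of runs listed in ``run_onsets``.
print("number of runs =", len(run_onsets))
print("number of cross-validation splits =", cv.get_n_splits())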

###############################################################################
# Define the model
# ----------------
#
# We define the same model as in the previous example. See the previous
# example for more details about the model definition.

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from voxelwise_tutorials.delayer import Delayer
from himalaya.kernel_ridge import KernelRidgeCV
from himalaya.backend import set_backend
backend = set_backend("torch_cuda", on_error="warn")

X_train = X_train.astype("float32")
X_test = X_test.astype("float32")

alphas = np.logspace(1, 20, 20)

pipeline = make_pipeline(
    StandardScaler(with_mean=True, with_std=False),
    Delayer(delays=[1, 2, 3, 4]),
    KernelRidgeCV(
        alphas=alphas, cv=cv,
        solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
                           n_targets_batch_refit=100)),
)

###############################################################################
from sklearn import set_config
set_config(display='diagram')
pipeline
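
###############################################################################
# To make the role of the ``Delayer`` more concrete, here is a minimal sketch
# on a toy array with a single feature: each delay adds a shifted copy of the
# feature as a new column, so the regression can weight the stimulus at
# several lags. (The exact padding of the first samples is an assumption of
# this sketch; see the ``Delayer`` docstring for details.)
toy_feature = np.arange(8, dtype="float32").reshape(-1, 1)
toy_delayed = Delayer(delays=[0, 1, 2]).fit_transform(toy_feature)
print("(n_samples, n_features * n_delays) =", toy_delayed.shape)
print(toy_delayed)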

###############################################################################
# Fit the model
# -------------
#
# We fit on the train set, and score on the test set.

pipeline.fit(X_train, Y_train)

scores = pipeline.score(X_test, Y_test)
scores = backend.to_numpy(scores)
print("(n_voxels,) =", scores.shape)

###############################################################################
# Compare with a model without delays
# -----------------------------------
#
# We define here another model without feature delays (i.e. no ``Delayer``).
# Because the BOLD signal is inherently slow due to the dynamics of
# neuro-vascular coupling, this model is unlikely to perform well.

pipeline_nodelay = make_pipeline(
    StandardScaler(with_mean=True, with_std=False),
    KernelRidgeCV(
        alphas=alphas, cv=cv,
        solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
                           n_targets_batch_refit=100)),
)
pipeline_nodelay

###############################################################################
# We fit and score this model in the same way as the previous one.
pipeline_nodelay.fit(X_train, Y_train)
scores_nodelay = pipeline_nodelay.score(X_test, Y_test)
scores_nodelay = backend.to_numpy(scores_nodelay)
print("(n_voxels,) =", scores_nodelay.shape)

###############################################################################
# Then, we compare the performances of the two models with a 2D histogram.
# All ~70k voxels are represented in this histogram, where the diagonal
# corresponds to identical performance for both models. A distribution
# deviating from the diagonal means that one model has better predictive
# performance than the other.
import matplotlib.pyplot as plt
from voxelwise_tutorials.viz import plot_hist2d

ax = plot_hist2d(scores_nodelay, scores)
ax.set(
    title='Generalization R2 scores',
    xlabel='model without delays',
    ylabel='model with delays',
)
plt.show()

###############################################################################
# We see that the model with delays performs much better than the model
# without delays. This can be seen in voxels with scores above 0. The
# distribution of scores below zero is not very informative, since it
# corresponds to voxels with poor predictive performance anyway, and it only
# shows which model is overfitting the most.
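
###############################################################################
# The same comparison can be summarized with a simple count of voxels that are
# better predicted by the model with delays.
n_better = np.sum(scores > scores_nodelay)
print(f"{n_better} voxels (out of {scores.size}) are better predicted with delays.")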

###############################################################################
# Visualize the HRF
# -----------------
#
# We just saw that delays are necessary to model BOLD responses. Here we show
# how the fitted ridge regression weights follow the hemodynamic response
# function (HRF).
#
# Fitting a kernel ridge regression results in a set of coefficients called
# the "dual" coefficients :math:`w`. These coefficients differ from the
# "primal" coefficients :math:`\beta` obtained with a ridge regression, but
# the primal coefficients can be computed from the dual coefficients using the
# training features :math:`X`:
#
# .. math::
#
#     \beta = X^\top w
#
# To better visualize the HRF, we will refit a model with more delays, but
# only on a selection of voxels to speed up the computations.

# pick the 10 best voxels
voxel_selection = np.argsort(scores)[-10:]

# define a pipeline with more delays
pipeline_many_delays = make_pipeline(
    StandardScaler(with_mean=True, with_std=False),
    Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),
    KernelRidgeCV(
        alphas=alphas, cv=cv,
        solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
                           n_targets_batch_refit=100)),
)

pipeline_many_delays.fit(X_train, Y_train[:, voxel_selection])

# get the (primal) ridge regression coefficients
primal_coef = pipeline_many_delays[-1].get_primal_coef()
primal_coef = backend.to_numpy(primal_coef)
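
# As a sketch of the equation above (assuming the default linear kernel of
# KernelRidgeCV and its fitted ``dual_coef_`` attribute), the primal
# coefficients can also be recovered manually from the dual coefficients and
# the centered, delayed training features.
X_fit = pipeline_many_delays[:-1].transform(X_train)
dual_coef = backend.to_numpy(pipeline_many_delays[-1].dual_coef_)
primal_coef_manual = X_fit.T @ dual_coef  # should match get_primal_coef()
print("(n_features * n_delays, n_voxels) =", primal_coef_manual.shape)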

# get the delays
delays = pipeline_many_delays.named_steps['delayer'].delays
# split the ridge coefficients per delay
primal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))

# select the feature with the largest coefficients for each voxel
feature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),
                              axis=0)
# keep only the selected feature for each voxel
primal_coef_selection = primal_coef_per_delay[:, feature_selection,
                                              np.arange(len(voxel_selection))]

plt.plot(delays, primal_coef_selection)
plt.xlabel('Delays')
plt.xticks(delays)
plt.ylabel('Ridge coefficients')
plt.title(f'Largest feature for the {len(voxel_selection)} best voxels')
plt.axhline(0, color='k', linewidth=0.5)
plt.show()

###############################################################################
# We see that the hemodynamic response function (HRF) is captured in the model
# weights. Note that in this dataset, the brain responses are recorded every
# two seconds.