Skip to content

Commit 95924f8

Browse files
committed
ENH add Delayer.reshape_by_delays and more tests
1 parent 4e70bd4 commit 95924f8

6 files changed

Lines changed: 97 additions & 22 deletions

File tree

tutorials/movies_3T/02_plot_wordnet_model.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -328,12 +328,9 @@
328328
###############################################################################
329329
# Then, we aggregate the coefficients across the different delays.
330330

331-
# get the delays
332-
delays = pipeline.named_steps['delayer'].delays
333-
print("delays =", delays)
334-
335331
# split the ridge coefficients per delays
336-
primal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))
332+
delayer = pipeline.named_steps['delayer']
333+
primal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)
337334
print("(n_delays, n_features, n_voxels) =", primal_coef_per_delay.shape)
338335

339336
# average over delays

tutorials/movies_3T/03_plot_hemodynamic_response.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -142,19 +142,19 @@
142142
# Because the BOLD signal is inherently slow due to the dynamics of
143143
# neuro-vascular coupling, this model is unlikely to perform well.
144144

145-
pipeline_nodelay = make_pipeline(
145+
pipeline_no_delay = make_pipeline(
146146
StandardScaler(with_mean=True, with_std=False),
147147
KernelRidgeCV(
148148
alphas=alphas, cv=cv,
149149
solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
150150
n_targets_batch_refit=100)),
151151
)
152-
pipeline_nodelay
152+
pipeline_no_delay
153153

154154
###############################################################################
155155
# We fit and score the model as the previous one.
156-
pipeline_nodelay.fit(X_train, Y_train)
157-
scores_nodelay = pipeline_nodelay.score(X_test, Y_test)
156+
pipeline_no_delay.fit(X_train, Y_train)
157+
scores_nodelay = pipeline_no_delay.score(X_test, Y_test)
158158
scores_nodelay = backend.to_numpy(scores_nodelay)
159159
print("(n_voxels,) =", scores_nodelay.shape)
160160

@@ -207,7 +207,7 @@
207207
voxel_selection = np.argsort(scores)[-10:]
208208

209209
# define a pipeline with more delays
210-
pipeline_many_delays = make_pipeline(
210+
pipeline_more_delays = make_pipeline(
211211
StandardScaler(with_mean=True, with_std=False),
212212
Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),
213213
KernelRidgeCV(
@@ -216,26 +216,26 @@
216216
n_targets_batch_refit=100)),
217217
)
218218

219-
pipeline_many_delays.fit(X_train, Y_train[:, voxel_selection])
219+
pipeline_more_delays.fit(X_train, Y_train[:, voxel_selection])
220220

221221
# get the (primal) ridge regression coefficients
222-
primal_coef = pipeline_many_delays[-1].get_primal_coef()
222+
primal_coef = pipeline_more_delays[-1].get_primal_coef()
223223
primal_coef = backend.to_numpy(primal_coef)
224224

225-
# get the delays
226-
delays = pipeline_many_delays.named_steps['delayer'].delays
227225
# split the ridge coefficients per delays
228-
primal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))
226+
delayer = pipeline_more_delays.named_steps['delayer']
227+
primal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)
228+
print("(n_delays, n_features, n_voxels) =", primal_coef_per_delay.shape)
229229

230230
# select the feature with the largest coefficients for each voxel
231231
feature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),
232232
axis=0)
233233
primal_coef_selection = primal_coef_per_delay[:, feature_selection,
234234
np.arange(len(voxel_selection))]
235235

236-
plt.plot(delays, primal_coef_selection)
236+
plt.plot(delayer.delays, primal_coef_selection)
237237
plt.xlabel('Delays')
238-
plt.xticks(delays)
238+
plt.xticks(delayer.delays)
239239
plt.ylabel('Ridge coefficients')
240240
plt.title(f'Largest feature for the {len(voxel_selection)} best voxels')
241241
plt.axhline(0, color='k', linewidth=0.5)

tutorials/notebooks/movies_3T/02_plot_wordnet_model.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@
472472
},
473473
"outputs": [],
474474
"source": [
475-
"# get the delays\ndelays = pipeline.named_steps['delayer'].delays\nprint(\"delays =\", delays)\n\n# split the ridge coefficients per delays\nprimal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))\nprint(\"(n_delays, n_features, n_voxels) =\", primal_coef_per_delay.shape)\n\n# average over delays\naverage_coef = np.mean(primal_coef_per_delay, axis=0)\nprint(\"(n_features, n_voxels) =\", average_coef.shape)"
475+
"# split the ridge coefficients per delays\ndelayer = pipeline.named_steps['delayer']\nprimal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)\nprint(\"(n_delays, n_features, n_voxels) =\", primal_coef_per_delay.shape)\n\n# average over delays\naverage_coef = np.mean(primal_coef_per_delay, axis=0)\nprint(\"(n_features, n_voxels) =\", average_coef.shape)"
476476
]
477477
},
478478
{

tutorials/notebooks/movies_3T/03_plot_hemodynamic_response.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@
228228
},
229229
"outputs": [],
230230
"source": [
231-
"pipeline_nodelay = make_pipeline(\n StandardScaler(with_mean=True, with_std=False),\n KernelRidgeCV(\n alphas=alphas, cv=cv,\n solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n n_targets_batch_refit=100)),\n)\npipeline_nodelay"
231+
"pipeline_no_delay = make_pipeline(\n StandardScaler(with_mean=True, with_std=False),\n KernelRidgeCV(\n alphas=alphas, cv=cv,\n solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n n_targets_batch_refit=100)),\n)\npipeline_no_delay"
232232
]
233233
},
234234
{
@@ -246,7 +246,7 @@
246246
},
247247
"outputs": [],
248248
"source": [
249-
"pipeline_nodelay.fit(X_train, Y_train)\nscores_nodelay = pipeline_nodelay.score(X_test, Y_test)\nscores_nodelay = backend.to_numpy(scores_nodelay)\nprint(\"(n_voxels,) =\", scores_nodelay.shape)"
249+
"pipeline_no_delay.fit(X_train, Y_train)\nscores_nodelay = pipeline_no_delay.score(X_test, Y_test)\nscores_nodelay = backend.to_numpy(scores_nodelay)\nprint(\"(n_voxels,) =\", scores_nodelay.shape)"
250250
]
251251
},
252252
{
@@ -289,7 +289,7 @@
289289
},
290290
"outputs": [],
291291
"source": [
292-
"# pick the 10 best voxels\nvoxel_selection = np.argsort(scores)[-10:]\n\n# define a pipeline with more delays\npipeline_many_delays = make_pipeline(\n StandardScaler(with_mean=True, with_std=False),\n Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),\n KernelRidgeCV(\n alphas=alphas, cv=cv,\n solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n n_targets_batch_refit=100)),\n)\n\npipeline_many_delays.fit(X_train, Y_train[:, voxel_selection])\n\n# get the (primal) ridge regression coefficients\nprimal_coef = pipeline_many_delays[-1].get_primal_coef()\nprimal_coef = backend.to_numpy(primal_coef)\n\n# get the delays\ndelays = pipeline_many_delays.named_steps['delayer'].delays\n# split the ridge coefficients per delays\nprimal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))\n\n# select the feature with the largest coefficients for each voxel\nfeature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),\n axis=0)\nprimal_coef_selection = primal_coef_per_delay[:, feature_selection,\n np.arange(len(voxel_selection))]\n\nplt.plot(delays, primal_coef_selection)\nplt.xlabel('Delays')\nplt.xticks(delays)\nplt.ylabel('Ridge coefficients')\nplt.title(f'Largest feature for the {len(voxel_selection)} best voxels')\nplt.axhline(0, color='k', linewidth=0.5)\nplt.show()"
292+
"# pick the 10 best voxels\nvoxel_selection = np.argsort(scores)[-10:]\n\n# define a pipeline with more delays\npipeline_more_delays = make_pipeline(\n StandardScaler(with_mean=True, with_std=False),\n Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),\n KernelRidgeCV(\n alphas=alphas, cv=cv,\n solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n n_targets_batch_refit=100)),\n)\n\npipeline_more_delays.fit(X_train, Y_train[:, voxel_selection])\n\n# get the (primal) ridge regression coefficients\nprimal_coef = pipeline_more_delays[-1].get_primal_coef()\nprimal_coef = backend.to_numpy(primal_coef)\n\n# split the ridge coefficients per delays\ndelayer = pipeline_more_delays.named_steps['delayer']\nprimal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)\nprint(\"(n_delays, n_features, n_voxels) =\", primal_coef_per_delay.shape)\n\n# select the feature with the largest coefficients for each voxel\nfeature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),\n axis=0)\nprimal_coef_selection = primal_coef_per_delay[:, feature_selection,\n np.arange(len(voxel_selection))]\n\nplt.plot(delayer.delays, primal_coef_selection)\nplt.xlabel('Delays')\nplt.xticks(delayer.delays)\nplt.ylabel('Ridge coefficients')\nplt.title(f'Largest feature for the {len(voxel_selection)} best voxels')\nplt.axhline(0, color='k', linewidth=0.5)\nplt.show()"
293293
]
294294
},
295295
{

voxelwise_tutorials/delayer.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,37 @@ def __init__(self, delays=None):
3636
self.delays = delays
3737

3838
def fit(self, X, y=None):
39+
"""Fit the delayer.
40+
41+
Parameters
42+
----------
43+
X : array of shape (n_samples, n_features)
44+
Training data.
45+
46+
y : array of shape (n_samples,) or (n_samples, n_targets)
47+
Target values. Ignored.
48+
49+
Returns
50+
-------
51+
self : returns an instance of self.
52+
"""
3953
X = self._validate_data(X, dtype='numeric')
4054
self.n_features_in_ = X.shape[1]
4155
return self
4256

4357
def transform(self, X):
58+
"""Transform the input data X, copying features with different delays.
59+
60+
Parameters
61+
----------
62+
X : array of shape (n_samples, n_features)
63+
Input data.
64+
65+
Returns
66+
-------
67+
Xt : array of shape (n_samples, n_features * n_delays)
68+
Transformed data.
69+
"""
4470
check_is_fitted(self)
4571
X = check_array(X, copy=True)
4672

@@ -54,7 +80,6 @@ def transform(self, X):
5480

5581
X_delayed = np.zeros((n_samples, n_features * len(self.delays)),
5682
dtype=X.dtype)
57-
5883
for idx, delay in enumerate(self.delays):
5984
beg, end = idx * n_features, (idx + 1) * n_features
6085
if delay == 0:
@@ -65,3 +90,21 @@ def transform(self, X):
6590
X_delayed[:-abs(delay), beg:end] = X[abs(delay):]
6691

6792
return X_delayed
93+
94+
def reshape_by_delays(self, Xt, axis=1):
95+
"""Reshape an array, splitting and stacking across delays.
96+
97+
Parameters
98+
----------
99+
Xt : array of shape (n_samples, n_features * n_delays)
100+
Transformed array.
101+
axis : int, default=1
102+
Axis to split.
103+
104+
Returns
105+
-------
106+
Xt_split :array of shape (n_delays, n_samples, n_features)
107+
Reshaped array, splitting across delays.
108+
"""
109+
delays = self.delays or [0] # deals with None
110+
return np.stack(np.split(Xt, len(delays), axis=axis))

voxelwise_tutorials/tests/test_delayer.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import pytest
2+
import numpy as np
3+
14
import sklearn.kernel_ridge
25
import sklearn.utils.estimator_checks
36

@@ -7,3 +10,35 @@
710
@sklearn.utils.estimator_checks.parametrize_with_checks([Delayer()])
811
def test_check_estimator(estimator, check):
912
check(estimator)
13+
14+
15+
@pytest.mark.parametrize('delays', [None, [0]])
16+
def test_no_delays(delays):
17+
X = np.random.randn(10, 3)
18+
Xt = Delayer(delays=delays).fit_transform(X)
19+
np.testing.assert_array_equal(Xt, X)
20+
21+
22+
@pytest.mark.parametrize('delays', [[0], [0, 1], [0, -1, 2]])
23+
def test_zero_delay_identity(delays):
24+
X = np.random.randn(10, 3)
25+
Xt = Delayer(delays=delays).fit_transform(X)
26+
np.testing.assert_array_equal(Xt[:, :X.shape[1]], X)
27+
28+
29+
@pytest.mark.parametrize('delays', [[1], [1, 2], [-1, 0, 2]])
30+
def test_nonzero_delay(delays):
31+
X = np.random.randn(10, 3)
32+
Xt = Delayer(delays=delays).fit_transform(X)
33+
with pytest.raises(AssertionError):
34+
np.testing.assert_array_equal(Xt[:, :X.shape[1]], X)
35+
36+
37+
@pytest.mark.parametrize('delays', [[1], [1, 2], [-1, 0, 2]])
38+
def test_reshape_by_delays(delays):
39+
X = np.random.randn(10, 3)
40+
trans = Delayer(delays=delays)
41+
Xt = trans.fit_transform(X)
42+
Xtt = trans.reshape_by_delays(Xt)
43+
44+
assert Xtt.shape == (len(delays), X.shape[0], X.shape[1])

0 commit comments

Comments
 (0)