ENH add Delayer.reshape_by_delays and more tests

TomDLT · TomDLT · commit 95924f8e341e · 2020-08-21T15:33:04.000-07:00
diff --git a/tutorials/movies_3T/02_plot_wordnet_model.py b/tutorials/movies_3T/02_plot_wordnet_model.py
@@ -328,12 +328,9 @@
 ###############################################################################
 # Then, we aggregate the coefficients across the different delays.
 
-# get the delays
-delays = pipeline.named_steps['delayer'].delays
-print("delays =", delays)
-
 # split the ridge coefficients per delays
-primal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))
+delayer = pipeline.named_steps['delayer']
+primal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)
 print("(n_delays, n_features, n_voxels) =", primal_coef_per_delay.shape)
 
 # average over delays
diff --git a/tutorials/movies_3T/03_plot_hemodynamic_response.py b/tutorials/movies_3T/03_plot_hemodynamic_response.py
@@ -142,19 +142,19 @@
 # Because the BOLD signal is inherently slow due to the dynamics of
 # neuro-vascular coupling, this model is unlikely to perform well.
 
-pipeline_nodelay = make_pipeline(
+pipeline_no_delay = make_pipeline(
     StandardScaler(with_mean=True, with_std=False),
     KernelRidgeCV(
         alphas=alphas, cv=cv,
         solver_params=dict(n_targets_batch=500, n_alphas_batch=5,
                            n_targets_batch_refit=100)),
 )
-pipeline_nodelay
+pipeline_no_delay
 
 ###############################################################################
 # We fit and score the model as the previous one.
-pipeline_nodelay.fit(X_train, Y_train)
-scores_nodelay = pipeline_nodelay.score(X_test, Y_test)
+pipeline_no_delay.fit(X_train, Y_train)
+scores_nodelay = pipeline_no_delay.score(X_test, Y_test)
 scores_nodelay = backend.to_numpy(scores_nodelay)
 print("(n_voxels,) =", scores_nodelay.shape)
 
@@ -207,7 +207,7 @@
 voxel_selection = np.argsort(scores)[-10:]
 
 # define a pipeline with more delays
-pipeline_many_delays = make_pipeline(
+pipeline_more_delays = make_pipeline(
     StandardScaler(with_mean=True, with_std=False),
     Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),
     KernelRidgeCV(
@@ -216,26 +216,26 @@
                            n_targets_batch_refit=100)),
 )
 
-pipeline_many_delays.fit(X_train, Y_train[:, voxel_selection])
+pipeline_more_delays.fit(X_train, Y_train[:, voxel_selection])
 
 # get the (primal) ridge regression coefficients
-primal_coef = pipeline_many_delays[-1].get_primal_coef()
+primal_coef = pipeline_more_delays[-1].get_primal_coef()
 primal_coef = backend.to_numpy(primal_coef)
 
-# get the delays
-delays = pipeline_many_delays.named_steps['delayer'].delays
 # split the ridge coefficients per delays
-primal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))
+delayer = pipeline_more_delays.named_steps['delayer']
+primal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)
+print("(n_delays, n_features, n_voxels) =", primal_coef_per_delay.shape)
 
 # select the feature with the largest coefficients for each voxel
 feature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),
                               axis=0)
 primal_coef_selection = primal_coef_per_delay[:, feature_selection,
                                               np.arange(len(voxel_selection))]
 
-plt.plot(delays, primal_coef_selection)
+plt.plot(delayer.delays, primal_coef_selection)
 plt.xlabel('Delays')
-plt.xticks(delays)
+plt.xticks(delayer.delays)
 plt.ylabel('Ridge coefficients')
 plt.title(f'Largest feature for the {len(voxel_selection)} best voxels')
 plt.axhline(0, color='k', linewidth=0.5)
diff --git a/tutorials/notebooks/movies_3T/02_plot_wordnet_model.ipynb b/tutorials/notebooks/movies_3T/02_plot_wordnet_model.ipynb
@@ -472,7 +472,7 @@
       },
       "outputs": [],
       "source": [
-        "# get the delays\ndelays = pipeline.named_steps['delayer'].delays\nprint(\"delays =\", delays)\n\n# split the ridge coefficients per delays\nprimal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))\nprint(\"(n_delays, n_features, n_voxels) =\", primal_coef_per_delay.shape)\n\n# average over delays\naverage_coef = np.mean(primal_coef_per_delay, axis=0)\nprint(\"(n_features, n_voxels) =\", average_coef.shape)"
+        "# split the ridge coefficients per delays\ndelayer = pipeline.named_steps['delayer']\nprimal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)\nprint(\"(n_delays, n_features, n_voxels) =\", primal_coef_per_delay.shape)\n\n# average over delays\naverage_coef = np.mean(primal_coef_per_delay, axis=0)\nprint(\"(n_features, n_voxels) =\", average_coef.shape)"
       ]
     },
     {
diff --git a/tutorials/notebooks/movies_3T/03_plot_hemodynamic_response.ipynb b/tutorials/notebooks/movies_3T/03_plot_hemodynamic_response.ipynb
@@ -228,7 +228,7 @@
       },
       "outputs": [],
       "source": [
-        "pipeline_nodelay = make_pipeline(\n    StandardScaler(with_mean=True, with_std=False),\n    KernelRidgeCV(\n        alphas=alphas, cv=cv,\n        solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n                           n_targets_batch_refit=100)),\n)\npipeline_nodelay"
+        "pipeline_no_delay = make_pipeline(\n    StandardScaler(with_mean=True, with_std=False),\n    KernelRidgeCV(\n        alphas=alphas, cv=cv,\n        solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n                           n_targets_batch_refit=100)),\n)\npipeline_no_delay"
       ]
     },
     {
@@ -246,7 +246,7 @@
       },
       "outputs": [],
       "source": [
-        "pipeline_nodelay.fit(X_train, Y_train)\nscores_nodelay = pipeline_nodelay.score(X_test, Y_test)\nscores_nodelay = backend.to_numpy(scores_nodelay)\nprint(\"(n_voxels,) =\", scores_nodelay.shape)"
+        "pipeline_no_delay.fit(X_train, Y_train)\nscores_nodelay = pipeline_no_delay.score(X_test, Y_test)\nscores_nodelay = backend.to_numpy(scores_nodelay)\nprint(\"(n_voxels,) =\", scores_nodelay.shape)"
       ]
     },
     {
@@ -289,7 +289,7 @@
       },
       "outputs": [],
       "source": [
-        "# pick the 10 best voxels\nvoxel_selection = np.argsort(scores)[-10:]\n\n# define a pipeline with more delays\npipeline_many_delays = make_pipeline(\n    StandardScaler(with_mean=True, with_std=False),\n    Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),\n    KernelRidgeCV(\n        alphas=alphas, cv=cv,\n        solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n                           n_targets_batch_refit=100)),\n)\n\npipeline_many_delays.fit(X_train, Y_train[:, voxel_selection])\n\n# get the (primal) ridge regression coefficients\nprimal_coef = pipeline_many_delays[-1].get_primal_coef()\nprimal_coef = backend.to_numpy(primal_coef)\n\n# get the delays\ndelays = pipeline_many_delays.named_steps['delayer'].delays\n# split the ridge coefficients per delays\nprimal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))\n\n# select the feature with the largest coefficients for each voxel\nfeature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),\n                              axis=0)\nprimal_coef_selection = primal_coef_per_delay[:, feature_selection,\n                                              np.arange(len(voxel_selection))]\n\nplt.plot(delays, primal_coef_selection)\nplt.xlabel('Delays')\nplt.xticks(delays)\nplt.ylabel('Ridge coefficients')\nplt.title(f'Largest feature for the {len(voxel_selection)} best voxels')\nplt.axhline(0, color='k', linewidth=0.5)\nplt.show()"
+        "# pick the 10 best voxels\nvoxel_selection = np.argsort(scores)[-10:]\n\n# define a pipeline with more delays\npipeline_more_delays = make_pipeline(\n    StandardScaler(with_mean=True, with_std=False),\n    Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),\n    KernelRidgeCV(\n        alphas=alphas, cv=cv,\n        solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n                           n_targets_batch_refit=100)),\n)\n\npipeline_more_delays.fit(X_train, Y_train[:, voxel_selection])\n\n# get the (primal) ridge regression coefficients\nprimal_coef = pipeline_more_delays[-1].get_primal_coef()\nprimal_coef = backend.to_numpy(primal_coef)\n\n# split the ridge coefficients per delays\ndelayer = pipeline_more_delays.named_steps['delayer']\nprimal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)\nprint(\"(n_delays, n_features, n_voxels) =\", primal_coef_per_delay.shape)\n\n# select the feature with the largest coefficients for each voxel\nfeature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),\n                              axis=0)\nprimal_coef_selection = primal_coef_per_delay[:, feature_selection,\n                                              np.arange(len(voxel_selection))]\n\nplt.plot(delayer.delays, primal_coef_selection)\nplt.xlabel('Delays')\nplt.xticks(delayer.delays)\nplt.ylabel('Ridge coefficients')\nplt.title(f'Largest feature for the {len(voxel_selection)} best voxels')\nplt.axhline(0, color='k', linewidth=0.5)\nplt.show()"
       ]
     },
     {
diff --git a/voxelwise_tutorials/delayer.py b/voxelwise_tutorials/delayer.py
@@ -36,11 +36,37 @@ def __init__(self, delays=None):
         self.delays = delays
 
     def fit(self, X, y=None):
+        """Fit the delayer.
+
+        Parameters
+        ----------
+        X : array of shape (n_samples, n_features)
+            Training data. Only used to validate the input and to record
+            the number of features in ``n_features_in_``.
+        y : array of shape (n_samples,) or (n_samples, n_targets)
+            Target values. Ignored.
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
         X = self._validate_data(X, dtype='numeric')
         self.n_features_in_ = X.shape[1]
         return self
 
     def transform(self, X):
+        """Transform the input data X, copying features with different delays.
+
+        Parameters
+        ----------
+        X : array of shape (n_samples, n_features)
+            Input data.
+
+        Returns
+        -------
+        Xt : array of shape (n_samples, n_features * n_delays)
+            Transformed data.
+        """
         check_is_fitted(self)
         X = check_array(X, copy=True)
 
@@ -54,7 +80,6 @@ def transform(self, X):
 
         X_delayed = np.zeros((n_samples, n_features * len(self.delays)),
                              dtype=X.dtype)
-
         for idx, delay in enumerate(self.delays):
             beg, end = idx * n_features, (idx + 1) * n_features
             if delay == 0:
@@ -65,3 +90,21 @@ def transform(self, X):
                 X_delayed[:-abs(delay), beg:end] = X[abs(delay):]
 
         return X_delayed
+
+    def reshape_by_delays(self, Xt, axis=1):
+        """Reshape an array, splitting and stacking across delays.
+
+        Parameters
+        ----------
+        Xt : array of shape (n_samples, n_features * n_delays)
+            Transformed array. The split axis must be divisible by n_delays.
+        axis : int, default=1
+            Axis to split, i.e. the axis of size n_features * n_delays.
+
+        Returns
+        -------
+        Xt_split : array of shape (n_delays, n_samples, n_features)
+            Array split along `axis`, stacked along a new first axis.
+        """
+        delays = [0] if self.delays is None else self.delays  # None = no delay
+        return np.stack(np.split(Xt, max(len(delays), 1), axis=axis))
diff --git a/voxelwise_tutorials/tests/test_delayer.py b/voxelwise_tutorials/tests/test_delayer.py
@@ -1,3 +1,6 @@
+import pytest
+import numpy as np
+
 import sklearn.kernel_ridge
 import sklearn.utils.estimator_checks
 
@@ -7,3 +10,35 @@
 @sklearn.utils.estimator_checks.parametrize_with_checks([Delayer()])
 def test_check_estimator(estimator, check):
     check(estimator)
+
+
+@pytest.mark.parametrize('delays', [None, [0]])
+def test_no_delays(delays):
+    X = np.random.randn(10, 3)
+    Xt = Delayer(delays=delays).fit_transform(X)
+    np.testing.assert_array_equal(Xt, X)  # output must equal the input
+
+
+@pytest.mark.parametrize('delays', [[0], [0, 1], [0, -1, 2]])
+def test_zero_delay_identity(delays):
+    X = np.random.randn(10, 3)
+    Xt = Delayer(delays=delays).fit_transform(X)
+    np.testing.assert_array_equal(Xt[:, :X.shape[1]], X)  # first delay is 0
+
+
+@pytest.mark.parametrize('delays', [[1], [1, 2], [-1, 0, 2]])
+def test_nonzero_delay(delays):
+    X = np.random.randn(10, 3)
+    Xt = Delayer(delays=delays).fit_transform(X)
+    with pytest.raises(AssertionError):  # first delay != 0: block differs
+        np.testing.assert_array_equal(Xt[:, :X.shape[1]], X)
+
+
+@pytest.mark.parametrize('delays', [[1], [1, 2], [-1, 0, 2]])
+def test_reshape_by_delays(delays):
+    X = np.random.randn(10, 3)
+    trans = Delayer(delays=delays)
+    Xt = trans.fit_transform(X)
+    Xtt = trans.reshape_by_delays(Xt)
+    # default axis=1: one (n_samples, n_features) slice per delay
+    assert Xtt.shape == (len(delays), X.shape[0], X.shape[1])

Original file line number	Diff line number	Diff line change
`@@ -472,7 +472,7 @@`
`472`	`472`	`},`
`473`	`473`	`"outputs": [],`
`474`	`474`	`"source": [`
`475`		`- "# get the delays\ndelays = pipeline.named_steps['delayer'].delays\nprint(\"delays =\", delays)\n\n# split the ridge coefficients per delays\nprimal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))\nprint(\"(n_delays, n_features, n_voxels) =\", primal_coef_per_delay.shape)\n\n# average over delays\naverage_coef = np.mean(primal_coef_per_delay, axis=0)\nprint(\"(n_features, n_voxels) =\", average_coef.shape)"`
	`475`	`+ "# split the ridge coefficients per delays\ndelayer = pipeline.named_steps['delayer']\nprimal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)\nprint(\"(n_delays, n_features, n_voxels) =\", primal_coef_per_delay.shape)\n\n# average over delays\naverage_coef = np.mean(primal_coef_per_delay, axis=0)\nprint(\"(n_features, n_voxels) =\", average_coef.shape)"`
`476`	`476`	`]`
`477`	`477`	`},`
`478`	`478`	`{`
Original file line number	Diff line number	Diff line change
`@@ -228,7 +228,7 @@`
`228`	`228`	`},`
`229`	`229`	`"outputs": [],`
`230`	`230`	`"source": [`
`231`		`- "pipeline_nodelay = make_pipeline(\n StandardScaler(with_mean=True, with_std=False),\n KernelRidgeCV(\n alphas=alphas, cv=cv,\n solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n n_targets_batch_refit=100)),\n)\npipeline_nodelay"`
	`231`	`+ "pipeline_no_delay = make_pipeline(\n StandardScaler(with_mean=True, with_std=False),\n KernelRidgeCV(\n alphas=alphas, cv=cv,\n solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n n_targets_batch_refit=100)),\n)\npipeline_no_delay"`
`232`	`232`	`]`
`233`	`233`	`},`
`234`	`234`	`{`
`@@ -246,7 +246,7 @@`
`246`	`246`	`},`
`247`	`247`	`"outputs": [],`
`248`	`248`	`"source": [`
`249`		`- "pipeline_nodelay.fit(X_train, Y_train)\nscores_nodelay = pipeline_nodelay.score(X_test, Y_test)\nscores_nodelay = backend.to_numpy(scores_nodelay)\nprint(\"(n_voxels,) =\", scores_nodelay.shape)"`
	`249`	`+ "pipeline_no_delay.fit(X_train, Y_train)\nscores_nodelay = pipeline_no_delay.score(X_test, Y_test)\nscores_nodelay = backend.to_numpy(scores_nodelay)\nprint(\"(n_voxels,) =\", scores_nodelay.shape)"`
`250`	`250`	`]`
`251`	`251`	`},`
`252`	`252`	`{`
`@@ -289,7 +289,7 @@`
`289`	`289`	`},`
`290`	`290`	`"outputs": [],`
`291`	`291`	`"source": [`
`292`		- "# pick the 10 best voxels\nvoxel_selection = np.argsort(scores)[-10:]\n\n# define a pipeline with more delays\npipeline_many_delays = make_pipeline(\n StandardScaler(with_mean=True, with_std=False),\n Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),\n KernelRidgeCV(\n alphas=alphas, cv=cv,\n solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n n_targets_batch_refit=100)),\n)\n\npipeline_many_delays.fit(X_train, Y_train[:, voxel_selection])\n\n# get the (primal) ridge regression coefficients\nprimal_coef = pipeline_many_delays[-1].get_primal_coef()\nprimal_coef = backend.to_numpy(primal_coef)\n\n# get the delays\ndelays = pipeline_many_delays.named_steps['delayer'].delays\n# split the ridge coefficients per delays\nprimal_coef_per_delay = np.stack(np.split(primal_coef, len(delays), axis=0))\n\n# select the feature with the largest coefficients for each voxel\nfeature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),\n axis=0)\nprimal_coef_selection = primal_coef_per_delay[:, feature_selection,\n np.arange(len(voxel_selection))]\n\nplt.plot(delays, primal_coef_selection)\nplt.xlabel('Delays')\nplt.xticks(delays)\nplt.ylabel('Ridge coefficients')\nplt.title(f'Largest feature for the {len(voxel_selection)} best voxels')\nplt.axhline(0, color='k', linewidth=0.5)\nplt.show()"
	`292`	+ "# pick the 10 best voxels\nvoxel_selection = np.argsort(scores)[-10:]\n\n# define a pipeline with more delays\npipeline_more_delays = make_pipeline(\n StandardScaler(with_mean=True, with_std=False),\n Delayer(delays=[0, 1, 2, 3, 4, 5, 6]),\n KernelRidgeCV(\n alphas=alphas, cv=cv,\n solver_params=dict(n_targets_batch=500, n_alphas_batch=5,\n n_targets_batch_refit=100)),\n)\n\npipeline_more_delays.fit(X_train, Y_train[:, voxel_selection])\n\n# get the (primal) ridge regression coefficients\nprimal_coef = pipeline_more_delays[-1].get_primal_coef()\nprimal_coef = backend.to_numpy(primal_coef)\n\n# split the ridge coefficients per delays\ndelayer = pipeline_more_delays.named_steps['delayer']\nprimal_coef_per_delay = delayer.reshape_by_delays(primal_coef, axis=0)\nprint(\"(n_delays, n_features, n_voxels) =\", primal_coef_per_delay.shape)\n\n# select the feature with the largest coefficients for each voxel\nfeature_selection = np.argmax(np.sum(np.abs(primal_coef_per_delay), axis=0),\n axis=0)\nprimal_coef_selection = primal_coef_per_delay[:, feature_selection,\n np.arange(len(voxel_selection))]\n\nplt.plot(delayer.delays, primal_coef_selection)\nplt.xlabel('Delays')\nplt.xticks(delayer.delays)\nplt.ylabel('Ridge coefficients')\nplt.title(f'Largest feature for the {len(voxel_selection)} best voxels')\nplt.axhline(0, color='k', linewidth=0.5)\nplt.show()"
`293`	`293`	`]`
`294`	`294`	`},`
`295`	`295`	`{`