|
relationship based on the `Wordnet <https://wordnet.princeton.edu/>`_ dataset.

*Summary:* We first concatenate the features with multiple temporal delays to
account for the slow hemodynamic response. We then use linear regression to fit
a predictive model of brain activity. The linear regression is regularized to
improve robustness to correlated features and to improve generalization
performance. The optimal regularization hyperparameter is selected over a
grid-search with cross-validation. Finally, the model generalization
performance is evaluated on a held-out test set, comparing the model
|
###############################################################################
# If we repeat an experiment multiple times, part of the fMRI responses might
# change. However, the modeling features do not change over the repeats, so the
# voxelwise encoding model will predict the same signal for each repeat. To
# have an upper bound of the model prediction accuracy, we keep only the
# repeatable part of the signal by averaging the test repeats.
Y_test = Y_test.mean(0)

print("(n_samples_test, n_voxels) =", Y_test.shape)
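As a toy illustration of why averaging the repeats isolates the repeatable part of the signal, here is a small numpy sketch (the shapes and noise model are made up for the example):

```python
import numpy as np

rng = np.random.RandomState(0)
n_repeats, n_samples, n_voxels = 10, 100, 3

# repeatable signal, identical across repeats
signal = rng.randn(n_samples, n_voxels)
# each repeat adds independent measurement noise on top of the signal
repeats = signal[None] + rng.randn(n_repeats, n_samples, n_voxels)

# compare a single repeat and the average of repeats against the true signal
single = repeats[0, :, 0]
averaged = repeats.mean(0)[:, 0]
corr_single = np.corrcoef(single, signal[:, 0])[0, 1]
corr_averaged = np.corrcoef(averaged, signal[:, 0])[0, 1]
print(corr_single < corr_averaged)  # averaging attenuates the noise
```

Averaging over 10 repeats divides the noise standard deviation by about sqrt(10), so the averaged responses correlate much better with the repeatable signal.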
|
#
# Now, let's define the model pipeline.
#
# We first center the features, since we will not use an intercept. The mean
# value in fMRI recordings is non-informative, so each run is detrended and
# demeaned independently, and we do not need to predict an intercept value in
# the linear model.
#
# However, we prefer to avoid normalizing by the standard deviation of each
# feature. If the features are extracted in a consistent way from the stimulus,
# their relative scale is meaningful. Normalizing them independently from each
# other would remove this information. Moreover, the wordnet features are
# one-hot-encoded, which means that each feature is either present (1) or not
# present (0) in each sample. Normalizing one-hot-encoded features is not
# recommended, since it would disproportionately scale the infrequent features.
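One way to implement this centering step is scikit-learn's ``StandardScaler`` with ``with_std=False``; a minimal sketch on made-up features:

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

# toy features with very different scales (made-up values)
X = np.array([[0., 1., 10.],
              [1., 0., 20.],
              [1., 1., 30.]])

# center each feature, but do not divide by its standard deviation
scaler = StandardScaler(with_mean=True, with_std=False)
X_centered = scaler.fit_transform(X)

print(np.allclose(X_centered.mean(0), 0.))       # True: features are centered
print(np.allclose(X_centered.std(0), X.std(0)))  # True: relative scales kept
```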
|

###############################################################################
# Then we concatenate the features with multiple delays to account for the
# hemodynamic response. Due to neurovascular coupling, the recorded BOLD signal
# is delayed in time with respect to the stimulus onset. With different delayed
# versions of the features, the linear regression model will assign a different
# weight to each delayed feature to maximize the predictions. With a sample
# every 2 seconds, we typically use 4 delays [1, 2, 3, 4] to cover the
# hemodynamic response peak. In the next example, we further describe this
# hemodynamic response estimation.
from voxelwise_tutorials.delayer import Delayer
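A minimal sketch of what such a delayed concatenation does, using a hypothetical ``make_delays`` helper (not the actual ``Delayer`` implementation), zero-padding the start of the run:

```python
import numpy as np

def make_delays(X, delays):
    """Concatenate copies of X shifted by each delay (in samples),
    padding the first samples of each shifted copy with zeros."""
    n_samples, n_features = X.shape
    delayed = []
    for delay in delays:
        shifted = np.zeros_like(X)
        shifted[delay:] = X[:n_samples - delay]
        delayed.append(shifted)
    return np.hstack(delayed)

X = np.arange(10, dtype=float).reshape(5, 2)  # (n_samples, n_features)
X_delayed = make_delays(X, delays=[1, 2, 3, 4])
print(X_delayed.shape)  # (5, 8): n_features * n_delays columns
```

Each block of columns is the feature matrix shifted by one delay, so the regression can fit a separate weight per delay.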
|
###############################################################################
# Finally, we use a ridge regression model. Ridge regression is a linear
# regression with L2 regularization. The L2 regularization improves robustness
# to correlated features and improves generalization performance. However, the
# L2 regularization is controlled by a hyperparameter ``alpha`` that needs to
# be tuned for each dataset. This regularization hyperparameter is usually
# selected over a grid search with cross-validation, selecting the
# hyperparameter that maximizes the predictive performance on the validation
# set. More details about cross-validation can be found in the `scikit-learn
# documentation
# <https://scikit-learn.org/stable/modules/cross_validation.html>`_.
#
# For computational reasons, when the number of features is larger than the
|
# mean score over targets. Here, we want to find a different optimal
# hyperparameter per target/voxel, so we use the package `himalaya
# <https://github.com/gallantlab/himalaya>`_ which implements a
# ``scikit-learn`` compatible estimator ``KernelRidgeCV``, with hyperparameter
# selection independently on each target.
from himalaya.kernel_ridge import KernelRidgeCV

###############################################################################
|
# Plot the model prediction accuracy
# ----------------------------------
#
# To visualize the model prediction accuracy, we can plot it for each voxel on
# a flattened surface of the brain. To do so, we use a mapper that is specific
# to each subject's brain. (Check the previous example to see how to use the
# mapper to the Freesurfer average surface.)
import matplotlib.pyplot as plt
from voxelwise_tutorials.viz import plot_flatmap_from_mapper

|
|

###############################################################################
# We can see that the "wordnet" features successfully predict part of the
# measured brain activity, with :math:`R^2` scores as high as 0.4. Note that
# these scores are generalization scores, since they are computed on a test set
# that was not used during model fitting. Since we fitted a model independently
# in each voxel, we can inspect the generalization performance at the best
# available spatial resolution: individual voxels.
#
# The best-predicted voxels are located in visual semantic areas like EBA or
# FFA. This is expected since the wordnet features encode semantic information
# about the visual stimulus. For more discussion about these results, we refer
# the reader to the original publication [1]_.

###############################################################################
# Plot the selected hyperparameters
|
# that have more predictive power.
#
# Since we know the meaning of each feature, we can interpret the large
# regression coefficients. In the case of wordnet features, we can even build a
# graph that represents the features that are linked by a semantic
# relationship.

###############################################################################
# We first get the (primal) ridge regression coefficients from the fitted
|

###############################################################################
# Similarly to [1]_, we correct the coefficients of features linked by a
# semantic relationship. When building the wordnet features, if a frame was
# labeled with `wolf`, the authors automatically added the semantically linked
# categories `canine`, `carnivore`, `placental mammal`, `mammal`, `vertebrate`,
# `chordate`, `organism`, and `whole`. The authors thus argue that the same
# correction needs to be done on the coefficients.

from voxelwise_tutorials.wordnet import load_wordnet
from voxelwise_tutorials.wordnet import correct_coefficients
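The gist of this correction can be sketched on a made-up toy hierarchy. Here we assume the correction sums each category's coefficient with those of all its hypernyms, since labeling a frame with a category also activates all its hypernym features; this is an assumed simplification, and the actual implementation lives in ``correct_coefficients``:

```python
# toy hierarchy: child -> parent (None for the root); made-up for illustration
parents = {"wolf": "canine", "canine": "carnivore", "carnivore": None}
coef = {"wolf": 0.5, "canine": 0.2, "carnivore": -0.1}

def corrected(category):
    """Sum a category's coefficient with those of all its hypernyms
    (an assumed reading of the correction, not the actual implementation)."""
    total = 0.0
    while category is not None:
        total += coef[category]
        category = parents[category]
    return total

print(round(corrected("wolf"), 6))  # 0.6, i.e. 0.5 + 0.2 - 0.1
```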
|
#
# In this example, because we use only a single subject and we perform a
# different voxel selection, our result is slightly different from the one in
# [1]_. We also use a different regularization parameter in each voxel, while
# in [1]_ all voxels had the same regularization parameter. We do not aim to
# reproduce exactly the results in [1]_, but rather to describe the general
# approach.

###############################################################################
# To project the principal component on the cortical surface, we first need to
|
#
# .. [2] Saunders, C., Gammerman, A., & Vovk, V. (1998).
#        Ridge regression learning algorithm in dual variables.