|
labels. To interpret our model, labels can be organized in a graph of semantic
relationships based on the `Wordnet <https://wordnet.princeton.edu/>`_ dataset.
22 | 22 |
|
*Summary:* We first concatenate the features with multiple temporal delays, to
account for the slow hemodynamic response. We then fit a predictive model of
BOLD activity, using a linear regression that weights each delayed feature
differently. The linear regression is regularized to improve robustness to
correlated features and to improve generalization. The optimal regularization
hyperparameter is selected via grid search with cross-validation. Finally,
the model generalization performance is evaluated on a held-out test set,
|
# Load the data
# -------------
#
# We first load the fMRI responses. These responses have been preprocessed as
# described in [1]_. The data is separated into a training set ``Y_train`` and a
# testing set ``Y_test``. The training set is used for fitting models, and for
# selecting the best models and hyperparameters. The testing set is later used
# to estimate the generalization performance of the selected model. The
# testing set contains multiple repetitions of the same experiment, to estimate
# an upper bound of the model performance (cf. previous example).
import numpy as np
from voxelwise_tutorials.io import load_hdf5_array

|
|
Y_test = np.nan_to_num(Y_test)

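The two steps above can be sketched on toy data. This is a minimal illustration only: the real ``Y_test`` shape, and the convention of stacking repetitions along the first axis, are assumptions here, not taken from this file.

```python
import numpy as np

# Toy stand-in for fMRI responses: 2 repeats, 4 time samples, 3 voxels.
# (The real Y_test shape is an assumption for this sketch.)
Y_test_toy = np.random.randn(2, 4, 3)
Y_test_toy[0, 1, 2] = np.nan  # simulate a missing voxel value

# Replace NaNs by zeros, as done above with np.nan_to_num.
Y_test_toy = np.nan_to_num(Y_test_toy)

# Averaging over repeats reduces measurement noise, which is useful when
# comparing model predictions with the repeated test responses.
Y_test_mean = Y_test_toy.mean(axis=0)
print(Y_test_mean.shape)  # (4, 3)
```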
|
###############################################################################
# Then, we load the semantic "wordnet" features, extracted from the stimulus at
# each time point. The features corresponding to the training set are denoted
# ``X_train``, and the features corresponding to the testing set are denoted
# ``X_test``.
feature_space = "wordnet"

|
file_name = os.path.join(directory, "features", f"{feature_space}.hdf")
|
#
# However, we prefer not to normalize by the standard deviation of each
# feature. Indeed, if the features are extracted in a consistent way from the
# stimulus, their relative scale is meaningful. Normalizing them independently
# from each other would remove this meaning. Moreover, the wordnet features are
# one-hot-encoded, which means that each feature is either present (1) or not
# present (0) in each sample. Normalizing one-hot-encoded features is not
|
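The centering-without-rescaling choice discussed above can be sketched with scikit-learn's ``StandardScaler`` and ``with_std=False``. The toy one-hot matrix below is illustrative, not taken from the tutorial's data.

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

# Toy one-hot-encoded features: 6 samples, 3 binary features.
X_toy = np.array([
    [1, 0, 0],
    [1, 0, 1],
    [0, 1, 0],
    [0, 0, 0],
    [1, 1, 0],
    [0, 0, 1],
], dtype=float)

# Center each feature (remove its mean), but keep the relative scales
# by not dividing by the standard deviation.
scaler = StandardScaler(with_mean=True, with_std=False)
X_centered = scaler.fit_transform(X_toy)

print(X_centered.mean(axis=0))  # close to [0, 0, 0]
```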
###############################################################################
# We can display the ``scikit-learn`` pipeline with an HTML diagram.
from sklearn import set_config
set_config(display='diagram')  # requires scikit-learn 0.23 or later
pipeline

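The pipeline described in the summary (delayed features, centering, cross-validated ridge) can be re-implemented in a self-contained sketch using only scikit-learn and numpy. The tutorial's own pipeline uses dedicated classes; the ``make_delays`` helper, the delay values, and the alpha grid below are illustrative assumptions.

```python
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler

def make_delays(X, delays=(1, 2, 3, 4)):
    """Concatenate time-lagged copies of X (shape (n_samples, n_features)),
    zero-padding the first samples of each lagged copy."""
    delayed = []
    for d in delays:
        Xd = np.zeros_like(X)
        Xd[d:] = X[:-d]
        delayed.append(Xd)
    return np.concatenate(delayed, axis=1)

rng = np.random.RandomState(0)
X = rng.randn(100, 5)  # toy features (n_samples, n_features)
Y = rng.randn(100, 3)  # toy BOLD responses (n_samples, n_voxels)

pipeline = make_pipeline(
    StandardScaler(with_mean=True, with_std=False),  # center only
    FunctionTransformer(make_delays),                # add temporal delays
    RidgeCV(alphas=np.logspace(-2, 5, 8)),           # cross-validated ridge
)
pipeline.fit(X, Y)
print(pipeline.predict(X).shape)  # (100, 3)
```

``RidgeCV`` handles the multi-voxel target directly, fitting one set of weights per voxel while sharing the regularization grid search.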
|
###############################################################################
|