Move the model and tensors to the GPU, and move results back to the CPU as needed for eval

vyinkabanjo · vyinkabanjo · commit 93cbd57edfed · 2025-01-02T04:00:33.000+01:00
diff --git a/lab2/solutions/PT_Part1_MNIST_Solution.ipynb b/lab2/solutions/PT_Part1_MNIST_Solution.ipynb
@@ -107,7 +107,10 @@
         "# Check that we are using a GPU, if not switch runtimes\n",
         "#   using Runtime > Change Runtime Type > GPU\n",
         "assert torch.cuda.is_available(), \"Please enable GPU from runtime settings\"\n",
-        "assert COMET_API_KEY != \"\", \"Please insert your Comet API Key\""
+        "assert COMET_API_KEY != \"\", \"Please insert your Comet API Key\"\n",
+        "\n",
+        "# Set GPU for computation\n",
+        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
       ]
     },
     {
@@ -329,7 +332,7 @@
         "\n",
         "        return x\n",
         "\n",
-        "fc_model = FullyConnectedModel()"
+        "fc_model = FullyConnectedModel().to(device)"
       ]
     },
     {
@@ -410,6 +413,8 @@
         "        total_pred = 0\n",
         "\n",
         "        for images, labels in trainset_loader:\n",
+        "            # Move tensors to GPU so compatible with model\n",
+        "            images, labels = images.to(device), labels.to(device)\n",
         "            # Clear gradients before performing backward pass\n",
         "            optimizer.zero_grad()\n",
         "            # Forward pass\n",
@@ -489,6 +494,10 @@
         "    # Disable gradient calculations when in inference mode\n",
         "    with torch.no_grad():\n",
         "        for images, labels in testset_loader:\n",
+        "            # TODO: ensure evaluation happens on the GPU\n",
+        "            images, labels = images.to(device), labels.to(device)\n",
+        "            # images, labels = TODO\n",
+        "            \n",
         "            #TODO: feed the images into the model and obtain the predictions (forward pass)\n",
         "            outputs = model(images)\n",
         "            # outputs = TODO\n",
@@ -632,10 +641,10 @@
         "        return x\n",
         "\n",
         "# Instantiate the model\n",
-        "cnn_model = CNN()\n",
+        "cnn_model = CNN().to(device)\n",
         "# Initialize the model by passing some data through\n",
         "image, label = train_dataset[0]\n",
-        "image = image.unsqueeze(0)  # Add batch dimension → Shape: (1, 1, 28, 28)\n",
+        "image = image.to(device).unsqueeze(0)  # Add batch dimension → Shape: (1, 1, 28, 28)\n",
         "output = cnn_model(image)\n",
         "# Print the model summary\n",
         "print(cnn_model)"
@@ -665,7 +674,7 @@
       "outputs": [],
       "source": [
         "# Rebuild the CNN model\n",
-        "cnn_model = CNN()\n",
+        "cnn_model = CNN().to(device)\n",
         "\n",
         "# Define hyperparams\n",
         "batch_size = 64\n",
@@ -703,6 +712,8 @@
         "\n",
         "    # First grab a batch of training data which our data loader returns as a tensor\n",
         "    for idx, (images, labels) in enumerate(tqdm(trainset_loader)):\n",
+        "        images, labels = images.to(device), labels.to(device)\n",
+        "        \n",
         "        # Forward pass\n",
         "        #'''TODO: feed the images into the model and obtain the predictions'''\n",
         "        logits = cnn_model(images)\n",
@@ -791,7 +802,7 @@
       "outputs": [],
       "source": [
         "test_image, test_label = test_dataset[0]\n",
-        "test_image = test_image.unsqueeze(0)\n",
+        "test_image = test_image.to(device).unsqueeze(0)\n",
         "cnn_model.eval()\n",
         "predictions_test_image = cnn_model(test_image)"
       ]
@@ -837,7 +848,7 @@
       "source": [
         "'''TODO: identify the digit with the highest confidence prediction for the first\n",
         "    image in the test dataset. '''\n",
-        "predictions_value = predictions_test_image.detach().numpy()\n",
+        "predictions_value = predictions_test_image.cpu().detach().numpy()  # .cpu() copies the tensor to host (CPU) memory, which .numpy() requires\n",
         "prediction = np.argmax(predictions_value)\n",
         "# prediction = # TODO\n",
         "print(prediction)"
@@ -861,7 +872,7 @@
       "outputs": [],
       "source": [
         "print(\"Label of this digit is:\", test_label)\n",
-        "plt.imshow(test_image[0,0,:,:], cmap=plt.cm.binary)\n",
+        "plt.imshow(test_image[0,0,:,:].cpu(), cmap=plt.cm.binary)\n",
         "comet_model_2.log_figure(figure=plt)"
       ]
     },
@@ -907,9 +918,9 @@
         "all_images = torch.cat(all_images)            # Shape: (total_samples, 1, 28, 28)\n",
         "\n",
         "# Convert tensors to NumPy for compatibility with plotting functions\n",
-        "predictions = all_predictions.numpy()  # Shape: (total_samples, num_classes)\n",
-        "test_labels = all_labels.numpy()       # Shape: (total_samples,)\n",
-        "test_images = all_images.numpy()       # Shape: (total_samples, 1, 28, 28)"
+        "predictions = all_predictions.cpu().numpy()  # Shape: (total_samples, num_classes)\n",
+        "test_labels = all_labels.cpu().numpy()       # Shape: (total_samples,)\n",
+        "test_images = all_images.cpu().numpy()       # Shape: (total_samples, 1, 28, 28)"
       ]
     },
     {