add kelvins tutorial

Sceki · Sceki · commit c3bdefa1b2c5 · 2025-04-07T16:49:44.000+02:00
diff --git a/docs/conf.py b/docs/conf.py
@@ -96,7 +96,8 @@
     "LSTM_training.ipynb",
     "basics.ipynb",
     "probabilistic_programming_module.ipynb",
-    "plotting.ipynb"
+    "plotting.ipynb",
+    "kelvins_dataset.ipynb"
 ]
 
 latex_engine = "xelatex"
diff --git a/docs/notebooks/kelvins_dataset.ipynb b/docs/notebooks/kelvins_dataset.ipynb
@@ -0,0 +1,224 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import kessler\n",
+    "from kessler import EventDataset\n",
+    "from kessler.nn import LSTMPredictor\n",
+    "from kessler.data import kelvins_to_event_dataset\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Set the random number generator seed for reproducibility\n",
+    "kessler.seed(1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Data Loading\n",
+    "\n",
+    "Kessler accepts CDMs either in KVN format or as pandas dataframes. We hereby show a pandas dataframe loading example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#As an example, we first show the case in which the data comes from the Kelvins competition.\n",
+    "#For this, we built a specific converter that takes care of the conversion from Kelvins format\n",
+    "#to standard CDM format (the data can be downloaded at https://kelvins.esa.int/collision-avoidance-challenge/data/):\n",
+    "file_name = '/home/gunes/data/kelvins/train_data/train_data.csv'\n",
+    "events = kelvins_to_event_dataset(file_name, drop_features=['c_rcs_estimate', 't_rcs_estimate'], num_events=1000) #we use only 200 events"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Instead, this is a generic real CDM data loader that should parse your Pandas (uncomment the following lines if needed):\n",
+    "#file_name = 'path_to_csv/file.csv'\n",
+    "\n",
+    "#df=pd.read_csv(file_name)\n",
+    "#events = EventDataset.from_pandas(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Descriptive Statistics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Descriptive statistics of the event:\n",
+    "kessler_stats = events.to_dataframe().describe()\n",
+    "print(kessler_stats)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# LSTM Training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#We only use features with numeric content for the training\n",
+    "#nn_features is a list of the feature names taken into account for the training:\n",
+    "#it can be edited in case more features want to be added or removed\n",
+    "nn_features = events.common_features(only_numeric=True)\n",
+    "print(nn_features)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split data into a test set (5% of the total number of events)\n",
+    "len_test_set=int(0.05*len(events))\n",
+    "print('Test data:', len_test_set)\n",
+    "events_test=events[-len_test_set:]\n",
+    "print(events_test)\n",
+    "\n",
+    "# The rest of the data will be used for training and validation\n",
+    "print('Training and validation data:', len(events)-len_test_set)\n",
+    "events_train_and_val=events[:-len_test_set]\n",
+    "print(events_train_and_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create an LSTM predictor, specialized to the nn_features we extracted above\n",
+    "model = LSTMPredictor(\n",
+    "            lstm_size=256,  # Number of hidden units per LSTM layer\n",
+    "            lstm_depth=2,  # Number of stacked LSTM layers\n",
+    "            dropout=0.2,  # Dropout probability\n",
+    "            features=nn_features)  # The list of feature names to use in the LSTM\n",
+    "\n",
+    "# Start training\n",
+    "model.learn(events_train_and_val, \n",
+    "            epochs=10, # Number of epochs (one epoch is one full pass through the training dataset)\n",
+    "            lr=1e-3, # Learning rate, can decrease it if training diverges\n",
+    "            batch_size=16, # Minibatch size, can be decreased if there are issues with memory use\n",
+    "            device='cpu', # Can be 'cuda' if there is a GPU available\n",
+    "            valid_proportion=0.15, # Proportion of the data to use as a validation set internally\n",
+    "            num_workers=4, # Number of multithreaded dataloader workers, 4 is good for performance, but if there are any issues or errors, please try num_workers=1 as this solves issues with PyTorch most of the time\n",
+    "            event_samples_for_stats=1000) # Number of events to use to compute NN normalization factors, have this number as big as possible (and at least a few thousands)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Save the model to a file after training:\n",
+    "model.save(file_name=\"LSTM_20epochs_lr10-4_batchsize16\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#NN loss plotted to a file:\n",
+    "model.plot_loss(file_name='plot_loss.pdf')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#we show an example CDM from the set:\n",
+    "events_train_and_val[0][0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#we take a single event, we remove the last CDM and try to predict it\n",
+    "event=events_test[3]\n",
+    "event_len = len(event)\n",
+    "print(event)\n",
+    "event_beginning = event[0:event_len-1]\n",
+    "print(event_beginning)\n",
+    "event_evolution = model.predict_event(event_beginning, num_samples=100, max_length=14)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#We plot the prediction in red:\n",
+    "axs = event_evolution.plot_features(['RELATIVE_SPEED', 'MISS_DISTANCE', 'OBJECT1_CT_T'], return_axs=True, linewidth=0.1, color='red', alpha=0.33, label='Prediction')\n",
+    "#and the ground truth value in blue:\n",
+    "event.plot_features(['RELATIVE_SPEED', 'MISS_DISTANCE', 'OBJECT1_CT_T'], axs=axs, label='Real', legend=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#we now plot the uncertainty prediction for all the covariance matrix elements of both OBJECT1 and OBJECT2:\n",
+    "axs = event_evolution.plot_uncertainty(return_axs=True, linewidth=0.5, label='Prediction', alpha=0.5, color='red', legend=True, diagonal=False)\n",
+    "event.plot_uncertainty(axs=axs, label='Real', diagonal=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/tutorials.rst b/docs/tutorials.rst
@@ -13,6 +13,7 @@ These tutorials include some basic examples on how to use kessler
   notebooks/basics.ipynb
   notebooks/cdms_analysis_and_plotting.ipynb
   notebooks/plotting.ipynb
+  notebooks/kelvins_dataset.ipynb
 
 
 Advanced

Original file line number	Diff line number	Diff line change
`@@ -96,7 +96,8 @@`
`96`	`96`	`"LSTM_training.ipynb",`
`97`	`97`	`"basics.ipynb",`
`98`	`98`	`"probabilistic_programming_module.ipynb",`
`99`		`- "plotting.ipynb"`
	`99`	`+ "plotting.ipynb",`
	`100`	`+ "kelvins_dataset.ipynb"`
`100`	`101`	`]`
`101`	`102`
`102`	`103`	`latex_engine = "xelatex"`