last updates, docstrings and removal of extras

Sceki · Sceki · commit 217a85a6b823 · 2025-04-24T17:14:42.000+02:00
diff --git a/docs/notebooks/basics.ipynb b/docs/notebooks/basics.ipynb
@@ -8,15 +8,6 @@
     "\n"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import kessler"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -45,7 +36,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -62,92 +53,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loading CDMS (with extension .cdm.kvn.txt) from directory: /Users/giacomoacciarini/cdm_data/cdms_kvn/\n",
-      "Loaded 39 CDMs grouped into 4 events\n"
+      "Loading CDMS (with extension .kvn) from directory: synthetic_cdms/\n",
+      "Loaded 14 CDMs grouped into 2 events\n"
      ]
     }
    ],
    "source": [
-    "path_to_cdms_folder='cdm_data/cdms_kvn/'\n",
+    "path_to_cdms_folder='synthetic_cdms/'\n",
     "\n",
-    "events=EventDataset(path_to_cdms_folder)\n",
+    "events=EventDataset(path_to_cdms_folder,cdm_extension='.kvn')\n",
     "#A message appears confirming that the loading has happened, with the number of CDMs and events."
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Loading CDMs from pandas ``DataFrame`` object\n",
-    "\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "\n",
-    "In this tutorial, we show how to load CDMs from pandas ``DataFrame`` object.\n",
-    "\n",
-    "First we perform the relevant imports:\n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import kessler\n",
-    "import pandas as pd\n",
-    "from kessler import EventDataset\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Then, we create the ``EventDataset`` object, after having uploaded the pandas dataframe and created the ``DataFrame`` object:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Dataframe with 2 rows and 231 columns\n",
-      "Dropping columns with NaNs\n",
-      "Dataframe with 2 rows and 104 columns\n",
-      "Grouping by event_id\n",
-      "Grouped into 1 event(s)\n",
-      "Converting DataFrame to EventDataset\n",
-      "Time spent  | Time remain.| Progress             | Events | Events/sec\n",
-      "0d:00:00:00 | 0d:00:00:00 | #################### | 1/1 | 404.06         \n",
-      "\n",
-      "EventDataset(Events:1, number of CDMs per event: 2 (min), 2 (max), 2.00 (mean))\n"
-     ]
-    }
-   ],
-   "source": [
-    "file_name='cdm_data/cdms_csv/sample.csv'\n",
-    "df=pd.read_csv(file_name)\n",
-    "events=EventDataset.from_pandas(df)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -173,27 +97,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Cannot import dbm.gnu: No module named '_gdbm'\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/giacomoacciarini/miniconda3/envs/fdl/lib/python3.7/site-packages/pyprob/util.py:327: UserWarning: Empirical distributions on disk may perform slow because GNU DBM is not available. Please install and configure gdbm library for Python for better speed.\n",
-      "  warnings.warn('Empirical distributions on disk may perform slow because GNU DBM is not available. Please install and configure gdbm library for Python for better speed.')\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "import kessler\n",
     "from kessler.data import kelvins_to_event_dataset"
    ]
   },
@@ -230,7 +137,7 @@
     }
    ],
    "source": [
-    "file_name='cdm_data/kelvins_data/test_data.csv'\n",
+    "file_name='kelvins_data/test_data.csv'\n",
     "events=kelvins_to_event_dataset(file_name, drop_features=['c_rcs_estimate', 't_rcs_estimate'], num_events=1000)\n",
     "#The output will show the number of CDMs and events loaded, as they progress.\n"
    ]
diff --git a/docs/notebooks/cdms_analysis_and_plotting.ipynb b/docs/notebooks/cdms_analysis_and_plotting.ipynb
@@ -41,7 +41,6 @@
    ],
    "source": [
     "import kessler\n",
-    "from kessler import EventDataset\n",
     "path_to_cdms_folder='synthetic_cdms'\n",
     "events=kessler.EventDataset(cdms_dir=path_to_cdms_folder,cdm_extension='.kvn')\n",
     "#events=EventDataset(path_to_cdms_folder)"
diff --git a/docs/notebooks/kelvins_dataset.ipynb b/docs/notebooks/kelvins_dataset.ipynb
@@ -7,7 +7,6 @@
    "outputs": [],
    "source": [
     "import kessler\n",
-    "from kessler import EventDataset\n",
     "from kessler.nn import LSTMPredictor\n",
     "from kessler.data import kelvins_to_event_dataset\n",
     "import pandas as pd\n",
@@ -28,13 +27,14 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "322e9b06",
    "metadata": {},
    "outputs": [],
    "source": [
     "#As an example, we first show the case in which the data comes from the Kelvins competition.\n",
     "#For this, we built a specific converter that takes care of the conversion from Kelvins format\n",
     "#to standard CDM format (the data can be downloaded at https://kelvins.esa.int/collision-avoidance-challenge/data/):\n",
-    "file_name = '/home/gunes/data/kelvins/train_data/train_data.csv'\n",
+    "file_name='kelvins_data/train_data.csv'\n",
     "events = kelvins_to_event_dataset(file_name, drop_features=['c_rcs_estimate', 't_rcs_estimate'], num_events=1000) #we use only 200 events"
    ]
   },
diff --git a/kessler/event.py b/kessler/event.py
@@ -13,6 +13,7 @@
 import matplotlib as mpl
 import matplotlib.pyplot as plt
 from glob import glob
+from tqdm import tqdm
 import copy
 import os
 import re
@@ -164,7 +165,7 @@ def __len__(self):
 
 
 class EventDataset():
-    def __init__(self, cdms_dir=None, cdm_extension='.cdm.kvn.txt', events=None):
+    def __init__(self, cdms_dir=None, cdm_extension='.kvn', events=None):
         if events is None:
             if cdms_dir is None:
                 self._events = []
@@ -398,10 +399,8 @@ def from_pandas(df, cdm_compatible_fields={
         df_events = df.groupby(group_events_by).groups
         print('Grouped into {} event(s)'.format(len(df_events)))
         events = []
-        util.progress_bar_init('Converting DataFrame to EventDataset', len(df_events), 'Events')
         i = 0
-        for k, v in df_events.items():
-            util.progress_bar_update(i)
+        for k, v in tqdm(df_events.items()):
             i += 1
             df_event = df.iloc[v]
             cdms = []
@@ -416,7 +415,6 @@ def from_pandas(df, cdm_compatible_fields={
                         cdm[cdm_name] = value
                 cdms.append(cdm)
             events.append(Event(cdms))
-        util.progress_bar_end()
         event_dataset = EventDataset(events=events)
         print('\n{}'.format(event_dataset))
         return event_dataset
@@ -425,12 +423,8 @@ def to_dataframe(self):
         if len(self) == 0:
             return pd.DataFrame()
         event_dataframes = []
-
-        util.progress_bar_init('Converting EventDataset to DataFrame', len(self._events), 'Events')
-        for i, event in enumerate(self._events):
-            util.progress_bar_update(i)
+        for i, event in enumerate(tqdm(self._events)):
             event_dataframes.append(event.to_dataframe())
-        util.progress_bar_end()
         return pd.concat(event_dataframes, ignore_index=True)
 
     def dates(self):
diff --git a/kessler/util.py b/kessler/util.py
diff --git a/setup.py b/setup.py