|
8 | 8 | "\n" |
9 | 9 | ] |
10 | 10 | }, |
11 | | - { |
12 | | - "cell_type": "code", |
13 | | - "execution_count": 1, |
14 | | - "metadata": {}, |
15 | | - "outputs": [], |
16 | | - "source": [ |
17 | | - "import kessler" |
18 | | - ] |
19 | | - }, |
20 | 11 | { |
21 | 12 | "cell_type": "markdown", |
22 | 13 | "metadata": {}, |
|
45 | 36 | }, |
46 | 37 | { |
47 | 38 | "cell_type": "code", |
48 | | - "execution_count": 2, |
| 39 | + "execution_count": 1, |
49 | 40 | "metadata": {}, |
50 | 41 | "outputs": [], |
51 | 42 | "source": [ |
|
62 | 53 | }, |
63 | 54 | { |
64 | 55 | "cell_type": "code", |
65 | | - "execution_count": 4, |
| 56 | + "execution_count": 2, |
66 | 57 | "metadata": {}, |
67 | 58 | "outputs": [ |
68 | 59 | { |
69 | 60 | "name": "stdout", |
70 | 61 | "output_type": "stream", |
71 | 62 | "text": [ |
72 | | - "Loading CDMS (with extension .cdm.kvn.txt) from directory: /Users/giacomoacciarini/cdm_data/cdms_kvn/\n", |
73 | | - "Loaded 39 CDMs grouped into 4 events\n" |
| 63 | + "Loading CDMS (with extension .kvn) from directory: synthetic_cdms/\n", |
| 64 | + "Loaded 14 CDMs grouped into 2 events\n" |
74 | 65 | ] |
75 | 66 | } |
76 | 67 | ], |
77 | 68 | "source": [ |
78 | | - "path_to_cdms_folder='cdm_data/cdms_kvn/'\n", |
| 69 | + "path_to_cdms_folder='synthetic_cdms/'\n", |
79 | 70 | "\n", |
80 | | - "events=EventDataset(path_to_cdms_folder)\n", |
| 71 | + "events=EventDataset(path_to_cdms_folder,cdm_extension='.kvn')\n", |
81 | 72 | "#A message appears confirming that the loading has happened, with the number of CDMs and events." |
82 | 73 | ] |
83 | 74 | }, |
84 | | - { |
85 | | - "cell_type": "markdown", |
86 | | - "metadata": {}, |
87 | | - "source": [ |
88 | | - "## Loading CDMs from pandas ``DataFrame`` object\n", |
89 | | - "\n", |
90 | | - "\n", |
91 | | - "\n" |
92 | | - ] |
93 | | - }, |
94 | | - { |
95 | | - "cell_type": "markdown", |
96 | | - "metadata": {}, |
97 | | - "source": [ |
98 | | - "\n", |
99 | | - "In this tutorial, we show how to load CDMs from pandas ``DataFrame`` object.\n", |
100 | | - "\n", |
101 | | - "First we perform the relevant imports:\n", |
102 | | - " " |
103 | | - ] |
104 | | - }, |
105 | | - { |
106 | | - "cell_type": "code", |
107 | | - "execution_count": 5, |
108 | | - "metadata": {}, |
109 | | - "outputs": [], |
110 | | - "source": [ |
111 | | - "import kessler\n", |
112 | | - "import pandas as pd\n", |
113 | | - "from kessler import EventDataset\n" |
114 | | - ] |
115 | | - }, |
116 | | - { |
117 | | - "cell_type": "markdown", |
118 | | - "metadata": {}, |
119 | | - "source": [ |
120 | | - "Then, we create the ``EventDataset`` object, after having uploaded the pandas dataframe and created the ``DataFrame`` object:" |
121 | | - ] |
122 | | - }, |
123 | | - { |
124 | | - "cell_type": "code", |
125 | | - "execution_count": 7, |
126 | | - "metadata": {}, |
127 | | - "outputs": [ |
128 | | - { |
129 | | - "name": "stdout", |
130 | | - "output_type": "stream", |
131 | | - "text": [ |
132 | | - "Dataframe with 2 rows and 231 columns\n", |
133 | | - "Dropping columns with NaNs\n", |
134 | | - "Dataframe with 2 rows and 104 columns\n", |
135 | | - "Grouping by event_id\n", |
136 | | - "Grouped into 1 event(s)\n", |
137 | | - "Converting DataFrame to EventDataset\n", |
138 | | - "Time spent | Time remain.| Progress | Events | Events/sec\n", |
139 | | - "0d:00:00:00 | 0d:00:00:00 | #################### | 1/1 | 404.06 \n", |
140 | | - "\n", |
141 | | - "EventDataset(Events:1, number of CDMs per event: 2 (min), 2 (max), 2.00 (mean))\n" |
142 | | - ] |
143 | | - } |
144 | | - ], |
145 | | - "source": [ |
146 | | - "file_name='cdm_data/cdms_csv/sample.csv'\n", |
147 | | - "df=pd.read_csv(file_name)\n", |
148 | | - "events=EventDataset.from_pandas(df)" |
149 | | - ] |
150 | | - }, |
151 | 75 | { |
152 | 76 | "cell_type": "markdown", |
153 | 77 | "metadata": {}, |
|
173 | 97 | }, |
174 | 98 | { |
175 | 99 | "cell_type": "code", |
176 | | - "execution_count": 8, |
| 100 | + "execution_count": 3, |
177 | 101 | "metadata": {}, |
178 | | - "outputs": [ |
179 | | - { |
180 | | - "name": "stdout", |
181 | | - "output_type": "stream", |
182 | | - "text": [ |
183 | | - "Cannot import dbm.gnu: No module named '_gdbm'\n" |
184 | | - ] |
185 | | - }, |
186 | | - { |
187 | | - "name": "stderr", |
188 | | - "output_type": "stream", |
189 | | - "text": [ |
190 | | - "/Users/giacomoacciarini/miniconda3/envs/fdl/lib/python3.7/site-packages/pyprob/util.py:327: UserWarning: Empirical distributions on disk may perform slow because GNU DBM is not available. Please install and configure gdbm library for Python for better speed.\n", |
191 | | - " warnings.warn('Empirical distributions on disk may perform slow because GNU DBM is not available. Please install and configure gdbm library for Python for better speed.')\n" |
192 | | - ] |
193 | | - } |
194 | | - ], |
| 102 | + "outputs": [], |
195 | 103 | "source": [ |
196 | | - "import kessler\n", |
197 | 104 | "from kessler.data import kelvins_to_event_dataset" |
198 | 105 | ] |
199 | 106 | }, |
|
230 | 137 | } |
231 | 138 | ], |
232 | 139 | "source": [ |
233 | | - "file_name='cdm_data/kelvins_data/test_data.csv'\n", |
| 140 | + "file_name='kelvins_data/test_data.csv'\n", |
234 | 141 | "events=kelvins_to_event_dataset(file_name, drop_features=['c_rcs_estimate', 't_rcs_estimate'], num_events=1000)\n", |
235 | 142 | "#The output will show the number of CDMs and events loaded, as they progress.\n" |
236 | 143 | ] |
|
0 commit comments