Skip to content

Commit bb8fb27

Browse files
committed
.
1 parent 864b8ac commit bb8fb27

10 files changed

Lines changed: 603 additions & 831 deletions

File tree

.DS_Store

0 Bytes
Binary file not shown.

data/processed/~$cleaned_data.xlsx

165 Bytes
Binary file not shown.

models/.DS_Store

6 KB
Binary file not shown.

models/model_BIMReTA.pkl

-326 Bytes
Binary file not shown.

notebooks/1.0-exploratory-data-analysis.ipynb

Lines changed: 80 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525
},
2626
{
2727
"cell_type": "code",
28-
"execution_count": 1,
28+
"execution_count": 2,
2929
"id": "b0bf52d3-1cf2-4048-8640-db8446199260",
3030
"metadata": {
3131
"ExecuteTime": {
32-
"end_time": "2024-04-02T23:03:44.827295Z",
33-
"start_time": "2024-04-02T23:03:44.732965Z"
32+
"end_time": "2024-04-03T23:30:50.790645Z",
33+
"start_time": "2024-04-03T23:30:50.693004Z"
3434
}
3535
},
3636
"outputs": [],
@@ -44,12 +44,12 @@
4444
},
4545
{
4646
"cell_type": "code",
47-
"execution_count": 4,
47+
"execution_count": 3,
4848
"id": "892caecf-26c1-4976-a9ac-7082e1a4d0e4",
4949
"metadata": {
5050
"ExecuteTime": {
51-
"end_time": "2024-04-02T23:04:22.745724Z",
52-
"start_time": "2024-04-02T23:04:21.884073Z"
51+
"end_time": "2024-04-03T23:30:52.430695Z",
52+
"start_time": "2024-04-03T23:30:52.017766Z"
5353
}
5454
},
5555
"outputs": [
@@ -122,27 +122,93 @@
122122
},
123123
{
124124
"cell_type": "code",
125-
"execution_count": 5,
125+
"execution_count": 9,
126126
"id": "bd851536-da7e-4a61-898b-ccb0435f539f",
127127
"metadata": {
128128
"ExecuteTime": {
129-
"end_time": "2024-04-02T23:04:43.330554Z",
130-
"start_time": "2024-04-02T23:04:43.300434Z"
129+
"end_time": "2024-04-03T23:34:50.307289Z",
130+
"start_time": "2024-04-03T23:34:50.099158Z"
131131
}
132132
},
133133
"outputs": [
134134
{
135135
"data": {
136-
"text/plain": " Scene no. oil_spill_size evaporation_and_natural_disperson \\\ncount 2700 2700 2700.000000 \nunique 2700 3 NaN \ntop Scene 1 LARGE NaN \nfreq 1 929 NaN \nmean NaN NaN 54.946296 \nstd NaN NaN 26.132526 \nmin NaN NaN 10.000000 \n25% NaN NaN 32.750000 \n50% NaN NaN 55.000000 \n75% NaN NaN 78.000000 \nmax NaN NaN 99.000000 \n\n persistence oil_amount_to_recover E_ss E_sl \\\ncount 2700.000000 2700.000000 2700.000000 2700.000000 \nunique NaN NaN NaN NaN \ntop NaN NaN NaN NaN \nfreq NaN NaN NaN NaN \nmean 0.499259 1.049515 -0.000370 0.027407 \nstd 0.500092 0.546895 0.560196 0.583194 \nmin 0.000000 0.101499 -1.000000 -1.000000 \n25% 0.000000 0.581321 0.000000 0.000000 \n50% 0.000000 1.061242 0.000000 0.000000 \n75% 1.000000 1.520837 0.000000 0.000000 \nmax 1.000000 1.999423 1.000000 1.000000 \n\n E_sw E_sb sufficient_mixing_energy ... displacement \\\ncount 2700.000000 2700.000000 2700 ... 2700 \nunique NaN NaN 2 ... 2 \ntop NaN NaN yes ... no \nfreq NaN NaN 1400 ... 1356 \nmean 0.018148 0.000000 NaN ... NaN \nstd 0.578774 0.704087 NaN ... NaN \nmin -1.000000 -1.000000 NaN ... NaN \n25% 0.000000 0.000000 NaN ... NaN \n50% 0.000000 0.000000 NaN ... NaN \n75% 0.000000 0.000000 NaN ... NaN \nmax 1.000000 1.000000 NaN ... NaN \n\n E_ssI E_slI E_swI E_sbI shoreline_length \\\ncount 2700.000000 2700.000000 2700.000000 2700.000000 2700.000000 \nunique NaN NaN NaN NaN NaN \ntop NaN NaN NaN NaN NaN \nfreq NaN NaN NaN NaN NaN \nmean 0.011481 -0.005185 -0.003333 0.045926 3.060745 \nstd 0.562719 0.577434 0.589827 0.707318 1.157626 \nmin -1.000000 -1.000000 -1.000000 -1.000000 1.003905 \n25% 0.000000 0.000000 0.000000 0.000000 2.046566 \n50% 0.000000 0.000000 0.000000 0.000000 3.106652 \n75% 0.000000 0.000000 0.000000 1.000000 4.049098 \nmax 1.000000 1.000000 1.000000 1.000000 4.999372 \n\n distance_to_inhabitation mcr_DT_output cdu_DT_output \\\ncount 2700.000000 2700 2700 \nunique NaN 4 4 \ntop NaN ok Not recommended \nfreq NaN 1347 1722 \nmean 256.838519 NaN NaN \nstd 141.859053 NaN NaN \nmin 10.000000 NaN NaN \n25% 133.000000 NaN NaN \n50% 255.500000 NaN NaN \n75% 380.000000 NaN NaN \nmax 499.000000 NaN NaN \n\n isb_DT_output \ncount 2700 \nunique 3 \ntop OK \nfreq 1832 \nmean NaN \nstd NaN \nmin NaN \n25% NaN \n50% NaN \n75% NaN \nmax NaN \n\n[11 rows x 30 columns]",
137-
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Scene no.</th>\n <th>oil_spill_size</th>\n <th>evaporation_and_natural_disperson</th>\n <th>persistence</th>\n <th>oil_amount_to_recover</th>\n <th>E_ss</th>\n <th>E_sl</th>\n <th>E_sw</th>\n <th>E_sb</th>\n <th>sufficient_mixing_energy</th>\n <th>...</th>\n <th>displacement</th>\n <th>E_ssI</th>\n <th>E_slI</th>\n <th>E_swI</th>\n <th>E_sbI</th>\n <th>shoreline_length</th>\n <th>distance_to_inhabitation</th>\n <th>mcr_DT_output</th>\n <th>cdu_DT_output</th>\n <th>isb_DT_output</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>count</th>\n <td>2700</td>\n <td>2700</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700</td>\n <td>...</td>\n <td>2700</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700.000000</td>\n <td>2700</td>\n <td>2700</td>\n <td>2700</td>\n </tr>\n <tr>\n <th>unique</th>\n <td>2700</td>\n <td>3</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2</td>\n <td>...</td>\n <td>2</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>4</td>\n <td>4</td>\n <td>3</td>\n </tr>\n <tr>\n <th>top</th>\n <td>Scene 1</td>\n <td>LARGE</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>yes</td>\n <td>...</td>\n <td>no</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>ok</td>\n <td>Not recommended</td>\n <td>OK</td>\n </tr>\n <tr>\n <th>freq</th>\n <td>1</td>\n <td>929</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>1400</td>\n <td>...</td>\n <td>1356</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>1347</td>\n <td>1722</td>\n <td>1832</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>54.946296</td>\n <td>0.499259</td>\n <td>1.049515</td>\n <td>-0.000370</td>\n <td>0.027407</td>\n <td>0.018148</td>\n <td>0.000000</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>0.011481</td>\n <td>-0.005185</td>\n <td>-0.003333</td>\n <td>0.045926</td>\n <td>3.060745</td>\n <td>256.838519</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>std</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>26.132526</td>\n <td>0.500092</td>\n <td>0.546895</td>\n <td>0.560196</td>\n <td>0.583194</td>\n <td>0.578774</td>\n <td>0.704087</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>0.562719</td>\n <td>0.577434</td>\n <td>0.589827</td>\n <td>0.707318</td>\n <td>1.157626</td>\n <td>141.859053</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>min</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>10.000000</td>\n <td>0.000000</td>\n <td>0.101499</td>\n <td>-1.000000</td>\n <td>-1.000000</td>\n <td>-1.000000</td>\n <td>-1.000000</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>-1.000000</td>\n <td>-1.000000</td>\n <td>-1.000000</td>\n <td>-1.000000</td>\n <td>1.003905</td>\n <td>10.000000</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>32.750000</td>\n <td>0.000000</td>\n <td>0.581321</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>2.046566</td>\n <td>133.000000</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>55.000000</td>\n <td>0.000000</td>\n <td>1.061242</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>3.106652</td>\n <td>255.500000</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>78.000000</td>\n <td>1.000000</td>\n <td>1.520837</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>1.000000</td>\n <td>4.049098</td>\n <td>380.000000</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>max</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>99.000000</td>\n <td>1.000000</td>\n <td>1.999423</td>\n <td>1.000000</td>\n <td>1.000000</td>\n <td>1.000000</td>\n <td>1.000000</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>1.000000</td>\n <td>1.000000</td>\n <td>1.000000</td>\n <td>1.000000</td>\n <td>4.999372</td>\n <td>499.000000</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n<p>11 rows × 30 columns</p>\n</div>"
136+
"text/plain": "E_ss\n 0 1853\n-1 424\n 1 423\nName: count, dtype: int64"
138137
},
139-
"execution_count": 5,
140138
"metadata": {},
141-
"output_type": "execute_result"
139+
"output_type": "display_data"
140+
},
141+
{
142+
"data": {
143+
"text/plain": "E_sl\n 0 1780\n 1 497\n-1 423\nName: count, dtype: int64"
144+
},
145+
"metadata": {},
146+
"output_type": "display_data"
147+
},
148+
{
149+
"data": {
150+
"text/plain": "E_sw\n 0 1795\n 1 477\n-1 428\nName: count, dtype: int64"
151+
},
152+
"metadata": {},
153+
"output_type": "display_data"
154+
},
155+
{
156+
"data": {
157+
"text/plain": "sufficient_mixing_energy\nyes 1400\nno 1300\nName: count, dtype: int64"
158+
},
159+
"metadata": {},
160+
"output_type": "display_data"
161+
},
162+
{
163+
"data": {
164+
"text/plain": "E_ssC\n 0 1804\n 1 454\n-1 442\nName: count, dtype: int64"
165+
},
166+
"metadata": {},
167+
"output_type": "display_data"
168+
},
169+
{
170+
"data": {
171+
"text/plain": "seawater\nLarge 2632\nSmall 68\nName: count, dtype: int64"
172+
},
173+
"metadata": {},
174+
"output_type": "display_data"
175+
},
176+
{
177+
"data": {
178+
"text/plain": "E_ssI\n 0 1845\n 1 443\n-1 412\nName: count, dtype: int64"
179+
},
180+
"metadata": {},
181+
"output_type": "display_data"
182+
},
183+
{
184+
"data": {
185+
"text/plain": "soot_pollution\nNO soot pollution 1354\nYES soot pollution 1346\nName: count, dtype: int64"
186+
},
187+
"metadata": {},
188+
"output_type": "display_data"
189+
},
190+
{
191+
"data": {
192+
"text/plain": "displacement\nno 1356\nyes 1344\nName: count, dtype: int64"
193+
},
194+
"metadata": {},
195+
"output_type": "display_data"
142196
}
143197
],
144198
"source": [
145-
"data.describe(include='all') "
199+
"data.describe(include='all') \n",
200+
"display(data['E_ss'].value_counts())\n",
201+
"display(data['E_sl'].value_counts())\n",
202+
"display(data['E_sw'].value_counts())\n",
203+
"display(data['sufficient_mixing_energy'].value_counts())\n",
204+
"display(data['E_ssC'].value_counts())\n",
205+
"display(data['seawater'].value_counts())\n",
206+
"display(data['E_ssI'].value_counts())\n",
207+
"display(data['soot_pollution'].value_counts())\n",
208+
"display(data['displacement'].value_counts())\n",
209+
"\n",
210+
"# dispersion, E_ss, E_sl, E_sw, sufficient_mixing_energy,\n",
211+
" # E_ssC, seawater, E_ssI,soot_pollution, displacement"
146212
]
147213
},
148214
{

0 commit comments

Comments
 (0)