-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalytic.py
More file actions
390 lines (345 loc) · 17.3 KB
/
analytic.py
File metadata and controls
390 lines (345 loc) · 17.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
#
# Function to load default data
@st.cache_data
def load_default_data():
return pd.read_csv(
'analytics_data.csv'
)
# Function to load uploaded files (supports Excel and CSV)
def load_uploaded_file(uploaded_file):
try:
if uploaded_file.name.endswith('.xlsx'):
return pd.read_excel(uploaded_file, engine='openpyxl')
elif uploaded_file.name.endswith('.csv'):
return pd.read_csv(uploaded_file)
else:
st.sidebar.error("Unsupported file type! Please upload an Excel or CSV file.")
st.stop()
except Exception as e:
st.sidebar.error(f"Error loading file: {e}")
st.stop()
# Sidebar for file upload or default dataset
st.sidebar.title("Upload or Load Dataset")
data_source = st.sidebar.radio(
"Choose Data Source:",
("Default Dataset", "Upload Your Own Dataset")
)
# Load dataset based on user input
if data_source == "Default Dataset":
data = load_default_data()
st.sidebar.success("Default dataset loaded successfully!")
else:
uploaded_file = st.sidebar.file_uploader("Upload an Excel or CSV file", type=['xlsx', 'csv'])
if uploaded_file is not None:
data = load_uploaded_file(uploaded_file)
st.sidebar.success("Dataset uploaded successfully!")
else:
st.sidebar.warning("Please upload a dataset to proceed.")
st.stop()
# Define color palettes
default_colors = px.colors.qualitative.Plotly
time_series_colors = px.colors.qualitative.Set2
# Convert 'Timestamp' to datetime format and localize timezone if needed
if 'Timestamp' in data.columns:
data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce').dt.tz_localize('UTC').dt.tz_convert('UTC')
# Sidebar options for filtering
st.sidebar.header("Filters")
view_option = st.sidebar.radio(
"Select View",
["Overall", "Customers & Visitors Analysis", "Shelf Analysis"]
)
# Time filters
if 'Timestamp' in data.columns:
start_time = st.sidebar.time_input("Start Time", data['Timestamp'].min().time())
end_time = st.sidebar.time_input("End Time", data['Timestamp'].max().time())
data.set_index('Timestamp', inplace=True)
if start_time < end_time:
filtered_data = data.between_time(start_time, end_time).reset_index()
else:
st.error("Please ensure that the start time is earlier than the end time.")
filtered_data = pd.DataFrame() # Empty DataFrame to prevent further errors
else:
st.error("Timestamp column is missing from the dataset.")
filtered_data = data
# Additional Filtering Options
st.sidebar.subheader("Additional Filters")
if st.sidebar.checkbox("Show Filters"):
# Multi-select filters for categorical columns
for col in filtered_data.select_dtypes(include=['object']).columns:
unique_values = filtered_data[col].dropna().unique()
selected_values = st.sidebar.multiselect(f"Filter by {col}", options=unique_values, default=unique_values)
filtered_data = filtered_data[filtered_data[col].isin(selected_values)]
# Range filters for numeric columns
for col in filtered_data.select_dtypes(include=['number']).columns:
min_val, max_val = filtered_data[col].min(), filtered_data[col].max()
selected_range = st.sidebar.slider(f"Filter by {col} range", min_val, max_val, (min_val, max_val))
filtered_data = filtered_data[(filtered_data[col] >= selected_range[0]) & (filtered_data[col] <= selected_range[1])]
#
# # Convert 'Timestamp' to datetime format and localize timezone if needed
# if 'Timestamp' in data.columns:
# data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce').dt.tz_localize('UTC').dt.tz_convert('UTC')
#
#
# # Sidebar options for filtering
# st.sidebar.header("Filters")
# view_option = st.sidebar.radio(
# "Select View",
# ["Overall", "People & Customer Analysis", "Shelf Analysis"]
# )
#
# # Time filters
# start_time = st.sidebar.time_input("Start Time", data['Timestamp'].min().time() if 'Timestamp' in data.columns else None)
# end_time = st.sidebar.time_input("End Time", data['Timestamp'].max().time() if 'Timestamp' in data.columns else None)
#
# # Filter data based on selected time, checking start time < end time
# if 'Timestamp' in data.columns:
# data.set_index('Timestamp', inplace=True)
# if start_time < end_time:
# filtered_data = data.between_time(start_time, end_time).reset_index()
# else:
# st.error("Please ensure that the start time is earlier than the end time.")
# filtered_data = pd.DataFrame() # Empty DataFrame to prevent further errors
# else:
# st.error("Timestamp column is missing from the dataset.")
# filtered_data = data
# Refresh Button
if st.button("Refresh Dashboard"):
st.experimental_set_query_params()
# Tooltip Message
tooltip_message = (
"The dataset is a working process. You cannot open the Excel file directly, "
"and no modifications can be made. You can only add data to existing columns, "
"and you cannot change the column names."
)
st.markdown(
f'<span style="color: grey; font-size: 12px; text-decoration: underline;">{tooltip_message}</span>',
unsafe_allow_html=True
)
if view_option == "Overall":
st.subheader("Overall Summary")
# Define metrics and dynamically calculate their values
metrics = {
"Total Customers": data['Total Customers'].sum() if 'Total Customers' in data.columns else 0,
"Total Visitors": data['Total Visitors'].sum() if 'Total Visitors' in data.columns else 0,
"Queue Count": data['Queue Count'].sum() if 'Queue Count' in data.columns else 0,
"Current Customers": data['Current Customers'].sum() if 'Current Customers' in data.columns else 0,
"Current Visitor": data['Current Visitor'].sum() if 'Current Visitor' in data.columns else 0
}
# Calculate total checks (row count)
total_checks = len(data)
# Display metrics with gauges
st.subheader("Overall Summary Metrics")
gauge_figures = []
# Define a list of distinct colors for each gauge
gauge_colors = ['#00BFFF', '#32CD32', '#FF4500', '#DAA520', '#9370DB', '#FF6347'] # Professional colors
# Create a gauge for total checks
gauge_fig_total = go.Figure(go.Indicator(
mode="gauge+number",
value=total_checks,
title={'text': "Total Checks", 'font': {'size': 20}}, # Font color removed
gauge={
'axis': {'range': [0, max(metrics.values(), default=0) * 1.1]}, # Set range based on max metric value
'bar': {'color': gauge_colors[0]} # Color for the total checks gauge
},
number={'font': {'size': 18, 'family': 'Arial', 'weight': 'bold', 'color': 'grey'}} # Bold font, grey color
))
gauge_figures.append(gauge_fig_total) # Append total checks gauge
for (label, value), color in zip(metrics.items(), gauge_colors[1:]): # Start from the second color
fig = go.Figure(go.Indicator(
mode="gauge+number",
value=value,
title={'text': label, 'font': {'size': 20}}, # Font color removed
gauge={
'axis': {'range': [0, max(metrics.values(), default=0) * 1.1]},
'bar': {'color': color} # Use distinct color for each gauge
},
number={'font': {'size': 18, 'family': 'Arial', 'weight': 'bold', 'color': 'grey'}} # Bold font, grey color
))
gauge_figures.append(fig)
# Create containers for each row of gauges
# First row of gauges (including total checks)
col1, col2, col3 = st.columns(3)
with col1:
st.plotly_chart(gauge_figures[0], use_container_width=True) # Total Checks
with col2:
st.plotly_chart(gauge_figures[1], use_container_width=True) # Total Customers
with col3:
st.plotly_chart(gauge_figures[2], use_container_width=True) # Total Visitors
# Second row of gauges (remaining metrics)
col4, col5, col6 = st.columns(3)
with col4:
st.plotly_chart(gauge_figures[3], use_container_width=True) # Queue Count
with col5:
st.plotly_chart(gauge_figures[4], use_container_width=True) # Current Customers
with col6:
st.plotly_chart(gauge_figures[5], use_container_width=True) # Current Visitor
# Optional: Summary bar chart for all metrics
st.subheader("Metric Summary - Bar Chart")
bar_fig = go.Figure(go.Bar(
x=list(metrics.keys()),
y=list(metrics.values()),
text=list(metrics.values()),
textposition='auto',
marker_color=gauge_colors # Use the distinct colors for the bar chart
))
bar_fig.update_layout(
title="Overall Metric Summary",
xaxis_title="Metrics",
yaxis_title="Values",
template="plotly_dark",
plot_bgcolor="rgba(0,0,0,0)",
paper_bgcolor="rgba(0,0,0,0)",
title_font=dict(size=24), # Font color removed
xaxis_title_font=dict(size=18), # Font color removed
yaxis_title_font=dict(size=18) # Font color removed
)
st.plotly_chart(bar_fig)
# Combined line chart of Customers and Visitors
fig = go.Figure()
if 'Timestamp' in filtered_data.columns:
if 'Total Customers' in filtered_data.columns:
fig.add_trace(go.Scatter(x=filtered_data['Timestamp'], y=filtered_data['Total Customers'],
mode='lines', name='Total Customers', line=dict(color='#32CD32')))
if 'Total Visitors' in filtered_data.columns:
fig.add_trace(go.Scatter(x=filtered_data['Timestamp'], y=filtered_data['Total Visitors'],
mode='lines', name='Total Visitors', line=dict(color='#FF4500')))
fig.update_layout(
title="Overall Customers and Visitors Over Time",
template="plotly_dark",
plot_bgcolor="rgba(0,0,0,0)",
paper_bgcolor="rgba(0,0,0,0)",
title_font=dict(size=24), # Font color removed
xaxis_title_font=dict(size=18), # Font color removed
yaxis_title_font=dict(size=18) # Font color removed
)
st.plotly_chart(fig)
# People & Customer Analysis
elif view_option == "Customers & Visitors Analysis":
# Minute-Level Trends for Current Customers and Current Visitors
fig = go.Figure()
if 'Timestamp' in filtered_data.columns:
if 'Current Customers' in filtered_data.columns:
fig.add_trace(go.Scatter(x=filtered_data['Timestamp'], y=filtered_data['Current Customers'],
mode='lines', name='Current Customers',
line=dict(color='#1E90FF', width=3))) # Dodger Blue for line chart
if 'Current Visitor' in filtered_data.columns:
fig.add_trace(go.Scatter(x=filtered_data['Timestamp'], y=filtered_data['Current Visitor'],
mode='lines', name='Current Visitors',
line=dict(color='#FF4500', width=3))) # Orange Red for line chart
fig.update_layout(
title="Minute-Level Trends in Current Customers and Visitors",
template="plotly_dark",
plot_bgcolor="rgba(0,0,0,0)",
paper_bgcolor="rgba(0,0,0,0)",
title_font=dict(size=24, weight='bold'), # Bold weight, color removed
xaxis_title_font=dict(size=18), # Font color removed
yaxis_title_font=dict(size=18) # Font color removed
)
st.plotly_chart(fig)
# Extract minute from the Timestamp for grouping
filtered_data['Minute'] = filtered_data['Timestamp'].dt.strftime('%H:%M') # Format as "HH:MM" for each unique minute
# Group data by minute, calculating the average for Current Customers and Visitors
minute_data = filtered_data.groupby('Minute')[['Current Customers', 'Current Visitor']].mean().reset_index()
# Grouped Bar Chart for minute-level data
fig = go.Figure()
# Define metrics and colors for better distinction
metrics = ['Current Customers', 'Current Visitor']
colors = ['#4682B4', '#FF8C00'] # Steel Blue and Dark Orange for the bar chart
# Add a bar for each metric
for metric, color in zip(metrics, colors):
if metric in minute_data.columns:
fig.add_trace(go.Bar(
x=minute_data['Minute'], y=minute_data[metric],
name=metric, marker=dict(color=color)
))
# Update layout to ensure it’s readable even with minute-level data
fig.update_layout(
title="Minute-Level Grouped Bar Chart for Current Customers and Visitors",
xaxis_title="Time (Minute-Level)",
yaxis_title="Average Count",
barmode='group', # Grouped bars for easy comparison
template="plotly_dark",
plot_bgcolor="rgba(0,0,0,0)",
paper_bgcolor="rgba(0,0,0,0)",
title_font=dict(size=24, weight='bold'), # Bold weight, color removed
xaxis_title_font=dict(size=18), # Font color removed
yaxis_title_font=dict(size=18) # Font color removed
)
# Customize x-axis for better readability with minute-level data
fig.update_xaxes(tickangle=45, nticks=20, tickformat="%H:%M")
st.plotly_chart(fig)
# Shelf Analysis with Minute-Level Data
elif view_option == "Shelf Analysis":
st.subheader("Shelf Analysis (Minute-Level)")
# Ensure shelf columns exist in filtered_data
shelves = [col for col in ['Shelf 1', 'Shelf 2', 'Shelf 3', 'Shelf 4'] if col in filtered_data.columns]
if shelves:
# Summing each shelf's usage
shelf_totals = filtered_data[shelves].sum().reset_index()
shelf_totals.columns = ['Shelf', 'Count']
# 1. Total Shelf Usage (Bar Chart)
fig1 = px.bar(shelf_totals, x='Shelf', y='Count', text='Count', color='Shelf',
color_discrete_sequence=['#1E90FF', '#FF6347', '#32CD32', '#FFD700'], # Different colors for each shelf
title="Total Shelf Usage", template="plotly_dark")
fig1.update_traces(texttemplate='%{text}', textposition='outside')
fig1.update_layout(
plot_bgcolor="rgba(0,0,0,0)", # Transparent background for the chart area
paper_bgcolor="rgba(0,0,0,0)", # Transparent paper background
xaxis=dict(showgrid=False),
yaxis=dict(showgrid=False),
title_font=dict(size=24, weight='bold'), # Bold weight, color removed
xaxis_title_font=dict(size=18), # Font color removed
yaxis_title_font=dict(size=18) # Font color removed
)
st.plotly_chart(fig1)
# 2. Minute-Level Stacked Bar Chart for Shelf Usage
filtered_data['Minute'] = filtered_data['Timestamp'].dt.strftime('%H:%M') # Group by minute (hour:minute)
minute_shelf_usage = filtered_data.groupby('Minute')[shelves].sum().reset_index()
fig2 = go.Figure()
colors = ['#6A5ACD', '#FF4500', '#00FA9A', '#FFD700'] # Unique colors for each shelf in stacked bar chart
for shelf, color in zip(shelves, colors):
fig2.add_trace(go.Bar(
x=minute_shelf_usage['Minute'], y=minute_shelf_usage[shelf], name=shelf,
marker_color=color # Different colors for each shelf
))
fig2.update_layout(
title="Minute-Level Shelf Usage (Stacked Bar Chart)",
xaxis_title="Time (Minute)",
yaxis_title="Usage Count",
barmode='stack', # Stacks bars for cumulative view
template="plotly_dark",
plot_bgcolor="rgba(0,0,0,0)", # Transparent background for the chart area
paper_bgcolor="rgba(0,0,0,0)", # Transparent paper background
title_font=dict(size=24, weight='bold'), # Bold weight, color removed
xaxis_title_font=dict(size=18), # Font color removed
yaxis_title_font=dict(size=18), # Font color removed
xaxis=dict(tickformat="%H:%M")
)
st.plotly_chart(fig2)
# 3. Line Chart for Minute-Level Shelf Usage Over Time
fig3 = go.Figure()
line_colors = ['#00FF7F', '#FF1493', '#1E90FF', '#FFD700'] # Professional colors for line chart
for shelf, color in zip(shelves, line_colors):
fig3.add_trace(go.Scatter(
x=filtered_data['Timestamp'], y=filtered_data[shelf],
mode='lines+markers', # Added markers for better visibility
name=shelf,
line=dict(color=color, width=2) # Thicker line width and distinct color
))
fig3.update_layout(
title="Shelf Usage Over Time (Minute-Level Line Chart)",
xaxis_title="Time",
yaxis_title="Usage Count",
template="plotly_dark",
plot_bgcolor="rgba(0,0,0,0)", # Transparent background for the chart area
paper_bgcolor="rgba(0,0,0,0)", # Transparent paper background
title_font=dict(size=24, weight='bold'), # Bold weight, color removed
xaxis_title_font=dict(size=18), # Font color removed
yaxis_title_font=dict(size=18) # Font color removed
)
st.plotly_chart(fig3)