analytic/analytic.py at main · Ishmal793/analytic · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

#
# Function to load default data
@st.cache_data
def load_default_data():
    return pd.read_csv(
       'analytics_data.csv'
    )

# Function to load uploaded files (supports Excel and CSV)
def load_uploaded_file(uploaded_file):
    try:
        if uploaded_file.name.endswith('.xlsx'):
            return pd.read_excel(uploaded_file, engine='openpyxl')
        elif uploaded_file.name.endswith('.csv'):
            return pd.read_csv(uploaded_file)
        else:
            st.sidebar.error("Unsupported file type! Please upload an Excel or CSV file.")
            st.stop()
    except Exception as e:
        st.sidebar.error(f"Error loading file: {e}")
        st.stop()

# Sidebar for file upload or default dataset
st.sidebar.title("Upload or Load Dataset")

data_source = st.sidebar.radio(
    "Choose Data Source:",
    ("Default Dataset", "Upload Your Own Dataset")
)

# Load dataset based on user input
if data_source == "Default Dataset":
    data = load_default_data()
    st.sidebar.success("Default dataset loaded successfully!")
else:
    uploaded_file = st.sidebar.file_uploader("Upload an Excel or CSV file", type=['xlsx', 'csv'])

    if uploaded_file is not None:
        data = load_uploaded_file(uploaded_file)
        st.sidebar.success("Dataset uploaded successfully!")
    else:
        st.sidebar.warning("Please upload a dataset to proceed.")
        st.stop()


# Define color palettes
default_colors = px.colors.qualitative.Plotly
time_series_colors = px.colors.qualitative.Set2

# Convert 'Timestamp' to datetime format and localize timezone if needed
if 'Timestamp' in data.columns:
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce').dt.tz_localize('UTC').dt.tz_convert('UTC')

# Sidebar options for filtering
st.sidebar.header("Filters")
view_option = st.sidebar.radio(
    "Select View",
    ["Overall", "Customers & Visitors Analysis", "Shelf Analysis"]
)

# Time filters
if 'Timestamp' in data.columns:
    start_time = st.sidebar.time_input("Start Time", data['Timestamp'].min().time())
    end_time = st.sidebar.time_input("End Time", data['Timestamp'].max().time())
    data.set_index('Timestamp', inplace=True)
    if start_time < end_time:
        filtered_data = data.between_time(start_time, end_time).reset_index()
    else:
        st.error("Please ensure that the start time is earlier than the end time.")
        filtered_data = pd.DataFrame()  # Empty DataFrame to prevent further errors
else:
    st.error("Timestamp column is missing from the dataset.")
    filtered_data = data

# Additional Filtering Options
st.sidebar.subheader("Additional Filters")
if st.sidebar.checkbox("Show Filters"):
    # Multi-select filters for categorical columns
    for col in filtered_data.select_dtypes(include=['object']).columns:
        unique_values = filtered_data[col].dropna().unique()
        selected_values = st.sidebar.multiselect(f"Filter by {col}", options=unique_values, default=unique_values)
        filtered_data = filtered_data[filtered_data[col].isin(selected_values)]

    # Range filters for numeric columns
    for col in filtered_data.select_dtypes(include=['number']).columns:
        min_val, max_val = filtered_data[col].min(), filtered_data[col].max()
        selected_range = st.sidebar.slider(f"Filter by {col} range", min_val, max_val, (min_val, max_val))
        filtered_data = filtered_data[(filtered_data[col] >= selected_range[0]) & (filtered_data[col] <= selected_range[1])]


#
# # Convert 'Timestamp' to datetime format and localize timezone if needed
# if 'Timestamp' in data.columns:
#     data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce').dt.tz_localize('UTC').dt.tz_convert('UTC')
#
#
# # Sidebar options for filtering
# st.sidebar.header("Filters")
# view_option = st.sidebar.radio(
#     "Select View",
#     ["Overall", "People & Customer Analysis", "Shelf Analysis"]
# )
#
# # Time filters
# start_time = st.sidebar.time_input("Start Time", data['Timestamp'].min().time() if 'Timestamp' in data.columns else None)
# end_time = st.sidebar.time_input("End Time", data['Timestamp'].max().time() if 'Timestamp' in data.columns else None)
#
# # Filter data based on selected time, checking start time < end time
# if 'Timestamp' in data.columns:
#     data.set_index('Timestamp', inplace=True)
#     if start_time < end_time:
#         filtered_data = data.between_time(start_time, end_time).reset_index()
#     else:
#         st.error("Please ensure that the start time is earlier than the end time.")
#         filtered_data = pd.DataFrame()  # Empty DataFrame to prevent further errors
# else:
#     st.error("Timestamp column is missing from the dataset.")
#     filtered_data = data
# Refresh Button
if st.button("Refresh Dashboard"):
    st.experimental_set_query_params()

# Tooltip Message
tooltip_message = (
    "The dataset is a working process. You cannot open the Excel file directly, "
    "and no modifications can be made. You can only add data to existing columns, "
    "and you cannot change the column names."
)
st.markdown(
    f'<span style="color: grey; font-size: 12px; text-decoration: underline;">{tooltip_message}</span>',
    unsafe_allow_html=True
)

if view_option == "Overall":

    st.subheader("Overall Summary")

    # Define metrics and dynamically calculate their values
    metrics = {
        "Total Customers": data['Total Customers'].sum() if 'Total Customers' in data.columns else 0,
        "Total Visitors": data['Total Visitors'].sum() if 'Total Visitors' in data.columns else 0,
        "Queue Count": data['Queue Count'].sum() if 'Queue Count' in data.columns else 0,
        "Current Customers": data['Current Customers'].sum() if 'Current Customers' in data.columns else 0,
        "Current Visitor": data['Current Visitor'].sum() if 'Current Visitor' in data.columns else 0
    }

    # Calculate total checks (row count)
    total_checks = len(data)

    # Display metrics with gauges
    st.subheader("Overall Summary Metrics")
    gauge_figures = []
    # Define a list of distinct colors for each gauge
    gauge_colors = ['#00BFFF', '#32CD32', '#FF4500', '#DAA520', '#9370DB', '#FF6347']  # Professional colors

    # Create a gauge for total checks
    gauge_fig_total = go.Figure(go.Indicator(
        mode="gauge+number",
        value=total_checks,
        title={'text': "Total Checks", 'font': {'size': 20}},  # Font color removed
        gauge={
            'axis': {'range': [0, max(metrics.values(), default=0) * 1.1]},  # Set range based on max metric value
            'bar': {'color': gauge_colors[0]}  # Color for the total checks gauge
        },
        number={'font': {'size': 18, 'family': 'Arial', 'weight': 'bold', 'color': 'grey'}}  # Bold font, grey color
    ))
    gauge_figures.append(gauge_fig_total)  # Append total checks gauge

    for (label, value), color in zip(metrics.items(), gauge_colors[1:]):  # Start from the second color
        fig = go.Figure(go.Indicator(
            mode="gauge+number",
            value=value,
            title={'text': label, 'font': {'size': 20}},  # Font color removed
            gauge={
                'axis': {'range': [0, max(metrics.values(), default=0) * 1.1]},
                'bar': {'color': color}  # Use distinct color for each gauge
            },
            number={'font': {'size': 18, 'family': 'Arial', 'weight': 'bold', 'color': 'grey'}}  # Bold font, grey color
        ))
        gauge_figures.append(fig)

    # Create containers for each row of gauges
    # First row of gauges (including total checks)
    col1, col2, col3 = st.columns(3)

    with col1:
        st.plotly_chart(gauge_figures[0], use_container_width=True)  # Total Checks
    with col2:
        st.plotly_chart(gauge_figures[1], use_container_width=True)  # Total Customers
    with col3:
        st.plotly_chart(gauge_figures[2], use_container_width=True)  # Total Visitors

    # Second row of gauges (remaining metrics)
    col4, col5, col6 = st.columns(3)

    with col4:
        st.plotly_chart(gauge_figures[3], use_container_width=True)  # Queue Count
    with col5:
        st.plotly_chart(gauge_figures[4], use_container_width=True)  # Current Customers
    with col6:
        st.plotly_chart(gauge_figures[5], use_container_width=True)  # Current Visitor

    # Optional: Summary bar chart for all metrics
    st.subheader("Metric Summary - Bar Chart")
    bar_fig = go.Figure(go.Bar(
        x=list(metrics.keys()),
        y=list(metrics.values()),
        text=list(metrics.values()),
        textposition='auto',
        marker_color=gauge_colors  # Use the distinct colors for the bar chart
    ))

    bar_fig.update_layout(
        title="Overall Metric Summary",
        xaxis_title="Metrics",
        yaxis_title="Values",
        template="plotly_dark",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        title_font=dict(size=24),  # Font color removed
        xaxis_title_font=dict(size=18),  # Font color removed
        yaxis_title_font=dict(size=18)  # Font color removed
    )
    st.plotly_chart(bar_fig)

    # Combined line chart of Customers and Visitors
    fig = go.Figure()
    if 'Timestamp' in filtered_data.columns:
        if 'Total Customers' in filtered_data.columns:
            fig.add_trace(go.Scatter(x=filtered_data['Timestamp'], y=filtered_data['Total Customers'],
                                     mode='lines', name='Total Customers', line=dict(color='#32CD32')))
        if 'Total Visitors' in filtered_data.columns:
            fig.add_trace(go.Scatter(x=filtered_data['Timestamp'], y=filtered_data['Total Visitors'],
                                     mode='lines', name='Total Visitors', line=dict(color='#FF4500')))
    fig.update_layout(
        title="Overall Customers and Visitors Over Time",
        template="plotly_dark",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        title_font=dict(size=24),  # Font color removed
        xaxis_title_font=dict(size=18),  # Font color removed
        yaxis_title_font=dict(size=18)  # Font color removed
    )
    st.plotly_chart(fig)
# People & Customer Analysis
elif view_option == "Customers & Visitors Analysis":
    # Minute-Level Trends for Current Customers and Current Visitors
    fig = go.Figure()
    if 'Timestamp' in filtered_data.columns:
        if 'Current Customers' in filtered_data.columns:
            fig.add_trace(go.Scatter(x=filtered_data['Timestamp'], y=filtered_data['Current Customers'],
                                     mode='lines', name='Current Customers',
                                     line=dict(color='#1E90FF', width=3)))  # Dodger Blue for line chart
        if 'Current Visitor' in filtered_data.columns:
            fig.add_trace(go.Scatter(x=filtered_data['Timestamp'], y=filtered_data['Current Visitor'],
                                     mode='lines', name='Current Visitors',
                                     line=dict(color='#FF4500', width=3)))  # Orange Red for line chart

    fig.update_layout(
        title="Minute-Level Trends in Current Customers and Visitors",
        template="plotly_dark",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        title_font=dict(size=24, weight='bold'),  # Bold weight, color removed
        xaxis_title_font=dict(size=18),  # Font color removed
        yaxis_title_font=dict(size=18)  # Font color removed
    )
    st.plotly_chart(fig)

    # Extract minute from the Timestamp for grouping
    filtered_data['Minute'] = filtered_data['Timestamp'].dt.strftime('%H:%M')  # Format as "HH:MM" for each unique minute

    # Group data by minute, calculating the average for Current Customers and Visitors
    minute_data = filtered_data.groupby('Minute')[['Current Customers', 'Current Visitor']].mean().reset_index()

    # Grouped Bar Chart for minute-level data
    fig = go.Figure()

    # Define metrics and colors for better distinction
    metrics = ['Current Customers', 'Current Visitor']
    colors = ['#4682B4', '#FF8C00']  # Steel Blue and Dark Orange for the bar chart

    # Add a bar for each metric
    for metric, color in zip(metrics, colors):
        if metric in minute_data.columns:
            fig.add_trace(go.Bar(
                x=minute_data['Minute'], y=minute_data[metric],
                name=metric, marker=dict(color=color)
            ))

    # Update layout to ensure it’s readable even with minute-level data
    fig.update_layout(
        title="Minute-Level Grouped Bar Chart for Current Customers and Visitors",
        xaxis_title="Time (Minute-Level)",
        yaxis_title="Average Count",
        barmode='group',  # Grouped bars for easy comparison
        template="plotly_dark",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        title_font=dict(size=24, weight='bold'),  # Bold weight, color removed
        xaxis_title_font=dict(size=18),  # Font color removed
        yaxis_title_font=dict(size=18)  # Font color removed
    )

    # Customize x-axis for better readability with minute-level data
    fig.update_xaxes(tickangle=45, nticks=20, tickformat="%H:%M")

    st.plotly_chart(fig)
# Shelf Analysis with Minute-Level Data
elif view_option == "Shelf Analysis":
    st.subheader("Shelf Analysis (Minute-Level)")

    # Ensure shelf columns exist in filtered_data
    shelves = [col for col in ['Shelf 1', 'Shelf 2', 'Shelf 3', 'Shelf 4'] if col in filtered_data.columns]

    if shelves:
        # Summing each shelf's usage
        shelf_totals = filtered_data[shelves].sum().reset_index()
        shelf_totals.columns = ['Shelf', 'Count']

        # 1. Total Shelf Usage (Bar Chart)
        fig1 = px.bar(shelf_totals, x='Shelf', y='Count', text='Count', color='Shelf',
                      color_discrete_sequence=['#1E90FF', '#FF6347', '#32CD32', '#FFD700'],  # Different colors for each shelf
                      title="Total Shelf Usage", template="plotly_dark")
        fig1.update_traces(texttemplate='%{text}', textposition='outside')
        fig1.update_layout(
            plot_bgcolor="rgba(0,0,0,0)",  # Transparent background for the chart area
            paper_bgcolor="rgba(0,0,0,0)",  # Transparent paper background
            xaxis=dict(showgrid=False),
            yaxis=dict(showgrid=False),
            title_font=dict(size=24, weight='bold'),  # Bold weight, color removed
            xaxis_title_font=dict(size=18),  # Font color removed
            yaxis_title_font=dict(size=18)  # Font color removed
        )
        st.plotly_chart(fig1)

        # 2. Minute-Level Stacked Bar Chart for Shelf Usage
        filtered_data['Minute'] = filtered_data['Timestamp'].dt.strftime('%H:%M')  # Group by minute (hour:minute)
        minute_shelf_usage = filtered_data.groupby('Minute')[shelves].sum().reset_index()

        fig2 = go.Figure()
        colors = ['#6A5ACD', '#FF4500', '#00FA9A', '#FFD700']  # Unique colors for each shelf in stacked bar chart
        for shelf, color in zip(shelves, colors):
            fig2.add_trace(go.Bar(
                x=minute_shelf_usage['Minute'], y=minute_shelf_usage[shelf], name=shelf,
                marker_color=color  # Different colors for each shelf
            ))
        fig2.update_layout(
            title="Minute-Level Shelf Usage (Stacked Bar Chart)",
            xaxis_title="Time (Minute)",
            yaxis_title="Usage Count",
            barmode='stack',  # Stacks bars for cumulative view
            template="plotly_dark",
            plot_bgcolor="rgba(0,0,0,0)",  # Transparent background for the chart area
            paper_bgcolor="rgba(0,0,0,0)",  # Transparent paper background
            title_font=dict(size=24, weight='bold'),  # Bold weight, color removed
            xaxis_title_font=dict(size=18),  # Font color removed
            yaxis_title_font=dict(size=18),  # Font color removed
            xaxis=dict(tickformat="%H:%M")
        )
        st.plotly_chart(fig2)

        # 3. Line Chart for Minute-Level Shelf Usage Over Time
        fig3 = go.Figure()
        line_colors = ['#00FF7F', '#FF1493', '#1E90FF', '#FFD700']  # Professional colors for line chart
        for shelf, color in zip(shelves, line_colors):
            fig3.add_trace(go.Scatter(
                x=filtered_data['Timestamp'], y=filtered_data[shelf],
                mode='lines+markers',  # Added markers for better visibility
                name=shelf,
                line=dict(color=color, width=2)  # Thicker line width and distinct color
            ))
        fig3.update_layout(
            title="Shelf Usage Over Time (Minute-Level Line Chart)",
            xaxis_title="Time",
            yaxis_title="Usage Count",
            template="plotly_dark",
            plot_bgcolor="rgba(0,0,0,0)",  # Transparent background for the chart area
            paper_bgcolor="rgba(0,0,0,0)",  # Transparent paper background
            title_font=dict(size=24, weight='bold'),  # Bold weight, color removed
            xaxis_title_font=dict(size=18),  # Font color removed
            yaxis_title_font=dict(size=18)  # Font color removed
        )
        st.plotly_chart(fig3)