-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path01_π_Repository_analytics.py
More file actions
208 lines (173 loc) · 7.37 KB
/
01_π_Repository_analytics.py
File metadata and controls
208 lines (173 loc) · 7.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
import streamlit as st
import pandas as pd
import plotly.express as px
from pandas import DataFrame
@st.cache_data
def load_data(filepath: str) -> DataFrame:
    """
    Load a CSV file into a DataFrame, reporting any problem in the Streamlit UI.

    Failures are not propagated to the caller: each one is displayed with
    ``st.error`` and an empty DataFrame is returned instead, so callers
    should check ``.empty`` before indexing columns of the result.

    Args:
        filepath (str): The path to the CSV file to be loaded.

    Returns:
        DataFrame: The parsed CSV contents. An empty DataFrame is returned
        when the file is missing, contains no data, or cannot be read.
    """
    # Keep the try body down to the one call that is expected to fail so the
    # handlers below only ever describe a loading problem.
    try:
        data = pd.read_csv(filepath)
    except FileNotFoundError:
        st.error(
            f"File not found: {filepath}. Please check the file path.",
            icon="🚨",
        )
    except pd.errors.EmptyDataError:
        st.error("No data found in the CSV file.", icon="🚨")
    except Exception as e:
        # Last-resort boundary: surface the message in the UI instead of
        # aborting the whole page.
        st.error(f"An error occurred while loading the data: {e}", icon="🚨")
    else:
        if data.empty:
            # A header-only file parses without raising but has no rows.
            st.error("No data found in the CSV file.", icon="🚨")
        return data

    # Reached only after one of the handlers above has reported an error.
    return pd.DataFrame()
def _select_range(label, lower, upper, display_format):
    """Return the (start, end) pair picked on a range slider.

    When the bounds do not span an interval there is nothing to select, so
    the slider is skipped and the bounds themselves are returned.
    """
    if lower >= upper:
        return lower, upper
    return st.slider(
        label,
        min_value=lower,
        max_value=upper,
        value=(lower, upper),
        format=display_format,
    )


def _render_code_frequency_tab() -> None:
    """Render weekly and cumulative additions/deletions for a chosen date range."""
    st.subheader("GitHub code change visualization")
    st.info("Visualize code changes over time.", icon="ℹ️")

    # Load and preprocess code frequency data
    code_freq_data = load_data("data/streamlit_code_frequency_stats.csv")
    if code_freq_data.empty:
        # load_data has already shown the error; there is nothing to plot.
        return
    code_freq_data["week"] = pd.to_datetime(
        code_freq_data["week"], unit="s"
    ).dt.date

    with st.expander("Show raw data"):
        st.dataframe(code_freq_data)

    # Date range slider for selecting the period of interest
    start_week, end_week = _select_range(
        "Select Date Range",
        code_freq_data["week"].min(),
        code_freq_data["week"].max(),
        "MM/DD/YYYY",
    )

    # Copy the selection so the derived columns added below are written to an
    # independent frame rather than to a slice of code_freq_data.
    in_range = (code_freq_data["week"] >= start_week) & (
        code_freq_data["week"] <= end_week
    )
    filtered_data = code_freq_data[in_range].copy()

    st.subheader("Weekly code changes comparison")
    # NOTE(review): positive_deletions is computed but the chart below plots
    # the raw "deletions" column - confirm which one is intended.
    filtered_data["positive_deletions"] = filtered_data["deletions"].abs()
    st.area_chart(
        filtered_data.set_index("week")[["additions", "deletions"]],
        color=["#00FF00", "#FF0000"],
    )

    # Cumulative code changes over time
    st.subheader("Cumulative code changes")
    filtered_data["cumulative_additions"] = filtered_data["additions"].cumsum()
    filtered_data["cumulative_deletions"] = filtered_data["deletions"].cumsum()
    st.scatter_chart(
        filtered_data.set_index("week")[
            ["cumulative_additions", "cumulative_deletions"]
        ]
    )


def _render_commit_activity_tab() -> None:
    """Render headline commit metrics and a weekly commit bar chart."""
    st.subheader("Total commits over the past year")
    st.info("Track total number of commits.", icon="ℹ️")

    commit_activity_data = load_data("data/streamlit_commit_activity_stats.csv")
    if commit_activity_data.empty:
        # load_data has already shown the error; skip metrics and chart.
        return
    commit_activity_data["week"] = pd.to_datetime(
        commit_activity_data["week"], unit="s"
    )

    with st.expander("Show raw data"):
        st.dataframe(commit_activity_data)

    # Metrics for commit activity
    weekly_totals = commit_activity_data["total"]
    total_commits = weekly_totals.sum()
    average_commits = weekly_totals.mean()
    # Percentage change between the last two rows of the data.
    weekly_change = weekly_totals.pct_change().iloc[-1] * 100

    col1, col2, col3 = st.columns(3)
    col1.metric("Total Commits", int(total_commits))
    col2.metric("Average Weekly Commits", f"{average_commits:.2f}")
    col3.metric("Week-over-Week Change", f"{weekly_change:.2f}%")

    st.bar_chart(commit_activity_data.set_index("week")["total"])


def _render_contributors_tab() -> None:
    """Render per-contributor activity for a selected user and date range."""
    st.subheader("Contributor analysis")
    st.info("Analyze contributors and their activity.", icon="ℹ️")

    # Load and preprocess contributor data
    contributor_data = load_data("data/streamlit_contributor_stats.csv")
    if contributor_data.empty:
        # load_data has already shown the error; nothing to analyze.
        return
    contributor_data = contributor_data.rename(
        columns={"a": "additions", "d": "deletions", "c": "commits", "w": "date"}
    )
    contributor_data["date"] = pd.to_datetime(contributor_data["date"], unit="s")

    # Drop columns not needed for the analysis (ignored if absent)
    columns_to_drop = [
        "Unnamed: 0",
        "author_node_id",
        "author_avatar_url",
        "author_gravatar_id",
    ]
    contributor_data = contributor_data.drop(
        columns=columns_to_drop, errors="ignore"
    )

    with st.expander("Show raw data"):
        st.dataframe(contributor_data)

    # Total activity per row: additions + deletions + commits
    contributor_data["total_activity"] = (
        contributor_data["additions"]
        + contributor_data["deletions"]
        + contributor_data["commits"]
    )

    # Summed activity per author, most active first, drives the select box order
    activity_by_user = (
        contributor_data.groupby("author_login")["total_activity"]
        .sum()
        .sort_values(ascending=False)
    )
    selected_user = st.selectbox("Select a User", activity_by_user.index.tolist())
    if selected_user is None:
        # No author available to select, so there is no per-user view to draw.
        return

    # Filter data for the selected user and narrow it with a date range slider
    user_data = contributor_data[contributor_data["author_login"] == selected_user]
    start_date, end_date = _select_range(
        "Select date range",
        user_data["date"].min().to_pydatetime(),
        user_data["date"].max().to_pydatetime(),
        "YYYY-MM-DD",
    )
    filtered_data = user_data[
        (user_data["date"] >= start_date) & (user_data["date"] <= end_date)
    ]

    # Use st.fragment when this Streamlit build provides it; otherwise fall
    # back to the experimental name the original code relied on.
    fragment = getattr(st, "fragment", None) or st.experimental_fragment

    @fragment
    def plot_chart() -> None:
        """Plot additions, deletions and commits of the selected user over time."""
        fig = px.line(
            filtered_data,
            x="date",
            y=["additions", "deletions", "commits"],
            labels={
                "value": "Number of Contributions",
                "variable": "Type of Contribution",
            },
            title=f"Interactive Contributions of {selected_user} Over Time",
        )
        fig.update_traces(mode="lines+markers")
        st.plotly_chart(fig, use_container_width=True)

    chart, dataset = st.columns(2)
    with chart:
        st.subheader(f"Contributions over time for {selected_user}")
        plot_chart()
    with dataset:
        st.subheader(f"Show filtered data for {selected_user}")
        st.data_editor(filtered_data)


def main():
    """Configure the page and render the three analytics tabs.

    Each tab loads its own CSV via ``load_data`` and renders nothing further
    when that load comes back empty.
    """
    # NOTE(review): the original page/title emoji was lost to mis-encoding;
    # the bar-chart emoji is a best guess - confirm against the upstream file.
    st.set_page_config(layout="wide", page_icon="📊")
    st.title("📊 GitHub Repository Analytics Dashboard", anchor=False)

    # Create tabs for different analytics views
    tab1, tab2, tab3 = st.tabs(
        ["⏰ Code Frequency", "💬 Commit Activity", "👩‍💻 Contributors"]
    )
    with tab1:
        _render_code_frequency_tab()
    with tab2:
        _render_commit_activity_tab()
    with tab3:
        _render_contributors_tab()
# Build the dashboard only when this file is run as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()