|
1 | | -import datetime |
2 | | -import pins |
3 | | -from pins.errors import PinsError |
4 | 1 | import plotly.express as px |
5 | 2 | import pandas as pd |
6 | | -from datetime import datetime, timedelta |
| 3 | +from datetime import timedelta |
7 | 4 |
|
8 | 5 |
|
9 | 6 | def compute_metrics( |
@@ -75,60 +72,74 @@ def _rolling_df(df: pd.DataFrame, td: timedelta): |
75 | 72 | first = stop |
76 | 73 |
|
77 | 74 |
|
78 | | -def pin_metrics(board, df_metrics, metrics_pin_name, overwrite=False): |
79 | | - pass |
| 75 | +def pin_metrics( |
| 76 | + board, |
| 77 | + df_metrics: pd.DataFrame, |
| 78 | + metrics_pin_name: str, |
| 79 | + pin_type: "str | None" = None, |
| 80 | + index_name: str = "index", |
| 81 | + overwrite: bool = False, |
| 82 | +) -> pd.DataFrame: |
| 83 | + """ |
| 84 | + Update an existing pin storing model metrics over time |
| 85 | +
|
| 86 | + Parameters |
| 87 | + ---------- |
| 88 | + board : |
| 89 | + Pins board |
| 90 | + df_metrics: pd.DataFrame |
| 91 | + Dataframe of metrics over time, such as created by `compute_metrics()` |
| 92 | + metrics_pin_name: |
| 93 | + Name of the pin where the metrics are stored |
| 94 | + index_name: |
| 95 | + The column in df_metrics containing the aggregated dates or datetimes. |
| 96 | + Note that this defaults to a column named "index". |
| 97 | + overwrite: bool |
| 98 | + If True, overwrite any metrics for |
| 99 | + dates that exist both in the existing pin and |
| 100 | + new metrics with the new values. If False (the default), raise an error |
| 101 | + when the new metrics contain overlapping dates with |
| 102 | + the existing pin. |
| 103 | + """ |
80 | 104 |
|
| 105 | + old_metrics_raw = board.pin_read(metrics_pin_name) |
81 | 106 |
|
82 | | -# """ |
83 | | -# Update an existing pin storing model metrics over time |
| 107 | + # need to coerce date index to a datetime, since pandas does not infer |
| 108 | + # date columns from CSV (but note that formats like arrow do) |
| 109 | + old_metrics = old_metrics_raw.copy() |
| 110 | + old_metrics[index_name] = pd.to_datetime(old_metrics[index_name]) |
84 | 111 |
|
85 | | -# Parameters |
86 | | -# ---------- |
87 | | -# board : |
88 | | -# Pins board |
89 | | -# df_metrics: pd.DataFrame |
90 | | -# Dataframe of metrics over time, such as created by `vetiver_compute_metrics()` |
91 | | -# metrics_pin_name: |
92 | | -# Pin name for where the metrics are stored |
93 | | -# overwrite: bool |
94 | | -# If TRUE (the default), overwrite any metrics for |
95 | | -# dates that exist both in the existing pin and |
96 | | -# new metrics with the new values. If FALSE, error |
97 | | -# when the new metrics contain overlapping dates with |
98 | | -# the existing pin. |
99 | | -# """ |
100 | | -# date_types = (datetime.date, datetime.time, datetime.datetime) |
101 | | -# if not isinstance(df_metrics.index, date_types): |
102 | | -# try: |
103 | | -# df_metrics = df_metrics.index.astype("datetime") |
104 | | -# except TypeError: |
105 | | -# raise TypeError(f"Index of {df_metrics} must be a date type") |
| 112 | + # handle overlapping dates ---- |
| 113 | + dt_new = pd.to_datetime(df_metrics[index_name]) |
| 114 | + dt_old = old_metrics[index_name] |
106 | 115 |
|
107 | | -# new_metrics = df_metrics.sort_index() |
| 116 | + indx_old_overlap = dt_old.isin(dt_new) |
108 | 117 |
|
109 | | -# new_dates = df_metrics.index.unique() |
| 118 | + if overwrite: |
| 119 | + # get only rows specific to old metrics, so when we concat below |
| 120 | + # it effectively is an upsert |
| 121 | + old_metrics = old_metrics.loc[~indx_old_overlap, :] |
110 | 122 |
|
111 | | -# try: |
112 | | -# old_metrics = board.pin_read(metrics_pin_name) |
113 | | -# except PinsError: |
114 | | -# board.pin_write(metrics_pin_name) |
| 123 | + elif not overwrite and indx_old_overlap.any(): |
| 124 | + raise ValueError( |
| 125 | + f"The new metrics overlap with dates already stored in {metrics_pin_name}." |
| 126 | + " Check the aggregated dates or use `overwrite=True`." |
| 127 | + ) |
115 | 128 |
|
116 | | -# overlapping_dates = old_metrics.index in new_dates |
| 129 | + # update and pin ---- |
| 130 | + combined_metrics = pd.concat([old_metrics, df_metrics], ignore_index=True) |
| 131 | + sorted_metrics = combined_metrics.sort_values(index_name) |
117 | 132 |
|
118 | | -# if overwrite is True: |
119 | | -# old_metrics = old_metrics not in overlapping_dates |
120 | | -# else: |
121 | | -# if overlapping_dates: |
122 | | -# raise ValueError( |
123 | | -# f"The new metrics overlap with dates \ |
124 | | -# already stored in {repr(metrics_pin_name)} \ |
125 | | -# Check the aggregated dates or use `overwrite = True`" |
126 | | -# ) |
| 133 | + if pin_type is None: |
| 134 | + meta = board.pin_meta(metrics_pin_name) |
127 | 135 |
|
128 | | -# new_metrics = old_metrics + df_metrics |
129 | | -# new_metrics = new_metrics.sort_index() |
| 136 | + final_pin_type = meta.type |
| 137 | + else: |
| 138 | + final_pin_type = pin_type |
130 | 139 |
|
131 | | -# pins.pin_write(board, new_metrics, metrics_pin_name) |
| 140 | + board.pin_write(sorted_metrics, metrics_pin_name, type=final_pin_type) |
| 141 | + |
| 142 | + return sorted_metrics |
132 | 143 |
|
133 | 144 |
|
134 | 145 | def plot_metrics( |
@@ -157,9 +168,11 @@ def plot_metrics( |
157 | 168 | y=estimate, |
158 | 169 | color=metric, |
159 | 170 | facet_row=metric, |
160 | | - markers=n, |
| 171 | + markers=dict(size=n), |
| 172 | + hover_data={"n": ':'}, |
161 | 173 | **kw, |
162 | 174 | ) |
| 175 | + |
163 | 176 | fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1])) |
164 | 177 | fig.update_layout(showlegend=False) |
165 | 178 |
|
|
0 commit comments