Skip to content

Commit 461e174

Browse files
committed
fix: remove hard-coded type from pin_metrics, add tests
1 parent e02420d commit 461e174

2 files changed

Lines changed: 81 additions & 20 deletions

File tree

vetiver/monitor.py

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -73,8 +73,13 @@ def _rolling_df(df: pd.DataFrame, td: timedelta):
7373

7474

7575
def pin_metrics(
76-
board, df_metrics, metrics_pin_name: str, index_name="index", overwrite=False
77-
):
76+
board,
77+
df_metrics: pd.DataFrame,
78+
metrics_pin_name: str,
79+
pin_type: "str | None" = None,
80+
index_name: str = "index",
81+
overwrite: bool = False,
82+
) -> pd.DataFrame:
7883
"""
7984
Update an existing pin storing model metrics over time
8085
@@ -97,21 +102,18 @@ def pin_metrics(
97102
the existing pin.
98103
"""
99104

100-
new_dates = df_metrics[index_name]
105+
old_metrics_raw = board.pin_read(metrics_pin_name)
101106

102-
old_metrics = board.pin_read(metrics_pin_name)
103-
old_dates = old_metrics[index_name]
107+
# need to coerce date index to a datetime, since pandas does not infer
108+
# date columns from CSV (but note that formats like arrow do)
109+
old_metrics = old_metrics_raw.copy()
110+
old_metrics[index_name] = pd.to_datetime(old_metrics[index_name])
104111

105112
# handle overlapping dates ----
106-
if new_dates.dtype != old_dates.dtype:
107-
raise TypeError(
108-
f"index_name column ({repr(index_name)}) in old and new metrics "
109-
"must have the same dtype. "
110-
f"\nOld dtype: {old_dates.dtype}"
111-
f"\nNew dtype: {new_dates.dtype}"
112-
)
113+
dt_new = pd.to_datetime(df_metrics[index_name])
114+
dt_old = old_metrics[index_name]
113115

114-
indx_old_overlap = old_metrics[index_name].isin(new_dates)
116+
indx_old_overlap = dt_old.isin(dt_new)
115117

116118
if overwrite:
117119
# get only rows specific to old metrics, so when we concat below
@@ -128,7 +130,14 @@ def pin_metrics(
128130
combined_metrics = pd.concat([old_metrics, df_metrics], ignore_index=True)
129131
sorted_metrics = combined_metrics.sort_values(index_name)
130132

131-
board.pin_write(sorted_metrics, metrics_pin_name, type="arrow")
133+
if pin_type is None:
134+
meta = board.pin_meta(metrics_pin_name)
135+
136+
final_pin_type = meta.type
137+
else:
138+
final_pin_type = pin_type
139+
140+
board.pin_write(sorted_metrics, metrics_pin_name, type=final_pin_type)
132141

133142
return sorted_metrics
134143

vetiver/tests/test_monitor.py

Lines changed: 58 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
import numpy
77
import vetiver
88

9+
import pytest
10+
911
rng = pd.date_range("1/1/2012", periods=10, freq="S")
1012
new = dict(x=range(len(rng)), y=range(len(rng)))
1113
df = pd.DataFrame(new, index=rng)
@@ -41,17 +43,22 @@ def test_monitor(snapshot):
4143
snapshot.assert_match(m.to_json(), "test_monitor.json")
4244

4345

44-
def test_vetiver_pin_metrics():
45-
board = pins.board_temp()
46-
df_metrics_old = pd.DataFrame(
46+
@pytest.fixture
47+
def df_metrics_old():
48+
return pd.DataFrame(
4749
{
4850
"index": pd.to_datetime(["2021-01-01", "2021-01-02"]),
4951
"n": [1, 2],
5052
"metric": ["x", "x"],
51-
"estimate": [0.6, 0.7],
53+
"estimate": [0.1, 0.2],
5254
}
5355
)
5456

57+
58+
def test_vetiver_pin_metrics_simple(df_metrics_old):
59+
board = pins.board_temp()
60+
board.pin_write(df_metrics_old, "test_metrics", type="csv")
61+
5562
df_metrics_new = pd.DataFrame(
5663
{
5764
"index": pd.to_datetime(["2021-01-03", "2021-01-04"]),
@@ -61,8 +68,53 @@ def test_vetiver_pin_metrics():
6168
}
6269
)
6370

64-
board.pin_write(df_metrics_old, "test_metrics", type="arrow")
65-
6671
df_res = vetiver.pin_metrics(board, df_metrics_new, "test_metrics")
6772

6873
assert len(df_res) == 4
74+
assert df_res.equals(pd.concat([df_metrics_old, df_metrics_new], ignore_index=True))
75+
76+
77+
def test_vetiver_pin_metrics_overlap_error(df_metrics_old):
78+
board = pins.board_temp()
79+
board.pin_write(df_metrics_old, "test_metrics", type="csv")
80+
81+
with pytest.raises(ValueError) as exc_info:
82+
vetiver.pin_metrics(board, df_metrics_old, "test_metrics")
83+
84+
assert "The new metrics overlap" in exc_info.value.args[0]
85+
86+
87+
def test_vetiver_pin_metrics_overwrite(df_metrics_old):
88+
board = pins.board_temp()
89+
board.pin_write(df_metrics_old, "test_metrics", type="csv")
90+
91+
# first row should update existing metrics
92+
df_metrics_new = pd.DataFrame(
93+
{
94+
"index": pd.to_datetime(["2021-01-01", "2021-01-03"]),
95+
"n": [200, 201],
96+
"metric": ["y", "y"],
97+
"estimate": [0.8, 0.9],
98+
}
99+
)
100+
101+
df_res = vetiver.pin_metrics(board, df_metrics_new, "test_metrics", overwrite=True)
102+
assert len(df_res) == 3
103+
104+
df_dst = pd.concat([df_metrics_old.iloc[[1], :], df_metrics_new], ignore_index=True)
105+
assert df_res.equals(df_dst.sort_values("index"))
106+
107+
108+
def test_vetiver_pin_metrics_manual_pin_type(df_metrics_old):
109+
board = pins.board_temp()
110+
board.pin_write(df_metrics_old, "test_metrics", type="csv")
111+
112+
df_res = vetiver.pin_metrics(
113+
board, df_metrics_old, "test_metrics", overwrite=True, pin_type="joblib"
114+
)
115+
116+
assert len(df_res) == 2
117+
118+
meta = board.pin_meta("test_metrics")
119+
120+
assert meta.type == "joblib"

0 commit comments

Comments
 (0)