Skip to content

Commit ee27b29

Browse files
authored
Improve auto-triage TUI: vault fallback, check TTL, search, review questions (#64969)
- Use vault as fallback for single-PR fetch at startup, skipping the GraphQL call when cached data is still fresh (4h TTL).
- Add 4h TTL to check vault so re-run results on the same commit are picked up instead of serving stale cached data.
- Extend TUI search (/) to match by title and author in addition to PR number.
- Pass diff_text to LLM assessment on on-demand re-evaluation so review questions are included in the prompt.

Signed-off-by: André Ahlert <andre@aex.partners>
1 parent 431c40b commit ee27b29

4 files changed

Lines changed: 82 additions & 19 deletions

File tree

dev/breeze/src/airflow_breeze/commands/pr_commands.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -248,9 +248,9 @@ def _cached_assess_pr(
248248
return result
249249

250250
# Generate directed review questions from the diff if available.
251-
# Note: diff_text is not yet passed by the background thread-pool submissions
252-
# (the diff may not be fetched at LLM submission time). Review questions are
253-
# active when diff_text is provided explicitly (e.g. sequential review mode).
251+
# In the TUI, diff_text is passed when the diff has been fetched by the
252+
# background executor before the LLM submission. In the non-TUI flow,
253+
# it is passed explicitly during sequential review.
254254
review_questions: list[str] | None = None
255255
if diff_text:
256256
from airflow_breeze.utils.pr_vault import generate_review_questions
@@ -1859,6 +1859,42 @@ def _fetch_single_pr_graphql(token: str, github_repository: str, pr_number: int)
18591859
)
18601860

18611861

1862+
def _load_pr_from_vault(github_repository: str, pr_number: int) -> PRData | None:
    """Try to load a single PR from the vault cache.

    :param github_repository: repository in ``owner/name`` form, passed through
        to ``load_pr`` unchanged.
    :param pr_number: number of the PR to look up.
    :return: a ``PRData`` rebuilt from the cached entry, or ``None`` when
        ``load_pr`` returns ``None`` (vault miss or expired TTL) or when the
        cached entry lacks one of the required fields. Callers treat ``None``
        as "not cached" and fall back to fetching the PR from the API.

    The returned PRData has ``unresolved_threads=[]``. ``review_decisions`` and
    ``has_collaborator_review`` are not set here, so they keep whatever defaults
    ``PRData`` declares (presumably ``[]`` and ``False`` -- confirm against the
    dataclass). These are backfilled by ``_enrich_candidate_details`` which runs
    during triage/review regardless of whether the PR came from vault or the API.
    """
    from airflow_breeze.utils.pr_vault import load_pr

    data = load_pr(github_repository, pr_number)
    if data is None:
        return None

    # Fields without a sensible default. An entry written by an older schema or
    # a truncated cache file may miss some of them; treat that as a cache miss
    # instead of crashing startup with KeyError, so the API fetch takes over.
    required_keys = (
        "number",
        "title",
        "url",
        "created_at",
        "updated_at",
        "author_login",
        "head_sha",
    )
    try:
        required = {key: data[key] for key in required_keys}
    except KeyError:
        return None

    return PRData(
        **required,
        body=data.get("body", ""),
        node_id=data.get("node_id", ""),
        author_association=data.get("author_association", "NONE"),
        base_ref=data.get("base_ref", "main"),
        check_summary=data.get("check_summary", ""),
        checks_state=data.get("checks_state", "UNKNOWN"),
        failed_checks=data.get("failed_checks", []),
        commits_behind=data.get("commits_behind", 0),
        is_draft=data.get("is_draft", False),
        mergeable=data.get("mergeable", "UNKNOWN"),
        labels=data.get("labels", []),
        # Review threads are not stored in the vault; start empty and let the
        # enrichment step fill them in.
        unresolved_threads=[],
    )
1896+
1897+
18621898
_author_profile_cache: dict[str, dict] = {}
18631899
_author_profile_lock = threading.Lock()
18641900

@@ -5322,6 +5358,7 @@ def _ensure_diff_for_pr(tui_ref: TriageTUI, pr_number: int, pr_url: str) -> None
53225358
pr_body=cur_pr.body,
53235359
check_status_summary=cur_pr.check_summary,
53245360
llm_model=llm_model,
5361+
diff_text=diff_cache.get(cur_pr.number),
53255362
)
53265363
ctx.llm_future_to_pr[fut] = cur_pr
53275364
# Keep as PASSING with LLM in progress
@@ -10041,9 +10078,15 @@ def _fetch_initial_prs(
1004110078
_initial_review_requested_user: str | None = None if review_mode else review_requested_user
1004210079

1004310080
if pr_number:
10044-
if not quiet:
10045-
console_print(f"[info]Fetching PR #{pr_number} via GraphQL...[/]")
10046-
all_prs = [_fetch_single_pr_graphql(token, github_repository, pr_number)]
10081+
cached = _load_pr_from_vault(github_repository, pr_number)
10082+
if cached is not None:
10083+
if not quiet:
10084+
console_print(f"[info]Loaded PR #{pr_number} from vault cache.[/]")
10085+
all_prs = [cached]
10086+
else:
10087+
if not quiet:
10088+
console_print(f"[info]Fetching PR #{pr_number} via GraphQL...[/]")
10089+
all_prs = [_fetch_single_pr_graphql(token, github_repository, pr_number)]
1004710090
total_matching_prs = 1
1004810091
elif len(review_requested_users) > 1 and not review_mode:
1004910092
if not quiet:

dev/breeze/src/airflow_breeze/utils/pr_vault.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,9 @@ def save_prs_batch(github_repository: str, prs) -> int:
7474

7575
# ── Check status vault ───────────────────────────────────────────
7676
# Keyed by head_sha. Only caches fully-completed check results (no
77-
# IN_PROGRESS or QUEUED). Completed results never change for the same SHA.
78-
_check_vault = CacheStore("check_vault")
77+
# IN_PROGRESS or QUEUED). Uses a 4-hour TTL because checks can be
78+
# re-run on the same commit without a force push.
79+
_check_vault = CacheStore("check_vault", ttl_seconds=4 * 3600)
7980

8081
# Statuses that indicate checks are still running
8182
_INCOMPLETE_STATUSES = {"IN_PROGRESS", "QUEUED", "PENDING"}

dev/breeze/src/airflow_breeze/utils/tui_display.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1753,7 +1753,7 @@ def search_jump(self) -> bool:
17531753
matching entry. Pressing Escape cancels. Returns True if the cursor moved.
17541754
"""
17551755
width, height = _get_terminal_size()
1756-
prompt = "/ Jump to PR #: "
1756+
prompt = "/ Search (PR#, title, author): "
17571757
query = ""
17581758

17591759
while True:
@@ -1786,18 +1786,28 @@ def search_jump(self) -> bool:
17861786
if not query:
17871787
return False
17881788

1789-
# Match by PR number only
1789+
# Try exact PR number match first
1790+
stripped = query.lstrip("#")
17901791
try:
1791-
target_num = int(query.lstrip("#"))
1792+
target_num = int(stripped)
17921793
except ValueError:
1793-
return False
1794-
1794+
target_num = None
1795+
1796+
if target_num is not None:
1797+
for idx, entry in enumerate(self.entries):
1798+
if entry.pr.number == target_num:
1799+
self.cursor = idx
1800+
self.scroll_offset = idx
1801+
self._focus = _FocusPanel.PR_LIST
1802+
return True
1803+
1804+
# Fall back to text search on title/author (also for numeric queries
1805+
# that didn't match any PR number)
1806+
query_lower = query.lower()
17951807
for idx, entry in enumerate(self.entries):
1796-
if entry.pr.number == target_num:
1808+
if query_lower in entry.pr.title.lower() or query_lower in entry.pr.author_login.lower():
17971809
self.cursor = idx
1798-
# Put the matched entry at the top of the visible list
17991810
self.scroll_offset = idx
1800-
# Switch focus to PR list so the selection is highlighted
18011811
self._focus = _FocusPanel.PR_LIST
18021812
return True
18031813

dev/breeze/tests/test_pr_vault.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,13 +152,22 @@ def test_different_sha_returns_none(self, _fake_cache_dir):
152152
save_check_status("apache/airflow", "sha_abc", {"SUCCESS": 1})
153153
assert load_check_status("apache/airflow", "sha_different") is None
154154

155-
def test_no_ttl_for_same_sha(self, _fake_cache_dir):
156-
"""Check vault has no TTL — same SHA always returns same results."""
155+
def test_ttl_expires_stale_results(self, _fake_cache_dir):
156+
"""Check vault uses 4h TTL so re-run results are picked up."""
157157
save_check_status("apache/airflow", "sha_abc", {"SUCCESS": 1})
158-
# Even with old timestamp, should still return (no TTL)
159158
loaded = load_check_status("apache/airflow", "sha_abc")
160159
assert loaded is not None
161160

161+
# Simulate expiry by backdating cached_at
162+
import json
163+
164+
cache_file = _fake_cache_dir / "checks_sha_abc.json"
165+
data = json.loads(cache_file.read_text())
166+
data["cached_at"] = data["cached_at"] - 5 * 3600 # 5 hours ago
167+
cache_file.write_text(json.dumps(data))
168+
169+
assert load_check_status("apache/airflow", "sha_abc") is None
170+
162171

163172
class TestWorkflowRunsVault:
164173
def test_save_and_load(self, _fake_cache_dir):

0 commit comments

Comments
 (0)