Skip to content

Commit 7eca870

Browse files
committed
Refine natural action execution and analysis logging
1 parent cbfff99 commit 7eca870

21 files changed

Lines changed: 1362 additions & 658 deletions

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,9 @@ dist/
2222
# AutoResearch runtime artifacts
2323
.autoresearch/
2424

25-
# Manual real-validation workspace (requested)
25+
# Manual example / real-validation workspace
2626
/test/*
2727
!/test/smoke/
28-
/test/.autoresearch/
2928

3029
# Logs / coverage
3130
coverage/

README.ko.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,12 +259,13 @@ npm run test:smoke:ci
259259

260260
스모크 테스트 안내:
261261
- smoke harness 파일은 `tests/smoke/` 아래에 있습니다.
262-
- smoke가 실행하는 예시 workspace는 계속 `/test`를 사용합니다.
263-
- `test:smoke:natural-collect`는 `/test` 경로에서 실행되며,
262+
- 수동 실행용 예시 workspace는 `/test` 아래에 둡니다.
263+
- smoke는 `/test` 루트 상태를 덮어쓰지 않도록 `/test/smoke-workspace`를 별도 workspace로 사용합니다.
264+
- `test:smoke:natural-collect`는 `/test/smoke-workspace` 경로에서 실행되며,
264265
자연어 수집 요청 -> pending `/agent collect ...` 생성 흐름을 PTY로 검증합니다.
265-
- `test:smoke:natural-collect-execute`는 `/test` 경로에서 실행되며,
266+
- `test:smoke:natural-collect-execute`는 `/test/smoke-workspace` 경로에서 실행되며,
266267
자연어 수집 요청 -> `y` 실행 -> 수집 산출물 생성까지 PTY로 검증합니다.
267-
- `test:smoke:all`은 `/test` 기준 전체 로컬 smoke 묶음을 실행합니다.
268+
- `test:smoke:all`은 `/test/smoke-workspace` 기준 전체 로컬 smoke 묶음을 실행합니다.
268269
- 실제 Codex 호출 없이 `AUTORESEARCH_FAKE_CODEX_RESPONSE`를 사용합니다.
269270
- execute 스모크는 `AUTORESEARCH_FAKE_SEMANTIC_SCHOLAR_RESPONSE`도 사용합니다.
270271
- `test:smoke:ci`는 CI 모드 선택 실행입니다.

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,12 +259,13 @@ npm run test:smoke:ci
259259

260260
Smoke note:
261261
- Smoke harness files live under `tests/smoke/`.
262-
- The runnable example workspace used by smoke stays under `/test`.
263-
- `test:smoke:natural-collect` runs in `/test` and verifies PTY flow for
262+
- The manual example workspace stays under `/test`.
263+
- Smoke uses an isolated workspace under `/test/smoke-workspace` so it does not overwrite the root `/test` example state.
264+
- `test:smoke:natural-collect` runs in `/test/smoke-workspace` and verifies PTY flow for
264265
natural-language collect request -> pending `/agent collect ...` command.
265-
- `test:smoke:natural-collect-execute` runs in `/test` and verifies
266+
- `test:smoke:natural-collect-execute` runs in `/test/smoke-workspace` and verifies
266267
natural-language collect request -> `y` execute -> collect artifacts created.
267-
- `test:smoke:all` runs the full local smoke bundle in `/test`.
268+
- `test:smoke:all` runs the full local smoke bundle in `/test/smoke-workspace`.
268269
- It uses `AUTORESEARCH_FAKE_CODEX_RESPONSE` to avoid live Codex calls.
269270
- Execute smoke also uses `AUTORESEARCH_FAKE_SEMANTIC_SCHOLAR_RESPONSE`.
270271
- `test:smoke:ci` runs CI-mode smoke selection.

src/core/analysis/paperAnalyzer.ts

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { LLMClient } from "../llm/client.js";
1+
import { LLMClient, LLMProgressEvent } from "../llm/client.js";
22
import { AnalysisCorpusRow, ResolvedPaperSource, buildAbstractFallbackText } from "./paperText.js";
33
import { ResponsesPdfAnalysisClient } from "../../integrations/openai/responsesPdfAnalysisClient.js";
44

@@ -71,23 +71,31 @@ export async function analyzePaperWithLlm(args: {
7171
paper: AnalysisCorpusRow;
7272
source: ResolvedPaperSource;
7373
maxAttempts?: number;
74+
onProgress?: (message: string) => void;
7475
}): Promise<PaperAnalysisResult> {
7576
const maxAttempts = Math.max(1, args.maxAttempts ?? 2);
7677
let lastError: Error | undefined;
7778

7879
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
7980
try {
81+
args.onProgress?.(`Starting LLM analysis attempt ${attempt}/${maxAttempts}.`);
8082
const completion = await args.llm.complete(buildPaperAnalysisPrompt(args.paper, args.source), {
81-
systemPrompt: ANALYSIS_SYSTEM_PROMPT
83+
systemPrompt: ANALYSIS_SYSTEM_PROMPT,
84+
onProgress: (event) => {
85+
emitLlmProgress(args.onProgress, event);
86+
}
8287
});
88+
args.onProgress?.("Received LLM output. Parsing structured JSON.");
8389
const parsed = parsePaperAnalysisJson(completion.text);
90+
args.onProgress?.("Structured JSON parsed successfully.");
8491
return {
8592
...normalizePaperAnalysis(args.paper, args.source, parsed),
8693
attempts: attempt,
8794
rawJson: parsed
8895
};
8996
} catch (error) {
9097
lastError = error instanceof Error ? error : new Error(String(error));
98+
args.onProgress?.(`Analysis attempt ${attempt}/${maxAttempts} failed: ${lastError.message}`);
9199
}
92100
}
93101

@@ -101,6 +109,7 @@ export async function analyzePaperWithResponsesPdf(args: {
101109
model: string;
102110
maxAttempts?: number;
103111
abortSignal?: AbortSignal;
112+
onProgress?: (message: string) => void;
104113
}): Promise<PaperAnalysisResult> {
105114
const maxAttempts = Math.max(1, args.maxAttempts ?? 2);
106115
let lastError: Error | undefined;
@@ -113,27 +122,65 @@ export async function analyzePaperWithResponsesPdf(args: {
113122

114123
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
115124
try {
125+
args.onProgress?.(`Starting Responses API PDF analysis attempt ${attempt}/${maxAttempts} with model ${args.model}.`);
116126
const completion = await args.client.analyzePdf({
117127
model: args.model,
118128
pdfUrl: args.pdfUrl,
119129
prompt: buildPaperAnalysisFilePrompt(args.paper),
120130
systemPrompt: ANALYSIS_SYSTEM_PROMPT,
121-
abortSignal: args.abortSignal
131+
abortSignal: args.abortSignal,
132+
onProgress: (message) => args.onProgress?.(message)
122133
});
134+
args.onProgress?.("Received Responses API output. Parsing structured JSON.");
123135
const parsed = parsePaperAnalysisJson(completion.text);
136+
args.onProgress?.("Structured JSON parsed successfully.");
124137
return {
125138
...normalizePaperAnalysis(args.paper, sourceHint, parsed),
126139
attempts: attempt,
127140
rawJson: parsed
128141
};
129142
} catch (error) {
130143
lastError = error instanceof Error ? error : new Error(String(error));
144+
args.onProgress?.(`PDF analysis attempt ${attempt}/${maxAttempts} failed: ${lastError.message}`);
131145
}
132146
}
133147

134148
throw lastError ?? new Error("paper_analysis_failed");
135149
}
136150

151+
export function shouldFallbackResponsesPdfToLocalText(error: unknown): boolean {
152+
const message = error instanceof Error ? error.message : String(error);
153+
return [
154+
/timeout while downloading/i,
155+
/failed to download/i,
156+
/unable to download/i,
157+
/unable to fetch/i,
158+
/could not fetch/i,
159+
/file_url/i,
160+
/remote file/i
161+
].some((pattern) => pattern.test(message));
162+
}
163+
164+
function emitLlmProgress(
165+
onProgress: ((message: string) => void) | undefined,
166+
event: LLMProgressEvent
167+
): void {
168+
if (!onProgress) {
169+
return;
170+
}
171+
if (event.type === "delta") {
172+
const text = event.text.trim();
173+
if (text) {
174+
onProgress(`LLM> ${text}`);
175+
}
176+
return;
177+
}
178+
const text = event.text.trim();
179+
if (text) {
180+
onProgress(text);
181+
}
182+
}
183+
137184
export function buildPaperAnalysisPrompt(paper: AnalysisCorpusRow, source: ResolvedPaperSource): string {
138185
return [
139186
"Analyze the following paper and extract structured evidence.",

src/core/analysis/paperSelection.ts

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,14 @@ export async function selectPapersForAnalysis(args: {
101101
runTopic: string;
102102
corpusRows: AnalysisCorpusRow[];
103103
request: AnalysisSelectionRequest;
104+
onProgress?: (message: string) => void;
104105
}): Promise<PaperSelectionResult> {
106+
args.onProgress?.(
107+
`Deterministic pre-rank started for ${args.corpusRows.length} paper(s) using title/topic similarity, citation count, and recency.`
108+
);
105109
const ranked = rankPapersDeterministically(args.runTitle || args.runTopic, args.corpusRows);
106110
const totalCandidates = ranked.length;
111+
args.onProgress?.(`Deterministic pre-rank completed for ${totalCandidates} candidate(s).`);
107112
if (args.request.selectionMode === "all" || !args.request.topN || args.request.topN >= totalCandidates) {
108113
const selectedPaperIds = ranked.map((candidate) => candidate.paper.paper_id);
109114
const rankedCandidates = ranked.map((candidate, index) => ({
@@ -127,9 +132,30 @@ export async function selectPapersForAnalysis(args: {
127132

128133
const candidatePoolSize = Math.min(totalCandidates, Math.max(args.request.topN * 5, 50));
129134
const candidatePool = ranked.slice(0, candidatePoolSize);
130-
const rerank = await rerankCandidates(args.llm, args.runTitle || args.runTopic, args.runTopic, args.request.topN, candidatePool);
135+
args.onProgress?.(
136+
`Preparing LLM rerank for ${candidatePool.length} candidate(s) to choose top ${args.request.topN}.`
137+
);
138+
args.onProgress?.(
139+
`Rerank candidate preview: ${candidatePool
140+
.slice(0, 5)
141+
.map((candidate) => `${candidate.paper.paper_id}:${candidate.deterministicScore}`)
142+
.join(", ")}`
143+
);
144+
const rerank = await rerankCandidates(
145+
args.llm,
146+
args.runTitle || args.runTopic,
147+
args.runTopic,
148+
args.request.topN,
149+
candidatePool,
150+
args.onProgress
151+
);
131152
const rerankedIds = rerank.orderedPaperIds;
132153
const rerankOrder = new Map<string, number>(rerankedIds.map((paperId, index) => [paperId, index]));
154+
args.onProgress?.(
155+
rerank.applied
156+
? `LLM rerank completed. Top selection preview: ${rerankedIds.slice(0, Math.min(args.request.topN, 5)).join(", ")}`
157+
: `LLM rerank fallback activated. Using deterministic order (${rerank.fallbackReason}).`
158+
);
133159

134160
const selectedPaperIds = rerankedIds.slice(0, args.request.topN);
135161
const selectedSet = new Set(selectedPaperIds);
@@ -218,12 +244,22 @@ async function rerankCandidates(
218244
referenceTitle: string,
219245
runTopic: string,
220246
topN: number,
221-
candidates: RankedPaperCandidate[]
247+
candidates: RankedPaperCandidate[],
248+
onProgress?: (message: string) => void
222249
): Promise<{ orderedPaperIds: string[]; applied: boolean; fallbackReason?: string }> {
223250
try {
251+
onProgress?.(`Submitting rerank request for ${candidates.length} candidate(s).`);
224252
const response = await llm.complete(buildRerankPrompt(referenceTitle, runTopic, topN, candidates), {
225-
systemPrompt: RERANK_SYSTEM_PROMPT
253+
systemPrompt: RERANK_SYSTEM_PROMPT,
254+
onProgress: (event) => {
255+
const text = event.text.trim();
256+
if (!text) {
257+
return;
258+
}
259+
onProgress?.(event.type === "delta" ? `LLM rerank> ${text}` : text);
260+
}
226261
});
262+
onProgress?.("Received rerank response. Parsing JSON ordering.");
227263
const parsed = parseRerankJson(response.text);
228264
const seen = new Set<string>();
229265
const orderedPaperIds = normalizeStringArray(parsed.ordered_paper_ids)
@@ -235,6 +271,7 @@ async function rerankCandidates(
235271
seen.add(paperId);
236272
return true;
237273
});
274+
onProgress?.(`Parsed rerank JSON with ${orderedPaperIds.length} explicit paper id(s).`);
238275

239276
const fallbackRemainder = candidates
240277
.map((candidate) => candidate.paper.paper_id)
@@ -245,6 +282,7 @@ async function rerankCandidates(
245282
applied: true
246283
};
247284
} catch (error) {
285+
onProgress?.(`Rerank request failed: ${error instanceof Error ? error.message : String(error)}`);
248286
return {
249287
orderedPaperIds: candidates.map((candidate) => candidate.paper.paper_id),
250288
applied: false,

src/core/analysis/paperText.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,12 @@ export async function resolvePaperTextSource(args: {
3838
runId: string;
3939
paper: AnalysisCorpusRow;
4040
abortSignal?: AbortSignal;
41+
onProgress?: (message: string) => void;
4142
}): Promise<ResolvedPaperSource> {
4243
const fallback = buildAbstractFallbackText(args.paper);
4344
const pdfUrl = resolvePaperPdfUrl(args.paper);
4445
if (!pdfUrl) {
46+
args.onProgress?.("No PDF URL found. Using abstract fallback.");
4547
return {
4648
sourceType: "abstract",
4749
text: fallback,
@@ -56,6 +58,7 @@ export async function resolvePaperTextSource(args: {
5658

5759
const cachedText = await readCachedText(textCachePath);
5860
if (cachedText) {
61+
args.onProgress?.("Reusing cached extracted full text.");
5962
return {
6063
sourceType: "full_text",
6164
text: cachedText,
@@ -67,11 +70,14 @@ export async function resolvePaperTextSource(args: {
6770
}
6871

6972
try {
73+
args.onProgress?.("Downloading PDF for text extraction.");
7074
await downloadPdf(pdfUrl, pdfCachePath, args.abortSignal);
75+
args.onProgress?.("Extracting text from downloaded PDF.");
7176
const extracted = await extractPdfText(pdfCachePath, args.abortSignal);
7277
if (extracted) {
7378
await ensureDir(path.dirname(textCachePath));
7479
await fs.writeFile(textCachePath, extracted, "utf8");
80+
args.onProgress?.("PDF text extraction completed.");
7581
return {
7682
sourceType: "full_text",
7783
text: extracted,
@@ -81,6 +87,7 @@ export async function resolvePaperTextSource(args: {
8187
textCachePath
8288
};
8389
}
90+
args.onProgress?.("PDF extraction produced no usable text. Falling back to abstract.");
8491
return {
8592
sourceType: "abstract",
8693
text: fallback,
@@ -91,6 +98,9 @@ export async function resolvePaperTextSource(args: {
9198
fallbackReason: "pdf_extract_failed"
9299
};
93100
} catch (error) {
101+
args.onProgress?.(
102+
`PDF resolution failed (${error instanceof Error ? error.message : String(error)}). Falling back to abstract.`
103+
);
94104
return {
95105
sourceType: "abstract",
96106
text: fallback,

0 commit comments

Comments
 (0)