lhy0718
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 2 deletions
diff --git a/README.ko.md b/README.ko.md
Lines changed: 5 additions & 4 deletions
diff --git a/README.md b/README.md
Lines changed: 5 additions & 4 deletions
diff --git a/src/core/analysis/paperAnalyzer.ts b/src/core/analysis/paperAnalyzer.ts
Lines changed: 50 additions & 3 deletions
diff --git a/src/core/analysis/paperSelection.ts b/src/core/analysis/paperSelection.ts
Lines changed: 41 additions & 3 deletions
diff --git a/src/core/analysis/paperText.ts b/src/core/analysis/paperText.ts
Lines changed: 10 additions & 0 deletions
@@ -22,10 +22,9 @@ dist/
 # AutoResearch runtime artifacts
 .autoresearch/
 
-# Manual real-validation workspace (requested)
+# Manual example / real-validation workspace
 /test/*
 !/test/smoke/
-/test/.autoresearch/
 
 # Logs / coverage
 coverage/
 
@@ -259,12 +259,13 @@ npm run test:smoke:ci
 
 스모크 테스트 안내:
 - smoke harness 파일은 `tests/smoke/` 아래에 있습니다.
-- smoke가 실행하는 예시 workspace는 계속 `/test`를 사용합니다.
-- `test:smoke:natural-collect`는 `/test` 경로에서 실행되며,
+- 수동 실행용 예시 workspace는 `/test` 아래에 둡니다.
+- smoke는 `/test` 루트 상태를 덮어쓰지 않도록 `/test/smoke-workspace`를 별도 workspace로 사용합니다.
+- `test:smoke:natural-collect`는 `/test/smoke-workspace` 경로에서 실행되며,
   자연어 수집 요청 -> pending `/agent collect ...` 생성 흐름을 PTY로 검증합니다.
-- `test:smoke:natural-collect-execute`는 `/test` 경로에서 실행되며,
+- `test:smoke:natural-collect-execute`는 `/test/smoke-workspace` 경로에서 실행되며,
   자연어 수집 요청 -> `y` 실행 -> 수집 산출물 생성까지 PTY로 검증합니다.
-- `test:smoke:all`은 `/test` 기준 전체 로컬 smoke 묶음을 실행합니다.
+- `test:smoke:all`은 `/test/smoke-workspace` 기준 전체 로컬 smoke 묶음을 실행합니다.
 - 실제 Codex 호출 없이 `AUTORESEARCH_FAKE_CODEX_RESPONSE`를 사용합니다.
 - execute 스모크는 `AUTORESEARCH_FAKE_SEMANTIC_SCHOLAR_RESPONSE`도 사용합니다.
 - `test:smoke:ci`는 CI 모드 선택 실행입니다.
 
@@ -259,12 +259,13 @@ npm run test:smoke:ci
 
 Smoke note:
 - Smoke harness files live under `tests/smoke/`.
-- The runnable example workspace used by smoke stays under `/test`.
-- `test:smoke:natural-collect` runs in `/test` and verifies PTY flow for
+- The manual example workspace stays under `/test`.
+- Smoke uses an isolated workspace under `/test/smoke-workspace` so it does not overwrite the root `/test` example state.
+- `test:smoke:natural-collect` runs in `/test/smoke-workspace` and verifies PTY flow for
   natural-language collect request -> pending `/agent collect ...` command.
-- `test:smoke:natural-collect-execute` runs in `/test` and verifies
+- `test:smoke:natural-collect-execute` runs in `/test/smoke-workspace` and verifies
   natural-language collect request -> `y` execute -> collect artifacts created.
-- `test:smoke:all` runs the full local smoke bundle in `/test`.
+- `test:smoke:all` runs the full local smoke bundle in `/test/smoke-workspace`.
 - It uses `AUTORESEARCH_FAKE_CODEX_RESPONSE` to avoid live Codex calls.
 - Execute smoke also uses `AUTORESEARCH_FAKE_SEMANTIC_SCHOLAR_RESPONSE`.
 - `test:smoke:ci` runs CI-mode smoke selection.
 
@@ -1,4 +1,4 @@
-import { LLMClient } from "../llm/client.js";
+import { LLMClient, LLMProgressEvent } from "../llm/client.js";
 import { AnalysisCorpusRow, ResolvedPaperSource, buildAbstractFallbackText } from "./paperText.js";
 import { ResponsesPdfAnalysisClient } from "../../integrations/openai/responsesPdfAnalysisClient.js";
 
@@ -71,23 +71,31 @@ export async function analyzePaperWithLlm(args: {
   paper: AnalysisCorpusRow;
   source: ResolvedPaperSource;
   maxAttempts?: number;
+  onProgress?: (message: string) => void;
 }): Promise<PaperAnalysisResult> {
   const maxAttempts = Math.max(1, args.maxAttempts ?? 2);
   let lastError: Error | undefined;
 
   for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
     try {
+      args.onProgress?.(`Starting LLM analysis attempt ${attempt}/${maxAttempts}.`);
       const completion = await args.llm.complete(buildPaperAnalysisPrompt(args.paper, args.source), {
-        systemPrompt: ANALYSIS_SYSTEM_PROMPT
+        systemPrompt: ANALYSIS_SYSTEM_PROMPT,
+        onProgress: (event) => {
+          emitLlmProgress(args.onProgress, event);
+        }
       });
+      args.onProgress?.("Received LLM output. Parsing structured JSON.");
       const parsed = parsePaperAnalysisJson(completion.text);
+      args.onProgress?.("Structured JSON parsed successfully.");
       return {
         ...normalizePaperAnalysis(args.paper, args.source, parsed),
         attempts: attempt,
         rawJson: parsed
       };
     } catch (error) {
       lastError = error instanceof Error ? error : new Error(String(error));
+      args.onProgress?.(`Analysis attempt ${attempt}/${maxAttempts} failed: ${lastError.message}`);
     }
   }
 
@@ -101,6 +109,7 @@ export async function analyzePaperWithResponsesPdf(args: {
   model: string;
   maxAttempts?: number;
   abortSignal?: AbortSignal;
+  onProgress?: (message: string) => void;
 }): Promise<PaperAnalysisResult> {
   const maxAttempts = Math.max(1, args.maxAttempts ?? 2);
   let lastError: Error | undefined;
@@ -113,27 +122,65 @@ export async function analyzePaperWithResponsesPdf(args: {
 
   for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
     try {
+      args.onProgress?.(`Starting Responses API PDF analysis attempt ${attempt}/${maxAttempts} with model ${args.model}.`);
       const completion = await args.client.analyzePdf({
         model: args.model,
         pdfUrl: args.pdfUrl,
         prompt: buildPaperAnalysisFilePrompt(args.paper),
         systemPrompt: ANALYSIS_SYSTEM_PROMPT,
-        abortSignal: args.abortSignal
+        abortSignal: args.abortSignal,
+        onProgress: (message) => args.onProgress?.(message)
       });
+      args.onProgress?.("Received Responses API output. Parsing structured JSON.");
       const parsed = parsePaperAnalysisJson(completion.text);
+      args.onProgress?.("Structured JSON parsed successfully.");
       return {
         ...normalizePaperAnalysis(args.paper, sourceHint, parsed),
         attempts: attempt,
         rawJson: parsed
       };
     } catch (error) {
       lastError = error instanceof Error ? error : new Error(String(error));
+      args.onProgress?.(`PDF analysis attempt ${attempt}/${maxAttempts} failed: ${lastError.message}`);
     }
   }
 
   throw lastError ?? new Error("paper_analysis_failed");
 }
 
+export function shouldFallbackResponsesPdfToLocalText(error: unknown): boolean { // Returns true when the error's message matches one of the remote-file download/fetch failure patterns below.
+  const message = error instanceof Error ? error.message : String(error); // Non-Error values are stringified so they can be pattern-matched as well.
+  return [
+    /timeout while downloading/i,
+    /failed to download/i,
+    /unable to download/i,
+    /unable to fetch/i,
+    /could not fetch/i,
+    /file_url/i, // NOTE(review): presumably targets errors that mention a `file_url` request field; confirm against real error text.
+    /remote file/i
+  ].some((pattern) => pattern.test(message)); // Case-insensitive; a single matching pattern is sufficient.
+}
+
+function emitLlmProgress( // Forwards one LLM progress event to an optional string-based progress callback.
+  onProgress: ((message: string) => void) | undefined,
+  event: LLMProgressEvent
+): void {
+  if (!onProgress) { // No listener supplied: nothing to forward.
+    return;
+  }
+  if (event.type === "delta") {
+    const text = event.text.trim();
+    if (text) { // Whitespace-only deltas are dropped.
+      onProgress(`LLM> ${text}`); // Delta events are tagged with an "LLM> " prefix.
+    }
+    return;
+  }
+  const text = event.text.trim(); // Any other event type is forwarded as trimmed text without a prefix.
+  if (text) { // Empty messages are not emitted.
+    onProgress(text);
+  }
+}
+
 export function buildPaperAnalysisPrompt(paper: AnalysisCorpusRow, source: ResolvedPaperSource): string {
   return [
     "Analyze the following paper and extract structured evidence.",
 
@@ -101,9 +101,14 @@ export async function selectPapersForAnalysis(args: {
   runTopic: string;
   corpusRows: AnalysisCorpusRow[];
   request: AnalysisSelectionRequest;
+  onProgress?: (message: string) => void;
 }): Promise<PaperSelectionResult> {
+  args.onProgress?.(
+    `Deterministic pre-rank started for ${args.corpusRows.length} paper(s) using title/topic similarity, citation count, and recency.`
+  );
   const ranked = rankPapersDeterministically(args.runTitle || args.runTopic, args.corpusRows);
   const totalCandidates = ranked.length;
+  args.onProgress?.(`Deterministic pre-rank completed for ${totalCandidates} candidate(s).`);
   if (args.request.selectionMode === "all" || !args.request.topN || args.request.topN >= totalCandidates) {
     const selectedPaperIds = ranked.map((candidate) => candidate.paper.paper_id);
     const rankedCandidates = ranked.map((candidate, index) => ({
@@ -127,9 +132,30 @@ export async function selectPapersForAnalysis(args: {
 
   const candidatePoolSize = Math.min(totalCandidates, Math.max(args.request.topN * 5, 50));
   const candidatePool = ranked.slice(0, candidatePoolSize);
-  const rerank = await rerankCandidates(args.llm, args.runTitle || args.runTopic, args.runTopic, args.request.topN, candidatePool);
+  args.onProgress?.(
+    `Preparing LLM rerank for ${candidatePool.length} candidate(s) to choose top ${args.request.topN}.`
+  );
+  args.onProgress?.(
+    `Rerank candidate preview: ${candidatePool
+      .slice(0, 5)
+      .map((candidate) => `${candidate.paper.paper_id}:${candidate.deterministicScore}`)
+      .join(", ")}`
+  );
+  const rerank = await rerankCandidates(
+    args.llm,
+    args.runTitle || args.runTopic,
+    args.runTopic,
+    args.request.topN,
+    candidatePool,
+    args.onProgress
+  );
   const rerankedIds = rerank.orderedPaperIds;
   const rerankOrder = new Map<string, number>(rerankedIds.map((paperId, index) => [paperId, index]));
+  args.onProgress?.(
+    rerank.applied
+      ? `LLM rerank completed. Top selection preview: ${rerankedIds.slice(0, Math.min(args.request.topN, 5)).join(", ")}`
+      : `LLM rerank fallback activated. Using deterministic order (${rerank.fallbackReason}).`
+  );
 
   const selectedPaperIds = rerankedIds.slice(0, args.request.topN);
   const selectedSet = new Set(selectedPaperIds);
@@ -218,12 +244,22 @@ async function rerankCandidates(
   referenceTitle: string,
   runTopic: string,
   topN: number,
-  candidates: RankedPaperCandidate[]
+  candidates: RankedPaperCandidate[],
+  onProgress?: (message: string) => void
 ): Promise<{ orderedPaperIds: string[]; applied: boolean; fallbackReason?: string }> {
   try {
+    onProgress?.(`Submitting rerank request for ${candidates.length} candidate(s).`);
     const response = await llm.complete(buildRerankPrompt(referenceTitle, runTopic, topN, candidates), {
-      systemPrompt: RERANK_SYSTEM_PROMPT
+      systemPrompt: RERANK_SYSTEM_PROMPT,
+      onProgress: (event) => {
+        const text = event.text.trim();
+        if (!text) {
+          return;
+        }
+        onProgress?.(event.type === "delta" ? `LLM rerank> ${text}` : text);
+      }
     });
+    onProgress?.("Received rerank response. Parsing JSON ordering.");
     const parsed = parseRerankJson(response.text);
     const seen = new Set<string>();
     const orderedPaperIds = normalizeStringArray(parsed.ordered_paper_ids)
@@ -235,6 +271,7 @@ async function rerankCandidates(
         seen.add(paperId);
         return true;
       });
+    onProgress?.(`Parsed rerank JSON with ${orderedPaperIds.length} explicit paper id(s).`);
 
     const fallbackRemainder = candidates
       .map((candidate) => candidate.paper.paper_id)
@@ -245,6 +282,7 @@ async function rerankCandidates(
       applied: true
     };
   } catch (error) {
+    onProgress?.(`Rerank request failed: ${error instanceof Error ? error.message : String(error)}`);
     return {
       orderedPaperIds: candidates.map((candidate) => candidate.paper.paper_id),
       applied: false,
 
@@ -38,10 +38,12 @@ export async function resolvePaperTextSource(args: {
   runId: string;
   paper: AnalysisCorpusRow;
   abortSignal?: AbortSignal;
+  onProgress?: (message: string) => void;
 }): Promise<ResolvedPaperSource> {
   const fallback = buildAbstractFallbackText(args.paper);
   const pdfUrl = resolvePaperPdfUrl(args.paper);
   if (!pdfUrl) {
+    args.onProgress?.("No PDF URL found. Using abstract fallback.");
     return {
       sourceType: "abstract",
       text: fallback,
@@ -56,6 +58,7 @@ export async function resolvePaperTextSource(args: {
 
   const cachedText = await readCachedText(textCachePath);
   if (cachedText) {
+    args.onProgress?.("Reusing cached extracted full text.");
     return {
       sourceType: "full_text",
       text: cachedText,
@@ -67,11 +70,14 @@ export async function resolvePaperTextSource(args: {
   }
 
   try {
+    args.onProgress?.("Downloading PDF for text extraction.");
     await downloadPdf(pdfUrl, pdfCachePath, args.abortSignal);
+    args.onProgress?.("Extracting text from downloaded PDF.");
     const extracted = await extractPdfText(pdfCachePath, args.abortSignal);
     if (extracted) {
       await ensureDir(path.dirname(textCachePath));
       await fs.writeFile(textCachePath, extracted, "utf8");
+      args.onProgress?.("PDF text extraction completed.");
       return {
         sourceType: "full_text",
         text: extracted,
@@ -81,6 +87,7 @@ export async function resolvePaperTextSource(args: {
         textCachePath
       };
     }
+    args.onProgress?.("PDF extraction produced no usable text. Falling back to abstract.");
     return {
       sourceType: "abstract",
       text: fallback,
@@ -91,6 +98,9 @@ export async function resolvePaperTextSource(args: {
       fallbackReason: "pdf_extract_failed"
     };
   } catch (error) {
+    args.onProgress?.(
+      `PDF resolution failed (${error instanceof Error ? error.message : String(error)}). Falling back to abstract.`
+    );
     return {
       sourceType: "abstract",
       text: fallback,