@@ -37,6 +37,9 @@ import {
3737 runAggregatedPaperSearch ,
3838 SearchProviderClient
3939} from "../collection/searchAggregation.js" ;
40+ import { loadGovernancePolicy } from "../../governance/policyLoader.js" ;
41+ import { ScreeningReport , screenEvidence } from "../../governance/evidenceIntakeFilter.js" ;
42+ import { appendGovernanceTrace } from "../../governance/governanceTrace.js" ;
4043
4144const ENRICHMENT_CONCURRENCY = 6 ;
4245const ENRICHMENT_PROGRESS_INTERVAL = 10 ;
@@ -168,9 +171,18 @@ interface CollectResultMeta {
168171 requestedQuery ?: string ;
169172 queryAttempts : CollectQueryAttemptMeta [ ] ;
170173 enrichment : CollectEnrichmentMeta ;
174+ governance_warnings ?: CollectGovernanceWarning [ ] ;
171175 timestamp : string ;
172176}
173177
178+ interface CollectGovernanceWarning {
179+ paper_id : string ;
180+ source : string ;
181+ triggeredRules : string [ ] ;
182+ excerpt : string | null ;
183+ recommendation : string ;
184+ }
185+
174186interface CollectQueryAttemptMeta {
175187 query : string ;
176188 reason : LiteratureQueryCandidate [ "reason" ] ;
@@ -323,6 +335,8 @@ export function createCollectPapersNode(deps: NodeExecutionDeps): GraphNodeHandl
323335 const persistedEnrichmentLogs = new Map < string , CollectEnrichmentLogEntry > (
324336 existingEnrichmentLogs . map ( ( entry ) => [ entry . paper_id , entry ] )
325337 ) ;
338+ const governancePolicy = loadGovernancePolicy ( ) ;
339+ const governanceWarnings : CollectGovernanceWarning [ ] = [ ] ;
326340 let diagnostics : SemanticScholarSearchDiagnostics = emptyCollectDiagnostics ( ) ;
327341 let aggregationReport : PaperSearchAggregationReport | undefined ;
328342 const queryAttempts : CollectQueryAttemptMeta [ ] = [ ] ;
@@ -356,7 +370,8 @@ export function createCollectPapersNode(deps: NodeExecutionDeps): GraphNodeHandl
356370 processedCount : 0 ,
357371 attemptedCount : 0 ,
358372 updatedCount : 0
359- }
373+ } ,
374+ governanceWarnings
360375 } ) ,
361376 diagnostics
362377 } ) ;
@@ -406,6 +421,49 @@ export function createCollectPapersNode(deps: NodeExecutionDeps): GraphNodeHandl
406421
407422 let changed = false ;
408423 for ( const record of aggregated . records ) {
424+ const screening = screenCollectedPaper ( record , governancePolicy ) ;
425+ if ( screening . result === "blocked" ) {
426+ appendGovernanceTrace ( {
427+ timestamp : new Date ( ) . toISOString ( ) ,
428+ runId : run . id ,
429+ node : "collect_papers" ,
430+ inputSummary : screeningInputSummary ( record ) ,
431+ screeningResult : screening . result ,
432+ triggeredRules : screening . triggeredRules ,
433+ decision : "hard_stop" ,
434+ matchedSlotId : "evidence_intake" ,
435+ detail : screening . recommendation
436+ } ) ;
437+ deps . eventStream . emit ( {
438+ type : "OBS_RECEIVED" ,
439+ runId : run . id ,
440+ node : "collect_papers" ,
441+ payload : {
442+ text : `Governance blocked collected paper "${ record . paper . title } " and excluded it from the corpus.`
443+ }
444+ } ) ;
445+ continue ;
446+ }
447+ if ( screening . result === "suspicious_but_usable" ) {
448+ governanceWarnings . push ( {
449+ paper_id : record . paper . paperId ,
450+ source : resolveGovernanceSource ( record ) ,
451+ triggeredRules : screening . triggeredRules ,
452+ excerpt : screening . excerpt ,
453+ recommendation : screening . recommendation
454+ } ) ;
455+ appendGovernanceTrace ( {
456+ timestamp : new Date ( ) . toISOString ( ) ,
457+ runId : run . id ,
458+ node : "collect_papers" ,
459+ inputSummary : screeningInputSummary ( record ) ,
460+ screeningResult : screening . result ,
461+ triggeredRules : screening . triggeredRules ,
462+ decision : "allow_with_trace" ,
463+ matchedSlotId : "evidence_intake" ,
464+ detail : screening . recommendation
465+ } ) ;
466+ }
409467 fetchedPapers . set ( record . paper . paperId , record . paper ) ;
410468 const currentRow = storedRows . get ( record . paper . paperId ) ;
411469 if ( ! currentRow && additionalLimit !== undefined && newPaperIds . size >= additionalLimit ) {
@@ -461,6 +519,7 @@ export function createCollectPapersNode(deps: NodeExecutionDeps): GraphNodeHandl
461519 fallbackSources : Array . from ( fallbackSources ) ,
462520 requestedQuery : normalizedRequest . requestedQuery ,
463521 queryAttempts,
522+ governanceWarnings,
464523 enrichment : {
465524 blocking : false ,
466525 status : "not_needed" ,
@@ -631,6 +690,7 @@ export function createCollectPapersNode(deps: NodeExecutionDeps): GraphNodeHandl
631690 fallbackSources : Array . from ( fallbackSources ) ,
632691 requestedQuery : normalizedRequest . requestedQuery ,
633692 queryAttempts,
693+ governanceWarnings,
634694 enrichment :
635695 papersToEnrich . length > 0
636696 ? {
@@ -1469,6 +1529,7 @@ function buildCollectResultMeta(input: {
14691529 requestedQuery ?: string ;
14701530 queryAttempts : CollectQueryAttemptMeta [ ] ;
14711531 enrichment : CollectEnrichmentMeta ;
1532+ governanceWarnings ?: CollectGovernanceWarning [ ] ;
14721533} ) : CollectResultMeta {
14731534 return {
14741535 query : input . request . query ,
@@ -1502,10 +1563,44 @@ function buildCollectResultMeta(input: {
15021563 requestedQuery : input . requestedQuery ,
15031564 queryAttempts : input . queryAttempts ,
15041565 enrichment : input . enrichment ,
1566+ governance_warnings : input . governanceWarnings ?? [ ] ,
15051567 timestamp : new Date ( ) . toISOString ( )
15061568 } ;
15071569}
15081570
1571+ function screenCollectedPaper (
1572+ record : AggregatedSearchRecord ,
1573+ policy : ReturnType < typeof loadGovernancePolicy >
1574+ ) : ScreeningReport {
1575+ return screenEvidence (
1576+ {
1577+ text : `${ record . paper . title } \n${ record . paper . abstract ?? "" } ` . trim ( ) ,
1578+ source : resolveGovernanceSource ( record ) ,
1579+ context : "collect_papers"
1580+ } ,
1581+ policy
1582+ ) ;
1583+ }
1584+
1585+ function resolveGovernanceSource ( record : AggregatedSearchRecord ) : string {
1586+ return (
1587+ record . row . landing_url ||
1588+ record . row . url ||
1589+ record . row . pdf_url ||
1590+ record . paper . landingUrl ||
1591+ record . paper . url ||
1592+ record . paper . openAccessPdfUrl ||
1593+ `provider:${ record . paper . canonicalSource } `
1594+ ) ;
1595+ }
1596+
1597+ function screeningInputSummary ( record : AggregatedSearchRecord ) : string {
1598+ return `${ record . paper . title } ${ record . paper . abstract ?? "" } `
1599+ . replace ( / \s + / gu, " " )
1600+ . trim ( )
1601+ . slice ( 0 , 100 ) ;
1602+ }
1603+
15091604async function persistCollectSnapshot ( input : {
15101605 run : { id : string } ;
15111606 rows : StoredCorpusRow [ ] ;
0 commit comments