Skip to content

Commit dfdc152

Browse files
committed
feat: strengthen governed review and analysis gates
1 parent 8ab9ab5 commit dfdc152

32 files changed

Lines changed: 1686 additions & 42 deletions

src/config.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -884,7 +884,10 @@ function normalizePrimaryLlmMode(
884884
}
885885

886886
/**
 * Coerces an arbitrary configuration value into a workflow approval mode.
 *
 * Only the exact strings "manual" and "hybrid" are passed through; every
 * other value (including undefined, wrong casing, or non-strings) falls
 * back to "minimal".
 *
 * @param value - Raw value read from configuration.
 * @returns The recognised approval mode, or "minimal" as the default.
 */
function normalizeWorkflowApprovalMode(value: unknown): WorkflowApprovalMode {
  if (value === "manual" || value === "hybrid") {
    return value;
  }
  return "minimal";
}
889892

890893
function normalizeExecutionApprovalMode(value: unknown): ExecutionApprovalMode {

src/core/agents/agentOrchestrator.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { GraphNodeId, GRAPH_NODE_ORDER, RunGraphState, RunRecord } from "../../types.js";
1+
import { GraphNodeId, GRAPH_NODE_ORDER, RunGraphState, RunRecord, WorkflowApprovalMode } from "../../types.js";
22
import { CheckpointStore } from "../stateGraph/checkpointStore.js";
33
import { JumpMode } from "../stateGraph/types.js";
44
import { RunStore } from "../runs/runStore.js";
@@ -24,6 +24,10 @@ export class AgentOrchestrator {
2424
return [...GRAPH_NODE_ORDER];
2525
}
2626

27+
/**
 * Forwards a new workflow approval mode to the runtime.
 *
 * @param mode - Approval mode handed to `this.runtime.setApprovalMode`.
 */
updateApprovalMode(mode: WorkflowApprovalMode): void {
  this.runtime.setApprovalMode(mode);
}
30+
2731
/**
 * Runs a single graph node for a run, delegating to `runAgentWithOptions`
 * without passing any options.
 *
 * @param runId - Identifier of the run to operate on.
 * @param nodeId - Graph node whose agent should be executed.
 * @returns Whatever `runAgentWithOptions` resolves to for this run/node.
 */
async runAgent(runId: string, nodeId: GraphNodeId): Promise<AgentRunResponse> {
  return this.runAgentWithOptions(runId, nodeId);
}
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import fs from "node:fs";
2+
import path from "node:path";
3+
4+
import type { StoredCorpusRow } from "../collection/types.js";
5+
6+
/**
 * Result of cross-checking the paper's citations against its bibliography
 * and the collected corpus.
 */
export interface CitationReport {
  /** Citation keys used in main.tex that have no matching entry in references.bib. */
  orphan_citations: string[];
  /** Cited paper ids that are absent from the corpus or have no DOI/URL recorded there. */
  unchecked_sources: string[];
  /** "fail" when at least one orphan citation exists, otherwise "pass". */
  status: "pass" | "fail";
}
11+
12+
/**
 * Loose view of one claim inside evidence_links.json.
 * The field is `unknown` because the artifact is read from disk unvalidated.
 */
interface EvidenceLinksClaimLike {
  /** Expected to be an array of paper id strings; must be checked before use. */
  citation_paper_ids?: unknown;
}

/**
 * Loose view of the evidence_links.json artifact.
 * The field is `unknown` because the artifact is read from disk unvalidated.
 */
interface EvidenceLinksArtifactLike {
  /** Expected to be an array of claim objects; must be checked before use. */
  claims?: unknown;
}
19+
20+
// Matches LaTeX \cite-family commands (\cite, \citep, \citet*, ...) with up to
// two optional [..] arguments; capture group 1 is the raw, comma-separated key list.
const CITATION_REGEX = new RegExp(String.raw`\\cite[a-zA-Z*]*(?:\[[^\]]*\]){0,2}\{([^}]+)\}`, "gu");
// Matches the head of a BibTeX entry such as "@article{key,"; capture group 1 is the entry key.
const BIB_ENTRY_REGEX = /@\w+\s*\{\s*([^,\s]+)\s*,/gu;
22+
23+
/**
 * Cross-checks the citations of a run's paper against its bibliography and corpus.
 *
 * Reads `paper/main.tex`, `paper/references.bib`, `paper/evidence_links.json`
 * and `corpus.jsonl` below `runDir`. Files that are missing or unreadable are
 * treated as empty rather than raising.
 *
 * @param runDir - Root directory of the run.
 * @returns Orphan citation keys, unchecked source ids, and an overall status.
 *          The status depends on orphan citations only; unchecked sources are
 *          reported but do not cause a "fail".
 */
export function checkCitationConsistency(runDir: string): CitationReport {
  const paperDir = path.join(runDir, "paper");
  const mainTex = safeReadFile(path.join(paperDir, "main.tex"));
  const referencesBib = safeReadFile(path.join(paperDir, "references.bib"));
  const evidenceLinks = safeReadJson<EvidenceLinksArtifactLike>(path.join(paperDir, "evidence_links.json"));
  const corpusRows = parseCorpusRows(path.join(runDir, "corpus.jsonl"));

  const citedKeys = extractCitationKeys(mainTex);
  const bibKeys = extractBibKeys(referencesBib);
  // A key cited in the TeX source without a bibliography entry is an orphan.
  const orphanCitations = uniqueStrings(
    citedKeys.filter((key) => !bibKeys.has(key))
  );
  const uncheckedSources = resolveUncheckedSources(evidenceLinks, corpusRows);

  return {
    orphan_citations: orphanCitations,
    unchecked_sources: uncheckedSources,
    status: orphanCitations.length > 0 ? "fail" : "pass"
  };
}
43+
44+
/**
 * Reads a file as UTF-8 text without throwing.
 *
 * @param filePath - Path of the file to read.
 * @returns The file contents, or null when the read fails for any reason
 *          (missing file, directory, permissions, ...).
 */
function safeReadFile(filePath: string): string | null {
  try {
    return fs.readFileSync(filePath, "utf8");
  } catch {
    // Best-effort read: callers treat an unreadable file as absent.
    return null;
  }
}
51+
52+
/**
 * Reads and parses a JSON file without throwing.
 *
 * The parsed value is cast to `T` without validation, so callers must treat
 * its fields as untrusted and check them before use.
 *
 * @param filePath - Path of the JSON file to read.
 * @returns The parsed value, or null when the file cannot be read or parsed.
 */
function safeReadJson<T>(filePath: string): T | null {
  try {
    return JSON.parse(fs.readFileSync(filePath, "utf8")) as T;
  } catch {
    // Best-effort read: a missing or malformed artifact is reported as null.
    return null;
  }
}
59+
60+
/**
 * Collects the distinct citation keys used by \cite-family commands in a TeX source.
 *
 * @param mainTex - Contents of the TeX file, or null when it could not be read.
 * @returns Trimmed, non-empty keys in first-occurrence order; empty for null or empty input.
 */
function extractCitationKeys(mainTex: string | null): string[] {
  if (!mainTex) {
    return [];
  }
  const keys: string[] = [];
  for (const match of mainTex.matchAll(CITATION_REGEX)) {
    // One command may cite several comma-separated keys: \cite{a, b}.
    const rawKeys = match[1]?.split(",") ?? [];
    for (const key of rawKeys) {
      const trimmed = key.trim();
      if (trimmed) {
        keys.push(trimmed);
      }
    }
  }
  return uniqueStrings(keys);
}
76+
77+
/**
 * Collects the entry keys declared in a BibTeX file.
 *
 * @param referencesBib - Contents of the .bib file, or null when it could not be read.
 * @returns The set of entry keys; empty for null or empty input.
 */
function extractBibKeys(referencesBib: string | null): Set<string> {
  const keys = new Set<string>();
  if (!referencesBib) {
    return keys;
  }
  for (const match of referencesBib.matchAll(BIB_ENTRY_REGEX)) {
    const key = match[1]?.trim();
    if (key) {
      keys.add(key);
    }
  }
  return keys;
}
90+
91+
/**
 * Loads a JSON Lines corpus file and indexes its rows by `paper_id`.
 *
 * Blank lines, lines that are not valid JSON, and records that are not an
 * object with a non-empty string `paper_id` are skipped. When the same id
 * occurs more than once, the last record wins.
 *
 * @param corpusPath - Path of the corpus .jsonl file.
 * @returns Map from paper id to row; empty when the file is missing or empty.
 */
function parseCorpusRows(corpusPath: string): Map<string, StoredCorpusRow> {
  const rows = new Map<string, StoredCorpusRow>();
  const raw = safeReadFile(corpusPath);
  if (!raw) {
    return rows;
  }
  for (const line of raw.split(/\r?\n/u)) {
    const trimmed = line.trim();
    if (!trimmed) {
      continue;
    }
    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      // Ignore malformed corpus rows and rely on the remaining parseable records.
      continue;
    }
    // JSON.parse may yield null or a primitive; only objects can carry a paper_id.
    if (!parsed || typeof parsed !== "object") {
      continue;
    }
    const paperId = (parsed as { paper_id?: unknown }).paper_id;
    // Lookups are done with string ids, so a non-string id could never match.
    if (typeof paperId === "string" && paperId) {
      rows.set(paperId, parsed as StoredCorpusRow);
    }
  }
  return rows;
}
113+
114+
/**
 * Lists cited paper ids that cannot be traced to a locatable corpus record.
 *
 * A paper id is "unchecked" when it is not present in the corpus, or when
 * its corpus row has none of `doi`, `url`, `landing_url` or `pdf_url` set.
 *
 * The evidence-links artifact is unvalidated JSON, so every level is checked:
 * a non-array `claims`, claim entries that are not objects (including null),
 * and `citation_paper_ids` values that are not non-blank strings are ignored
 * rather than raising.
 *
 * @param evidenceLinks - Parsed evidence_links.json, or null when unavailable.
 * @param corpusRows - Corpus rows indexed by paper id.
 * @returns Distinct unchecked paper ids in first-occurrence order.
 */
function resolveUncheckedSources(
  evidenceLinks: EvidenceLinksArtifactLike | null,
  corpusRows: Map<string, StoredCorpusRow>
): string[] {
  const rawClaims: unknown = evidenceLinks?.claims;
  const claims: unknown[] = Array.isArray(rawClaims) ? rawClaims : [];
  const citationPaperIds = uniqueStrings(
    claims.flatMap((claim): string[] => {
      // Guard against null or primitive entries before reading a property.
      if (!claim || typeof claim !== "object") {
        return [];
      }
      const ids = (claim as EvidenceLinksClaimLike).citation_paper_ids;
      return Array.isArray(ids)
        ? ids.filter((value): value is string => typeof value === "string" && value.trim().length > 0)
        : [];
    })
  );

  return citationPaperIds.filter((paperId) => {
    const row = corpusRows.get(paperId);
    if (!row) {
      return true;
    }
    return !(row.doi || row.url || row.landing_url || row.pdf_url);
  });
}
137+
138+
/**
 * Removes duplicate strings while keeping first-occurrence order.
 *
 * @param values - Strings to deduplicate; the input is not modified.
 * @returns A new array containing each distinct string once.
 */
function uniqueStrings(values: readonly string[]): string[] {
  return [...new Set(values)];
}

src/core/analysis/paperMinimumGate.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import type { ReviewArtifactPresence } from "../reviewSystem.js";
1717
import type { AnalysisReport } from "../resultAnalysis.js";
1818
import type { BriefEvidenceAssessment, BriefEvidenceCeiling } from "./briefEvidenceValidator.js";
1919
import { GATE_THRESHOLDS } from "./paperGateThresholds.js";
20+
import { hasAtLeastOneCompleteResultsTableRow } from "./resultsTableSchema.js";
2021

2122
// ---------------------------------------------------------------------------
2223
// Types
@@ -160,7 +161,18 @@ export function evaluateMinimumGate(input: MinimumGateInput): MinimumGateResult
160161
detail: artifactClaimEvidence.detail
161162
});
162163

163-
// 9. Not merely system/smoke validation
164+
// 9. Results table includes explicit baseline/comparator values
165+
const hasStructuredResultsTable = hasAtLeastOneCompleteResultsTableRow(input.report.results_table);
166+
checks.push({
167+
id: "results_table_schema",
168+
label: "Results table includes at least one baseline/comparator row",
169+
passed: hasStructuredResultsTable,
170+
detail: hasStructuredResultsTable
171+
? "result_analysis.results_table contains at least one complete baseline/comparator row."
172+
: "No result_analysis.results_table row has both baseline and comparator populated."
173+
});
174+
175+
// 10. Not merely system/smoke validation
164176
const hasHypotheses = input.presence.hypothesesPresent;
165177
const hasEnoughFindings = (input.report.primary_findings?.length ?? 0) >= GATE_THRESHOLDS.minPrimaryFindingCount;
166178
const isSubstantive = hasHypotheses && hasEnoughFindings && hasObjective;
src/core/analysis/resultsTableSchema.ts

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/** Whether a higher or a lower metric value counts as the better outcome. */
export type ResultsTableDirection = "higher_better" | "lower_better";

/** One metric row of the structured results table. */
export interface ResultsTableRow {
  /** Name of the metric. */
  metric: string;
  /** Baseline value, or null when not yet populated. */
  baseline: number | null;
  /** Comparator value, or null when not yet populated. */
  comparator: number | null;
  /** Difference between the two values, or null. NOTE(review): sign convention is not defined here - confirm. */
  delta: number | null;
  /** Which direction of change is an improvement for this metric. */
  direction: ResultsTableDirection;
}

/** The structured results table: an ordered list of metric rows. */
export type ResultsTableSchema = ResultsTableRow[];
12+
13+
/** Outcome of validating an untrusted value as a results table. */
export interface ResultsTableSchemaValidation {
  /** True only when no issues were recorded. */
  valid: boolean;
  /** Human-readable description of every problem found. */
  issues: string[];
  /** Normalised rows for every entry that was an object, including entries with recorded issues. */
  rows: ResultsTableSchema;
}
18+
19+
/**
 * Builds an empty results table with one row per distinct metric name.
 *
 * Names are trimmed and blank names dropped before deduplication, so
 * "acc" and " acc " produce a single row rather than two rows with the
 * same metric.
 *
 * @param metrics - Metric names, possibly padded, blank, or repeated.
 * @param direction - Direction applied to every generated row.
 * @returns Rows in first-occurrence order with baseline, comparator and delta set to null.
 */
export function buildResultsTableSchema(
  metrics: string[],
  direction: ResultsTableDirection
): ResultsTableSchema {
  const names = metrics
    .map((metric) => metric.trim())
    .filter(Boolean);
  return uniqueStrings(names).map((metric) => ({
    metric,
    baseline: null,
    comparator: null,
    delta: null,
    direction
  }));
}
34+
35+
/**
 * Validates an untrusted value as a results table and normalises its rows.
 *
 * Every problem is recorded in `issues` instead of being thrown. Entries
 * that are not plain objects are reported and left out of `rows`. Object
 * entries are always added to `rows` in normalised form, even when they
 * have issues: the metric is trimmed, invalid numbers become null, and an
 * unrecognised direction is stored as "higher_better".
 *
 * @param value - Candidate results table, typically parsed JSON.
 * @returns Validation outcome; `valid` is true only when no issue was recorded.
 */
export function validateResultsTableSchema(value: unknown): ResultsTableSchemaValidation {
  if (!Array.isArray(value)) {
    return {
      valid: false,
      issues: ["results_table must be an array."],
      rows: []
    };
  }

  const rows: ResultsTableSchema = [];
  const issues: string[] = [];

  for (const [index, candidate] of value.entries()) {
    if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) {
      issues.push(`results_table[${index}] must be an object.`);
      continue;
    }
    const row = candidate as Record<string, unknown>;
    const metric = typeof row.metric === "string" ? row.metric.trim() : "";
    const direction = row.direction;

    if (!metric) {
      issues.push(`results_table[${index}] must include a non-empty metric.`);
    }
    if (direction !== "higher_better" && direction !== "lower_better") {
      issues.push(`results_table[${index}] must include direction higher_better or lower_better.`);
    }

    const baseline = normalizeNullableNumber(row.baseline, `results_table[${index}].baseline`, issues);
    const comparator = normalizeNullableNumber(row.comparator, `results_table[${index}].comparator`, issues);
    const delta = normalizeNullableNumber(row.delta, `results_table[${index}].delta`, issues);

    rows.push({
      metric,
      baseline,
      comparator,
      delta,
      // An invalid direction was already reported above; fall back to a defined value.
      direction: direction === "lower_better" ? "lower_better" : "higher_better"
    });
  }

  return {
    valid: issues.length === 0,
    issues,
    rows
  };
}
82+
83+
/**
 * Reports whether at least one row carries both a baseline and a comparator value.
 *
 * A value counts as populated only when it is a finite number. Rows read
 * from unvalidated JSON may omit a field entirely; a missing (undefined)
 * field is therefore treated the same as null instead of passing as populated.
 *
 * @param rows - Results table, or undefined when the report has none.
 * @returns True when some row has both values populated; false for an absent or empty table.
 */
export function hasAtLeastOneCompleteResultsTableRow(rows: ResultsTableSchema | undefined): boolean {
  const isPopulated = (value: unknown): boolean => typeof value === "number" && Number.isFinite(value);
  const candidates = Array.isArray(rows) ? rows : [];
  return candidates.some((row) => isPopulated(row?.baseline) && isPopulated(row?.comparator));
}
86+
87+
/**
 * Reports whether any row lacks a baseline or a comparator value.
 *
 * A value counts as populated only when it is a finite number, so a field
 * that is null, missing (undefined, as can happen with unvalidated JSON),
 * or otherwise not a finite number marks the row as incomplete.
 *
 * @param rows - Results table, or undefined when the report has none.
 * @returns True when some row is incomplete; false for an absent or empty table.
 */
export function hasAnyIncompleteResultsTableRow(rows: ResultsTableSchema | undefined): boolean {
  const isPopulated = (value: unknown): boolean => typeof value === "number" && Number.isFinite(value);
  const candidates = Array.isArray(rows) ? rows : [];
  return candidates.some((row) => !(isPopulated(row?.baseline) && isPopulated(row?.comparator)));
}
90+
91+
/**
 * Normalises an untrusted value to a finite number or null.
 *
 * @param value - Candidate value.
 * @param label - Field label used as the prefix of the issue message.
 * @param issues - Issue list; a message is appended when the value is neither
 *                 null/undefined nor a finite number. This array is mutated.
 * @returns The number when valid, otherwise null.
 */
function normalizeNullableNumber(
  value: unknown,
  label: string,
  issues: string[]
): number | null {
  // Absent values are legitimate and are not reported as issues.
  if (value === null || value === undefined) {
    return null;
  }
  if (typeof value === "number" && Number.isFinite(value)) {
    return value;
  }
  issues.push(`${label} must be a finite number or null.`);
  return null;
}
105+
106+
/**
 * Removes duplicate strings while keeping first-occurrence order.
 *
 * @param values - Strings to deduplicate; the input is not modified.
 * @returns A new array containing each distinct string once.
 */
function uniqueStrings(values: readonly string[]): string[] {
  return [...new Set(values)];
}

0 commit comments

Comments
 (0)