Skip to content
Draft
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
5945db4
H-6364: Add artifacts proxy rewrite for page images
lunelson Mar 23, 2026
2aac90f
H-6364: Add SSE proxy API route for ingest events
lunelson Mar 23, 2026
230fb07
H-6364: Align IngestRunView types with discovery contract
lunelson Mar 24, 2026
effc3af
H-6364: Continuous-scroll document viewer with scrollToPage
lunelson Mar 24, 2026
64dbc96
H-6364: Entity cards with assertion windows in results panel
lunelson Mar 24, 2026
112718f
H-6364: Two-panel inputs view with extraction mode toggle
lunelson Mar 24, 2026
9c5a327
H-6364: Code review fixes across ingest module
lunelson Mar 25, 2026
3172a57
H-6364: Fix temporal container healthcheck to not depend on namespace
lunelson Mar 25, 2026
715dd07
H-6364: Align proxy routes with ingest-runs API rename
lunelson Mar 25, 2026
db963c4
H-6364: Human-readable progress labels for SSE streaming
lunelson Mar 25, 2026
125e3f7
H-6364: Fix results panel — claims fallback, scroll containment, sele…
lunelson Mar 25, 2026
9185412
H-6364: Fix results layout — independent scroll containers
lunelson Mar 25, 2026
28850fa
H-6364: Move 'New Upload' button to fixed footer below scroll panels
lunelson Mar 25, 2026
f8de5d4
H-6364: Add evidence panel header with document stats
lunelson Mar 25, 2026
bf1e9ea
H-6364: Fix ingest review issues
lunelson Mar 25, 2026
8709ff3
H-6364: Tighten SSE proxy contract
lunelson Mar 25, 2026
e9aaaf6
feat: harden ingest recovery and claim grouping
lunelson Mar 27, 2026
535ae85
feat: persist ingest run id in url
lunelson Mar 27, 2026
e73de5e
feat: rehydrate ingest runs from url
lunelson Mar 27, 2026
84e99b1
feat: stabilize ingest progress and sidebar state
lunelson Mar 27, 2026
2539f74
feat: clean up stale ingest run urls
lunelson Mar 27, 2026
5b78edf
feat: stabilize ingest stream handling
lunelson Mar 27, 2026
9c3bd2f
feat: narrow ingest run state variants
lunelson Mar 27, 2026
899090d
feat: unify ingest page navigation policy
lunelson Mar 27, 2026
d8d3cbf
feat: add ingest page navigation regression tests
lunelson Mar 27, 2026
188fe8d
feat: read ingest streams without event whitelists
lunelson Mar 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions apps/hash-external-services/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,9 @@ services:
[
"CMD",
"temporal",
"workflow",
"list",
"--namespace",
"HASH",
"operator",
"cluster",
"health",
"--address",
"temporal:7233",
]
Expand Down
15 changes: 12 additions & 3 deletions apps/hash-frontend/next.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,24 @@ export default withSentryConfig(
// Ingest pipeline proxy → Mastra API
{
source: "/api/ingest",
destination: `${mastraApiOrigin}/discovery-runs`,
destination: `${mastraApiOrigin}/ingest-runs`,
},
{
source: "/api/ingest/:path*",
destination: `${mastraApiOrigin}/discovery-runs/:path*`,
destination: `${mastraApiOrigin}/ingest-runs/:path*`,
},
{
source: "/api/ingest-fixtures/:path*",
destination: `${mastraApiOrigin}/discovery-fixtures/:path*`,
destination: `${mastraApiOrigin}/ingest-fixtures/:path*`,
},
{
source: "/api/ingest-artifacts/:path*",
destination: `${mastraApiOrigin}/artifacts/:path*`,
},
// Page images are referenced as /artifacts/... in discovery view data
{
source: "/artifacts/:path*",
destination: `${mastraApiOrigin}/artifacts/:path*`,
},
{
source: "/pages",
Expand Down
120 changes: 120 additions & 0 deletions apps/hash-frontend/src/pages/api/ingest/[runId]/events.api.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/**
* SSE proxy for ingest run events.
*
* Next.js rewrites buffer responses, breaking SSE streaming. This API route
* manually proxies the EventSource connection to the Mastra API with proper
* streaming headers.
*/
import type { NextApiRequest, NextApiResponse } from "next";

/** Base URL of the Mastra API; falls back to the local default when the env var is unset. */
const MASTRA_API_ORIGIN =
  process.env.MASTRA_API_ORIGIN ?? "http://localhost:4111";

/**
 * Parses the configured Mastra API origin.
 *
 * @returns The parsed URL, or `null` when the configured value is not a valid
 *   URL or uses a scheme other than `http:` / `https:`.
 */
const getMastraApiOrigin = (): URL | null => {
  let parsed: URL;
  try {
    parsed = new URL(MASTRA_API_ORIGIN);
  } catch {
    // Not parseable as an absolute URL.
    return null;
  }

  const isHttpScheme =
    parsed.protocol === "http:" || parsed.protocol === "https:";
  return isHttpScheme ? parsed : null;
};

/**
 * Streams the ingest run's server-sent events from the Mastra API to the
 * client.
 *
 * Responses:
 * - 500 when `MASTRA_API_ORIGIN` is not a valid http(s) URL.
 * - 400 when the `runId` route parameter is missing or blank.
 * - The upstream status when the upstream answers with a non-2xx status.
 * - 502 when the upstream cannot be reached, or answers 2xx without a body.
 * - 200 `text/event-stream` otherwise, relaying chunks until the upstream
 *   ends or the client disconnects.
 *
 * The `Last-Event-ID` request header and the `after` query parameter are
 * forwarded to the upstream when present.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse,
): Promise<void> {
  const { runId } = req.query;
  const upstreamOrigin = getMastraApiOrigin();

  if (!upstreamOrigin) {
    res.status(500).json({ error: "Invalid MASTRA_API_ORIGIN" });
    return;
  }

  if (typeof runId !== "string" || runId.trim().length === 0) {
    res.status(400).json({ error: "Missing runId" });
    return;
  }

  const upstreamUrl = new URL(
    `/ingest-runs/${encodeURIComponent(runId)}/events`,
    upstreamOrigin,
  );

  const headers: Record<string, string> = {
    Accept: "text/event-stream",
  };

  const lastEventId = req.headers["last-event-id"];
  if (typeof lastEventId === "string") {
    headers["Last-Event-ID"] = lastEventId;
  }

  const after = req.query.after;
  if (typeof after === "string") {
    upstreamUrl.searchParams.set("after", after);
  }

  const abortController = new AbortController();

  // Attach the disconnect handler BEFORE awaiting the upstream fetch so a
  // client that goes away while we are still connecting also tears the
  // upstream request down. The response's "close" event is used because it
  // fires when the response completes or the connection drops early;
  // aborting after a normal completion is a harmless no-op.
  res.on("close", () => {
    abortController.abort();
  });

  try {
    const upstream = await fetch(upstreamUrl, {
      headers,
      signal: abortController.signal,
    });

    if (!upstream.ok || !upstream.body) {
      // Release the upstream connection instead of leaving the body unread.
      void upstream.body?.cancel().catch(() => {});
      // A 2xx upstream without a body must not be relayed as a success.
      res
        .status(upstream.ok ? 502 : upstream.status)
        .json({ error: "Upstream error" });
      return;
    }

    res.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      Connection: "keep-alive",
      // Ask nginx-style reverse proxies not to buffer the stream.
      "X-Accel-Buffering": "no",
    });

    const reader = upstream.body.getReader();
    const decoder = new TextDecoder();

    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- loop until stream ends
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      // `stream: true` keeps multi-byte characters split across chunks intact.
      res.write(decoder.decode(value, { stream: true }));
    }

    // Flush whatever the streaming decoder is still holding back.
    const tail = decoder.decode();
    if (tail.length > 0) {
      res.write(tail);
    }
    res.end();
  } catch {
    // Aborted by the client disconnect: nothing to report, just finish up.
    if (abortController.signal.aborted) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }

    // Failure before any bytes were sent: we can still answer with a status.
    if (!res.headersSent) {
      res.status(502).json({ error: "Failed to connect to upstream" });
      return;
    }

    // Failure mid-stream: headers are already out, so just close the stream.
    if (!res.writableEnded) {
      res.end();
    }
  }
}
69 changes: 64 additions & 5 deletions apps/hash-frontend/src/pages/ingest.page.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import { InfinityLightIcon } from "@hashintel/design-system";
import { Box, Container } from "@mui/material";
import {
Box,
Container,
FormControlLabel,
Radio,
RadioGroup,
Tooltip,
Typography,
} from "@mui/material";
import { useRouter } from "next/router";
import { useEffect } from "react";

Expand Down Expand Up @@ -46,13 +54,64 @@ const IngestPage: NextPageWithLayout = () => {
<Box
sx={{
display: "flex",
alignItems: "center",
justifyContent: "center",
minHeight: 400,
gap: 4,
py: 4,
minHeight: 400,
}}
>
<UploadPanel state={state} onUpload={upload} onReset={reset} />
{/* Left panel: upload */}
<Box sx={{ flex: 1, minWidth: 0 }}>
<UploadPanel state={state} onUpload={upload} onReset={reset} />
</Box>

{/* Right panel: extraction mode */}
<Box
sx={{
width: 280,
flexShrink: 0,
p: 3,
border: ({ palette }) => `1px solid ${palette.gray[20]}`,
borderRadius: 2,
alignSelf: "flex-start",
}}
>
<Typography
variant="smallTextLabels"
sx={{ fontWeight: 600, mb: 2 }}
>
Extraction Mode
</Typography>
<RadioGroup defaultValue="open">
<FormControlLabel
value="open"
control={<Radio size="small" />}
label={
<Typography variant="smallTextLabels">
Open Extraction
</Typography>
}
/>
<Tooltip title="Coming soon" placement="right">
<FormControlLabel
value="targeted"
control={<Radio size="small" disabled />}
label={
<Typography
variant="smallTextLabels"
sx={{ color: "gray.50" }}
>
Targeted Extraction
</Typography>
}
/>
</Tooltip>
</RadioGroup>
<Typography variant="microText" sx={{ color: "gray.50", mt: 2 }}>
Open extraction discovers entities and claims without type
constraints. Targeted extraction lets you specify ontology types
to extract.
</Typography>
</Box>
</Box>
</Container>
</>
Expand Down
67 changes: 60 additions & 7 deletions apps/hash-frontend/src/pages/ingest.page/evidence-resolver.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,37 @@
/**
* Evidence resolver: selection → highlighted block IDs + target page.
*
* Pure function. No I/O, no React.
* Pure functions. No I/O, no React.
*/
import type { Block, ExtractedClaim, RosterEntry } from "./types";
import type {
AssertionWindow,
Block,
ExtractedClaim,
MentionContextPlan,
} from "./types";

// ---------------------------------------------------------------------------
// Selection types
// ---------------------------------------------------------------------------

export type Selection =
| { kind: "roster"; entry: RosterEntry }
| { kind: "claim"; claim: ExtractedClaim }
| { kind: "assertion"; window: AssertionWindow }
| null;

// ---------------------------------------------------------------------------
// Evidence resolution
// ---------------------------------------------------------------------------

/** Outcome of resolving a selection against the document's blocks. */
export interface EvidenceResult {
/** IDs of the blocks that back the current selection. */
blockIds: string[];
/**
 * Page associated with the evidence, or `null` when there is none.
 * NOTE(review): presumably the page the viewer should scroll to; the exact
 * derivation is not visible in this hunk, so confirm in `resolveEvidence`.
 */
targetPage: number | null;
}

/**
 * Builds a string key for an assertion window from its block ID and its
 * window/mention offsets, joined with `:`.
 *
 * @param win - The assertion window to derive the key from.
 * @returns `blockId:windowStart:windowEnd:mentionStart:mentionEnd`.
 */
export function getAssertionWindowKey(win: AssertionWindow): string {
  const { blockId, windowStart, windowEnd, mentionStart, mentionEnd } = win;

  return `${blockId}:${windowStart}:${windowEnd}:${mentionStart}:${mentionEnd}`;
}

export function resolveEvidence(
selection: Selection,
blocks: Block[],
Expand All @@ -24,13 +41,13 @@ export function resolveEvidence(
}

const blockIds =
selection.kind === "roster"
? [...new Set(selection.entry.mentions.map((mention) => mention.blockId))]
: [
selection.kind === "claim"
? [
...new Set(
selection.claim.evidenceRefs.flatMap((ref) => ref.blockIds),
),
];
]
: [selection.window.blockId];

let targetPage: number | null = null;
for (const block of blocks) {
Expand All @@ -49,3 +66,39 @@ export function resolveEvidence(

return { blockIds, targetPage };
}

// ---------------------------------------------------------------------------
// Assertion window collection per entity
// ---------------------------------------------------------------------------

/**
 * Pre-compute a map of rosterEntryId → AssertionWindow[] for all entities.
 *
 * Only contexts whose mode is `"assertion_windows"` contribute. A window is
 * listed under every participant's roster entry, and windows sharing the same
 * key (see `getAssertionWindowKey`) are collapsed per entity, with the most
 * recently seen window object winning.
 *
 * @param mentionContexts - The mention context plans to scan.
 * @returns Assertion windows grouped by roster entry ID, in first-seen order.
 */
export function buildEntityAssertionMap(
  mentionContexts: MentionContextPlan[],
): Map<string, AssertionWindow[]> {
  // rosterEntryId -> (window key -> window); the inner map de-duplicates.
  const windowsByEntity = new Map<string, Map<string, AssertionWindow>>();

  for (const plan of mentionContexts) {
    if (plan.mode === "assertion_windows") {
      for (const assertionWindow of plan.assertionWindows) {
        const key = getAssertionWindowKey(assertionWindow);

        for (const { rosterEntryId } of assertionWindow.participants) {
          let bucket = windowsByEntity.get(rosterEntryId);
          if (bucket === undefined) {
            bucket = new Map<string, AssertionWindow>();
            windowsByEntity.set(rosterEntryId, bucket);
          }
          bucket.set(key, assertionWindow);
        }
      }
    }
  }

  // Flatten each per-entity bucket into a plain array.
  const result = new Map<string, AssertionWindow[]>();
  for (const [rosterEntryId, bucket] of windowsByEntity) {
    result.set(rosterEntryId, Array.from(bucket.values()));
  }
  return result;
}
Comment thread
cursor[bot] marked this conversation as resolved.
11 changes: 11 additions & 0 deletions apps/hash-frontend/src/pages/ingest.page/highlight-styles.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Shared highlight color tokens for selection states across the ingest UI.
 *
 * Every token uses the same base color, rgb(59, 130, 246), and differs only
 * in its alpha channel. Declared `as const` so each value keeps its literal
 * string type and the object is read-only at the type level.
 */
export const highlightColors = {
/** Border color for bbox overlays on page images (alpha 0.7). */
bboxBorder: "rgba(59, 130, 246, 0.7)",
/** Background fill for bbox overlays on page images (alpha 0.12). */
bboxFill: "rgba(59, 130, 246, 0.12)",
/** Background for selected list items (entity cards, assertion windows); alpha 0.08. */
selectedBg: "rgba(59, 130, 246, 0.08)",
/** Background for hovered list items (alpha 0.04). */
hoverBg: "rgba(59, 130, 246, 0.04)",
} as const;
Loading
Loading