- Chart
+ Controls
+
+
+ Source-ordered controls. Backend generation, dispatch dtype, EP degree, routing, and
+ activation profile are independent benchmark dimensions.
@@ -792,82 +830,41 @@ export default function CollectiveXDisplay() {
testId="collectivex-percentile-toggle"
/>
-
-
+ {
- setXAxis(value as CollectiveXXAxis);
- track('collectivex_x_axis_changed', { axis: value });
+ setSuite(value);
+ track('collectivex_suite_changed', { suite: value });
}}
- >
-
-
-
-
- Source tokens / rank
- Global source tokens
-
-
+ ariaLabel="CollectiveX comparison suite"
+ testId="collectivex-suite-toggle"
+ className="flex-wrap"
+ />
-
+
{
- setYAxis(value as CollectiveXYAxis);
- track('collectivex_y_axis_changed', { axis: value });
+ setBackendFilter(value);
+ track('collectivex_backend_changed', { backend: value });
}}
>
-
+
- Latency
- Tokens / second
- Logical routed payload rate
+ {backendOptions.map((value) => (
+
+ {value === 'all' ? 'All' : value}
+
+ ))}
-
- {
- setXScaleType(value);
- track('collectivex_x_scale_changed', { scale: value });
- }}
- ariaLabel="CollectiveX x scale"
- testId="collectivex-x-scale-toggle"
- />
-
-
- {
- setYScaleType(value);
- track('collectivex_y_scale_changed', { scale: value });
- }}
- ariaLabel="CollectiveX y scale"
- testId="collectivex-y-scale-toggle"
- />
-
-
-
-
-
-
- Filters
-
-
- Precision is the dispatch dtype. Activation profile is an independent benchmark
- dimension.
-
-
-
-
+
+
@@ -897,31 +892,6 @@ export default function CollectiveXDisplay() {
className="flex-wrap"
/>
-
-
- {
- setActivation(value);
- track('collectivex_activation_changed', { activation: value });
- }}
- >
-
-
-
-
- {activationOptions.map((value) => (
-
- {value === 'all' ? 'All' : formatActivation(value)}
-
- ))}
-
-
-
-
-
+
+
+ {
+ setPublication(value);
+ track('collectivex_publication_changed', { publication: value });
+ }}
+ ariaLabel="CollectiveX publication status"
+ testId="collectivex-publication-toggle"
+ className="flex-wrap"
+ />
+
+
+
+ {
+ setActivation(value);
+ track('collectivex_activation_changed', { activation: value });
+ }}
+ >
+
+
+
+
+ {activationOptions.map((value) => (
+
+ {value === 'all' ? 'All' : formatActivation(value)}
+
+ ))}
+
+
+
+
+ {
+ setXAxis(value as CollectiveXXAxis);
+ track('collectivex_x_axis_changed', { axis: value });
+ }}
+ >
+
+
+
+
+ Source tokens / rank
+ Global source tokens
+
+
+
+
{
- setSuite(value);
- track('collectivex_suite_changed', { suite: value });
+ setXScaleType(value);
+ track('collectivex_x_scale_changed', { scale: value });
}}
- ariaLabel="CollectiveX comparison suite"
- testId="collectivex-suite-toggle"
- className="flex-wrap"
+ ariaLabel="CollectiveX x scale"
+ testId="collectivex-x-scale-toggle"
/>
-
+
+ {
+ setYAxis(value as CollectiveXYAxis);
+ track('collectivex_y_axis_changed', { axis: value });
+ }}
+ >
+
+
+
+
+ Latency
+ Tokens / second
+ Logical routed payload rate
+
+
+
+
{
- setPublication(value);
- track('collectivex_publication_changed', { publication: value });
+ setYScaleType(value);
+ track('collectivex_y_scale_changed', { scale: value });
}}
- ariaLabel="CollectiveX publication status"
- testId="collectivex-publication-toggle"
- className="flex-wrap"
+ ariaLabel="CollectiveX y scale"
+ testId="collectivex-y-scale-toggle"
/>
@@ -1005,6 +1043,7 @@ export default function CollectiveXDisplay() {
{OPERATION_LABELS[operation]} · {phase} · {percentile}
{modelShape === 'all' ? ' · all shapes' : ` · ${selectedModelLabel}`}
+ {backendFilter === 'all' ? '' : ` · ${backendFilter}`}
{precision === 'all' ? '' : ` · ${precision.toUpperCase()}`}
{epFilter === 'all' ? '' : ` · EP${epFilter}`}
{activation === 'all' ? '' : ` · ${formatActivation(activation)} activation`}
@@ -1125,8 +1164,8 @@ export default function CollectiveXDisplay() {
{overviewGroups.length === 0 ? (
- No latency panels match the current precision, activation, suite, routing, and
- publication filters.
+ No latency panels match the current backend, precision, activation, suite,
+ routing, and publication filters.
) : (
@@ -1179,8 +1218,8 @@ export default function CollectiveXDisplay() {
Scaling
Strong and weak scaling are distinct experiments with separately labeled fixed-work
- contracts. Precision and activation filters apply; a chart appears once a SKU has
- matched measurements at two EP degrees.
+ contracts. Backend, precision, and activation filters apply; a chart appears once a
+ SKU has matched measurements at two EP degrees.
@@ -1192,8 +1231,8 @@ export default function CollectiveXDisplay() {
Heatmaps
Dispatch p50 across EP, routing, and resource dimensions for the current phase,
- precision, activation, suite, and publication filters. The routing selector is
- intentionally not applied here.
+ backend, precision, activation, suite, and publication filters. The routing selector
+ is intentionally not applied here.
diff --git a/packages/app/src/components/collectivex/data.test.ts b/packages/app/src/components/collectivex/data.test.ts
index 3a5e60b1..ef95d335 100644
--- a/packages/app/src/components/collectivex/data.test.ts
+++ b/packages/app/src/components/collectivex/data.test.ts
@@ -6,6 +6,7 @@ import {
collectiveXHeatmapCells,
collectiveXPrefillFloor,
collectiveXScalingPoints,
+ collectiveXSeriesLabel,
comparisonDifferences,
distributionSensitivity,
metricValue,
@@ -188,6 +189,32 @@ describe('normalizeCollectiveXDocument', () => {
});
});
+ it('labels DeepEP v2 as a distinct backend generation', () => {
+ const baseShape = rawDocument().shape as Record;
+ const v1 = rawDocument({
+ backend: 'deepep',
+ backend_provenance: { deepep_version: '1.2.1' },
+ });
+ const v2 = rawDocument({
+ backend: 'deepep',
+ backend_provenance: { deepep_version: '2.0.0+af9a040' },
+ shape: {
+ ...baseShape,
+ kernel_gen: 'v2',
+ },
+ });
+ const series = normalized(v2);
+
+ expect(series.label).toContain('deepep v2');
+ expect(collectiveXSeriesLabel({ ...series, label: 'MI355X EP8 · deepep · bf16' })).toContain(
+ 'deepep v2',
+ );
+ expect(
+ chartPoints([series], 'dispatch', 'p99', 'tokens-per-rank', 'latency')[0]?.seriesLabel,
+ ).toContain('deepep v2');
+ expect(collectiveXConfigIdentity(v1)).not.toBe(collectiveXConfigIdentity(v2));
+ });
+
it('supports legacy flat rows without mislabeling the isolated sum as measured', () => {
const series = normalized({
schema_version: 1,
diff --git a/packages/app/src/components/collectivex/data.ts b/packages/app/src/components/collectivex/data.ts
index a484e59b..af0125a2 100644
--- a/packages/app/src/components/collectivex/data.ts
+++ b/packages/app/src/components/collectivex/data.ts
@@ -174,6 +174,27 @@ function backendVersion(raw: Record): string | null {
);
}
+function backendKernelGeneration(backend: string, version: string | null): string {
+ if (backend !== 'deepep') return 'n-a';
+ if (version && (/^2(?:\.|$)/u.test(version) || /\bv2\b/iu.test(version))) return 'v2';
+ return 'v1';
+}
+
+export function collectiveXBackendLabel(backend: string, version: string | null): string {
+ return backendKernelGeneration(backend, version) === 'v2' ? `${backend} v2` : backend;
+}
+
+export function collectiveXSeriesLabel(
+ series: Pick,
+): string {
+ const backendLabel = collectiveXBackendLabel(series.backend, series.backendVersion);
+ if (backendLabel === series.backend) return series.label;
+
+ const backendSegment = `· ${series.backend} ·`;
+ if (!series.label.includes(backendSegment)) return series.label;
+ return series.label.replace(backendSegment, `· ${backendLabel} ·`);
+}
+
function stableHash(value: string): string {
let hash = 2166136261;
for (const character of value) {
@@ -236,6 +257,10 @@ function rawConfig(raw: Record) {
const routingIdentity = isRecord(raw.routing_identity) ? raw.routing_identity : {};
const eplb = isRecord(raw.eplb) ? raw.eplb : {};
const placement = isRecord(raw.placement) ? raw.placement : {};
+ const backend = stringValue(raw.backend, 'unknown');
+ const version = backendVersion(raw);
+ const kernelGeneration =
+ stringValue(shape.kernel_gen) || backendKernelGeneration(backend, version);
const phase: CollectiveXPhase | null =
raw.phase === 'decode' || raw.phase === 'prefill' ? raw.phase : null;
const routing = stringValue(shape.routing, 'unknown');
@@ -252,7 +277,9 @@ function rawConfig(raw: Record) {
return {
runner,
sku: skuFromRunner(runner),
- backend: stringValue(raw.backend, 'unknown'),
+ backend,
+ backendVersion: version,
+ backendKernelGeneration: kernelGeneration,
phase,
mode: stringValue(raw.mode, 'normal'),
resourceMode: stringValue(raw.resource_mode) || 'tuned',
@@ -307,6 +334,7 @@ export function collectiveXConfigIdentity(raw: Record): string
return [
config.sku,
config.backend,
+ config.backendKernelGeneration,
config.hidden ?? '',
config.topk ?? '',
config.experts ?? '',
@@ -331,6 +359,7 @@ function colorKey(config: ReturnType): string {
[
config.sku,
config.backend,
+ config.backendKernelGeneration,
config.dispatchDtype,
config.mode,
config.resourceMode,
@@ -349,9 +378,10 @@ function buildLabel(config: ReturnType): string {
config.measurementContract === 'cached-layout-comm-only-v1' ? '[cl]' : '',
].filter(Boolean);
const routing = config.routingLabel === 'uniform' ? '' : ` · ${config.routingLabel}`;
- return `${config.sku.toUpperCase()} EP${config.epSize ?? '?'} · ${config.backend} · ${
- config.dispatchDtype
- }${suffixes.length > 0 ? ` ${suffixes.join(' ')}` : ''}${routing}`;
+ return `${config.sku.toUpperCase()} EP${config.epSize ?? '?'} · ${collectiveXBackendLabel(
+ config.backend,
+ config.backendVersion,
+ )} · ${config.dispatchDtype}${suffixes.length > 0 ? ` ${suffixes.join(' ')}` : ''}${routing}`;
}
export function normalizeCollectiveXDocument(
@@ -432,7 +462,7 @@ export function normalizeCollectiveXDocument(
workloadSource: config.workloadSource,
eplbImbalanceBefore: config.eplbImbalanceBefore,
eplbImbalanceAfter: config.eplbImbalanceAfter,
- backendVersion: backendVersion(raw),
+ backendVersion: config.backendVersion,
imageDigest: config.imageDigest,
repository: config.repository,
run: runSource(raw, generatedAt, context),
@@ -1137,7 +1167,7 @@ export function chartPoints(
.filter((row) => item.phase !== 'prefill' || row.tokensPerRank >= prefillFloor)
.map((row) => ({
seriesId: item.id,
- seriesLabel: item.label,
+ seriesLabel: collectiveXSeriesLabel(item),
colorKey: item.colorKey,
x: xAxis === 'tokens-per-rank' ? row.tokensPerRank : row.globalTokens,
y: metricValue(row, operation, percentileKey, yAxis),
@@ -1157,6 +1187,9 @@ export function comparisonDifferences(series: CollectiveXSeries[]): string[] {
new Set(series.map(getValue)).size > 1;
if (different((item) => item.topologyClass)) warnings.push('topology');
+ if (different((item) => collectiveXBackendLabel(item.backend, item.backendVersion))) {
+ warnings.push('backend generation');
+ }
if (different((item) => item.epSize)) warnings.push('EP degree');
if (different((item) => item.shape.dispatchDtype)) warnings.push('dispatch dtype');
if (different((item) => item.mode)) warnings.push('kernel mode');
From 0a91b6d8581250da17d571de8f738cabd09f1074 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Tue, 30 Jun 2026 09:24:10 +0800
Subject: [PATCH 08/23] fix: ingest CollectiveX aggregate runs
---
packages/app/public/data/collectivex.json | 137556 +++------------
.../collectivex/CollectiveXDisplay.tsx | 4 +-
.../src/components/collectivex/data.test.ts | 1 +
.../app/src/components/collectivex/data.ts | 28 +-
.../app/src/components/collectivex/types.ts | 1 +
.../app/src/lib/collectivex-snapshot.test.ts | 53 +-
packages/app/src/lib/collectivex-snapshot.ts | 57 +-
7 files changed, 24205 insertions(+), 113495 deletions(-)
diff --git a/packages/app/public/data/collectivex.json b/packages/app/public/data/collectivex.json
index 2b44f424..20838d6e 100644
--- a/packages/app/public/data/collectivex.json
+++ b/packages/app/public/data/collectivex.json
@@ -2,28 +2,28 @@
"snapshotVersion": 3,
"series": [
{
- "id": "cx-0eafa1d5",
- "identity": "b300|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "b300_c9569580",
- "comparisonKey": "62e1e2299cdc509d",
+ "id": "cx-5d8b357a",
+ "identity": "gb200|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "fa3808de096d4a7a",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:16.179311+00:00",
+ "generatedAt": "2026-06-29T14:00:04.159261+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_14",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
+ "label": "GB200 EP8 · deepep · bf16",
"model": "Qwen3.5",
"shape": {
"hidden": 4096,
@@ -35,14 +35,15 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -50,59 +51,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "ebe68878aa18bb0",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287508460",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287508460",
- "createdAt": "2026-06-27T11:14:16.179311+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.992001831531525,
- "p90": 59.328000992536545,
- "p95": 62.55999952554703,
- "p99": 80.38400113582611
+ "p50": 88.70399743318558,
+ "p90": 319.4560110569,
+ "p95": 368.3199882507324,
+ "p99": 390.56000113487244
},
"combine": {
- "p50": 55.00800162553787,
- "p90": 57.0559985935688,
- "p95": 64.41599875688553,
- "p99": 65.92000275850296
+ "p50": 70.8480030298233,
+ "p90": 330.49601316452026,
+ "p95": 350.3040075302124,
+ "p99": 363.77599835395813
},
"roundtrip": {
- "p50": 94.81599926948547,
- "p90": 97.63199836015701,
- "p95": 99.04000163078308,
- "p99": 108.0000028014183
+ "p50": 136.1279934644699,
+ "p90": 390.3680145740509,
+ "p95": 427.16801166534424,
+ "p99": 443.1680142879486
},
"isolatedSum": {
- "p50": 112.0000034570694,
- "p90": 116.38399958610535,
- "p95": 126.97599828243256,
- "p99": 146.30400389432907
+ "p50": 159.55200046300888,
+ "p90": 649.9520242214203,
+ "p95": 718.6239957809448,
+ "p99": 754.3359994888306
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 344064,
"combineLogicalBytes": 344064,
"fanoutMean": 5.25,
"recvTokensMax": 6,
- "stragglerRank": 7,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -111,35 +112,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 56.89600110054016,
- "p90": 59.039998799562454,
- "p95": 61.15199998021126,
- "p99": 82.04799890518188
+ "p50": 88.70399743318558,
+ "p90": 328.8320004940033,
+ "p95": 365.59998989105225,
+ "p99": 387.2320055961609
},
"combine": {
- "p50": 55.67999929189682,
- "p90": 58.400001376867294,
- "p95": 64.67200070619583,
- "p99": 76.67200267314911
+ "p50": 71.42399996519089,
+ "p90": 338.6879861354828,
+ "p95": 352.7039885520935,
+ "p99": 362.91199922561646
},
"roundtrip": {
- "p50": 95.16800194978714,
- "p90": 98.11200201511383,
- "p95": 100.67199915647507,
- "p99": 112.03200370073318
+ "p50": 137.66400516033173,
+ "p90": 396.09599113464355,
+ "p95": 427.45599150657654,
+ "p99": 439.8399889469147
},
"isolatedSum": {
- "p50": 112.57600039243698,
- "p90": 117.44000017642975,
- "p95": 125.82400068640709,
- "p99": 158.720001578331
+ "p50": 160.12799739837646,
+ "p90": 667.5199866294861,
+ "p95": 718.3039784431458,
+ "p99": 750.1440048217773
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 704512,
"combineLogicalBytes": 704512,
"fanoutMean": 5.375,
"recvTokensMax": 12,
- "stragglerRank": 7,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -148,35 +149,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 57.21599981188774,
- "p90": 59.74400043487549,
- "p95": 61.664000153541565,
- "p99": 77.18399912118912
+ "p50": 90.36800265312195,
+ "p90": 332.0640027523041,
+ "p95": 372.1280097961426,
+ "p99": 386.49600744247437
},
"combine": {
- "p50": 56.063998490571976,
- "p90": 58.14399942755699,
- "p95": 64.92800265550613,
- "p99": 78.68800312280655
+ "p50": 72.57600128650665,
+ "p90": 318.91199946403503,
+ "p95": 351.3279855251312,
+ "p99": 366.62399768829346
},
"roundtrip": {
- "p50": 95.74399888515472,
- "p90": 98.78399968147278,
- "p95": 103.26399654150009,
- "p99": 113.0559965968132
+ "p50": 138.5599970817566,
+ "p90": 397.11999893188477,
+ "p95": 425.9839951992035,
+ "p99": 444.9920058250427
},
"isolatedSum": {
- "p50": 113.27999830245972,
- "p90": 117.88799986243248,
- "p95": 126.5920028090477,
- "p99": 155.87200224399567
+ "p50": 162.9440039396286,
+ "p90": 650.9760022163391,
+ "p95": 723.4559953212738,
+ "p99": 753.1200051307678
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 1384448,
"combineLogicalBytes": 1384448,
"fanoutMean": 5.28125,
"recvTokensMax": 26,
- "stragglerRank": 7,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -185,35 +186,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 58.079998940229416,
- "p90": 61.08799949288368,
- "p95": 62.65600025653839,
- "p99": 71.68000191450119
+ "p50": 89.9519994854927,
+ "p90": 120.86399644613266,
+ "p95": 360.4480028152466,
+ "p99": 390.8480107784271
},
"combine": {
- "p50": 64.44799900054932,
- "p90": 66.23999774456024,
- "p95": 66.59200042486191,
- "p99": 69.023996591568
+ "p50": 73.69600236415863,
+ "p90": 326.1120021343231,
+ "p95": 354.0799915790558,
+ "p99": 365.1520013809204
},
"roundtrip": {
- "p50": 108.8000014424324,
- "p90": 113.95200341939926,
- "p95": 114.84800279140472,
- "p99": 122.72000312805176
+ "p50": 140.86399972438812,
+ "p90": 409.59998965263367,
+ "p95": 438.04800510406494,
+ "p99": 451.9999921321869
},
"isolatedSum": {
- "p50": 122.52799794077873,
- "p90": 127.32799723744392,
- "p95": 129.2480006814003,
- "p99": 140.70399850606918
+ "p50": 163.64800184965134,
+ "p90": 446.9759985804558,
+ "p95": 714.5279943943024,
+ "p99": 756.0000121593475
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 2744320,
"combineLogicalBytes": 2744320,
"fanoutMean": 5.234375,
"recvTokensMax": 49,
- "stragglerRank": 4,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -222,35 +223,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 58.687999844551086,
- "p90": 61.055999249219894,
- "p95": 63.00800293684006,
- "p99": 71.96799665689468
+ "p50": 91.80799871683121,
+ "p90": 332.70400762557983,
+ "p95": 379.35999035835266,
+ "p99": 392.0319974422455
},
"combine": {
- "p50": 57.82400071620941,
- "p90": 66.3679987192154,
- "p95": 66.81600213050842,
- "p99": 77.98399776220322
+ "p50": 75.71200281381607,
+ "p90": 338.9120101928711,
+ "p95": 355.3279936313629,
+ "p99": 367.0080006122589
},
"roundtrip": {
- "p50": 111.39199882745743,
- "p90": 122.04799801111221,
- "p95": 126.5919953584671,
- "p99": 132.86399841308594
+ "p50": 142.30400323867798,
+ "p90": 393.887996673584,
+ "p95": 428.8960099220276,
+ "p99": 445.95199823379517
},
"isolatedSum": {
- "p50": 116.5120005607605,
- "p90": 127.42399796843529,
- "p95": 129.82400506734848,
- "p99": 149.9519944190979
+ "p50": 167.52000153064728,
+ "p90": 671.6160178184509,
+ "p95": 734.6879839897156,
+ "p99": 759.0399980545044
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 5464064,
"combineLogicalBytes": 5464064,
"fanoutMean": 5.2109375,
"recvTokensMax": 94,
- "stragglerRank": 4,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -259,35 +260,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 70.8480030298233,
- "p90": 74.68800246715546,
- "p95": 75.71200281381607,
- "p99": 81.31200075149536
+ "p50": 98.04800152778625,
+ "p90": 367.74399876594543,
+ "p95": 386.30399107933044,
+ "p99": 400.38400888442993
},
"combine": {
- "p50": 66.30399823188782,
- "p90": 67.07199662923813,
- "p95": 67.71200150251389,
- "p99": 77.15199887752533
+ "p50": 80.92799782752991,
+ "p90": 349.727988243103,
+ "p95": 362.8480136394501,
+ "p99": 374.752014875412
},
"roundtrip": {
- "p50": 108.99200290441513,
- "p90": 114.07999694347382,
- "p95": 116.7680025100708,
- "p99": 132.47999548912048
+ "p50": 146.43199741840363,
+ "p90": 423.2639968395233,
+ "p95": 440.12799859046936,
+ "p99": 459.6799910068512
},
"isolatedSum": {
- "p50": 137.15200126171112,
- "p90": 141.75999909639359,
- "p95": 143.42400431632996,
- "p99": 158.4639996290207
+ "p50": 178.97599935531616,
+ "p90": 717.4719870090485,
+ "p95": 749.1520047187805,
+ "p99": 775.1360237598419
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 11124736,
"combineLogicalBytes": 11124736,
"fanoutMean": 5.3046875,
"recvTokensMax": 186,
- "stragglerRank": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -296,35 +297,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 70.72000205516815,
- "p90": 72.95999675989151,
- "p95": 74.8480036854744,
- "p99": 81.02399855852127
+ "p50": 101.47199779748917,
+ "p90": 357.12000727653503,
+ "p95": 386.24000549316406,
+ "p99": 410.3359878063202
},
"combine": {
- "p50": 78.75200361013412,
- "p90": 79.55200225114822,
- "p95": 80.19199967384338,
- "p99": 95.96800059080124
+ "p50": 91.39200299978256,
+ "p90": 105.8880016207695,
+ "p95": 340.4479920864105,
+ "p99": 377.3120045661926
},
"roundtrip": {
- "p50": 131.77600502967834,
- "p90": 136.63999736309052,
- "p95": 138.91200721263885,
- "p99": 158.04800391197205
+ "p50": 165.56799411773682,
+ "p90": 411.9360148906708,
+ "p95": 429.4399917125702,
+ "p99": 461.60000562667847
},
"isolatedSum": {
- "p50": 149.47200566530228,
- "p90": 152.51199901103973,
- "p95": 155.04000335931778,
- "p99": 176.9919991493225
+ "p50": 192.86400079727173,
+ "p90": 463.00800889730453,
+ "p95": 726.6879975795746,
+ "p99": 787.6479923725128
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 22192128,
"combineLogicalBytes": 22192128,
"fanoutMean": 5.291015625,
"recvTokensMax": 358,
- "stragglerRank": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -333,35 +334,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 82.5280025601387,
- "p90": 85.21600067615509,
- "p95": 88.16000074148178,
- "p99": 100.80000013113022
+ "p50": 112.2559979557991,
+ "p90": 357.2160005569458,
+ "p95": 379.61599230766296,
+ "p99": 414.36800360679626
},
"combine": {
- "p50": 91.77599847316742,
- "p90": 94.59199756383896,
- "p95": 101.72799974679947,
- "p99": 104.92800176143646
+ "p50": 107.90400207042694,
+ "p90": 344.4159924983978,
+ "p95": 355.103999376297,
+ "p99": 397.5360095500946
},
"roundtrip": {
- "p50": 157.53600001335144,
- "p90": 165.24800658226013,
- "p95": 166.97600483894348,
- "p99": 184.76800620555878
+ "p50": 191.8720006942749,
+ "p90": 220.89600563049316,
+ "p95": 477.08800435066223,
+ "p99": 499.55201148986816
},
"isolatedSum": {
- "p50": 174.30400103330612,
- "p90": 179.80799823999405,
- "p95": 189.88800048828125,
- "p99": 205.72800189256668
+ "p50": 220.16000002622604,
+ "p90": 701.6319930553436,
+ "p95": 734.71999168396,
+ "p99": 811.9040131568909
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 44564480,
"combineLogicalBytes": 44564480,
"fanoutMean": 5.3125,
"recvTokensMax": 699,
- "stragglerRank": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -369,16 +370,16 @@
]
},
{
- "id": "cx-73ede381",
- "identity": "b300|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "b300_307ed708",
- "comparisonKey": "29583b2aa22167e0",
+ "id": "cx-b83f938d",
+ "identity": "gb200|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "ad961b604b617551",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:51:53.146142+00:00",
+ "generatedAt": "2026-06-29T14:01:52.765724+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_04",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -386,30 +387,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "Qwen3.5",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "shape 5120/8/160",
"shape": {
- "hidden": 4096,
+ "hidden": 5120,
"topk": 8,
- "experts": 128,
+ "experts": 160,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -417,59 +419,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "13e2b193b87a112",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285698979",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285698979",
- "createdAt": "2026-06-27T09:51:53.146142+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 55.776000022888184,
- "p90": 58.14399942755699,
- "p95": 59.93599817156792,
- "p99": 65.95200300216675
+ "p50": 97.50399738550186,
+ "p90": 341.0240113735199,
+ "p95": 371.2959885597229,
+ "p99": 406.17600083351135
},
"combine": {
- "p50": 54.71999943256378,
- "p90": 56.063998490571976,
- "p95": 57.151999324560165,
- "p99": 65.69600105285645
+ "p50": 74.91199672222137,
+ "p90": 330.9760093688965,
+ "p95": 353.7600040435791,
+ "p99": 368.4479892253876
},
"roundtrip": {
- "p50": 93.31200271844864,
- "p90": 95.96800059080124,
- "p95": 98.01600128412247,
- "p99": 104.86400127410889
+ "p50": 148.41599762439728,
+ "p90": 393.44000816345215,
+ "p95": 424.3839979171753,
+ "p99": 458.71999859809875
},
"isolatedSum": {
- "p50": 110.49599945545197,
- "p90": 114.20799791812897,
- "p95": 117.08799749612808,
- "p99": 131.6480040550232
+ "p50": 172.41599410772324,
+ "p90": 672.0000207424164,
+ "p95": 725.055992603302,
+ "p99": 774.6239900588989
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 344064,
- "combineLogicalBytes": 344064,
+ "dispatchLogicalBytes": 430080,
+ "combineLogicalBytes": 430080,
"fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 4,
+ "recvTokensMax": 8,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -478,35 +480,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 56.09599873423576,
- "p90": 57.792000472545624,
- "p95": 59.55199897289276,
- "p99": 68.15999746322632
+ "p50": 96.44799679517746,
+ "p90": 332.5439989566803,
+ "p95": 372.79999256134033,
+ "p99": 399.07199144363403
},
"combine": {
- "p50": 55.07199838757515,
- "p90": 56.671999394893646,
- "p95": 57.28000029921532,
- "p99": 65.2799978852272
+ "p50": 75.29599964618683,
+ "p90": 321.3120102882385,
+ "p95": 352.3840010166168,
+ "p99": 365.2479946613312
},
"roundtrip": {
- "p50": 95.39200365543365,
- "p90": 101.79200023412704,
- "p95": 102.55999863147736,
- "p99": 108.0000028014183
+ "p50": 148.95999431610107,
+ "p90": 393.3440148830414,
+ "p95": 429.31199073791504,
+ "p99": 453.72799038887024
},
"isolatedSum": {
- "p50": 111.16799712181091,
- "p90": 114.46399986743927,
- "p95": 116.83199927210808,
- "p99": 133.43999534845352
+ "p50": 171.7439964413643,
+ "p90": 653.8560092449188,
+ "p95": 725.1839935779572,
+ "p99": 764.3199861049652
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 704512,
- "combineLogicalBytes": 704512,
+ "dispatchLogicalBytes": 880640,
+ "combineLogicalBytes": 880640,
"fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 4,
+ "recvTokensMax": 13,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -515,35 +517,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 56.0000017285347,
- "p90": 57.88800120353699,
- "p95": 59.67999994754791,
- "p99": 68.7360018491745
+ "p50": 98.04800152778625,
+ "p90": 332.73598551750183,
+ "p95": 369.951993227005,
+ "p99": 394.9120044708252
},
"combine": {
- "p50": 56.12799897789955,
- "p90": 65.37599861621857,
- "p95": 65.72800129652023,
- "p99": 66.97600334882736
+ "p50": 80.89599758386612,
+ "p90": 328.7999927997589,
+ "p95": 354.11199927330017,
+ "p99": 368.51200461387634
},
"roundtrip": {
- "p50": 105.18400371074677,
- "p90": 111.10399663448334,
- "p95": 112.2559979557991,
- "p99": 115.10399729013443
+ "p50": 152.8320014476776,
+ "p90": 396.4479863643646,
+ "p95": 431.0719966888428,
+ "p99": 465.0239944458008
},
"isolatedSum": {
- "p50": 112.12800070643425,
- "p90": 123.26399981975555,
- "p95": 125.40800124406815,
- "p99": 135.71200519800186
+ "p50": 178.94399911165237,
+ "p90": 661.5359783172607,
+ "p95": 724.0639925003052,
+ "p99": 763.4240090847015
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1384448,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1740800,
+ "combineLogicalBytes": 1740800,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 25,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -552,35 +554,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 56.86400085687637,
- "p90": 58.88000130653381,
- "p95": 60.54399907588959,
- "p99": 67.6800012588501
+ "p50": 99.5199978351593,
+ "p90": 336.89600229263306,
+ "p95": 383.4879994392395,
+ "p99": 412.86399960517883
},
"combine": {
- "p50": 65.24799764156342,
- "p90": 66.46399945020676,
- "p95": 66.81600213050842,
- "p99": 70.62400132417679
+ "p50": 82.2720006108284,
+ "p90": 324.95999336242676,
+ "p95": 354.5919954776764,
+ "p99": 378.04800271987915
},
"roundtrip": {
- "p50": 105.79200088977814,
- "p90": 112.35199868679047,
- "p95": 112.83200234174728,
- "p99": 116.48000031709671
+ "p50": 154.7199934720993,
+ "p90": 400.89601278305054,
+ "p95": 427.19998955726624,
+ "p99": 459.52001214027405
},
"isolatedSum": {
- "p50": 122.11199849843979,
- "p90": 125.34400075674057,
- "p95": 127.36000120639801,
- "p99": 138.3040025830269
+ "p50": 181.7919984459877,
+ "p90": 661.8559956550598,
+ "p95": 738.0799949169159,
+ "p99": 790.912002325058
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2744320,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 3471360,
+ "combineLogicalBytes": 3471360,
+ "fanoutMean": 5.296875,
+ "recvTokensMax": 50,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -589,35 +591,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 58.6559996008873,
- "p90": 60.5119988322258,
- "p95": 63.00800293684006,
- "p99": 79.0719985961914
+ "p50": 100.89600086212158,
+ "p90": 353.2480001449585,
+ "p95": 383.2640051841736,
+ "p99": 401.98400616645813
},
"combine": {
- "p50": 65.31199812889099,
- "p90": 66.43199920654297,
- "p95": 66.97600334882736,
- "p99": 69.40799951553345
+ "p50": 83.3280012011528,
+ "p90": 337.95198798179626,
+ "p95": 358.0160140991211,
+ "p99": 374.81600046157837
},
"roundtrip": {
- "p50": 105.85600137710571,
- "p90": 107.87200182676315,
- "p95": 109.66400057077408,
- "p99": 115.64800143241882
+ "p50": 154.36799824237823,
+ "p90": 388.0000114440918,
+ "p95": 423.74399304389954,
+ "p99": 458.49600434303284
},
"isolatedSum": {
- "p50": 123.96799772977829,
- "p90": 126.94399803876877,
- "p95": 129.98400628566742,
- "p99": 148.47999811172485
+ "p50": 184.22400206327438,
+ "p90": 691.1999881267548,
+ "p95": 741.2800192832947,
+ "p99": 776.8000066280365
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 5464064,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 6912000,
+ "combineLogicalBytes": 6912000,
+ "fanoutMean": 5.2734375,
+ "recvTokensMax": 93,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -626,34 +628,34 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 72.80000299215317,
- "p90": 75.83999633789062,
- "p95": 76.60800218582153,
- "p99": 84.63999629020691
+ "p50": 101.95200145244598,
+ "p90": 131.3599944114685,
+ "p95": 373.50401282310486,
+ "p99": 395.9679901599884
},
"combine": {
- "p50": 66.27199798822403,
- "p90": 67.03999638557434,
- "p95": 67.55200028419495,
- "p99": 69.47200000286102
+ "p50": 85.88799834251404,
+ "p90": 331.743985414505,
+ "p95": 361.88799142837524,
+ "p99": 376.70400738716125
},
"roundtrip": {
- "p50": 109.24799740314484,
- "p90": 115.32799899578094,
- "p95": 116.57600104808807,
- "p99": 131.32800161838531
+ "p50": 157.47199952602386,
+ "p90": 396.09599113464355,
+ "p95": 433.75998735427856,
+ "p99": 462.14398741722107
},
"isolatedSum": {
- "p50": 139.0720009803772,
- "p90": 142.87999272346497,
- "p95": 144.16000247001648,
- "p99": 154.11199629306793
+ "p50": 187.83999979496002,
+ "p90": 463.1039798259735,
+ "p95": 735.3920042514801,
+ "p99": 772.6719975471497
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 11124736,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
+ "dispatchLogicalBytes": 13977600,
+ "combineLogicalBytes": 13977600,
+ "fanoutMean": 5.33203125,
+ "recvTokensMax": 179,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -663,35 +665,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 69.66400146484375,
- "p90": 71.9040036201477,
- "p95": 73.05599749088287,
- "p99": 79.64800298213959
+ "p50": 110.43199896812439,
+ "p90": 345.66399455070496,
+ "p95": 376.800000667572,
+ "p99": 412.447988986969
},
"combine": {
- "p50": 78.65600287914276,
- "p90": 79.68000322580338,
- "p95": 80.1599994301796,
- "p99": 89.59999680519104
+ "p50": 98.39999675750732,
+ "p90": 331.13598823547363,
+ "p95": 355.4239869117737,
+ "p99": 381.9519877433777
},
"roundtrip": {
- "p50": 130.8480054140091,
- "p90": 134.33599472045898,
- "p95": 137.92000710964203,
- "p99": 152.12799608707428
+ "p50": 177.66399681568146,
+ "p90": 206.2399983406067,
+ "p95": 431.7440092563629,
+ "p99": 459.55199003219604
},
"isolatedSum": {
- "p50": 148.3200043439865,
- "p90": 151.58400684595108,
- "p95": 153.21599692106247,
- "p99": 169.24799978733063
+ "p50": 208.8319957256317,
+ "p90": 676.7999827861786,
+ "p95": 732.2239875793457,
+ "p99": 794.3999767303467
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22192128,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 27975680,
+ "combineLogicalBytes": 27975680,
+ "fanoutMean": 5.3359375,
+ "recvTokensMax": 355,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -700,35 +702,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 82.30400085449219,
- "p90": 86.14400029182434,
- "p95": 90.7519981265068,
- "p99": 98.59199821949005
+ "p50": 124.28800016641617,
+ "p90": 366.6880130767822,
+ "p95": 396.09599113464355,
+ "p99": 425.53600668907166
},
"combine": {
- "p50": 91.90399944782257,
- "p90": 95.0080007314682,
- "p95": 101.6639992594719,
- "p99": 102.52799838781357
+ "p50": 118.01599711179733,
+ "p90": 124.38400089740753,
+ "p95": 131.96800649166107,
+ "p99": 388.9920115470886
},
"roundtrip": {
- "p50": 166.81599617004395,
- "p90": 173.88799786567688,
- "p95": 175.32800137996674,
- "p99": 189.4720047712326
+ "p50": 212.19199895858765,
+ "p90": 483.8719964027405,
+ "p95": 501.21599435806274,
+ "p99": 530.3040146827698
},
"isolatedSum": {
- "p50": 174.20800030231476,
- "p90": 181.15200102329254,
- "p95": 192.4159973859787,
- "p99": 201.11999660730362
+ "p50": 242.3039972782135,
+ "p90": 491.07201397418976,
+ "p95": 528.0639976263046,
+ "p99": 814.5280182361603
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
+ "dispatchLogicalBytes": 55674880,
+ "combineLogicalBytes": 55674880,
+ "fanoutMean": 5.3095703125,
"recvTokensMax": 699,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -736,16 +738,16 @@
]
},
{
- "id": "cx-b2b86614",
- "identity": "b300|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef",
- "colorKey": "b300_307ed708",
- "comparisonKey": "246ad32f5ce8e310",
+ "id": "cx-f3f399c1",
+ "identity": "gb200|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "7c7859f7d3b18eaf",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:52:24.032758+00:00",
+ "generatedAt": "2026-06-29T14:03:42.050997+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_14",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -753,30 +755,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "shape 5120/8/160",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "MiniMax-M3",
"shape": {
- "hidden": 5120,
+ "hidden": 6144,
"topk": 8,
- "experts": 160,
+ "experts": 256,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -784,59 +787,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "be1b44a963bd4ef",
- "workloadId": "set:8:34e5874082f8ea8f",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285710659",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285710659",
- "createdAt": "2026-06-27T09:52:24.032758+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.03199824690819,
- "p90": 58.75200033187866,
- "p95": 61.69600039720535,
- "p99": 71.03999704122543
+ "p50": 96.8639999628067,
+ "p90": 317.1199858188629,
+ "p95": 366.4959967136383,
+ "p99": 397.8239893913269
},
"combine": {
- "p50": 55.424001067876816,
- "p90": 57.5999990105629,
- "p95": 64.7680014371872,
- "p99": 65.5359998345375
+ "p50": 80.57600259780884,
+ "p90": 331.167995929718,
+ "p95": 355.6160032749176,
+ "p99": 375.16799569129944
},
"roundtrip": {
- "p50": 94.59199756383896,
- "p90": 97.85600006580353,
- "p95": 101.85600072145462,
- "p99": 125.15200674533844
+ "p50": 152.16000378131866,
+ "p90": 408.25599431991577,
+ "p95": 434.112012386322,
+ "p99": 456.31998777389526
},
"isolatedSum": {
- "p50": 111.455999314785,
- "p90": 116.35199934244156,
- "p95": 126.46400183439255,
- "p99": 136.57599687576294
+ "p50": 177.44000256061554,
+ "p90": 648.2879817485809,
+ "p95": 722.1119999885559,
+ "p99": 772.9919850826263
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 430080,
- "combineLogicalBytes": 430080,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 540672,
+ "combineLogicalBytes": 540672,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -845,35 +848,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 55.93600124120712,
- "p90": 57.88800120353699,
- "p95": 59.74400043487549,
- "p99": 67.26399809122086
+ "p50": 97.98400104045868,
+ "p90": 340.89601039886475,
+ "p95": 378.6880075931549,
+ "p99": 401.08799934387207
},
"combine": {
- "p50": 56.28800019621849,
- "p90": 65.63200056552887,
- "p95": 66.17599725723267,
- "p99": 76.60800218582153
+ "p50": 82.46400207281113,
+ "p90": 326.84800028800964,
+ "p95": 361.08800768852234,
+ "p99": 372.9279935359955
},
"roundtrip": {
- "p50": 104.09600287675858,
- "p90": 111.10399663448334,
- "p95": 112.12799698114395,
- "p99": 116.95999652147293
+ "p50": 152.8320014476776,
+ "p90": 410.3679955005646,
+ "p95": 445.279985666275,
+ "p99": 479.2320132255554
},
"isolatedSum": {
- "p50": 112.22400143742561,
- "p90": 123.52000176906586,
- "p95": 125.91999769210815,
- "p99": 143.8720002770424
+ "p50": 180.4480031132698,
+ "p90": 667.7440106868744,
+ "p95": 739.7760152816772,
+ "p99": 774.0159928798676
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 880640,
- "combineLogicalBytes": 880640,
+ "dispatchLogicalBytes": 1056768,
+ "combineLogicalBytes": 1056768,
"fanoutMean": 5.375,
"recvTokensMax": 13,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -882,35 +885,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 55.93600124120712,
- "p90": 57.920001447200775,
- "p95": 59.10399928689003,
- "p99": 65.92000275850296
+ "p50": 97.95200079679489,
+ "p90": 333.8559865951538,
+ "p95": 377.21601128578186,
+ "p99": 393.5680091381073
},
"combine": {
- "p50": 65.37599861621857,
- "p90": 66.39999896287918,
- "p95": 66.52799993753433,
- "p99": 69.72800195217133
+ "p50": 83.26400071382523,
+ "p90": 325.98400115966797,
+ "p95": 355.16801476478577,
+ "p99": 373.4720051288605
},
"roundtrip": {
- "p50": 105.85600137710571,
- "p90": 112.86400258541107,
- "p95": 113.72800171375275,
- "p99": 131.42399489879608
+ "p50": 154.08000349998474,
+ "p90": 397.98399806022644,
+ "p95": 425.85599422454834,
+ "p99": 453.7599980831146
},
"isolatedSum": {
- "p50": 121.31199985742569,
- "p90": 124.32000041007996,
- "p95": 125.63199922442436,
- "p99": 135.6480047106743
+ "p50": 181.21600151062012,
+ "p90": 659.8399877548218,
+ "p95": 732.3840260505676,
+ "p99": 767.0400142669678
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1740800,
- "combineLogicalBytes": 1740800,
- "fanoutMean": 5.3125,
- "recvTokensMax": 25,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 2125824,
+ "combineLogicalBytes": 2125824,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -919,35 +922,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 57.18399956822395,
- "p90": 59.13599953055382,
- "p95": 60.92799827456474,
- "p99": 75.55200159549713
+ "p50": 98.62399846315384,
+ "p90": 352.9280126094818,
+ "p95": 377.9839873313904,
+ "p99": 399.9359905719757
},
"combine": {
- "p50": 66.0799965262413,
- "p90": 66.65600091218948,
- "p95": 67.52000004053116,
- "p99": 81.02399855852127
+ "p50": 84.03199911117554,
+ "p90": 320.25599479675293,
+ "p95": 355.19999265670776,
+ "p99": 375.328004360199
},
"roundtrip": {
- "p50": 105.43999820947647,
- "p90": 107.96800255775452,
- "p95": 109.98400300741196,
- "p99": 118.04799735546112
+ "p50": 155.4879993200302,
+ "p90": 413.2800102233887,
+ "p95": 442.7199959754944,
+ "p99": 462.72000670433044
},
"isolatedSum": {
- "p50": 123.26399609446526,
- "p90": 125.7920004427433,
- "p95": 128.4479983150959,
- "p99": 156.5760001540184
+ "p50": 182.65599757432938,
+ "p90": 673.1840074062347,
+ "p95": 733.1839799880981,
+ "p99": 775.2639949321747
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3471360,
- "combineLogicalBytes": 3471360,
- "fanoutMean": 5.296875,
- "recvTokensMax": 50,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4263936,
+ "combineLogicalBytes": 4263936,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -956,35 +959,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 58.36800113320351,
- "p90": 60.47999858856201,
- "p95": 62.272001057863235,
- "p99": 68.09599697589874
+ "p50": 100.3199964761734,
+ "p90": 354.0799915790558,
+ "p95": 384.67198610305786,
+ "p99": 400.5439877510071
},
"combine": {
- "p50": 66.14399701356888,
- "p90": 66.84800237417221,
- "p95": 67.4239993095398,
- "p99": 76.76800340414047
+ "p50": 85.05599945783615,
+ "p90": 329.9520015716553,
+ "p95": 359.2959940433502,
+ "p99": 377.56800651550293
},
"roundtrip": {
- "p50": 106.36799782514572,
- "p90": 108.67200046777725,
- "p95": 110.97600311040878,
- "p99": 117.76000261306763
+ "p50": 158.87999534606934,
+ "p90": 417.05599427223206,
+ "p95": 443.1680142879486,
+ "p99": 461.60000562667847
},
"isolatedSum": {
- "p50": 124.51199814677238,
- "p90": 127.32800096273422,
- "p95": 129.69600036740303,
- "p99": 144.86400038003922
+ "p50": 185.37599593400955,
+ "p90": 684.0319931507111,
+ "p95": 743.9679801464081,
+ "p99": 778.11199426651
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 6912000,
- "combineLogicalBytes": 6912000,
- "fanoutMean": 5.2734375,
- "recvTokensMax": 93,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 8503296,
+ "combineLogicalBytes": 8503296,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -993,35 +996,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 74.5600014925003,
- "p90": 76.4160007238388,
- "p95": 77.08799839019775,
- "p99": 81.85599744319916
+ "p50": 102.55999863147736,
+ "p90": 340.4160141944885,
+ "p95": 378.6880075931549,
+ "p99": 407.039999961853
},
"combine": {
- "p50": 67.32799857854843,
- "p90": 71.45600020885468,
- "p95": 77.2159993648529,
- "p99": 90.01599997282028
+ "p50": 88.95999938249588,
+ "p90": 323.64800572395325,
+ "p95": 355.52000999450684,
+ "p99": 379.5520067214966
},
"roundtrip": {
- "p50": 119.32799965143204,
- "p90": 125.2480000257492,
- "p95": 126.17599964141846,
- "p99": 128.9599984884262
+ "p50": 164.2560064792633,
+ "p90": 405.7280123233795,
+ "p95": 442.49600172042847,
+ "p99": 465.31200408935547
},
"isolatedSum": {
- "p50": 141.88800007104874,
- "p90": 147.87200093269348,
- "p95": 154.30399775505066,
- "p99": 171.87199741601944
+ "p50": 191.51999801397324,
+ "p90": 664.0640199184418,
+ "p95": 734.2080175876617,
+ "p99": 786.5920066833496
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 13977600,
- "combineLogicalBytes": 13977600,
- "fanoutMean": 5.33203125,
- "recvTokensMax": 179,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 16908288,
+ "combineLogicalBytes": 16908288,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1030,35 +1033,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 71.42399996519089,
- "p90": 78.46400141716003,
- "p95": 79.9039974808693,
- "p99": 101.79200023412704
+ "p50": 111.7440015077591,
+ "p90": 345.7280099391937,
+ "p95": 365.82401394844055,
+ "p99": 416.703999042511
},
"combine": {
- "p50": 80.06399869918823,
- "p90": 83.16799998283386,
- "p95": 89.6959975361824,
- "p99": 93.44000369310379
+ "p50": 103.5199984908104,
+ "p90": 342.24000573158264,
+ "p95": 370.7840144634247,
+ "p99": 391.2639915943146
},
"roundtrip": {
- "p50": 147.2640037536621,
- "p90": 150.11200308799744,
- "p95": 151.58399939537048,
- "p99": 160.3199988603592
+ "p50": 185.18400192260742,
+ "p90": 200.6399929523468,
+ "p95": 429.56799268722534,
+ "p99": 484.3519926071167
},
"isolatedSum": {
- "p50": 151.48799866437912,
- "p90": 161.6320013999939,
- "p95": 169.5999950170517,
- "p99": 195.23200392723083
+ "p50": 215.2639999985695,
+ "p90": 687.9680156707764,
+ "p95": 736.6080284118652,
+ "p99": 807.9679906368256
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 27975680,
- "combineLogicalBytes": 27975680,
- "fanoutMean": 5.3359375,
- "recvTokensMax": 355,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 33423360,
+ "combineLogicalBytes": 33423360,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1067,35 +1070,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 90.84799885749817,
- "p90": 98.39999675750732,
- "p95": 100.35199671983719,
- "p99": 104.73600029945374
+ "p50": 126.71999633312225,
+ "p90": 375.2000033855438,
+ "p95": 402.3039937019348,
+ "p99": 428.8960099220276
},
"combine": {
- "p50": 102.9760017991066,
- "p90": 103.93600165843964,
- "p95": 104.67199981212616,
- "p99": 114.62400108575821
+ "p50": 123.26399981975555,
+ "p90": 355.16801476478577,
+ "p95": 398.5599875450134,
+ "p99": 417.85600781440735
},
"roundtrip": {
- "p50": 170.01600563526154,
- "p90": 178.20799350738525,
- "p95": 180.09600043296814,
- "p99": 193.31200420856476
+ "p50": 216.60800278186798,
+ "p90": 237.31200397014618,
+ "p95": 501.8240213394165,
+ "p99": 520.2879905700684
},
"isolatedSum": {
- "p50": 193.82400065660477,
- "p90": 202.33599841594696,
- "p95": 205.02399653196335,
- "p99": 219.36000138521194
+ "p50": 249.9839961528778,
+ "p90": 730.3680181503296,
+ "p95": 800.8639812469482,
+ "p99": 846.7520177364349
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 55674880,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 66576384,
+ "combineLogicalBytes": 66576384,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1103,16 +1106,16 @@
]
},
{
- "id": "cx-24853ec9",
- "identity": "b300|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_c9569580",
- "comparisonKey": "862206160efb203e",
+ "id": "cx-e8db863c",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||00df46ebb2988d7",
+ "colorKey": "gb200_f1783455",
+ "comparisonKey": "ef1adb0bc917ca19",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:44.096050+00:00",
+ "generatedAt": "2026-06-29T13:47:21.708482+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_11",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -1120,14 +1123,14 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 6144,
+ "hidden": 7168,
"topk": 8,
"experts": 256,
"routing": "uniform",
@@ -1136,14 +1139,15 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -1151,59 +1155,205 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
+ "gpusPerNode": 8,
+ "scaleUpDomain": 8
+ },
+ "routingConsistent": true,
+ "traceSignature": "00df46ebb2988d7",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
+ "repository": "SemiAnalysisAI/InferenceX",
+ "run": {
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ },
+ "rows": [
+ {
+ "tokensPerRank": 8,
+ "globalTokens": 64,
+ "dispatch": {
+ "p50": 90.30400216579437,
+ "p90": 100.12800246477127,
+ "p95": 105.53599894046783,
+ "p99": 116.92799627780914
+ },
+ "combine": {
+ "p50": 82.2720006108284,
+ "p90": 89.47200328111649,
+ "p95": 91.58399701118469,
+ "p99": 95.77599912881851
+ },
+ "roundtrip": {
+ "p50": 149.1519957780838,
+ "p90": 159.67999398708344,
+ "p95": 162.1440052986145,
+ "p99": 169.37600076198578
+ },
+ "isolatedSum": {
+ "p50": 172.57600277662277,
+ "p90": 189.60000574588776,
+ "p95": 197.11999595165253,
+ "p99": 212.70399540662766
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 107.45599865913391,
+ "p90": 113.40799927711487,
+ "p95": 117.63200163841248,
+ "p99": 125.5359947681427
+ },
+ "combine": {
+ "p50": 104.96000200510025,
+ "p90": 108.41599851846695,
+ "p95": 110.6560006737709,
+ "p99": 117.69600212574005
+ },
+ "roundtrip": {
+ "p50": 185.34399569034576,
+ "p90": 192.60799884796143,
+ "p95": 196.0960030555725,
+ "p99": 200.6720006465912
+ },
+ "isolatedSum": {
+ "p50": 212.41600066423416,
+ "p90": 221.82399779558182,
+ "p95": 228.28800231218338,
+ "p99": 243.23199689388275
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ }
+ ]
+ },
+ {
+ "id": "cx-814e92ce",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_f1783455",
+ "comparisonKey": "ef1adb0bc917ca19",
+ "schemaVersion": 3,
+ "generatedAt": "2026-06-29T13:54:29.380812+00:00",
+ "status": "valid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
+ "backend": "deepep",
+ "phase": "decode",
+ "mode": "normal",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
+ "worldSize": 8,
+ "epSize": 8,
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "DeepSeek-V3/V4",
+ "shape": {
+ "hidden": 7168,
+ "topk": 8,
+ "experts": 256,
+ "routing": "uniform",
+ "routingLabel": "uniform",
+ "routingStep": 0,
+ "unevenTokens": "none",
+ "eplbEnabled": false,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
+ "combineQuantMode": "none"
+ },
+ "resourceProfile": {
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
+ "paretoEligible": false
+ },
+ "placement": {
+ "kind": "adversarial",
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287497246",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287497246",
- "createdAt": "2026-06-27T11:13:44.096050+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.440001517534256,
- "p90": 59.29600074887276,
- "p95": 61.28000095486641,
- "p99": 64.41599875688553
+ "p50": 94.04800087213516,
+ "p90": 105.95200210809708,
+ "p95": 109.98400300741196,
+ "p99": 118.367999792099
},
"combine": {
- "p50": 65.8240020275116,
- "p90": 67.07199662923813,
- "p95": 67.19999760389328,
- "p99": 77.47200131416321
+ "p50": 82.97599852085114,
+ "p90": 87.99999952316284,
+ "p95": 93.47199648618698,
+ "p99": 96.3200032711029
},
"roundtrip": {
- "p50": 108.25599730014801,
- "p90": 113.3119985461235,
- "p95": 114.30399864912033,
- "p99": 123.71200323104858
+ "p50": 152.0320028066635,
+ "p90": 162.6559942960739,
+ "p95": 165.43999314308167,
+ "p99": 176.41599476337433
},
"isolatedSum": {
- "p50": 123.26400354504585,
- "p90": 126.36799737811089,
- "p95": 128.4799985587597,
- "p99": 141.88800007104874
+ "p50": 177.0239993929863,
+ "p90": 193.95200163125992,
+ "p95": 203.45599949359894,
+ "p99": 214.6880030632019
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 540672,
- "combineLogicalBytes": 540672,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
"fanoutMean": 5.5,
"recvTokensMax": 7,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1212,35 +1362,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 57.66399949789047,
- "p90": 60.28800085186958,
- "p95": 62.94400244951248,
- "p99": 71.1359977722168
+ "p50": 93.47199648618698,
+ "p90": 106.20799660682678,
+ "p95": 110.04800349473953,
+ "p99": 116.5120005607605
},
"combine": {
- "p50": 66.23999774456024,
- "p90": 67.16799736022949,
- "p95": 67.26399809122086,
- "p99": 69.63200122117996
+ "p50": 83.45600217580795,
+ "p90": 87.93599903583527,
+ "p95": 93.37600320577621,
+ "p99": 96.25600278377533
},
"roundtrip": {
- "p50": 107.4879989027977,
- "p90": 113.15199732780457,
- "p95": 114.17599767446518,
- "p99": 118.9119964838028
+ "p50": 152.92799472808838,
+ "p90": 163.4880006313324,
+ "p95": 166.4319932460785,
+ "p99": 173.47200214862823
},
"isolatedSum": {
- "p50": 123.90399724245071,
- "p90": 127.45599821209908,
- "p95": 130.20800054073334,
- "p99": 140.76799899339676
+ "p50": 176.92799866199493,
+ "p90": 194.14399564266205,
+ "p95": 203.42400670051575,
+ "p99": 212.76800334453583
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1056768,
- "combineLogicalBytes": 1056768,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
"fanoutMean": 5.375,
"recvTokensMax": 13,
- "stragglerRank": 7,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1249,35 +1399,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 59.328000992536545,
- "p90": 62.17600032687187,
- "p95": 65.05600363016129,
- "p99": 70.01599669456482
+ "p50": 94.55999732017517,
+ "p90": 107.10400342941284,
+ "p95": 113.08799684047699,
+ "p99": 121.08799815177917
},
"combine": {
- "p50": 66.81600213050842,
- "p90": 68.12799721956253,
- "p95": 69.11999732255936,
- "p99": 77.27999985218048
+ "p50": 84.32000130414963,
+ "p90": 91.90399944782257,
+ "p95": 94.81599926948547,
+ "p99": 99.67999905347824
},
"roundtrip": {
- "p50": 108.57599973678589,
- "p90": 115.58400094509125,
- "p95": 118.1119978427887,
- "p99": 128.76799702644348
+ "p50": 156.38400614261627,
+ "p90": 168.09600591659546,
+ "p95": 171.51999473571777,
+ "p99": 181.08800053596497
},
"isolatedSum": {
- "p50": 126.14400312304497,
- "p90": 130.3039975464344,
- "p95": 134.17600095272064,
- "p99": 147.2959965467453
+ "p50": 178.8799986243248,
+ "p90": 199.0080028772354,
+ "p95": 207.90399610996246,
+ "p99": 220.76799720525742
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2125824,
- "combineLogicalBytes": 2125824,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
"fanoutMean": 5.40625,
"recvTokensMax": 29,
- "stragglerRank": 7,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1286,35 +1436,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 59.776000678539276,
- "p90": 62.111999839544296,
- "p95": 63.90400230884552,
- "p99": 70.14399766921997
+ "p50": 95.83999961614609,
+ "p90": 107.64800012111664,
+ "p95": 111.7440015077591,
+ "p99": 123.23199957609177
},
"combine": {
- "p50": 67.87200272083282,
- "p90": 76.19199901819229,
- "p95": 77.18399912118912,
- "p99": 79.55200225114822
+ "p50": 85.21600067615509,
+ "p90": 93.63199770450592,
+ "p95": 95.48799693584442,
+ "p99": 106.23999685049057
},
"roundtrip": {
- "p50": 116.31999909877777,
- "p90": 122.43200093507767,
- "p95": 124.60800260305405,
- "p99": 131.77600502967834
+ "p50": 158.2079976797104,
+ "p90": 169.08800601959229,
+ "p95": 171.9679981470108,
+ "p99": 177.44000256061554
},
"isolatedSum": {
- "p50": 127.6480033993721,
- "p90": 138.3039988577366,
- "p95": 141.08800143003464,
- "p99": 149.6959999203682
+ "p50": 181.05600029230118,
+ "p90": 201.27999782562256,
+ "p95": 207.23199844360352,
+ "p99": 229.47199642658234
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4263936,
- "combineLogicalBytes": 4263936,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
"fanoutMean": 5.421875,
"recvTokensMax": 47,
- "stragglerRank": 5,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1323,35 +1473,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 64.19199705123901,
- "p90": 69.34399902820587,
- "p95": 71.80800288915634,
- "p99": 79.52000200748444
+ "p50": 97.88800030946732,
+ "p90": 109.69600081443787,
+ "p95": 114.84800279140472,
+ "p99": 124.1919994354248
},
"combine": {
- "p50": 68.4799998998642,
- "p90": 76.48000121116638,
- "p95": 76.9599974155426,
- "p99": 79.0719985961914
+ "p50": 86.40000224113464,
+ "p90": 95.0080007314682,
+ "p95": 97.120001912117,
+ "p99": 104.06400263309479
},
"roundtrip": {
- "p50": 121.5360015630722,
- "p90": 126.52799487113953,
- "p95": 127.3919939994812,
- "p99": 137.9839926958084
+ "p50": 161.53599321842194,
+ "p90": 172.54400253295898,
+ "p95": 177.5359958410263,
+ "p99": 186.68800592422485
},
"isolatedSum": {
- "p50": 132.6719969511032,
- "p90": 145.82400023937225,
- "p95": 148.76800030469894,
- "p99": 158.59200060367584
+ "p50": 184.28800255060196,
+ "p90": 204.70400154590607,
+ "p95": 211.96800470352173,
+ "p99": 228.2560020685196
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 8503296,
- "combineLogicalBytes": 8503296,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
"fanoutMean": 5.40625,
"recvTokensMax": 92,
- "stragglerRank": 0,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1360,35 +1510,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 73.53600114583969,
- "p90": 77.18399912118912,
- "p95": 78.14399898052216,
- "p99": 88.128000497818
+ "p50": 101.21600329875946,
+ "p90": 111.48799955844879,
+ "p95": 115.1999980211258,
+ "p99": 120.41600048542023
},
"combine": {
- "p50": 77.504001557827,
- "p90": 79.19999957084656,
- "p95": 79.45600152015686,
- "p99": 80.25600016117096
+ "p50": 94.68799829483032,
+ "p90": 99.32799637317657,
+ "p95": 105.6319996714592,
+ "p99": 109.3439981341362
},
"roundtrip": {
- "p50": 123.64800274372101,
- "p90": 128.38399410247803,
- "p95": 131.1360001564026,
- "p99": 140.4159963130951
+ "p50": 166.24000668525696,
+ "p90": 175.52000284194946,
+ "p95": 178.14399302005768,
+ "p99": 185.2799952030182
},
"isolatedSum": {
- "p50": 151.0400027036667,
- "p90": 156.38399869203568,
- "p95": 157.60000050067902,
- "p99": 168.38400065898895
+ "p50": 195.90400159358978,
+ "p90": 210.81599593162537,
+ "p95": 220.831997692585,
+ "p99": 229.75999861955643
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 16908288,
- "combineLogicalBytes": 16908288,
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
"fanoutMean": 5.375,
"recvTokensMax": 182,
- "stragglerRank": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1397,35 +1547,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 88.28800171613693,
- "p90": 90.91199934482574,
- "p95": 91.90399944782257,
- "p99": 103.90400141477585
+ "p50": 113.40799927711487,
+ "p90": 123.16799908876419,
+ "p95": 126.5919953584671,
+ "p99": 138.2720023393631
},
"combine": {
- "p50": 90.91199934482574,
- "p90": 91.93599969148636,
- "p95": 92.47999638319016,
- "p99": 103.2319962978363
+ "p50": 108.44799876213074,
+ "p90": 115.84000289440155,
+ "p95": 118.72000247240067,
+ "p99": 121.88799679279327
},
"roundtrip": {
- "p50": 147.96799421310425,
- "p90": 153.18399667739868,
- "p95": 155.4879993200302,
- "p99": 161.69600188732147
+ "p50": 191.83999300003052,
+ "p90": 200.8640021085739,
+ "p95": 203.5519927740097,
+ "p99": 210.52800118923187
},
"isolatedSum": {
- "p50": 179.20000106096268,
- "p90": 182.8479990363121,
- "p95": 184.38399583101273,
- "p99": 207.13599771261215
+ "p50": 221.8559980392456,
+ "p90": 239.00800198316574,
+ "p95": 245.31199783086777,
+ "p99": 260.1599991321564
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 33423360,
- "combineLogicalBytes": 33423360,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
"fanoutMean": 5.3125,
"recvTokensMax": 367,
- "stragglerRank": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1434,35 +1584,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 100.00000149011612,
- "p90": 102.68799960613251,
- "p95": 103.4879982471466,
- "p99": 112.86400258541107
+ "p50": 126.14400684833527,
+ "p90": 136.1279934644699,
+ "p95": 139.67999815940857,
+ "p99": 150.84800124168396
},
"combine": {
- "p50": 105.82400113344193,
- "p90": 114.46399986743927,
- "p95": 115.03999680280685,
- "p99": 118.23999881744385
+ "p50": 128.12800705432892,
+ "p90": 134.11200046539307,
+ "p95": 135.903999209404,
+ "p99": 142.11200177669525
},
"roundtrip": {
- "p50": 185.82400679588318,
- "p90": 190.14400243759155,
- "p95": 191.00800156593323,
- "p99": 196.8960016965866
+ "p50": 226.623997092247,
+ "p90": 235.6799989938736,
+ "p95": 238.304004073143,
+ "p99": 243.83999407291412
},
"isolatedSum": {
- "p50": 205.82400262355804,
- "p90": 217.15199947357178,
- "p95": 218.52799504995346,
- "p99": 231.10400140285492
+ "p50": 254.27201390266418,
+ "p90": 270.239993929863,
+ "p95": 275.58399736881256,
+ "p99": 292.9600030183792
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
"fanoutMean": 5.291015625,
"recvTokensMax": 723,
- "stragglerRank": 7,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1470,16 +1620,16 @@
]
},
{
- "id": "cx-c0dba141",
- "identity": "b300|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_307ed708",
- "comparisonKey": "62d01cd02a49457a",
+ "id": "cx-50b58ea2",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "61b647515928837c",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:52:49.194497+00:00",
+ "generatedAt": "2026-06-29T13:49:03.469405+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -1487,14 +1637,14 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 6144,
+ "hidden": 7168,
"topk": 8,
"experts": 256,
"routing": "uniform",
@@ -1503,14 +1653,15 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -1518,59 +1669,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285721110",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285721110",
- "createdAt": "2026-06-27T09:52:49.194497+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.41600117087364,
- "p90": 58.848001062870026,
- "p95": 63.07200342416763,
- "p99": 79.29600030183792
+ "p50": 91.67999774217606,
+ "p90": 103.07200253009796,
+ "p95": 107.51999914646149,
+ "p99": 116.86400324106216
},
"combine": {
- "p50": 65.24799764156342,
- "p90": 66.14399701356888,
- "p95": 66.3359984755516,
- "p99": 68.96000355482101
+ "p50": 81.53600245714188,
+ "p90": 85.88799834251404,
+ "p95": 90.55999666452408,
+ "p99": 95.16800194978714
},
"roundtrip": {
- "p50": 104.76800054311752,
- "p90": 111.35999858379364,
- "p95": 112.09599673748016,
- "p99": 115.7120019197464
+ "p50": 151.45599842071533,
+ "p90": 160.89600324630737,
+ "p95": 164.2879992723465,
+ "p99": 175.26400089263916
},
"isolatedSum": {
- "p50": 121.66399881243706,
- "p90": 124.9919980764389,
- "p95": 129.40800189971924,
- "p99": 148.25600385665894
+ "p50": 173.21600019931793,
+ "p90": 188.960000872612,
+ "p95": 198.07999581098557,
+ "p99": 212.0320051908493
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 540672,
- "combineLogicalBytes": 540672,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
"fanoutMean": 5.5,
"recvTokensMax": 7,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1579,35 +1730,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 55.87200075387955,
- "p90": 57.53599852323532,
- "p95": 58.94400179386139,
- "p99": 66.91200286149979
+ "p50": 90.52799642086029,
+ "p90": 102.7199998497963,
+ "p95": 106.39999806880951,
+ "p99": 115.58400094509125
},
"combine": {
- "p50": 65.69600105285645,
- "p90": 66.3359984755516,
- "p95": 66.68800115585327,
- "p99": 78.20799946784973
+ "p50": 81.66400343179703,
+ "p90": 88.67199718952179,
+ "p95": 90.52799642086029,
+ "p99": 94.08000111579895
},
"roundtrip": {
- "p50": 105.56799918413162,
- "p90": 110.23999750614166,
- "p95": 111.13599687814713,
- "p99": 129.5360028743744
+ "p50": 151.39199793338776,
+ "p90": 162.9440039396286,
+ "p95": 166.4319932460785,
+ "p99": 171.74400389194489
},
"isolatedSum": {
- "p50": 121.56800180673599,
- "p90": 123.87199699878693,
- "p95": 125.63200294971466,
- "p99": 145.12000232934952
+ "p50": 172.19199985265732,
+ "p90": 191.39199703931808,
+ "p95": 196.9279944896698,
+ "p99": 209.6640020608902
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1056768,
- "combineLogicalBytes": 1056768,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
"fanoutMean": 5.375,
"recvTokensMax": 13,
- "stragglerRank": 4,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1616,35 +1767,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 56.73599988222122,
- "p90": 58.36800113320351,
- "p95": 59.42400172352791,
- "p99": 68.4799998998642
+ "p50": 92.44800359010696,
+ "p90": 104.12800312042236,
+ "p95": 109.15199667215347,
+ "p99": 117.40799993276596
},
"combine": {
- "p50": 66.14399701356888,
- "p90": 67.16799736022949,
- "p95": 68.15999746322632,
- "p99": 78.17599922418594
+ "p50": 82.30400085449219,
+ "p90": 89.79199826717377,
+ "p95": 91.77599847316742,
+ "p99": 96.41599655151367
},
"roundtrip": {
- "p50": 106.59199953079224,
- "p90": 109.43999886512756,
- "p95": 111.84000223875046,
- "p99": 120.7680031657219
+ "p50": 153.43999862670898,
+ "p90": 163.61600160598755,
+ "p95": 167.90400445461273,
+ "p99": 173.0560064315796
},
"isolatedSum": {
- "p50": 122.8799968957901,
- "p90": 125.535998493433,
- "p95": 127.58399918675423,
- "p99": 146.65599912405014
+ "p50": 174.75200444459915,
+ "p90": 193.92000138759613,
+ "p95": 200.9279951453209,
+ "p99": 213.82399648427963
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2125824,
- "combineLogicalBytes": 2125824,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
"fanoutMean": 5.40625,
"recvTokensMax": 29,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1653,35 +1804,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 57.631999254226685,
- "p90": 60.19200012087822,
- "p95": 61.59999966621399,
- "p99": 72.83200323581696
+ "p50": 93.9520001411438,
+ "p90": 104.09600287675858,
+ "p95": 107.77600109577179,
+ "p99": 117.72800236940384
},
"combine": {
- "p50": 66.78400188684464,
- "p90": 68.31999868154526,
- "p95": 69.50400024652481,
- "p99": 77.82399654388428
+ "p50": 85.28000116348267,
+ "p90": 93.56799721717834,
+ "p95": 96.73599898815155,
+ "p99": 167.67999529838562
},
"roundtrip": {
- "p50": 115.68000167608261,
- "p90": 122.11199849843979,
- "p95": 123.03999811410904,
- "p99": 135.04000008106232
+ "p50": 159.7760021686554,
+ "p90": 171.07200622558594,
+ "p95": 177.08800733089447,
+ "p99": 250.46399235725403
},
"isolatedSum": {
- "p50": 124.41600114107132,
- "p90": 128.51199880242348,
- "p95": 131.1039999127388,
- "p99": 150.65599977970123
+ "p50": 179.23200130462646,
+ "p90": 197.66400009393692,
+ "p95": 204.51200008392334,
+ "p99": 285.40799766778946
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4263936,
- "combineLogicalBytes": 4263936,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
"fanoutMean": 5.421875,
"recvTokensMax": 47,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1690,35 +1841,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 58.94400179386139,
- "p90": 64.89600241184235,
- "p95": 65.88800251483917,
- "p99": 72.4480003118515
+ "p50": 94.94400024414062,
+ "p90": 105.3759977221489,
+ "p95": 108.31999778747559,
+ "p99": 118.14399808645248
},
"combine": {
- "p50": 67.64800101518631,
- "p90": 76.25599950551987,
- "p95": 76.92799717187881,
- "p99": 78.43200117349625
+ "p50": 88.92799913883209,
+ "p90": 93.34400296211243,
+ "p95": 94.71999853849411,
+ "p99": 101.6639992594719
},
"roundtrip": {
- "p50": 121.56800180673599,
- "p90": 124.35200065374374,
- "p95": 125.40799379348755,
- "p99": 136.73600554466248
+ "p50": 158.75199437141418,
+ "p90": 168.38400065898895,
+ "p95": 171.55200242996216,
+ "p99": 180.1919937133789
},
"isolatedSum": {
- "p50": 126.5920028090477,
- "p90": 141.1520019173622,
- "p95": 142.815999686718,
- "p99": 150.88000148534775
+ "p50": 183.87199938297272,
+ "p90": 198.72000068426132,
+ "p95": 203.0399963259697,
+ "p99": 219.80799734592438
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 8503296,
- "combineLogicalBytes": 8503296,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
"fanoutMean": 5.40625,
"recvTokensMax": 92,
- "stragglerRank": 4,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1727,35 +1878,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 68.54400038719177,
- "p90": 75.42400062084198,
- "p95": 75.9039968252182,
- "p99": 78.015998005867
+ "p50": 99.61599856615067,
+ "p90": 110.62400043010712,
+ "p95": 113.6000007390976,
+ "p99": 134.36800241470337
},
"combine": {
- "p50": 77.56800204515457,
- "p90": 78.36800068616867,
- "p95": 78.52800190448761,
- "p99": 80.70400357246399
+ "p50": 93.56799721717834,
+ "p90": 100.832000374794,
+ "p95": 103.42399775981903,
+ "p99": 107.16799646615982
},
"roundtrip": {
- "p50": 125.34399330615997,
- "p90": 131.84000551700592,
- "p95": 133.53599607944489,
- "p99": 144.22400295734406
+ "p50": 164.86400365829468,
+ "p90": 173.7920045852661,
+ "p95": 177.3120015859604,
+ "p99": 184.38400328159332
},
"isolatedSum": {
- "p50": 146.11200243234634,
- "p90": 153.79200130701065,
- "p95": 154.4319987297058,
- "p99": 158.720001578331
+ "p50": 193.183995783329,
+ "p90": 211.45600080490112,
+ "p95": 217.02399849891663,
+ "p99": 241.5359988808632
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 16908288,
- "combineLogicalBytes": 16908288,
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
"fanoutMean": 5.375,
"recvTokensMax": 182,
- "stragglerRank": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1764,35 +1915,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 85.91999858617783,
- "p90": 88.79999816417694,
- "p95": 89.56799656152725,
- "p99": 97.69599884748459
+ "p50": 109.92000252008438,
+ "p90": 118.9119964838028,
+ "p95": 122.20799922943115,
+ "p99": 127.6479959487915
},
"combine": {
- "p50": 90.2400016784668,
- "p90": 92.28800237178802,
- "p95": 94.97600048780441,
- "p99": 102.36799716949463
+ "p50": 106.62399977445602,
+ "p90": 114.84800279140472,
+ "p95": 116.67200177907944,
+ "p99": 119.35999989509583
},
"roundtrip": {
- "p50": 149.79200065135956,
- "p90": 162.81600296497345,
- "p95": 167.4560010433197,
- "p99": 173.66400361061096
+ "p50": 189.18399512767792,
+ "p90": 196.57599925994873,
+ "p95": 199.71199333667755,
+ "p99": 204.352006316185
},
"isolatedSum": {
- "p50": 176.16000026464462,
- "p90": 181.08800053596497,
- "p95": 184.54399704933167,
- "p99": 200.06399601697922
+ "p50": 216.5440022945404,
+ "p90": 233.75999927520752,
+ "p95": 238.8800010085106,
+ "p99": 247.00799584388733
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 33423360,
- "combineLogicalBytes": 33423360,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
"fanoutMean": 5.3125,
"recvTokensMax": 367,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1801,35 +1952,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 98.7199991941452,
- "p90": 101.21600329875946,
- "p95": 102.14400291442871,
- "p99": 113.24799805879593
+ "p50": 124.79999661445618,
+ "p90": 133.7919980287552,
+ "p95": 137.472003698349,
+ "p99": 143.48800480365753
},
"combine": {
- "p50": 105.56799918413162,
- "p90": 113.98400366306305,
- "p95": 114.49600011110306,
- "p99": 114.94400352239609
+ "p50": 128.12800705432892,
+ "p90": 131.8719983100891,
+ "p95": 133.40799510478973,
+ "p99": 141.56800508499146
},
"roundtrip": {
- "p50": 184.4159960746765,
- "p90": 188.92799317836761,
- "p95": 190.08000195026398,
- "p99": 197.24799692630768
+ "p50": 224.60800409317017,
+ "p90": 233.21600258350372,
+ "p95": 236.28799617290497,
+ "p99": 245.56800723075867
},
"isolatedSum": {
- "p50": 204.28799837827682,
- "p90": 215.2000069618225,
- "p95": 216.64000302553177,
- "p99": 228.19200158119202
+ "p50": 252.9280036687851,
+ "p90": 265.6639963388443,
+ "p95": 270.87999880313873,
+ "p99": 285.056009888649
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
"fanoutMean": 5.291015625,
"recvTokensMax": 723,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -1837,47 +1988,48 @@
]
},
{
- "id": "cx-3f6620d0",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "b300_c9569580",
- "comparisonKey": "11fb97077712804e",
+ "id": "cx-ae831441",
+ "identity": "gb200|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "57634c99a1c8a12a",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T00:06:34.883169+00:00",
+ "generatedAt": "2026-06-29T13:58:16.852678+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_05",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "Kimi-K2",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
+ "experts": 384,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
- "activationProfile": "fp8-saturation",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -1885,352 +2037,281 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "6d507ec2ec8998f",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28272154473",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272154473",
- "createdAt": "2026-06-27T00:06:34.883169+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.40800127387047,
- "p90": 59.26400050520897,
- "p95": 61.055999249219894,
- "p99": 69.66400146484375
+ "p50": 97.120001912117,
+ "p90": 331.39199018478394,
+ "p95": 372.4159896373749,
+ "p99": 393.3759927749634
},
"combine": {
- "p50": 66.30399823188782,
- "p90": 67.32799857854843,
- "p95": 68.25599819421768,
- "p99": 77.02399790287018
+ "p50": 82.97599852085114,
+ "p90": 318.33600997924805,
+ "p95": 356.7039966583252,
+ "p99": 384.8640024662018
},
"roundtrip": {
- "p50": 106.88000172376633,
- "p90": 111.35999858379364,
- "p95": 112.96000331640244,
- "p99": 129.31199371814728
+ "p50": 155.29599785804749,
+ "p90": 406.9119989871979,
+ "p95": 436.2559914588928,
+ "p99": 462.5599980354309
},
"isolatedSum": {
- "p50": 123.71199950575829,
- "p90": 126.5919990837574,
- "p95": 129.31199744343758,
- "p99": 146.68799936771393
+ "p50": 180.09600043296814,
+ "p90": 649.728000164032,
+ "p95": 729.1199862957001,
+ "p99": 778.2399952411652
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
+ "dispatchLogicalBytes": 602112,
+ "combineLogicalBytes": 602112,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 8,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 2,
+ "globalTokens": 16,
+ "dispatch": {
+ "p50": 98.14400225877762,
+ "p90": 350.6239950656891,
+ "p95": 377.344012260437,
+ "p99": 396.38400077819824
+ },
+ "combine": {
+ "p50": 84.32000130414963,
+ "p90": 326.07999444007874,
+ "p95": 356.1600148677826,
+ "p99": 369.7279989719391
+ },
+ "roundtrip": {
+ "p50": 158.62399339675903,
+ "p90": 409.5039963722229,
+ "p95": 440.8000111579895,
+ "p99": 456.2560021877289
+ },
+ "isolatedSum": {
+ "p50": 182.46400356292725,
+ "p90": 676.7039895057678,
+ "p95": 733.5040271282196,
+ "p99": 766.1119997501373
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 1218560,
+ "combineLogicalBytes": 1218560,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 14,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 58.33600088953972,
- "p90": 60.67200005054474,
- "p95": 62.68800050020218,
- "p99": 68.15999746322632
+ "p50": 97.31200337409973,
+ "p90": 139.1039937734604,
+ "p95": 365.4080033302307,
+ "p99": 385.343998670578
},
"combine": {
- "p50": 67.84000247716904,
- "p90": 77.2159993648529,
- "p95": 77.88799703121185,
- "p99": 78.75200361013412
+ "p50": 84.06399935483932,
+ "p90": 320.8320140838623,
+ "p95": 356.00000619888306,
+ "p99": 368.1280016899109
},
"roundtrip": {
- "p50": 121.88799679279327,
- "p90": 125.05599856376648,
- "p95": 126.08000636100769,
- "p99": 136.99199259281158
+ "p50": 158.2079976797104,
+ "p90": 407.039999961853,
+ "p95": 437.5999867916107,
+ "p99": 456.35199546813965
},
"isolatedSum": {
- "p50": 126.17600336670876,
- "p90": 137.88799941539764,
- "p95": 140.57599753141403,
- "p99": 146.91200107336044
+ "p50": 181.37600272893906,
+ "p90": 459.9360078573227,
+ "p95": 721.4080095291138,
+ "p99": 753.4720003604889
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 2408448,
+ "combineLogicalBytes": 2408448,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 26,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 69.85600292682648,
- "p90": 74.27199929952621,
- "p95": 75.3600001335144,
- "p99": 82.97599852085114
+ "p50": 100.8640006184578,
+ "p90": 338.75200152397156,
+ "p95": 381.5680146217346,
+ "p99": 394.8479890823364
},
"combine": {
- "p50": 78.52800190448761,
- "p90": 79.19999957084656,
- "p95": 79.99999821186066,
- "p99": 82.8159973025322
+ "p50": 86.11200004816055,
+ "p90": 317.24798679351807,
+ "p95": 355.9359908103943,
+ "p99": 368.80001425743103
},
"roundtrip": {
- "p50": 131.3599944114685,
- "p90": 135.903999209404,
- "p95": 136.76799833774567,
- "p99": 147.5519984960556
+ "p50": 162.4000072479248,
+ "p90": 406.3360095024109,
+ "p95": 436.67200207710266,
+ "p99": 459.74400639533997
},
"isolatedSum": {
- "p50": 148.3840048313141,
- "p90": 153.47199887037277,
- "p95": 155.35999834537506,
- "p99": 165.79199582338333
+ "p50": 186.97600066661835,
+ "p90": 655.9999883174896,
+ "p95": 737.5040054321289,
+ "p99": 763.6480033397675
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4831232,
+ "combineLogicalBytes": 4831232,
+ "fanoutMean": 5.265625,
+ "recvTokensMax": 48,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 94.24000233411789,
- "p90": 96.79999947547913,
- "p95": 99.39199686050415,
- "p99": 103.74400019645691
+ "p50": 101.02400183677673,
+ "p90": 337.3439908027649,
+ "p95": 375.2320110797882,
+ "p99": 404.63998913764954
},
"combine": {
- "p50": 115.35999923944473,
- "p90": 116.12799763679504,
- "p95": 116.73600226640701,
- "p99": 127.29600071907043
+ "p50": 88.67199718952179,
+ "p90": 324.19198751449585,
+ "p95": 362.7519905567169,
+ "p99": 391.84001088142395
},
"roundtrip": {
- "p50": 193.4400051832199,
- "p90": 198.91199469566345,
- "p95": 199.71199333667755,
- "p99": 208.3200067281723
+ "p50": 165.53600132465363,
+ "p90": 409.6960127353668,
+ "p95": 438.4959936141968,
+ "p99": 458.43198895454407
},
"isolatedSum": {
- "p50": 209.60000157356262,
- "p90": 212.92799711227417,
- "p95": 216.12799912691116,
- "p99": 231.04000091552734
+ "p50": 189.69599902629852,
+ "p90": 661.5359783172607,
+ "p95": 737.9840016365051,
+ "p99": 796.4800000190735
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-854f00de",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "b300_c9569580",
- "comparisonKey": "afbd085a57d290fd",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:57:27.937449+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_17",
- "sku": "b300",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271865772",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271865772",
- "createdAt": "2026-06-26T23:57:27.937449+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 55.58399856090546,
- "p90": 57.40800127387047,
- "p95": 59.13599953055382,
- "p99": 65.63200056552887
- },
- "combine": {
- "p50": 66.14399701356888,
- "p90": 67.55200028419495,
- "p95": 68.38399916887283,
- "p99": 77.2159993648529
- },
- "roundtrip": {
- "p50": 105.18400371074677,
- "p90": 111.29599809646606,
- "p95": 113.50400000810623,
- "p99": 132.1280002593994
- },
- "isolatedSum": {
- "p50": 121.72799557447433,
- "p90": 124.96000155806541,
- "p95": 127.51999869942665,
- "p99": 142.84799993038177
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 9848832,
+ "combineLogicalBytes": 9848832,
+ "fanoutMean": 5.3671875,
+ "recvTokensMax": 91,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 58.400001376867294,
- "p90": 60.99199876189232,
- "p95": 62.880001962184906,
- "p99": 73.05599749088287
+ "p50": 104.5759990811348,
+ "p90": 338.6879861354828,
+ "p95": 372.6080060005188,
+ "p99": 395.9999978542328
},
"combine": {
- "p50": 67.29599833488464,
- "p90": 77.15199887752533,
- "p95": 77.72800326347351,
- "p99": 79.64800298213959
+ "p50": 95.29600292444229,
+ "p90": 328.0639946460724,
+ "p95": 342.272013425827,
+ "p99": 376.3839900493622
},
"roundtrip": {
- "p50": 117.95199662446976,
- "p90": 122.72000312805176,
- "p95": 123.9359974861145,
- "p99": 138.46400380134583
+ "p50": 167.67999529838562,
+ "p90": 404.4159948825836,
+ "p95": 431.0399889945984,
+ "p99": 457.88800716400146
},
"isolatedSum": {
- "p50": 125.69599971175194,
- "p90": 138.14399763941765,
- "p95": 140.60800522565842,
- "p99": 152.70400047302246
+ "p50": 199.8720020055771,
+ "p90": 666.7519807815552,
+ "p95": 714.8800194263458,
+ "p99": 772.383987903595
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 19496960,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 178,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 67.9360032081604,
- "p90": 71.16799801588058,
- "p95": 73.72800260782242,
- "p99": 86.5280032157898
+ "p50": 112.96000331640244,
+ "p90": 132.22399353981018,
+ "p95": 365.4080033302307,
+ "p99": 413.4080111980438
},
"combine": {
- "p50": 77.95199751853943,
- "p90": 79.19999957084656,
- "p95": 80.06399869918823,
- "p99": 83.8719978928566
+ "p50": 108.47999900579453,
+ "p90": 119.1679984331131,
+ "p95": 342.78398752212524,
+ "p99": 383.07198882102966
},
"roundtrip": {
- "p50": 128.7039965391159,
- "p90": 131.1360001564026,
- "p95": 132.76800513267517,
- "p99": 140.6400054693222
+ "p50": 194.815993309021,
+ "p90": 447.80799746513367,
+ "p95": 469.2800045013428,
+ "p99": 492.576003074646
},
"isolatedSum": {
- "p50": 145.88800072669983,
- "p90": 150.36799758672714,
- "p95": 153.79200130701065,
- "p99": 170.4000011086464
+ "p50": 221.44000232219696,
+ "p90": 251.39199197292328,
+ "p95": 708.191990852356,
+ "p99": 796.4800000190735
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 38836224,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 372,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -2239,35 +2320,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 92.70399808883667,
- "p90": 97.63199836015701,
- "p95": 99.71199929714203,
- "p99": 135.42400300502777
+ "p50": 128.7360042333603,
+ "p90": 375.0079870223999,
+ "p95": 405.5359959602356,
+ "p99": 426.2399971485138
},
"combine": {
- "p50": 114.78400230407715,
- "p90": 116.70400202274323,
- "p95": 118.97599697113037,
- "p99": 164.0319973230362
+ "p50": 128.89599800109863,
+ "p90": 378.33601236343384,
+ "p95": 401.91999077796936,
+ "p99": 419.0079867839813
},
"roundtrip": {
- "p50": 190.62399864196777,
- "p90": 196.60800695419312,
- "p95": 197.66399264335632,
- "p99": 203.99999618530273
+ "p50": 227.90400683879852,
+ "p90": 245.53599953651428,
+ "p95": 489.24800753593445,
+ "p99": 524.1919755935669
},
"isolatedSum": {
- "p50": 207.48800039291382,
- "p90": 214.33600038290024,
- "p95": 218.6879962682724,
- "p99": 299.45600032806396
+ "p50": 257.6320022344589,
+ "p90": 753.3439993858337,
+ "p95": 807.455986738205,
+ "p99": 845.2479839324951
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 77514752,
+ "combineLogicalBytes": 77514752,
+ "fanoutMean": 5.2802734375,
+ "recvTokensMax": 707,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -2275,16 +2356,16 @@
]
},
{
- "id": "cx-bbb0479e",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_c9569580",
- "comparisonKey": "c777627e39152404",
+ "id": "cx-1789a31a",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||af0b2d2a9119979",
+ "colorKey": "gb200_62dbe147",
+ "comparisonKey": "3fdd98e0ac017897",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T10:26:04.332610+00:00",
+ "generatedAt": "2026-06-29T13:55:53.501138+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_04",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -2292,30 +2373,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
+ "label": "GB200 EP8 · deepep · bf16 · balanced",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "balanced",
+ "routingLabel": "balanced",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -2323,133 +2405,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "af0b2d2a9119979",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28286434915",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286434915",
- "createdAt": "2026-06-27T10:26:04.332610+00:00",
- "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.86400085687637,
- "p90": 59.51999872922897,
- "p95": 61.824001371860504,
- "p99": 71.32799923419952
- },
- "combine": {
- "p50": 67.64800101518631,
- "p90": 69.72800195217133,
- "p95": 71.07199728488922,
- "p99": 90.55999666452408
- },
- "roundtrip": {
- "p50": 109.21599715948105,
- "p90": 114.656001329422,
- "p95": 115.87200313806534,
- "p99": 121.08799815177917
- },
- "isolatedSum": {
- "p50": 124.51200187206268,
- "p90": 129.2480006814003,
- "p95": 132.89599865674973,
- "p99": 161.8879958987236
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 57.95200169086456,
- "p90": 60.83200126886368,
- "p95": 64.70400094985962,
- "p99": 79.6160027384758
- },
- "combine": {
- "p50": 68.00000369548798,
- "p90": 69.40799951553345,
- "p95": 70.30399888753891,
- "p99": 80.70400357246399
- },
- "roundtrip": {
- "p50": 108.89600217342377,
- "p90": 112.47999966144562,
- "p95": 117.47200042009354,
- "p99": 286.20800375938416
- },
- "isolatedSum": {
- "p50": 125.95200538635254,
- "p90": 130.24000078439713,
- "p95": 135.00799983739853,
- "p99": 160.3200063109398
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 59.90400165319443,
- "p90": 63.87200206518173,
- "p95": 66.880002617836,
- "p99": 74.72000271081924
+ "p50": 90.81599861383438,
+ "p90": 104.22399640083313,
+ "p95": 107.77600109577179,
+ "p99": 122.14399874210358
},
"combine": {
- "p50": 69.2799985408783,
- "p90": 78.46400141716003,
- "p95": 78.87999713420868,
- "p99": 91.26400202512741
+ "p50": 82.68799632787704,
+ "p90": 90.94399958848953,
+ "p95": 93.79199892282486,
+ "p99": 96.8639999628067
},
"roundtrip": {
- "p50": 123.58400225639343,
- "p90": 127.77599692344666,
- "p95": 128.9920061826706,
- "p99": 140.70400595664978
+ "p50": 150.07999539375305,
+ "p90": 160.38399934768677,
+ "p95": 163.83999586105347,
+ "p99": 174.20800030231476
},
"isolatedSum": {
- "p50": 129.18400019407272,
- "p90": 142.33600348234177,
- "p95": 145.75999975204468,
- "p99": 165.98400473594666
+ "p50": 173.50399494171143,
+ "p90": 195.16799598932266,
+ "p95": 201.56800001859665,
+ "p99": 219.00799870491028
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 917504,
+ "combineLogicalBytes": 917504,
+ "fanoutMean": 8,
+ "recvTokensMax": 8,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -2458,72 +2466,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 59.7120001912117,
- "p90": 61.91999837756157,
- "p95": 64.2239972949028,
- "p99": 73.53600114583969
- },
- "combine": {
- "p50": 70.68800181150436,
- "p90": 79.16799932718277,
- "p95": 79.77599650621414,
- "p99": 83.39200168848038
- },
- "roundtrip": {
- "p50": 121.21599912643433,
- "p90": 126.49600207805634,
- "p95": 127.20000743865967,
- "p99": 133.91999900341034
- },
- "isolatedSum": {
- "p50": 130.40000200271606,
- "p90": 141.08799770474434,
- "p95": 143.99999380111694,
- "p99": 156.92800283432007
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 66.94400310516357,
- "p90": 73.5040009021759,
- "p95": 75.26399940252304,
- "p99": 77.69600301980972
+ "p50": 92.70399808883667,
+ "p90": 106.33599758148193,
+ "p95": 111.23199760913849,
+ "p99": 132.4480026960373
},
"combine": {
- "p50": 78.62400263547897,
- "p90": 79.71200346946716,
- "p95": 79.93599772453308,
- "p99": 82.94399827718735
+ "p50": 85.28000116348267,
+ "p90": 94.04800087213516,
+ "p95": 95.93600034713745,
+ "p99": 104.99200224876404
},
"roundtrip": {
- "p50": 121.11999839544296,
- "p90": 123.32800030708313,
- "p95": 124.41600114107132,
- "p99": 129.7920048236847
+ "p50": 157.31200575828552,
+ "p90": 167.35999286174774,
+ "p95": 170.1119989156723,
+ "p99": 175.48799514770508
},
"isolatedSum": {
- "p50": 145.56800574064255,
- "p90": 153.21600437164307,
- "p95": 155.19999712705612,
- "p99": 160.64000129699707
+ "p50": 177.98399925231934,
+ "p90": 200.3839984536171,
+ "p95": 207.16799795627594,
+ "p99": 237.44000494480133
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 7340032,
+ "combineLogicalBytes": 7340032,
+ "fanoutMean": 8,
+ "recvTokensMax": 64,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -2532,72 +2503,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 69.76000219583511,
- "p90": 71.80800288915634,
- "p95": 74.46400076150894,
- "p99": 86.84799820184708
- },
- "combine": {
- "p50": 80.22399991750717,
- "p90": 83.10399949550629,
- "p95": 90.30400216579437,
- "p99": 92.6079973578453
- },
- "roundtrip": {
- "p50": 132.38400220870972,
- "p90": 137.05599308013916,
- "p95": 138.72000575065613,
- "p99": 158.9439958333969
- },
- "isolatedSum": {
- "p50": 149.98400211334229,
- "p90": 154.91200238466263,
- "p95": 164.7680029273033,
- "p99": 179.45599555969238
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 82.56000280380249,
- "p90": 89.75999802350998,
- "p95": 91.0400003194809,
- "p99": 103.20000350475311
+ "p50": 100.16000270843506,
+ "p90": 110.27199774980545,
+ "p95": 113.76000195741653,
+ "p99": 121.5360015630722
},
"combine": {
- "p50": 92.6399976015091,
- "p90": 94.97600048780441,
- "p95": 102.55999863147736,
- "p99": 106.36799782514572
+ "p50": 94.01600062847137,
+ "p90": 98.84800016880035,
+ "p95": 103.96800190210342,
+ "p99": 107.61599987745285
},
"roundtrip": {
- "p50": 160.16000509262085,
- "p90": 165.0560051202774,
- "p95": 166.75199568271637,
- "p99": 179.77599799633026
+ "p50": 169.66399550437927,
+ "p90": 178.6240041255951,
+ "p95": 181.43999576568604,
+ "p99": 188.51199746131897
},
"isolatedSum": {
- "p50": 175.20000040531158,
- "p90": 184.7359985113144,
- "p95": 193.59999895095825,
- "p99": 209.56800132989883
+ "p50": 194.17600333690643,
+ "p90": 209.1199979186058,
+ "p95": 217.72800385951996,
+ "p99": 229.15200144052505
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 29360128,
+ "combineLogicalBytes": 29360128,
+ "fanoutMean": 8,
+ "recvTokensMax": 256,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -2606,35 +2540,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 94.46399658918381,
- "p90": 96.79999947547913,
- "p95": 98.88000041246414,
- "p99": 114.656001329422
+ "p50": 137.63199746608734,
+ "p90": 147.61599898338318,
+ "p95": 149.98400211334229,
+ "p99": 158.39999914169312
},
"combine": {
- "p50": 116.03199690580368,
- "p90": 117.44000017642975,
- "p95": 117.91999638080597,
- "p99": 127.83999741077423
+ "p50": 145.56799829006195,
+ "p90": 153.9520025253296,
+ "p95": 156.38400614261627,
+ "p99": 162.7199947834015
},
"roundtrip": {
- "p50": 195.6160068511963,
- "p90": 200.41599869728088,
- "p95": 201.664000749588,
- "p99": 227.35999524593353
+ "p50": 260.70401072502136,
+ "p90": 270.04799246788025,
+ "p95": 273.79199862480164,
+ "p99": 286.3680124282837
},
"isolatedSum": {
- "p50": 210.4959934949875,
- "p90": 214.23999965190887,
- "p95": 216.7999967932701,
- "p99": 242.49599874019623
+ "p50": 283.1999957561493,
+ "p90": 301.56800150871277,
+ "p95": 306.36800825595856,
+ "p99": 321.1199939250946
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 117440512,
+ "combineLogicalBytes": 117440512,
+ "fanoutMean": 8,
+ "recvTokensMax": 1024,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -2642,16 +2576,16 @@
]
},
{
- "id": "cx-2fa7319c",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da",
- "colorKey": "b300_c9569580",
- "comparisonKey": "89fa2de88509570c",
+ "id": "cx-03edcd25",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||f0bc700e9998f70",
+ "colorKey": "gb200_3028258e",
+ "comparisonKey": "021bfb0baa9d2669",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T00:54:19.552522+00:00",
+ "generatedAt": "2026-06-29T13:56:38.892289+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -2659,30 +2593,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
+ "label": "GB200 EP8 · deepep · bf16 · balanced-rank-local",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "balanced-rank-local",
+ "routingLabel": "balanced-rank-local",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -2690,207 +2625,170 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "c774c8e4abb34da",
- "workloadId": "set:5:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "f0bc700e9998f70",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28273513209",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273513209",
- "createdAt": "2026-06-27T00:54:19.552522+00:00",
- "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.41600117087364,
- "p90": 58.46399813890457,
- "p95": 60.95999851822853,
- "p99": 71.55200093984604
- },
- "combine": {
- "p50": 66.27199798822403,
- "p90": 67.55200028419495,
- "p95": 68.28799843788147,
- "p99": 77.27999985218048
- },
- "roundtrip": {
- "p50": 105.85600137710571,
- "p90": 112.28799819946289,
- "p95": 113.3119985461235,
- "p99": 124.09599870443344
- },
- "isolatedSum": {
- "p50": 122.68799915909767,
- "p90": 126.01599842309952,
- "p95": 129.24799695611,
- "p99": 148.83200079202652
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 56.60799890756607,
- "p90": 58.04799869656563,
- "p95": 59.39200147986412,
- "p99": 63.64800035953522
+ "p50": 91.07200056314468,
+ "p90": 104.2879968881607,
+ "p95": 108.83200168609619,
+ "p99": 114.1119971871376
},
"combine": {
- "p50": 67.03999638557434,
- "p90": 68.7360018491745,
- "p95": 69.15199756622314,
- "p99": 77.2159993648529
+ "p50": 68.9919963479042,
+ "p90": 73.53600114583969,
+ "p95": 78.36800068616867,
+ "p99": 82.20800012350082
},
"roundtrip": {
- "p50": 107.04000294208527,
- "p90": 109.76000130176544,
- "p95": 111.35999858379364,
- "p99": 119.19999867677689
+ "p50": 140.06400108337402,
+ "p90": 150.81599354743958,
+ "p95": 154.30399775505066,
+ "p99": 162.1440052986145
},
"isolatedSum": {
- "p50": 123.64799529314041,
- "p90": 126.78400054574013,
- "p95": 128.54399904608727,
- "p99": 140.86399972438812
+ "p50": 160.0639969110489,
+ "p90": 177.8239980340004,
+ "p95": 187.20000237226486,
+ "p99": 196.31999731063843
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 114688,
+ "combineLogicalBytes": 114688,
+ "fanoutMean": 1,
+ "recvTokensMax": 4,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 58.81600081920624,
- "p90": 64.44799900054932,
- "p95": 66.01600348949432,
- "p99": 71.61600142717361
+ "p50": 94.30400282144547,
+ "p90": 107.51999914646149,
+ "p95": 112.09599673748016,
+ "p99": 126.88000500202179
},
"combine": {
- "p50": 67.26399809122086,
- "p90": 69.63200122117996,
- "p95": 77.15199887752533,
- "p99": 78.91199737787247
+ "p50": 70.8480030298233,
+ "p90": 79.52000200748444,
+ "p95": 81.37600123882294,
+ "p99": 85.60000360012054
},
"roundtrip": {
- "p50": 122.20799922943115,
- "p90": 125.18399953842163,
- "p95": 125.91999769210815,
- "p99": 130.3360015153885
+ "p50": 144.896000623703,
+ "p90": 157.4079990386963,
+ "p95": 161.72799468040466,
+ "p99": 180.28800189495087
},
"isolatedSum": {
- "p50": 126.0799989104271,
- "p90": 134.08000022172928,
- "p95": 143.16800236701965,
- "p99": 150.52799880504608
+ "p50": 165.15200585126877,
+ "p90": 187.04000115394592,
+ "p95": 193.4719979763031,
+ "p99": 212.48000860214233
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 917504,
+ "combineLogicalBytes": 917504,
+ "fanoutMean": 1,
+ "recvTokensMax": 8,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 59.42400172352791,
- "p90": 64.25599753856659,
- "p95": 67.87200272083282,
- "p99": 74.62400197982788
+ "p50": 98.75199943780899,
+ "p90": 114.27199840545654,
+ "p95": 126.11199915409088,
+ "p99": 222.08000719547272
},
"combine": {
- "p50": 68.9919963479042,
- "p90": 78.015998005867,
- "p95": 78.62400263547897,
- "p99": 81.88799768686295
+ "p50": 80.79999685287476,
+ "p90": 85.1840004324913,
+ "p95": 91.42400324344635,
+ "p99": 94.87999975681305
},
"roundtrip": {
- "p50": 119.39200013875961,
- "p90": 125.05599856376648,
- "p95": 126.17599964141846,
- "p99": 130.36799430847168
+ "p50": 150.62400698661804,
+ "p90": 162.6559942960739,
+ "p95": 165.75999557971954,
+ "p99": 174.27200078964233
},
"isolatedSum": {
- "p50": 128.4159980714321,
- "p90": 142.2719955444336,
- "p95": 146.4960053563118,
- "p99": 156.51199966669083
+ "p50": 179.55199629068375,
+ "p90": 199.45599883794785,
+ "p95": 217.53600239753723,
+ "p99": 316.96000695228577
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 3670016,
+ "combineLogicalBytes": 3670016,
+ "fanoutMean": 1,
+ "recvTokensMax": 32,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 66.68800115585327,
- "p90": 73.7600028514862,
- "p95": 75.13599842786789,
- "p99": 80.35200089216232
+ "p50": 101.43999755382538,
+ "p90": 111.29599809646606,
+ "p95": 116.06399714946747,
+ "p99": 124.70400333404541
},
"combine": {
- "p50": 69.88800317049026,
- "p90": 78.5600021481514,
- "p95": 78.75200361013412,
- "p99": 82.56000280380249
+ "p50": 82.0159986615181,
+ "p90": 91.61599725484848,
+ "p95": 93.53599697351456,
+ "p99": 96.54399752616882
},
"roundtrip": {
- "p50": 119.26399916410446,
- "p90": 121.47200107574463,
- "p95": 123.52000176906586,
- "p99": 127.68000364303589
+ "p50": 157.50400722026825,
+ "p90": 166.46400094032288,
+ "p95": 169.50400173664093,
+ "p99": 179.3919950723648
},
"isolatedSum": {
- "p50": 136.57600432634354,
- "p90": 152.3200049996376,
- "p95": 153.888002038002,
- "p99": 162.9120036959648
+ "p50": 183.45599621534348,
+ "p90": 202.91199535131454,
+ "p95": 209.59999412298203,
+ "p99": 221.24800086021423
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 0,
+ "dispatchLogicalBytes": 14680064,
+ "combineLogicalBytes": 14680064,
+ "fanoutMean": 1,
+ "recvTokensMax": 128,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -2898,16 +2796,16 @@
]
},
{
- "id": "cx-dc6ca42c",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "b300_c9569580",
- "comparisonKey": "8a9fa1be98f83eb3",
+ "id": "cx-50034489",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||0456df9778e5c0f",
+ "colorKey": "gb200_71fc8a17",
+ "comparisonKey": "ba506a9c9dcd4b28",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T00:06:17.025326+00:00",
+ "generatedAt": "2026-06-29T14:00:31.280638+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_14",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -2915,30 +2813,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · bf16 · balanced+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "balanced",
+ "routingLabel": "balanced+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
- "activationProfile": "small-amplitude",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -2946,352 +2845,281 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "0456df9778e5c0f",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1,
+ "eplbImbalanceAfter": 1,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28272146490",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272146490",
- "createdAt": "2026-06-27T00:06:17.025326+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 55.84000051021576,
- "p90": 57.95200169086456,
- "p95": 60.54399907588959,
- "p99": 68.09599697589874
+ "p50": 92.38400310277939,
+ "p90": 104.16000336408615,
+ "p95": 107.45599865913391,
+ "p99": 114.27199840545654
},
"combine": {
- "p50": 66.20799750089645,
- "p90": 66.94400310516357,
- "p95": 67.52000004053116,
- "p99": 90.87999910116196
+ "p50": 71.9359964132309,
+ "p90": 80.57600259780884,
+ "p95": 82.97599852085114,
+ "p99": 87.23200112581253
},
"roundtrip": {
- "p50": 106.04800283908844,
- "p90": 111.07199639081955,
- "p95": 112.67200112342834,
- "p99": 125.15200674533844
+ "p50": 145.6640064716339,
+ "p90": 166.59200191497803,
+ "p95": 180.7360053062439,
+ "p99": 195.96800208091736
},
"isolatedSum": {
- "p50": 122.04799801111221,
- "p90": 124.89600479602814,
- "p95": 128.06399911642075,
- "p99": 158.9759960770607
+ "p50": 164.31999951601028,
+ "p90": 184.736005961895,
+ "p95": 190.43199717998505,
+ "p99": 201.50399953126907
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 229376,
+ "combineLogicalBytes": 229376,
+ "fanoutMean": 2,
+ "recvTokensMax": 3,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 59.07199904322624,
- "p90": 62.3680017888546,
- "p95": 65.08799642324448,
- "p99": 71.00799679756165
+ "p50": 93.47199648618698,
+ "p90": 105.59999942779541,
+ "p95": 111.32799834012985,
+ "p99": 145.50399780273438
},
"combine": {
- "p50": 69.18399780988693,
- "p90": 78.14399898052216,
- "p95": 78.59200239181519,
- "p99": 88.22400122880936
+ "p50": 72.64000177383423,
+ "p90": 80.6720033288002,
+ "p95": 83.10399949550629,
+ "p99": 88.03199976682663
},
"roundtrip": {
- "p50": 119.07199770212173,
- "p90": 124.32000041007996,
- "p95": 125.37600100040436,
- "p99": 140.06400108337402
+ "p50": 144.6080058813095,
+ "p90": 154.36799824237823,
+ "p95": 157.0879966020584,
+ "p99": 163.26400637626648
},
"isolatedSum": {
- "p50": 128.25599685311317,
- "p90": 140.51200076937675,
- "p95": 143.67999881505966,
- "p99": 159.231998026371
+ "p50": 166.1119982600212,
+ "p90": 186.2720027565956,
+ "p95": 194.43199783563614,
+ "p99": 233.535997569561
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 458752,
+ "combineLogicalBytes": 458752,
+ "fanoutMean": 2,
+ "recvTokensMax": 6,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 68.70400160551071,
- "p90": 73.66400212049484,
- "p95": 75.13599842786789,
- "p99": 93.56799721717834
+ "p50": 98.11200201511383,
+ "p90": 239.32799696922302,
+ "p95": 276.38399600982666,
+ "p99": 444.5439875125885
},
"combine": {
- "p50": 78.62400263547897,
- "p90": 79.6160027384758,
- "p95": 81.44000172615051,
- "p99": 91.48799628019333
+ "p50": 78.46400141716003,
+ "p90": 170.3999936580658,
+ "p95": 194.5600062608719,
+ "p99": 231.36000335216522
},
"roundtrip": {
- "p50": 130.65600395202637,
- "p90": 135.71199774742126,
- "p95": 136.76799833774567,
- "p99": 144.1279947757721
+ "p50": 148.19200336933136,
+ "p90": 159.2639982700348,
+ "p95": 165.8560037612915,
+ "p99": 268.95999908447266
},
"isolatedSum": {
- "p50": 147.32800424098969,
- "p90": 153.28000485897064,
- "p95": 156.5760001540184,
- "p99": 185.05599349737167
+ "p50": 176.57600343227386,
+ "p90": 409.7279906272888,
+ "p95": 470.94400227069855,
+ "p99": 675.9039908647537
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
+ "dispatchLogicalBytes": 917504,
+ "combineLogicalBytes": 917504,
+ "fanoutMean": 2,
+ "recvTokensMax": 12,
+ "stragglerRank": 2,
+ "correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 93.08800101280212,
- "p90": 98.78399968147278,
- "p95": 100.63999891281128,
- "p99": 110.17599701881409
+ "p50": 97.02400118112564,
+ "p90": 124.32000041007996,
+ "p95": 139.3280029296875,
+ "p99": 165.75999557971954
},
"combine": {
- "p50": 115.39199948310852,
- "p90": 116.28799885511398,
- "p95": 117.21599847078323,
- "p99": 126.39999389648438
+ "p50": 76.12799853086472,
+ "p90": 84.70399677753448,
+ "p95": 93.59999746084213,
+ "p99": 254.7520101070404
},
"roundtrip": {
- "p50": 192.25600361824036,
- "p90": 198.2080042362213,
- "p95": 198.7839937210083,
- "p99": 203.61599326133728
+ "p50": 147.64800667762756,
+ "p90": 159.2639982700348,
+ "p95": 164.09599781036377,
+ "p99": 179.58399653434753
},
"isolatedSum": {
- "p50": 208.48000049591064,
- "p90": 215.07199853658676,
- "p95": 217.8559973835945,
- "p99": 236.57599091529846
+ "p50": 173.15199971199036,
+ "p90": 209.02399718761444,
+ "p95": 232.92800039052963,
+ "p99": 420.51200568675995
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1835008,
+ "combineLogicalBytes": 1835008,
+ "fanoutMean": 2,
+ "recvTokensMax": 24,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
- }
- ]
- },
- {
- "id": "cx-a995e296",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "b300_c9569580",
- "comparisonKey": "fe9431c5beaaf675",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:06:39.072562+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_03",
- "sku": "b300",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "wide-dynamic-range",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272150514",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272150514",
- "createdAt": "2026-06-27T00:06:39.072562+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
+ },
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 1758.687973022461,
- "p90": 2565.7920837402344,
- "p95": 2910.815954208374,
- "p99": 3400.576114654541
+ "p50": 95.23200243711472,
+ "p90": 105.79200088977814,
+ "p95": 109.3439981341362,
+ "p99": 122.52800166606903
},
"combine": {
- "p50": 1759.8719596862793,
- "p90": 1907.871961593628,
- "p95": 2670.1760292053223,
- "p99": 2940.095901489258
+ "p50": 80.09599894285202,
+ "p90": 84.83199775218964,
+ "p95": 87.39200234413147,
+ "p99": 97.75999933481216
},
"roundtrip": {
- "p50": 1802.39999294281,
- "p90": 1987.0719909667969,
- "p95": 2666.1760807037354,
- "p99": 2924.000024795532
+ "p50": 148.70400726795197,
+ "p90": 159.13599729537964,
+ "p95": 161.79199516773224,
+ "p99": 171.55200242996216
},
"isolatedSum": {
- "p50": 3518.5599327087402,
- "p90": 4473.664045333862,
- "p95": 5580.991983413696,
- "p99": 6340.672016143799
+ "p50": 175.32800137996674,
+ "p90": 190.62399864196777,
+ "p95": 196.73600047826767,
+ "p99": 220.2880010008812
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 3670016,
+ "combineLogicalBytes": 3670016,
+ "fanoutMean": 2,
+ "recvTokensMax": 48,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1754.8799514770508,
- "p90": 2488.703966140747,
- "p95": 2823.359966278076,
- "p99": 3391.4880752563477
+ "p50": 98.81599992513657,
+ "p90": 164.15999829769135,
+ "p95": 243.00800263881683,
+ "p99": 392.2879993915558
},
"combine": {
- "p50": 1760.4479789733887,
- "p90": 1861.184000968933,
- "p95": 2647.264003753662,
- "p99": 2955.8401107788086
+ "p50": 82.43200182914734,
+ "p90": 94.01600062847137,
+ "p95": 98.62399846315384,
+ "p99": 122.68800288438797
},
"roundtrip": {
- "p50": 1819.2960023880005,
- "p90": 1958.5280418395996,
- "p95": 2686.271905899048,
- "p99": 2968.319892883301
+ "p50": 151.74399316310883,
+ "p90": 165.40800034999847,
+ "p95": 177.34399437904358,
+ "p99": 246.75199389457703
},
"isolatedSum": {
- "p50": 3515.3279304504395,
- "p90": 4349.88796710968,
- "p95": 5470.623970031738,
- "p99": 6347.328186035156
+ "p50": 181.2480017542839,
+ "p90": 258.1759989261627,
+ "p95": 341.6320011019707,
+ "p99": 514.9760022759438
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 7340032,
+ "combineLogicalBytes": 7340032,
+ "fanoutMean": 2,
+ "recvTokensMax": 96,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 1767.3920392990112,
- "p90": 2204.767942428589,
- "p95": 2829.9520015716553,
- "p99": 3398.303985595703
+ "p50": 99.20000284910202,
+ "p90": 109.37599837779999,
+ "p95": 113.95200341939926,
+ "p99": 121.08799815177917
},
"combine": {
- "p50": 1764.0960216522217,
- "p90": 1887.1040344238281,
- "p95": 2647.615909576416,
- "p99": 3015.5839920043945
+ "p50": 83.10399949550629,
+ "p90": 91.13600105047226,
+ "p95": 94.30400282144547,
+ "p99": 98.04800152778625
},
"roundtrip": {
- "p50": 1835.6800079345703,
- "p90": 1997.1840381622314,
- "p95": 2681.3440322875977,
- "p99": 2967.072010040283
+ "p50": 155.42399883270264,
+ "p90": 177.2480010986328,
+ "p95": 278.0799865722656,
+ "p99": 647.1999883651733
},
"isolatedSum": {
- "p50": 3531.488060951233,
- "p90": 4091.871976852417,
- "p95": 5477.567911148071,
- "p99": 6413.887977600098
+ "p50": 182.3040023446083,
+ "p90": 200.51199942827225,
+ "p95": 208.25600624084473,
+ "p99": 219.13599967956543
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 14680064,
+ "combineLogicalBytes": 14680064,
+ "fanoutMean": 2,
+ "recvTokensMax": 192,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3300,35 +3128,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 1790.7520532608032,
- "p90": 2270.848035812378,
- "p95": 2845.247983932495,
- "p99": 3459.712028503418
+ "p50": 106.72000050544739,
+ "p90": 115.7120019197464,
+ "p95": 118.9119964838028,
+ "p99": 124.32000041007996
},
"combine": {
- "p50": 1809.7599744796753,
- "p90": 1956.9599628448486,
- "p95": 2685.7919692993164,
- "p99": 3029.952049255371
+ "p50": 97.47199714183807,
+ "p90": 106.08000308275223,
+ "p95": 108.31999778747559,
+ "p99": 110.59200018644333
},
"roundtrip": {
- "p50": 1890.3039693832397,
- "p90": 2169.4719791412354,
- "p95": 2888.256072998047,
- "p99": 3985.24808883667
+ "p50": 178.20799350738525,
+ "p90": 185.69600582122803,
+ "p95": 188.4479969739914,
+ "p99": 196.44799828529358
},
"isolatedSum": {
- "p50": 3600.5120277404785,
- "p90": 4227.807998657227,
- "p95": 5531.0399532318115,
- "p99": 6489.664077758789
+ "p50": 204.19199764728546,
+ "p90": 221.79200500249863,
+ "p95": 227.23199427127838,
+ "p99": 234.91200059652328
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 29360128,
+ "combineLogicalBytes": 29360128,
+ "fanoutMean": 2,
+ "recvTokensMax": 384,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3336,16 +3164,16 @@
]
},
{
- "id": "cx-b81422f4",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "b300_c9569580",
- "comparisonKey": "d97d7a8231265a6c",
+ "id": "cx-39d61832",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||d0eaac3a0f0ae8c",
+ "colorKey": "gb200_d945a181",
+ "comparisonKey": "b74e548739c090a5",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T00:06:13.336317+00:00",
+ "generatedAt": "2026-06-29T13:58:26.409733+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_13",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -3353,30 +3181,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
+ "label": "GB200 EP8 · deepep · bf16 · hotspot-single",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "hotspot-single",
+ "routingLabel": "hotspot-single",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
- "activationProfile": "zeros",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -3384,59 +3213,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "d0eaac3a0f0ae8c",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28272142980",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272142980",
- "createdAt": "2026-06-27T00:06:13.336317+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.63999915122986,
- "p90": 59.26400050520897,
- "p95": 62.04799935221672,
- "p99": 73.85600358247757
+ "p50": 91.0400003194809,
+ "p90": 106.46399855613708,
+ "p95": 113.34399878978729,
+ "p99": 134.20799374580383
},
"combine": {
- "p50": 66.43199920654297,
- "p90": 67.4239993095398,
- "p95": 68.25599819421768,
- "p99": 78.04799824953079
+ "p50": 81.79199695587158,
+ "p90": 91.96799993515015,
+ "p95": 96.16000205278397,
+ "p99": 105.76000064611435
},
"roundtrip": {
- "p50": 106.78400099277496,
- "p90": 111.39199882745743,
- "p95": 113.34399878978729,
- "p99": 117.0239970088005
+ "p50": 152.38399803638458,
+ "p90": 199.64799284934998,
+ "p95": 213.59999477863312,
+ "p99": 233.40800404548645
},
"isolatedSum": {
- "p50": 123.07199835777283,
- "p90": 126.68799981474876,
- "p95": 130.3039975464344,
- "p99": 151.90400183200836
+ "p50": 172.83199727535248,
+ "p90": 198.43199849128723,
+ "p95": 209.50400084257126,
+ "p99": 239.96799439191818
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 602112,
+ "combineLogicalBytes": 602112,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 8,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3445,35 +3274,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 59.55199897289276,
- "p90": 61.824001371860504,
- "p95": 63.680000603199005,
- "p99": 71.07199728488922
+ "p50": 92.57599711418152,
+ "p90": 104.22399640083313,
+ "p95": 109.79200154542923,
+ "p99": 132.06399977207184
},
"combine": {
- "p50": 68.92800331115723,
- "p90": 77.7600035071373,
- "p95": 77.95199751853943,
- "p99": 78.65600287914276
+ "p50": 83.8719978928566,
+ "p90": 89.4400030374527,
+ "p95": 92.6399976015091,
+ "p99": 96.76799923181534
},
"roundtrip": {
- "p50": 120.03199756145477,
- "p90": 124.4800016283989,
- "p95": 125.95200538635254,
- "p99": 145.53600549697876
+ "p50": 154.2080044746399,
+ "p90": 164.5440012216568,
+ "p95": 169.18399930000305,
+ "p99": 179.71199750900269
},
"isolatedSum": {
- "p50": 128.48000228405,
- "p90": 139.5840048789978,
- "p95": 141.63199812173843,
- "p99": 149.72800016403198
+ "p50": 176.44799500703812,
+ "p90": 193.66399943828583,
+ "p95": 202.43199914693832,
+ "p99": 228.83199900388718
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4859904,
+ "combineLogicalBytes": 4859904,
+ "fanoutMean": 5.296875,
+ "recvTokensMax": 64,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3482,35 +3311,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 68.83200258016586,
- "p90": 72.38399982452393,
- "p95": 75.16799867153168,
- "p99": 78.17599922418594
+ "p50": 101.27999633550644,
+ "p90": 116.86400324106216,
+ "p95": 129.2800009250641,
+ "p99": 143.77599954605103
},
"combine": {
- "p50": 78.65600287914276,
- "p90": 79.71200346946716,
- "p95": 80.57600259780884,
- "p99": 100.92800110578537
+ "p50": 93.05600076913834,
+ "p90": 101.56799852848053,
+ "p95": 107.07200318574905,
+ "p99": 119.4240003824234
},
"roundtrip": {
- "p50": 130.72000443935394,
- "p90": 134.2719942331314,
- "p95": 135.74400544166565,
- "p99": 155.7759940624237
+ "p50": 162.81600296497345,
+ "p90": 173.0560064315796,
+ "p95": 176.03200674057007,
+ "p99": 183.16799402236938
},
"isolatedSum": {
- "p50": 147.48800545930862,
- "p90": 152.0960032939911,
- "p95": 155.74400126934052,
- "p99": 179.1040003299713
+ "p50": 194.33599710464478,
+ "p90": 218.4320017695427,
+ "p95": 236.35200411081314,
+ "p99": 263.1999999284744
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19525632,
+ "combineLogicalBytes": 19525632,
+ "fanoutMean": 5.3203125,
+ "recvTokensMax": 256,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3519,35 +3348,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 93.34400296211243,
- "p90": 95.93600034713745,
- "p95": 99.2640033364296,
- "p99": 107.61599987745285
+ "p50": 130.87999820709229,
+ "p90": 144.3839967250824,
+ "p95": 169.21600699424744,
+ "p99": 363.23198676109314
},
"combine": {
- "p50": 115.4559999704361,
- "p90": 116.44800007343292,
- "p95": 117.0559972524643,
- "p99": 126.43200159072876
+ "p50": 142.2719955444336,
+ "p90": 151.58399939537048,
+ "p95": 155.64799308776855,
+ "p99": 173.47200214862823
},
"roundtrip": {
- "p50": 192.9599940776825,
- "p90": 198.81600141525269,
- "p95": 199.8080015182495,
- "p99": 274.1439938545227
+ "p50": 248.06399643421173,
+ "p90": 264.6079957485199,
+ "p95": 275.4879891872406,
+ "p99": 288.12798857688904
},
"isolatedSum": {
- "p50": 208.80000293254852,
- "p90": 212.38400042057037,
- "p95": 216.3200005888939,
- "p99": 234.0480014681816
+ "p50": 273.1519937515259,
+ "p90": 295.9679961204529,
+ "p95": 324.864000082016,
+ "p99": 536.7039889097214
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 78102528,
+ "combineLogicalBytes": 78102528,
+ "fanoutMean": 5.3203125,
+ "recvTokensMax": 1024,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3555,47 +3384,48 @@
]
},
{
- "id": "cx-53b3c366",
- "identity": "b300|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_307ed708",
- "comparisonKey": "8f32ac097503699d",
+ "id": "cx-8c49b354",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||5793a02d08aaa9c",
+ "colorKey": "gb200_d826ab8d",
+ "comparisonKey": "97827a35998e3c24",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:50:59.262697+00:00",
+ "generatedAt": "2026-06-29T13:51:49.411928+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_14",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · bf16 · hotspot-single+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "hotspot-single",
+ "routingLabel": "hotspot-single+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -3603,59 +3433,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "5793a02d08aaa9c",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1.830078125,
+ "eplbImbalanceAfter": 1.0007595486111112,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285677323",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285677323",
- "createdAt": "2026-06-27T09:50:59.262697+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.50399827957153,
- "p90": 59.67999994754791,
- "p95": 61.69600039720535,
- "p99": 81.7599967122078
+ "p50": 90.87999910116196,
+ "p90": 317.984014749527,
+ "p95": 359.6479892730713,
+ "p99": 389.3440067768097
},
"combine": {
- "p50": 67.00800359249115,
- "p90": 68.1919977068901,
- "p95": 69.5360004901886,
- "p99": 77.63200253248215
+ "p50": 82.04799890518188,
+ "p90": 339.80798721313477,
+ "p95": 354.71999645233154,
+ "p99": 369.4399893283844
},
"roundtrip": {
- "p50": 107.51999914646149,
- "p90": 112.92800307273865,
- "p95": 114.49600011110306,
- "p99": 130.68799674510956
+ "p50": 148.6400067806244,
+ "p90": 411.8080139160156,
+ "p95": 429.0879964828491,
+ "p99": 449.6000111103058
},
"isolatedSum": {
- "p50": 124.51200187206268,
- "p90": 127.87199765443802,
- "p95": 131.23200088739395,
- "p99": 159.39199924468994
+ "p50": 172.92799800634384,
+ "p90": 657.7920019626617,
+ "p95": 714.3679857254028,
+ "p99": 758.7839961051941
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 630784,
"combineLogicalBytes": 630784,
"fanoutMean": 5.5,
"recvTokensMax": 7,
- "stragglerRank": 4,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3664,35 +3494,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 57.50399827957153,
- "p90": 59.58399921655655,
- "p95": 61.983998864889145,
- "p99": 74.20799881219864
+ "p50": 91.839998960495,
+ "p90": 330.4319977760315,
+ "p95": 363.072007894516,
+ "p99": 384.768009185791
},
"combine": {
- "p50": 67.32799857854843,
- "p90": 69.43999975919724,
- "p95": 76.9599974155426,
- "p99": 81.50400221347809
+ "p50": 82.11199939250946,
+ "p90": 326.1120021343231,
+ "p95": 352.83198952674866,
+ "p99": 365.1840090751648
},
"roundtrip": {
- "p50": 108.06400328874588,
- "p90": 110.88000237941742,
- "p95": 113.50400000810623,
- "p99": 120.51200121641159
+ "p50": 149.1519957780838,
+ "p90": 380.44801354408264,
+ "p95": 422.87999391555786,
+ "p99": 441.9519901275635
},
"isolatedSum": {
- "p50": 124.83199685811996,
- "p90": 129.02399897575378,
- "p95": 138.94399628043175,
- "p99": 155.71200102567673
+ "p50": 173.95199835300446,
+ "p90": 656.5439999103546,
+ "p95": 715.9039974212646,
+ "p99": 749.9520182609558
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1175552,
+ "combineLogicalBytes": 1175552,
+ "fanoutMean": 5.125,
+ "recvTokensMax": 12,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3701,35 +3531,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 59.51999872922897,
- "p90": 61.76000088453293,
- "p95": 63.64800035953522,
- "p99": 68.12799721956253
+ "p50": 93.72799843549728,
+ "p90": 336.2559974193573,
+ "p95": 363.48798871040344,
+ "p99": 400.2879858016968
},
"combine": {
- "p50": 68.86400282382965,
- "p90": 77.37600058317184,
- "p95": 78.04799824953079,
- "p99": 80.54400235414505
+ "p50": 84.09599959850311,
+ "p90": 324.6079981327057,
+ "p95": 361.02399230003357,
+ "p99": 458.624005317688
},
"roundtrip": {
- "p50": 123.90399724245071,
- "p90": 126.75200402736664,
- "p95": 127.20000743865967,
- "p99": 130.94399869441986
+ "p50": 152.0639955997467,
+ "p90": 391.87198877334595,
+ "p95": 423.007994890213,
+ "p99": 448.0000138282776
},
"isolatedSum": {
- "p50": 128.38400155305862,
- "p90": 139.13600146770477,
- "p95": 141.695998609066,
- "p99": 148.67199957370758
+ "p50": 177.8239980340004,
+ "p90": 660.863995552063,
+ "p95": 724.511981010437,
+ "p99": 858.9119911193848
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2451456,
+ "combineLogicalBytes": 2451456,
+ "fanoutMean": 5.34375,
+ "recvTokensMax": 23,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3738,35 +3568,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 60.447998344898224,
- "p90": 65.5680000782013,
- "p95": 67.71200150251389,
- "p99": 73.88799637556076
+ "p50": 94.33600306510925,
+ "p90": 351.3279855251312,
+ "p95": 371.4880049228668,
+ "p99": 384.70399379730225
},
"combine": {
- "p50": 69.023996591568,
- "p90": 77.63200253248215,
- "p95": 78.27199995517731,
- "p99": 79.68000322580338
+ "p50": 84.6719965338707,
+ "p90": 316.1599934101105,
+ "p95": 347.1679985523224,
+ "p99": 368.7039911746979
},
"roundtrip": {
- "p50": 120.7360029220581,
- "p90": 126.11199915409088,
- "p95": 127.48800218105316,
- "p99": 135.6160044670105
+ "p50": 154.4319987297058,
+ "p90": 396.12799882888794,
+ "p95": 427.0400106906891,
+ "p99": 455.9679925441742
},
"isolatedSum": {
- "p50": 129.47199493646622,
- "p90": 143.20000261068344,
- "p95": 145.9840014576912,
- "p99": 153.56799960136414
+ "p50": 179.00799959897995,
+ "p90": 667.4879789352417,
+ "p95": 718.6560034751892,
+ "p99": 753.4079849720001
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4730880,
+ "combineLogicalBytes": 4730880,
+ "fanoutMean": 5.15625,
+ "recvTokensMax": 44,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3775,35 +3605,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 62.55999952554703,
- "p90": 69.08799707889557,
- "p95": 71.35999947786331,
- "p99": 78.23999971151352
+ "p50": 96.63999825716019,
+ "p90": 343.392014503479,
+ "p95": 373.4399974346161,
+ "p99": 391.4560079574585
},
"combine": {
- "p50": 77.66400277614594,
- "p90": 79.1039988398552,
- "p95": 79.45600152015686,
- "p99": 81.216000020504
+ "p50": 86.30400151014328,
+ "p90": 322.56001234054565,
+ "p95": 349.88799691200256,
+ "p99": 374.783992767334
},
"roundtrip": {
- "p50": 120.25599926710129,
- "p90": 122.65600264072418,
- "p95": 124.15999919176102,
- "p99": 136.63999736309052
+ "p50": 155.64799308776855,
+ "p90": 387.4239921569824,
+ "p95": 428.9279878139496,
+ "p99": 452.63999700546265
},
"isolatedSum": {
- "p50": 140.22400230169296,
- "p90": 148.19199591875076,
- "p95": 150.81600099802017,
- "p99": 159.45599973201752
+ "p50": 182.94399976730347,
+ "p90": 665.9520268440247,
+ "p95": 723.3279943466187,
+ "p99": 766.2400007247925
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 9691136,
+ "combineLogicalBytes": 9691136,
+ "fanoutMean": 5.28125,
+ "recvTokensMax": 88,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3812,35 +3642,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 69.92000341415405,
- "p90": 75.13599842786789,
- "p95": 75.80800354480743,
- "p99": 80.73599636554718
+ "p50": 100.44799745082855,
+ "p90": 344.67199444770813,
+ "p95": 374.2719888687134,
+ "p99": 391.2000060081482
},
"combine": {
- "p50": 78.91199737787247,
- "p90": 79.80799674987793,
- "p95": 80.35200089216232,
- "p99": 83.71199667453766
+ "p50": 93.56799721717834,
+ "p90": 322.6560056209564,
+ "p95": 338.1440043449402,
+ "p99": 371.71199917793274
},
"roundtrip": {
- "p50": 131.26400113105774,
- "p90": 136.06399297714233,
- "p95": 137.79200613498688,
- "p99": 158.78400206565857
+ "p50": 163.93600404262543,
+ "p90": 399.6799886226654,
+ "p95": 419.295996427536,
+ "p99": 453.5039961338043
},
"isolatedSum": {
- "p50": 148.83200079202652,
- "p90": 154.94399517774582,
- "p95": 156.16000443696976,
- "p99": 164.44799304008484
+ "p50": 194.0159946680069,
+ "p90": 667.3280000686646,
+ "p95": 712.4159932136536,
+ "p99": 762.9120051860809
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 19568640,
+ "combineLogicalBytes": 19568640,
+ "fanoutMean": 5.33203125,
+ "recvTokensMax": 179,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3849,35 +3679,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 88.16000074148178,
- "p90": 91.13600105047226,
- "p95": 92.3520028591156,
- "p99": 106.46399855613708
+ "p50": 110.84800213575363,
+ "p90": 352.06401348114014,
+ "p95": 369.1520094871521,
+ "p99": 409.2479944229126
},
"combine": {
- "p50": 92.47999638319016,
- "p90": 100.96000134944916,
- "p95": 102.04800218343735,
- "p99": 116.19199812412262
+ "p50": 107.04000294208527,
+ "p90": 330.1759958267212,
+ "p95": 352.06401348114014,
+ "p99": 389.3119990825653
},
"roundtrip": {
- "p50": 159.8079949617386,
- "p90": 163.42400014400482,
- "p95": 164.8319959640503,
- "p99": 172.03199863433838
+ "p50": 187.9359930753708,
+ "p90": 420.3520119190216,
+ "p95": 462.68799901008606,
+ "p99": 484.16000604629517
},
"isolatedSum": {
- "p50": 180.63999712467194,
- "p90": 192.09600239992142,
- "p95": 194.40000504255295,
- "p99": 222.6559966802597
+ "p50": 217.8880050778389,
+ "p90": 682.2400093078613,
+ "p95": 721.2160229682922,
+ "p99": 798.5599935054779
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38750208,
+ "combineLogicalBytes": 38750208,
+ "fanoutMean": 5.279296875,
+ "recvTokensMax": 348,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3886,35 +3716,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 94.46399658918381,
- "p90": 100.03200173377991,
- "p95": 102.04800218343735,
- "p99": 111.68000102043152
+ "p50": 125.50400197505951,
+ "p90": 370.2400028705597,
+ "p95": 393.887996673584,
+ "p99": 429.2159974575043
},
"combine": {
- "p50": 115.48800021409988,
- "p90": 116.5120005607605,
- "p95": 117.18399822711945,
- "p99": 127.61600315570831
+ "p50": 128.76799702644348,
+ "p90": 391.4240002632141,
+ "p95": 403.872013092041,
+ "p99": 416.22400283813477
},
"roundtrip": {
- "p50": 195.23200392723083,
- "p90": 199.13600385189056,
- "p95": 200.1280039548874,
- "p99": 208.25600624084473
+ "p50": 227.39200294017792,
+ "p90": 472.51200675964355,
+ "p95": 503.04001569747925,
+ "p99": 527.1999835968018
},
"isolatedSum": {
- "p50": 209.9519968032837,
- "p90": 216.5440022945404,
- "p95": 219.2320004105568,
- "p99": 239.29600417613983
+ "p50": 254.271999001503,
+ "p90": 761.6640031337738,
+ "p95": 797.760009765625,
+ "p99": 845.440000295639
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 77342720,
+ "combineLogicalBytes": 77342720,
+ "fanoutMean": 5.2685546875,
+ "recvTokensMax": 687,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -3922,16 +3752,16 @@
]
},
{
- "id": "cx-bb4293a3",
- "identity": "b300|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "b300_c9569580",
- "comparisonKey": "9212a9f938273ac4",
+ "id": "cx-e4160fbb",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||a572344820478f0",
+ "colorKey": "gb200_8703b849",
+ "comparisonKey": "dcee6033928840f5",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:04.417572+00:00",
+ "generatedAt": "2026-06-29T13:59:39.819924+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_12",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -3939,30 +3769,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "Kimi-K2",
+ "label": "GB200 EP8 · deepep · bf16 · uniform+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 384,
+ "experts": 288,
"routing": "uniform",
- "routingLabel": "uniform",
+ "routingLabel": "uniform+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -3970,59 +3801,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "a572344820478f0",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1.078125,
+ "eplbImbalanceAfter": 1.00048828125,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287503016",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503016",
- "createdAt": "2026-06-27T11:14:04.417572+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.82400071620941,
- "p90": 59.90400165319443,
- "p95": 62.97600269317627,
- "p99": 70.592001080513
+ "p50": 89.59999680519104,
+ "p90": 101.75999999046326,
+ "p95": 105.85600137710571,
+ "p99": 112.60800063610077
},
"combine": {
- "p50": 66.52799993753433,
- "p90": 67.58400052785873,
- "p95": 68.9919963479042,
- "p99": 78.87999713420868
+ "p50": 79.13599908351898,
+ "p90": 83.55200290679932,
+ "p95": 85.7279971241951,
+ "p99": 92.44800359010696
},
"roundtrip": {
- "p50": 107.90400207042694,
- "p90": 114.20799791812897,
- "p95": 114.94400352239609,
- "p99": 125.21600723266602
+ "p50": 146.36799693107605,
+ "p90": 157.56799280643463,
+ "p95": 161.02400422096252,
+ "p99": 170.20800709724426
},
"isolatedSum": {
- "p50": 124.35200065374374,
- "p90": 127.48800218105316,
- "p95": 131.96799904108047,
- "p99": 149.47199821472168
+ "p50": 168.73599588871002,
+ "p90": 185.31200289726257,
+ "p95": 191.5839985013008,
+ "p99": 205.05600422620773
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 516096,
+ "combineLogicalBytes": 516096,
+ "fanoutMean": 4.5,
+ "recvTokensMax": 6,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4031,35 +3862,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 58.079998940229416,
- "p90": 61.3120011985302,
- "p95": 64.03200328350067,
- "p99": 79.29600030183792
+ "p50": 90.7839983701706,
+ "p90": 103.29599678516388,
+ "p95": 108.06400328874588,
+ "p99": 122.30399996042252
},
"combine": {
- "p50": 67.10399687290192,
- "p90": 68.41599941253662,
- "p95": 69.98399645090103,
- "p99": 85.50400286912918
+ "p50": 80.28800040483475,
+ "p90": 84.09599959850311,
+ "p95": 85.85599809885025,
+ "p99": 94.27200257778168
},
"roundtrip": {
- "p50": 108.03200304508209,
- "p90": 110.944002866745,
- "p95": 113.15199732780457,
- "p99": 129.15199995040894
+ "p50": 148.5760062932968,
+ "p90": 159.19999778270721,
+ "p95": 164.12800550460815,
+ "p99": 172.57599532604218
},
"isolatedSum": {
- "p50": 125.18399581313133,
- "p90": 129.72800061106682,
- "p95": 134.0159997344017,
- "p99": 164.8000031709671
+ "p50": 171.07199877500534,
+ "p90": 187.391996383667,
+ "p95": 193.92000138759613,
+ "p99": 216.5760025382042
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1218560,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1089536,
+ "combineLogicalBytes": 1089536,
+ "fanoutMean": 4.75,
+ "recvTokensMax": 11,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4068,35 +3899,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 59.7120001912117,
- "p90": 61.85600161552429,
- "p95": 63.1679967045784,
- "p99": 75.39200037717819
+ "p50": 90.46400338411331,
+ "p90": 101.79200023412704,
+ "p95": 104.54399883747101,
+ "p99": 112.92800307273865
},
"combine": {
- "p50": 68.31999868154526,
- "p90": 77.11999863386154,
- "p95": 77.7600035071373,
- "p99": 89.59999680519104
+ "p50": 81.18399977684021,
+ "p90": 85.4400023818016,
+ "p95": 90.52799642086029,
+ "p99": 95.32800316810608
},
"roundtrip": {
- "p50": 123.48800152540207,
- "p90": 127.61600315570831,
- "p95": 128.4479945898056,
- "p99": 141.9840008020401
+ "p50": 149.02399480342865,
+ "p90": 160.67199409008026,
+ "p95": 163.68000209331512,
+ "p99": 173.8239973783493
},
"isolatedSum": {
- "p50": 128.03199887275696,
- "p90": 138.97600024938583,
- "p95": 140.9280002117157,
- "p99": 164.99199718236923
+ "p50": 171.64800316095352,
+ "p90": 187.23200261592865,
+ "p95": 195.0719952583313,
+ "p99": 208.25600624084473
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2408448,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2207744,
+ "combineLogicalBytes": 2207744,
+ "fanoutMean": 4.8125,
+ "recvTokensMax": 23,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4105,35 +3936,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 59.967998415231705,
- "p90": 62.81600147485733,
- "p95": 66.39999896287918,
- "p99": 73.53600114583969
+ "p50": 92.99200028181076,
+ "p90": 102.55999863147736,
+ "p95": 105.82400113344193,
+ "p99": 111.93600296974182
},
"combine": {
- "p50": 68.44799965620041,
- "p90": 76.92799717187881,
- "p95": 77.34400033950806,
- "p99": 82.75199681520462
+ "p50": 83.16799998283386,
+ "p90": 87.00799942016602,
+ "p95": 91.58399701118469,
+ "p99": 99.04000163078308
},
"roundtrip": {
- "p50": 122.17599898576736,
- "p90": 127.07200646400452,
- "p95": 128.25599312782288,
- "p99": 142.68800616264343
+ "p50": 153.98399531841278,
+ "p90": 164.8000031709671,
+ "p95": 166.9439971446991,
+ "p99": 176.35199427604675
},
"isolatedSum": {
- "p50": 128.4159980714321,
- "p90": 139.74399864673615,
- "p95": 143.74399930238724,
- "p99": 156.2879979610443
+ "p50": 176.16000026464462,
+ "p90": 189.56799805164337,
+ "p95": 197.40799814462662,
+ "p99": 210.9760046005249
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4831232,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4558848,
+ "combineLogicalBytes": 4558848,
+ "fanoutMean": 4.96875,
+ "recvTokensMax": 46,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4142,34 +3973,34 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 62.880001962184906,
- "p90": 68.80000233650208,
- "p95": 71.03999704122543,
- "p99": 74.65600222349167
+ "p50": 95.42399644851685,
+ "p90": 105.6319996714592,
+ "p95": 109.53599959611893,
+ "p99": 122.01599776744843
},
"combine": {
- "p50": 69.11999732255936,
- "p90": 78.40000092983246,
- "p95": 78.97599786520004,
- "p99": 82.40000158548355
+ "p50": 83.5840031504631,
+ "p90": 89.34400230646133,
+ "p95": 93.56799721717834,
+ "p99": 95.45599669218063
},
"roundtrip": {
- "p50": 121.11999839544296,
- "p90": 125.34399330615997,
- "p95": 127.13600695133209,
- "p99": 134.8479986190796
+ "p50": 154.4640064239502,
+ "p90": 166.07999801635742,
+ "p95": 169.53599452972412,
+ "p99": 176.57600343227386
},
"isolatedSum": {
- "p50": 131.99999928474426,
- "p90": 147.20000326633453,
- "p95": 150.01599490642548,
- "p99": 157.05600380897522
- },
+ "p50": 179.00799959897995,
+ "p90": 194.97600197792053,
+ "p95": 203.10399681329727,
+ "p99": 217.47199445962906
+ },
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9848832,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
+ "dispatchLogicalBytes": 9347072,
+ "combineLogicalBytes": 9347072,
+ "fanoutMean": 5.09375,
+ "recvTokensMax": 86,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
@@ -4179,35 +4010,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 70.94399631023407,
- "p90": 76.48000121116638,
- "p95": 77.37600058317184,
- "p99": 80.06399869918823
+ "p50": 98.24000298976898,
+ "p90": 108.06400328874588,
+ "p95": 111.61600053310394,
+ "p99": 119.61600184440613
},
"combine": {
- "p50": 79.3600007891655,
- "p90": 80.22399991750717,
- "p95": 81.28000050783157,
- "p99": 91.90399944782257
+ "p50": 91.51999652385712,
+ "p90": 95.74399888515472,
+ "p95": 97.82399982213974,
+ "p99": 103.5199984908104
},
"roundtrip": {
- "p50": 134.46399569511414,
- "p90": 138.20800185203552,
- "p95": 139.71200585365295,
- "p99": 151.2320041656494
+ "p50": 161.95200383663177,
+ "p90": 171.83999717235565,
+ "p95": 174.49599504470825,
+ "p99": 182.3360025882721
},
"isolatedSum": {
- "p50": 150.30399709939957,
- "p90": 156.70400112867355,
- "p95": 158.65600109100342,
- "p99": 171.9679981470108
+ "p50": 189.7599995136261,
+ "p90": 203.8080021739006,
+ "p95": 209.44000035524368,
+ "p99": 223.13600033521652
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
+ "dispatchLogicalBytes": 18995200,
+ "combineLogicalBytes": 18995200,
+ "fanoutMean": 5.17578125,
"recvTokensMax": 178,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4216,35 +4047,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 88.8959988951683,
- "p90": 91.61599725484848,
- "p95": 93.24800223112106,
- "p99": 102.94400155544281
+ "p50": 109.72800105810165,
+ "p90": 118.52800101041794,
+ "p95": 122.6240023970604,
+ "p99": 135.3919953107834
},
"combine": {
- "p50": 92.38400310277939,
- "p90": 100.63999891281128,
- "p95": 101.6639992594719,
- "p99": 104.73600029945374
+ "p50": 105.56799918413162,
+ "p90": 109.31199789047241,
+ "p95": 112.06399649381638,
+ "p99": 118.68800222873688
},
"roundtrip": {
- "p50": 161.31199896335602,
- "p90": 165.0879979133606,
- "p95": 166.46400094032288,
- "p99": 185.7919991016388
+ "p50": 187.32799589633942,
+ "p90": 195.90400159358978,
+ "p95": 198.7839937210083,
+ "p99": 205.72799444198608
},
"isolatedSum": {
- "p50": 181.2800019979477,
- "p90": 192.25599616765976,
- "p95": 194.91200149059296,
- "p99": 207.68000185489655
+ "p50": 215.29600024223328,
+ "p90": 227.83999890089035,
+ "p95": 234.68799889087677,
+ "p99": 254.07999753952026
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38291456,
+ "combineLogicalBytes": 38291456,
+ "fanoutMean": 5.216796875,
+ "recvTokensMax": 348,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4253,35 +4084,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 95.58399766683578,
- "p90": 100.67199915647507,
- "p95": 101.79200023412704,
- "p99": 108.15999656915665
+ "p50": 124.35200065374374,
+ "p90": 132.54399597644806,
+ "p95": 135.68000495433807,
+ "p99": 147.67999947071075
},
"combine": {
- "p50": 115.64800143241882,
- "p90": 116.57600104808807,
- "p95": 117.3119992017746,
- "p99": 128.00000607967377
+ "p50": 126.0479986667633,
+ "p90": 132.32000172138214,
+ "p95": 133.98399949073792,
+ "p99": 143.2960033416748
},
"roundtrip": {
- "p50": 197.05599546432495,
- "p90": 200.95999538898468,
- "p95": 202.84800231456757,
- "p99": 227.90400683879852
+ "p50": 224.44799542427063,
+ "p90": 233.69599878787994,
+ "p95": 236.51200532913208,
+ "p99": 246.68799340724945
},
"isolatedSum": {
- "p50": 211.2319990992546,
- "p90": 217.24800020456314,
- "p95": 219.10399943590164,
- "p99": 236.1600026488304
+ "p50": 250.39999932050705,
+ "p90": 264.8639976978302,
+ "p95": 269.664004445076,
+ "p99": 290.97600281238556
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 77113344,
+ "combineLogicalBytes": 77113344,
+ "fanoutMean": 5.2529296875,
+ "recvTokensMax": 685,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4289,107 +4120,108 @@
]
},
{
- "id": "cx-22c8469b",
- "identity": "b300|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "b300_307ed708",
- "comparisonKey": "382d98414c6b61e6",
+ "id": "cx-ee1bfa1a",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db",
+ "colorKey": "gb200_62fd6d04",
+ "comparisonKey": "ca163ecd5d51bcb6",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:51:28.371280+00:00",
+ "generatedAt": "2026-06-29T13:55:33.751216+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_02",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "Kimi-K2",
+ "label": "GB200 EP8 · deepep · bf16 · zipf",
+ "model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 256,
+ "routing": "zipf",
+ "routingLabel": "zipf",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
"paretoEligible": false
},
"placement": {
- "kind": "packed",
- "nodes": 1,
+ "kind": "adversarial",
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "1093cd76c9cd2db",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285688277",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285688277",
- "createdAt": "2026-06-27T09:51:28.371280+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.440001517534256,
- "p90": 61.63199990987778,
- "p95": 64.64000046253204,
- "p99": 82.33600109815598
+ "p50": 91.16800129413605,
+ "p90": 103.29599678516388,
+ "p95": 107.58399963378906,
+ "p99": 115.55200070142746
},
"combine": {
- "p50": 66.20799750089645,
- "p90": 66.880002617836,
- "p95": 68.41599941253662,
- "p99": 80.32000064849854
+ "p50": 78.87999713420868,
+ "p90": 83.64800363779068,
+ "p95": 86.27200126647949,
+ "p99": 95.48799693584442
},
"roundtrip": {
- "p50": 107.51999914646149,
- "p90": 115.03999680280685,
- "p95": 117.40799993276596,
- "p99": 124.7360035777092
+ "p50": 148.44800531864166,
+ "p90": 158.78400206565857,
+ "p95": 162.56000101566315,
+ "p99": 169.72799599170685
},
"isolatedSum": {
- "p50": 123.64799901843071,
- "p90": 128.51200252771378,
- "p95": 133.05599987506866,
- "p99": 162.6560017466545
+ "p50": 170.04799842834473,
+ "p90": 186.94400042295456,
+ "p95": 193.85600090026855,
+ "p99": 211.03999763727188
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
+ "dispatchLogicalBytes": 444416,
+ "combineLogicalBytes": 444416,
+ "fanoutMean": 3.875,
"recvTokensMax": 8,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4398,35 +4230,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 57.631999254226685,
- "p90": 60.60799956321716,
- "p95": 63.32799792289734,
- "p99": 80.70400357246399
+ "p50": 92.16000139713287,
+ "p90": 103.64799946546555,
+ "p95": 107.35999792814255,
+ "p99": 113.34399878978729
},
"combine": {
- "p50": 66.27199798822403,
- "p90": 67.26399809122086,
- "p95": 68.12799721956253,
- "p99": 78.015998005867
+ "p50": 78.68800312280655,
+ "p90": 83.3280012011528,
+ "p95": 85.11999994516373,
+ "p99": 93.44000369310379
},
"roundtrip": {
- "p50": 106.81600123643875,
- "p90": 109.98400300741196,
- "p95": 112.47999966144562,
- "p99": 124.79999661445618
+ "p50": 147.13600277900696,
+ "p90": 157.21599757671356,
+ "p95": 160.5439931154251,
+ "p99": 169.47199404239655
},
"isolatedSum": {
- "p50": 123.90399724245071,
- "p90": 127.87199765443802,
- "p95": 131.45599514245987,
- "p99": 158.720001578331
+ "p50": 170.84800451993942,
+ "p90": 186.97600066661835,
+ "p95": 192.47999787330627,
+ "p99": 206.78400248289108
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1218560,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 845824,
+ "combineLogicalBytes": 845824,
+ "fanoutMean": 3.6875,
+ "recvTokensMax": 16,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4435,35 +4267,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 57.95200169086456,
- "p90": 61.37600168585777,
- "p95": 65.50399959087372,
- "p99": 79.42400127649307
+ "p50": 94.17600184679031,
+ "p90": 106.91200196743011,
+ "p95": 111.58400028944016,
+ "p99": 127.07200646400452
},
"combine": {
- "p50": 66.97600334882736,
- "p90": 69.34399902820587,
- "p95": 76.67200267314911,
- "p99": 89.63199704885483
+ "p50": 80.9599980711937,
+ "p90": 86.04799956083298,
+ "p95": 90.46400338411331,
+ "p99": 94.87999975681305
},
"roundtrip": {
- "p50": 111.26399785280228,
- "p90": 115.90400338172913,
- "p95": 119.00799721479416,
- "p99": 129.15199995040894
+ "p50": 149.56800639629364,
+ "p90": 159.93599593639374,
+ "p95": 163.32800686359406,
+ "p99": 170.3680008649826
},
"isolatedSum": {
- "p50": 124.92800503969193,
- "p90": 130.72000071406364,
- "p95": 142.17600226402283,
- "p99": 169.0559983253479
+ "p50": 175.135999917984,
+ "p90": 192.9600015282631,
+ "p95": 202.04800367355347,
+ "p99": 221.95200622081757
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2408448,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 1691648,
+ "combineLogicalBytes": 1691648,
+ "fanoutMean": 3.6875,
+ "recvTokensMax": 32,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4472,35 +4304,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 58.9120015501976,
- "p90": 61.055999249219894,
- "p95": 62.81600147485733,
- "p99": 81.88799768686295
+ "p50": 94.59199756383896,
+ "p90": 106.1440035700798,
+ "p95": 109.21599715948105,
+ "p99": 123.1359988451004
},
"combine": {
- "p50": 67.64800101518631,
- "p90": 69.63200122117996,
- "p95": 76.9599974155426,
- "p99": 78.72000336647034
+ "p50": 82.68799632787704,
+ "p90": 90.17600119113922,
+ "p95": 93.85599941015244,
+ "p99": 104.12800312042236
},
"roundtrip": {
- "p50": 123.6800029873848,
- "p90": 125.98399817943573,
- "p95": 126.8479973077774,
- "p99": 133.18400084972382
+ "p50": 151.96800231933594,
+ "p90": 163.87200355529785,
+ "p95": 167.58400201797485,
+ "p99": 174.40000176429749
},
"isolatedSum": {
- "p50": 126.56000256538391,
- "p90": 130.68800047039986,
- "p95": 139.77599889039993,
- "p99": 160.60800105333328
+ "p50": 177.279993891716,
+ "p90": 196.32000476121902,
+ "p95": 203.07199656963348,
+ "p99": 227.26400196552277
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4831232,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 3354624,
+ "combineLogicalBytes": 3354624,
+ "fanoutMean": 3.65625,
+ "recvTokensMax": 64,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4509,35 +4341,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 60.736000537872314,
- "p90": 66.91200286149979,
- "p95": 68.28799843788147,
- "p99": 72.83200323581696
+ "p50": 94.52799707651138,
+ "p90": 104.92800176143646,
+ "p95": 109.15199667215347,
+ "p99": 120.2239990234375
},
"combine": {
- "p50": 68.76800209283829,
- "p90": 77.95199751853943,
- "p95": 78.43200117349625,
- "p99": 78.78399640321732
+ "p50": 82.97599852085114,
+ "p90": 91.80799871683121,
+ "p95": 94.17600184679031,
+ "p99": 98.55999797582626
},
"roundtrip": {
- "p50": 119.77600306272507,
- "p90": 124.67200309038162,
- "p95": 127.10399925708771,
- "p99": 141.37600362300873
+ "p50": 155.03999590873718,
+ "p90": 165.18400609493256,
+ "p95": 168.41599345207214,
+ "p99": 175.26400089263916
},
"isolatedSum": {
- "p50": 129.5040026307106,
- "p90": 144.86400038003922,
- "p95": 146.71999961137772,
- "p99": 151.61599963903427
+ "p50": 177.50399559736252,
+ "p90": 196.73600047826767,
+ "p95": 203.3279985189438,
+ "p99": 218.78399699926376
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9848832,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 6537216,
+ "combineLogicalBytes": 6537216,
+ "fanoutMean": 3.5625,
+ "recvTokensMax": 127,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4546,35 +4378,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 69.76000219583511,
- "p90": 71.19999825954437,
- "p95": 73.79200309515,
- "p99": 84.57600325345993
+ "p50": 98.1760025024414,
+ "p90": 107.4879989027977,
+ "p95": 111.10399663448334,
+ "p99": 119.07199770212173
},
"combine": {
- "p50": 78.49600166082382,
- "p90": 79.45600152015686,
- "p95": 80.4160013794899,
- "p99": 102.33599692583084
+ "p50": 86.30400151014328,
+ "p90": 94.01600062847137,
+ "p95": 96.12800180912018,
+ "p99": 104.032002389431
},
"roundtrip": {
- "p50": 130.97600638866425,
- "p90": 135.68000495433807,
- "p95": 137.1839940547943,
- "p99": 148.41599762439728
+ "p50": 160.22400557994843,
+ "p90": 169.3120002746582,
+ "p95": 171.55200242996216,
+ "p99": 178.68800461292267
},
"isolatedSum": {
- "p50": 148.25600385665894,
- "p90": 150.65599977970123,
- "p95": 154.2080044746399,
- "p99": 186.91200017929077
+ "p50": 184.4800040125847,
+ "p90": 201.50399953126907,
+ "p95": 207.23199844360352,
+ "p99": 223.10400009155273
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 12859392,
+ "combineLogicalBytes": 12859392,
+ "fanoutMean": 3.50390625,
+ "recvTokensMax": 255,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4583,35 +4415,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 86.2400010228157,
- "p90": 89.4400030374527,
- "p95": 90.59199690818787,
- "p99": 99.71199929714203
+ "p50": 106.175996363163,
+ "p90": 115.87200313806534,
+ "p95": 119.71200257539749,
+ "p99": 143.8400000333786
},
"combine": {
- "p50": 91.61599725484848,
- "p90": 93.56799721717834,
- "p95": 95.0080007314682,
- "p99": 104.3199971318245
+ "p50": 103.4879982471466,
+ "p90": 107.61599987745285,
+ "p95": 109.11999642848969,
+ "p99": 117.11999773979187
},
"roundtrip": {
- "p50": 160.288006067276,
- "p90": 166.4319932460785,
- "p95": 173.34400117397308,
- "p99": 184.86399948596954
+ "p50": 186.52799725532532,
+ "p90": 194.59199905395508,
+ "p95": 198.14400374889374,
+ "p99": 204.8639953136444
},
"isolatedSum": {
- "p50": 177.85599827766418,
- "p90": 183.00800025463104,
- "p95": 185.59999763965607,
- "p99": 204.03199642896652
+ "p50": 209.6639946103096,
+ "p90": 223.4880030155182,
+ "p95": 228.83199900388718,
+ "p99": 260.95999777317047
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 25145344,
+ "combineLogicalBytes": 25145344,
+ "fanoutMean": 3.42578125,
+ "recvTokensMax": 510,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4620,35 +4452,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 94.14400160312653,
- "p90": 97.50399738550186,
- "p95": 99.87200051546097,
- "p99": 113.76000195741653
+ "p50": 125.63200294971466,
+ "p90": 134.5600038766861,
+ "p95": 136.63999736309052,
+ "p99": 141.79199934005737
},
"combine": {
- "p50": 115.26399850845337,
- "p90": 115.93600362539291,
- "p95": 117.18399822711945,
- "p99": 131.20000064373016
+ "p50": 134.8479986190796,
+ "p90": 142.91200041770935,
+ "p95": 144.28800344467163,
+ "p99": 150.4639983177185
},
"roundtrip": {
- "p50": 192.89599359035492,
- "p90": 198.68800044059753,
- "p95": 200.19200444221497,
- "p99": 209.18400585651398
+ "p50": 238.5600060224533,
+ "p90": 247.16800451278687,
+ "p95": 250.5280077457428,
+ "p99": 257.3759853839874
},
"isolatedSum": {
- "p50": 209.4080001115799,
- "p90": 213.44000101089478,
- "p95": 217.0559987425804,
- "p99": 244.9600026011467
+ "p50": 260.48000156879425,
+ "p90": 277.47200429439545,
+ "p95": 280.92800080776215,
+ "p99": 292.2559976577759
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4656,16 +4488,16 @@
]
},
{
- "id": "cx-a22ca77b",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971",
- "colorKey": "b300_77566238",
- "comparisonKey": "08fb0b4fb4077abb",
+ "id": "cx-1ce8c4bb",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||7eace9164e82cd6",
+ "colorKey": "gb200_8855aa26",
+ "comparisonKey": "7e97825cbdd9f3b4",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:04.079730+00:00",
+ "generatedAt": "2026-06-29T13:57:43.080262+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_02",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -4673,30 +4505,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · balanced",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-heavy",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
+ "routing": "zipf-heavy",
+ "routingLabel": "zipf-heavy",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -4704,59 +4537,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "2279937619f3971",
- "workloadId": "set:4:7af12818400d6348",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "7eace9164e82cd6",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271873027",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271873027",
- "createdAt": "2026-06-26T23:58:04.079730+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.41600117087364,
- "p90": 58.848001062870026,
- "p95": 61.216000467538834,
- "p99": 80.25600016117096
+ "p50": 89.40800279378891,
+ "p90": 99.67999905347824,
+ "p95": 103.5199984908104,
+ "p99": 109.76000130176544
},
"combine": {
- "p50": 67.6800012588501,
- "p90": 69.60000097751617,
- "p95": 76.73600316047668,
- "p99": 82.62400329113007
+ "p50": 70.23999840021133,
+ "p90": 73.60000163316727,
+ "p95": 79.16799932718277,
+ "p99": 84.95999872684479
},
"roundtrip": {
- "p50": 106.49599879980087,
- "p90": 109.27999764680862,
- "p95": 111.13599687814713,
- "p99": 124.1919994354248
+ "p50": 135.5839967727661,
+ "p90": 144.86399292945862,
+ "p95": 148.6400067806244,
+ "p99": 155.7759940624237
},
"isolatedSum": {
- "p50": 124.09600242972374,
- "p90": 128.4480020403862,
- "p95": 137.95200362801552,
- "p99": 162.88000345230103
+ "p50": 159.64800119400024,
+ "p90": 173.2800006866455,
+ "p95": 182.68799781799316,
+ "p99": 194.72000002861023
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
+ "dispatchLogicalBytes": 172032,
+ "combineLogicalBytes": 172032,
+ "fanoutMean": 1.5,
"recvTokensMax": 8,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4765,35 +4598,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 58.43200162053108,
- "p90": 60.70400029420853,
- "p95": 62.6240000128746,
- "p99": 78.65600287914276
+ "p50": 89.53599631786346,
+ "p90": 100.67199915647507,
+ "p95": 105.27999699115753,
+ "p99": 114.75200206041336
},
"combine": {
- "p50": 77.98399776220322,
- "p90": 78.72000336647034,
- "p95": 78.84799689054489,
- "p99": 81.4720019698143
+ "p50": 72.80000299215317,
+ "p90": 80.51200211048126,
+ "p95": 82.17599987983704,
+ "p99": 86.68799698352814
},
"roundtrip": {
- "p50": 118.07999759912491,
- "p90": 122.91199713945389,
- "p95": 124.1919994354248,
- "p99": 131.99999928474426
+ "p50": 142.39999651908875,
+ "p90": 151.5520066022873,
+ "p95": 154.23999726772308,
+ "p99": 164.73600268363953
},
"isolatedSum": {
- "p50": 136.4159993827343,
- "p90": 139.42400366067886,
- "p95": 141.4719969034195,
- "p99": 160.12800484895706
+ "p50": 162.33599931001663,
+ "p90": 181.18400126695633,
+ "p95": 187.45599687099457,
+ "p99": 201.4399990439415
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
+ "dispatchLogicalBytes": 1376256,
+ "combineLogicalBytes": 1376256,
+ "fanoutMean": 1.5,
"recvTokensMax": 64,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4802,35 +4635,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 69.82400268316269,
- "p90": 71.87200337648392,
- "p95": 73.7600028514862,
- "p99": 84.25600081682205
+ "p50": 97.24800288677216,
+ "p90": 106.23999685049057,
+ "p95": 110.62400043010712,
+ "p99": 121.95199728012085
},
"combine": {
- "p50": 79.16799932718277,
- "p90": 81.08799904584885,
- "p95": 81.91999793052673,
- "p99": 90.71999788284302
+ "p50": 82.40000158548355,
+ "p90": 86.14400029182434,
+ "p95": 91.64799749851227,
+ "p99": 95.10400146245956
},
"roundtrip": {
- "p50": 133.82400572299957,
- "p90": 140.09599387645721,
- "p95": 141.92000031471252,
- "p99": 145.82400023937225
+ "p50": 150.7200002670288,
+ "p90": 159.32799875736237,
+ "p95": 162.1440052986145,
+ "p99": 166.49599373340607
},
"isolatedSum": {
- "p50": 148.99200201034546,
- "p90": 152.96000242233276,
- "p95": 155.68000078201294,
- "p99": 174.97599869966507
+ "p50": 179.6480044722557,
+ "p90": 192.3839971423149,
+ "p95": 202.27199792861938,
+ "p99": 217.0559987425804
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 8,
+ "dispatchLogicalBytes": 5533696,
+ "combineLogicalBytes": 5533696,
+ "fanoutMean": 1.5078125,
"recvTokensMax": 256,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4839,35 +4672,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 105.76000064611435,
- "p90": 107.71200060844421,
- "p95": 109.02400314807892,
- "p99": 114.78400230407715
+ "p50": 121.34400010108948,
+ "p90": 129.82399761676788,
+ "p95": 133.69600474834442,
+ "p99": 144.3520039319992
},
"combine": {
- "p50": 130.36799430847168,
- "p90": 139.615997672081,
- "p95": 140.03199338912964,
- "p99": 143.13599467277527
+ "p50": 130.94399869441986,
+ "p90": 134.65599715709686,
+ "p95": 136.19199395179749,
+ "p99": 146.11199498176575
},
"roundtrip": {
- "p50": 230.68800568580627,
- "p90": 234.52800512313843,
- "p95": 235.55199801921844,
- "p99": 240.09600281715393
+ "p50": 225.66400468349457,
+ "p90": 233.3119958639145,
+ "p95": 236.15999519824982,
+ "p99": 240.79999327659607
},
"isolatedSum": {
- "p50": 236.12799495458603,
- "p90": 247.3279982805252,
- "p95": 249.05599653720856,
- "p99": 257.9199969768524
+ "p50": 252.28799879550934,
+ "p90": 264.47999477386475,
+ "p95": 269.8879987001419,
+ "p99": 290.46399891376495
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
+ "dispatchLogicalBytes": 22650880,
+ "combineLogicalBytes": 22650880,
+ "fanoutMean": 1.54296875,
"recvTokensMax": 1024,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4875,16 +4708,16 @@
]
},
{
- "id": "cx-42672aa9",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500",
- "colorKey": "b300_77566238",
- "comparisonKey": "3fe3497798f4d1dd",
+ "id": "cx-30fe3c0c",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||39778bd75f046da",
+ "colorKey": "gb200_10fda6e8",
+ "comparisonKey": "5ba24bce143d87f0",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:48:00.348230+00:00",
+ "generatedAt": "2026-06-29T13:50:56.638851+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_08",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -4892,30 +4725,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-heavy+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
+ "experts": 288,
+ "routing": "zipf-heavy",
+ "routingLabel": "zipf-heavy+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -4923,59 +4757,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ffa946582edb500",
- "workloadId": "set:8:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "39778bd75f046da",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 7.40625,
+ "eplbImbalanceAfter": 1.0004417782738093,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285609982",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285609982",
- "createdAt": "2026-06-27T09:48:00.348230+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.50399827957153,
- "p90": 59.20000001788139,
- "p95": 61.055999249219894,
- "p99": 68.60800087451935
+ "p50": 94.81599926948547,
+ "p90": 327.2320032119751,
+ "p95": 382.84799456596375,
+ "p99": 408.03200006484985
},
"combine": {
- "p50": 67.74400174617767,
- "p90": 69.66400146484375,
- "p95": 76.83199644088745,
- "p99": 81.40800148248672
+ "p50": 80.86399734020233,
+ "p90": 327.4880051612854,
+ "p95": 357.63201117515564,
+ "p99": 369.28001046180725
},
"roundtrip": {
- "p50": 107.87200182676315,
- "p90": 110.11199653148651,
- "p95": 112.28799819946289,
- "p99": 123.9359974861145
+ "p50": 151.74399316310883,
+ "p90": 415.9039855003357,
+ "p95": 444.09599900245667,
+ "p99": 466.048002243042
},
"isolatedSum": {
- "p50": 125.2480000257492,
- "p90": 128.86400148272514,
- "p95": 137.88799569010735,
- "p99": 150.01600235700607
+ "p50": 175.6799966096878,
+ "p90": 654.7200083732605,
+ "p95": 740.4800057411194,
+ "p99": 777.3120105266571
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 559104,
+ "combineLogicalBytes": 559104,
+ "fanoutMean": 4.875,
+ "recvTokensMax": 6,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -4984,35 +4818,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 57.760000228881836,
- "p90": 60.095999389886856,
- "p95": 62.111999839544296,
- "p99": 75.6480023264885
+ "p50": 94.46399658918381,
+ "p90": 317.82400608062744,
+ "p95": 374.9440014362335,
+ "p99": 404.1920006275177
},
"combine": {
- "p50": 67.96800345182419,
- "p90": 76.64000242948532,
- "p95": 77.44000107049942,
- "p99": 78.72000336647034
+ "p50": 83.16799998283386,
+ "p90": 336.92800998687744,
+ "p95": 363.96801471710205,
+ "p99": 377.56800651550293
},
"roundtrip": {
- "p50": 117.40799993276596,
- "p90": 123.19999933242798,
- "p95": 123.87199699878693,
- "p99": 141.27999544143677
+ "p50": 154.7199934720993,
+ "p90": 418.7839925289154,
+ "p95": 443.1680142879486,
+ "p99": 467.74399280548096
},
"isolatedSum": {
- "p50": 125.72800368070602,
- "p90": 136.73600181937218,
- "p95": 139.55200091004372,
- "p99": 154.36800569295883
+ "p50": 177.63199657201767,
+ "p90": 654.7520160675049,
+ "p95": 738.9120161533356,
+ "p99": 781.7600071430206
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 8,
- "recvTokensMax": 16,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1175552,
+ "combineLogicalBytes": 1175552,
+ "fanoutMean": 5.125,
+ "recvTokensMax": 12,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5021,35 +4855,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 59.42400172352791,
- "p90": 61.43999844789505,
- "p95": 63.29599767923355,
- "p99": 74.78400319814682
+ "p50": 96.47999703884125,
+ "p90": 336.60799264907837,
+ "p95": 380.47999143600464,
+ "p99": 397.7920114994049
},
"combine": {
- "p50": 77.85599678754807,
- "p90": 78.68800312280655,
- "p95": 78.97599786520004,
- "p99": 93.82399916648865
+ "p50": 83.3280012011528,
+ "p90": 323.71199131011963,
+ "p95": 365.7599985599518,
+ "p99": 378.6880075931549
},
"roundtrip": {
- "p50": 119.61600184440613,
- "p90": 124.51200187206268,
- "p95": 125.76000392436981,
- "p99": 132.1599930524826
+ "p50": 154.40000593662262,
+ "p90": 409.66400504112244,
+ "p95": 441.0240054130554,
+ "p99": 464.1599953174591
},
"isolatedSum": {
- "p50": 137.27999851107597,
- "p90": 140.1280015707016,
- "p95": 142.2719955444336,
- "p99": 168.60800236463547
+ "p50": 179.80799823999405,
+ "p90": 660.319983959198,
+ "p95": 746.2399899959564,
+ "p99": 776.4800190925598
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 8,
- "recvTokensMax": 32,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2465792,
+ "combineLogicalBytes": 2465792,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 25,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5058,35 +4892,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 59.776000678539276,
- "p90": 62.04799935221672,
- "p95": 63.10400366783142,
- "p99": 72.86400347948074
+ "p50": 97.31200337409973,
+ "p90": 343.00801157951355,
+ "p95": 380.47999143600464,
+ "p99": 403.55199575424194
},
"combine": {
- "p50": 78.3040001988411,
- "p90": 78.72000336647034,
- "p95": 78.97599786520004,
- "p99": 82.14399963617325
+ "p50": 86.2400010228157,
+ "p90": 327.1999955177307,
+ "p95": 362.36798763275146,
+ "p99": 378.52799892425537
},
"roundtrip": {
- "p50": 119.84000355005264,
- "p90": 122.75200337171555,
- "p95": 125.37600100040436,
- "p99": 154.40000593662262
+ "p50": 161.69600188732147,
+ "p90": 429.56799268722534,
+ "p95": 454.27200198173523,
+ "p99": 468.9280092716217
},
"isolatedSum": {
- "p50": 138.08000087738037,
- "p90": 140.76800271868706,
- "p95": 142.08000153303146,
- "p99": 155.008003115654
+ "p50": 183.55200439691544,
+ "p90": 670.2080070972443,
+ "p95": 742.8479790687561,
+ "p99": 782.0799946784973
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
- "recvTokensMax": 64,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4988928,
+ "combineLogicalBytes": 4988928,
+ "fanoutMean": 5.4375,
+ "recvTokensMax": 47,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5095,35 +4929,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 72.67200201749802,
- "p90": 76.4480009675026,
- "p95": 77.27999985218048,
- "p99": 82.75199681520462
+ "p50": 105.92000186443329,
+ "p90": 365.7599985599518,
+ "p95": 393.5999870300293,
+ "p99": 440.73599576950073
},
"combine": {
- "p50": 78.40000092983246,
- "p90": 78.87999713420868,
- "p95": 79.26400005817413,
- "p99": 82.65600353479385
+ "p50": 89.82399851083755,
+ "p90": 329.02398705482483,
+ "p95": 364.9600148200989,
+ "p99": 384.95999574661255
},
"roundtrip": {
- "p50": 121.40800058841705,
- "p90": 127.03999876976013,
- "p95": 128.7039965391159,
- "p99": 145.1839953660965
+ "p50": 163.26400637626648,
+ "p90": 414.0160083770752,
+ "p95": 437.18400597572327,
+ "p99": 466.97598695755005
},
"isolatedSum": {
- "p50": 151.07200294733047,
- "p90": 155.32799810171127,
- "p95": 156.54399991035461,
- "p99": 165.40800034999847
+ "p50": 195.74400037527084,
+ "p90": 694.7839856147766,
+ "p95": 758.5600018501282,
+ "p99": 825.6959915161133
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 8,
- "recvTokensMax": 128,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 9791488,
+ "combineLogicalBytes": 9791488,
+ "fanoutMean": 5.3359375,
+ "recvTokensMax": 94,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5132,35 +4966,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 70.94399631023407,
- "p90": 72.76800274848938,
- "p95": 74.17599856853485,
- "p99": 80.99199831485748
+ "p50": 101.43999755382538,
+ "p90": 370.0160086154938,
+ "p95": 390.01598954200745,
+ "p99": 421.5039908885956
},
"combine": {
- "p50": 80.06399869918823,
- "p90": 81.66400343179703,
- "p95": 89.24800157546997,
- "p99": 106.36799782514572
+ "p50": 93.98400038480759,
+ "p90": 326.2079954147339,
+ "p95": 340.1600122451782,
+ "p99": 388.0639970302582
},
"roundtrip": {
- "p50": 134.33599472045898,
- "p90": 141.4719969034195,
- "p95": 143.36000382900238,
- "p99": 156.25600516796112
+ "p50": 165.47200083732605,
+ "p90": 417.6959991455078,
+ "p95": 434.143990278244,
+ "p99": 472.927987575531
},
"isolatedSum": {
- "p50": 151.0079950094223,
- "p90": 154.4320061802864,
- "p95": 163.42400014400482,
- "p99": 187.3599961400032
+ "p50": 195.42399793863297,
+ "p90": 696.2240040302277,
+ "p95": 730.1760017871857,
+ "p99": 809.5679879188538
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 8,
- "recvTokensMax": 256,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 19410944,
+ "combineLogicalBytes": 19410944,
+ "fanoutMean": 5.2890625,
+ "recvTokensMax": 178,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5169,35 +5003,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 85.53600311279297,
- "p90": 89.05600011348724,
- "p95": 92.25600212812424,
- "p99": 105.76000064611435
+ "p50": 113.79200220108032,
+ "p90": 363.20000886917114,
+ "p95": 397.92001247406006,
+ "p99": 420.54399847984314
},
"combine": {
- "p50": 94.08000111579895,
- "p90": 103.04000228643417,
- "p95": 103.29599678516388,
- "p99": 114.30399864912033
+ "p50": 107.58399963378906,
+ "p90": 342.6879942417145,
+ "p95": 355.1360070705414,
+ "p99": 399.07199144363403
},
"roundtrip": {
- "p50": 169.11999881267548,
- "p90": 172.63999581336975,
- "p95": 174.75199699401855,
- "p99": 194.17600333690643
+ "p50": 189.91999328136444,
+ "p90": 450.5600035190582,
+ "p95": 483.96798968315125,
+ "p99": 507.61598348617554
},
"isolatedSum": {
- "p50": 179.61600422859192,
- "p90": 192.09600239992142,
- "p95": 195.55199891328812,
- "p99": 220.06399929523468
+ "p50": 221.37600183486938,
+ "p90": 705.8880031108856,
+ "p95": 753.0560195446014,
+ "p99": 819.6159899234772
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 8,
- "recvTokensMax": 512,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38678528,
+ "combineLogicalBytes": 38678528,
+ "fanoutMean": 5.26953125,
+ "recvTokensMax": 360,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5206,35 +5040,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 107.13600367307663,
- "p90": 125.88800489902496,
- "p95": 139.71200585365295,
- "p99": 175.55199563503265
+ "p50": 127.42400169372559,
+ "p90": 383.93598794937134,
+ "p95": 410.3679955005646,
+ "p99": 428.9279878139496
},
"combine": {
- "p50": 131.3920021057129,
- "p90": 139.90400731563568,
- "p95": 140.09599387645721,
- "p99": 151.61600708961487
+ "p50": 127.9039978981018,
+ "p90": 394.23999190330505,
+ "p95": 412.992000579834,
+ "p99": 428.5759925842285
},
"roundtrip": {
- "p50": 231.7119985818863,
- "p90": 236.28799617290497,
- "p95": 238.75199258327484,
- "p99": 258.2719922065735
+ "p50": 228.86399924755096,
+ "p90": 287.80800104141235,
+ "p95": 501.8240213394165,
+ "p99": 531.4559936523438
},
"isolatedSum": {
- "p50": 238.52800577878952,
- "p90": 265.79201221466064,
- "p95": 279.80799973011017,
- "p99": 327.1680027246475
+ "p50": 255.3279995918274,
+ "p90": 778.1759798526764,
+ "p95": 823.3599960803986,
+ "p99": 857.5039803981781
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 77285376,
+ "combineLogicalBytes": 77285376,
+ "fanoutMean": 5.2646484375,
+ "recvTokensMax": 704,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5242,16 +5076,16 @@
]
},
{
- "id": "cx-c5ecae32",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8",
- "colorKey": "b300_a314501b",
- "comparisonKey": "a145623f8abcc709",
+ "id": "cx-d3049a56",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||a3b13bb200bb717",
+ "colorKey": "gb200_0cd6b029",
+ "comparisonKey": "c40370edc4d42626",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:12.406102+00:00",
+ "generatedAt": "2026-06-29T14:02:09.823089+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_06",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -5259,30 +5093,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · balanced-rank-local",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-mild",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "balanced-rank-local",
- "routingLabel": "balanced-rank-local",
+ "routing": "zipf-mild",
+ "routingLabel": "zipf-mild",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -5290,95 +5125,206 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "d02a66236b524b8",
- "workloadId": "set:4:2eebbed158fe1320",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "a3b13bb200bb717",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271879618",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271879618",
- "createdAt": "2026-06-26T23:58:12.406102+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 62.97600269317627,
- "p90": 65.21599739789963,
- "p95": 66.01600348949432,
- "p99": 75.74400305747986
+ "p50": 88.28800171613693,
+ "p90": 101.85600072145462,
+ "p95": 104.60799932479858,
+ "p99": 112.73600161075592
},
"combine": {
- "p50": 54.336000233888626,
- "p90": 55.26399984955788,
- "p95": 56.60799890756607,
- "p99": 65.5359998345375
+ "p50": 79.6160027384758,
+ "p90": 83.5840031504631,
+ "p95": 88.83199840784073,
+ "p99": 95.8079993724823
},
"roundtrip": {
- "p50": 94.94400024414062,
- "p90": 98.27200323343277,
- "p95": 100.63999891281128,
- "p99": 111.93600296974182
+ "p50": 146.464005112648,
+ "p90": 158.65600109100342,
+ "p95": 162.59199380874634,
+ "p99": 169.15200650691986
},
"isolatedSum": {
- "p50": 117.3120029270649,
- "p90": 120.4799972474575,
- "p95": 122.6240023970604,
- "p99": 141.28000289201736
+ "p50": 167.90400445461273,
+ "p90": 185.44000387191772,
+ "p95": 193.4399977326393,
+ "p99": 208.54400098323822
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 114688,
- "combineLogicalBytes": 114688,
- "fanoutMean": 1,
- "recvTokensMax": 4,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
+ "dispatchLogicalBytes": 587776,
+ "combineLogicalBytes": 587776,
+ "fanoutMean": 5.125,
+ "recvTokensMax": 8,
+ "stragglerRank": 5,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 2,
+ "globalTokens": 16,
+ "dispatch": {
+ "p50": 88.128000497818,
+ "p90": 100.80000013113022,
+ "p95": 105.53599894046783,
+ "p99": 113.79200220108032
+ },
+ "combine": {
+ "p50": 80.4160013794899,
+ "p90": 84.60800349712372,
+ "p95": 87.39200234413147,
+ "p99": 95.551997423172
+ },
+ "roundtrip": {
+ "p50": 147.2959965467453,
+ "p90": 159.55199301242828,
+ "p95": 163.10399770736694,
+ "p99": 171.29600048065186
+ },
+ "isolatedSum": {
+ "p50": 168.5440018773079,
+ "p90": 185.40800362825394,
+ "p95": 192.9280012845993,
+ "p99": 209.34399962425232
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 1103872,
+ "combineLogicalBytes": 1103872,
+ "fanoutMean": 4.8125,
+ "recvTokensMax": 16,
+ "stragglerRank": 5,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 4,
+ "globalTokens": 32,
+ "dispatch": {
+ "p50": 90.7519981265068,
+ "p90": 103.39199751615524,
+ "p95": 109.21599715948105,
+ "p99": 118.30399930477142
+ },
+ "combine": {
+ "p50": 82.17599987983704,
+ "p90": 88.44800293445587,
+ "p95": 92.51199662685394,
+ "p99": 98.39999675750732
+ },
+ "roundtrip": {
+ "p50": 150.751993060112,
+ "p90": 161.43999993801117,
+ "p95": 165.72800278663635,
+ "p99": 173.69599640369415
+ },
+ "isolatedSum": {
+ "p50": 172.92799800634384,
+ "p90": 191.84000045061111,
+ "p95": 201.727993786335,
+ "p99": 216.70399606227875
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 2250752,
+ "combineLogicalBytes": 2250752,
+ "fanoutMean": 4.90625,
+ "recvTokensMax": 31,
+ "stragglerRank": 5,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
{
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 59.39200147986412,
- "p90": 61.63199990987778,
- "p95": 62.65600025653839,
- "p99": 71.68000191450119
+ "p50": 92.44800359010696,
+ "p90": 105.02400249242783,
+ "p95": 107.71200060844421,
+ "p99": 118.17599833011627
},
"combine": {
- "p50": 56.73599988222122,
- "p90": 65.34399837255478,
- "p95": 65.95200300216675,
- "p99": 85.4400023818016
+ "p50": 84.3840017914772,
+ "p90": 131.6480040550232,
+ "p95": 169.3120002746582,
+ "p99": 203.5519927740097
},
"roundtrip": {
- "p50": 108.57599973678589,
- "p90": 113.56800049543381,
- "p95": 114.84800279140472,
- "p99": 120.12799829244614
+ "p50": 155.2640050649643,
+ "p90": 188.1600022315979,
+ "p95": 260.44800877571106,
+ "p99": 295.7119941711426
},
"isolatedSum": {
- "p50": 116.12800136208534,
- "p90": 126.97599828243256,
- "p95": 128.60800325870514,
- "p99": 157.1200042963028
+ "p50": 176.83200538158417,
+ "p90": 236.67200654745102,
+ "p95": 277.0240008831024,
+ "p99": 321.727991104126
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 1,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 4472832,
+ "combineLogicalBytes": 4472832,
+ "fanoutMean": 4.875,
+ "recvTokensMax": 62,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 16,
+ "globalTokens": 128,
+ "dispatch": {
+ "p50": 93.82399916648865,
+ "p90": 106.1440035700798,
+ "p95": 109.37599837779999,
+ "p99": 114.9120032787323
+ },
+ "combine": {
+ "p50": 84.09599959850311,
+ "p90": 90.52799642086029,
+ "p95": 92.28800237178802,
+ "p99": 96.41599655151367
+ },
+ "roundtrip": {
+ "p50": 155.13600409030914,
+ "p90": 167.61599481105804,
+ "p95": 170.59199512004852,
+ "p99": 178.20799350738525
+ },
+ "isolatedSum": {
+ "p50": 177.91999876499176,
+ "p90": 196.6719999909401,
+ "p95": 201.664000749588,
+ "p99": 211.32799983024597
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 8888320,
+ "combineLogicalBytes": 8888320,
+ "fanoutMean": 4.84375,
+ "recvTokensMax": 124,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
@@ -5388,34 +5334,71 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 70.72000205516815,
- "p90": 76.57600194215775,
- "p95": 77.88799703121185,
- "p99": 85.31200140714645
+ "p50": 97.63199836015701,
+ "p90": 109.24799740314484,
+ "p95": 112.2559979557991,
+ "p99": 118.49600076675415
},
"combine": {
- "p50": 66.6240006685257,
- "p90": 67.32799857854843,
- "p95": 67.61600077152252,
- "p99": 78.84799689054489
+ "p50": 91.67999774217606,
+ "p90": 96.22400254011154,
+ "p95": 98.4639972448349,
+ "p99": 105.95200210809708
},
"roundtrip": {
- "p50": 120.51200121641159,
- "p90": 123.99999797344208,
- "p95": 124.64000284671783,
- "p99": 130.0159990787506
+ "p50": 161.8880033493042,
+ "p90": 172.44799435138702,
+ "p95": 176.35199427604675,
+ "p99": 183.32800269126892
},
"isolatedSum": {
- "p50": 137.34400272369385,
- "p90": 143.90400052070618,
- "p95": 145.50399780273438,
- "p99": 164.15999829769135
+ "p50": 189.31199610233307,
+ "p90": 205.47199994325638,
+ "p95": 210.719995200634,
+ "p99": 224.44800287485123
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 1,
- "recvTokensMax": 32,
+ "dispatchLogicalBytes": 17733632,
+ "combineLogicalBytes": 17733632,
+ "fanoutMean": 4.83203125,
+ "recvTokensMax": 248,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 110.59200018644333,
+ "p90": 120.35199999809265,
+ "p95": 123.29600006341934,
+ "p99": 131.84000551700592
+ },
+ "combine": {
+ "p50": 106.08000308275223,
+ "p90": 110.3999987244606,
+ "p95": 115.00799655914307,
+ "p99": 120.7360029220581
+ },
+ "roundtrip": {
+ "p50": 190.11199474334717,
+ "p90": 198.4959989786148,
+ "p95": 202.2079974412918,
+ "p99": 209.82399582862854
+ },
+ "isolatedSum": {
+ "p50": 216.67200326919556,
+ "p90": 230.75199872255325,
+ "p95": 238.3039966225624,
+ "p99": 252.57600843906403
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 35424256,
+ "combineLogicalBytes": 35424256,
+ "fanoutMean": 4.826171875,
+ "recvTokensMax": 492,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
@@ -5425,35 +5408,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 70.11199742555618,
- "p90": 71.87200337648392,
- "p95": 73.79200309515,
- "p99": 79.64800298213959
+ "p50": 128.25599312782288,
+ "p90": 137.66400516033173,
+ "p95": 142.20799505710602,
+ "p99": 147.87200093269348
},
"combine": {
- "p50": 68.35199892520905,
- "p90": 70.04799693822861,
- "p95": 76.92799717187881,
- "p99": 79.1039988398552
+ "p50": 135.51999628543854,
+ "p90": 144.0960019826889,
+ "p95": 145.75999975204468,
+ "p99": 152.6080071926117
},
"roundtrip": {
- "p50": 122.23999947309494,
- "p90": 129.5360028743744,
- "p95": 131.32800161838531,
- "p99": 142.87999272346497
+ "p50": 241.02400243282318,
+ "p90": 249.91999566555023,
+ "p95": 253.02401185035706,
+ "p99": 258.7200105190277
},
"isolatedSum": {
- "p50": 138.46399635076523,
- "p90": 141.92000031471252,
- "p95": 150.7200002670288,
- "p99": 158.75200182199478
+ "p50": 263.7759894132614,
+ "p90": 281.76000714302063,
+ "p95": 287.9679948091507,
+ "p99": 300.4800081253052
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 1,
- "recvTokensMax": 128,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 70160384,
+ "combineLogicalBytes": 70160384,
+ "fanoutMean": 4.779296875,
+ "recvTokensMax": 987,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5461,16 +5444,16 @@
]
},
{
- "id": "cx-db4e17eb",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b",
- "colorKey": "b300_592e9a16",
- "comparisonKey": "22200746e5037727",
+ "id": "cx-acc388f6",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||ab982093c4eac2b",
+ "colorKey": "gb200_4a0087e5",
+ "comparisonKey": "c58460e1e2fc4307",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:48:06.153274+00:00",
+ "generatedAt": "2026-06-29T14:02:31.096616+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -5478,30 +5461,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · balanced+eplb",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-mild+eplb",
"model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 288,
- "routing": "balanced",
- "routingLabel": "balanced+eplb",
+ "routing": "zipf-mild",
+ "routingLabel": "zipf-mild+eplb",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -5509,59 +5493,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "f0e66a15078595b",
- "workloadId": "set:8:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1,
- "eplbImbalanceAfter": 1,
- "backendVersion": "1.2.1",
+ "traceSignature": "ab982093c4eac2b",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 2.61328125,
+ "eplbImbalanceAfter": 1.0009114583333334,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285612438",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285612438",
- "createdAt": "2026-06-27T09:48:06.153274+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.760000228881836,
- "p90": 60.864001512527466,
- "p95": 63.13599646091461,
- "p99": 69.08799707889557
+ "p50": 95.32800316810608,
+ "p90": 107.87200182676315,
+ "p95": 112.92800307273865,
+ "p99": 125.82400441169739
},
"combine": {
- "p50": 55.52000179886818,
- "p90": 57.37600103020668,
- "p95": 64.44799900054932,
- "p99": 66.17599725723267
+ "p50": 79.71200346946716,
+ "p90": 84.63999629020691,
+ "p95": 89.6959975361824,
+ "p99": 95.36000341176987
},
"roundtrip": {
- "p50": 95.29600292444229,
- "p90": 98.14400225877762,
- "p95": 99.64799880981445,
- "p99": 105.05600273609161
+ "p50": 152.25599706172943,
+ "p90": 163.29599916934967,
+ "p95": 167.39200055599213,
+ "p99": 177.69600450992584
},
"isolatedSum": {
- "p50": 113.28000202775002,
- "p90": 118.24000254273415,
- "p95": 127.58399546146393,
- "p99": 135.26399433612823
+ "p50": 175.04000663757324,
+ "p90": 192.51199811697006,
+ "p95": 202.62400060892105,
+ "p99": 221.18400782346725
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 229376,
- "combineLogicalBytes": 229376,
- "fanoutMean": 2,
- "recvTokensMax": 3,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 602112,
+ "combineLogicalBytes": 602112,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5570,35 +5554,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 57.8560009598732,
- "p90": 59.84000116586685,
- "p95": 61.72800064086914,
- "p99": 72.41600006818771
+ "p50": 94.30400282144547,
+ "p90": 106.59199953079224,
+ "p95": 111.42399907112122,
+ "p99": 122.3360002040863
},
"combine": {
- "p50": 56.76800012588501,
- "p90": 65.63200056552887,
- "p95": 66.17599725723267,
- "p99": 66.94400310516357
+ "p50": 81.53600245714188,
+ "p90": 87.36000210046768,
+ "p95": 90.84799885749817,
+ "p99": 96.16000205278397
},
"roundtrip": {
- "p50": 105.34399747848511,
- "p90": 112.15999722480774,
- "p95": 113.40799927711487,
- "p99": 127.26399302482605
+ "p50": 154.01600301265717,
+ "p90": 165.82399606704712,
+ "p95": 168.99199783802032,
+ "p99": 178.43200266361237
},
"isolatedSum": {
- "p50": 114.62400108575821,
- "p90": 125.47200173139572,
- "p95": 127.9039978981018,
- "p99": 139.3600031733513
+ "p50": 175.84000527858734,
+ "p90": 193.95200163125992,
+ "p95": 202.27199792861938,
+ "p99": 218.49600225687027
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 458752,
- "combineLogicalBytes": 458752,
- "fanoutMean": 2,
- "recvTokensMax": 6,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1189888,
+ "combineLogicalBytes": 1189888,
+ "fanoutMean": 5.1875,
+ "recvTokensMax": 12,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5607,35 +5591,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 57.82400071620941,
- "p90": 59.776000678539276,
- "p95": 61.535999178886414,
- "p99": 68.12799721956253
+ "p50": 95.96800059080124,
+ "p90": 107.64800012111664,
+ "p95": 114.1119971871376,
+ "p99": 131.55199587345123
},
"combine": {
- "p50": 65.60000032186508,
- "p90": 66.46399945020676,
- "p95": 66.97600334882736,
- "p99": 77.504001557827
+ "p50": 83.26400071382523,
+ "p90": 90.43200314044952,
+ "p95": 93.24800223112106,
+ "p99": 101.18400305509567
},
"roundtrip": {
- "p50": 111.29599809646606,
- "p90": 114.14399743080139,
- "p95": 114.84800279140472,
- "p99": 123.45600128173828
+ "p50": 156.70399367809296,
+ "p90": 167.52000153064728,
+ "p95": 171.83999717235565,
+ "p99": 181.05599284172058
},
"isolatedSum": {
- "p50": 123.4240010380745,
- "p90": 126.24000012874603,
- "p95": 128.51200252771378,
- "p99": 145.63199877738953
+ "p50": 179.23200130462646,
+ "p90": 198.08000326156616,
+ "p95": 207.35999941825867,
+ "p99": 232.7359989285469
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 2,
- "recvTokensMax": 12,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2408448,
+ "combineLogicalBytes": 2408448,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 23,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5644,35 +5628,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 59.23200026154518,
- "p90": 60.864001512527466,
- "p95": 62.55999952554703,
- "p99": 69.18399780988693
+ "p50": 97.79199957847595,
+ "p90": 109.63200032711029,
+ "p95": 114.75200206041336,
+ "p99": 130.14400005340576
},
"combine": {
- "p50": 65.88800251483917,
- "p90": 66.59200042486191,
- "p95": 66.94400310516357,
- "p99": 69.5360004901886
+ "p50": 85.1840004324913,
+ "p90": 94.04800087213516,
+ "p95": 97.75999933481216,
+ "p99": 148.60799908638
},
"roundtrip": {
- "p50": 107.07200318574905,
- "p90": 109.50399935245514,
- "p95": 111.29599809646606,
- "p99": 122.52800166606903
+ "p50": 159.743994474411,
+ "p90": 172.06400632858276,
+ "p95": 179.77599799633026,
+ "p99": 339.9040102958679
},
"isolatedSum": {
- "p50": 125.12000277638435,
- "p90": 127.45600193738937,
- "p95": 129.5040026307106,
- "p99": 138.71999830007553
+ "p50": 182.97600001096725,
+ "p90": 203.68000119924545,
+ "p95": 212.51200139522552,
+ "p99": 278.75199913978577
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 2,
- "recvTokensMax": 24,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4859904,
+ "combineLogicalBytes": 4859904,
+ "fanoutMean": 5.296875,
+ "recvTokensMax": 47,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5681,35 +5665,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 60.575999319553375,
- "p90": 63.64800035953522,
- "p95": 66.20799750089645,
- "p99": 75.58400183916092
+ "p50": 98.59199821949005,
+ "p90": 109.8560020327568,
+ "p95": 114.14399743080139,
+ "p99": 128.86400520801544
},
"combine": {
- "p50": 66.17599725723267,
- "p90": 66.97600334882736,
- "p95": 67.19999760389328,
- "p99": 70.14399766921997
+ "p50": 86.7839977145195,
+ "p90": 93.59999746084213,
+ "p95": 95.67999839782715,
+ "p99": 103.16800326108932
},
"roundtrip": {
- "p50": 108.09600353240967,
- "p90": 110.20799726247787,
- "p95": 112.2559979557991,
- "p99": 118.94399672746658
+ "p50": 162.1759980916977,
+ "p90": 173.43999445438385,
+ "p95": 176.7359972000122,
+ "p99": 182.36799538135529
},
"isolatedSum": {
- "p50": 126.75199657678604,
- "p90": 130.62400370836258,
- "p95": 133.40799510478973,
- "p99": 145.7279995083809
+ "p50": 185.37599593400955,
+ "p90": 203.45599949359894,
+ "p95": 209.82399582862854,
+ "p99": 232.03200846910477
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 2,
- "recvTokensMax": 48,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 9605120,
+ "combineLogicalBytes": 9605120,
+ "fanoutMean": 5.234375,
+ "recvTokensMax": 93,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5718,35 +5702,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 60.67200005054474,
- "p90": 62.752000987529755,
- "p95": 64.03200328350067,
- "p99": 73.95199686288834
+ "p50": 101.98400169610977,
+ "p90": 111.90400272607803,
+ "p95": 115.35999923944473,
+ "p99": 121.08799815177917
},
"combine": {
- "p50": 66.23999774456024,
- "p90": 67.1359971165657,
- "p95": 67.61600077152252,
- "p99": 78.14399898052216
+ "p50": 92.73599833250046,
+ "p90": 98.14400225877762,
+ "p95": 101.1200025677681,
+ "p99": 107.45599865913391
},
"roundtrip": {
- "p50": 108.89600217342377,
- "p90": 111.39199882745743,
- "p95": 113.69600147008896,
- "p99": 122.52800166606903
+ "p50": 166.9120043516159,
+ "p90": 177.18400061130524,
+ "p95": 181.92000687122345,
+ "p99": 189.11999464035034
},
"isolatedSum": {
- "p50": 126.91199779510498,
- "p90": 129.88799810409546,
- "p95": 131.6480040550232,
- "p99": 152.0959958434105
+ "p50": 194.72000002861023,
+ "p90": 210.04800498485565,
+ "p95": 216.48000180721283,
+ "p99": 228.5439968109131
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 2,
- "recvTokensMax": 96,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 19367936,
+ "combineLogicalBytes": 19367936,
+ "fanoutMean": 5.27734375,
+ "recvTokensMax": 182,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5755,35 +5739,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 76.64000242948532,
- "p90": 78.52800190448761,
- "p95": 79.29600030183792,
- "p99": 89.37600255012512
+ "p50": 111.90400272607803,
+ "p90": 121.47200107574463,
+ "p95": 125.34399330615997,
+ "p99": 131.23199343681335
},
"combine": {
- "p50": 68.00000369548798,
- "p90": 76.80000364780426,
- "p95": 77.47200131416321,
- "p99": 79.39200103282928
+ "p50": 107.80800133943558,
+ "p90": 116.06399714946747,
+ "p95": 119.10399794578552,
+ "p99": 127.23200023174286
},
"roundtrip": {
- "p50": 124.25599992275238,
- "p90": 128.9599984884262,
- "p95": 129.7920048236847,
- "p99": 141.59999787807465
+ "p50": 192.9599940776825,
+ "p90": 203.99999618530273,
+ "p95": 210.207998752594,
+ "p99": 223.61600399017334
},
"isolatedSum": {
- "p50": 144.6400061249733,
- "p90": 155.32800555229187,
- "p95": 156.76800161600113,
- "p99": 168.7680035829544
+ "p50": 219.7120040655136,
+ "p90": 237.5359982252121,
+ "p95": 244.4479912519455,
+ "p99": 258.4639936685562
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 2,
- "recvTokensMax": 192,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38535168,
+ "combineLogicalBytes": 38535168,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 358,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5792,35 +5776,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 81.18399977684021,
- "p90": 87.55200356245041,
- "p95": 89.47200328111649,
- "p99": 95.74399888515472
+ "p50": 127.20000743865967,
+ "p90": 135.00800728797913,
+ "p95": 138.65600526332855,
+ "p99": 144.16000247001648
},
"combine": {
- "p50": 81.98399841785431,
- "p90": 90.7839983701706,
- "p95": 91.0400003194809,
- "p99": 102.78400033712387
+ "p50": 125.69600343704224,
+ "p90": 133.02400708198547,
+ "p95": 137.63199746608734,
+ "p99": 143.5520052909851
},
"roundtrip": {
- "p50": 146.08000218868256,
- "p90": 148.28799664974213,
- "p95": 150.81599354743958,
- "p99": 159.743994474411
+ "p50": 226.4000028371811,
+ "p90": 235.07200181484222,
+ "p95": 239.19999599456787,
+ "p99": 247.55200743675232
},
"isolatedSum": {
- "p50": 163.16799819469452,
- "p90": 178.336001932621,
- "p95": 180.51200360059738,
- "p99": 198.5279992222786
+ "p50": 252.8960108757019,
+ "p90": 268.0320143699646,
+ "p95": 276.2880027294159,
+ "p99": 287.7120077610016
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 2,
- "recvTokensMax": 384,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 76869632,
+ "combineLogicalBytes": 76869632,
+ "fanoutMean": 5.236328125,
+ "recvTokensMax": 688,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -5828,16 +5812,16 @@
]
},
{
- "id": "cx-72792847",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1",
- "colorKey": "b300_5b993222",
- "comparisonKey": "10e590b8f933d382",
+ "id": "cx-04c531ee",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db",
+ "colorKey": "gb200_ff33b726",
+ "comparisonKey": "61f8a26a723405f9",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:30.886921+00:00",
+ "generatedAt": "2026-06-29T14:03:53.703773+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_10",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -5845,30 +5829,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · hotspot-single",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-moderate",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
+ "routing": "zipf-moderate",
+ "routingLabel": "zipf-moderate",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -5876,389 +5861,170 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "2ad5ef98d328fa1",
- "workloadId": "set:4:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "1093cd76c9cd2db",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271900377",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271900377",
- "createdAt": "2026-06-26T23:58:30.886921+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.96000158786774,
- "p90": 59.10399928689003,
- "p95": 62.272001057863235,
- "p99": 71.68000191450119
+ "p50": 89.34400230646133,
+ "p90": 101.9200012087822,
+ "p95": 105.59999942779541,
+ "p99": 113.63200098276138
},
"combine": {
- "p50": 66.39999896287918,
- "p90": 67.07199662923813,
- "p95": 67.45599955320358,
- "p99": 90.17600119113922
+ "p50": 76.73600316047668,
+ "p90": 81.98399841785431,
+ "p95": 83.96799862384796,
+ "p99": 89.50400352478027
},
"roundtrip": {
- "p50": 106.91200196743011,
- "p90": 113.40799927711487,
- "p95": 117.18399822711945,
- "p99": 195.77600061893463
+ "p50": 143.74400675296783,
+ "p90": 154.7199934720993,
+ "p95": 158.75199437141418,
+ "p99": 165.3439998626709
},
"isolatedSum": {
- "p50": 123.36000055074692,
- "p90": 126.17599591612816,
- "p95": 129.72800061106682,
- "p99": 161.8560031056404
+ "p50": 166.08000546693802,
+ "p90": 183.9039996266365,
+ "p95": 189.56799805164337,
+ "p99": 203.13600450754166
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
+ "dispatchLogicalBytes": 444416,
+ "combineLogicalBytes": 444416,
+ "fanoutMean": 3.875,
"recvTokensMax": 8,
- "stragglerRank": 7,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 59.58399921655655,
- "p90": 62.65600025653839,
- "p95": 65.34399837255478,
- "p99": 81.85599744319916
+ "p50": 90.01599997282028,
+ "p90": 102.46399790048599,
+ "p95": 105.24799674749374,
+ "p99": 111.29599809646606
},
"combine": {
- "p50": 68.00000369548798,
- "p90": 77.11999863386154,
- "p95": 77.79199630022049,
- "p99": 79.9039974808693
+ "p50": 78.14399898052216,
+ "p90": 83.0719992518425,
+ "p95": 86.11200004816055,
+ "p99": 95.04000097513199
},
"roundtrip": {
- "p50": 122.36800044775009,
- "p90": 125.791996717453,
- "p95": 127.71199643611908,
- "p99": 145.82400023937225
+ "p50": 146.62399888038635,
+ "p90": 158.52800011634827,
+ "p95": 162.1759980916977,
+ "p99": 182.52800405025482
},
"isolatedSum": {
- "p50": 127.58400291204453,
- "p90": 139.77599889039993,
- "p95": 143.13599467277527,
- "p99": 161.75999492406845
+ "p50": 168.15999895334244,
+ "p90": 185.5359971523285,
+ "p95": 191.3599967956543,
+ "p99": 206.33599907159805
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 845824,
+ "combineLogicalBytes": 845824,
+ "fanoutMean": 3.6875,
+ "recvTokensMax": 16,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 69.63200122117996,
- "p90": 75.32799988985062,
- "p95": 77.27999985218048,
- "p99": 98.08000177145004
+ "p50": 91.58399701118469,
+ "p90": 104.16000336408615,
+ "p95": 108.89600217342377,
+ "p99": 117.76000261306763
},
"combine": {
- "p50": 78.62400263547897,
- "p90": 79.26400005817413,
- "p95": 79.45600152015686,
- "p99": 89.75999802350998
+ "p50": 80.25600016117096,
+ "p90": 84.51200276613235,
+ "p95": 89.91999924182892,
+ "p99": 102.04800218343735
},
"roundtrip": {
- "p50": 133.53599607944489,
- "p90": 137.15200126171112,
- "p95": 138.5280042886734,
- "p99": 155.10399639606476
+ "p50": 148.6400067806244,
+ "p90": 160.7999950647354,
+ "p95": 163.96799683570862,
+ "p99": 171.4559942483902
},
"isolatedSum": {
- "p50": 148.25600385665894,
- "p90": 154.59199994802475,
- "p95": 156.73600137233734,
- "p99": 187.83999979496002
+ "p50": 171.83999717235565,
+ "p90": 188.6720061302185,
+ "p95": 198.81600141525269,
+ "p99": 219.80800479650497
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1691648,
+ "combineLogicalBytes": 1691648,
+ "fanoutMean": 3.6875,
+ "recvTokensMax": 32,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 103.39199751615524,
- "p90": 104.96000200510025,
- "p95": 106.62399977445602,
- "p99": 110.81600189208984
+ "p50": 92.47999638319016,
+ "p90": 105.31199723482132,
+ "p95": 109.11999642848969,
+ "p99": 117.44000017642975
},
"combine": {
- "p50": 127.80800461769104,
- "p90": 129.2160004377365,
- "p95": 130.5920034646988,
- "p99": 150.62400698661804
+ "p50": 82.24000036716461,
+ "p90": 90.62399715185165,
+ "p95": 93.31200271844864,
+ "p99": 102.78400033712387
},
"roundtrip": {
- "p50": 215.87200462818146,
- "p90": 223.07200729846954,
- "p95": 224.7679978609085,
- "p99": 231.32799565792084
+ "p50": 152.16000378131866,
+ "p90": 164.63999450206757,
+ "p95": 167.7439957857132,
+ "p99": 177.2480010986328
},
"isolatedSum": {
- "p50": 231.20000213384628,
- "p90": 234.17600244283676,
- "p95": 237.21600323915482,
- "p99": 261.4400088787079
+ "p50": 174.71999675035477,
+ "p90": 195.93599438667297,
+ "p95": 202.43199914693832,
+ "p99": 220.22400051355362
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f390f28a",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621",
- "colorKey": "b300_5b993222",
- "comparisonKey": "82de9b5581f31438",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T09:50:17.677386+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_03",
- "sku": "b300",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b6caf944f6bb621",
- "workloadId": "set:8:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28285661360",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285661360",
- "createdAt": "2026-06-27T09:50:17.677386+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 56.48000165820122,
- "p90": 58.88000130653381,
- "p95": 61.37600168585777,
- "p99": 68.89600306749344
- },
- "combine": {
- "p50": 66.17599725723267,
- "p90": 66.78400188684464,
- "p95": 67.32799857854843,
- "p99": 69.95200365781784
- },
- "roundtrip": {
- "p50": 105.56799918413162,
- "p90": 112.19199746847153,
- "p95": 112.70400136709213,
- "p99": 120.7360029220581
- },
- "isolatedSum": {
- "p50": 122.65599891543388,
- "p90": 125.66400319337845,
- "p95": 128.7040002644062,
- "p99": 138.84800672531128
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 56.76800012588501,
- "p90": 59.007998555898666,
- "p95": 62.04799935221672,
- "p99": 67.52000004053116
- },
- "combine": {
- "p50": 66.3679987192154,
- "p90": 67.64800101518631,
- "p95": 68.2239979505539,
- "p99": 77.66400277614594
- },
- "roundtrip": {
- "p50": 106.27199709415436,
- "p90": 120.60800194740295,
- "p95": 129.56799566745758,
- "p99": 144.99199390411377
- },
- "isolatedSum": {
- "p50": 123.1359988451004,
- "p90": 126.65599957108498,
- "p95": 130.27199730277061,
- "p99": 145.1840028166771
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1247232,
- "combineLogicalBytes": 1247232,
- "fanoutMean": 5.4375,
- "recvTokensMax": 16,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 57.53599852323532,
- "p90": 59.4559982419014,
- "p95": 61.69600039720535,
- "p99": 68.44799965620041
- },
- "combine": {
- "p50": 67.61600077152252,
- "p90": 69.50400024652481,
- "p95": 77.2159993648529,
- "p99": 91.13600105047226
- },
- "roundtrip": {
- "p50": 113.76000195741653,
- "p90": 118.8800036907196,
- "p95": 121.69600278139114,
- "p99": 124.9919980764389
- },
- "isolatedSum": {
- "p50": 125.15199929475784,
- "p90": 128.9599984884262,
- "p95": 138.91199976205826,
- "p99": 159.58400070667267
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2451456,
- "combineLogicalBytes": 2451456,
- "fanoutMean": 5.34375,
- "recvTokensMax": 32,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 58.04799869656563,
- "p90": 60.54399907588959,
- "p95": 63.61600011587143,
- "p99": 71.16799801588058
- },
- "combine": {
- "p50": 67.58400052785873,
- "p90": 69.56800073385239,
- "p95": 72.89600372314453,
- "p99": 80.89599758386612
- },
- "roundtrip": {
- "p50": 122.30399996042252,
- "p90": 125.11999905109406,
- "p95": 126.3359934091568,
- "p99": 137.28000223636627
- },
- "isolatedSum": {
- "p50": 125.63199922442436,
- "p90": 130.11199980974197,
- "p95": 136.51200383901596,
- "p99": 152.0639955997467
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 3354624,
+ "combineLogicalBytes": 3354624,
+ "fanoutMean": 3.65625,
+ "recvTokensMax": 64,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6267,34 +6033,34 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 59.776000678539276,
- "p90": 67.10399687290192,
- "p95": 69.7920024394989,
- "p99": 78.11199873685837
+ "p50": 92.73599833250046,
+ "p90": 103.80800068378448,
+ "p95": 107.61599987745285,
+ "p99": 116.5120005607605
},
"combine": {
- "p50": 68.44799965620041,
- "p90": 78.14399898052216,
- "p95": 78.46400141716003,
- "p99": 79.64800298213959
+ "p50": 82.49600231647491,
+ "p90": 91.16800129413605,
+ "p95": 94.27200257778168,
+ "p99": 103.4879982471466
},
"roundtrip": {
- "p50": 119.55200135707855,
- "p90": 124.38400089740753,
- "p95": 125.56800246238708,
- "p99": 129.18399274349213
+ "p50": 152.54400670528412,
+ "p90": 163.7759953737259,
+ "p95": 167.00799763202667,
+ "p99": 175.07199943065643
},
"isolatedSum": {
- "p50": 128.22400033473969,
- "p90": 145.24799585342407,
- "p95": 148.25600385665894,
- "p99": 157.76000171899796
+ "p50": 175.23200064897537,
+ "p90": 194.97600197792053,
+ "p95": 201.88800245523453,
+ "p99": 219.9999988079071
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 9748480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 128,
+ "dispatchLogicalBytes": 6537216,
+ "combineLogicalBytes": 6537216,
+ "fanoutMean": 3.5625,
+ "recvTokensMax": 127,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -6304,34 +6070,34 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 68.9919963479042,
- "p90": 74.11199808120728,
- "p95": 75.99999755620956,
- "p99": 90.87999910116196
+ "p50": 97.82399982213974,
+ "p90": 108.31999778747559,
+ "p95": 111.93600296974182,
+ "p99": 120.57600170373917
},
"combine": {
- "p50": 78.5600021481514,
- "p90": 79.19999957084656,
- "p95": 79.77599650621414,
- "p99": 91.26400202512741
+ "p50": 85.63199639320374,
+ "p90": 93.85599941015244,
+ "p95": 95.51999717950821,
+ "p99": 102.59199887514114
},
"roundtrip": {
- "p50": 130.3360015153885,
- "p90": 134.20799374580383,
- "p95": 136.00000739097595,
- "p99": 146.33600413799286
+ "p50": 159.13599729537964,
+ "p90": 168.99199783802032,
+ "p95": 171.77599668502808,
+ "p99": 178.0479997396469
},
"isolatedSum": {
- "p50": 147.5519984960556,
- "p90": 153.31199765205383,
- "p95": 155.7759940624237,
- "p99": 182.14400112628937
+ "p50": 183.45599621534348,
+ "p90": 202.17599719762802,
+ "p95": 207.45600014925003,
+ "p99": 223.1680005788803
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
+ "dispatchLogicalBytes": 12859392,
+ "combineLogicalBytes": 12859392,
+ "fanoutMean": 3.50390625,
+ "recvTokensMax": 255,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -6341,34 +6107,34 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 83.03999900817871,
- "p90": 88.60799670219421,
- "p95": 89.56799656152725,
- "p99": 95.16800194978714
+ "p50": 106.36799782514572,
+ "p90": 115.87200313806534,
+ "p95": 119.80800330638885,
+ "p99": 127.55200266838074
},
"combine": {
- "p50": 91.67999774217606,
- "p90": 93.9520001411438,
- "p95": 94.55999732017517,
- "p99": 102.46399790048599
+ "p50": 103.45599800348282,
+ "p90": 108.31999778747559,
+ "p95": 109.8560020327568,
+ "p99": 119.29599940776825
},
"roundtrip": {
- "p50": 159.19999778270721,
- "p90": 164.000004529953,
- "p95": 166.24000668525696,
- "p99": 175.20000040531158
+ "p50": 185.08799374103546,
+ "p90": 194.62400674819946,
+ "p95": 198.5280066728592,
+ "p99": 210.33599972724915
},
"isolatedSum": {
- "p50": 174.71999675035477,
- "p90": 182.559996843338,
- "p95": 184.12799388170242,
- "p99": 197.63199985027313
+ "p50": 209.82399582862854,
+ "p90": 224.19200092554092,
+ "p95": 229.66400533914566,
+ "p99": 246.848002076149
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38621184,
- "combineLogicalBytes": 38621184,
- "fanoutMean": 5.26171875,
- "recvTokensMax": 512,
+ "dispatchLogicalBytes": 25145344,
+ "combineLogicalBytes": 25145344,
+ "fanoutMean": 3.42578125,
+ "recvTokensMax": 510,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -6378,34 +6144,34 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 105.3759977221489,
- "p90": 106.6880002617836,
- "p95": 108.2879975438118,
- "p99": 117.76000261306763
+ "p50": 127.51999497413635,
+ "p90": 135.42400300502777,
+ "p95": 139.615997672081,
+ "p99": 144.80000734329224
},
"combine": {
- "p50": 127.9039978981018,
- "p90": 131.1360001564026,
- "p95": 138.20800185203552,
- "p99": 144.03200149536133
+ "p50": 134.0479999780655,
+ "p90": 142.97600090503693,
+ "p95": 145.05599439144135,
+ "p99": 154.91199493408203
},
"roundtrip": {
- "p50": 220.47999501228333,
- "p90": 224.41600263118744,
- "p95": 225.69599747657776,
- "p99": 234.65600609779358
+ "p50": 236.2239956855774,
+ "p90": 244.32000517845154,
+ "p95": 247.45599925518036,
+ "p99": 256.8959891796112
},
"isolatedSum": {
- "p50": 233.2799956202507,
- "p90": 237.8240004181862,
- "p95": 246.49599939584732,
- "p99": 261.79200410842896
+ "p50": 261.56799495220184,
+ "p90": 278.4000039100647,
+ "p95": 284.67199206352234,
+ "p99": 299.71200227737427
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -6414,16 +6180,16 @@
]
},
{
- "id": "cx-6a4bc237",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac",
- "colorKey": "b300_39a5906c",
- "comparisonKey": "f7e177d587167ca7",
+ "id": "cx-a12c4b2b",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524",
+ "colorKey": "gb200_acbc8de8",
+ "comparisonKey": "a122841e63a6f52b",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:50:24.903917+00:00",
+ "generatedAt": "2026-06-29T14:04:17.680714+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_06",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -6431,30 +6197,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · hotspot-single+eplb",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-moderate+eplb",
"model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 288,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single+eplb",
+ "routing": "zipf-moderate",
+ "routingLabel": "zipf-moderate+eplb",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -6462,59 +6229,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "e41f5099a9733ac",
- "workloadId": "set:8:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.830078125,
- "eplbImbalanceAfter": 1.0007595486111112,
- "backendVersion": "1.2.1",
+ "traceSignature": "431e44245dd1524",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 4.927734375,
+ "eplbImbalanceAfter": 1.0006103515625,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285664068",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285664068",
- "createdAt": "2026-06-27T09:50:24.903917+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 55.64799904823303,
- "p90": 57.37600103020668,
- "p95": 59.647999703884125,
- "p99": 68.12799721956253
+ "p50": 90.65599739551544,
+ "p90": 102.24000364542007,
+ "p95": 106.30399733781815,
+ "p99": 116.19199812412262
},
"combine": {
- "p50": 65.60000032186508,
- "p90": 66.20799750089645,
- "p95": 66.68800115585327,
- "p99": 77.27999985218048
+ "p50": 79.0719985961914,
+ "p90": 83.55200290679932,
+ "p95": 85.4720026254654,
+ "p99": 92.86399930715561
},
"roundtrip": {
- "p50": 104.12800312042236,
- "p90": 109.92000252008438,
- "p95": 111.35999858379364,
- "p99": 116.35199934244156
+ "p50": 146.27200365066528,
+ "p90": 157.47199952602386,
+ "p95": 161.21600568294525,
+ "p99": 169.3439930677414
},
"isolatedSum": {
- "p50": 121.24799937009811,
- "p90": 123.58399853110313,
- "p95": 126.3360008597374,
- "p99": 145.407997071743
+ "p50": 169.72799599170685,
+ "p90": 185.7920065522194,
+ "p95": 191.77599996328354,
+ "p99": 209.05599743127823
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 616448,
+ "fanoutMean": 5.375,
"recvTokensMax": 7,
- "stragglerRank": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6523,35 +6290,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 55.80800026655197,
- "p90": 58.111999183893204,
- "p95": 60.19200012087822,
- "p99": 80.60800284147263
+ "p50": 90.20800143480301,
+ "p90": 102.04800218343735,
+ "p95": 107.04000294208527,
+ "p99": 117.8240031003952
},
"combine": {
- "p50": 65.92000275850296,
- "p90": 67.35999882221222,
- "p95": 68.1919977068901,
- "p99": 78.27199995517731
+ "p50": 81.05599880218506,
+ "p90": 88.60799670219421,
+ "p95": 91.51999652385712,
+ "p99": 95.0080007314682
},
"roundtrip": {
- "p50": 104.80000078678131,
- "p90": 107.16799646615982,
- "p95": 109.56799983978271,
- "p99": 119.6800023317337
+ "p50": 150.84800124168396,
+ "p90": 161.05599701404572,
+ "p95": 164.51199352741241,
+ "p99": 170.52799463272095
},
"isolatedSum": {
- "p50": 121.72800302505493,
- "p90": 125.47199800610542,
- "p95": 128.38399782776833,
- "p99": 158.88000279664993
+ "p50": 171.26400023698807,
+ "p90": 190.65599888563156,
+ "p95": 198.55999946594238,
+ "p99": 212.8320038318634
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1175552,
- "combineLogicalBytes": 1175552,
- "fanoutMean": 5.125,
- "recvTokensMax": 12,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1204224,
+ "combineLogicalBytes": 1204224,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 14,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6560,35 +6327,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 56.57599866390228,
- "p90": 59.776000678539276,
- "p95": 64.7360011935234,
- "p99": 73.18399846553802
+ "p50": 91.48799628019333,
+ "p90": 102.11200267076492,
+ "p95": 106.59199953079224,
+ "p99": 128.1599998474121
},
"combine": {
- "p50": 66.3679987192154,
- "p90": 67.77600198984146,
- "p95": 68.4799998998642,
- "p99": 82.33600109815598
+ "p50": 81.40800148248672,
+ "p90": 88.83199840784073,
+ "p95": 91.51999652385712,
+ "p99": 96.00000083446503
},
"roundtrip": {
- "p50": 111.84000223875046,
- "p90": 116.67200177907944,
- "p95": 120.51200121641159,
- "p99": 148.15999567508698
+ "p50": 150.78400075435638,
+ "p90": 161.53599321842194,
+ "p95": 164.8319959640503,
+ "p99": 170.04799842834473
},
"isolatedSum": {
- "p50": 122.94399738311768,
- "p90": 127.55200266838074,
- "p95": 133.2160010933876,
- "p99": 155.519999563694
+ "p50": 172.89599776268005,
+ "p90": 190.94400107860565,
+ "p95": 198.11199605464935,
+ "p99": 224.16000068187714
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2451456,
- "combineLogicalBytes": 2451456,
- "fanoutMean": 5.34375,
- "recvTokensMax": 23,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2394112,
+ "combineLogicalBytes": 2394112,
+ "fanoutMean": 5.21875,
+ "recvTokensMax": 24,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6597,35 +6364,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 57.72799998521805,
- "p90": 59.808000922203064,
- "p95": 61.664000153541565,
- "p99": 71.3919997215271
+ "p50": 93.82399916648865,
+ "p90": 103.45599800348282,
+ "p95": 107.39199817180634,
+ "p99": 113.76000195741653
},
"combine": {
- "p50": 67.10399687290192,
- "p90": 69.11999732255936,
- "p95": 76.31999999284744,
- "p99": 80.9599980711937
+ "p50": 83.13599973917007,
+ "p90": 90.04800021648407,
+ "p95": 92.25600212812424,
+ "p99": 102.9760017991066
},
"roundtrip": {
- "p50": 121.79200351238251,
- "p90": 124.4800016283989,
- "p95": 125.2480000257492,
- "p99": 135.77599823474884
+ "p50": 155.64799308776855,
+ "p90": 165.56799411773682,
+ "p95": 169.37600076198578,
+ "p99": 175.74399709701538
},
"isolatedSum": {
- "p50": 124.83199685811996,
- "p90": 128.92799824476242,
- "p95": 137.984000146389,
- "p99": 152.3519977927208
+ "p50": 176.95999890565872,
+ "p90": 193.5039982199669,
+ "p95": 199.64800029993057,
+ "p99": 216.73600375652313
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4730880,
- "combineLogicalBytes": 4730880,
- "fanoutMean": 5.15625,
- "recvTokensMax": 44,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4630528,
+ "combineLogicalBytes": 4630528,
+ "fanoutMean": 5.046875,
+ "recvTokensMax": 45,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6634,35 +6401,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 65.05600363016129,
- "p90": 69.95200365781784,
- "p95": 73.56800138950348,
- "p99": 77.11999863386154
+ "p50": 94.71999853849411,
+ "p90": 103.45599800348282,
+ "p95": 106.81600123643875,
+ "p99": 112.96000331640244
},
"combine": {
- "p50": 67.48799979686737,
- "p90": 77.63200253248215,
- "p95": 77.85599678754807,
- "p99": 78.49600166082382
+ "p50": 84.51200276613235,
+ "p90": 91.51999652385712,
+ "p95": 93.24800223112106,
+ "p99": 98.65599870681763
},
"roundtrip": {
- "p50": 118.9119964838028,
- "p90": 122.04799801111221,
- "p95": 123.99999797344208,
- "p99": 128.86400520801544
+ "p50": 156.41599893569946,
+ "p90": 166.30400717258453,
+ "p95": 169.18399930000305,
+ "p99": 177.40799486637115
},
"isolatedSum": {
- "p50": 132.54400342702866,
- "p90": 147.5840061903,
- "p95": 151.42399817705154,
- "p99": 155.61600029468536
+ "p50": 179.23200130462646,
+ "p90": 194.97599452733994,
+ "p95": 200.06400346755981,
+ "p99": 211.61600202322006
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9691136,
- "combineLogicalBytes": 9691136,
- "fanoutMean": 5.28125,
- "recvTokensMax": 88,
- "stragglerRank": 0,
+ "dispatchLogicalBytes": 9447424,
+ "combineLogicalBytes": 9447424,
+ "fanoutMean": 5.1484375,
+ "recvTokensMax": 91,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6671,35 +6438,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 68.00000369548798,
- "p90": 71.03999704122543,
- "p95": 73.72800260782242,
- "p99": 91.39200299978256
+ "p50": 98.08000177145004,
+ "p90": 107.29599744081497,
+ "p95": 110.30399799346924,
+ "p99": 118.6240017414093
},
"combine": {
- "p50": 77.95199751853943,
- "p90": 78.68800312280655,
- "p95": 79.3600007891655,
- "p99": 89.63199704885483
+ "p50": 92.22400188446045,
+ "p90": 96.12800180912018,
+ "p95": 99.74399954080582,
+ "p99": 105.15200346708298
},
"roundtrip": {
- "p50": 128.83199751377106,
- "p90": 133.66399705410004,
- "p95": 135.0719928741455,
- "p99": 143.26399564743042
+ "p50": 164.12800550460815,
+ "p90": 173.43999445438385,
+ "p95": 176.60799622535706,
+ "p99": 184.64000523090363
},
"isolatedSum": {
- "p50": 145.9520012140274,
- "p90": 149.72800016403198,
- "p95": 153.08800339698792,
- "p99": 181.0240000486374
+ "p50": 190.3040036559105,
+ "p90": 203.42399924993515,
+ "p95": 210.04799753427505,
+ "p99": 223.77600520849228
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19568640,
- "combineLogicalBytes": 19568640,
- "fanoutMean": 5.33203125,
- "recvTokensMax": 179,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19023872,
+ "combineLogicalBytes": 19023872,
+ "fanoutMean": 5.18359375,
+ "recvTokensMax": 178,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6708,35 +6475,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 81.216000020504,
- "p90": 87.93599903583527,
- "p95": 89.15200084447861,
- "p99": 95.0080007314682
+ "p50": 109.63200032711029,
+ "p90": 117.60000139474869,
+ "p95": 120.38400024175644,
+ "p99": 127.6479959487915
},
"combine": {
- "p50": 91.48799628019333,
- "p90": 93.88799965381622,
- "p95": 100.96000134944916,
- "p99": 101.95200145244598
+ "p50": 106.04800283908844,
+ "p90": 112.06399649381638,
+ "p95": 115.52000045776367,
+ "p99": 118.97599697113037
},
"roundtrip": {
- "p50": 156.25600516796112,
- "p90": 160.25599837303162,
- "p95": 161.98399662971497,
- "p99": 176.92799866199493
+ "p50": 188.51199746131897,
+ "p90": 196.73599302768707,
+ "p95": 199.77599382400513,
+ "p99": 204.70400154590607
},
"isolatedSum": {
- "p50": 172.70399630069733,
- "p90": 181.8239986896515,
- "p95": 190.11200219392776,
- "p99": 196.96000218391418
+ "p50": 215.68000316619873,
+ "p90": 229.66399788856506,
+ "p95": 235.9040006995201,
+ "p99": 246.62399291992188
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38750208,
- "combineLogicalBytes": 38750208,
- "fanoutMean": 5.279296875,
- "recvTokensMax": 348,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 38148096,
+ "combineLogicalBytes": 38148096,
+ "fanoutMean": 5.197265625,
+ "recvTokensMax": 350,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6745,35 +6512,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 92.79999881982803,
- "p90": 94.91200000047684,
- "p95": 96.6079980134964,
- "p99": 106.23999685049057
+ "p50": 124.35200065374374,
+ "p90": 131.58400356769562,
+ "p95": 135.45599579811096,
+ "p99": 141.66399836540222
},
"combine": {
- "p50": 114.78400230407715,
- "p90": 116.41599982976913,
- "p95": 117.66400188207626,
- "p99": 128.60800325870514
+ "p50": 126.91199779510498,
+ "p90": 132.192000746727,
+ "p95": 135.68000495433807,
+ "p99": 141.4400041103363
},
"roundtrip": {
- "p50": 190.8479928970337,
- "p90": 196.73599302768707,
- "p95": 197.82400131225586,
- "p99": 204.51200008392334
+ "p50": 224.60800409317017,
+ "p90": 232.67200589179993,
+ "p95": 236.89599335193634,
+ "p99": 241.98399484157562
},
"isolatedSum": {
- "p50": 207.58400112390518,
- "p90": 211.32799983024597,
- "p95": 214.27199989557266,
- "p99": 234.8480001091957
+ "p50": 251.26399844884872,
+ "p90": 263.7760043144226,
+ "p95": 271.13600075244904,
+ "p99": 283.1040024757385
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77342720,
- "combineLogicalBytes": 77342720,
- "fanoutMean": 5.2685546875,
+ "dispatchLogicalBytes": 76955648,
+ "combineLogicalBytes": 76955648,
+ "fanoutMean": 5.2421875,
"recvTokensMax": 687,
- "stragglerRank": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6781,16 +6548,16 @@
]
},
{
- "id": "cx-f11d8dc8",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de",
- "colorKey": "b300_e3d449ce",
- "comparisonKey": "6570d3a11ae9f14f",
+ "id": "cx-a1958791",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524",
+ "colorKey": "gb200_be611b2a",
+ "comparisonKey": "cb842765866b5c94",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:47:37.037332+00:00",
+ "generatedAt": "2026-06-29T14:01:17.988369+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -6798,30 +6565,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · uniform+eplb",
+ "label": "GB200 EP8 · deepep · bf16 · zipf+eplb",
"model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 288,
- "routing": "uniform",
- "routingLabel": "uniform+eplb",
+ "routing": "zipf",
+ "routingLabel": "zipf+eplb",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -6829,59 +6597,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "73351bbcd4d02de",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.078125,
- "eplbImbalanceAfter": 1.00048828125,
- "backendVersion": "1.2.1",
+ "traceSignature": "431e44245dd1524",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 4.927734375,
+ "eplbImbalanceAfter": 1.0006103515625,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285602756",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285602756",
- "createdAt": "2026-06-27T09:47:37.037332+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 58.400001376867294,
- "p90": 59.967998415231705,
- "p95": 62.431998550891876,
- "p99": 78.65600287914276
+ "p50": 88.0960002541542,
+ "p90": 98.24000298976898,
+ "p95": 103.04000228643417,
+ "p99": 112.35199868679047
},
"combine": {
- "p50": 66.04799628257751,
- "p90": 66.59200042486191,
- "p95": 67.71200150251389,
- "p99": 77.56800204515457
+ "p50": 79.3600007891655,
+ "p90": 83.39200168848038,
+ "p95": 86.75199747085571,
+ "p99": 93.6959981918335
},
"roundtrip": {
- "p50": 107.19999670982361,
- "p90": 113.18399757146835,
- "p95": 114.62400108575821,
- "p99": 137.5039964914322
+ "p50": 144.51199769973755,
+ "p90": 154.11199629306793,
+ "p95": 156.51200711727142,
+ "p99": 160.8320027589798
},
"isolatedSum": {
- "p50": 124.44799765944481,
- "p90": 126.55999884009361,
- "p95": 130.14400005340576,
- "p99": 156.22400492429733
+ "p50": 167.4560010433197,
+ "p90": 181.63200467824936,
+ "p95": 189.7919997572899,
+ "p99": 206.04799687862396
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 516096,
- "combineLogicalBytes": 516096,
- "fanoutMean": 4.5,
- "recvTokensMax": 6,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 616448,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6890,35 +6658,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 58.75200033187866,
- "p90": 60.03199890255928,
- "p95": 60.67200005054474,
- "p99": 65.60000032186508
+ "p50": 87.5839963555336,
+ "p90": 98.49599748849869,
+ "p95": 102.14400291442871,
+ "p99": 110.72000116109848
},
"combine": {
- "p50": 66.20799750089645,
- "p90": 67.71200150251389,
- "p95": 68.60800087451935,
- "p99": 88.86399865150452
+ "p50": 80.28800040483475,
+ "p90": 84.57600325345993,
+ "p95": 88.44800293445587,
+ "p99": 96.00000083446503
},
"roundtrip": {
- "p50": 108.0000028014183,
- "p90": 112.2559979557991,
- "p95": 115.167997777462,
- "p99": 124.03199821710587
+ "p50": 146.04799449443817,
+ "p90": 157.27999806404114,
+ "p95": 159.87199544906616,
+ "p99": 166.59200191497803
},
"isolatedSum": {
- "p50": 124.95999783277512,
- "p90": 127.74400040507317,
- "p95": 129.2800009250641,
- "p99": 154.4639989733696
+ "p50": 167.87199676036835,
+ "p90": 183.07200074195862,
+ "p95": 190.59200584888458,
+ "p99": 206.7200019955635
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1089536,
- "combineLogicalBytes": 1089536,
- "fanoutMean": 4.75,
- "recvTokensMax": 11,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1204224,
+ "combineLogicalBytes": 1204224,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 14,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6927,35 +6695,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 59.167999774217606,
- "p90": 61.85600161552429,
- "p95": 63.840001821517944,
- "p99": 70.3359991312027
+ "p50": 90.81599861383438,
+ "p90": 101.50399804115295,
+ "p95": 107.00800269842148,
+ "p99": 116.44800007343292
},
"combine": {
- "p50": 68.12799721956253,
- "p90": 76.48000121116638,
- "p95": 76.92799717187881,
- "p99": 77.91999727487564
+ "p50": 81.63200318813324,
+ "p90": 88.35200220346451,
+ "p95": 91.87199920415878,
+ "p99": 95.20000219345093
},
"roundtrip": {
- "p50": 120.44800072908401,
- "p90": 124.09599870443344,
- "p95": 125.59999525547028,
- "p99": 134.33599472045898
+ "p50": 148.8959938287735,
+ "p90": 160.22400557994843,
+ "p95": 164.2879992723465,
+ "p99": 171.74400389194489
},
"isolatedSum": {
- "p50": 127.29599699378014,
- "p90": 138.33600282669067,
- "p95": 140.76799899339676,
- "p99": 148.25599640607834
+ "p50": 172.44800180196762,
+ "p90": 189.85600024461746,
+ "p95": 198.88000190258026,
+ "p99": 211.64800226688385
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2207744,
- "combineLogicalBytes": 2207744,
- "fanoutMean": 4.8125,
- "recvTokensMax": 23,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2394112,
+ "combineLogicalBytes": 2394112,
+ "fanoutMean": 5.21875,
+ "recvTokensMax": 24,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -6964,35 +6732,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 60.864001512527466,
- "p90": 68.31999868154526,
- "p95": 70.97599655389786,
- "p99": 79.99999821186066
+ "p50": 91.77599847316742,
+ "p90": 100.5759984254837,
+ "p95": 104.67199981212616,
+ "p99": 109.8880022764206
},
"combine": {
- "p50": 68.31999868154526,
- "p90": 77.11999863386154,
- "p95": 77.79199630022049,
- "p99": 79.42400127649307
+ "p50": 83.00799876451492,
+ "p90": 90.55999666452408,
+ "p95": 92.86399930715561,
+ "p99": 100.16000270843506
},
"roundtrip": {
- "p50": 121.60000205039978,
- "p90": 125.91999769210815,
- "p95": 127.03999876976013,
- "p99": 133.08799266815186
+ "p50": 152.5759994983673,
+ "p90": 161.76000237464905,
+ "p95": 164.63999450206757,
+ "p99": 172.92800545692444
},
"isolatedSum": {
- "p50": 129.18400019407272,
- "p90": 145.4399973154068,
- "p95": 148.76799285411835,
- "p99": 159.42399948835373
+ "p50": 174.78399723768234,
+ "p90": 191.13599509000778,
+ "p95": 197.53599911928177,
+ "p99": 210.04800498485565
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4558848,
- "combineLogicalBytes": 4558848,
- "fanoutMean": 4.96875,
- "recvTokensMax": 46,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 4630528,
+ "combineLogicalBytes": 4630528,
+ "fanoutMean": 5.046875,
+ "recvTokensMax": 45,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7001,35 +6769,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 67.45599955320358,
- "p90": 73.37599992752075,
- "p95": 74.5600014925003,
- "p99": 81.40800148248672
+ "p50": 94.24000233411789,
+ "p90": 102.46399790048599,
+ "p95": 105.82400113344193,
+ "p99": 113.50400000810623
},
"combine": {
- "p50": 68.9919963479042,
- "p90": 78.20799946784973,
- "p95": 78.46400141716003,
- "p99": 81.15199953317642
+ "p50": 85.1840004324913,
+ "p90": 91.80799871683121,
+ "p95": 93.44000369310379,
+ "p99": 96.28800302743912
},
"roundtrip": {
- "p50": 121.15199863910675,
- "p90": 124.25599992275238,
- "p95": 126.01600587368011,
- "p99": 138.97599279880524
+ "p50": 154.4640064239502,
+ "p90": 163.00800442695618,
+ "p95": 166.59200191497803,
+ "p99": 175.4239946603775
},
"isolatedSum": {
- "p50": 136.4479959011078,
- "p90": 151.58399939537048,
- "p95": 153.02400290966034,
- "p99": 162.56000101566315
+ "p50": 179.4240027666092,
+ "p90": 194.2719966173172,
+ "p95": 199.26400482654572,
+ "p99": 209.79200303554535
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9347072,
- "combineLogicalBytes": 9347072,
- "fanoutMean": 5.09375,
- "recvTokensMax": 86,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 9447424,
+ "combineLogicalBytes": 9447424,
+ "fanoutMean": 5.1484375,
+ "recvTokensMax": 91,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7038,35 +6806,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 70.43199986219406,
- "p90": 71.80800288915634,
- "p95": 73.47200065851212,
- "p99": 81.44000172615051
+ "p50": 97.24800288677216,
+ "p90": 106.72000050544739,
+ "p95": 109.66400057077408,
+ "p99": 117.44000017642975
},
"combine": {
- "p50": 78.46400141716003,
- "p90": 79.39200103282928,
- "p95": 80.99199831485748,
- "p99": 92.12800115346909
+ "p50": 92.44800359010696,
+ "p90": 95.87199985980988,
+ "p95": 98.52799773216248,
+ "p99": 103.2319962978363
},
"roundtrip": {
- "p50": 132.7359974384308,
- "p90": 137.56799697875977,
- "p95": 138.62399756908417,
- "p99": 143.71199905872345
+ "p50": 161.6320013999939,
+ "p90": 170.56000232696533,
+ "p95": 173.40800166130066,
+ "p99": 179.1359931230545
},
"isolatedSum": {
- "p50": 148.8960012793541,
- "p90": 151.20000392198563,
- "p95": 154.4639989733696,
- "p99": 173.5680028796196
+ "p50": 189.69600647687912,
+ "p90": 202.59200036525726,
+ "p95": 208.19199830293655,
+ "p99": 220.67199647426605
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 18995200,
- "combineLogicalBytes": 18995200,
- "fanoutMean": 5.17578125,
+ "dispatchLogicalBytes": 19023872,
+ "combineLogicalBytes": 19023872,
+ "fanoutMean": 5.18359375,
"recvTokensMax": 178,
- "stragglerRank": 7,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7075,35 +6843,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 83.16799998283386,
- "p90": 88.95999938249588,
- "p95": 89.82399851083755,
- "p99": 93.85599941015244
+ "p50": 109.02400314807892,
+ "p90": 116.57600104808807,
+ "p95": 119.19999867677689,
+ "p99": 127.07200646400452
},
"combine": {
- "p50": 91.58399701118469,
- "p90": 93.82399916648865,
- "p95": 101.1200025677681,
- "p99": 114.68800157308578
+ "p50": 106.39999806880951,
+ "p90": 112.57600039243698,
+ "p95": 115.87200313806534,
+ "p99": 120.31999975442886
},
"roundtrip": {
- "p50": 159.42400693893433,
- "p90": 163.10399770736694,
- "p95": 164.35199975967407,
- "p99": 169.37600076198578
+ "p50": 187.55200505256653,
+ "p90": 195.3279972076416,
+ "p95": 198.7520009279251,
+ "p99": 204.48000729084015
},
"isolatedSum": {
- "p50": 174.75199699401855,
- "p90": 182.78399854898453,
- "p95": 190.94400107860565,
- "p99": 208.54400098323822
+ "p50": 215.42400121688843,
+ "p90": 229.15200144052505,
+ "p95": 235.07200181484222,
+ "p99": 247.39200621843338
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38291456,
- "combineLogicalBytes": 38291456,
- "fanoutMean": 5.216796875,
- "recvTokensMax": 348,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 38148096,
+ "combineLogicalBytes": 38148096,
+ "fanoutMean": 5.197265625,
+ "recvTokensMax": 350,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7112,35 +6880,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 95.2640026807785,
- "p90": 97.59999811649323,
- "p95": 99.90400075912476,
- "p99": 116.28799885511398
+ "p50": 123.64800274372101,
+ "p90": 130.8159977197647,
+ "p95": 133.760005235672,
+ "p99": 140.35199582576752
},
"combine": {
- "p50": 115.23199826478958,
- "p90": 115.84000289440155,
- "p95": 116.38399958610535,
- "p99": 126.20800733566284
+ "p50": 127.71199643611908,
+ "p90": 132.54399597644806,
+ "p95": 135.71199774742126,
+ "p99": 143.39199662208557
},
"roundtrip": {
- "p50": 193.9840018749237,
- "p90": 199.64799284934998,
- "p95": 200.6399929523468,
- "p99": 210.52800118923187
+ "p50": 223.87200593948364,
+ "p90": 230.84799945354462,
+ "p95": 234.72000658512115,
+ "p99": 240.60800671577454
},
"isolatedSum": {
- "p50": 210.49600094556808,
- "p90": 213.44000101089478,
- "p95": 216.2880003452301,
- "p99": 242.49600619077682
+ "p50": 251.3599991798401,
+ "p90": 263.35999369621277,
+ "p95": 269.47200298309326,
+ "p99": 283.7439924478531
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77113344,
- "combineLogicalBytes": 77113344,
- "fanoutMean": 5.2529296875,
- "recvTokensMax": 685,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 76955648,
+ "combineLogicalBytes": 76955648,
+ "fanoutMean": 5.2421875,
+ "recvTokensMax": 687,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7148,47 +6916,48 @@
]
},
{
- "id": "cx-7d11224e",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c",
- "colorKey": "b300_8d2811e3",
- "comparisonKey": "801e704d68c28ca9",
+ "id": "cx-063a34f6",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_42130d21",
+ "comparisonKey": "386f464c43c562a3",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:48:25.920368+00:00",
+ "generatedAt": "2026-06-29T13:48:13.108347+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_09",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "cached-layout-comm-only-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf",
+ "label": "GB200 EP8 · deepep · bf16 [cl]",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -7196,59 +6965,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "14ded8461f2636c",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285620595",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285620595",
- "createdAt": "2026-06-27T09:48:25.920368+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.384000927209854,
- "p90": 58.81600081920624,
- "p95": 61.37600168585777,
- "p99": 80.60800284147263
+ "p50": 75.1039981842041,
+ "p90": 87.80799806118011,
+ "p95": 92.38400310277939,
+ "p99": 98.49599748849869
},
"combine": {
- "p50": 65.47199934720993,
- "p90": 66.3679987192154,
- "p95": 66.72000139951706,
- "p99": 68.09599697589874
+ "p50": 81.98399841785431,
+ "p90": 87.99999952316284,
+ "p95": 92.6079973578453,
+ "p99": 97.82399982213974
},
"roundtrip": {
- "p50": 107.42399841547012,
- "p90": 111.84000223875046,
- "p95": 112.96000331640244,
- "p99": 126.14400684833527
+ "p50": 137.02400028705597,
+ "p90": 149.4079977273941,
+ "p95": 153.79199385643005,
+ "p99": 161.82400286197662
},
"isolatedSum": {
- "p50": 121.85600027441978,
- "p90": 125.18399953842163,
- "p95": 128.09600308537483,
- "p99": 148.70399981737137
+ "p50": 157.0879966020584,
+ "p90": 175.80799758434296,
+ "p95": 184.9920004606247,
+ "p99": 196.31999731063843
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7257,35 +7026,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 56.2559999525547,
- "p90": 58.33600088953972,
- "p95": 60.447998344898224,
- "p99": 72.83200323581696
+ "p50": 76.89599692821503,
+ "p90": 94.55999732017517,
+ "p95": 100.00000149011612,
+ "p99": 114.04799669981003
},
"combine": {
- "p50": 66.01600348949432,
- "p90": 66.68800115585327,
- "p95": 67.48799979686737,
- "p99": 91.90399944782257
+ "p50": 83.64800363779068,
+ "p90": 95.8079993724823,
+ "p95": 98.01600128412247,
+ "p99": 141.27999544143677
},
"roundtrip": {
- "p50": 105.02400249242783,
- "p90": 112.41599917411804,
- "p95": 113.0559965968132,
- "p99": 119.64800208806992
+ "p50": 138.87999951839447,
+ "p90": 154.59200739860535,
+ "p95": 159.55199301242828,
+ "p99": 173.7920045852661
},
"isolatedSum": {
- "p50": 122.27200344204903,
- "p90": 125.02400204539299,
- "p95": 127.9359981417656,
- "p99": 164.73600268363953
+ "p50": 160.5440005660057,
+ "p90": 190.36799669265747,
+ "p95": 198.0160027742386,
+ "p99": 255.3279921412468
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7294,34 +7063,34 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 56.73599988222122,
- "p90": 59.29600074887276,
- "p95": 60.99199876189232,
- "p99": 73.11999797821045
+ "p50": 76.60800218582153,
+ "p90": 88.76799792051315,
+ "p95": 92.12800115346909,
+ "p99": 102.11200267076492
},
"combine": {
- "p50": 66.20799750089645,
- "p90": 67.55200028419495,
- "p95": 68.80000233650208,
- "p99": 79.74400371313095
+ "p50": 83.64800363779068,
+ "p90": 89.79199826717377,
+ "p95": 94.46399658918381,
+ "p99": 97.37599641084671
},
"roundtrip": {
- "p50": 105.85600137710571,
- "p90": 108.73600095510483,
- "p95": 110.43199896812439,
- "p99": 124.92799758911133
+ "p50": 139.16799426078796,
+ "p90": 150.7200002670288,
+ "p95": 155.35999834537506,
+ "p99": 163.4880006313324
},
"isolatedSum": {
- "p50": 122.94399738311768,
- "p90": 126.8480010330677,
- "p95": 129.7920010983944,
- "p99": 152.8640016913414
+ "p50": 160.2560058236122,
+ "p90": 178.55999618768692,
+ "p95": 186.5919977426529,
+ "p99": 199.48799908161163
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -7331,35 +7100,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 58.30400064587593,
- "p90": 64.57599997520447,
- "p95": 65.85600227117538,
- "p99": 70.88000327348709
+ "p50": 81.34400099515915,
+ "p90": 94.33600306510925,
+ "p95": 97.120001912117,
+ "p99": 110.3999987244606
},
"combine": {
- "p50": 66.81600213050842,
- "p90": 68.51200014352798,
- "p95": 69.023996591568,
- "p99": 78.17599922418594
+ "p50": 84.86399799585342,
+ "p90": 93.1520015001297,
+ "p95": 95.64799815416336,
+ "p99": 103.10400277376175
},
"roundtrip": {
- "p50": 114.56000059843063,
- "p90": 121.15199863910675,
- "p95": 122.5920021533966,
- "p99": 138.72000575065613
+ "p50": 141.76000654697418,
+ "p90": 153.888002038002,
+ "p95": 156.99200332164764,
+ "p99": 164.2560064792633
},
"isolatedSum": {
- "p50": 125.12000277638435,
- "p90": 133.08800011873245,
- "p95": 134.87999886274338,
- "p99": 149.05600249767303
+ "p50": 166.20799899101257,
+ "p90": 187.48800456523895,
+ "p95": 192.76800006628036,
+ "p99": 213.50400149822235
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7368,35 +7137,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 59.29600074887276,
- "p90": 63.45599889755249,
- "p95": 66.3679987192154,
- "p99": 85.82399785518646
+ "p50": 82.46400207281113,
+ "p90": 92.22400188446045,
+ "p95": 96.16000205278397,
+ "p99": 102.08000242710114
},
"combine": {
- "p50": 67.03999638557434,
- "p90": 69.023996591568,
- "p95": 70.3359991312027,
- "p99": 79.93599772453308
+ "p50": 86.17600053548813,
+ "p90": 95.2640026807785,
+ "p95": 96.8639999628067,
+ "p99": 105.27999699115753
},
"roundtrip": {
- "p50": 122.6240023970604,
- "p90": 125.66399574279785,
- "p95": 126.65599584579468,
- "p99": 131.9359987974167
+ "p50": 144.96000111103058,
+ "p90": 157.02399611473083,
+ "p95": 159.67999398708344,
+ "p99": 168.96000504493713
},
"isolatedSum": {
- "p50": 126.3359971344471,
- "p90": 132.47999548912048,
- "p95": 136.7039978504181,
- "p99": 165.75999557971954
+ "p50": 168.64000260829926,
+ "p90": 187.48800456523895,
+ "p95": 193.02400201559067,
+ "p99": 207.35999941825867
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7405,35 +7174,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 74.14399832487106,
- "p90": 76.54400169849396,
- "p95": 77.85599678754807,
- "p99": 89.4400030374527
+ "p50": 87.10400015115738,
+ "p90": 123.99999797344208,
+ "p95": 142.56000518798828,
+ "p99": 181.08800053596497
},
"combine": {
- "p50": 77.11999863386154,
- "p90": 78.52800190448761,
- "p95": 78.68800312280655,
- "p99": 89.4400030374527
+ "p50": 95.16800194978714,
+ "p90": 110.81600189208984,
+ "p95": 121.56800180673599,
+ "p99": 172.41600155830383
},
"roundtrip": {
- "p50": 127.10399925708771,
- "p90": 132.1280002593994,
- "p95": 133.760005235672,
- "p99": 136.3839954137802
+ "p50": 153.85599434375763,
+ "p90": 186.94399297237396,
+ "p95": 214.27200734615326,
+ "p99": 252.79998779296875
},
"isolatedSum": {
- "p50": 151.2639969587326,
- "p90": 155.07200360298157,
- "p95": 156.54399991035461,
- "p99": 178.8800060749054
+ "p50": 182.27200210094452,
+ "p90": 234.81599986553192,
+ "p95": 264.1280069947243,
+ "p99": 353.5040020942688
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7442,35 +7211,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 79.77599650621414,
- "p90": 81.53600245714188,
- "p95": 82.8159973025322,
- "p99": 89.9839997291565
+ "p50": 96.22400254011154,
+ "p90": 104.19200360774994,
+ "p95": 108.57599973678589,
+ "p99": 115.10399729013443
},
"combine": {
- "p50": 90.87999910116196,
- "p90": 102.88000106811523,
- "p95": 104.41599786281586,
- "p99": 115.58400094509125
+ "p50": 107.07200318574905,
+ "p90": 112.5119999051094,
+ "p95": 116.92799627780914,
+ "p99": 122.43200093507767
},
"roundtrip": {
- "p50": 157.95199573040009,
- "p90": 162.59199380874634,
- "p95": 164.19200599193573,
- "p99": 182.68799781799316
+ "p50": 174.84800517559052,
+ "p90": 183.03999304771423,
+ "p95": 185.31200289726257,
+ "p99": 190.2720034122467
},
"isolatedSum": {
- "p50": 170.6559956073761,
- "p90": 184.4160035252571,
- "p95": 187.23199516534805,
- "p99": 205.56800067424774
+ "p50": 203.2960057258606,
+ "p90": 216.70400351285934,
+ "p95": 225.50399601459503,
+ "p99": 237.5359982252121
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 25145344,
- "combineLogicalBytes": 25145344,
- "fanoutMean": 3.42578125,
- "recvTokensMax": 510,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7479,35 +7248,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 101.24800354242325,
- "p90": 104.22399640083313,
- "p95": 105.3759977221489,
- "p99": 124.67200309038162
+ "p50": 109.6000000834465,
+ "p90": 117.60000139474869,
+ "p95": 121.11999839544296,
+ "p99": 127.10399925708771
},
"combine": {
- "p50": 126.17599964141846,
- "p90": 127.71199643611908,
- "p95": 128.31999361515045,
- "p99": 139.93600010871887
+ "p50": 126.97599828243256,
+ "p90": 133.15199315547943,
+ "p95": 134.68800485134125,
+ "p99": 140.1599943637848
},
"roundtrip": {
- "p50": 208.92800390720367,
- "p90": 213.76000344753265,
- "p95": 214.78399634361267,
- "p99": 229.0239930152893
+ "p50": 211.64800226688385,
+ "p90": 227.80799865722656,
+ "p95": 236.86400055885315,
+ "p99": 291.20001196861267
},
"isolatedSum": {
- "p50": 227.4240031838417,
- "p90": 231.9359928369522,
- "p95": 233.69599133729935,
- "p99": 264.6080031991005
+ "p50": 236.57599836587906,
+ "p90": 250.75199455022812,
+ "p95": 255.8080032467842,
+ "p99": 267.2639936208725
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7515,107 +7284,182 @@
]
},
{
- "id": "cx-cc647506",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3",
- "colorKey": "b300_8d2811e3",
- "comparisonKey": "478acd4108c50326",
+ "id": "cx-fea7e1cd",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_e13e1290",
+ "comparisonKey": "69a9fc41fa25ee9c",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:32.426052+00:00",
+ "generatedAt": "2026-06-29T13:55:18.057516+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_05",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
- "mode": "normal",
+ "mode": "ll",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf",
+ "label": "GB200 EP8 · deepep · bf16 LL",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
+ "achievedFraction": null,
+ "configuredUnits": null,
+ "deviceUnits": 152,
+ "resourceClass": "fixed-kernel",
+ "conformanceClass": "not-applicable",
+ "fixedKernel": true,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "1fa7fe74d0e30a3",
- "workloadId": "set:4:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271886823",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271886823",
- "createdAt": "2026-06-26T23:58:32.426052+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.703999638557434,
- "p90": 59.90400165319443,
- "p95": 62.65600025653839,
- "p99": 69.98399645090103
+ "p50": 85.60000360012054,
+ "p90": 315.42399525642395,
+ "p95": 344.31999921798706,
+ "p99": 359.9039912223816
},
"combine": {
- "p50": 65.88800251483917,
- "p90": 66.43199920654297,
- "p95": 66.72000139951706,
- "p99": 73.7600028514862
+ "p50": 77.504001557827,
+ "p90": 85.7279971241951,
+ "p95": 87.39200234413147,
+ "p99": 94.24000233411789
},
"roundtrip": {
- "p50": 107.16799646615982,
- "p90": 112.83200234174728,
- "p95": 114.14399743080139,
- "p99": 120.44800072908401
+ "p50": 115.77600240707397,
+ "p90": 322.52800464630127,
+ "p95": 349.40800070762634,
+ "p99": 384.64000821113586
},
"isolatedSum": {
- "p50": 122.5920021533966,
- "p90": 126.3360008597374,
- "p95": 129.37600165605545,
- "p99": 143.74399930238724
+ "p50": 163.10400515794754,
+ "p90": 401.15199238061905,
+ "p95": 431.71200156211853,
+ "p99": 454.1439935564995
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 14,
+ "stragglerRank": 5,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 2,
+ "globalTokens": 16,
+ "dispatch": {
+ "p50": 88.57599645853043,
+ "p90": 314.39998745918274,
+ "p95": 344.5119857788086,
+ "p99": 376.3839900493622
+ },
+ "combine": {
+ "p50": 75.07199794054031,
+ "p90": 86.36800199747086,
+ "p95": 88.19200098514557,
+ "p99": 95.58399766683578
+ },
+ "roundtrip": {
+ "p50": 117.95199662446976,
+ "p90": 337.92001008987427,
+ "p95": 386.78398728370667,
+ "p99": 395.26399970054626
+ },
+ "isolatedSum": {
+ "p50": 163.64799439907074,
+ "p90": 400.7679894566536,
+ "p95": 432.70398676395416,
+ "p99": 471.96798771619797
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 21,
+ "stragglerRank": 4,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 4,
+ "globalTokens": 32,
+ "dispatch": {
+ "p50": 91.48799628019333,
+ "p90": 312.9599988460541,
+ "p95": 336.64000034332275,
+ "p99": 369.59999799728394
+ },
+ "combine": {
+ "p50": 82.87999778985977,
+ "p90": 94.40000355243683,
+ "p95": 95.64799815416336,
+ "p99": 98.04800152778625
+ },
+ "roundtrip": {
+ "p50": 123.9359974861145,
+ "p90": 347.80800342559814,
+ "p95": 385.72800159454346,
+ "p99": 398.97599816322327
+ },
+ "isolatedSum": {
+ "p50": 174.3679940700531,
+ "p90": 407.3600023984909,
+ "p95": 432.2879984974861,
+ "p99": 467.6479995250702
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 39,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7624,35 +7468,72 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 58.848001062870026,
- "p90": 60.80000102519989,
- "p95": 62.84800171852112,
- "p99": 74.40000027418137
+ "p50": 98.84800016880035,
+ "p90": 298.2400059700012,
+ "p95": 344.57600116729736,
+ "p99": 375.5840063095093
},
"combine": {
- "p50": 68.00000369548798,
- "p90": 70.30399888753891,
- "p95": 76.99199765920639,
- "p99": 78.5600021481514
+ "p50": 94.52799707651138,
+ "p90": 105.31199723482132,
+ "p95": 107.68000036478043,
+ "p99": 314.1759932041168
},
"roundtrip": {
- "p50": 116.54400080442429,
- "p90": 123.29600006341934,
- "p95": 124.83199685811996,
- "p99": 130.46400249004364
+ "p50": 132.1280002593994,
+ "p90": 362.2719943523407,
+ "p95": 400.41598677635193,
+ "p99": 413.2480025291443
},
"isolatedSum": {
- "p50": 126.848004758358,
- "p90": 131.1039999127388,
- "p95": 139.8399993777275,
- "p99": 152.96000242233276
+ "p50": 193.37599724531174,
+ "p90": 403.55200320482254,
+ "p95": 452.2560015320778,
+ "p99": 689.7599995136261
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 74,
+ "stragglerRank": 4,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 16,
+ "globalTokens": 128,
+ "dispatch": {
+ "p50": 106.36799782514572,
+ "p90": 313.4720027446747,
+ "p95": 347.4560081958771,
+ "p99": 373.24801087379456
+ },
+ "combine": {
+ "p50": 96.67199850082397,
+ "p90": 120.41600048542023,
+ "p95": 121.56800180673599,
+ "p99": 124.54400211572647
+ },
+ "roundtrip": {
+ "p50": 154.2080044746399,
+ "p90": 408.25599431991577,
+ "p95": 423.6159920692444,
+ "p99": 434.3680143356323
+ },
+ "isolatedSum": {
+ "p50": 203.0399963259697,
+ "p90": 433.8880032300949,
+ "p95": 469.02401000261307,
+ "p99": 497.792012989521
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 145,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7661,35 +7542,72 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 75.9039968252182,
- "p90": 78.27199995517731,
- "p95": 79.52000200748444,
- "p99": 87.5839963555336
+ "p50": 125.791996717453,
+ "p90": 322.33598828315735,
+ "p95": 359.74401235580444,
+ "p99": 386.2079977989197
},
"combine": {
- "p50": 78.40000092983246,
- "p90": 79.19999957084656,
- "p95": 79.71200346946716,
- "p99": 83.64800363779068
+ "p50": 122.6240023970604,
+ "p90": 217.82399713993073,
+ "p95": 219.90400552749634,
+ "p99": 228.67199778556824
},
"roundtrip": {
- "p50": 134.24000144004822,
- "p90": 138.20800185203552,
- "p95": 139.5840048789978,
- "p99": 144.3520039319992
+ "p50": 211.07199788093567,
+ "p90": 437.79200315475464,
+ "p95": 477.60000824928284,
+ "p99": 492.92799830436707
},
"isolatedSum": {
- "p50": 154.30399775505066,
- "p90": 157.47199952602386,
- "p95": 159.2320054769516,
- "p99": 171.23199999332428
+ "p50": 248.4159991145134,
+ "p90": 540.1599854230881,
+ "p95": 579.6480178833008,
+ "p99": 614.8799955844879
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 287,
+ "stragglerRank": 5,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 351.39200091362,
+ "p90": 369.376003742218,
+ "p95": 553.4080266952515,
+ "p99": 628.7680268287659
+ },
+ "combine": {
+ "p50": 358.62401127815247,
+ "p90": 366.7519986629486,
+ "p95": 368.4479892253876,
+ "p99": 375.67999958992004
+ },
+ "roundtrip": {
+ "p50": 397.21599221229553,
+ "p90": 642.3680186271667,
+ "p95": 668.1280136108398,
+ "p99": 700.4799842834473
+ },
+ "isolatedSum": {
+ "p50": 710.0160121917725,
+ "p90": 736.1280024051666,
+ "p95": 921.856015920639,
+ "p99": 1004.4480264186859
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 564,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7698,35 +7616,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 103.07200253009796,
- "p90": 105.98400235176086,
- "p95": 107.04000294208527,
- "p99": 113.21599781513214
+ "p50": 596.9600081443787,
+ "p90": 619.4239854812622,
+ "p95": 630.4000020027161,
+ "p99": 876.1919736862183
},
"combine": {
- "p50": 127.13600695133209,
- "p90": 128.1599998474121,
- "p95": 128.57599556446075,
- "p99": 131.04000687599182
+ "p50": 623.8399744033813,
+ "p90": 634.880006313324,
+ "p95": 636.031985282898,
+ "p99": 638.9439702033997
},
"roundtrip": {
- "p50": 209.1200053691864,
- "p90": 214.30400013923645,
- "p95": 216.12800657749176,
- "p99": 229.66399788856506
+ "p50": 675.6160259246826,
+ "p90": 928.5439848899841,
+ "p95": 978.0799746513367,
+ "p99": 1023.2959985733032
},
"isolatedSum": {
- "p50": 230.20800948143005,
- "p90": 234.14400219917297,
- "p95": 235.61599850654602,
- "p99": 244.25600469112396
+ "p50": 1220.79998254776,
+ "p90": 1254.3039917945862,
+ "p95": 1266.431987285614,
+ "p99": 1515.135943889618
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 1104,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7734,107 +7652,108 @@
]
},
{
- "id": "cx-17ec2f4d",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c",
- "colorKey": "b300_2e44c039",
- "comparisonKey": "c7065362244c808a",
+ "id": "cx-67003169",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_09c517b8",
+ "comparisonKey": "068a135c875dc310",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:49:49.525819+00:00",
+ "generatedAt": "2026-06-29T13:55:48.669860+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_09",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
- "mode": "normal",
+ "mode": "ll",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-heavy",
+ "label": "GB200 EP8 · deepep · bf16 LL",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
+ "achievedFraction": null,
+ "configuredUnits": null,
+ "deviceUnits": 152,
+ "resourceClass": "fixed-kernel",
+ "conformanceClass": "not-applicable",
+ "fixedKernel": true,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "22da8b58646609c",
- "workloadId": "set:8:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285651441",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285651441",
- "createdAt": "2026-06-27T09:49:49.525819+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 58.62399935722351,
- "p90": 66.04799628257751,
- "p95": 68.03199648857117,
- "p99": 76.19199901819229
+ "p50": 80.70400357246399,
+ "p90": 308.51200222969055,
+ "p95": 334.1439962387085,
+ "p99": 359.9039912223816
},
"combine": {
- "p50": 56.352000683546066,
- "p90": 64.7680014371872,
- "p95": 64.99200314283371,
- "p99": 68.06399673223495
+ "p50": 83.74399691820145,
+ "p90": 88.83199840784073,
+ "p95": 94.01600062847137,
+ "p99": 98.65599870681763
},
"roundtrip": {
- "p50": 95.45599669218063,
- "p90": 100.96000134944916,
- "p95": 103.04000228643417,
- "p99": 108.96000266075134
+ "p50": 114.14399743080139,
+ "p90": 330.52799105644226,
+ "p95": 371.8400001525879,
+ "p99": 390.6559944152832
},
"isolatedSum": {
- "p50": 114.97600004076958,
- "p90": 130.8159977197647,
- "p95": 133.02399963140488,
- "p99": 144.25599575042725
+ "p50": 164.44800049066544,
+ "p90": 397.3440006375313,
+ "p95": 428.15999686717987,
+ "p99": 458.5599899291992
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 172032,
- "fanoutMean": 1.5,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 14,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7843,35 +7762,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 57.24800005555153,
- "p90": 59.7120001912117,
- "p95": 62.49599903821945,
- "p99": 69.72800195217133
+ "p50": 87.42400258779526,
+ "p90": 318.015992641449,
+ "p95": 347.135990858078,
+ "p99": 371.16798758506775
},
"combine": {
- "p50": 56.223999708890915,
- "p90": 64.7680014371872,
- "p95": 65.0240033864975,
- "p99": 66.17599725723267
+ "p50": 81.727996468544,
+ "p90": 87.99999952316284,
+ "p95": 93.47199648618698,
+ "p99": 94.94400024414062
},
"roundtrip": {
- "p50": 98.04800152778625,
- "p90": 104.25599664449692,
- "p95": 105.6319996714592,
- "p99": 116.03199690580368
+ "p50": 117.15199798345566,
+ "p90": 339.35999870300293,
+ "p95": 386.55999302864075,
+ "p99": 400.9920060634613
},
"isolatedSum": {
- "p50": 113.47199976444244,
- "p90": 124.4800016283989,
- "p95": 127.52000242471695,
- "p99": 135.903999209404
+ "p50": 169.15199905633926,
+ "p90": 406.0159921646118,
+ "p95": 440.607987344265,
+ "p99": 466.1119878292084
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 315392,
- "fanoutMean": 1.375,
- "recvTokensMax": 16,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 21,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -7880,34 +7799,34 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 57.37600103020668,
- "p90": 59.51999872922897,
- "p95": 61.91999837756157,
- "p99": 67.87200272083282
+ "p50": 89.82399851083755,
+ "p90": 309.82398986816406,
+ "p95": 332.09601044654846,
+ "p99": 371.8720078468323
},
"combine": {
- "p50": 56.51199817657471,
- "p90": 64.89600241184235,
- "p95": 65.5680000782013,
- "p99": 68.86400282382965
+ "p50": 82.91199803352356,
+ "p90": 94.71999853849411,
+ "p95": 95.67999839782715,
+ "p99": 98.2080027461052
},
"roundtrip": {
- "p50": 102.33599692583084,
- "p90": 107.39199817180634,
- "p95": 110.1439967751503,
- "p99": 116.80000275373459
+ "p50": 124.38400089740753,
+ "p90": 381.24799728393555,
+ "p95": 397.3439931869507,
+ "p99": 415.45599699020386
},
"isolatedSum": {
- "p50": 113.88799920678139,
- "p90": 124.41600114107132,
- "p95": 127.48799845576286,
- "p99": 136.73600554466248
+ "p50": 172.73599654436111,
+ "p90": 404.5439884066582,
+ "p95": 427.7760088443756,
+ "p99": 470.08001059293747
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 1.34375,
- "recvTokensMax": 32,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 39,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -7917,34 +7836,34 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 58.04799869656563,
- "p90": 60.575999319553375,
- "p95": 64.09599632024765,
- "p99": 75.16799867153168
+ "p50": 98.78399968147278,
+ "p90": 323.87199997901917,
+ "p95": 358.97600650787354,
+ "p99": 381.53600692749023
},
"combine": {
- "p50": 56.8000003695488,
- "p90": 66.04799628257751,
- "p95": 66.3359984755516,
- "p99": 67.64800101518631
+ "p50": 85.50400286912918,
+ "p90": 99.16800260543823,
+ "p95": 100.99200159311295,
+ "p99": 106.55999928712845
},
"roundtrip": {
- "p50": 108.73600095510483,
- "p90": 112.99200356006622,
- "p95": 113.92000317573547,
- "p99": 129.82399761676788
+ "p50": 131.3920021057129,
+ "p90": 366.9759929180145,
+ "p95": 405.63198924064636,
+ "p99": 421.4400053024292
},
"isolatedSum": {
- "p50": 114.84799906611443,
- "p90": 126.62399560213089,
- "p95": 130.43199479579926,
- "p99": 142.815999686718
+ "p50": 184.28800255060196,
+ "p90": 423.0400025844574,
+ "p95": 459.9680081009865,
+ "p99": 488.0960062146187
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1376256,
- "combineLogicalBytes": 1376256,
- "fanoutMean": 1.5,
- "recvTokensMax": 64,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 74,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -7954,34 +7873,34 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 58.720000088214874,
- "p90": 60.83200126886368,
- "p95": 63.040003180503845,
- "p99": 79.52000200748444
+ "p50": 105.40799796581268,
+ "p90": 325.72799921035767,
+ "p95": 357.7280044555664,
+ "p99": 397.5360095500946
},
"combine": {
- "p50": 65.98400324583054,
- "p90": 66.81600213050842,
- "p95": 66.97600334882736,
- "p99": 68.83200258016586
+ "p50": 94.17600184679031,
+ "p90": 118.97599697113037,
+ "p95": 120.70400267839432,
+ "p99": 122.97599762678146
},
"roundtrip": {
- "p50": 107.39199817180634,
- "p90": 109.47199910879135,
- "p95": 111.1999973654747,
- "p99": 118.17599833011627
+ "p50": 153.9199948310852,
+ "p90": 412.0959937572479,
+ "p95": 432.8959882259369,
+ "p99": 448.15999269485474
},
"isolatedSum": {
- "p50": 124.70400333404541,
- "p90": 127.6480033993721,
- "p95": 130.0160065293312,
- "p99": 148.3520045876503
+ "p50": 199.583999812603,
+ "p90": 444.70399618148804,
+ "p95": 478.4320071339607,
+ "p99": 520.5120071768761
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2781184,
- "combineLogicalBytes": 2781184,
- "fanoutMean": 1.515625,
- "recvTokensMax": 128,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 145,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -7991,72 +7910,72 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 70.36799937486649,
- "p90": 76.12799853086472,
- "p95": 77.47200131416321,
- "p99": 94.81599926948547
+ "p50": 115.42399972677231,
+ "p90": 323.2960104942322,
+ "p95": 364.51199650764465,
+ "p99": 395.4240083694458
},
"combine": {
- "p50": 68.51200014352798,
- "p90": 72.7040022611618,
- "p95": 76.86399668455124,
- "p99": 80.19199967384338
+ "p50": 116.86400324106216,
+ "p90": 211.5519940853119,
+ "p95": 219.04000639915466,
+ "p99": 221.02400660514832
},
"roundtrip": {
- "p50": 120.99199742078781,
- "p90": 125.50400197505951,
- "p95": 126.75200402736664,
- "p99": 134.07999277114868
+ "p50": 210.87999641895294,
+ "p90": 444.09599900245667,
+ "p95": 473.9519953727722,
+ "p99": 495.2960014343262
},
"isolatedSum": {
- "p50": 138.87999951839447,
- "p90": 148.83200079202652,
- "p95": 154.33599799871445,
- "p99": 175.00799894332886
+ "p50": 232.28800296783447,
+ "p90": 534.8480045795441,
+ "p95": 583.5520029067993,
+ "p99": 616.4480149745941
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 5533696,
- "combineLogicalBytes": 5533696,
- "fanoutMean": 1.5078125,
- "recvTokensMax": 256,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 287,
+ "stragglerRank": 5,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
},
{
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 78.49600166082382,
- "p90": 80.1599994301796,
- "p95": 81.727996468544,
- "p99": 87.5839963555336
+ "p50": 351.6800105571747,
+ "p90": 367.2960102558136,
+ "p95": 580.4160237312317,
+ "p99": 641.7279839515686
},
"combine": {
- "p50": 82.78399705886841,
- "p90": 90.81599861383438,
- "p95": 91.36000275611877,
- "p99": 93.53599697351456
+ "p50": 363.3280098438263,
+ "p90": 367.5520122051239,
+ "p95": 369.05598640441895,
+ "p99": 375.7439851760864
},
"roundtrip": {
- "p50": 146.2080031633377,
- "p90": 154.91199493408203,
- "p95": 157.98400342464447,
- "p99": 176.06399953365326
+ "p50": 396.9919979572296,
+ "p90": 656.1279892921448,
+ "p95": 681.3759803771973,
+ "p99": 705.7600021362305
},
"isolatedSum": {
- "p50": 161.27999871969223,
- "p90": 170.97599804401398,
- "p95": 173.08799922466278,
- "p99": 181.11999332904816
+ "p50": 715.008020401001,
+ "p90": 734.8480224609375,
+ "p95": 949.4720101356506,
+ "p99": 1017.471969127655
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 11210752,
- "combineLogicalBytes": 11210752,
- "fanoutMean": 1.52734375,
- "recvTokensMax": 512,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 564,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8065,35 +7984,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 91.61599725484848,
- "p90": 93.59999746084213,
- "p95": 94.59199756383896,
- "p99": 101.21600329875946
+ "p50": 583.9679837226868,
+ "p90": 615.3920292854309,
+ "p95": 620.415985584259,
+ "p99": 875.9040236473083
},
"combine": {
- "p50": 116.35199934244156,
- "p90": 125.56800246238708,
- "p95": 126.52799487113953,
- "p99": 127.83999741077423
+ "p50": 616.5440082550049,
+ "p90": 626.6880035400391,
+ "p95": 628.0320286750793,
+ "p99": 636.1600160598755
},
"roundtrip": {
- "p50": 195.39199769496918,
- "p90": 202.94399559497833,
- "p95": 204.6079933643341,
- "p99": 235.83999276161194
+ "p50": 671.455979347229,
+ "p90": 726.0479927062988,
+ "p95": 978.8159728050232,
+ "p99": 1028.2880067825317
},
"isolatedSum": {
- "p50": 207.96799659729004,
- "p90": 219.16799992322922,
- "p95": 221.11999243497849,
- "p99": 229.0560007095337
+ "p50": 1200.5119919776917,
+ "p90": 1242.08003282547,
+ "p95": 1248.4480142593384,
+ "p99": 1512.0640397071838
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 1104,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8101,47 +8020,48 @@
]
},
{
- "id": "cx-3bfb4348",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec",
- "colorKey": "b300_2e44c039",
- "comparisonKey": "5c5e6a7ecdec195f",
+ "id": "cx-22d0d299",
+ "identity": "gb200|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "90f59e3a9d53c258",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:26.448327+00:00",
+ "generatedAt": "2026-06-29T13:59:11.907002+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_16",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "Qwen3.5",
"shape": {
- "hidden": 7168,
+ "hidden": 4096,
"topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
+ "experts": 128,
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -8149,59 +8069,133 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "47fddabb3277bec",
- "workloadId": "set:4:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "ebe68878aa18bb0",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271893428",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271893428",
- "createdAt": "2026-06-26T23:58:26.448327+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.832000613212585,
- "p90": 63.71200084686279,
- "p95": 64.54399973154068,
- "p99": 69.88800317049026
+ "p50": 434.143990278244,
+ "p90": 461.43999695777893,
+ "p95": 621.5680241584778,
+ "p99": 739.0720248222351
},
"combine": {
- "p50": 55.67999929189682,
- "p90": 58.20799991488457,
- "p95": 64.86400216817856,
- "p99": 68.89600306749344
+ "p50": 65.8240020275116,
+ "p90": 307.93601274490356,
+ "p95": 345.12001276016235,
+ "p99": 361.31200194358826
},
"roundtrip": {
- "p50": 94.52799707651138,
- "p90": 99.2640033364296,
- "p95": 101.56799852848053,
- "p99": 107.04000294208527
+ "p50": 490.5279874801636,
+ "p90": 542.4000024795532,
+ "p95": 733.8560223579407,
+ "p99": 905.1839709281921
},
"isolatedSum": {
- "p50": 112.5119999051094,
- "p90": 121.92000076174736,
- "p95": 129.40800189971924,
- "p99": 138.7840062379837
+ "p50": 499.9679923057556,
+ "p90": 769.3760097026825,
+ "p95": 966.6880369186401,
+ "p99": 1100.3840267658234
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 172032,
- "fanoutMean": 1.5,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "combineLogicalBytes": 344064,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 6,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 2,
+ "globalTokens": 16,
+ "dispatch": {
+ "p50": 436.41600012779236,
+ "p90": 472.57599234580994,
+ "p95": 669.0880060195923,
+ "p99": 738.5280132293701
+ },
+ "combine": {
+ "p50": 66.01600348949432,
+ "p90": 296.1280047893524,
+ "p95": 337.76000142097473,
+ "p99": 359.0719997882843
+ },
+ "roundtrip": {
+ "p50": 485.152006149292,
+ "p90": 519.0079808235168,
+ "p95": 712.0959758758545,
+ "p99": 781.4080119132996
+ },
+ "isolatedSum": {
+ "p50": 502.4320036172867,
+ "p90": 768.7039971351624,
+ "p95": 1006.848007440567,
+ "p99": 1097.6000130176544
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 352256,
+ "combineLogicalBytes": 704512,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 12,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 4,
+ "globalTokens": 32,
+ "dispatch": {
+ "p50": 430.9439957141876,
+ "p90": 464.06400203704834,
+ "p95": 625.2800226211548,
+ "p99": 730.6560277938843
+ },
+ "combine": {
+ "p50": 68.4799998998642,
+ "p90": 312.22400069236755,
+ "p95": 346.5920090675354,
+ "p99": 364.28800225257874
+ },
+ "roundtrip": {
+ "p50": 487.36000061035156,
+ "p90": 514.4320130348206,
+ "p95": 689.631998538971,
+ "p99": 788.0319952964783
+ },
+ "isolatedSum": {
+ "p50": 499.4239956140518,
+ "p90": 776.2880027294159,
+ "p95": 971.8720316886902,
+ "p99": 1094.944030046463
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 692224,
+ "combineLogicalBytes": 1384448,
+ "fanoutMean": 5.28125,
+ "recvTokensMax": 26,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8210,35 +8204,72 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 56.92800134420395,
- "p90": 59.039998799562454,
- "p95": 60.5119988322258,
- "p99": 66.04799628257751
+ "p50": 437.824010848999,
+ "p90": 520.7359790802002,
+ "p95": 702.4000287055969,
+ "p99": 772.704005241394
},
"combine": {
- "p50": 56.63999915122986,
- "p90": 66.23999774456024,
- "p95": 66.56000018119812,
- "p99": 78.91199737787247
+ "p50": 70.3359991312027,
+ "p90": 304.76799607276917,
+ "p95": 341.5679931640625,
+ "p99": 357.9519987106323
},
"roundtrip": {
- "p50": 107.80800133943558,
- "p90": 113.43999952077866,
- "p95": 114.656001329422,
- "p99": 124.22399967908859
+ "p50": 490.6879961490631,
+ "p90": 520.19202709198,
+ "p95": 694.815993309021,
+ "p99": 798.4960079193115
},
"isolatedSum": {
- "p50": 113.56800049543381,
- "p90": 125.2799965441227,
- "p95": 127.07199901342392,
- "p99": 144.95999366044998
+ "p50": 508.1600099802017,
+ "p90": 825.5039751529694,
+ "p95": 1043.9680218696594,
+ "p99": 1130.6560039520264
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1376256,
- "combineLogicalBytes": 1376256,
- "fanoutMean": 1.5,
- "recvTokensMax": 64,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1372160,
+ "combineLogicalBytes": 2744320,
+ "fanoutMean": 5.234375,
+ "recvTokensMax": 49,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 16,
+ "globalTokens": 128,
+ "dispatch": {
+ "p50": 437.9520118236542,
+ "p90": 488.41598629951477,
+ "p95": 633.5359811782837,
+ "p99": 747.0399737358093
+ },
+ "combine": {
+ "p50": 71.71200215816498,
+ "p90": 311.74400448799133,
+ "p95": 348.224014043808,
+ "p99": 364.1279935836792
+ },
+ "roundtrip": {
+ "p50": 488.2879853248596,
+ "p90": 523.0399966239929,
+ "p95": 684.4800114631653,
+ "p99": 813.4719729423523
+ },
+ "isolatedSum": {
+ "p50": 509.66401398181915,
+ "p90": 800.1599907875061,
+ "p95": 981.7599952220917,
+ "p99": 1111.1679673194885
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 2732032,
+ "combineLogicalBytes": 5464064,
+ "fanoutMean": 5.2109375,
+ "recvTokensMax": 94,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8247,35 +8278,72 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 67.90400296449661,
- "p90": 74.07999783754349,
- "p95": 75.93599706888199,
- "p99": 82.2720006108284
+ "p50": 432.5760006904602,
+ "p90": 461.66399121284485,
+ "p95": 627.3599863052368,
+ "p99": 729.8240065574646
},
"combine": {
- "p50": 67.90400296449661,
- "p90": 70.0799971818924,
- "p95": 77.05599814653397,
- "p99": 79.26400005817413
+ "p50": 74.46400076150894,
+ "p90": 332.92800188064575,
+ "p95": 355.1360070705414,
+ "p99": 370.3039884567261
},
"roundtrip": {
- "p50": 120.4800009727478,
- "p90": 124.89599734544754,
- "p95": 126.27199292182922,
- "p99": 140.99200069904327
+ "p50": 484.6400022506714,
+ "p90": 511.87199354171753,
+ "p95": 675.9679913520813,
+ "p99": 799.5520234107971
},
"isolatedSum": {
- "p50": 135.80800592899323,
- "p90": 144.15999501943588,
- "p95": 152.99199521541595,
- "p99": 161.53600066900253
+ "p50": 507.04000145196915,
+ "p90": 794.5919930934906,
+ "p95": 982.4959933757782,
+ "p99": 1100.1279950141907
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 5533696,
- "combineLogicalBytes": 5533696,
- "fanoutMean": 1.5078125,
- "recvTokensMax": 256,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 5562368,
+ "combineLogicalBytes": 11124736,
+ "fanoutMean": 5.3046875,
+ "recvTokensMax": 186,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 430.30399084091187,
+ "p90": 462.336003780365,
+ "p95": 612.4160289764404,
+ "p99": 742.3359751701355
+ },
+ "combine": {
+ "p50": 86.56000345945358,
+ "p90": 317.8560137748718,
+ "p95": 353.2800078392029,
+ "p99": 383.87200236320496
+ },
+ "roundtrip": {
+ "p50": 488.8319969177246,
+ "p90": 514.4320130348206,
+ "p95": 559.8400235176086,
+ "p99": 780.1920175552368
+ },
+ "isolatedSum": {
+ "p50": 516.8639943003654,
+ "p90": 780.1920175552368,
+ "p95": 965.6960368156433,
+ "p99": 1126.2079775333405
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 11096064,
+ "combineLogicalBytes": 22192128,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 358,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8284,35 +8352,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 91.77599847316742,
- "p90": 94.17600184679031,
- "p95": 95.74399888515472,
- "p99": 114.20799791812897
+ "p50": 431.7440092563629,
+ "p90": 473.56799244880676,
+ "p95": 657.9840183258057,
+ "p99": 752.8960108757019
},
"combine": {
- "p50": 116.28799885511398,
- "p90": 119.19999867677689,
- "p95": 126.36800110340118,
- "p99": 130.43199479579926
+ "p50": 102.7199998497963,
+ "p90": 332.7679932117462,
+ "p95": 354.91201281547546,
+ "p99": 391.1359906196594
},
"roundtrip": {
- "p50": 194.0159946680069,
- "p90": 201.08799636363983,
- "p95": 202.84800231456757,
- "p99": 212.92799711227417
+ "p50": 511.2000107765198,
+ "p90": 537.4400019645691,
+ "p95": 744.0320253372192,
+ "p99": 820.7679986953735
},
"isolatedSum": {
- "p50": 208.0639973282814,
- "p90": 213.3760005235672,
- "p95": 222.1119999885559,
- "p99": 244.63999271392822
+ "p50": 534.4640091061592,
+ "p90": 806.335985660553,
+ "p95": 1012.8960311412811,
+ "p99": 1144.0320014953613
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 22282240,
+ "combineLogicalBytes": 44564480,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 699,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8320,47 +8388,48 @@
]
},
{
- "id": "cx-272778fb",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366",
- "colorKey": "b300_6d2e4735",
- "comparisonKey": "d0ac3c3db4cc1004",
+ "id": "cx-9ac4ce24",
+ "identity": "gb200|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "1ee739e4974cb32b",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:50:21.321707+00:00",
+ "generatedAt": "2026-06-29T14:00:58.916373+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_12",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-heavy+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "shape 5120/8/160",
"shape": {
- "hidden": 7168,
+ "hidden": 5120,
"topk": 8,
- "experts": 288,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy+eplb",
+ "experts": 160,
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
+ "eplbEnabled": false,
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -8368,59 +8437,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "5a3054422534366",
- "workloadId": "set:8:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 7.40625,
- "eplbImbalanceAfter": 1.0004417782738093,
- "backendVersion": "1.2.1",
+ "traceSignature": "13e2b193b87a112",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285654027",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285654027",
- "createdAt": "2026-06-27T09:50:21.321707+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 1764.9600505828857,
- "p90": 2543.519973754883,
- "p95": 2872.8959560394287,
- "p99": 3412.3198986053467
+ "p50": 413.88800740242004,
+ "p90": 448.38398694992065,
+ "p95": 665.4719710350037,
+ "p99": 718.5279726982117
},
"combine": {
- "p50": 1750.1120567321777,
- "p90": 1847.872018814087,
- "p95": 2633.280038833618,
- "p99": 2927.1678924560547
+ "p50": 66.52799993753433,
+ "p90": 307.0720136165619,
+ "p95": 347.29599952697754,
+ "p99": 358.72000455856323
},
"roundtrip": {
- "p50": 1795.583963394165,
- "p90": 1911.6159677505493,
- "p95": 2635.5841159820557,
- "p99": 2994.0481185913086
+ "p50": 462.5920057296753,
+ "p90": 680.832028388977,
+ "p95": 723.8079905509949,
+ "p99": 898.464024066925
},
"isolatedSum": {
- "p50": 3515.0721073150635,
- "p90": 4391.39199256897,
- "p95": 5506.175994873047,
- "p99": 6339.487791061401
+ "p50": 480.4160073399544,
+ "p90": 755.4560005664825,
+ "p95": 1012.7679705619812,
+ "p99": 1077.247977256775
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 559104,
- "combineLogicalBytes": 559104,
- "fanoutMean": 4.875,
- "recvTokensMax": 6,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 215040,
+ "combineLogicalBytes": 430080,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 8,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8429,35 +8498,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 1758.4320306777954,
- "p90": 2088.1919860839844,
- "p95": 2806.4639568328857,
- "p99": 3415.616035461426
+ "p50": 416.51201248168945,
+ "p90": 624.3839859962463,
+ "p95": 660.2240204811096,
+ "p99": 708.4159851074219
},
"combine": {
- "p50": 1750.3039836883545,
- "p90": 1858.9119911193848,
- "p95": 2584.0001106262207,
- "p99": 2952.4800777435303
+ "p50": 67.90400296449661,
+ "p90": 311.90401315689087,
+ "p95": 347.51999378204346,
+ "p99": 360.1920008659363
},
"roundtrip": {
- "p50": 1806.7200183868408,
- "p90": 1925.663948059082,
- "p95": 2728.480100631714,
- "p99": 3011.45601272583
+ "p50": 462.75201439857483,
+ "p90": 490.4960095882416,
+ "p95": 711.7120027542114,
+ "p99": 755.1680207252502
},
"isolatedSum": {
- "p50": 3508.73601436615,
- "p90": 3947.103977203369,
- "p95": 5390.464067459106,
- "p99": 6368.096113204956
+ "p50": 484.41601544618607,
+ "p90": 936.2879991531372,
+ "p95": 1007.7440142631531,
+ "p99": 1068.6079859733582
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1175552,
- "combineLogicalBytes": 1175552,
- "fanoutMean": 5.125,
- "recvTokensMax": 12,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 440320,
+ "combineLogicalBytes": 880640,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8466,35 +8535,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 1758.9119672775269,
- "p90": 2487.6160621643066,
- "p95": 2937.9520416259766,
- "p99": 3416.5759086608887
+ "p50": 414.11200165748596,
+ "p90": 459.4239890575409,
+ "p95": 659.775972366333,
+ "p99": 783.9360237121582
},
"combine": {
- "p50": 1761.1839771270752,
- "p90": 1895.840048789978,
- "p95": 2682.528018951416,
- "p99": 3779.8080444335938
+ "p50": 69.85600292682648,
+ "p90": 311.5200102329254,
+ "p95": 345.40799260139465,
+ "p99": 358.5599958896637
},
"roundtrip": {
- "p50": 1816.7680501937866,
- "p90": 1913.0879640579224,
- "p95": 2590.4319286346436,
- "p99": 2941.8559074401855
+ "p50": 461.40798926353455,
+ "p90": 505.69599866867065,
+ "p95": 727.7759909629822,
+ "p99": 896.2879776954651
},
"isolatedSum": {
- "p50": 3520.095944404602,
- "p90": 4383.456110954285,
- "p95": 5620.480060577393,
- "p99": 7196.383953094482
+ "p50": 483.96800458431244,
+ "p90": 770.9439992904663,
+ "p95": 1005.1839649677277,
+ "p99": 1142.496019601822
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2465792,
- "combineLogicalBytes": 2465792,
- "fanoutMean": 5.375,
+ "dispatchLogicalBytes": 870400,
+ "combineLogicalBytes": 1740800,
+ "fanoutMean": 5.3125,
"recvTokensMax": 25,
- "stragglerRank": 4,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8503,35 +8572,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 1764.6080255508423,
- "p90": 2651.520013809204,
- "p95": 3028.7680625915527,
- "p99": 5341.599941253662
+ "p50": 409.66400504112244,
+ "p90": 435.5199933052063,
+ "p95": 646.8160152435303,
+ "p99": 696.6400146484375
},
"combine": {
- "p50": 1762.5279426574707,
- "p90": 1947.9999542236328,
- "p95": 2684.351921081543,
- "p99": 13385.215759277344
+ "p50": 72.09599763154984,
+ "p90": 340.12800455093384,
+ "p95": 353.2159924507141,
+ "p99": 366.65600538253784
},
"roundtrip": {
- "p50": 1826.6880512237549,
- "p90": 1935.968041419983,
- "p95": 2620.6719875335693,
- "p99": 2976.8319129943848
+ "p50": 466.75199270248413,
+ "p90": 502.3040175437927,
+ "p95": 720.3199863433838,
+ "p99": 801.1839985847473
},
"isolatedSum": {
- "p50": 3527.135968208313,
- "p90": 4599.519968032837,
- "p95": 5713.119983673096,
- "p99": 18726.815700531006
+ "p50": 481.7600026726723,
+ "p90": 775.6479978561401,
+ "p95": 1000.0320076942444,
+ "p99": 1063.2960200309753
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4988928,
- "combineLogicalBytes": 4988928,
- "fanoutMean": 5.4375,
- "recvTokensMax": 47,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1735680,
+ "combineLogicalBytes": 3471360,
+ "fanoutMean": 5.296875,
+ "recvTokensMax": 50,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8540,35 +8609,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 1762.2079849243164,
- "p90": 2232.2559356689453,
- "p95": 2812.9279613494873,
- "p99": 3426.3999462127686
+ "p50": 417.02398657798767,
+ "p90": 445.1200067996979,
+ "p95": 636.9600296020508,
+ "p99": 717.9520130157471
},
"combine": {
- "p50": 1772.7359533309937,
- "p90": 2522.815942764282,
- "p95": 2989.471912384033,
- "p99": 6136.096000671387
+ "p50": 73.2479989528656,
+ "p90": 326.1120021343231,
+ "p95": 354.3680012226105,
+ "p99": 369.08799409866333
},
"roundtrip": {
- "p50": 1848.736047744751,
- "p90": 2838.3679389953613,
- "p95": 3572.223901748657,
- "p99": 5888.12780380249
+ "p50": 462.6559913158417,
+ "p90": 507.1359872817993,
+ "p95": 715.4880166053772,
+ "p99": 774.6880054473877
},
"isolatedSum": {
- "p50": 3534.94393825531,
- "p90": 4755.0718784332275,
- "p95": 5802.3998737335205,
- "p99": 9562.495946884155
+ "p50": 490.27198553085327,
+ "p90": 771.232008934021,
+ "p95": 991.3280308246613,
+ "p99": 1087.0400071144104
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9791488,
- "combineLogicalBytes": 9791488,
- "fanoutMean": 5.3359375,
- "recvTokensMax": 94,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 3456000,
+ "combineLogicalBytes": 6912000,
+ "fanoutMean": 5.2734375,
+ "recvTokensMax": 93,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8577,35 +8646,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 1787.775993347168,
- "p90": 3058.4959983825684,
- "p95": 4017.824172973633,
- "p99": 5667.263984680176
+ "p50": 416.6400134563446,
+ "p90": 596.7360138893127,
+ "p95": 663.5839939117432,
+ "p99": 754.3359994888306
},
"combine": {
- "p50": 1784.991979598999,
- "p90": 2866.368055343628,
- "p95": 3568.7999725341797,
- "p99": 5742.527961730957
+ "p50": 76.76800340414047,
+ "p90": 349.37599301338196,
+ "p95": 362.5600039958954,
+ "p99": 374.08000230789185
},
"roundtrip": {
- "p50": 1844.5760011672974,
- "p90": 2729.248046875,
- "p95": 3046.3039875030518,
- "p99": 5490.079879760742
+ "p50": 461.9840085506439,
+ "p90": 490.62401056289673,
+ "p95": 718.3679938316345,
+ "p99": 761.5039944648743
},
"isolatedSum": {
- "p50": 3572.767972946167,
- "p90": 5924.864053726196,
- "p95": 7586.6241455078125,
- "p99": 11409.791946411133
+ "p50": 493.4080168604851,
+ "p90": 946.1120069026947,
+ "p95": 1026.1439979076385,
+ "p99": 1128.4160017967224
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19410944,
- "combineLogicalBytes": 19410944,
- "fanoutMean": 5.2890625,
- "recvTokensMax": 178,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 6988800,
+ "combineLogicalBytes": 13977600,
+ "fanoutMean": 5.33203125,
+ "recvTokensMax": 179,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8614,35 +8683,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 1779.520034790039,
- "p90": 2049.920082092285,
- "p95": 2758.0161094665527,
- "p99": 3381.7598819732666
+ "p50": 415.2640104293823,
+ "p90": 447.55199551582336,
+ "p95": 679.6159744262695,
+ "p99": 781.2479734420776
},
"combine": {
- "p50": 1785.2799892425537,
- "p90": 1907.647967338562,
- "p95": 2608.544111251831,
- "p99": 2964.8640155792236
+ "p50": 90.30400216579437,
+ "p90": 112.31999844312668,
+ "p95": 341.0879969596863,
+ "p99": 384.223997592926
},
"roundtrip": {
- "p50": 1863.2320165634155,
- "p90": 1987.104058265686,
- "p95": 2669.055938720703,
- "p99": 3054.5599460601807
+ "p50": 479.8080027103424,
+ "p90": 509.5040202140808,
+ "p95": 724.6400117874146,
+ "p99": 845.6000089645386
},
"isolatedSum": {
- "p50": 3564.800024032593,
- "p90": 3957.568049430847,
- "p95": 5366.560220718384,
- "p99": 6346.62389755249
+ "p50": 505.5680125951767,
+ "p90": 559.87199395895,
+ "p95": 1020.7039713859558,
+ "p99": 1165.4719710350037
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38678528,
- "combineLogicalBytes": 38678528,
- "fanoutMean": 5.26953125,
- "recvTokensMax": 360,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 13987840,
+ "combineLogicalBytes": 27975680,
+ "fanoutMean": 5.3359375,
+ "recvTokensMax": 355,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8651,35 +8720,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 1799.9680042266846,
- "p90": 2006.6559314727783,
- "p95": 2855.6160926818848,
- "p99": 3409.8880290985107
+ "p50": 432.3520064353943,
+ "p90": 658.9760184288025,
+ "p95": 717.8879976272583,
+ "p99": 915.6799912452698
},
"combine": {
- "p50": 1824.8319625854492,
- "p90": 3164.6718978881836,
- "p95": 5201.375961303711,
- "p99": 6098.78396987915
+ "p50": 109.15199667215347,
+ "p90": 117.72800236940384,
+ "p95": 354.46399450302124,
+ "p99": 394.27199959754944
},
"roundtrip": {
- "p50": 1909.2479944229126,
- "p90": 3033.3759784698486,
- "p95": 5025.9199142456055,
- "p99": 5985.599994659424
+ "p50": 504.41598892211914,
+ "p90": 538.8799905776978,
+ "p95": 754.0159821510315,
+ "p99": 827.9680013656616
},
"isolatedSum": {
- "p50": 3624.799966812134,
- "p90": 5171.327829360962,
- "p95": 8056.992053985596,
- "p99": 9508.671998977661
+ "p50": 541.5040031075478,
+ "p90": 776.7040207982063,
+ "p95": 1072.3519921302795,
+ "p99": 1309.9519908428192
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77285376,
- "combineLogicalBytes": 77285376,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 704,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 27837440,
+ "combineLogicalBytes": 55674880,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 699,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8687,47 +8756,48 @@
]
},
{
- "id": "cx-d77e8004",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b",
- "colorKey": "b300_7ab35d34",
- "comparisonKey": "9b1abb398e739521",
+ "id": "cx-f128fb76",
+ "identity": "gb200|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "a413134cd507bf47",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:48:58.943687+00:00",
+ "generatedAt": "2026-06-29T14:02:46.682266+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_17",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-mild",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "MiniMax-M3",
"shape": {
- "hidden": 7168,
+ "hidden": 6144,
"topk": 8,
"experts": 256,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild",
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -8735,59 +8805,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "f3df51be7d5c32b",
- "workloadId": "set:8:289b7f9c14292e96",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285630258",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285630258",
- "createdAt": "2026-06-27T09:48:58.943687+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.40800127387047,
- "p90": 59.647999703884125,
- "p95": 62.49599903821945,
- "p99": 74.07999783754349
+ "p50": 446.9119906425476,
+ "p90": 482.87999629974365,
+ "p95": 691.7120218276978,
+ "p99": 746.9120025634766
},
"combine": {
- "p50": 66.56000018119812,
- "p90": 67.26399809122086,
- "p95": 67.64800101518631,
- "p99": 78.5600021481514
+ "p50": 73.60000163316727,
+ "p90": 332.63999223709106,
+ "p95": 356.83199763298035,
+ "p99": 371.8400001525879
},
"roundtrip": {
- "p50": 109.6000000834465,
- "p90": 113.69600147008896,
- "p95": 114.52800035476685,
- "p99": 122.65600264072418
+ "p50": 498.75199794769287,
+ "p90": 718.6880111694336,
+ "p95": 828.0959725379944,
+ "p99": 1953.1519412994385
},
"isolatedSum": {
- "p50": 123.96800145506859,
- "p90": 126.91199779510498,
- "p95": 130.14400005340576,
- "p99": 152.63999998569489
+ "p50": 520.5119922757149,
+ "p90": 815.5199885368347,
+ "p95": 1048.544019460678,
+ "p99": 1118.7520027160645
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 587776,
- "combineLogicalBytes": 587776,
- "fanoutMean": 5.125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 270336,
+ "combineLogicalBytes": 540672,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8796,35 +8866,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 57.5999990105629,
- "p90": 59.07199904322624,
- "p95": 61.43999844789505,
- "p99": 67.52000004053116
+ "p50": 444.44799423217773,
+ "p90": 638.0800008773804,
+ "p95": 707.4559926986694,
+ "p99": 760.8640193939209
},
"combine": {
- "p50": 66.84800237417221,
- "p90": 67.32799857854843,
- "p95": 68.2239979505539,
- "p99": 82.2720006108284
+ "p50": 73.15199822187424,
+ "p90": 341.18399024009705,
+ "p95": 357.695996761322,
+ "p99": 368.0959939956665
},
"roundtrip": {
- "p50": 107.32799768447876,
- "p90": 110.11199653148651,
- "p95": 111.51999980211258,
- "p99": 119.19999867677689
+ "p50": 491.58400297164917,
+ "p90": 511.3919973373413,
+ "p95": 693.1520104408264,
+ "p99": 797.0560193061829
},
"isolatedSum": {
- "p50": 124.44800138473511,
- "p90": 126.39999762177467,
- "p95": 129.66399639844894,
- "p99": 149.79200065135956
+ "p50": 517.599992454052,
+ "p90": 979.2639911174774,
+ "p95": 1065.1519894599915,
+ "p99": 1128.9600133895874
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1103872,
- "combineLogicalBytes": 1103872,
- "fanoutMean": 4.8125,
- "recvTokensMax": 16,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 528384,
+ "combineLogicalBytes": 1056768,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8833,35 +8903,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 58.75200033187866,
- "p90": 60.70400029420853,
- "p95": 61.91999837756157,
- "p99": 70.91200351715088
+ "p50": 442.49600172042847,
+ "p90": 603.0719876289368,
+ "p95": 717.1199917793274,
+ "p99": 807.6159954071045
},
"combine": {
- "p50": 68.80000233650208,
- "p90": 76.99199765920639,
- "p95": 77.27999985218048,
- "p99": 78.52800190448761
+ "p50": 75.42400062084198,
+ "p90": 344.35200691223145,
+ "p95": 359.391987323761,
+ "p99": 377.6000142097473
},
"roundtrip": {
- "p50": 115.80800265073776,
- "p90": 121.8239963054657,
- "p95": 123.52000176906586,
- "p99": 133.66399705410004
+ "p50": 488.12800645828247,
+ "p90": 520.7359790802002,
+ "p95": 765.3440237045288,
+ "p99": 900.160014629364
},
"isolatedSum": {
- "p50": 127.55200266838074,
- "p90": 137.69599795341492,
- "p95": 139.19999822974205,
- "p99": 149.4400054216385
+ "p50": 517.9200023412704,
+ "p90": 947.4239945411682,
+ "p95": 1076.5119791030884,
+ "p99": 1185.2160096168518
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2250752,
- "combineLogicalBytes": 2250752,
- "fanoutMean": 4.90625,
- "recvTokensMax": 31,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1062912,
+ "combineLogicalBytes": 2125824,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8870,35 +8940,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 60.15999987721443,
- "p90": 65.15199691057205,
- "p95": 66.97600334882736,
- "p99": 73.18399846553802
+ "p50": 449.0559995174408,
+ "p90": 476.7040014266968,
+ "p95": 684.831976890564,
+ "p99": 756.3520073890686
},
"combine": {
- "p50": 68.60800087451935,
- "p90": 77.15199887752533,
- "p95": 77.82399654388428,
- "p99": 80.03199845552444
+ "p50": 77.05599814653397,
+ "p90": 343.29599142074585,
+ "p95": 359.96800661087036,
+ "p99": 373.56799840927124
},
"roundtrip": {
- "p50": 120.4800009727478,
- "p90": 125.44000148773193,
- "p95": 126.71999633312225,
- "p99": 144.3520039319992
+ "p50": 503.29601764678955,
+ "p90": 709.4079852104187,
+ "p95": 812.2239708900452,
+ "p99": 1050.6559610366821
},
"isolatedSum": {
- "p50": 128.76800075173378,
- "p90": 142.30399578809738,
- "p95": 144.79999989271164,
- "p99": 153.21599692106247
+ "p50": 526.1119976639748,
+ "p90": 819.9999928474426,
+ "p95": 1044.7999835014343,
+ "p99": 1129.9200057983398
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4472832,
- "combineLogicalBytes": 4472832,
- "fanoutMean": 4.875,
- "recvTokensMax": 62,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2131968,
+ "combineLogicalBytes": 4263936,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8907,35 +8977,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 63.551999628543854,
- "p90": 69.95200365781784,
- "p95": 71.10399752855301,
- "p99": 88.128000497818
+ "p50": 447.488009929657,
+ "p90": 475.48800706863403,
+ "p95": 701.1520266532898,
+ "p99": 776.9280076026917
},
"combine": {
- "p50": 76.89599692821503,
- "p90": 78.87999713420868,
- "p95": 79.26400005817413,
- "p99": 82.46400207281113
+ "p50": 78.33600044250488,
+ "p90": 330.3360044956207,
+ "p95": 364.1279935836792,
+ "p99": 375.5840063095093
},
"roundtrip": {
- "p50": 121.05599790811539,
- "p90": 124.89599734544754,
- "p95": 126.91199779510498,
- "p99": 135.68000495433807
+ "p50": 499.1999864578247,
+ "p90": 527.9039740562439,
+ "p95": 778.8800001144409,
+ "p99": 872.8960156440735
},
"isolatedSum": {
- "p50": 140.44799655675888,
- "p90": 148.83200079202652,
- "p95": 150.36799758672714,
- "p99": 170.59200257062912
+ "p50": 525.8240103721619,
+ "p90": 805.8240115642548,
+ "p95": 1065.280020236969,
+ "p99": 1152.512013912201
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 8888320,
- "combineLogicalBytes": 8888320,
- "fanoutMean": 4.84375,
- "recvTokensMax": 124,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4251648,
+ "combineLogicalBytes": 8503296,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8944,35 +9014,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 70.68800181150436,
- "p90": 76.51200145483017,
- "p95": 77.44000107049942,
- "p99": 82.49600231647491
+ "p50": 449.1199851036072,
+ "p90": 478.5279929637909,
+ "p95": 704.4159770011902,
+ "p99": 780.8640003204346
},
"combine": {
- "p50": 79.19999957084656,
- "p90": 79.74400371313095,
- "p95": 80.19199967384338,
- "p99": 89.82399851083755
+ "p50": 81.95199817419052,
+ "p90": 327.0080089569092,
+ "p95": 361.6639971733093,
+ "p99": 380.92800974845886
},
"roundtrip": {
- "p50": 133.40799510478973,
- "p90": 137.82399892807007,
- "p95": 139.0720009803772,
- "p99": 155.29599785804749
+ "p50": 499.64800477027893,
+ "p90": 522.5279927253723,
+ "p95": 745.4720139503479,
+ "p99": 810.591995716095
},
"isolatedSum": {
- "p50": 149.88800138235092,
- "p90": 156.25600516796112,
- "p95": 157.6320007443428,
- "p99": 172.32000082731247
+ "p50": 531.0719832777977,
+ "p90": 805.5360019207001,
+ "p95": 1066.0799741744995,
+ "p99": 1161.7920100688934
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 17733632,
- "combineLogicalBytes": 17733632,
- "fanoutMean": 4.83203125,
- "recvTokensMax": 248,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 8454144,
+ "combineLogicalBytes": 16908288,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -8981,35 +9051,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 87.5839963555336,
- "p90": 90.87999910116196,
- "p95": 91.67999774217606,
- "p99": 98.9760011434555
+ "p50": 450.75199007987976,
+ "p90": 663.3599996566772,
+ "p95": 725.600004196167,
+ "p99": 879.4559836387634
},
"combine": {
- "p50": 92.47999638319016,
- "p90": 101.40799731016159,
- "p95": 101.98400169610977,
- "p99": 115.64800143241882
+ "p50": 96.96000069379807,
+ "p90": 338.591992855072,
+ "p95": 368.22399497032166,
+ "p99": 396.06401324272156
},
"roundtrip": {
- "p50": 159.55199301242828,
- "p90": 163.29599916934967,
- "p95": 165.43999314308167,
- "p99": 171.07200622558594
+ "p50": 517.6960229873657,
+ "p90": 542.6560044288635,
+ "p95": 798.1439828872681,
+ "p99": 868.9919710159302
},
"isolatedSum": {
- "p50": 180.06399273872375,
- "p90": 192.28799641132355,
- "p95": 193.66399943828583,
- "p99": 214.62400257587433
+ "p50": 547.7119907736778,
+ "p90": 1001.9519925117493,
+ "p95": 1093.8239991664886,
+ "p99": 1275.519996881485
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 35424256,
- "combineLogicalBytes": 35424256,
- "fanoutMean": 4.826171875,
- "recvTokensMax": 492,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 16711680,
+ "combineLogicalBytes": 33423360,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9018,35 +9088,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 103.67999970912933,
- "p90": 105.56799918413162,
- "p95": 108.70400071144104,
- "p99": 117.91999638080597
+ "p50": 447.00801372528076,
+ "p90": 475.0399887561798,
+ "p95": 717.2480225563049,
+ "p99": 823.8720297813416
},
"combine": {
- "p50": 127.87200510501862,
- "p90": 128.9599984884262,
- "p95": 130.0799995660782,
- "p99": 138.5280042886734
+ "p50": 114.97599631547928,
+ "p90": 121.21599912643433,
+ "p95": 124.51200187206268,
+ "p99": 421.4079976081848
},
"roundtrip": {
- "p50": 214.1440063714981,
- "p90": 220.47999501228333,
- "p95": 222.49600291252136,
- "p99": 228.19200158119202
+ "p50": 533.6319804191589,
+ "p90": 553.6959767341614,
+ "p95": 573.9520192146301,
+ "p99": 891.9360041618347
},
"isolatedSum": {
- "p50": 231.55200481414795,
- "p90": 234.52799767255783,
- "p95": 238.78400027751923,
- "p99": 256.44800066947937
+ "p50": 561.98401004076,
+ "p90": 596.2559878826141,
+ "p95": 841.7600244283676,
+ "p99": 1245.2800273895264
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 70160384,
- "combineLogicalBytes": 70160384,
- "fanoutMean": 4.779296875,
- "recvTokensMax": 987,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 33288192,
+ "combineLogicalBytes": 66576384,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9054,16 +9124,16 @@
]
},
{
- "id": "cx-945e07fc",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243",
- "colorKey": "b300_5e3d915a",
- "comparisonKey": "7cc654cb13c70aa7",
+ "id": "cx-67cf349f",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_6d63c708",
+ "comparisonKey": "ef8fcf8c07a567e5",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:49:00.117687+00:00",
+ "generatedAt": "2026-06-29T13:49:55.837432+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
@@ -9071,30 +9141,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-mild+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 288,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild+eplb",
+ "experts": 256,
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
+ "eplbEnabled": false,
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -9102,59 +9173,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "16babcaf4204243",
- "workloadId": "set:8:289b7f9c14292e96",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 2.61328125,
- "eplbImbalanceAfter": 1.0009114583333334,
- "backendVersion": "1.2.1",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285632999",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285632999",
- "createdAt": "2026-06-27T09:49:00.117687+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 55.67999929189682,
- "p90": 57.72799998521805,
- "p95": 59.87200140953064,
- "p99": 69.50400024652481
+ "p50": 94.94400024414062,
+ "p90": 106.55999928712845,
+ "p95": 111.61600053310394,
+ "p99": 120.28799951076508
},
"combine": {
- "p50": 65.50399959087372,
- "p90": 66.17599725723267,
- "p95": 66.30399823188782,
- "p99": 69.31199878454208
+ "p50": 73.18399846553802,
+ "p90": 78.72000336647034,
+ "p95": 82.30400085449219,
+ "p99": 89.12000060081482
},
"roundtrip": {
- "p50": 104.54399883747101,
- "p90": 111.42399907112122,
- "p95": 112.28799819946289,
- "p99": 117.60000139474869
+ "p50": 250.46399235725403,
+ "p90": 263.61599564552307,
+ "p95": 267.61600375175476,
+ "p99": 275.07200837135315
},
"isolatedSum": {
- "p50": 121.18399888277054,
- "p90": 123.90399724245071,
- "p95": 126.17599964141846,
- "p99": 138.8159990310669
+ "p50": 168.12799870967865,
+ "p90": 185.28000265359879,
+ "p95": 193.92000138759613,
+ "p99": 209.4080001115799
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
+ "dispatchLogicalBytes": 315392,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
"recvTokensMax": 7,
- "stragglerRank": 7,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9163,72 +9234,72 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 55.80800026655197,
- "p90": 57.66399949789047,
- "p95": 58.9120015501976,
- "p99": 63.87200206518173
+ "p50": 95.2640026807785,
+ "p90": 107.39199817180634,
+ "p95": 111.51999980211258,
+ "p99": 119.07199770212173
},
"combine": {
- "p50": 65.85600227117538,
- "p90": 67.03999638557434,
- "p95": 67.80800223350525,
- "p99": 78.52800190448761
+ "p50": 73.88799637556076,
+ "p90": 79.83999699354172,
+ "p95": 83.23200047016144,
+ "p99": 88.03199976682663
},
"roundtrip": {
- "p50": 105.18400371074677,
- "p90": 107.26399719715118,
- "p95": 108.15999656915665,
- "p99": 112.47999966144562
+ "p50": 252.22399830818176,
+ "p90": 264.6079957485199,
+ "p95": 267.96799898147583,
+ "p99": 276.99199318885803
},
"isolatedSum": {
- "p50": 121.66400253772736,
- "p90": 124.70399588346481,
- "p95": 126.72000378370285,
- "p99": 142.40000396966934
+ "p50": 169.15199905633926,
+ "p90": 187.23199516534805,
+ "p95": 194.75200027227402,
+ "p99": 207.10399746894836
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1189888,
- "combineLogicalBytes": 1189888,
- "fanoutMean": 5.1875,
- "recvTokensMax": 12,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
},
{
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 56.51199817657471,
- "p90": 58.111999183893204,
- "p95": 59.23200026154518,
- "p99": 65.18399715423584
+ "p50": 95.93600034713745,
+ "p90": 109.47199910879135,
+ "p95": 114.656001329422,
+ "p99": 128.63999605178833
},
"combine": {
- "p50": 66.23999774456024,
- "p90": 67.90400296449661,
- "p95": 68.35199892520905,
- "p99": 76.48000121116638
+ "p50": 75.6480023264885,
+ "p90": 80.99199831485748,
+ "p95": 84.99199897050858,
+ "p99": 90.36800265312195
},
"roundtrip": {
- "p50": 106.39999806880951,
- "p90": 109.50399935245514,
- "p95": 114.3679991364479,
- "p99": 124.9919980764389
+ "p50": 254.01601195335388,
+ "p90": 266.7520046234131,
+ "p95": 270.3999876976013,
+ "p99": 277.50399708747864
},
"isolatedSum": {
- "p50": 122.75199592113495,
- "p90": 126.01600214838982,
- "p95": 127.58399918675423,
- "p99": 141.66399836540222
+ "p50": 171.58400267362595,
+ "p90": 190.46399742364883,
+ "p95": 199.64800029993057,
+ "p99": 219.00799870491028
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2408448,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 23,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 1240064,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9237,35 +9308,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 58.17599967122078,
- "p90": 60.575999319553375,
- "p95": 61.69600039720535,
- "p99": 74.97599720954895
+ "p50": 93.37600320577621,
+ "p90": 105.15200346708298,
+ "p95": 110.36799848079681,
+ "p99": 121.08799815177917
},
"combine": {
- "p50": 67.03999638557434,
- "p90": 69.31199878454208,
- "p95": 76.57600194215775,
- "p99": 78.91199737787247
+ "p50": 77.47200131416321,
+ "p90": 83.74399691820145,
+ "p95": 87.55200356245041,
+ "p99": 94.33600306510925
},
"roundtrip": {
- "p50": 119.4240003824234,
- "p90": 124.79999661445618,
- "p95": 125.88800489902496,
- "p99": 129.43999469280243
+ "p50": 254.33599948883057,
+ "p90": 267.1999931335449,
+ "p95": 272.0640003681183,
+ "p99": 284.8320007324219
},
"isolatedSum": {
- "p50": 125.21599605679512,
- "p90": 129.88799810409546,
- "p95": 138.2720023393631,
- "p99": 153.88799458742142
+ "p50": 170.84800451993942,
+ "p90": 188.89600038528442,
+ "p95": 197.92000204324722,
+ "p99": 215.42400121688843
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
+ "dispatchLogicalBytes": 2487296,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
"recvTokensMax": 47,
- "stragglerRank": 4,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9274,35 +9345,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 59.776000678539276,
- "p90": 65.88800251483917,
- "p95": 67.03999638557434,
- "p99": 71.9359964132309
+ "p50": 95.2640026807785,
+ "p90": 106.55999928712845,
+ "p95": 110.91200262308121,
+ "p99": 119.19999867677689
},
"combine": {
- "p50": 68.12799721956253,
- "p90": 77.44000107049942,
- "p95": 77.79199630022049,
- "p99": 89.6959975361824
+ "p50": 79.96799796819687,
+ "p90": 85.56800335645676,
+ "p95": 88.128000497818,
+ "p99": 95.13600170612335
},
"roundtrip": {
- "p50": 118.81600320339203,
- "p90": 124.83199685811996,
- "p95": 125.91999769210815,
- "p99": 131.80799782276154
+ "p50": 257.85601139068604,
+ "p90": 271.2000012397766,
+ "p95": 274.81600642204285,
+ "p99": 392.35201478004456
},
"isolatedSum": {
- "p50": 127.9039978981018,
- "p90": 143.3280035853386,
- "p95": 144.83199268579483,
- "p99": 161.6319939494133
+ "p50": 175.23200064897537,
+ "p90": 192.1280026435852,
+ "p95": 199.0400031208992,
+ "p99": 214.33600038290024
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9605120,
- "combineLogicalBytes": 9605120,
- "fanoutMean": 5.234375,
- "recvTokensMax": 93,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 4960256,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9311,35 +9382,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 68.03199648857117,
- "p90": 69.2799985408783,
- "p95": 71.23199850320816,
- "p99": 76.22399926185608
+ "p50": 94.59199756383896,
+ "p90": 105.24799674749374,
+ "p95": 109.82400178909302,
+ "p99": 120.25599926710129
},
"combine": {
- "p50": 77.85599678754807,
- "p90": 78.5600021481514,
- "p95": 79.32800054550171,
- "p99": 90.84799885749817
+ "p50": 84.44800227880478,
+ "p90": 90.81599861383438,
+ "p95": 94.01600062847137,
+ "p99": 97.02400118112564
},
"roundtrip": {
- "p50": 128.86400520801544,
- "p90": 132.35199451446533,
- "p95": 134.91199910640717,
- "p99": 140.57600498199463
+ "p50": 264.0320062637329,
+ "p90": 276.92800760269165,
+ "p95": 279.80801463127136,
+ "p99": 286.97600960731506
},
"isolatedSum": {
- "p50": 145.88799327611923,
- "p90": 147.8400006890297,
- "p95": 150.55999904870987,
- "p99": 167.07199811935425
+ "p50": 179.03999984264374,
+ "p90": 196.06399536132812,
+ "p95": 203.8400024175644,
+ "p99": 217.28000044822693
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19367936,
- "combineLogicalBytes": 19367936,
- "fanoutMean": 5.27734375,
+ "dispatchLogicalBytes": 9863168,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
"recvTokensMax": 182,
- "stragglerRank": 7,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9348,35 +9419,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 86.30400151014328,
- "p90": 88.86399865150452,
- "p95": 89.82399851083755,
- "p99": 103.20000350475311
+ "p50": 101.02400183677673,
+ "p90": 111.7440015077591,
+ "p95": 115.32799899578094,
+ "p99": 122.97599762678146
},
"combine": {
- "p50": 90.87999910116196,
- "p90": 92.8959995508194,
- "p95": 94.40000355243683,
- "p99": 102.4319976568222
+ "p50": 100.09600222110748,
+ "p90": 106.175996363163,
+ "p95": 108.15999656915665,
+ "p99": 114.72000181674957
},
"roundtrip": {
- "p50": 157.31200575828552,
- "p90": 161.56800091266632,
- "p95": 163.03999722003937,
- "p99": 172.8000044822693
+ "p50": 282.943993806839,
+ "p90": 318.36798787117004,
+ "p95": 368.6720132827759,
+ "p99": 443.10399889945984
},
"isolatedSum": {
- "p50": 177.18400061130524,
- "p90": 181.7599982023239,
- "p95": 184.22400206327438,
- "p99": 205.63200116157532
+ "p50": 201.12000405788422,
+ "p90": 217.9199978709221,
+ "p95": 223.4879955649376,
+ "p99": 237.69599944353104
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38535168,
- "combineLogicalBytes": 38535168,
- "fanoutMean": 5.25,
- "recvTokensMax": 358,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9385,35 +9456,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 92.6079973578453,
- "p90": 95.67999839782715,
- "p95": 98.94400089979172,
- "p99": 109.79200154542923
+ "p50": 111.13599687814713,
+ "p90": 120.35199999809265,
+ "p95": 124.7360035777092,
+ "p99": 132.79999792575836
},
"combine": {
- "p50": 114.62400108575821,
- "p90": 115.35999923944473,
- "p95": 116.06399714946747,
- "p99": 126.78399682044983
+ "p50": 119.45600062608719,
+ "p90": 125.91999769210815,
+ "p95": 129.37599420547485,
+ "p99": 135.93600690364838
},
"roundtrip": {
- "p50": 194.94399428367615,
- "p90": 198.33600521087646,
- "p95": 199.39200580120087,
- "p99": 228.70400547981262
+ "p50": 304.3839931488037,
+ "p90": 316.3839876651764,
+ "p95": 320.51199674606323,
+ "p99": 329.24801111221313
},
"isolatedSum": {
- "p50": 207.23199844360352,
- "p90": 211.03999763727188,
- "p95": 215.0079980492592,
- "p99": 236.57599836587906
+ "p50": 230.5919975042343,
+ "p90": 246.2719976902008,
+ "p95": 254.11199778318405,
+ "p99": 268.73600482940674
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 76869632,
- "combineLogicalBytes": 76869632,
- "fanoutMean": 5.236328125,
- "recvTokensMax": 688,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9421,47 +9492,48 @@
]
},
{
- "id": "cx-29812ef0",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c",
- "colorKey": "b300_fdf55523",
- "comparisonKey": "941e1d8581ae6b5b",
+ "id": "cx-cce0f9af",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "e52bc4d8d01ec622",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:49:22.371406+00:00",
+ "generatedAt": "2026-06-29T13:51:44.095505+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-moderate",
+ "label": "GB200 EP8 · deepep · fp8",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate",
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -9469,58 +9541,58 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "14ded8461f2636c",
- "workloadId": "set:8:120a8dc1dba92ca9",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285640709",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285640709",
- "createdAt": "2026-06-27T09:49:22.371406+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.63999915122986,
- "p90": 58.27200040221214,
- "p95": 60.28800085186958,
- "p99": 68.92800331115723
+ "p50": 441.3119852542877,
+ "p90": 461.91999316215515,
+ "p95": 468.03200244903564,
+ "p99": 478.7200093269348
},
"combine": {
- "p50": 65.47199934720993,
- "p90": 66.27199798822403,
- "p95": 66.39999896287918,
- "p99": 69.21599805355072
+ "p50": 74.94399696588516,
+ "p90": 81.24800026416779,
+ "p95": 84.73599702119827,
+ "p99": 89.72799777984619
},
"roundtrip": {
- "p50": 106.84800148010254,
- "p90": 112.19199746847153,
- "p95": 113.08799684047699,
- "p99": 127.96799838542938
+ "p50": 487.61600255966187,
+ "p90": 510.047972202301,
+ "p95": 515.936017036438,
+ "p99": 529.5360088348389
},
"isolatedSum": {
- "p50": 122.11199849843979,
- "p90": 124.54399839043617,
- "p95": 126.68799981474876,
- "p99": 138.14400136470795
+ "p50": 516.2559822201729,
+ "p90": 543.1679934263229,
+ "p95": 552.7679994702339,
+ "p99": 568.448007106781
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 315392,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -9530,34 +9602,34 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 56.86400085687637,
- "p90": 58.59199911355972,
- "p95": 60.99199876189232,
- "p99": 64.41599875688553
+ "p50": 438.7199878692627,
+ "p90": 463.9680087566376,
+ "p95": 599.9680161476135,
+ "p99": 2199.712038040161
},
"combine": {
- "p50": 65.88800251483917,
- "p90": 66.52799993753433,
- "p95": 67.52000004053116,
- "p99": 78.015998005867
+ "p50": 76.80000364780426,
+ "p90": 83.80799740552902,
+ "p95": 88.67199718952179,
+ "p99": 111.90400272607803
},
"roundtrip": {
- "p50": 105.92000186443329,
- "p90": 112.76800185441971,
- "p95": 113.56800049543381,
- "p99": 130.3360015153885
+ "p50": 486.7520034313202,
+ "p90": 508.1599950790405,
+ "p95": 513.4720206260681,
+ "p99": 523.6799716949463
},
"isolatedSum": {
- "p50": 122.75200337171555,
- "p90": 125.11999905109406,
- "p95": 128.51199880242348,
- "p99": 142.43199676275253
+ "p50": 515.519991517067,
+ "p90": 547.7760061621666,
+ "p95": 688.6400133371353,
+ "p99": 2311.616040766239
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -9567,35 +9639,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 58.400001376867294,
- "p90": 61.08799949288368,
- "p95": 64.19199705123901,
- "p99": 66.72000139951706
+ "p50": 444.12800669670105,
+ "p90": 463.9360010623932,
+ "p95": 468.51199865341187,
+ "p99": 480.99198937416077
},
"combine": {
- "p50": 66.17599725723267,
- "p90": 67.48799979686737,
- "p95": 68.54400038719177,
- "p99": 76.31999999284744
+ "p50": 77.7600035071373,
+ "p90": 83.77599716186523,
+ "p95": 85.9839990735054,
+ "p99": 90.40000289678574
},
"roundtrip": {
- "p50": 107.96800255775452,
- "p90": 113.0559965968132,
- "p95": 115.64800143241882,
- "p99": 122.01599776744843
+ "p50": 488.319993019104,
+ "p90": 511.48802042007446,
+ "p95": 519.648015499115,
+ "p99": 591.7440056800842
},
"isolatedSum": {
- "p50": 124.57599863409996,
- "p90": 128.57599928975105,
- "p95": 132.7359974384308,
- "p99": 143.0400013923645
+ "p50": 521.8880102038383,
+ "p90": 547.7119982242584,
+ "p95": 554.4959977269173,
+ "p99": 571.3919922709465
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1240064,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9604,35 +9676,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 58.6559996008873,
- "p90": 60.575999319553375,
- "p95": 62.144000083208084,
- "p99": 69.47200000286102
+ "p50": 436.8000030517578,
+ "p90": 455.9360146522522,
+ "p95": 461.08800172805786,
+ "p99": 471.19998931884766
},
"combine": {
- "p50": 68.00000369548798,
- "p90": 76.28799974918365,
- "p95": 76.54400169849396,
- "p99": 77.60000228881836
+ "p50": 79.6160027384758,
+ "p90": 85.63199639320374,
+ "p95": 89.15200084447861,
+ "p99": 96.3520035147667
},
"roundtrip": {
- "p50": 117.95199662446976,
- "p90": 123.6800029873848,
- "p95": 125.11999905109406,
- "p99": 143.2960033416748
+ "p50": 485.4399859905243,
+ "p90": 505.5999755859375,
+ "p95": 511.2000107765198,
+ "p99": 520.5119848251343
},
"isolatedSum": {
- "p50": 126.65600329637527,
- "p90": 136.86399906873703,
- "p95": 138.68800178170204,
- "p99": 147.07200229167938
+ "p50": 516.4160057902336,
+ "p90": 541.5680110454559,
+ "p95": 550.2400025725365,
+ "p99": 567.5519928336143
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2487296,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9641,35 +9713,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 59.93599817156792,
- "p90": 65.8240020275116,
- "p95": 69.40799951553345,
- "p99": 84.54400300979614
+ "p50": 441.2800073623657,
+ "p90": 459.83999967575073,
+ "p95": 464.4800126552582,
+ "p99": 475.48800706863403
},
"combine": {
- "p50": 68.12799721956253,
- "p90": 76.12799853086472,
- "p95": 76.7040029168129,
- "p99": 77.85599678754807
+ "p50": 82.11199939250946,
+ "p90": 87.77599781751633,
+ "p95": 90.59199690818787,
+ "p99": 96.25600278377533
},
"roundtrip": {
- "p50": 122.97599762678146,
- "p90": 126.43200159072876,
- "p95": 127.32799351215363,
- "p99": 132.47999548912048
+ "p50": 490.27198553085327,
+ "p90": 511.1039876937866,
+ "p95": 517.2799825668335,
+ "p99": 531.1999917030334
},
"isolatedSum": {
- "p50": 128.06399539113045,
- "p90": 141.9520005583763,
- "p95": 146.11200243234634,
- "p99": 162.3999997973442
+ "p50": 523.3920067548752,
+ "p90": 547.6159974932671,
+ "p95": 555.072009563446,
+ "p99": 571.7440098524094
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4960256,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9678,35 +9750,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 74.43200051784515,
- "p90": 77.37600058317184,
- "p95": 78.17599922418594,
- "p99": 81.4720019698143
+ "p50": 438.84798884391785,
+ "p90": 456.6720128059387,
+ "p95": 461.760014295578,
+ "p99": 474.3039906024933
},
"combine": {
- "p50": 77.72800326347351,
- "p90": 78.52800190448761,
- "p95": 78.68800312280655,
- "p99": 90.68799763917923
+ "p50": 87.16800063848495,
+ "p90": 93.79199892282486,
+ "p95": 97.75999933481216,
+ "p99": 103.93600165843964
},
"roundtrip": {
- "p50": 129.2479932308197,
- "p90": 133.59999656677246,
- "p95": 134.8160058259964,
- "p99": 141.63200557231903
+ "p50": 489.984005689621,
+ "p90": 509.7919702529907,
+ "p95": 515.1360034942627,
+ "p99": 524.511992931366
},
"isolatedSum": {
- "p50": 152.16000378131866,
- "p90": 155.90400248765945,
- "p95": 156.8640023469925,
- "p99": 172.15999960899353
+ "p50": 526.0159894824028,
+ "p90": 550.4640117287636,
+ "p95": 559.5200136303902,
+ "p99": 578.2399922609329
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 9863168,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9715,35 +9787,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 79.52000200748444,
- "p90": 84.86399799585342,
- "p95": 85.69599688053131,
- "p99": 103.00800204277039
+ "p50": 432.73600935935974,
+ "p90": 452.5119960308075,
+ "p95": 458.9439928531647,
+ "p99": 469.92000937461853
},
"combine": {
- "p50": 90.68799763917923,
- "p90": 92.22400188446045,
- "p95": 93.72799843549728,
- "p99": 102.27199643850327
+ "p50": 101.47199779748917,
+ "p90": 107.90400207042694,
+ "p95": 109.82400178909302,
+ "p99": 115.7120019197464
},
"roundtrip": {
- "p50": 158.07999670505524,
- "p90": 162.84799575805664,
- "p95": 163.68000209331512,
- "p99": 179.80800569057465
+ "p50": 508.54402780532837,
+ "p90": 527.3920297622681,
+ "p95": 533.1199765205383,
+ "p99": 546.8479990959167
},
"isolatedSum": {
- "p50": 170.20799964666367,
- "p90": 177.08799988031387,
- "p95": 179.4239953160286,
- "p99": 205.27999848127365
+ "p50": 534.2080071568489,
+ "p90": 560.4159981012344,
+ "p95": 568.7679946422577,
+ "p99": 585.6320112943649
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 25145344,
- "combineLogicalBytes": 25145344,
- "fanoutMean": 3.42578125,
- "recvTokensMax": 510,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9752,35 +9824,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 102.52799838781357,
- "p90": 109.79200154542923,
- "p95": 111.68000102043152,
- "p99": 126.5919953584671
+ "p50": 440.19201397895813,
+ "p90": 462.1120095252991,
+ "p95": 466.75199270248413,
+ "p99": 481.7279875278473
},
"combine": {
- "p50": 126.3359934091568,
- "p90": 127.55200266838074,
- "p95": 127.74400413036346,
- "p99": 134.3040019273758
+ "p50": 120.64000219106674,
+ "p90": 127.71199643611908,
+ "p95": 130.94399869441986,
+ "p99": 136.63999736309052
},
"roundtrip": {
- "p50": 209.50399339199066,
- "p90": 217.3759937286377,
- "p95": 220.92799842357635,
- "p99": 231.55200481414795
+ "p50": 521.664023399353,
+ "p90": 539.0080213546753,
+ "p95": 542.0799851417542,
+ "p99": 557.5680136680603
},
"isolatedSum": {
- "p50": 228.86399179697037,
- "p90": 237.34400421380997,
- "p95": 239.42400515079498,
- "p99": 260.8959972858429
+ "p50": 560.8320161700249,
+ "p90": 589.8240059614182,
+ "p95": 597.695991396904,
+ "p99": 618.3679848909378
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9788,47 +9860,48 @@
]
},
{
- "id": "cx-b49699d8",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836",
- "colorKey": "b300_4eade0db",
- "comparisonKey": "4a0af3f3eb467c05",
+ "id": "cx-b9eefa34",
+ "identity": "gb200|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "b951703b8acd7879",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:49:28.247452+00:00",
+ "generatedAt": "2026-06-29T13:52:42.590848+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_14",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-moderate+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "Kimi-K2",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 288,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate+eplb",
+ "experts": 384,
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
+ "eplbEnabled": false,
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -9836,59 +9909,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "a8f501af7004836",
- "workloadId": "set:8:120a8dc1dba92ca9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.927734375,
- "eplbImbalanceAfter": 1.0006103515625,
- "backendVersion": "1.2.1",
+ "traceSignature": "6d507ec2ec8998f",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285643524",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285643524",
- "createdAt": "2026-06-27T09:49:28.247452+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.8560009598732,
- "p90": 60.32000109553337,
- "p95": 62.55999952554703,
- "p99": 75.26399940252304
+ "p50": 355.29598593711853,
+ "p90": 370.7199990749359,
+ "p95": 376.15999579429626,
+ "p99": 386.4000141620636
},
"combine": {
- "p50": 66.01600348949432,
- "p90": 66.39999896287918,
- "p95": 66.56000018119812,
- "p99": 69.82400268316269
+ "p50": 71.26399874687195,
+ "p90": 76.06399804353714,
+ "p95": 78.46400141716003,
+ "p99": 82.71999657154083
},
"roundtrip": {
- "p50": 107.77600109577179,
- "p90": 113.98400366306305,
- "p95": 115.10399729013443,
- "p99": 123.80799651145935
+ "p50": 395.04000544548035,
+ "p90": 405.69600462913513,
+ "p95": 407.74399042129517,
+ "p99": 414.5919978618622
},
"isolatedSum": {
- "p50": 123.87200444936752,
- "p90": 126.72000005841255,
- "p95": 129.11999970674515,
- "p99": 145.08800208568573
+ "p50": 426.5599846839905,
+ "p90": 446.78399711847305,
+ "p95": 454.6239972114563,
+ "p99": 469.12001073360443
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 5.375,
- "recvTokensMax": 7,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 301056,
+ "combineLogicalBytes": 602112,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 8,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9897,35 +9970,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 57.95200169086456,
- "p90": 59.55199897289276,
- "p95": 61.02399900555611,
- "p99": 63.35999816656113
+ "p50": 353.0240058898926,
+ "p90": 365.9200072288513,
+ "p95": 368.0959939956665,
+ "p99": 376.800000667572
},
"combine": {
- "p50": 66.23999774456024,
- "p90": 67.9360032081604,
- "p95": 69.11999732255936,
- "p99": 78.20799946784973
+ "p50": 72.80000299215317,
+ "p90": 77.98399776220322,
+ "p95": 81.34400099515915,
+ "p99": 86.33600175380707
},
"roundtrip": {
- "p50": 107.00800269842148,
- "p90": 109.21599715948105,
- "p95": 111.13599687814713,
- "p99": 122.27199971675873
+ "p50": 400.60800313949585,
+ "p90": 412.06398606300354,
+ "p95": 416.8640077114105,
+ "p99": 427.7760088443756
},
"isolatedSum": {
- "p50": 124.1919994354248,
- "p90": 127.48800218105316,
- "p95": 130.14399632811546,
- "p99": 141.56799763441086
+ "p50": 425.82400888204575,
+ "p90": 443.90400499105453,
+ "p95": 449.43999499082565,
+ "p99": 463.1360024213791
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 1204224,
- "fanoutMean": 5.25,
+ "dispatchLogicalBytes": 609280,
+ "combineLogicalBytes": 1218560,
+ "fanoutMean": 5.3125,
"recvTokensMax": 14,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9934,35 +10007,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 59.74400043487549,
- "p90": 61.85600161552429,
- "p95": 63.968002796173096,
- "p99": 73.44000041484833
+ "p50": 359.26398634910583,
+ "p90": 388.92799615859985,
+ "p95": 398.5919952392578,
+ "p99": 414.5919978618622
},
"combine": {
- "p50": 67.55200028419495,
- "p90": 69.24799829721451,
- "p95": 71.99999690055847,
- "p99": 77.72800326347351
+ "p50": 75.16799867153168,
+ "p90": 80.64000308513641,
+ "p95": 83.48800241947174,
+ "p99": 88.86399865150452
},
"roundtrip": {
- "p50": 118.27199906110764,
- "p90": 124.70400333404541,
- "p95": 126.20800733566284,
- "p99": 130.11200726032257
+ "p50": 406.3360095024109,
+ "p90": 429.82399463653564,
+ "p95": 435.07200479507446,
+ "p99": 447.07199931144714
},
"isolatedSum": {
- "p50": 127.29600071907043,
- "p90": 131.1039999127388,
- "p95": 135.96799969673157,
- "p99": 151.16800367832184
+ "p50": 434.4319850206375,
+ "p90": 469.56799924373627,
+ "p95": 482.07999765872955,
+ "p99": 503.4559965133667
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 24,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1204224,
+ "combineLogicalBytes": 2408448,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 26,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -9971,35 +10044,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 60.22400036454201,
- "p90": 62.20800057053566,
- "p95": 64.19199705123901,
- "p99": 81.11999928951263
+ "p50": 355.8720052242279,
+ "p90": 369.8880076408386,
+ "p95": 374.752014875412,
+ "p99": 403.4239947795868
},
"combine": {
- "p50": 68.06399673223495,
- "p90": 70.0799971818924,
- "p95": 77.15199887752533,
- "p99": 79.16799932718277
+ "p50": 76.38400048017502,
+ "p90": 81.50400221347809,
+ "p95": 85.56800335645676,
+ "p99": 93.91999989748001
},
"roundtrip": {
- "p50": 123.00799787044525,
- "p90": 127.10399925708771,
- "p95": 127.68000364303589,
- "p99": 130.46400249004364
+ "p50": 403.6799967288971,
+ "p90": 417.05599427223206,
+ "p95": 421.63199186325073,
+ "p99": 437.3759925365448
},
"isolatedSum": {
- "p50": 128.28799709677696,
- "p90": 132.28799775242805,
- "p95": 141.34399592876434,
- "p99": 160.2879986166954
+ "p50": 432.2560057044029,
+ "p90": 451.3920098543167,
+ "p95": 460.32001823186874,
+ "p99": 497.3439946770668
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4630528,
- "combineLogicalBytes": 4630528,
- "fanoutMean": 5.046875,
- "recvTokensMax": 45,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2415616,
+ "combineLogicalBytes": 4831232,
+ "fanoutMean": 5.265625,
+ "recvTokensMax": 48,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10008,35 +10081,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 65.95200300216675,
- "p90": 69.72800195217133,
- "p95": 72.41600006818771,
- "p99": 76.09599828720093
+ "p50": 354.8159897327423,
+ "p90": 369.6640133857727,
+ "p95": 374.5279908180237,
+ "p99": 380.22398948669434
},
"combine": {
- "p50": 68.70400160551071,
- "p90": 77.63200253248215,
- "p95": 78.3040001988411,
- "p99": 92.0960009098053
+ "p50": 78.14399898052216,
+ "p90": 82.56000280380249,
+ "p95": 85.82399785518646,
+ "p99": 90.27200192213058
},
"roundtrip": {
- "p50": 120.99199742078781,
- "p90": 126.08000636100769,
- "p95": 127.20000743865967,
- "p99": 143.13599467277527
+ "p50": 402.3680090904236,
+ "p90": 413.91998529434204,
+ "p95": 418.14398765563965,
+ "p99": 423.96798729896545
},
"isolatedSum": {
- "p50": 134.65600460767746,
- "p90": 147.36000448465347,
- "p95": 150.7200002670288,
- "p99": 168.19199919700623
+ "p50": 432.95998871326447,
+ "p90": 452.2240161895752,
+ "p95": 460.35198867321014,
+ "p99": 470.4959914088249
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9447424,
- "combineLogicalBytes": 9447424,
- "fanoutMean": 5.1484375,
+ "dispatchLogicalBytes": 4924416,
+ "combineLogicalBytes": 9848832,
+ "fanoutMean": 5.3671875,
"recvTokensMax": 91,
- "stragglerRank": 3,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10045,35 +10118,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 70.11199742555618,
- "p90": 74.43200051784515,
- "p95": 75.42400062084198,
- "p99": 80.25600016117096
+ "p50": 353.66401076316833,
+ "p90": 369.2159950733185,
+ "p95": 373.53599071502686,
+ "p99": 381.1520040035248
},
"combine": {
- "p50": 78.40000092983246,
- "p90": 79.1039988398552,
- "p95": 80.12799918651581,
- "p99": 90.46400338411331
+ "p50": 83.39200168848038,
+ "p90": 88.60799670219421,
+ "p95": 91.61599725484848,
+ "p99": 96.8639999628067
},
"roundtrip": {
- "p50": 133.37600231170654,
- "p90": 137.2160017490387,
- "p95": 138.20800185203552,
- "p99": 140.86399972438812
+ "p50": 408.9280068874359,
+ "p90": 420.54399847984314,
+ "p95": 423.93600940704346,
+ "p99": 454.23999428749084
},
"isolatedSum": {
- "p50": 148.51199835538864,
- "p90": 153.53599935770035,
- "p95": 155.5519998073578,
- "p99": 170.72000354528427
+ "p50": 437.0560124516487,
+ "p90": 457.8239917755127,
+ "p95": 465.15198796987534,
+ "p99": 478.0160039663315
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19023872,
- "combineLogicalBytes": 19023872,
- "fanoutMean": 5.18359375,
+ "dispatchLogicalBytes": 9748480,
+ "combineLogicalBytes": 19496960,
+ "fanoutMean": 5.3125,
"recvTokensMax": 178,
- "stragglerRank": 4,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10082,35 +10155,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 88.44800293445587,
- "p90": 90.7519981265068,
- "p95": 91.51999652385712,
- "p99": 95.32800316810608
+ "p50": 365.34398794174194,
+ "p90": 422.87999391555786,
+ "p95": 429.3760061264038,
+ "p99": 442.30398535728455
},
"combine": {
- "p50": 92.3520028591156,
- "p90": 101.24800354242325,
- "p95": 101.82400047779083,
- "p99": 115.07199704647064
+ "p50": 100.44799745082855,
+ "p90": 105.8880016207695,
+ "p95": 108.38399827480316,
+ "p99": 115.42399972677231
},
"roundtrip": {
- "p50": 161.40800714492798,
- "p90": 165.3759926557541,
- "p95": 166.72000288963318,
- "p99": 173.21600019931793
+ "p50": 439.10399079322815,
+ "p90": 491.10400676727295,
+ "p95": 499.424010515213,
+ "p99": 508.4480047225952
},
"isolatedSum": {
- "p50": 180.80000579357147,
- "p90": 192.00000166893005,
- "p95": 193.34399700164795,
- "p99": 210.40000021457672
+ "p50": 465.7919853925705,
+ "p90": 528.7679955363274,
+ "p95": 537.760004401207,
+ "p99": 557.7279850840569
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38148096,
- "combineLogicalBytes": 38148096,
- "fanoutMean": 5.197265625,
- "recvTokensMax": 350,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 19418112,
+ "combineLogicalBytes": 38836224,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 372,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10119,35 +10192,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 94.81599926948547,
- "p90": 97.63199836015701,
- "p95": 99.80800002813339,
- "p99": 106.84800148010254
+ "p50": 355.29598593711853,
+ "p90": 372.8640079498291,
+ "p95": 379.39199805259705,
+ "p99": 402.8159976005554
},
"combine": {
- "p50": 115.23199826478958,
- "p90": 116.12799763679504,
- "p95": 117.3119992017746,
- "p99": 179.83999848365784
+ "p50": 119.23199892044067,
+ "p90": 124.03199821710587,
+ "p95": 126.68800354003906,
+ "p99": 130.52800297737122
},
"roundtrip": {
- "p50": 193.53599846363068,
- "p90": 199.16799664497375,
- "p95": 200.41599869728088,
- "p99": 207.48800039291382
+ "p50": 448.3200013637543,
+ "p90": 460.5120122432709,
+ "p95": 463.03999423980713,
+ "p99": 469.760000705719
},
"isolatedSum": {
- "p50": 210.04799753427505,
- "p90": 213.75999599695206,
- "p95": 217.119999229908,
- "p99": 286.6879999637604
+ "p50": 474.5279848575592,
+ "p90": 496.89600616693497,
+ "p95": 506.0800015926361,
+ "p99": 533.3440005779266
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 76955648,
- "combineLogicalBytes": 76955648,
- "fanoutMean": 5.2421875,
- "recvTokensMax": 687,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 38757376,
+ "combineLogicalBytes": 77514752,
+ "fanoutMean": 5.2802734375,
+ "recvTokensMax": 707,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10155,47 +10228,48 @@
]
},
{
- "id": "cx-686fd558",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836",
- "colorKey": "b300_f1ea991b",
- "comparisonKey": "72d679cfb4846306",
+ "id": "cx-8173576b",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_7e970144",
+ "comparisonKey": "04982d471558d8a8",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:48:52.585093+00:00",
+ "generatedAt": "2026-06-29T13:50:48.119671+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_02",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "cached-layout-comm-only-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
+ "label": "GB200 EP8 · deepep · fp8 [cl]",
+ "model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
+ "experts": 256,
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
+ "eplbEnabled": false,
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -10203,59 +10277,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "a8f501af7004836",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.927734375,
- "eplbImbalanceAfter": 1.0006103515625,
- "backendVersion": "1.2.1",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285622991",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285622991",
- "createdAt": "2026-06-27T09:48:52.585093+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 58.01599845290184,
- "p90": 59.58399921655655,
- "p95": 60.896001756191254,
- "p99": 72.35199958086014
+ "p50": 69.11999732255936,
+ "p90": 79.16799932718277,
+ "p95": 82.40000158548355,
+ "p99": 91.20000153779984
},
"combine": {
- "p50": 66.17599725723267,
- "p90": 66.880002617836,
- "p95": 67.4239993095398,
- "p99": 80.73599636554718
+ "p50": 71.42399996519089,
+ "p90": 75.52000135183334,
+ "p95": 79.8719972372055,
+ "p99": 84.3840017914772
},
"roundtrip": {
- "p50": 107.55199939012527,
- "p90": 113.56800049543381,
- "p95": 114.9120032787323,
- "p99": 131.8719983100891
+ "p50": 202.4639993906021,
+ "p90": 214.9440050125122,
+ "p95": 218.49599480628967,
+ "p99": 231.83999955654144
},
"isolatedSum": {
- "p50": 124.1919957101345,
- "p90": 126.46400183439255,
- "p95": 128.32000106573105,
- "p99": 153.08799594640732
+ "p50": 140.54399728775024,
+ "p90": 154.6880006790161,
+ "p95": 162.27199882268906,
+ "p99": 175.58400332927704
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 5.375,
+ "dispatchLogicalBytes": 315392,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
"recvTokensMax": 7,
- "stragglerRank": 4,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10264,35 +10338,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 58.27200040221214,
- "p90": 59.99999865889549,
- "p95": 60.7680007815361,
- "p99": 74.0479975938797
+ "p50": 69.31199878454208,
+ "p90": 78.33600044250488,
+ "p95": 81.63200318813324,
+ "p99": 88.51200342178345
},
"combine": {
- "p50": 66.59200042486191,
- "p90": 67.52000004053116,
- "p95": 68.64000111818314,
- "p99": 70.88000327348709
+ "p50": 72.28799909353256,
+ "p90": 76.31999999284744,
+ "p95": 78.5600021481514,
+ "p99": 82.56000280380249
},
"roundtrip": {
- "p50": 107.744000852108,
- "p90": 109.79200154542923,
- "p95": 111.29599809646606,
- "p99": 121.72800302505493
+ "p50": 203.0079960823059,
+ "p90": 215.55200219154358,
+ "p95": 218.6560034751892,
+ "p99": 230.0799936056137
},
"isolatedSum": {
- "p50": 124.86400082707405,
- "p90": 127.51999869942665,
- "p95": 129.40800189971924,
- "p99": 144.9280008673668
+ "p50": 141.59999787807465,
+ "p90": 154.65600043535233,
+ "p95": 160.19200533628464,
+ "p99": 171.07200622558594
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 1204224,
- "fanoutMean": 5.25,
- "recvTokensMax": 14,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10301,35 +10375,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 61.664000153541565,
- "p90": 82.40000158548355,
- "p95": 86.36800199747086,
- "p99": 92.92799979448318
+ "p50": 70.01599669456482,
+ "p90": 79.48800176382065,
+ "p95": 82.49600231647491,
+ "p99": 96.09600156545639
},
"combine": {
- "p50": 68.54400038719177,
- "p90": 76.4160007238388,
- "p95": 77.15199887752533,
- "p99": 79.29600030183792
+ "p50": 74.94399696588516,
+ "p90": 80.03199845552444,
+ "p95": 83.23200047016144,
+ "p99": 87.2960016131401
},
"roundtrip": {
- "p50": 124.15999919176102,
- "p90": 127.36000120639801,
- "p95": 128.48000228405,
- "p99": 144.57599818706512
+ "p50": 207.64799416065216,
+ "p90": 219.55199539661407,
+ "p95": 222.59199619293213,
+ "p99": 229.40799593925476
},
"isolatedSum": {
- "p50": 130.20800054073334,
- "p90": 158.81600230932236,
- "p95": 163.52000087499619,
- "p99": 172.2240000963211
+ "p50": 144.95999366044998,
+ "p90": 159.5200002193451,
+ "p95": 165.72800278663635,
+ "p99": 183.3920031785965
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 24,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 1240064,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10338,35 +10412,35 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 60.32000109553337,
- "p90": 62.49599903821945,
- "p95": 64.00000303983688,
- "p99": 79.23199981451035
+ "p50": 70.17599791288376,
+ "p90": 79.71200346946716,
+ "p95": 82.5280025601387,
+ "p99": 91.23200178146362
},
"combine": {
- "p50": 68.44799965620041,
- "p90": 77.44000107049942,
- "p95": 78.23999971151352,
- "p99": 79.55200225114822
+ "p50": 76.1599987745285,
+ "p90": 82.04799890518188,
+ "p95": 84.6719965338707,
+ "p99": 94.81599926948547
},
"roundtrip": {
- "p50": 121.44000083208084,
- "p90": 126.43200159072876,
- "p95": 127.61600315570831,
- "p99": 132.03200697898865
+ "p50": 207.39200711250305,
+ "p90": 218.36799383163452,
+ "p95": 222.01600670814514,
+ "p99": 227.03999280929565
},
"isolatedSum": {
- "p50": 128.76800075173378,
- "p90": 139.93600010871887,
- "p95": 142.2400027513504,
- "p99": 158.78400206565857
+ "p50": 146.33599668741226,
+ "p90": 161.76000237464905,
+ "p95": 167.1999990940094,
+ "p99": 186.0480010509491
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4630528,
- "combineLogicalBytes": 4630528,
- "fanoutMean": 5.046875,
- "recvTokensMax": 45,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2487296,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10375,35 +10449,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 66.78400188684464,
- "p90": 73.11999797821045,
- "p95": 74.81600344181061,
- "p99": 79.19999957084656
+ "p50": 70.97599655389786,
+ "p90": 80.48000186681747,
+ "p95": 83.96799862384796,
+ "p99": 99.61599856615067
},
"combine": {
- "p50": 77.40800082683563,
- "p90": 78.72000336647034,
- "p95": 79.16799932718277,
- "p99": 81.50400221347809
+ "p50": 78.23999971151352,
+ "p90": 83.5840031504631,
+ "p95": 86.56000345945358,
+ "p99": 91.00800007581711
},
"roundtrip": {
- "p50": 120.64000219106674,
- "p90": 123.77600371837616,
- "p95": 125.95200538635254,
- "p99": 143.0719941854477
+ "p50": 209.75999534130096,
+ "p90": 221.0559993982315,
+ "p95": 223.36000204086304,
+ "p99": 232.35200345516205
},
"isolatedSum": {
- "p50": 144.19200271368027,
- "p90": 151.8400013446808,
- "p95": 153.98400276899338,
- "p99": 160.70400178432465
+ "p50": 149.21599626541138,
+ "p90": 164.06400501728058,
+ "p95": 170.52800208330154,
+ "p99": 190.62399864196777
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9447424,
- "combineLogicalBytes": 9447424,
- "fanoutMean": 5.1484375,
- "recvTokensMax": 91,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4960256,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10412,35 +10486,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 70.20799815654755,
- "p90": 72.95999675989151,
- "p95": 76.48000121116638,
- "p99": 82.94399827718735
+ "p50": 73.69600236415863,
+ "p90": 82.49600231647491,
+ "p95": 85.88799834251404,
+ "p99": 92.92799979448318
},
"combine": {
- "p50": 78.78399640321732,
- "p90": 79.80799674987793,
- "p95": 80.99199831485748,
- "p99": 89.91999924182892
+ "p50": 83.0719992518425,
+ "p90": 88.128000497818,
+ "p95": 91.07200056314468,
+ "p99": 95.16800194978714
},
"roundtrip": {
- "p50": 133.12000036239624,
- "p90": 137.7280056476593,
- "p95": 138.36799561977386,
- "p99": 155.10399639606476
+ "p50": 214.62400257587433,
+ "p90": 225.92000663280487,
+ "p95": 229.5680046081543,
+ "p99": 237.5359982252121
},
"isolatedSum": {
- "p50": 148.99199455976486,
- "p90": 152.76799350976944,
- "p95": 157.47199952602386,
- "p99": 172.86399751901627
+ "p50": 156.76800161600113,
+ "p90": 170.6240028142929,
+ "p95": 176.95999890565872,
+ "p99": 188.09600174427032
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19023872,
- "combineLogicalBytes": 19023872,
- "fanoutMean": 5.18359375,
- "recvTokensMax": 178,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 9863168,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10449,35 +10523,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 89.66399729251862,
- "p90": 91.93599969148636,
- "p95": 92.8959995508194,
- "p99": 102.30399668216705
+ "p50": 82.14399963617325,
+ "p90": 90.4960036277771,
+ "p95": 93.63199770450592,
+ "p99": 100.25600343942642
},
"combine": {
- "p50": 92.06400066614151,
- "p90": 96.67199850082397,
- "p95": 101.72799974679947,
- "p99": 103.7760004401207
+ "p50": 98.4639972448349,
+ "p90": 104.16000336408615,
+ "p95": 107.16799646615982,
+ "p99": 111.55200004577637
},
"roundtrip": {
- "p50": 161.6320013999939,
- "p90": 165.43999314308167,
- "p95": 166.52800142765045,
- "p99": 182.68799781799316
+ "p50": 232.96000063419342,
+ "p90": 243.8720017671585,
+ "p95": 249.6960014104843,
+ "p99": 258.6880028247833
},
"isolatedSum": {
- "p50": 181.72799795866013,
- "p90": 188.60799819231033,
- "p95": 194.62399929761887,
- "p99": 206.07999712228775
+ "p50": 180.60799688100815,
+ "p90": 194.65600699186325,
+ "p95": 200.79999417066574,
+ "p99": 211.8080034852028
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38148096,
- "combineLogicalBytes": 38148096,
- "fanoutMean": 5.197265625,
- "recvTokensMax": 350,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10486,35 +10560,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 94.94400024414062,
- "p90": 97.4079966545105,
- "p95": 100.00000149011612,
- "p99": 108.96000266075134
+ "p50": 93.21600198745728,
+ "p90": 99.10400211811066,
+ "p95": 102.08000242710114,
+ "p99": 108.15999656915665
},
"combine": {
- "p50": 115.58400094509125,
- "p90": 117.21599847078323,
- "p95": 118.56000125408173,
- "p99": 138.3039951324463
+ "p50": 117.72800236940384,
+ "p90": 122.30399996042252,
+ "p95": 125.82400441169739,
+ "p99": 130.68799674510956
},
"roundtrip": {
- "p50": 197.2160041332245,
- "p90": 202.39999890327454,
- "p95": 204.0960043668747,
- "p99": 209.6640020608902
+ "p50": 253.6959946155548,
+ "p90": 265.1520073413849,
+ "p95": 268.0639922618866,
+ "p99": 276.2880027294159
},
"isolatedSum": {
- "p50": 210.52800118923187,
- "p90": 214.62399512529373,
- "p95": 218.56000274419785,
- "p99": 247.26399779319763
+ "p50": 210.94400435686111,
+ "p90": 221.40800207853317,
+ "p95": 227.90400683879852,
+ "p99": 238.8479933142662
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 76955648,
- "combineLogicalBytes": 76955648,
- "fanoutMean": 5.2421875,
- "recvTokensMax": 687,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10522,28 +10596,28 @@
]
},
{
- "id": "cx-f0dd83d8",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "b300_c1ad910f",
- "comparisonKey": "80e2eefb7447672f",
+ "id": "cx-7963bbb8",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_1849fea1",
+ "comparisonKey": "b109657d01c98324",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T17:41:08.828331+00:00",
+ "generatedAt": "2026-06-29T13:56:37.073274+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "diagnostic",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "mode": "ll",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm)",
+ "label": "GB200 EP8 · deepep · fp8 LL",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -10554,74 +10628,75 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
+ "requestedFraction": null,
+ "achievedFraction": null,
+ "configuredUnits": null,
+ "deviceUnits": 152,
+ "resourceClass": "fixed-kernel",
+ "conformanceClass": "not-applicable",
+ "fixedKernel": true,
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254469772",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772",
- "createdAt": "2026-06-26T17:41:08.828331+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 56.992001831531525,
- "p90": 59.039998799562454,
- "p95": 61.824001371860504,
- "p99": 73.44000041484833
+ "p50": 79.74400371313095,
+ "p90": 300.6080090999603,
+ "p95": 342.272013425827,
+ "p99": 381.53600692749023
},
"combine": {
- "p50": 66.3359984755516,
- "p90": 67.4239993095398,
- "p95": 68.15999746322632,
- "p99": 77.47200131416321
+ "p50": 115.4559999704361,
+ "p90": 120.7360029220581,
+ "p95": 122.36800044775009,
+ "p99": 124.41600114107132
},
"roundtrip": {
- "p50": 106.81600123643875,
- "p90": 113.08799684047699,
- "p95": 114.23999816179276,
- "p99": 135.6479972600937
+ "p50": 1608.9279651641846,
+ "p90": 1614.2079830169678,
+ "p95": 1618.5280084609985,
+ "p99": 1940.4159784317017
},
"isolatedSum": {
- "p50": 123.32800030708313,
- "p90": 126.46399810910225,
- "p95": 129.98399883508682,
- "p99": 150.91200172901154
+ "p50": 195.20000368356705,
+ "p90": 421.34401202201843,
+ "p95": 464.6400138735771,
+ "p99": 505.95200806856155
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
+ "dispatchLogicalBytes": 315392,
"combineLogicalBytes": 630784,
"fanoutMean": 5.5,
- "recvTokensMax": 7,
+ "recvTokensMax": 14,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -10631,34 +10706,34 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 56.992001831531525,
- "p90": 58.78400057554245,
- "p95": 60.92799827456474,
- "p99": 73.21599870920181
+ "p50": 84.60800349712372,
+ "p90": 295.5839931964874,
+ "p95": 336.9919955730438,
+ "p99": 394.0800130367279
},
"combine": {
- "p50": 67.32799857854843,
- "p90": 69.11999732255936,
- "p95": 70.65600156784058,
- "p99": 79.93599772453308
+ "p50": 117.08799749612808,
+ "p90": 121.98399752378464,
+ "p95": 123.45600128173828,
+ "p99": 128.25599312782288
},
"roundtrip": {
- "p50": 106.9440022110939,
- "p90": 109.40799862146378,
- "p95": 110.88000237941742,
- "p99": 119.39200013875961
+ "p50": 1611.2960577011108,
+ "p90": 1616.6399717330933,
+ "p95": 1622.0159530639648,
+ "p99": 1949.6959447860718
},
"isolatedSum": {
- "p50": 124.32000041007996,
- "p90": 127.9039978981018,
- "p95": 131.58399984240532,
- "p99": 153.1519964337349
+ "p50": 201.6960009932518,
+ "p90": 417.56799072027206,
+ "p95": 460.4479968547821,
+ "p99": 522.3360061645508
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
+ "dispatchLogicalBytes": 616448,
"combineLogicalBytes": 1232896,
"fanoutMean": 5.375,
- "recvTokensMax": 13,
+ "recvTokensMax": 21,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -10668,34 +10743,34 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 57.792000472545624,
- "p90": 59.39200147986412,
- "p95": 61.28000095486641,
- "p99": 68.09599697589874
+ "p50": 87.80799806118011,
+ "p90": 282.01600909233093,
+ "p95": 331.32800459861755,
+ "p99": 385.18399000167847
},
"combine": {
- "p50": 67.80800223350525,
- "p90": 69.66400146484375,
- "p95": 76.99199765920639,
- "p99": 78.75200361013412
+ "p50": 120.06399780511856,
+ "p90": 125.18399953842163,
+ "p95": 127.13600695133209,
+ "p99": 131.071999669075
},
"roundtrip": {
- "p50": 116.22399836778641,
- "p90": 122.68800288438797,
- "p95": 124.35200065374374,
- "p99": 127.93600559234619
+ "p50": 1614.8799657821655,
+ "p90": 1622.2399473190308,
+ "p95": 1897.3759412765503,
+ "p99": 1955.23202419281
},
"isolatedSum": {
- "p50": 125.60000270605087,
- "p90": 129.05600294470787,
- "p95": 138.2719986140728,
- "p99": 146.84800058603287
+ "p50": 207.87199586629868,
+ "p90": 407.20000863075256,
+ "p95": 458.46401154994965,
+ "p99": 516.2559896707535
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
+ "dispatchLogicalBytes": 1240064,
"combineLogicalBytes": 2480128,
"fanoutMean": 5.40625,
- "recvTokensMax": 29,
+ "recvTokensMax": 39,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -10705,34 +10780,34 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 59.29600074887276,
- "p90": 61.15199998021126,
- "p95": 62.39999830722809,
- "p99": 68.1919977068901
+ "p50": 92.19200164079666,
+ "p90": 323.4559893608093,
+ "p95": 354.6240031719208,
+ "p99": 389.18399810791016
},
"combine": {
- "p50": 68.38399916887283,
- "p90": 77.31200009584427,
- "p95": 77.72800326347351,
- "p99": 78.78399640321732
+ "p50": 126.14400684833527,
+ "p90": 130.62399625778198,
+ "p95": 132.09599256515503,
+ "p99": 135.6160044670105
},
"roundtrip": {
- "p50": 120.25599926710129,
- "p90": 125.82400441169739,
- "p95": 126.75200402736664,
- "p99": 133.44000279903412
+ "p50": 1621.4079856872559,
+ "p90": 1628.5439729690552,
+ "p95": 1897.92001247406,
+ "p99": 1957.4719667434692
},
"isolatedSum": {
- "p50": 127.67999991774559,
- "p90": 138.46400007605553,
- "p95": 140.1280015707016,
- "p99": 146.97599411010742
+ "p50": 218.33600848913193,
+ "p90": 454.0799856185913,
+ "p95": 486.7199957370758,
+ "p99": 524.8000025749207
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
+ "dispatchLogicalBytes": 2487296,
"combineLogicalBytes": 4974592,
"fanoutMean": 5.421875,
- "recvTokensMax": 47,
+ "recvTokensMax": 74,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
@@ -10742,35 +10817,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 62.78400123119354,
- "p90": 69.023996591568,
- "p95": 71.03999704122543,
- "p99": 76.73600316047668
+ "p50": 99.39199686050415,
+ "p90": 334.1760039329529,
+ "p95": 375.0079870223999,
+ "p99": 404.4800102710724
},
"combine": {
- "p50": 77.2479996085167,
- "p90": 78.5600021481514,
- "p95": 78.72000336647034,
- "p99": 80.86399734020233
+ "p50": 137.95199990272522,
+ "p90": 143.19999516010284,
+ "p95": 145.02400159835815,
+ "p99": 151.13599598407745
},
"roundtrip": {
- "p50": 119.61600184440613,
- "p90": 122.72000312805176,
- "p95": 124.35200065374374,
- "p99": 131.29599392414093
+ "p50": 1634.7839832305908,
+ "p90": 1642.0799493789673,
+ "p95": 1649.2480039596558,
+ "p99": 1971.295952796936
},
"isolatedSum": {
- "p50": 140.03200083971024,
- "p90": 147.5839987397194,
- "p95": 149.76000040769577,
- "p99": 157.60000050067902
+ "p50": 237.34399676322937,
+ "p90": 477.3759990930557,
+ "p95": 520.031988620758,
+ "p99": 555.6160062551498
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
+ "dispatchLogicalBytes": 4960256,
"combineLogicalBytes": 9920512,
"fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
+ "recvTokensMax": 145,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10779,35 +10854,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 69.24799829721451,
- "p90": 70.91200351715088,
- "p95": 73.69600236415863,
- "p99": 81.69600367546082
+ "p50": 118.30399930477142,
+ "p90": 343.23200583457947,
+ "p95": 377.82400846481323,
+ "p99": 433.1200122833252
},
"combine": {
- "p50": 78.59200239181519,
- "p90": 79.80799674987793,
- "p95": 80.73599636554718,
- "p99": 90.94399958848953
+ "p50": 145.21600306034088,
+ "p90": 157.95199573040009,
+ "p95": 170.6559956073761,
+ "p99": 204.8960030078888
},
"roundtrip": {
- "p50": 130.68799674510956,
- "p90": 135.23200154304504,
- "p95": 136.51199638843536,
- "p99": 140.47999680042267
+ "p50": 1674.9759912490845,
+ "p90": 1684.4160556793213,
+ "p95": 1693.9200162887573,
+ "p99": 2003.2958984375
},
"isolatedSum": {
- "p50": 147.8400006890297,
- "p90": 150.7200002670288,
- "p95": 154.4319987297058,
- "p99": 172.64000326395035
+ "p50": 263.5200023651123,
+ "p90": 501.18400156497955,
+ "p95": 548.4800040721893,
+ "p99": 638.016015291214
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
+ "dispatchLogicalBytes": 9863168,
"combineLogicalBytes": 19726336,
"fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "recvTokensMax": 287,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10816,35 +10891,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 82.49600231647491,
- "p90": 92.70399808883667,
- "p95": 95.0080007314682,
- "p99": 99.45599734783173
+ "p50": 160.3199988603592,
+ "p90": 346.24001383781433,
+ "p95": 395.6800103187561,
+ "p99": 445.6000030040741
},
"combine": {
- "p50": 92.25600212812424,
- "p90": 100.09600222110748,
- "p95": 102.36799716949463,
- "p99": 106.65600001811981
+ "p50": 190.7840073108673,
+ "p90": 201.4079988002777,
+ "p95": 204.79999482631683,
+ "p99": 210.81599593162537
},
"roundtrip": {
- "p50": 158.65600109100342,
- "p90": 163.00800442695618,
- "p95": 164.19200599193573,
- "p99": 169.50400173664093
+ "p50": 1793.12002658844,
+ "p90": 1805.2159547805786,
+ "p95": 2026.304006576538,
+ "p99": 2088.223934173584
},
"isolatedSum": {
- "p50": 174.75200444459915,
- "p90": 192.80000030994415,
- "p95": 197.37599790096283,
- "p99": 206.11199736595154
+ "p50": 351.1040061712265,
+ "p90": 547.648012638092,
+ "p95": 600.4800051450729,
+ "p99": 656.4159989356995
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
+ "dispatchLogicalBytes": 19496960,
"combineLogicalBytes": 38993920,
"fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
+ "recvTokensMax": 564,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10853,35 +10928,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 93.91999989748001,
- "p90": 95.83999961614609,
- "p95": 98.04800152778625,
- "p99": 104.99200224876404
+ "p50": 347.80800342559814,
+ "p90": 371.5839982032776,
+ "p95": 581.8560123443604,
+ "p99": 651.0080099105835
},
"combine": {
- "p50": 115.35999923944473,
- "p90": 115.93600362539291,
- "p95": 116.60800129175186,
- "p99": 119.45600062608719
+ "p50": 349.37599301338196,
+ "p90": 364.0640079975128,
+ "p95": 370.65601348876953,
+ "p99": 383.13600420951843
},
"roundtrip": {
- "p50": 192.51200556755066,
- "p90": 198.88000190258026,
- "p95": 199.48799908161163,
- "p99": 209.47200059890747
+ "p50": 2056.3199520111084,
+ "p90": 2088.3519649505615,
+ "p95": 2184.4160556793213,
+ "p99": 2264.4801139831543
},
"isolatedSum": {
- "p50": 209.27999913692474,
- "p90": 211.776003241539,
- "p95": 214.65600281953812,
- "p99": 224.44800287485123
+ "p50": 697.1839964389801,
+ "p90": 735.6480062007904,
+ "p95": 952.5120258331299,
+ "p99": 1034.144014120102
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
+ "dispatchLogicalBytes": 38836224,
"combineLogicalBytes": 77672448,
"fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "recvTokensMax": 1104,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10889,107 +10964,108 @@
]
},
{
- "id": "cx-dede7717",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500",
- "colorKey": "b300_0622d929",
- "comparisonKey": "c4ede73885f09b56",
+ "id": "cx-2bd302a0",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb200_653f32d1",
+ "comparisonKey": "63dcfe2a21df3808",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T18:12:16.850895+00:00",
+ "generatedAt": "2026-06-29T13:57:26.133416+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_17",
- "sku": "b300",
+ "publicationStatus": "diagnostic",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "mode": "ll",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm) · balanced",
+ "label": "GB200 EP8 · deepep · fp8 LL",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
+ "requestedFraction": null,
+ "achievedFraction": null,
+ "configuredUnits": null,
+ "deviceUnits": 152,
+ "resourceClass": "fixed-kernel",
+ "conformanceClass": "not-applicable",
+ "fixedKernel": true,
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ffa946582edb500",
- "workloadId": "set:8:7af12818400d6348",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254508907",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907",
- "createdAt": "2026-06-26T18:12:16.850895+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 1,
"globalTokens": 8,
"dispatch": {
- "p50": 57.69599974155426,
- "p90": 60.06399914622307,
- "p95": 61.664000153541565,
- "p99": 77.7600035071373
+ "p50": 79.8719972372055,
+ "p90": 307.6480031013489,
+ "p95": 347.26399183273315,
+ "p99": 385.5679929256439
},
"combine": {
- "p50": 68.03199648857117,
- "p90": 69.76000219583511,
- "p95": 76.92799717187881,
- "p99": 78.52800190448761
+ "p50": 119.4240003824234,
+ "p90": 124.83199685811996,
+ "p95": 126.88000500202179,
+ "p99": 131.77600502967834
},
"roundtrip": {
- "p50": 107.80800133943558,
- "p90": 110.59200018644333,
- "p95": 112.19199746847153,
- "p99": 128.76799702644348
+ "p50": 1610.8160018920898,
+ "p90": 1618.5920238494873,
+ "p95": 1887.55202293396,
+ "p99": 1946.0480213165283
},
"isolatedSum": {
- "p50": 125.72799623012543,
- "p90": 129.82400134205818,
- "p95": 138.59199732542038,
- "p99": 156.2880054116249
+ "p50": 199.2959976196289,
+ "p90": 432.47999995946884,
+ "p95": 474.14399683475494,
+ "p99": 517.3439979553223
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 315392,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 14,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -10998,35 +11074,35 @@
"tokensPerRank": 2,
"globalTokens": 16,
"dispatch": {
- "p50": 58.559998869895935,
- "p90": 60.15999987721443,
- "p95": 61.664000153541565,
- "p99": 72.76800274848938
+ "p50": 82.04799890518188,
+ "p90": 311.7760121822357,
+ "p95": 348.5119938850403,
+ "p99": 394.9120044708252
},
"combine": {
- "p50": 68.25599819421768,
- "p90": 76.86399668455124,
- "p95": 77.53600180149078,
- "p99": 79.9039974808693
+ "p50": 120.19199877977371,
+ "p90": 124.89599734544754,
+ "p95": 126.46399438381195,
+ "p99": 128.92800569534302
},
"roundtrip": {
- "p50": 116.22399836778641,
- "p90": 122.11199849843979,
- "p95": 123.07199835777283,
- "p99": 127.9039978981018
+ "p50": 1612.9599809646606,
+ "p90": 1620.8959817886353,
+ "p95": 1906.6879749298096,
+ "p99": 1956.32004737854
},
"isolatedSum": {
- "p50": 126.81599706411362,
- "p90": 137.02399656176567,
- "p95": 139.20000195503235,
- "p99": 152.67200022935867
+ "p50": 202.2399976849556,
+ "p90": 436.67200952768326,
+ "p95": 474.97598826885223,
+ "p99": 523.8400101661682
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 8,
- "recvTokensMax": 16,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 21,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -11035,35 +11111,35 @@
"tokensPerRank": 4,
"globalTokens": 32,
"dispatch": {
- "p50": 58.59199911355972,
- "p90": 60.5119988322258,
- "p95": 61.664000153541565,
- "p99": 69.66400146484375
+ "p50": 87.71199733018875,
+ "p90": 322.56001234054565,
+ "p95": 345.37601470947266,
+ "p99": 387.3920142650604
},
"combine": {
- "p50": 70.01599669456482,
- "p90": 78.40000092983246,
- "p95": 78.52800190448761,
- "p99": 81.216000020504
+ "p50": 122.72000312805176,
+ "p90": 127.77599692344666,
+ "p95": 129.31199371814728,
+ "p99": 143.93599331378937
},
"roundtrip": {
- "p50": 121.66400253772736,
- "p90": 125.37600100040436,
- "p95": 127.20000743865967,
- "p99": 135.74400544166565
+ "p50": 1616.703987121582,
+ "p90": 1624.2239475250244,
+ "p95": 1915.2319431304932,
+ "p99": 2350.0161170959473
},
"isolatedSum": {
- "p50": 128.60799580812454,
- "p90": 138.91199976205826,
- "p95": 140.19200205802917,
- "p99": 150.88000148534775
+ "p50": 210.4320004582405,
+ "p90": 450.3360092639923,
+ "p95": 474.68800842761993,
+ "p99": 531.3280075788498
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 8,
- "recvTokensMax": 32,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1240064,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 39,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -11072,34 +11148,34 @@
"tokensPerRank": 8,
"globalTokens": 64,
"dispatch": {
- "p50": 59.61599946022034,
- "p90": 61.95199862122536,
- "p95": 63.90400230884552,
- "p99": 71.52000069618225
+ "p50": 91.16800129413605,
+ "p90": 333.1199884414673,
+ "p95": 358.88001322746277,
+ "p99": 392.5119936466217
},
"combine": {
- "p50": 77.40800082683563,
- "p90": 78.65600287914276,
- "p95": 78.94399762153625,
- "p99": 89.28000181913376
+ "p50": 130.23999333381653,
+ "p90": 135.1040005683899,
+ "p95": 136.48000359535217,
+ "p99": 138.33600282669067
},
"roundtrip": {
- "p50": 119.80800330638885,
- "p90": 122.65600264072418,
- "p95": 124.83199685811996,
- "p99": 136.83199882507324
+ "p50": 1622.5279569625854,
+ "p90": 1628.383994102478,
+ "p95": 1637.120008468628,
+ "p99": 1966.271996498108
},
"isolatedSum": {
- "p50": 137.02400028705597,
- "p90": 140.60800150036812,
- "p95": 142.84799993038177,
- "p99": 160.800002515316
+ "p50": 221.40799462795258,
+ "p90": 468.2239890098572,
+ "p95": 495.36001682281494,
+ "p99": 530.8479964733124
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
- "recvTokensMax": 64,
+ "dispatchLogicalBytes": 2487296,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 74,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -11109,35 +11185,35 @@
"tokensPerRank": 16,
"globalTokens": 128,
"dispatch": {
- "p50": 73.91999661922455,
- "p90": 76.09599828720093,
- "p95": 78.04799824953079,
- "p99": 85.24800091981888
+ "p50": 97.21600264310837,
+ "p90": 331.743985414505,
+ "p95": 363.48798871040344,
+ "p99": 419.3280041217804
},
"combine": {
- "p50": 78.40000092983246,
- "p90": 79.1039988398552,
- "p95": 79.39200103282928,
- "p99": 85.08799970149994
+ "p50": 141.7279988527298,
+ "p90": 147.23199605941772,
+ "p95": 149.6960073709488,
+ "p99": 274.97598528862
},
"roundtrip": {
- "p50": 121.44000083208084,
- "p90": 126.94400548934937,
- "p95": 128.92800569534302,
- "p99": 145.31199634075165
+ "p50": 1634.6240043640137,
+ "p90": 1641.0239934921265,
+ "p95": 1651.8720388412476,
+ "p99": 1987.328052520752
},
"isolatedSum": {
- "p50": 152.319997549057,
- "p90": 155.19999712705612,
- "p95": 157.43999928236008,
- "p99": 170.33600062131882
+ "p50": 238.94400149583817,
+ "p90": 478.97598147392273,
+ "p95": 513.1839960813522,
+ "p99": 694.3039894104004
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 8,
- "recvTokensMax": 128,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 4960256,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 145,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -11146,35 +11222,35 @@
"tokensPerRank": 32,
"globalTokens": 256,
"dispatch": {
- "p50": 71.07199728488922,
- "p90": 72.86400347948074,
- "p95": 73.47200065851212,
- "p99": 82.40000158548355
+ "p50": 113.69600147008896,
+ "p90": 342.848002910614,
+ "p95": 374.2400109767914,
+ "p99": 416.48000478744507
},
"combine": {
- "p50": 80.06399869918823,
- "p90": 81.37600123882294,
- "p95": 81.82399719953537,
- "p99": 89.88799899816513
+ "p50": 142.97600090503693,
+ "p90": 149.1519957780838,
+ "p95": 152.16000378131866,
+ "p99": 173.0239987373352
},
"roundtrip": {
- "p50": 134.36800241470337,
- "p90": 141.56800508499146,
- "p95": 143.99999380111694,
- "p99": 148.80000054836273
+ "p50": 1678.2399415969849,
+ "p90": 1688.3200407028198,
+ "p95": 1698.6240148544312,
+ "p99": 2008.512020111084
},
"isolatedSum": {
- "p50": 151.13599598407745,
- "p90": 154.24000471830368,
- "p95": 155.29599785804749,
- "p99": 172.28800058364868
+ "p50": 256.6720023751259,
+ "p90": 491.9999986886978,
+ "p95": 526.40001475811,
+ "p99": 589.5040035247803
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 8,
- "recvTokensMax": 256,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 9863168,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 287,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -11183,35 +11259,35 @@
"tokensPerRank": 64,
"globalTokens": 512,
"dispatch": {
- "p50": 85.34400165081024,
- "p90": 89.63199704885483,
- "p95": 91.93599969148636,
- "p99": 96.57599776983261
+ "p50": 162.4000072479248,
+ "p90": 326.4639973640442,
+ "p95": 385.6959939002991,
+ "p99": 463.48801255226135
},
"combine": {
- "p50": 93.98400038480759,
- "p90": 103.10400277376175,
- "p95": 103.29599678516388,
- "p99": 105.92000186443329
+ "p50": 201.88799500465393,
+ "p90": 211.39200031757355,
+ "p95": 213.82400393486023,
+ "p99": 217.8560048341751
},
"roundtrip": {
- "p50": 169.3439930677414,
- "p90": 172.89599776268005,
- "p95": 175.87199807167053,
- "p99": 196.16000354290009
+ "p50": 1803.5839796066284,
+ "p90": 1823.7119913101196,
+ "p95": 2049.0241050720215,
+ "p99": 2119.391918182373
},
"isolatedSum": {
- "p50": 179.32800203561783,
- "p90": 192.73599982261658,
- "p95": 195.23199647665024,
- "p99": 202.4959996342659
+ "p50": 364.28800225257874,
+ "p90": 537.8559976816177,
+ "p95": 599.5199978351593,
+ "p99": 681.3440173864365
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 8,
- "recvTokensMax": 512,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 564,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -11220,35 +11296,35 @@
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 107.13600367307663,
- "p90": 109.79200154542923,
- "p95": 111.7120012640953,
- "p99": 131.96800649166107
+ "p50": 351.00799798965454,
+ "p90": 390.04799723625183,
+ "p95": 620.9920048713684,
+ "p99": 682.0160150527954
},
"combine": {
- "p50": 130.49599528312683,
- "p90": 139.52000439167023,
- "p95": 139.8719996213913,
- "p99": 140.54399728775024
+ "p50": 355.23200035095215,
+ "p90": 373.4720051288605,
+ "p95": 378.9120018482208,
+ "p99": 387.36000657081604
},
"roundtrip": {
- "p50": 231.1680018901825,
- "p90": 235.00800132751465,
- "p95": 236.7040067911148,
- "p99": 257.6960027217865
+ "p50": 2065.2480125427246,
+ "p90": 2107.7120304107666,
+ "p95": 2237.247943878174,
+ "p99": 2313.823938369751
},
"isolatedSum": {
- "p50": 237.63199895620346,
- "p90": 249.31200593709946,
- "p95": 251.5840008854866,
- "p99": 272.5120037794113
+ "p50": 706.2399983406067,
+ "p90": 763.5200023651123,
+ "p95": 999.9040067195892,
+ "p99": 1069.3760216236115
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 1104,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -11256,366 +11332,293 @@
]
},
{
- "id": "cx-e56568fe",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c",
- "colorKey": "b300_01ab5b1a",
- "comparisonKey": "1f56c3705f670037",
+ "id": "cx-d5cc743d",
+ "identity": "gb200|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "3c60cc7bd418443a",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:38:03.696815+00:00",
+ "generatedAt": "2026-06-29T14:00:33.410141+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm) · zipf",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "Qwen3.5",
"shape": {
- "hidden": 7168,
+ "hidden": 4096,
"topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
+ "experts": 128,
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "14ded8461f2636c",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "03f98832f76b043",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271231753",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753",
- "createdAt": "2026-06-26T23:38:03.696815+00:00",
- "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 55.904000997543335,
- "p90": 59.776000678539276,
- "p95": 65.72800129652023,
- "p99": 85.11999994516373
- },
- "combine": {
- "p50": 65.60000032186508,
- "p90": 66.3679987192154,
- "p95": 66.91200286149979,
- "p99": 76.86399668455124
- },
- "roundtrip": {
- "p50": 105.05600273609161,
- "p90": 111.35999858379364,
- "p95": 112.96000331640244,
- "p99": 121.05599790811539
- },
- "isolatedSum": {
- "p50": 121.50400131940842,
- "p90": 126.14399939775467,
- "p95": 132.64000415802002,
- "p99": 161.98399662971497
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 55.84000051021576,
- "p90": 57.56799876689911,
- "p95": 60.095999389886856,
- "p99": 72.4480003118515
- },
- "combine": {
- "p50": 65.69600105285645,
- "p90": 66.3679987192154,
- "p95": 66.84800237417221,
- "p99": 69.2799985408783
- },
- "roundtrip": {
- "p50": 104.76800054311752,
- "p90": 109.40799862146378,
- "p95": 112.03200370073318,
- "p99": 159.19999778270721
- },
- "isolatedSum": {
- "p50": 121.5360015630722,
- "p90": 123.9359974861145,
- "p95": 126.94400176405907,
- "p99": 141.7279988527298
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 57.37600103020668,
- "p90": 60.80000102519989,
- "p95": 65.76000154018402,
- "p99": 95.8079993724823
+ "p50": 115.84000289440155,
+ "p90": 131.99999928474426,
+ "p95": 371.45599722862244,
+ "p99": 415.1360094547272
},
"combine": {
- "p50": 66.59200042486191,
- "p90": 77.18399912118912,
- "p95": 77.82399654388428,
- "p99": 79.16799932718277
+ "p50": 108.0000028014183,
+ "p90": 338.81598711013794,
+ "p95": 352.7359962463379,
+ "p99": 389.2799913883209
},
"roundtrip": {
- "p50": 106.91200196743011,
- "p90": 112.38399893045425,
- "p95": 115.23199826478958,
- "p99": 124.22399967908859
+ "p50": 193.7599927186966,
+ "p90": 210.4319930076599,
+ "p95": 443.6799883842468,
+ "p99": 480.5760085582733
},
"isolatedSum": {
- "p50": 123.96800145506859,
- "p90": 137.984000146389,
- "p95": 143.5839980840683,
- "p99": 174.97599869966507
+ "p50": 223.84000569581985,
+ "p90": 470.8159863948822,
+ "p95": 724.1919934749603,
+ "p99": 804.4160008430481
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 44564480,
+ "combineLogicalBytes": 44564480,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 699,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 58.6559996008873,
- "p90": 63.231997191905975,
- "p95": 65.60000032186508,
- "p99": 69.47200000286102
+ "p50": 148.0959951877594,
+ "p90": 417.5359904766083,
+ "p95": 451.1680006980896,
+ "p99": 469.6640074253082
},
"combine": {
- "p50": 68.12799721956253,
- "p90": 76.48000121116638,
- "p95": 77.15199887752533,
- "p99": 84.1279998421669
+ "p50": 145.6959992647171,
+ "p90": 363.74399065971375,
+ "p95": 406.5600037574768,
+ "p99": 438.1760060787201
},
"roundtrip": {
- "p50": 122.11199849843979,
- "p90": 125.34399330615997,
- "p95": 128.4479945898056,
- "p99": 151.5520066022873
+ "p50": 265.76000452041626,
+ "p90": 477.3760139942169,
+ "p95": 510.3359818458557,
+ "p99": 544.2559719085693
},
"isolatedSum": {
- "p50": 126.78399682044983,
- "p90": 139.71199840307236,
- "p95": 142.7519991993904,
- "p99": 153.59999984502792
+ "p50": 293.7919944524765,
+ "p90": 781.279981136322,
+ "p95": 857.7280044555664,
+ "p99": 907.8400135040283
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 89726976,
+ "combineLogicalBytes": 89726976,
+ "fanoutMean": 5.34814453125,
+ "recvTokensMax": 1385,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 59.487998485565186,
- "p90": 65.24799764156342,
- "p95": 67.00800359249115,
- "p99": 73.56800138950348
+ "p50": 197.37599790096283,
+ "p90": 463.9360010623932,
+ "p95": 495.64799666404724,
+ "p99": 526.0800123214722
},
"combine": {
- "p50": 68.12799721956253,
- "p90": 77.34400033950806,
- "p95": 77.88799703121185,
- "p99": 89.53599631786346
+ "p50": 216.99200570583344,
+ "p90": 425.1840114593506,
+ "p95": 496.5119957923889,
+ "p99": 524.2879986763
},
"roundtrip": {
- "p50": 119.1679984331131,
- "p90": 124.67200309038162,
- "p95": 125.69600343704224,
- "p99": 134.5600038766861
+ "p50": 385.5679929256439,
+ "p90": 398.6240029335022,
+ "p95": 652.4800062179565,
+ "p99": 695.5519914627075
},
"isolatedSum": {
- "p50": 127.61599570512772,
- "p90": 142.59199798107147,
- "p95": 144.896000623703,
- "p99": 163.10399770736694
+ "p50": 414.36800360679626,
+ "p90": 889.1200125217438,
+ "p95": 992.1599924564362,
+ "p99": 1050.3680109977722
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 179503104,
+ "combineLogicalBytes": 179503104,
+ "fanoutMean": 5.349609375,
+ "recvTokensMax": 2772,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 74.52800124883652,
- "p90": 76.51200145483017,
- "p95": 77.18399912118912,
- "p99": 81.7599967122078
+ "p50": 308.22399258613586,
+ "p90": 601.248025894165,
+ "p95": 629.8879981040955,
+ "p99": 645.1519727706909
},
"combine": {
- "p50": 77.91999727487564,
- "p90": 78.78399640321732,
- "p95": 79.26400005817413,
- "p99": 81.85599744319916
+ "p50": 416.31999611854553,
+ "p90": 667.4559712409973,
+ "p95": 718.1119918823242,
+ "p99": 746.7520236968994
},
"roundtrip": {
- "p50": 132.32000172138214,
- "p90": 135.6160044670105,
- "p95": 136.31999492645264,
- "p99": 141.66399836540222
+ "p50": 639.3280029296875,
+ "p90": 916.1279797554016,
+ "p95": 946.5280175209045,
+ "p99": 973.4079837799072
},
"isolatedSum": {
- "p50": 152.44799852371216,
- "p90": 155.29599785804749,
- "p95": 156.44799917936325,
- "p99": 163.61599415540695
+ "p50": 724.5439887046814,
+ "p90": 1268.7039971351624,
+ "p95": 1347.9999899864197,
+ "p99": 1391.9039964675903
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 359022592,
+ "combineLogicalBytes": 359022592,
+ "fanoutMean": 5.349853515625,
+ "recvTokensMax": 5558,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 80.19199967384338,
- "p90": 81.88799768686295,
- "p95": 83.52000266313553,
- "p99": 90.30400216579437
+ "p50": 528.7359952926636,
+ "p90": 803.6159873008728,
+ "p95": 846.560001373291,
+ "p99": 884.223997592926
},
"combine": {
- "p50": 90.59199690818787,
- "p90": 91.67999774217606,
- "p95": 92.57599711418152,
- "p99": 101.21600329875946
+ "p50": 820.1280236244202,
+ "p90": 835.9360098838806,
+ "p95": 1127.9040575027466,
+ "p99": 1175.6160259246826
},
"roundtrip": {
- "p50": 155.45600652694702,
- "p90": 160.5760008096695,
- "p95": 161.98399662971497,
- "p99": 169.53599452972412
+ "p50": 1312.2559785842896,
+ "p90": 1328.5759687423706,
+ "p95": 1621.0240125656128,
+ "p99": 1657.696008682251
},
"isolatedSum": {
- "p50": 170.78399658203125,
- "p90": 173.567995429039,
- "p95": 176.09599977731705,
- "p99": 191.52000546455383
+ "p50": 1348.8640189170837,
+ "p90": 1639.5519971847534,
+ "p95": 1974.4640588760376,
+ "p99": 2059.8400235176086
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 25145344,
- "combineLogicalBytes": 25145344,
- "fanoutMean": 3.42578125,
- "recvTokensMax": 510,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 716111872,
+ "combineLogicalBytes": 716111872,
+ "fanoutMean": 5.33544921875,
+ "recvTokensMax": 10982,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 101.69599950313568,
- "p90": 105.15200346708298,
- "p95": 106.04800283908844,
- "p99": 115.167997777462
+ "p50": 978.7840247154236,
+ "p90": 1006.4640045166016,
+ "p95": 1288.7680530548096,
+ "p99": 1324.5760202407837
},
"combine": {
- "p50": 126.81600451469421,
- "p90": 127.77599692344666,
- "p95": 128.12800705432892,
- "p99": 131.71200454235077
+ "p50": 1550.3360033035278,
+ "p90": 1569.7599649429321,
+ "p95": 1864.5440340042114,
+ "p99": 1903.8079977035522
},
"roundtrip": {
- "p50": 207.58399367332458,
- "p90": 212.41599321365356,
- "p95": 215.45599400997162,
- "p99": 240.79999327659607
+ "p50": 2488.6720180511475,
+ "p90": 2510.3039741516113,
+ "p95": 2800.9281158447266,
+ "p99": 2841.599941253662
},
"isolatedSum": {
- "p50": 228.5120040178299,
- "p90": 232.92800039052963,
- "p95": 234.17600989341736,
- "p99": 246.88000231981277
+ "p50": 2529.1200280189514,
+ "p90": 2576.2239694595337,
+ "p95": 3153.312087059021,
+ "p99": 3228.384017944336
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1432395776,
+ "combineLogicalBytes": 1432395776,
+ "fanoutMean": 5.336090087890625,
+ "recvTokensMax": 21939,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -11623,366 +11626,293 @@
]
},
{
- "id": "cx-a499b6fe",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836",
- "colorKey": "b300_085c12d4",
- "comparisonKey": "f41671f558a3c8d2",
+ "id": "cx-149387f5",
+ "identity": "gb200|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "dd9e945599d67e5f",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T18:23:15.234137+00:00",
+ "generatedAt": "2026-06-29T14:02:20.934185+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_10",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "shape 5120/8/160",
"shape": {
- "hidden": 7168,
+ "hidden": 5120,
"topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
+ "experts": 160,
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": true,
+ "eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "a8f501af7004836",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.927734375,
- "eplbImbalanceAfter": 1.0006103515625,
- "backendVersion": "1.2.1",
+ "traceSignature": "a9df48e6438e77a",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28255311146",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146",
- "createdAt": "2026-06-26T18:23:15.234137+00:00",
- "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 56.86400085687637,
- "p90": 59.7120001912117,
- "p95": 63.32799792289734,
- "p99": 72.64000177383423
- },
- "combine": {
- "p50": 64.83200192451477,
- "p90": 66.46399945020676,
- "p95": 66.94400310516357,
- "p99": 76.51200145483017
- },
- "roundtrip": {
- "p50": 105.12000322341919,
- "p90": 110.72000116109848,
- "p95": 111.7440015077591,
- "p99": 122.56000190973282
- },
- "isolatedSum": {
- "p50": 121.69600278139114,
- "p90": 126.17599964141846,
- "p95": 130.2720010280609,
- "p99": 149.1520032286644
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 5.375,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 57.5999990105629,
- "p90": 59.808000922203064,
- "p95": 62.07999959588051,
- "p99": 71.45600020885468
- },
- "combine": {
- "p50": 66.27199798822403,
- "p90": 67.00800359249115,
- "p95": 67.29599833488464,
- "p99": 76.92799717187881
- },
- "roundtrip": {
- "p50": 106.27199709415436,
- "p90": 108.22399705648422,
- "p95": 110.01600325107574,
- "p99": 132.54399597644806
- },
- "isolatedSum": {
- "p50": 123.87199699878693,
- "p90": 126.81600451469421,
- "p95": 129.37599793076515,
- "p99": 148.3839973807335
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 1204224,
- "fanoutMean": 5.25,
- "recvTokensMax": 14,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 57.53599852323532,
- "p90": 59.808000922203064,
- "p95": 60.70400029420853,
- "p99": 67.87200272083282
+ "p50": 120.19199877977371,
+ "p90": 181.43999576568604,
+ "p95": 387.00801134109497,
+ "p99": 433.0559968948364
},
"combine": {
- "p50": 66.43199920654297,
- "p90": 67.45599955320358,
- "p95": 69.31199878454208,
- "p99": 78.78399640321732
+ "p50": 116.12799763679504,
+ "p90": 120.86399644613266,
+ "p95": 127.13600695133209,
+ "p99": 377.7279853820801
},
"roundtrip": {
- "p50": 106.6880002617836,
- "p90": 109.50399935245514,
- "p95": 111.87200248241425,
- "p99": 125.08800625801086
+ "p50": 205.47200739383698,
+ "p90": 461.34400367736816,
+ "p95": 483.42400789260864,
+ "p99": 499.0079998970032
},
"isolatedSum": {
- "p50": 123.96799772977829,
- "p90": 127.26400047540665,
- "p95": 130.0159990787506,
- "p99": 146.65599912405014
+ "p50": 236.31999641656876,
+ "p90": 302.3039922118187,
+ "p95": 514.1440182924271,
+ "p99": 810.7839822769165
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 24,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 55674880,
+ "combineLogicalBytes": 55674880,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 699,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 58.848001062870026,
- "p90": 61.15199998021126,
- "p95": 64.41599875688553,
- "p99": 78.14399898052216
+ "p50": 152.25599706172943,
+ "p90": 415.3279960155487,
+ "p95": 451.775997877121,
+ "p99": 482.36799240112305
},
"combine": {
- "p50": 68.28799843788147,
- "p90": 76.25599950551987,
- "p95": 76.92799717187881,
- "p99": 79.64800298213959
+ "p50": 155.90399503707886,
+ "p90": 361.63198947906494,
+ "p95": 404.63998913764954,
+ "p99": 450.9119987487793
},
"roundtrip": {
- "p50": 116.28799885511398,
- "p90": 122.8799968957901,
- "p95": 124.70400333404541,
- "p99": 145.08800208568573
+ "p50": 275.90399980545044,
+ "p90": 290.6239926815033,
+ "p95": 537.5999808311462,
+ "p99": 569.3439841270447
},
"isolatedSum": {
- "p50": 127.1359995007515,
- "p90": 137.40799948573112,
- "p95": 141.34399592876434,
- "p99": 157.79200196266174
+ "p50": 308.1599920988083,
+ "p90": 776.9599854946136,
+ "p95": 856.4159870147705,
+ "p99": 933.2799911499023
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4630528,
- "combineLogicalBytes": 4630528,
- "fanoutMean": 5.046875,
- "recvTokensMax": 45,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 111104000,
+ "combineLogicalBytes": 111104000,
+ "fanoutMean": 5.2978515625,
+ "recvTokensMax": 1387,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 60.54399907588959,
- "p90": 66.14399701356888,
- "p95": 68.67200136184692,
- "p99": 83.29600095748901
+ "p50": 208.064004778862,
+ "p90": 467.5840139389038,
+ "p95": 495.5520033836365,
+ "p99": 534.5600247383118
},
"combine": {
- "p50": 68.64000111818314,
- "p90": 77.2159993648529,
- "p95": 77.82399654388428,
- "p99": 78.91199737787247
+ "p50": 232.31999576091766,
+ "p90": 476.8959879875183,
+ "p95": 504.7039985656738,
+ "p99": 537.5040173530579
},
"roundtrip": {
- "p50": 123.16799908876419,
- "p90": 126.0479986667633,
- "p95": 127.16799974441528,
- "p99": 131.1040073633194
+ "p50": 404.32000160217285,
+ "p90": 418.9760088920593,
+ "p95": 679.4559955596924,
+ "p99": 724.2559790611267
},
"isolatedSum": {
- "p50": 129.18400019407272,
- "p90": 143.35999637842178,
- "p95": 146.4959979057312,
- "p99": 162.20799833536148
+ "p50": 440.38400053977966,
+ "p90": 944.4800019264221,
+ "p95": 1000.2560019493103,
+ "p99": 1072.0640420913696
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9447424,
- "combineLogicalBytes": 9447424,
- "fanoutMean": 5.1484375,
- "recvTokensMax": 91,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 223098880,
+ "combineLogicalBytes": 223098880,
+ "fanoutMean": 5.319091796875,
+ "recvTokensMax": 2762,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 69.31199878454208,
- "p90": 75.52000135183334,
- "p95": 76.4160007238388,
- "p99": 83.20000022649765
+ "p50": 325.8880078792572,
+ "p90": 601.535975933075,
+ "p95": 628.3199787139893,
+ "p99": 661.5679860115051
},
"combine": {
- "p50": 78.46400141716003,
- "p90": 79.26400005817413,
- "p95": 79.45600152015686,
- "p99": 82.40000158548355
+ "p50": 475.19999742507935,
+ "p90": 740.2880191802979,
+ "p95": 786.4320278167725,
+ "p99": 820.5119967460632
},
"roundtrip": {
- "p50": 132.192000746727,
- "p90": 135.6479972600937,
- "p95": 136.3839954137802,
- "p99": 147.20000326633453
+ "p50": 724.9600291252136,
+ "p90": 740.3839826583862,
+ "p95": 1014.3040418624878,
+ "p99": 1059.1039657592773
},
"isolatedSum": {
- "p50": 147.77600020170212,
- "p90": 154.78400141000748,
- "p95": 155.87200224399567,
- "p99": 165.6000018119812
+ "p50": 801.0880053043365,
+ "p90": 1341.8239951133728,
+ "p95": 1414.7520065307617,
+ "p99": 1482.0799827575684
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19023872,
- "combineLogicalBytes": 19023872,
- "fanoutMean": 5.18359375,
- "recvTokensMax": 178,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 446730240,
+ "combineLogicalBytes": 446730240,
+ "fanoutMean": 5.325439453125,
+ "recvTokensMax": 5518,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 87.8399983048439,
- "p90": 90.30400216579437,
- "p95": 91.87199920415878,
- "p99": 100.0640019774437
+ "p50": 568.1920051574707,
+ "p90": 587.1040225028992,
+ "p95": 874.8160004615784,
+ "p99": 909.6639752388
},
"combine": {
- "p50": 91.2960022687912,
- "p90": 93.08800101280212,
- "p95": 93.85599941015244,
- "p99": 108.12799632549286
+ "p50": 848.0319976806641,
+ "p90": 876.8640160560608,
+ "p95": 1146.720051765442,
+ "p99": 1191.3599967956543
},
"roundtrip": {
- "p50": 157.44000673294067,
- "p90": 162.4639928340912,
- "p95": 163.71199488639832,
- "p99": 168.89600455760956
+ "p50": 1380.4479837417603,
+ "p90": 1396.448016166687,
+ "p95": 1698.8799571990967,
+ "p99": 1734.1760396957397
},
"isolatedSum": {
- "p50": 179.1360005736351,
- "p90": 183.3920031785965,
- "p95": 185.72799861431122,
- "p99": 208.19199830293655
+ "p50": 1416.2240028381348,
+ "p90": 1463.96803855896,
+ "p95": 2021.5360522270203,
+ "p99": 2101.0239720344543
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38148096,
- "combineLogicalBytes": 38148096,
- "fanoutMean": 5.197265625,
- "recvTokensMax": 350,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 893634560,
+ "combineLogicalBytes": 893634560,
+ "fanoutMean": 5.32647705078125,
+ "recvTokensMax": 11032,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 93.9520001411438,
- "p90": 98.39999675750732,
- "p95": 100.00000149011612,
- "p99": 105.53599894046783
+ "p50": 1051.6480207443237,
+ "p90": 1330.4640054702759,
+ "p95": 1371.7119693756104,
+ "p99": 1398.2720375061035
},
"combine": {
- "p50": 115.29599875211716,
- "p90": 116.12799763679504,
- "p95": 116.48000031709671,
- "p99": 127.87200510501862
+ "p50": 1592.8959846496582,
+ "p90": 1619.0719604492188,
+ "p95": 1900.1920223236084,
+ "p99": 1945.7279443740845
},
"roundtrip": {
- "p50": 193.08799505233765,
- "p90": 199.90399479866028,
- "p95": 201.50400698184967,
- "p99": 214.1759991645813
+ "p50": 2600.7039546966553,
+ "p90": 2650.304079055786,
+ "p95": 2929.663896560669,
+ "p99": 2953.632116317749
},
"isolatedSum": {
- "p50": 209.24799889326096,
- "p90": 214.52799439430237,
- "p95": 216.48000180721283,
- "p99": 233.40800404548645
+ "p50": 2644.544005393982,
+ "p90": 2949.5359659194946,
+ "p95": 3271.9039916992188,
+ "p99": 3343.999981880188
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 76955648,
- "combineLogicalBytes": 76955648,
- "fanoutMean": 5.2421875,
- "recvTokensMax": 687,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1786265600,
+ "combineLogicalBytes": 1786265600,
+ "fanoutMean": 5.323486328125,
+ "recvTokensMax": 21895,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -11990,31 +11920,31 @@
]
},
{
- "id": "cx-8481f6a4",
- "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "b300_63f1354f",
- "comparisonKey": "63f9b5a5300d4d4b",
+ "id": "cx-08f535b7",
+ "identity": "gb200|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "cc4f254d990410d2",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T18:09:35.317427+00:00",
+ "generatedAt": "2026-06-29T14:04:11.209616+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_16",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "MiniMax-M3",
"shape": {
- "hidden": 7168,
+ "hidden": 6144,
"topk": 8,
"experts": 256,
"routing": "uniform",
@@ -12023,333 +11953,260 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254489726",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726",
- "createdAt": "2026-06-26T18:09:35.317427+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 50.303999334573746,
- "p90": 52.06400156021118,
- "p95": 53.82400006055832,
- "p99": 65.05600363016129
- },
- "combine": {
- "p50": 66.56000018119812,
- "p90": 68.2239979505539,
- "p95": 68.76800209283829,
- "p99": 77.95199751853943
- },
- "roundtrip": {
- "p50": 99.84000027179718,
- "p90": 103.90400141477585,
- "p95": 107.51999914646149,
- "p99": 117.11999773979187
- },
- "isolatedSum": {
- "p50": 116.86399951577187,
- "p90": 120.28799951076508,
- "p95": 122.5920021533966,
- "p99": 143.0080011487007
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 51.263999193906784,
- "p90": 52.89600044488907,
- "p95": 55.32800033688545,
- "p99": 65.18399715423584
- },
- "combine": {
- "p50": 66.97600334882736,
- "p90": 68.7360018491745,
- "p95": 69.11999732255936,
- "p99": 78.11199873685837
- },
- "roundtrip": {
- "p50": 100.99200159311295,
- "p90": 103.26399654150009,
- "p95": 105.76000064611435,
- "p99": 113.6000007390976
- },
- "isolatedSum": {
- "p50": 118.24000254273415,
- "p90": 121.63200229406357,
- "p95": 124.44799765944481,
- "p99": 143.2959958910942
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 51.4880008995533,
- "p90": 53.408000618219376,
- "p95": 54.9440011382103,
- "p99": 61.63199990987778
+ "p50": 122.75200337171555,
+ "p90": 372.54399061203003,
+ "p95": 404.1920006275177,
+ "p99": 428.3199906349182
},
"combine": {
- "p50": 67.6800012588501,
- "p90": 69.60000097751617,
- "p95": 76.89599692821503,
- "p99": 79.16799932718277
+ "p50": 120.60800194740295,
+ "p90": 188.48000466823578,
+ "p95": 386.9760036468506,
+ "p99": 412.3840034008026
},
"roundtrip": {
- "p50": 108.73600095510483,
- "p90": 115.80800265073776,
- "p95": 117.0239970088005,
- "p99": 124.35200065374374
+ "p50": 214.39999341964722,
+ "p90": 436.6399943828583,
+ "p95": 488.0639910697937,
+ "p99": 523.6480236053467
},
"isolatedSum": {
- "p50": 119.1680021584034,
- "p90": 123.00800159573555,
- "p95": 131.83999806642532,
- "p99": 140.79999923706055
+ "p50": 243.3600053191185,
+ "p90": 561.0239952802658,
+ "p95": 791.1680042743683,
+ "p99": 840.7039940357208
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 66576384,
+ "combineLogicalBytes": 66576384,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 52.639998495578766,
- "p90": 55.64799904823303,
- "p95": 59.39200147986412,
- "p99": 68.00000369548798
+ "p50": 157.60000050067902,
+ "p90": 420.6080138683319,
+ "p95": 449.0239918231964,
+ "p99": 488.8960123062134
},
"combine": {
- "p50": 68.25599819421768,
- "p90": 77.08799839019775,
- "p95": 77.60000228881836,
- "p99": 78.94399762153625
+ "p50": 162.6880019903183,
+ "p90": 391.29599928855896,
+ "p95": 432.2879910469055,
+ "p99": 457.69599080085754
},
"roundtrip": {
- "p50": 113.69600147008896,
- "p90": 117.66400188207626,
- "p95": 118.72000247240067,
- "p99": 121.18399888277054
+ "p50": 290.0480031967163,
+ "p90": 529.4719934463501,
+ "p95": 567.3279762268066,
+ "p99": 593.7280058860779
},
"isolatedSum": {
- "p50": 120.89599668979645,
- "p90": 132.7359974384308,
- "p95": 136.99200376868248,
- "p99": 146.94400131702423
+ "p50": 320.2880024909973,
+ "p90": 811.9040131568909,
+ "p95": 881.3119828701019,
+ "p99": 946.5920031070709
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 133619712,
+ "combineLogicalBytes": 133619712,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 61.792001128196716,
- "p90": 67.90400296449661,
- "p95": 68.67200136184692,
- "p99": 71.1359977722168
+ "p50": 223.13599288463593,
+ "p90": 504.2880177497864,
+ "p95": 543.9680218696594,
+ "p99": 947.6799964904785
},
"combine": {
- "p50": 70.46400010585785,
- "p90": 78.40000092983246,
- "p95": 78.59200239181519,
- "p99": 81.44000172615051
+ "p50": 261.50399446487427,
+ "p90": 513.8239860534668,
+ "p95": 548.255980014801,
+ "p99": 2365.855932235718
},
"roundtrip": {
- "p50": 113.18399757146835,
- "p90": 115.9679964184761,
- "p95": 117.53600090742111,
- "p99": 127.87200510501862
+ "p50": 440.2559995651245,
+ "p90": 453.95201444625854,
+ "p95": 705.6000232696533,
+ "p99": 817.6000118255615
},
"isolatedSum": {
- "p50": 132.25600123405457,
- "p90": 146.30400389432907,
- "p95": 147.2640037536621,
- "p99": 152.5759994983673
+ "p50": 484.6399873495102,
+ "p90": 1018.1120038032532,
+ "p95": 1092.2240018844604,
+ "p99": 3313.5359287261963
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 267657216,
+ "combineLogicalBytes": 267657216,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 62.65600025653839,
- "p90": 64.92800265550613,
- "p95": 66.880002617836,
- "p99": 73.69600236415863
+ "p50": 349.2160141468048,
+ "p90": 614.4000291824341,
+ "p95": 651.3599753379822,
+ "p99": 686.9440078735352
},
"combine": {
- "p50": 78.59200239181519,
- "p90": 79.74400371313095,
- "p95": 80.64000308513641,
- "p99": 85.63199639320374
+ "p50": 479.0399968624115,
+ "p90": 748.3519911766052,
+ "p95": 785.2159738540649,
+ "p99": 819.4559812545776
},
"roundtrip": {
- "p50": 124.28800016641617,
- "p90": 127.93600559234619,
- "p95": 130.43199479579926,
- "p99": 138.5599970817566
+ "p50": 791.7439937591553,
+ "p90": 805.6960105895996,
+ "p95": 1106.7839860916138,
+ "p99": 1150.048017501831
},
"isolatedSum": {
- "p50": 141.24800264835358,
- "p90": 144.67200636863708,
- "p95": 147.5200057029724,
- "p99": 159.32799875736237
+ "p50": 828.2560110092163,
+ "p90": 1362.7520203590393,
+ "p95": 1436.5759491920471,
+ "p99": 1506.3999891281128
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 534380544,
+ "combineLogicalBytes": 534380544,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 75.77600330114365,
- "p90": 83.16799998283386,
- "p95": 83.96799862384796,
- "p99": 96.3520035147667
+ "p50": 599.6479988098145,
+ "p90": 615.4239773750305,
+ "p95": 909.1519713401794,
+ "p99": 959.3600034713745
},
"combine": {
- "p50": 91.48799628019333,
- "p90": 93.6959981918335,
- "p95": 95.90400010347366,
- "p99": 104.76800054311752
+ "p50": 861.1199855804443,
+ "p90": 877.5680065155029,
+ "p95": 1169.2479848861694,
+ "p99": 1214.2720222473145
},
"roundtrip": {
- "p50": 150.11200308799744,
- "p90": 153.28000485897064,
- "p95": 154.91199493408203,
- "p99": 159.96800363063812
+ "p50": 1430.6880235671997,
+ "p90": 1540.4800176620483,
+ "p95": 1742.8159713745117,
+ "p99": 1778.9440155029297
},
"isolatedSum": {
- "p50": 167.26399958133698,
- "p90": 176.86399817466736,
- "p95": 179.87199872732162,
- "p99": 201.12000405788422
+ "p50": 1460.7679843902588,
+ "p90": 1492.9919838905334,
+ "p95": 2078.399956226349,
+ "p99": 2173.632025718689
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1066119168,
+ "combineLogicalBytes": 1066119168,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 87.36000210046768,
- "p90": 89.31200206279755,
- "p95": 92.3520028591156,
- "p99": 98.36799651384354
+ "p50": 1109.7919940948486,
+ "p90": 1361.88805103302,
+ "p95": 1428.671956062317,
+ "p99": 1470.1440334320068
},
"combine": {
- "p50": 115.32799899578094,
- "p90": 115.9679964184761,
- "p95": 117.21599847078323,
- "p99": 126.49600207805634
+ "p50": 1612.3520135879517,
+ "p90": 1633.4079504013062,
+ "p95": 1926.4960289001465,
+ "p99": 1954.7840356826782
},
"roundtrip": {
- "p50": 186.14399433135986,
- "p90": 191.67999923229218,
- "p95": 193.05600225925446,
- "p99": 199.072003364563
+ "p50": 2689.6960735321045,
+ "p90": 2706.239938735962,
+ "p95": 2997.312068939209,
+ "p99": 3036.191940307617
},
"isolatedSum": {
- "p50": 202.68800109624863,
- "p90": 205.27999848127365,
- "p95": 209.56800132989883,
- "p99": 224.86399859189987
+ "p50": 2722.1440076828003,
+ "p90": 2995.296001434326,
+ "p95": 3355.1679849624634,
+ "p99": 3424.928069114685
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2131722240,
+ "combineLogicalBytes": 2131722240,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -12357,28 +12214,28 @@
]
},
{
- "id": "cx-00895a92",
- "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_33311fdc",
- "comparisonKey": "fb96ce98136947bb",
+ "id": "cx-a5b9e896",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb200_f1783455",
+ "comparisonKey": "3c02cc9216dfe92d",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:46:23.442699+00:00",
+ "generatedAt": "2026-06-29T13:53:19.000387+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 [cl]",
+ "label": "GB200 EP8 · deepep · bf16",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -12390,333 +12247,260 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
"paretoEligible": false
},
"placement": {
- "kind": "packed",
- "nodes": 1,
+ "kind": "adversarial",
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285573016",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285573016",
- "createdAt": "2026-06-27T09:46:23.442699+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 49.6320016682148,
- "p90": 50.97600072622299,
- "p95": 52.319999784231186,
- "p99": 56.992001831531525
- },
- "combine": {
- "p50": 65.66400080919266,
- "p90": 66.52799993753433,
- "p95": 67.26399809122086,
- "p99": 77.2479996085167
- },
- "roundtrip": {
- "p50": 98.62399846315384,
- "p90": 101.31199657917023,
- "p95": 103.39199751615524,
- "p99": 122.3360002040863
- },
- "isolatedSum": {
- "p50": 115.29600247740746,
- "p90": 117.50400066375732,
- "p95": 119.58399787545204,
- "p99": 134.24000144004822
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 50.144001841545105,
- "p90": 51.872000098228455,
- "p95": 53.37600037455559,
- "p99": 63.00800293684006
+ "p50": 123.99999797344208,
+ "p90": 367.5839900970459,
+ "p95": 395.26399970054626,
+ "p99": 433.56800079345703
},
"combine": {
- "p50": 67.07199662923813,
- "p90": 68.7360018491745,
- "p95": 69.24799829721451,
- "p99": 79.1039988398552
+ "p50": 124.95999783277512,
+ "p90": 361.34400963783264,
+ "p95": 400.4479944705963,
+ "p99": 415.16798734664917
},
"roundtrip": {
- "p50": 99.80800002813339,
- "p90": 102.01600193977356,
- "p95": 103.7760004401207,
- "p99": 110.20799726247787
+ "p50": 221.50400280952454,
+ "p90": 250.65600872039795,
+ "p95": 482.4959933757782,
+ "p99": 526.7519950866699
},
"isolatedSum": {
- "p50": 117.21599847078323,
- "p90": 120.60800194740295,
- "p95": 122.6239986717701,
- "p99": 142.11200177669525
+ "p50": 248.9599958062172,
+ "p90": 728.9279997348785,
+ "p95": 795.7119941711426,
+ "p99": 848.7359881401062
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 50.97600072622299,
- "p90": 52.799999713897705,
- "p95": 54.91200089454651,
- "p99": 61.11999973654747
+ "p50": 161.3440066576004,
+ "p90": 455.7119905948639,
+ "p95": 468.80000829696655,
+ "p99": 482.015997171402
},
"combine": {
- "p50": 68.4799998998642,
- "p90": 89.63199704885483,
- "p95": 92.28800237178802,
- "p99": 102.4319976568222
+ "p50": 168.35199296474457,
+ "p90": 428.73600125312805,
+ "p95": 442.68798828125,
+ "p99": 465.05600214004517
},
"roundtrip": {
- "p50": 112.86400258541107,
- "p90": 117.08799749612808,
- "p95": 118.23999881744385,
- "p99": 121.95199728012085
+ "p50": 299.4239926338196,
+ "p90": 541.6319966316223,
+ "p95": 581.6320180892944,
+ "p99": 611.4240288734436
},
"isolatedSum": {
- "p50": 119.45600062608719,
- "p90": 142.43199676275253,
- "p95": 147.20000326633453,
- "p99": 163.55199739336967
+ "p50": 329.69599962234497,
+ "p90": 884.447991847992,
+ "p95": 911.4879965782166,
+ "p99": 947.0719993114471
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 155889664,
+ "combineLogicalBytes": 155889664,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 55.296000093221664,
- "p90": 70.30399888753891,
- "p95": 75.1039981842041,
- "p99": 115.58400094509125
+ "p50": 231.7119985818863,
+ "p90": 335.07201075553894,
+ "p95": 539.1680002212524,
+ "p99": 568.6079859733582
},
"combine": {
- "p50": 68.89600306749344,
- "p90": 77.79199630022049,
- "p95": 78.33600044250488,
- "p99": 82.33600109815598
+ "p50": 293.69598627090454,
+ "p90": 538.0799770355225,
+ "p95": 578.2399773597717,
+ "p99": 609.5679998397827
},
"roundtrip": {
- "p50": 113.6000007390976,
- "p90": 117.91999638080597,
- "p95": 118.97599697113037,
- "p99": 125.18399953842163
+ "p50": 472.03201055526733,
+ "p90": 486.04801297187805,
+ "p95": 770.0799703598022,
+ "p99": 807.6159954071045
},
"isolatedSum": {
- "p50": 124.1920031607151,
- "p90": 148.0959951877594,
- "p95": 153.43999862670898,
- "p99": 197.92000204324722
+ "p50": 525.4079848527908,
+ "p90": 873.1519877910614,
+ "p95": 1117.4079775810242,
+ "p99": 1178.1759858131409
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
+ "dispatchLogicalBytes": 312266752,
+ "combineLogicalBytes": 312266752,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 60.06399914622307,
- "p90": 67.19999760389328,
- "p95": 68.03199648857117,
- "p99": 71.87200337648392
+ "p50": 361.1519932746887,
+ "p90": 370.88000774383545,
+ "p95": 662.015974521637,
+ "p99": 707.7760100364685
},
"combine": {
- "p50": 68.89600306749344,
- "p90": 77.85599678754807,
- "p95": 78.3040001988411,
- "p99": 81.4720019698143
+ "p50": 494.6880042552948,
+ "p90": 760.9279751777649,
+ "p95": 812.9600286483765,
+ "p99": 842.9120182991028
},
"roundtrip": {
- "p50": 112.09599673748016,
- "p90": 114.9120032787323,
- "p95": 116.54400080442429,
- "p99": 128.25599312782288
+ "p50": 828.3519744873047,
+ "p90": 839.4240140914917,
+ "p95": 1141.7280435562134,
+ "p99": 1192.6079988479614
},
"isolatedSum": {
- "p50": 128.9600022137165,
- "p90": 145.05599439144135,
- "p95": 146.33599668741226,
- "p99": 153.34400534629822
+ "p50": 855.8399975299835,
+ "p90": 1131.8079829216003,
+ "p95": 1474.9760031700134,
+ "p99": 1550.6880283355713
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 623443968,
+ "combineLogicalBytes": 623443968,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 61.792001128196716,
- "p90": 63.45599889755249,
- "p95": 66.49599969387054,
- "p99": 72.03199714422226
+ "p50": 619.5520162582397,
+ "p90": 636.5119814872742,
+ "p95": 949.4079947471619,
+ "p99": 986.0799908638
},
"combine": {
- "p50": 78.11199873685837,
- "p90": 79.42400127649307,
- "p95": 80.35200089216232,
- "p99": 83.48800241947174
+ "p50": 884.3520283699036,
+ "p90": 899.4879722595215,
+ "p95": 1200.1279592514038,
+ "p99": 1233.2799434661865
},
"roundtrip": {
- "p50": 122.81599640846252,
- "p90": 124.95999783277512,
- "p95": 127.00800597667694,
- "p99": 132.76800513267517
+ "p50": 1478.4640073776245,
+ "p90": 1499.168038368225,
+ "p95": 1800.3840446472168,
+ "p99": 1831.6800594329834
},
"isolatedSum": {
- "p50": 139.90399986505508,
- "p90": 142.88000017404556,
- "p95": 146.84800058603287,
- "p99": 155.519999563694
+ "p50": 1503.9040446281433,
+ "p90": 1535.9999537467957,
+ "p95": 2149.5359539985657,
+ "p99": 2219.3599343299866
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1243805696,
+ "combineLogicalBytes": 1243805696,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 74.5920017361641,
- "p90": 82.14399963617325,
- "p95": 83.20000022649765,
- "p99": 97.69599884748459
+ "p50": 1144.1600322723389,
+ "p90": 1158.687949180603,
+ "p95": 1444.2239999771118,
+ "p99": 1490.7519817352295
},
"combine": {
- "p50": 91.93599969148636,
- "p90": 100.63999891281128,
- "p95": 101.75999999046326,
- "p99": 108.22399705648422
+ "p50": 1646.016001701355,
+ "p90": 1906.432032585144,
+ "p95": 1952.9279470443726,
+ "p99": 2000.7359981536865
},
"roundtrip": {
- "p50": 148.60799908638,
- "p90": 151.96800231933594,
- "p95": 153.1199961900711,
- "p99": 155.93600273132324
+ "p50": 2764.256000518799,
+ "p90": 2780.3521156311035,
+ "p95": 3060.800075531006,
+ "p99": 3094.752073287964
},
"isolatedSum": {
- "p50": 166.52800142765045,
- "p90": 182.78399854898453,
- "p95": 184.9600002169609,
- "p99": 205.9199959039688
+ "p50": 2790.176033973694,
+ "p90": 3065.119981765747,
+ "p95": 3397.1519470214844,
+ "p99": 3491.487979888916
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 86.5280032157898,
- "p90": 87.99999952316284,
- "p95": 90.11200070381165,
- "p99": 99.84000027179718
- },
- "combine": {
- "p50": 114.97599631547928,
- "p90": 116.28799885511398,
- "p95": 117.18399822711945,
- "p99": 126.49600207805634
- },
- "roundtrip": {
- "p50": 185.2799952030182,
- "p90": 191.00800156593323,
- "p95": 192.76799261569977,
- "p99": 203.23200523853302
- },
- "isolatedSum": {
- "p50": 201.50399953126907,
- "p90": 204.28799837827682,
- "p95": 207.2959989309311,
- "p99": 226.33600234985352
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2487009280,
+ "combineLogicalBytes": 2487009280,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -12724,47 +12508,48 @@
]
},
{
- "id": "cx-34fdfa58",
- "identity": "b300|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "b300_d6fd14c3",
- "comparisonKey": "e31dbd692115f689",
+ "id": "cx-b1e9f61a",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "20a70c4abe2b89cf",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:20.626757+00:00",
+ "generatedAt": "2026-06-29T13:49:33.863915+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_04",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "Qwen3.5",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 4096,
+ "hidden": 7168,
"topk": 8,
- "experts": 128,
+ "experts": 256,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -12772,318 +12557,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287509502",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287509502",
- "createdAt": "2026-06-27T11:14:20.626757+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 54.655998945236206,
- "p90": 56.60799890756607,
- "p95": 57.40800127387047,
- "p99": 63.80800157785416
- },
- "combine": {
- "p50": 49.92000013589859,
- "p90": 51.16799846291542,
- "p95": 52.12799832224846,
- "p99": 59.10399928689003
- },
- "roundtrip": {
- "p50": 107.07200318574905,
- "p90": 109.6000000834465,
- "p95": 111.84000223875046,
- "p99": 129.56799566745758
- },
- "isolatedSum": {
- "p50": 104.5759990811348,
- "p90": 107.77599737048149,
- "p95": 109.53599959611893,
- "p99": 122.91200086474419
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 55.135998874902725,
- "p90": 57.24800005555153,
- "p95": 58.720000088214874,
- "p99": 64.80000168085098
- },
- "combine": {
- "p50": 50.75199902057648,
- "p90": 52.480001002550125,
- "p95": 52.83199995756149,
- "p99": 63.90400230884552
- },
- "roundtrip": {
- "p50": 108.83200168609619,
- "p90": 112.12799698114395,
- "p95": 115.26399850845337,
- "p99": 229.40799593925476
- },
- "isolatedSum": {
- "p50": 105.8879978954792,
- "p90": 109.72800105810165,
- "p95": 111.55200004577637,
- "p99": 128.7040039896965
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 352256,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 56.671999394893646,
- "p90": 76.38400048017502,
- "p95": 85.69599688053131,
- "p99": 199.10399615764618
+ "p50": 122.43200093507767,
+ "p90": 130.14400005340576,
+ "p95": 133.7279975414276,
+ "p99": 138.97599279880524
},
"combine": {
- "p50": 53.15199866890907,
- "p90": 57.11999908089638,
- "p95": 58.62399935722351,
- "p99": 63.07200342416763
+ "p50": 125.98399817943573,
+ "p90": 131.52000308036804,
+ "p95": 132.9919993877411,
+ "p99": 140.06400108337402
},
"roundtrip": {
- "p50": 111.42399907112122,
- "p90": 114.33599889278412,
- "p95": 116.48000031709671,
- "p99": 125.5359947681427
+ "p50": 220.70400416851044,
+ "p90": 227.39200294017792,
+ "p95": 230.52799701690674,
+ "p99": 235.167995095253
},
"isolatedSum": {
- "p50": 109.82399806380272,
- "p90": 133.5039995610714,
- "p95": 144.31999623775482,
- "p99": 262.1759995818138
+ "p50": 248.4159991145134,
+ "p90": 261.6640031337738,
+ "p95": 266.7199969291687,
+ "p99": 279.03999388217926
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 692224,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 57.760000228881836,
- "p90": 59.93599817156792,
- "p95": 61.184000223875046,
- "p99": 67.26399809122086
+ "p50": 160.73599457740784,
+ "p90": 170.0800061225891,
+ "p95": 174.8799979686737,
+ "p99": 7247.3602294921875
},
"combine": {
- "p50": 54.91200089454651,
- "p90": 56.96000158786774,
- "p95": 57.28000029921532,
- "p99": 63.391998410224915
+ "p50": 167.77600347995758,
+ "p90": 171.48800194263458,
+ "p95": 175.58400332927704,
+ "p99": 183.48799645900726
},
"roundtrip": {
- "p50": 114.78400230407715,
- "p90": 116.99199676513672,
- "p95": 118.43200027942657,
- "p99": 134.94400680065155
+ "p50": 300.86401104927063,
+ "p90": 309.471994638443,
+ "p95": 313.1519854068756,
+ "p99": 326.9439935684204
},
"isolatedSum": {
- "p50": 112.67200112342834,
- "p90": 116.89599975943565,
- "p95": 118.46400052309036,
- "p99": 130.65599650144577
+ "p50": 328.5119980573654,
+ "p90": 341.5680080652237,
+ "p95": 350.46400129795074,
+ "p99": 7430.848225951195
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1372160,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 155889664,
+ "combineLogicalBytes": 155889664,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 57.8560009598732,
- "p90": 59.93599817156792,
- "p95": 61.216000467538834,
- "p99": 79.80799674987793
+ "p50": 232.09600150585175,
+ "p90": 240.57599902153015,
+ "p95": 243.80800127983093,
+ "p99": 250.0160038471222
},
"combine": {
- "p50": 54.91200089454651,
- "p90": 56.73599988222122,
- "p95": 57.023998349905014,
- "p99": 60.06399914622307
+ "p50": 292.959988117218,
+ "p90": 300.5119860172272,
+ "p95": 303.26399207115173,
+ "p99": 307.8719973564148
},
"roundtrip": {
- "p50": 115.13599753379822,
- "p90": 117.34399944543839,
- "p95": 118.6240017414093,
- "p99": 126.08000636100769
+ "p50": 472.896009683609,
+ "p90": 479.99998927116394,
+ "p95": 482.7519953250885,
+ "p99": 488.7999892234802
},
"isolatedSum": {
- "p50": 112.76800185441971,
- "p90": 116.67199805378914,
- "p95": 118.23999881744385,
- "p99": 139.871995896101
+ "p50": 525.0559896230698,
+ "p90": 541.0879850387573,
+ "p95": 547.0719933509827,
+ "p99": 557.888001203537
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2732032,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 312266752,
+ "combineLogicalBytes": 312266752,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 61.08799949288368,
- "p90": 63.26399743556976,
- "p95": 64.4799992442131,
- "p99": 74.43200051784515
+ "p50": 360.79999804496765,
+ "p90": 371.2320029735565,
+ "p95": 380.2880048751831,
+ "p99": 451.6800045967102
},
"combine": {
- "p50": 58.27200040221214,
- "p90": 60.28800085186958,
- "p95": 60.92799827456474,
- "p99": 65.0240033864975
+ "p50": 493.120014667511,
+ "p90": 500.5120038986206,
+ "p95": 501.9199848175049,
+ "p99": 508.86398553848267
},
"roundtrip": {
- "p50": 122.6240023970604,
- "p90": 125.56800246238708,
- "p95": 127.26399302482605,
- "p99": 150.36800503730774
+ "p50": 825.6000280380249,
+ "p90": 832.4480056762695,
+ "p95": 834.2080116271973,
+ "p99": 841.1200046539307
},
"isolatedSum": {
- "p50": 119.35999989509583,
- "p90": 123.55199828743935,
- "p95": 125.40799751877785,
- "p99": 139.45600390434265
+ "p50": 853.9200127124786,
+ "p90": 871.7440068721771,
+ "p95": 882.207989692688,
+ "p99": 960.5439901351929
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 5562368,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 623443968,
+ "combineLogicalBytes": 623443968,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 67.80800223350525,
- "p90": 70.62400132417679,
- "p95": 71.3919997215271,
- "p99": 76.99199765920639
+ "p50": 619.8400259017944,
+ "p90": 626.36798620224,
+ "p95": 628.1279921531677,
+ "p99": 635.3920102119446
},
"combine": {
- "p50": 71.29599899053574,
- "p90": 73.40800017118454,
- "p95": 74.23999905586243,
- "p99": 77.15199887752533
+ "p50": 880.7359933853149,
+ "p90": 885.1199746131897,
+ "p95": 886.3999843597412,
+ "p99": 893.0879831314087
},
"roundtrip": {
- "p50": 146.91199362277985,
- "p90": 150.176003575325,
- "p95": 151.90400183200836,
- "p99": 180.51199615001678
+ "p50": 1470.2080488204956,
+ "p90": 1478.559970855713,
+ "p95": 1480.8000326156616,
+ "p99": 1488.5120391845703
},
"isolatedSum": {
- "p50": 139.10400122404099,
- "p90": 144.03200149536133,
- "p95": 145.63199877738953,
- "p99": 154.14399653673172
+ "p50": 1500.5760192871094,
+ "p90": 1511.4879608154297,
+ "p95": 1514.527976512909,
+ "p99": 1528.4799933433533
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 11096064,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1243805696,
+ "combineLogicalBytes": 1243805696,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 77.11999863386154,
- "p90": 79.52000200748444,
- "p95": 80.70400357246399,
- "p99": 101.1200025677681
+ "p50": 1146.6879844665527,
+ "p90": 1153.7920236587524,
+ "p95": 1155.9360027313232,
+ "p99": 1161.56804561615
},
"combine": {
- "p50": 87.74399757385254,
- "p90": 89.82399851083755,
- "p95": 90.91199934482574,
- "p99": 96.12800180912018
+ "p50": 1635.5199813842773,
+ "p90": 1693.6320066452026,
+ "p95": 1704.7040462493896,
+ "p99": 1926.751971244812
},
"roundtrip": {
- "p50": 178.8800060749054,
- "p90": 181.5679967403412,
- "p95": 182.8799992799759,
- "p99": 190.68799912929535
+ "p50": 2760.3840827941895,
+ "p90": 2840.9600257873535,
+ "p95": 3140.928030014038,
+ "p99": 7563.263893127441
},
"isolatedSum": {
- "p50": 164.86399620771408,
- "p90": 169.344000518322,
- "p95": 171.61600291728973,
- "p99": 197.24800437688828
+ "p50": 2782.20796585083,
+ "p90": 2847.424030303955,
+ "p95": 2860.640048980713,
+ "p99": 3088.320016860962
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2487009280,
+ "combineLogicalBytes": 2487009280,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -13091,47 +12802,48 @@
]
},
{
- "id": "cx-3b501b50",
- "identity": "b300|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "b3fe3e767199861f",
+ "id": "cx-a58e57e5",
+ "identity": "gb200|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8",
+ "colorKey": "gb200_7c2da03d",
+ "comparisonKey": "6f8e0fa22252756a",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:51:42.086775+00:00",
+ "generatedAt": "2026-06-29T13:58:46.178899+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_09",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "Qwen3.5",
+ "label": "GB200 EP8 · deepep · bf16",
+ "model": "Kimi-K2",
"shape": {
- "hidden": 4096,
+ "hidden": 7168,
"topk": 8,
- "experts": 128,
+ "experts": 384,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -13139,318 +12851,427 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "fc08bf2f8d42ed8",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285693587",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285693587",
- "createdAt": "2026-06-27T09:51:42.086775+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 112.89600282907486,
- "p90": 116.22399836778641,
- "p95": 118.40000003576279,
- "p99": 145.11999487876892
+ "p50": 124.79999661445618,
+ "p90": 371.3279962539673,
+ "p95": 393.5360014438629,
+ "p99": 431.2959909439087
},
"combine": {
- "p50": 50.71999877691269,
- "p90": 52.57600173354149,
- "p95": 53.119998425245285,
- "p99": 63.07200342416763
+ "p50": 126.46399438381195,
+ "p90": 362.4640107154846,
+ "p95": 400.9599983692169,
+ "p99": 422.4959909915924
},
"roundtrip": {
- "p50": 155.45600652694702,
- "p90": 158.62399339675903,
- "p95": 161.05599701404572,
- "p99": 178.27199399471283
+ "p50": 224.09600019454956,
+ "p90": 444.7680115699768,
+ "p95": 486.4319860935211,
+ "p99": 527.9039740562439
},
"isolatedSum": {
- "p50": 163.61600160598755,
- "p90": 168.8000001013279,
- "p95": 171.51999846100807,
- "p99": 208.19199830293655
+ "p50": 251.26399099826813,
+ "p90": 733.7920069694519,
+ "p95": 794.4959998130798,
+ "p99": 853.7919819355011
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 77514752,
+ "combineLogicalBytes": 77514752,
+ "fanoutMean": 5.2802734375,
+ "recvTokensMax": 707,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 113.82400244474411,
- "p90": 117.18399822711945,
- "p95": 119.80800330638885,
- "p99": 142.62400567531586
+ "p50": 166.33599996566772,
+ "p90": 457.92001485824585,
+ "p95": 493.0880069732666,
+ "p99": 2744.0319061279297
},
"combine": {
- "p50": 52.000001072883606,
- "p90": 53.0879981815815,
- "p95": 54.11199852824211,
- "p99": 62.3680017888546
+ "p50": 170.04799842834473,
+ "p90": 437.5999867916107,
+ "p95": 457.3119878768921,
+ "p99": 676.4479875564575
},
"roundtrip": {
- "p50": 156.8640023469925,
- "p90": 161.85599565505981,
- "p95": 168.44800114631653,
- "p99": 217.50399470329285
+ "p50": 307.0720136165619,
+ "p90": 557.7600002288818,
+ "p95": 587.9039764404297,
+ "p99": 620.6719875335693
},
"isolatedSum": {
- "p50": 165.82400351762772,
- "p90": 170.27199640870094,
- "p95": 173.92000183463097,
- "p99": 204.99200746417046
+ "p50": 336.38399839401245,
+ "p90": 895.5200016498566,
+ "p95": 950.3999948501587,
+ "p99": 3420.479893684387
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 352256,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 154570752,
+ "combineLogicalBytes": 154570752,
+ "fanoutMean": 5.2646484375,
+ "recvTokensMax": 1391,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 115.13599753379822,
- "p90": 118.75200271606445,
- "p95": 120.86399644613266,
- "p99": 133.2480013370514
+ "p50": 235.20000278949738,
+ "p90": 256.00001215934753,
+ "p95": 539.9360060691833,
+ "p99": 572.7360248565674
},
"combine": {
- "p50": 54.75199967622757,
- "p90": 56.703999638557434,
- "p95": 56.992001831531525,
- "p99": 60.127999633550644
+ "p50": 287.58400678634644,
+ "p90": 295.9679961204529,
+ "p95": 540.0639772415161,
+ "p99": 600.1600027084351
},
"roundtrip": {
- "p50": 161.21600568294525,
- "p90": 165.0879979133606,
- "p95": 167.00799763202667,
- "p99": 186.91200017929077
+ "p50": 475.77598690986633,
+ "p90": 494.04799938201904,
+ "p95": 776.6079902648926,
+ "p99": 803.1359910964966
},
"isolatedSum": {
- "p50": 169.8879972100258,
- "p90": 175.4560023546219,
- "p95": 177.85599827766418,
- "p99": 193.37600097060204
+ "p50": 522.7840095758438,
+ "p90": 551.9680082798004,
+ "p95": 1079.9999833106995,
+ "p99": 1172.8960275650024
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 692224,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 309772288,
+ "combineLogicalBytes": 309772288,
+ "fanoutMean": 5.275390625,
+ "recvTokensMax": 2754,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 116.31999909877777,
- "p90": 118.9119964838028,
- "p95": 120.70400267839432,
- "p99": 126.49600207805634
+ "p50": 365.4719889163971,
+ "p90": 382.88000226020813,
+ "p95": 667.0719981193542,
+ "p99": 700.4799842834473
},
"combine": {
- "p50": 56.2559999525547,
- "p90": 57.88800120353699,
- "p95": 58.559998869895935,
- "p99": 72.7040022611618
+ "p50": 497.44001030921936,
+ "p90": 772.5120186805725,
+ "p95": 816.3520097732544,
+ "p99": 837.2160196304321
},
"roundtrip": {
- "p50": 163.00800442695618,
- "p90": 166.20799899101257,
- "p95": 167.93599724769592,
- "p99": 180.86400628089905
+ "p50": 834.272027015686,
+ "p90": 849.5360016822815,
+ "p95": 1139.5200490951538,
+ "p99": 1173.9519834518433
},
"isolatedSum": {
- "p50": 172.57599905133247,
- "p90": 176.79999768733978,
- "p95": 179.26400154829025,
- "p99": 199.20000433921814
+ "p50": 862.9119992256165,
+ "p90": 1155.3920209407806,
+ "p95": 1483.4240078926086,
+ "p99": 1537.6960039138794
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1372160,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 619501568,
+ "combineLogicalBytes": 619501568,
+ "fanoutMean": 5.2750244140625,
+ "recvTokensMax": 5469,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 116.70400202274323,
- "p90": 119.58400160074234,
- "p95": 121.76000326871872,
- "p99": 145.88800072669983
+ "p50": 616.927981376648,
+ "p90": 901.6000032424927,
+ "p95": 947.8719830513,
+ "p99": 985.0239753723145
},
"combine": {
- "p50": 56.832000613212585,
- "p90": 58.62399935722351,
- "p95": 59.007998555898666,
- "p99": 66.880002617836
+ "p50": 875.328004360199,
+ "p90": 893.0559754371643,
+ "p95": 1183.840036392212,
+ "p99": 1216.70401096344
},
"roundtrip": {
- "p50": 165.72800278663635,
- "p90": 169.21600699424744,
- "p95": 170.8800047636032,
- "p99": 185.72799861431122
+ "p50": 1467.2960042953491,
+ "p90": 1478.943943977356,
+ "p95": 1765.4080390930176,
+ "p99": 1812.7360343933105
},
"isolatedSum": {
- "p50": 173.5360026359558,
- "p90": 178.20800095796585,
- "p95": 180.7680018246174,
- "p99": 212.76800334453583
+ "p50": 1492.255985736847,
+ "p90": 1794.655978679657,
+ "p95": 2131.712019443512,
+ "p99": 2201.7279863357544
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2732032,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1239375872,
+ "combineLogicalBytes": 1239375872,
+ "fanoutMean": 5.276611328125,
+ "recvTokensMax": 10883,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 119.52000111341476,
- "p90": 122.72000312805176,
- "p95": 124.60800260305405,
- "p99": 160.5439931154251
+ "p50": 1133.952021598816,
+ "p90": 1419.3919897079468,
+ "p95": 1456.928014755249,
+ "p99": 1488.4480237960815
},
"combine": {
- "p50": 59.67999994754791,
- "p90": 61.63199990987778,
- "p95": 62.24000081419945,
- "p99": 65.79200178384781
+ "p50": 1633.952021598816,
+ "p90": 1904.4159650802612,
+ "p95": 1942.1759843826294,
+ "p99": 1971.9359874725342
},
"roundtrip": {
- "p50": 170.68800330162048,
- "p90": 173.98400604724884,
- "p95": 175.64800381660461,
- "p99": 187.6160055398941
+ "p50": 2741.7280673980713,
+ "p90": 2753.1518936157227,
+ "p95": 3059.648036956787,
+ "p99": 3083.904027938843
},
"isolatedSum": {
- "p50": 179.20000106096268,
- "p90": 184.35200303792953,
- "p95": 186.8480034172535,
- "p99": 226.33599489927292
+ "p50": 2767.904043197632,
+ "p90": 3323.807954788208,
+ "p95": 3399.1039991378784,
+ "p99": 3460.3840112686157
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 5562368,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2479669248,
+ "combineLogicalBytes": 2479669248,
+ "fanoutMean": 5.278564453125,
+ "recvTokensMax": 21730,
+ "stragglerRank": 0,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ }
+ ]
+ },
+ {
+ "id": "cx-8aff0e36",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||8bf55e36863f028",
+ "colorKey": "gb200_62dbe147",
+ "comparisonKey": "24459de50e73a419",
+ "schemaVersion": 3,
+ "generatedAt": "2026-06-29T13:56:17.788357+00:00",
+ "status": "valid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
+ "backend": "deepep",
+ "phase": "prefill",
+ "mode": "normal",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
+ "worldSize": 8,
+ "epSize": 8,
+ "label": "GB200 EP8 · deepep · bf16 · balanced",
+ "model": "DeepSeek-V3/V4",
+ "shape": {
+ "hidden": 7168,
+ "topk": 8,
+ "experts": 256,
+ "routing": "balanced",
+ "routingLabel": "balanced",
+ "routingStep": 0,
+ "unevenTokens": "none",
+ "eplbEnabled": false,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
+ "combineQuantMode": "none"
+ },
+ "resourceProfile": {
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
+ "paretoEligible": false
+ },
+ "placement": {
+ "kind": "packed",
+ "nodes": 2,
+ "gpusPerNode": 8,
+ "scaleUpDomain": 8
+ },
+ "routingConsistent": true,
+ "traceSignature": "8bf55e36863f028",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
+ "repository": "SemiAnalysisAI/InferenceX",
+ "run": {
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ },
+ "rows": [
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 140.47999680042267,
+ "p90": 152.22400426864624,
+ "p95": 158.1760048866272,
+ "p99": 166.4000004529953
+ },
+ "combine": {
+ "p50": 145.88800072669983,
+ "p90": 154.62400019168854,
+ "p95": 156.3200056552887,
+ "p99": 163.29599916934967
+ },
+ "roundtrip": {
+ "p50": 259.8400115966797,
+ "p90": 268.0320143699646,
+ "p95": 271.2959945201874,
+ "p99": 276.2239873409271
+ },
+ "isolatedSum": {
+ "p50": 286.3679975271225,
+ "p90": 306.8480044603348,
+ "p95": 314.4960105419159,
+ "p99": 329.69599962234497
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 117440512,
+ "combineLogicalBytes": 117440512,
+ "fanoutMean": 8,
+ "recvTokensMax": 1024,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 130.68799674510956,
- "p90": 134.97599959373474,
- "p95": 138.5280042886734,
- "p99": 150.84800124168396
+ "p50": 275.7120132446289,
+ "p90": 284.5120131969452,
+ "p95": 288.2879972457886,
+ "p99": 296.28801345825195
},
"combine": {
- "p50": 72.51200079917908,
- "p90": 74.5600014925003,
- "p95": 75.23199915885925,
- "p99": 79.19999957084656
+ "p50": 369.79201436042786,
+ "p90": 377.27999687194824,
+ "p95": 379.040002822876,
+ "p99": 384.2880129814148
},
"roundtrip": {
- "p50": 195.10400295257568,
- "p90": 199.42399859428406,
- "p95": 202.04800367355347,
- "p99": 217.95199811458588
+ "p50": 610.2079749107361,
+ "p90": 618.1439757347107,
+ "p95": 620.5120086669922,
+ "p99": 625.4400014877319
},
"isolatedSum": {
- "p50": 203.19999754428864,
- "p90": 209.53600108623505,
- "p95": 213.76000344753265,
- "p99": 230.04800081253052
+ "p50": 645.5040276050568,
+ "p90": 661.7920100688934,
+ "p95": 667.3280000686646,
+ "p99": 680.5760264396667
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 11096064,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 469762048,
+ "combineLogicalBytes": 469762048,
+ "fanoutMean": 8,
+ "recvTokensMax": 4096,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 145.53600549697876,
- "p90": 148.70400726795197,
- "p95": 150.07999539375305,
- "p99": 157.47199952602386
+ "p50": 816.6080117225647,
+ "p90": 829.3759822845459,
+ "p95": 832.1920037269592,
+ "p99": 840.7359719276428
},
"combine": {
- "p50": 89.6959975361824,
- "p90": 92.12800115346909,
- "p95": 93.56799721717834,
- "p99": 105.12000322341919
+ "p50": 1204.800009727478,
+ "p90": 1208.9279890060425,
+ "p95": 1212.8640413284302,
+ "p99": 1217.1200513839722
},
"roundtrip": {
- "p50": 228.28799486160278,
- "p90": 232.35200345516205,
- "p95": 234.23999547958374,
- "p99": 251.3279914855957
+ "p50": 1997.1200227737427,
+ "p90": 2019.0720558166504,
+ "p95": 2026.2401103973389,
+ "p99": 2036.2560749053955
},
"isolatedSum": {
- "p50": 235.23200303316116,
- "p90": 240.83200842142105,
- "p95": 243.6479926109314,
- "p99": 262.59200274944305
+ "p50": 2021.4080214500427,
+ "p90": 2038.3039712905884,
+ "p95": 2045.0560450553894,
+ "p99": 2057.856023311615
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1879048192,
+ "combineLogicalBytes": 1879048192,
+ "fanoutMean": 8,
+ "recvTokensMax": 16384,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -13458,47 +13279,48 @@
]
},
{
- "id": "cx-59d44b57",
- "identity": "b300|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "16e666f429329305",
+ "id": "cx-9defed89",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7f1ea4cf569d12c",
+ "colorKey": "gb200_3028258e",
+ "comparisonKey": "3fa6024dd84d5535",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:52:10.269764+00:00",
+ "generatedAt": "2026-06-29T13:56:59.647021+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_10",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "shape 5120/8/160",
+ "label": "GB200 EP8 · deepep · bf16 · balanced-rank-local",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 5120,
+ "hidden": 7168,
"topk": 8,
- "experts": 160,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 256,
+ "routing": "balanced-rank-local",
+ "routingLabel": "balanced-rank-local",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -13506,366 +13328,476 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "be1b44a963bd4ef",
- "workloadId": "set:8:34e5874082f8ea8f",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "7f1ea4cf569d12c",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285705053",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285705053",
- "createdAt": "2026-06-27T09:52:10.269764+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 111.7120012640953,
- "p90": 115.52000045776367,
- "p95": 118.17599833011627,
- "p99": 129.08799946308136
+ "p50": 98.68799895048141,
+ "p90": 108.2879975438118,
+ "p95": 110.97600311040878,
+ "p99": 121.8239963054657
},
"combine": {
- "p50": 54.62399870157242,
- "p90": 55.93600124120712,
- "p95": 56.89600110054016,
- "p99": 59.74400043487549
+ "p50": 82.2720006108284,
+ "p90": 96.73599898815155,
+ "p95": 103.20000350475311,
+ "p99": 108.19199681282043
},
"roundtrip": {
- "p50": 156.0640037059784,
- "p90": 160.35200655460358,
- "p95": 163.13600540161133,
- "p99": 179.967999458313
+ "p50": 158.4639996290207,
+ "p90": 186.3359957933426,
+ "p95": 198.97599518299103,
+ "p99": 221.98399901390076
},
"isolatedSum": {
- "p50": 166.33599996566772,
- "p90": 171.4560016989708,
- "p95": 175.07199943065643,
- "p99": 188.83199989795685
+ "p50": 180.95999956130981,
+ "p90": 205.02399653196335,
+ "p95": 214.1760066151619,
+ "p99": 230.01599311828613
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 215040,
- "combineLogicalBytes": 430080,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 14680064,
+ "combineLogicalBytes": 14680064,
+ "fanoutMean": 1,
+ "recvTokensMax": 128,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 112.15999722480774,
- "p90": 115.7120019197464,
- "p95": 117.60000139474869,
- "p99": 126.24000012874603
+ "p50": 123.83999675512314,
+ "p90": 136.51199638843536,
+ "p95": 139.45600390434265,
+ "p99": 181.21600151062012
},
"combine": {
- "p50": 55.64799904823303,
- "p90": 57.24800005555153,
- "p95": 58.079998940229416,
- "p99": 65.11999666690826
+ "p50": 127.36000120639801,
+ "p90": 131.84000551700592,
+ "p95": 134.36800241470337,
+ "p99": 141.31200313568115
},
"roundtrip": {
- "p50": 158.720001578331,
- "p90": 163.07200491428375,
- "p95": 167.26399958133698,
- "p99": 184.28799510002136
+ "p50": 223.51999580860138,
+ "p90": 243.93600225448608,
+ "p95": 252.3840069770813,
+ "p99": 264.51200246810913
},
"isolatedSum": {
- "p50": 167.80799627304077,
- "p90": 172.96000197529793,
- "p95": 175.6800003349781,
- "p99": 191.3599967956543
+ "p50": 251.19999796152115,
+ "p90": 268.3520019054413,
+ "p95": 273.824006319046,
+ "p99": 322.52800464630127
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 440320,
- "combineLogicalBytes": 880640,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 58720256,
+ "combineLogicalBytes": 58720256,
+ "fanoutMean": 1,
+ "recvTokensMax": 512,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 114.43199962377548,
- "p90": 120.35199999809265,
- "p95": 123.74400347471237,
- "p99": 146.68799936771393
+ "p50": 212.64000236988068,
+ "p90": 225.69599747657776,
+ "p95": 234.14400219917297,
+ "p99": 249.85599517822266
},
"combine": {
- "p50": 60.127999633550644,
- "p90": 61.85600161552429,
- "p95": 63.07200342416763,
- "p99": 65.43999910354614
+ "p50": 256.0639977455139,
+ "p90": 266.1440074443817,
+ "p95": 268.0320143699646,
+ "p99": 276.3200104236603
},
"roundtrip": {
- "p50": 164.99200463294983,
- "p90": 170.1119989156723,
- "p95": 173.5360026359558,
- "p99": 206.7520022392273
+ "p50": 451.07200741767883,
+ "p90": 459.6799910068512,
+ "p95": 463.8400077819824,
+ "p99": 725.6320118904114
},
"isolatedSum": {
- "p50": 174.55999925732613,
- "p90": 182.20800161361694,
- "p95": 186.81600689888,
- "p99": 212.12799847126007
+ "p50": 468.7040001153946,
+ "p90": 491.8400049209595,
+ "p95": 502.1760165691376,
+ "p99": 526.1760056018829
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 870400,
- "combineLogicalBytes": 1740800,
- "fanoutMean": 5.3125,
- "recvTokensMax": 25,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 234881024,
+ "combineLogicalBytes": 234881024,
+ "fanoutMean": 1,
+ "recvTokensMax": 2048,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 114.59200084209442,
- "p90": 117.72800236940384,
- "p95": 119.4240003824234,
- "p99": 133.5040032863617
- },
- "combine": {
- "p50": 59.58399921655655,
- "p90": 61.40799820423126,
- "p95": 62.111999839544296,
- "p99": 65.18399715423584
- },
- "roundtrip": {
- "p50": 166.17600619792938,
- "p90": 170.33599317073822,
- "p95": 173.21600019931793,
- "p99": 191.48799777030945
+ }
+ ]
+ },
+ {
+ "id": "cx-9efef357",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||7ac30b0a39b1405",
+ "colorKey": "gb200_71fc8a17",
+ "comparisonKey": "684eae7793ca35a1",
+ "schemaVersion": 3,
+ "generatedAt": "2026-06-29T14:00:56.504573+00:00",
+ "status": "valid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
+ "backend": "deepep",
+ "phase": "prefill",
+ "mode": "normal",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
+ "worldSize": 8,
+ "epSize": 8,
+ "label": "GB200 EP8 · deepep · bf16 · balanced+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
+ "shape": {
+ "hidden": 7168,
+ "topk": 8,
+ "experts": 288,
+ "routing": "balanced",
+ "routingLabel": "balanced+eplb",
+ "routingStep": 0,
+ "unevenTokens": "none",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
+ "combineQuantMode": "none"
+ },
+ "resourceProfile": {
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
+ "paretoEligible": false
+ },
+ "placement": {
+ "kind": "packed",
+ "nodes": 2,
+ "gpusPerNode": 8,
+ "scaleUpDomain": 8
+ },
+ "routingConsistent": true,
+ "traceSignature": "7ac30b0a39b1405",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1,
+ "eplbImbalanceAfter": 1,
+ "backendVersion": "1.1.0+814e508",
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
+ "repository": "SemiAnalysisAI/InferenceX",
+ "run": {
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ },
+ "rows": [
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 104.44799810647964,
+ "p90": 113.18399757146835,
+ "p95": 116.41599982976913,
+ "p99": 129.95199859142303
+ },
+ "combine": {
+ "p50": 96.19200229644775,
+ "p90": 105.82400113344193,
+ "p95": 107.90400207042694,
+ "p99": 130.62399625778198
+ },
+ "roundtrip": {
+ "p50": 175.52000284194946,
+ "p90": 183.07200074195862,
+ "p95": 185.2799952030182,
+ "p99": 190.5599981546402
},
"isolatedSum": {
- "p50": 174.17600005865097,
- "p90": 179.1360005736351,
- "p95": 181.5360002219677,
- "p99": 198.68800044059753
+ "p50": 200.6400004029274,
+ "p90": 219.00799870491028,
+ "p95": 224.32000190019608,
+ "p99": 260.575994849205
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1735680,
- "combineLogicalBytes": 3471360,
- "fanoutMean": 5.296875,
- "recvTokensMax": 50,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 29360128,
+ "combineLogicalBytes": 29360128,
+ "fanoutMean": 2,
+ "recvTokensMax": 384,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 115.55200070142746,
- "p90": 119.84000355005264,
- "p95": 121.79200351238251,
- "p99": 141.50400459766388
+ "p50": 120.80000340938568,
+ "p90": 128.57599556446075,
+ "p95": 131.67999684810638,
+ "p99": 137.1839940547943
},
"combine": {
- "p50": 61.28000095486641,
- "p90": 63.231997191905975,
- "p95": 63.840001821517944,
- "p99": 69.88800317049026
+ "p50": 118.65600198507309,
+ "p90": 122.78400361537933,
+ "p95": 128.1599998474121,
+ "p99": 133.82400572299957
},
"roundtrip": {
- "p50": 167.1999990940094,
- "p90": 171.55200242996216,
- "p95": 174.43199455738068,
- "p99": 185.12000143527985
+ "p50": 214.1759991645813,
+ "p90": 221.40799462795258,
+ "p95": 223.7440049648285,
+ "p99": 229.63200509548187
},
"isolatedSum": {
- "p50": 176.83200165629387,
- "p90": 183.07200074195862,
- "p95": 185.63200533390045,
- "p99": 211.39200776815414
+ "p50": 239.45600539445877,
+ "p90": 251.3599991798401,
+ "p95": 259.8399966955185,
+ "p99": 271.0079997777939
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3456000,
- "combineLogicalBytes": 6912000,
- "fanoutMean": 5.2734375,
- "recvTokensMax": 93,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 58720256,
+ "combineLogicalBytes": 58720256,
+ "fanoutMean": 2,
+ "recvTokensMax": 768,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 120.51200121641159,
- "p90": 123.6800029873848,
- "p95": 125.59999525547028,
- "p99": 131.32800161838531
+ "p50": 158.1760048866272,
+ "p90": 166.24000668525696,
+ "p95": 169.79199647903442,
+ "p99": 174.0799993276596
},
"combine": {
- "p50": 64.96000289916992,
- "p90": 66.94400310516357,
- "p95": 67.29599833488464,
- "p99": 75.39200037717819
+ "p50": 155.68000078201294,
+ "p90": 159.16800498962402,
+ "p95": 162.88000345230103,
+ "p99": 169.24799978733063
},
"roundtrip": {
- "p50": 175.20000040531158,
- "p90": 179.32799458503723,
- "p95": 183.77600610256195,
- "p99": 198.62399995326996
+ "p50": 286.17599606513977,
+ "p90": 293.66400837898254,
+ "p95": 296.2239980697632,
+ "p99": 299.51998591423035
},
"isolatedSum": {
- "p50": 185.4720041155815,
- "p90": 190.62400609254837,
- "p95": 192.89599359035492,
- "p99": 206.7200019955635
+ "p50": 313.85600566864014,
+ "p90": 325.408011674881,
+ "p95": 332.67199993133545,
+ "p99": 343.32799911499023
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 6988800,
- "combineLogicalBytes": 13977600,
- "fanoutMean": 5.33203125,
- "recvTokensMax": 179,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 117440512,
+ "combineLogicalBytes": 117440512,
+ "fanoutMean": 2,
+ "recvTokensMax": 1536,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 131.71200454235077,
- "p90": 136.60800457000732,
- "p95": 139.13600146770477,
- "p99": 152.96000242233276
+ "p50": 226.78400576114655,
+ "p90": 234.40000414848328,
+ "p95": 236.89599335193634,
+ "p99": 243.00800263881683
},
"combine": {
- "p50": 77.85599678754807,
- "p90": 79.99999821186066,
- "p95": 80.64000308513641,
- "p99": 85.02399921417236
+ "p50": 284.0000092983246,
+ "p90": 291.0720109939575,
+ "p95": 292.1600043773651,
+ "p99": 296.671986579895
},
"roundtrip": {
- "p50": 201.664000749588,
- "p90": 206.4639925956726,
- "p95": 208.19200575351715,
- "p99": 221.98399901390076
+ "p50": 475.8400022983551,
+ "p90": 484.73599553108215,
+ "p95": 487.7760112285614,
+ "p99": 491.61601066589355
},
"isolatedSum": {
- "p50": 209.56800132989883,
- "p90": 216.60800278186798,
- "p95": 219.7760045528412,
- "p99": 237.98400163650513
+ "p50": 510.78401505947113,
+ "p90": 525.4720151424408,
+ "p95": 529.0559977293015,
+ "p99": 539.6799892187119
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 13987840,
- "combineLogicalBytes": 27975680,
- "fanoutMean": 5.3359375,
- "recvTokensMax": 355,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 234881024,
+ "combineLogicalBytes": 234881024,
+ "fanoutMean": 2,
+ "recvTokensMax": 3072,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 151.7760008573532,
- "p90": 157.05600380897522,
- "p95": 159.7760021686554,
- "p99": 171.58399522304535
+ "p50": 368.4160113334656,
+ "p90": 375.36001205444336,
+ "p95": 377.7279853820801,
+ "p99": 381.47199153900146
},
"combine": {
- "p50": 98.39999675750732,
- "p90": 104.38399761915207,
- "p95": 108.51199924945831,
- "p99": 120.38400024175644
+ "p50": 500.15997886657715,
+ "p90": 503.4880042076111,
+ "p95": 504.863977432251,
+ "p99": 510.0160241127014
},
"roundtrip": {
- "p50": 242.0479953289032,
- "p90": 246.59200012683868,
- "p95": 248.51199984550476,
- "p99": 264.384001493454
+ "p50": 839.9680256843567,
+ "p90": 846.3360071182251,
+ "p95": 848.1919765472412,
+ "p99": 853.3440232276917
},
"isolatedSum": {
- "p50": 250.17599761486053,
- "p90": 261.4400014281273,
- "p95": 268.2880014181137,
- "p99": 291.9679954648018
+ "p50": 868.5759902000427,
+ "p90": 878.8480162620544,
+ "p95": 882.591962814331,
+ "p99": 891.4880156517029
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 27837440,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
+ "dispatchLogicalBytes": 469762048,
+ "combineLogicalBytes": 469762048,
+ "fanoutMean": 2,
+ "recvTokensMax": 6144,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
+ },
+ {
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
+ "dispatch": {
+ "p50": 668.287992477417,
+ "p90": 675.9999990463257,
+ "p95": 678.7199974060059,
+ "p99": 685.4400038719177
+ },
+ "combine": {
+ "p50": 898.144006729126,
+ "p90": 905.6959748268127,
+ "p95": 906.7519903182983,
+ "p99": 908.6080193519592
+ },
+ "roundtrip": {
+ "p50": 1543.455958366394,
+ "p90": 1638.1440162658691,
+ "p95": 1646.1759805679321,
+ "p99": 1760.0959539413452
+ },
+ "isolatedSum": {
+ "p50": 1566.431999206543,
+ "p90": 1581.6959738731384,
+ "p95": 1585.4719877243042,
+ "p99": 1594.048023223877
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 939524096,
+ "combineLogicalBytes": 939524096,
+ "fanoutMean": 2,
+ "recvTokensMax": 12288,
+ "stragglerRank": 5,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
}
]
},
{
- "id": "cx-a0445944",
- "identity": "b300|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_d6fd14c3",
- "comparisonKey": "e7da15664ffcf0f8",
+ "id": "cx-06be5389",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||eb906a106a6cb71",
+ "colorKey": "gb200_d945a181",
+ "comparisonKey": "029c20e625903daf",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:45.257215+00:00",
+ "generatedAt": "2026-06-29T13:58:49.951164+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_05",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
+ "label": "GB200 EP8 · deepep · bf16 · hotspot-single",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 6144,
+ "hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "hotspot-single",
+ "routingLabel": "hotspot-single",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -13873,318 +13805,427 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "eb906a106a6cb71",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287498289",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287498289",
- "createdAt": "2026-06-27T11:13:45.257215+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 55.03999814391136,
- "p90": 57.312000542879105,
- "p95": 58.78400057554245,
- "p99": 68.12799721956253
+ "p50": 127.93600559234619,
+ "p90": 135.45599579811096,
+ "p95": 138.40000331401825,
+ "p99": 145.1520025730133
},
"combine": {
- "p50": 56.48000165820122,
- "p90": 57.920001447200775,
- "p95": 58.720000088214874,
- "p99": 66.52799993753433
+ "p50": 140.03199338912964,
+ "p90": 144.70399916172028,
+ "p95": 146.01600170135498,
+ "p99": 152.25599706172943
},
"roundtrip": {
- "p50": 114.656001329422,
- "p90": 116.99199676513672,
- "p95": 118.9119964838028,
- "p99": 136.19199395179749
+ "p50": 244.47999894618988,
+ "p90": 251.71199440956116,
+ "p95": 254.14401292800903,
+ "p99": 259.93600487709045
},
"isolatedSum": {
- "p50": 111.51999980211258,
- "p90": 115.23200199007988,
- "p95": 117.50400066375732,
- "p99": 134.65599715709686
+ "p50": 267.96799898147583,
+ "p90": 280.15999495983124,
+ "p95": 284.41600501537323,
+ "p99": 297.40799963474274
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 270336,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 78102528,
+ "combineLogicalBytes": 78102528,
+ "fanoutMean": 5.3203125,
+ "recvTokensMax": 1024,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 55.96800148487091,
- "p90": 58.079998940229416,
- "p95": 59.23200026154518,
- "p99": 67.07199662923813
+ "p50": 251.583993434906,
+ "p90": 259.20000672340393,
+ "p95": 262.4959945678711,
+ "p99": 267.8399980068207
},
"combine": {
- "p50": 58.720000088214874,
- "p90": 60.06399914622307,
- "p95": 60.70400029420853,
- "p99": 68.35199892520905
+ "p50": 363.96801471710205,
+ "p90": 368.4479892253876,
+ "p95": 373.21600317955017,
+ "p99": 377.56800651550293
},
"roundtrip": {
- "p50": 117.88800358772278,
- "p90": 120.19199877977371,
- "p95": 122.3360002040863,
- "p99": 133.760005235672
+ "p50": 582.0159912109375,
+ "p90": 590.2079939842224,
+ "p95": 592.3839807510376,
+ "p99": 601.1520028114319
},
"isolatedSum": {
- "p50": 114.68800157308578,
- "p90": 118.14399808645248,
- "p95": 119.93600055575371,
- "p99": 135.42399555444717
+ "p50": 615.552008152008,
+ "p90": 627.6479959487915,
+ "p95": 635.7119977474213,
+ "p99": 645.4080045223236
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 528384,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 311091200,
+ "combineLogicalBytes": 311091200,
+ "fanoutMean": 5.2978515625,
+ "recvTokensMax": 4096,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 57.98399820923805,
- "p90": 60.15999987721443,
- "p95": 61.40799820423126,
- "p99": 68.44799965620041
+ "p50": 729.1839718818665,
+ "p90": 736.8959784507751,
+ "p95": 740.0000095367432,
+ "p99": 747.2000122070312
},
"combine": {
- "p50": 60.896001756191254,
- "p90": 62.94400244951248,
- "p95": 63.4239986538887,
- "p99": 69.023996591568
+ "p50": 1165.9200191497803,
+ "p90": 1170.2079772949219,
+ "p95": 1173.9200353622437,
+ "p99": 1177.6000261306763
},
"roundtrip": {
- "p50": 121.47200107574463,
- "p90": 123.87199699878693,
- "p95": 125.05599856376648,
- "p99": 135.48800349235535
+ "p50": 1869.088053703308,
+ "p90": 1877.72798538208,
+ "p95": 1880.1599740982056,
+ "p99": 1886.3999843597412
},
"isolatedSum": {
- "p50": 118.8799999654293,
- "p90": 123.10400232672691,
- "p95": 124.83199685811996,
- "p99": 137.4719962477684
+ "p50": 1895.1039910316467,
+ "p90": 1907.103955745697,
+ "p95": 1913.9200448989868,
+ "p99": 1924.8000383377075
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1062912,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1241511936,
+ "combineLogicalBytes": 1241511936,
+ "fanoutMean": 5.28570556640625,
+ "recvTokensMax": 16384,
+ "stragglerRank": 4,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ }
+ ]
+ },
+ {
+ "id": "cx-bdd9bb8d",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||6248b19ef786add",
+ "colorKey": "gb200_d826ab8d",
+ "comparisonKey": "1a323c0d685e8d2e",
+ "schemaVersion": 3,
+ "generatedAt": "2026-06-29T13:52:19.710696+00:00",
+ "status": "valid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
+ "backend": "deepep",
+ "phase": "prefill",
+ "mode": "normal",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
+ "worldSize": 8,
+ "epSize": 8,
+ "label": "GB200 EP8 · deepep · bf16 · hotspot-single+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
+ "shape": {
+ "hidden": 7168,
+ "topk": 8,
+ "experts": 288,
+ "routing": "hotspot-single",
+ "routingLabel": "hotspot-single+eplb",
+ "routingStep": 0,
+ "unevenTokens": "none",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
+ "combineQuantMode": "none"
+ },
+ "resourceProfile": {
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
+ "paretoEligible": false
+ },
+ "placement": {
+ "kind": "packed",
+ "nodes": 2,
+ "gpusPerNode": 8,
+ "scaleUpDomain": 8
+ },
+ "routingConsistent": true,
+ "traceSignature": "6248b19ef786add",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1.8466796875,
+ "eplbImbalanceAfter": 1.0002700343276514,
+ "backendVersion": "1.1.0+814e508",
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
+ "repository": "SemiAnalysisAI/InferenceX",
+ "run": {
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ },
+ "rows": [
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 126.14400684833527,
+ "p90": 381.21598958969116,
+ "p95": 413.2159948348999,
+ "p99": 437.5999867916107
+ },
+ "combine": {
+ "p50": 128.86400520801544,
+ "p90": 384.19198989868164,
+ "p95": 403.7120044231415,
+ "p99": 416.22400283813477
+ },
+ "roundtrip": {
+ "p50": 225.47200322151184,
+ "p90": 238.20799589157104,
+ "p95": 481.0880124568939,
+ "p99": 537.5040173530579
+ },
+ "isolatedSum": {
+ "p50": 255.0080120563507,
+ "p90": 765.4079794883728,
+ "p95": 816.9279992580414,
+ "p99": 853.8239896297455
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 77701120,
+ "combineLogicalBytes": 77701120,
+ "fanoutMean": 5.29296875,
+ "recvTokensMax": 697,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 59.039998799562454,
- "p90": 61.08799949288368,
- "p95": 62.3680017888546,
- "p99": 72.89600372314453
+ "p50": 164.41600024700165,
+ "p90": 437.79200315475464,
+ "p95": 472.6719856262207,
+ "p99": 497.3120093345642
},
"combine": {
- "p50": 62.94400244951248,
- "p90": 64.41599875688553,
- "p95": 65.05600363016129,
- "p99": 69.15199756622314
+ "p50": 171.9360053539276,
+ "p90": 408.06400775909424,
+ "p95": 444.64001059532166,
+ "p99": 481.0880124568939
},
"roundtrip": {
- "p50": 125.08800625801086,
- "p90": 127.13600695133209,
- "p95": 130.23999333381653,
- "p99": 145.9520012140274
+ "p50": 308.351993560791,
+ "p90": 320.51199674606323,
+ "p95": 572.0639824867249,
+ "p99": 616.096019744873
},
"isolatedSum": {
- "p50": 121.98400124907494,
- "p90": 125.50399824976921,
- "p95": 127.42400541901588,
- "p99": 142.04800128936768
+ "p50": 336.35200560092926,
+ "p90": 845.8560109138489,
+ "p95": 917.3119962215424,
+ "p99": 978.4000217914581
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2131968,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 155187200,
+ "combineLogicalBytes": 155187200,
+ "fanoutMean": 5.28564453125,
+ "recvTokensMax": 1372,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 59.776000678539276,
- "p90": 61.91999837756157,
- "p95": 63.00800293684006,
- "p99": 73.27999919652939
+ "p50": 236.38400435447693,
+ "p90": 246.33599817752838,
+ "p95": 524.0319967269897,
+ "p99": 579.0719985961914
},
"combine": {
- "p50": 63.551999628543854,
- "p90": 65.50399959087372,
- "p95": 66.97600334882736,
- "p99": 72.03199714422226
+ "p50": 292.7039861679077,
+ "p90": 304.32000756263733,
+ "p95": 552.5760054588318,
+ "p99": 583.1040143966675
},
"roundtrip": {
- "p50": 126.39999389648438,
- "p90": 128.86400520801544,
- "p95": 130.3360015153885,
- "p99": 143.74400675296783
+ "p50": 471.2640047073364,
+ "p90": 486.7199957370758,
+ "p95": 773.9840149879456,
+ "p99": 810.2080225944519
},
"isolatedSum": {
- "p50": 123.32800030708313,
- "p90": 127.42399796843529,
- "p95": 129.98400628566742,
- "p99": 145.31199634075165
+ "p50": 529.0879905223846,
+ "p90": 550.6560057401657,
+ "p95": 1076.6080021858215,
+ "p99": 1162.176012992859
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4251648,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 311162880,
+ "combineLogicalBytes": 311162880,
+ "fanoutMean": 5.299072265625,
+ "recvTokensMax": 2761,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 63.1679967045784,
- "p90": 65.8240020275116,
- "p95": 67.03999638557434,
- "p99": 75.48800110816956
+ "p50": 362.527996301651,
+ "p90": 372.1599876880646,
+ "p95": 657.2800278663635,
+ "p99": 706.5920233726501
},
"combine": {
- "p50": 68.41599941253662,
- "p90": 70.81600278615952,
- "p95": 71.52000069618225,
- "p99": 95.04000097513199
+ "p50": 501.3120174407959,
+ "p90": 775.9680151939392,
+ "p95": 822.1439719200134,
+ "p99": 843.1040048599243
},
"roundtrip": {
- "p50": 135.96799969673157,
- "p90": 138.59200477600098,
- "p95": 140.25600254535675,
- "p99": 151.32799744606018
+ "p50": 833.4720134735107,
+ "p90": 1112.3199462890625,
+ "p95": 1156.3199758529663,
+ "p99": 1182.5920343399048
},
"isolatedSum": {
- "p50": 131.58399611711502,
- "p90": 136.6400048136711,
- "p95": 138.5599970817566,
- "p99": 170.52800208330154
+ "p50": 863.8400137424469,
+ "p90": 1148.1280028820038,
+ "p95": 1479.423999786377,
+ "p99": 1549.6960282325745
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 8454144,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 619974656,
+ "combineLogicalBytes": 619974656,
+ "fanoutMean": 5.279052734375,
+ "recvTokensMax": 5481,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 70.52800059318542,
- "p90": 73.31199944019318,
- "p95": 75.3600001335144,
- "p99": 84.83199775218964
+ "p50": 617.5040006637573,
+ "p90": 909.6959829330444,
+ "p95": 947.2320079803467,
+ "p99": 972.3520278930664
},
"combine": {
- "p50": 82.30400085449219,
- "p90": 84.19200032949448,
- "p95": 85.28000116348267,
- "p99": 99.61599856615067
+ "p50": 874.2719888687134,
+ "p90": 911.9679927825928,
+ "p95": 1204.4800519943237,
+ "p99": 1231.9999933242798
},
"roundtrip": {
- "p50": 163.35999965667725,
- "p90": 165.8560037612915,
- "p95": 167.71200299263,
- "p99": 189.11999464035034
+ "p50": 1461.1519575119019,
+ "p90": 1474.5919704437256,
+ "p95": 1765.023946762085,
+ "p99": 1823.1680393218994
},
"isolatedSum": {
- "p50": 152.8320014476776,
- "p90": 157.50399976968765,
- "p95": 160.64000129699707,
- "p99": 184.4479963183403
+ "p50": 1491.7759895324707,
+ "p90": 1821.6639757156372,
+ "p95": 2151.7120599746704,
+ "p99": 2204.352021217346
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 16711680,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1240020992,
+ "combineLogicalBytes": 1240020992,
+ "fanoutMean": 5.27935791015625,
+ "recvTokensMax": 10883,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 81.40800148248672,
- "p90": 84.09599959850311,
- "p95": 85.66399663686752,
- "p99": 95.29600292444229
+ "p50": 1139.9680376052856,
+ "p90": 1151.360034942627,
+ "p95": 1454.751968383789,
+ "p99": 1494.879961013794
},
"combine": {
- "p50": 102.14400291442871,
- "p90": 104.3199971318245,
- "p95": 105.72800040245056,
- "p99": 114.72000181674957
+ "p50": 1625.5359649658203,
+ "p90": 1638.592004776001,
+ "p95": 1942.4320459365845,
+ "p99": 1977.4080514907837
},
"roundtrip": {
- "p50": 205.9839963912964,
- "p90": 208.99200439453125,
- "p95": 210.4959934949875,
- "p99": 222.04799950122833
+ "p50": 2741.7280673980713,
+ "p90": 2755.6159496307373,
+ "p95": 3060.800075531006,
+ "p99": 3102.976083755493
},
"isolatedSum": {
- "p50": 183.55200439691544,
- "p90": 188.4159967303276,
- "p95": 191.39199703931808,
- "p99": 210.01600474119186
+ "p50": 2765.504002571106,
+ "p90": 2789.952039718628,
+ "p95": 3397.1840143203735,
+ "p99": 3472.2880125045776
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2480414720,
+ "combineLogicalBytes": 2480414720,
+ "fanoutMean": 5.2801513671875,
+ "recvTokensMax": 21702,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -14192,47 +14233,48 @@
]
},
{
- "id": "cx-429a4a40",
- "identity": "b300|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "fe452cc5767ffbdd",
+ "id": "cx-17c4723d",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||291e5ce62735286",
+ "colorKey": "gb200_8703b849",
+ "comparisonKey": "5356d58a72408ddf",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:52:37.801228+00:00",
+ "generatedAt": "2026-06-29T14:00:10.030819+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_16",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
+ "label": "GB200 EP8 · deepep · bf16 · uniform+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
- "hidden": 6144,
+ "hidden": 7168,
"topk": 8,
- "experts": 256,
+ "experts": 288,
"routing": "uniform",
- "routingLabel": "uniform",
+ "routingLabel": "uniform+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -14240,318 +14282,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "291e5ce62735286",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1.006072998046875,
+ "eplbImbalanceAfter": 1.0000152587890625,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285716223",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285716223",
- "createdAt": "2026-06-27T09:52:37.801228+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 114.84800279140472,
- "p90": 118.6240017414093,
- "p95": 120.51200121641159,
- "p99": 149.3760049343109
+ "p50": 125.50400197505951,
+ "p90": 134.5919966697693,
+ "p95": 137.79200613498688,
+ "p99": 147.32800424098969
},
"combine": {
- "p50": 58.49599838256836,
- "p90": 60.22400036454201,
- "p95": 60.95999851822853,
- "p99": 64.64000046253204
+ "p50": 129.2479932308197,
+ "p90": 133.18400084972382,
+ "p95": 134.39999520778656,
+ "p99": 139.55199718475342
},
"roundtrip": {
- "p50": 165.0879979133606,
- "p90": 168.2880073785782,
- "p95": 170.30400037765503,
- "p99": 177.34399437904358
+ "p50": 225.63199698925018,
+ "p90": 233.18399488925934,
+ "p95": 235.71200668811798,
+ "p99": 239.80799317359924
},
"isolatedSum": {
- "p50": 173.34400117397308,
- "p90": 178.8480021059513,
- "p95": 181.47199973464012,
- "p99": 214.01600539684296
+ "p50": 254.7519952058792,
+ "p90": 267.7759975194931,
+ "p95": 272.19200134277344,
+ "p99": 286.8800014257431
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 270336,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 77041664,
+ "combineLogicalBytes": 77041664,
+ "fanoutMean": 5.248046875,
+ "recvTokensMax": 686,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 115.167997777462,
- "p90": 118.17599833011627,
- "p95": 120.06399780511856,
- "p99": 135.16800105571747
+ "p50": 162.78399527072906,
+ "p90": 171.55200242996216,
+ "p95": 174.01599884033203,
+ "p99": 180.03199994564056
},
"combine": {
- "p50": 59.55199897289276,
- "p90": 61.15199998021126,
- "p95": 62.04799935221672,
- "p99": 64.31999802589417
+ "p50": 169.18399930000305,
+ "p90": 176.60799622535706,
+ "p95": 179.00800704956055,
+ "p99": 182.5920045375824
},
"roundtrip": {
- "p50": 164.57599401474,
- "p90": 168.35199296474457,
- "p95": 170.46399414539337,
- "p99": 185.47199666500092
+ "p50": 305.7920038700104,
+ "p90": 313.85600566864014,
+ "p95": 316.1279857158661,
+ "p99": 320.44801115989685
},
"isolatedSum": {
- "p50": 174.71999675035477,
- "p90": 179.32799831032753,
- "p95": 182.11199715733528,
- "p99": 199.48799908161163
+ "p50": 331.9679945707321,
+ "p90": 348.1599986553192,
+ "p95": 353.0240058898926,
+ "p99": 362.62400448322296
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 528384,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 154542080,
+ "combineLogicalBytes": 154542080,
+ "fanoutMean": 5.263671875,
+ "recvTokensMax": 1365,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 118.75200271606445,
- "p90": 124.32000041007996,
- "p95": 128.80000472068787,
- "p99": 145.56799829006195
+ "p50": 235.48799753189087,
+ "p90": 242.3039972782135,
+ "p95": 244.6720004081726,
+ "p99": 253.82399559020996
},
"combine": {
- "p50": 62.68800050020218,
- "p90": 64.41599875688553,
- "p95": 65.0240033864975,
- "p99": 73.82400333881378
+ "p50": 289.72798585891724,
+ "p90": 294.94398832321167,
+ "p95": 297.85600304603577,
+ "p99": 303.1359910964966
},
"roundtrip": {
- "p50": 170.6559956073761,
- "p90": 174.6560037136078,
- "p95": 176.83200538158417,
- "p99": 186.88000738620758
+ "p50": 470.5600142478943,
+ "p90": 479.5520007610321,
+ "p95": 482.56000876426697,
+ "p99": 487.0719909667969
},
"isolatedSum": {
- "p50": 181.44000321626663,
- "p90": 188.73599916696548,
- "p95": 193.82400810718536,
- "p99": 219.39200162887573
+ "p50": 525.2159833908081,
+ "p90": 537.2479856014252,
+ "p95": 542.5280034542084,
+ "p99": 556.9599866867065
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1062912,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 310589440,
+ "combineLogicalBytes": 310589440,
+ "fanoutMean": 5.289306640625,
+ "recvTokensMax": 2746,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 118.04799735546112,
- "p90": 121.2799996137619,
- "p95": 123.61600250005722,
- "p99": 135.3600025177002
+ "p50": 359.20000076293945,
+ "p90": 367.0719861984253,
+ "p95": 369.4399893283844,
+ "p99": 376.15999579429626
},
"combine": {
- "p50": 63.231997191905975,
- "p90": 64.99200314283371,
- "p95": 65.24799764156342,
- "p99": 72.51200079917908
+ "p50": 498.879998922348,
+ "p90": 504.09597158432007,
+ "p95": 507.58397579193115,
+ "p99": 511.58398389816284
},
"roundtrip": {
- "p50": 172.54400253295898,
- "p90": 176.15999281406403,
- "p95": 177.59999632835388,
- "p99": 187.51999735832214
+ "p50": 824.5440125465393,
+ "p90": 832.1920037269592,
+ "p95": 834.0799808502197,
+ "p99": 837.6320004463196
},
"isolatedSum": {
- "p50": 181.2799945473671,
- "p90": 186.2720027565956,
- "p95": 188.86400014162064,
- "p99": 207.87200331687927
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2131968,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 118.9119964838028,
- "p90": 122.94399738311768,
- "p95": 127.42400169372559,
- "p99": 143.48800480365753
- },
- "combine": {
- "p50": 64.35199826955795,
- "p90": 65.79200178384781,
- "p95": 66.68800115585327,
- "p99": 70.27199864387512
- },
- "roundtrip": {
- "p50": 173.40800166130066,
- "p90": 176.83200538158417,
- "p95": 178.5919964313507,
- "p99": 190.7840073108673
- },
- "isolatedSum": {
- "p50": 183.26399475336075,
- "p90": 188.73599916696548,
- "p95": 194.11200284957886,
- "p99": 213.76000344753265
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4251648,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 124.9919980764389,
- "p90": 128.1599998474121,
- "p95": 130.23999333381653,
- "p99": 156.5759927034378
- },
- "combine": {
- "p50": 68.64000111818314,
- "p90": 70.14399766921997,
- "p95": 70.78400254249573,
- "p99": 75.39200037717819
- },
- "roundtrip": {
- "p50": 185.56800484657288,
- "p90": 189.18399512767792,
- "p95": 191.52000546455383,
- "p99": 204.83200252056122
- },
- "isolatedSum": {
- "p50": 193.63199919462204,
- "p90": 198.30399751663208,
- "p95": 201.02399587631226,
- "p99": 231.967993080616
+ "p50": 858.0799996852875,
+ "p90": 871.1679577827454,
+ "p95": 877.0239651203156,
+ "p99": 887.7439796924591
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 8454144,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
+ "dispatchLogicalBytes": 619171840,
+ "combineLogicalBytes": 619171840,
+ "fanoutMean": 5.272216796875,
+ "recvTokensMax": 5467,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 135.42400300502777,
- "p90": 139.5840048789978,
- "p95": 143.45599710941315,
- "p99": 153.6960005760193
+ "p50": 615.4239773750305,
+ "p90": 623.520016670227,
+ "p95": 625.760018825531,
+ "p99": 631.1360001564026
},
"combine": {
- "p50": 82.75199681520462,
- "p90": 85.02399921417236,
- "p95": 85.85599809885025,
- "p99": 96.19200229644775
+ "p50": 872.5759983062744,
+ "p90": 881.7600011825562,
+ "p95": 883.4879994392395,
+ "p99": 887.5839710235596
},
"roundtrip": {
- "p50": 210.207998752594,
- "p90": 215.2319997549057,
- "p95": 217.6000028848648,
- "p99": 234.72000658512115
+ "p50": 1458.400011062622,
+ "p90": 1466.6240215301514,
+ "p95": 1469.4080352783203,
+ "p99": 1476.1919975280762
},
"isolatedSum": {
- "p50": 218.1759998202324,
- "p90": 224.60800409317017,
- "p95": 229.3119952082634,
- "p99": 249.88800287246704
+ "p50": 1487.999975681305,
+ "p90": 1505.2800178527832,
+ "p95": 1509.2480182647705,
+ "p99": 1518.7199711799622
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 16711680,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
+ "dispatchLogicalBytes": 1238945792,
+ "combineLogicalBytes": 1238945792,
+ "fanoutMean": 5.2747802734375,
+ "recvTokensMax": 10913,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 157.151997089386,
- "p90": 161.02400422096252,
- "p95": 163.29599916934967,
- "p99": 173.0560064315796
+ "p50": 1142.8799629211426,
+ "p90": 1149.664044380188,
+ "p95": 1152.1600484848022,
+ "p99": 1156.7039489746094
},
"combine": {
- "p50": 102.1760031580925,
- "p90": 104.35199737548828,
- "p95": 105.43999820947647,
- "p99": 116.06399714946747
+ "p50": 1636.5760564804077,
+ "p90": 1647.3599672317505,
+ "p95": 1649.183988571167,
+ "p99": 1657.5679779052734
},
"roundtrip": {
- "p50": 253.02401185035706,
- "p90": 257.60000944137573,
- "p95": 260.8320116996765,
- "p99": 278.9439857006073
+ "p50": 2753.82399559021,
+ "p90": 2762.6240253448486,
+ "p95": 2765.727996826172,
+ "p99": 2771.0399627685547
},
"isolatedSum": {
- "p50": 259.3280002474785,
- "p90": 265.3760015964508,
- "p95": 268.73599737882614,
- "p99": 289.12000358104706
+ "p50": 2779.4560194015503,
+ "p90": 2797.0240116119385,
+ "p95": 2801.3440370559692,
+ "p99": 2814.271926879883
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2481747968,
+ "combineLogicalBytes": 2481747968,
+ "fanoutMean": 5.282989501953125,
+ "recvTokensMax": 21789,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -14559,366 +14527,293 @@
]
},
{
- "id": "cx-c27e2cad",
- "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_d6fd14c3",
- "comparisonKey": "ac13ebc2bb2c560a",
+ "id": "cx-c9c3e331",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e",
+ "colorKey": "gb200_62fd6d04",
+ "comparisonKey": "dc09c891587fd8b9",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T10:26:01.213105+00:00",
+ "generatedAt": "2026-06-29T13:54:20.997256+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_10",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
+ "label": "GB200 EP8 · deepep · bf16 · zipf",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "zipf",
+ "routingLabel": "zipf",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
"paretoEligible": false
},
"placement": {
- "kind": "packed",
- "nodes": 1,
+ "kind": "adversarial",
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "bfb01c61bdf926e",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28286436120",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286436120",
- "createdAt": "2026-06-27T10:26:01.213105+00:00",
- "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 56.2559999525547,
- "p90": 58.78400057554245,
- "p95": 61.28000095486641,
- "p99": 77.69600301980972
- },
- "combine": {
- "p50": 61.983998864889145,
- "p90": 78.8159966468811,
- "p95": 86.87999844551086,
- "p99": 95.10400146245956
- },
- "roundtrip": {
- "p50": 120.44800072908401,
- "p90": 123.19999933242798,
- "p95": 125.82400441169739,
- "p99": 144.03200149536133
- },
- "isolatedSum": {
- "p50": 118.23999881744385,
- "p90": 137.59999722242355,
- "p95": 148.15999940037727,
- "p99": 172.8000044822693
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 57.151999324560165,
- "p90": 59.039998799562454,
- "p95": 59.99999865889549,
- "p99": 73.11999797821045
- },
- "combine": {
- "p50": 64.54399973154068,
- "p90": 66.17599725723267,
- "p95": 67.16799736022949,
- "p99": 74.23999905586243
- },
- "roundtrip": {
- "p50": 124.15999919176102,
- "p90": 126.39999389648438,
- "p95": 129.60000336170197,
- "p99": 138.49599659442902
- },
- "isolatedSum": {
- "p50": 121.69599905610085,
- "p90": 125.21599605679512,
- "p95": 127.16799601912498,
- "p99": 147.35999703407288
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 58.88000130653381,
- "p90": 61.37600168585777,
- "p95": 63.10400366783142,
- "p99": 91.10400080680847
+ "p50": 126.52799487113953,
+ "p90": 361.1840009689331,
+ "p95": 402.43199467658997,
+ "p99": 425.8880019187927
},
"combine": {
- "p50": 67.35999882221222,
- "p90": 69.50400024652481,
- "p95": 70.14399766921997,
- "p99": 86.30400151014328
+ "p50": 133.15199315547943,
+ "p90": 365.2159869670868,
+ "p95": 404.54399585723877,
+ "p99": 423.2960045337677
},
"roundtrip": {
- "p50": 127.68000364303589,
- "p90": 130.14400005340576,
- "p95": 131.55199587345123,
- "p99": 137.08800077438354
+ "p50": 234.40000414848328,
+ "p90": 251.0719895362854,
+ "p95": 486.62400245666504,
+ "p99": 529.151976108551
},
"isolatedSum": {
- "p50": 126.24000012874603,
- "p90": 130.88000193238258,
- "p95": 133.2480013370514,
- "p99": 177.40800231695175
+ "p50": 259.67998802661896,
+ "p90": 726.3999879360199,
+ "p95": 806.9759905338287,
+ "p99": 849.1840064525604
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 60.03199890255928,
- "p90": 62.30400130152702,
- "p95": 63.26399743556976,
- "p99": 69.2799985408783
+ "p50": 162.1759980916977,
+ "p90": 430.07999658584595,
+ "p95": 449.7919976711273,
+ "p99": 491.5199875831604
},
"combine": {
- "p50": 68.76800209283829,
- "p90": 70.46400010585785,
- "p95": 71.3919997215271,
- "p99": 87.74399757385254
+ "p50": 183.23199450969696,
+ "p90": 452.60798931121826,
+ "p95": 461.2799882888794,
+ "p99": 474.91198778152466
},
"roundtrip": {
- "p50": 130.62399625778198,
- "p90": 133.08799266815186,
- "p95": 134.94400680065155,
- "p99": 141.88799262046814
+ "p50": 320.70401310920715,
+ "p90": 572.9600191116333,
+ "p95": 596.1920022964478,
+ "p99": 618.5280084609985
},
"isolatedSum": {
- "p50": 128.80000099539757,
- "p90": 132.76800140738487,
- "p95": 134.65599715709686,
- "p99": 157.02399611473083
+ "p50": 345.40799260139465,
+ "p90": 882.6879858970642,
+ "p95": 911.0719859600067,
+ "p99": 966.4319753646851
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 100509696,
+ "combineLogicalBytes": 100509696,
+ "fanoutMean": 3.42333984375,
+ "recvTokensMax": 2046,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 61.28000095486641,
- "p90": 63.551999628543854,
- "p95": 64.89600241184235,
- "p99": 75.58400183916092
+ "p50": 231.00799322128296,
+ "p90": 476.1599898338318,
+ "p95": 523.0720043182373,
+ "p99": 548.192024230957
},
"combine": {
- "p50": 69.47200000286102,
- "p90": 71.45600020885468,
- "p95": 72.38399982452393,
- "p99": 76.67200267314911
+ "p50": 353.11999917030334,
+ "p90": 600.1920104026794,
+ "p95": 622.7840185165405,
+ "p99": 637.1520161628723
},
"roundtrip": {
- "p50": 132.9919993877411,
- "p90": 135.55200397968292,
- "p95": 137.37599551677704,
- "p99": 149.63200688362122
+ "p50": 551.3920187950134,
+ "p90": 824.5120048522949,
+ "p95": 847.648024559021,
+ "p99": 2337.5680446624756
},
"isolatedSum": {
- "p50": 130.75200095772743,
- "p90": 135.00799983739853,
- "p95": 137.28000223636627,
- "p99": 152.25600451231003
+ "p50": 584.1279923915863,
+ "p90": 1076.3520002365112,
+ "p95": 1145.8560228347778,
+ "p99": 1185.3440403938293
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 201678848,
+ "combineLogicalBytes": 201678848,
+ "fanoutMean": 3.4345703125,
+ "recvTokensMax": 4094,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 64.2239972949028,
- "p90": 66.39999896287918,
- "p95": 67.87200272083282,
- "p99": 82.8159973025322
+ "p50": 378.59201431274414,
+ "p90": 640.2239799499512,
+ "p95": 671.3280081748962,
+ "p99": 694.9759721755981
},
"combine": {
- "p50": 75.39200037717819,
- "p90": 77.02399790287018,
- "p95": 77.72800326347351,
- "p99": 85.82399785518646
+ "p50": 634.4000101089478,
+ "p90": 651.8719792366028,
+ "p95": 928.0319809913635,
+ "p99": 957.0879936218262
},
"roundtrip": {
- "p50": 145.37599682807922,
- "p90": 147.8399932384491,
- "p95": 148.83199334144592,
- "p99": 160.41600704193115
+ "p50": 977.3439764976501,
+ "p90": 1243.1360483169556,
+ "p95": 1284.4799757003784,
+ "p99": 1316.5760040283203
},
"isolatedSum": {
- "p50": 139.615997672081,
- "p90": 143.42399686574936,
- "p95": 145.60000598430634,
- "p99": 168.63999515771866
+ "p50": 1012.9920244216919,
+ "p90": 1292.095959186554,
+ "p95": 1599.3599891662598,
+ "p99": 1652.0639657974243
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 405035008,
+ "combineLogicalBytes": 405035008,
+ "fanoutMean": 3.4488525390625,
+ "recvTokensMax": 8189,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 72.9919970035553,
- "p90": 75.6480023264885,
- "p95": 76.89599692821503,
- "p99": 89.79199826717377
+ "p50": 700.0640034675598,
+ "p90": 935.2319836616516,
+ "p95": 967.5840139389038,
+ "p99": 997.6639747619629
},
"combine": {
- "p50": 89.24800157546997,
- "p90": 91.2960022687912,
- "p95": 92.99200028181076,
- "p99": 104.76800054311752
+ "p50": 1141.6319608688354,
+ "p90": 1441.1519765853882,
+ "p95": 1468.991994857788,
+ "p99": 1488.927960395813
},
"roundtrip": {
- "p50": 173.92000555992126,
- "p90": 176.9919991493225,
- "p95": 179.1040003299713,
- "p99": 198.08000326156616
+ "p50": 1814.2720460891724,
+ "p90": 1917.3760414123535,
+ "p95": 2131.6800117492676,
+ "p99": 2175.1039028167725
},
"isolatedSum": {
- "p50": 162.23999857902527,
- "p90": 166.9440045952797,
- "p95": 169.8879972100258,
- "p99": 194.5599988102913
+ "p50": 1841.6959643363953,
+ "p90": 2376.38396024704,
+ "p95": 2436.576008796692,
+ "p99": 2486.591935157776
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
+ "dispatchLogicalBytes": 808822784,
+ "combineLogicalBytes": 808822784,
+ "fanoutMean": 3.44354248046875,
+ "recvTokensMax": 16380,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 83.90399813652039,
- "p90": 86.65599673986435,
- "p95": 87.96799927949905,
- "p99": 94.2080020904541
+ "p50": 1366.047978401184,
+ "p90": 1532.0639610290527,
+ "p95": 1574.5279788970947,
+ "p99": 1611.232042312622
},
"combine": {
- "p50": 110.20799726247787,
- "p90": 112.92800307273865,
- "p95": 113.88800293207169,
- "p99": 120.92799693346024
+ "p50": 2185.5039596557617,
+ "p90": 2457.98397064209,
+ "p95": 2500.159978866577,
+ "p99": 2538.048028945923
},
"roundtrip": {
- "p50": 220.19200026988983,
- "p90": 223.4240025281906,
- "p95": 224.99200701713562,
- "p99": 245.08799612522125
+ "p50": 3528.8639068603516,
+ "p90": 3623.5198974609375,
+ "p95": 3858.8480949401855,
+ "p99": 3893.8560485839844
},
"isolatedSum": {
- "p50": 194.11199539899826,
- "p90": 199.583999812603,
- "p95": 201.85600221157074,
- "p99": 215.13599902391434
+ "p50": 3551.551938056946,
+ "p90": 3990.0479316711426,
+ "p95": 4074.687957763672,
+ "p99": 4149.280071258545
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1619795968,
+ "combineLogicalBytes": 1619795968,
+ "fanoutMean": 3.4481201171875,
+ "recvTokensMax": 32761,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -14926,47 +14821,48 @@
]
},
{
- "id": "cx-669dd02d",
- "identity": "b300|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "564ae99a5e9997e8",
+ "id": "cx-d88669b5",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||e47f9de18e6cabe",
+ "colorKey": "gb200_62fd6d04",
+ "comparisonKey": "dc09c891587fd8b9",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:50:49.099200+00:00",
+ "generatedAt": "2026-06-29T13:57:23.040418+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
+ "label": "GB200 EP8 · deepep · bf16 · zipf",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "zipf",
+ "routingLabel": "zipf",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -14974,317 +14870,315 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "e47f9de18e6cabe",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285671692",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285671692",
- "createdAt": "2026-06-27T09:50:49.099200+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 112.35199868679047,
- "p90": 115.87200313806534,
- "p95": 118.59200149774551,
- "p99": 133.215993642807
- },
- "combine": {
- "p50": 62.33600154519081,
- "p90": 64.35199826955795,
- "p95": 64.7680014371872,
- "p99": 68.4799998998642
- },
- "roundtrip": {
- "p50": 164.92800414562225,
- "p90": 168.06399822235107,
- "p95": 170.27199268341064,
- "p99": 182.6239973306656
- },
- "isolatedSum": {
- "p50": 174.68800023198128,
- "p90": 180.2240014076233,
- "p95": 183.3600029349327,
- "p99": 201.6959935426712
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 112.28799819946289,
- "p90": 116.09599739313126,
- "p95": 119.6800023317337,
- "p99": 136.4479959011078
- },
- "combine": {
- "p50": 62.39999830722809,
- "p90": 64.15999680757523,
- "p95": 64.38399851322174,
- "p99": 65.92000275850296
- },
- "roundtrip": {
- "p50": 167.26399958133698,
- "p90": 169.76000368595123,
- "p95": 172.4800020456314,
- "p99": 185.92000007629395
- },
- "isolatedSum": {
- "p50": 174.68799650669098,
- "p90": 180.25599420070648,
- "p95": 184.06400084495544,
- "p99": 202.36799865961075
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 113.92000317573547,
- "p90": 117.3119992017746,
- "p95": 121.21599912643433,
- "p99": 127.68000364303589
+ "p50": 125.15200674533844,
+ "p90": 133.91999900341034,
+ "p95": 136.86400651931763,
+ "p99": 145.6640064716339
},
"combine": {
- "p50": 63.680000603199005,
- "p90": 65.40799885988235,
- "p95": 65.95200300216675,
- "p99": 78.78399640321732
+ "p50": 135.1040005683899,
+ "p90": 143.2960033416748,
+ "p95": 144.9279934167862,
+ "p99": 153.85599434375763
},
"roundtrip": {
- "p50": 168.35199296474457,
- "p90": 172.35200107097626,
- "p95": 174.43199455738068,
- "p99": 184.4480037689209
+ "p50": 237.88799345493317,
+ "p90": 246.2719976902008,
+ "p95": 248.44799935817719,
+ "p99": 254.20799851417542
},
"isolatedSum": {
- "p50": 177.60000377893448,
- "p90": 182.71999806165695,
- "p95": 187.16800212860107,
- "p99": 206.4640000462532
+ "p50": 260.25600731372833,
+ "p90": 277.21600234508514,
+ "p95": 281.7919999361038,
+ "p99": 299.52000081539154
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 115.167997777462,
- "p90": 118.9119964838028,
- "p95": 121.50400131940842,
- "p99": 135.26399433612823
+ "p50": 235.45600473880768,
+ "p90": 243.16799640655518,
+ "p95": 245.728000998497,
+ "p99": 249.91999566555023
},
"combine": {
- "p50": 66.27199798822403,
- "p90": 67.4239993095398,
- "p95": 68.35199892520905,
- "p99": 74.17599856853485
+ "p50": 353.5679876804352,
+ "p90": 363.1359934806824,
+ "p95": 365.1520013809204,
+ "p99": 373.05599451065063
},
"roundtrip": {
- "p50": 172.0000058412552,
- "p90": 174.68799650669098,
- "p95": 176.83200538158417,
- "p99": 191.6159987449646
+ "p50": 547.0399856567383,
+ "p90": 556.6400289535522,
+ "p95": 559.2960119247437,
+ "p99": 571.4560151100159
},
"isolatedSum": {
- "p50": 181.43999576568604,
- "p90": 186.3359957933426,
- "p95": 189.85600024461746,
- "p99": 209.4399929046631
+ "p50": 589.0239924192429,
+ "p90": 606.3039898872375,
+ "p95": 610.8800023794174,
+ "p99": 622.9759901762009
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 201678848,
+ "combineLogicalBytes": 201678848,
+ "fanoutMean": 3.4345703125,
+ "recvTokensMax": 4094,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 115.9679964184761,
- "p90": 118.97599697113037,
- "p95": 120.7360029220581,
- "p99": 130.68799674510956
+ "p50": 706.4639925956726,
+ "p90": 716.1920070648193,
+ "p95": 719.2639708518982,
+ "p99": 726.751983165741
},
"combine": {
- "p50": 67.1359971165657,
- "p90": 68.9919963479042,
- "p95": 69.60000097751617,
- "p99": 86.75199747085571
+ "p50": 1146.2080478668213,
+ "p90": 1154.3680429458618,
+ "p95": 1155.6799411773682,
+ "p99": 1159.9359512329102
},
"roundtrip": {
- "p50": 174.94399845600128,
- "p90": 178.17600071430206,
- "p95": 179.77599799633026,
- "p99": 183.58400464057922
+ "p50": 1824.2559432983398,
+ "p90": 1834.5919847488403,
+ "p95": 1839.0400409698486,
+ "p99": 1851.4879941940308
},
"isolatedSum": {
- "p50": 183.1039935350418,
- "p90": 187.96799331903458,
- "p95": 190.33600389957428,
- "p99": 217.43999421596527
+ "p50": 1852.672040462494,
+ "p90": 1870.5600500106812,
+ "p95": 1874.9439120292664,
+ "p99": 1886.6879343986511
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 808822784,
+ "combineLogicalBytes": 808822784,
+ "fanoutMean": 3.44354248046875,
+ "recvTokensMax": 16380,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
- },
+ }
+ ]
+ },
+ {
+ "id": "cx-acdb86d4",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||9014f8b812bd39e",
+ "colorKey": "gb200_8855aa26",
+ "comparisonKey": "971950c12559c2cf",
+ "schemaVersion": 3,
+ "generatedAt": "2026-06-29T13:58:06.263347+00:00",
+ "status": "valid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
+ "backend": "deepep",
+ "phase": "prefill",
+ "mode": "normal",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
+ "worldSize": 8,
+ "epSize": 8,
+ "label": "GB200 EP8 · deepep · bf16 · zipf-heavy",
+ "model": "DeepSeek-V3/V4",
+ "shape": {
+ "hidden": 7168,
+ "topk": 8,
+ "experts": 256,
+ "routing": "zipf-heavy",
+ "routingLabel": "zipf-heavy",
+ "routingStep": 0,
+ "unevenTokens": "none",
+ "eplbEnabled": false,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
+ "combineQuantMode": "none"
+ },
+ "resourceProfile": {
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
+ "paretoEligible": false
+ },
+ "placement": {
+ "kind": "packed",
+ "nodes": 2,
+ "gpusPerNode": 8,
+ "scaleUpDomain": 8
+ },
+ "routingConsistent": true,
+ "traceSignature": "9014f8b812bd39e",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
+ "repository": "SemiAnalysisAI/InferenceX",
+ "run": {
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ },
+ "rows": [
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 122.65600264072418,
- "p90": 126.91199779510498,
- "p95": 131.42399489879608,
- "p99": 145.53600549697876
+ "p50": 117.69600212574005,
+ "p90": 125.44000148773193,
+ "p95": 128.86400520801544,
+ "p99": 136.9599997997284
},
"combine": {
- "p50": 71.6480016708374,
- "p90": 73.44000041484833,
- "p95": 73.88799637556076,
- "p99": 87.55200356245041
+ "p50": 130.14400005340576,
+ "p90": 133.85599851608276,
+ "p95": 136.54400408267975,
+ "p99": 141.02399349212646
},
"roundtrip": {
- "p50": 186.81600689888,
- "p90": 191.77600741386414,
- "p95": 198.08000326156616,
- "p99": 232.44799673557281
+ "p50": 224.89599883556366,
+ "p90": 231.64799809455872,
+ "p95": 234.72000658512115,
+ "p99": 238.5600060224533
},
"isolatedSum": {
- "p50": 194.30400431156158,
- "p90": 200.3519982099533,
- "p95": 205.31199127435684,
- "p99": 233.08800905942917
+ "p50": 247.8400021791458,
+ "p90": 259.2960000038147,
+ "p95": 265.4080092906952,
+ "p99": 277.98399329185486
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 22650880,
+ "combineLogicalBytes": 22650880,
+ "fanoutMean": 1.54296875,
+ "recvTokensMax": 1024,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 136.60800457000732,
- "p90": 141.34399592876434,
- "p95": 144.57599818706512,
- "p99": 157.4079990386963
+ "p50": 205.72799444198608,
+ "p90": 212.41599321365356,
+ "p95": 214.91199731826782,
+ "p99": 220.92799842357635
},
"combine": {
- "p50": 87.3280018568039,
- "p90": 89.4400030374527,
- "p95": 89.91999924182892,
- "p99": 97.98400104045868
+ "p50": 329.0880024433136,
+ "p90": 336.92800998687744,
+ "p95": 339.55198526382446,
+ "p99": 342.5920009613037
},
"roundtrip": {
- "p50": 214.4639939069748,
- "p90": 220.15999257564545,
- "p95": 224.35200214385986,
- "p99": 243.23199689388275
+ "p50": 506.3040256500244,
+ "p90": 515.1039958000183,
+ "p95": 518.3680057525635,
+ "p99": 527.6479721069336
},
"isolatedSum": {
- "p50": 223.93600642681122,
- "p90": 230.78399896621704,
- "p95": 234.49599742889404,
- "p99": 255.39200007915497
+ "p50": 534.8159968852997,
+ "p90": 549.344003200531,
+ "p95": 554.4639825820923,
+ "p99": 563.5199993848801
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 91521024,
+ "combineLogicalBytes": 91521024,
+ "fanoutMean": 1.55859375,
+ "recvTokensMax": 4096,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 159.84000265598297,
- "p90": 163.64799439907074,
- "p95": 165.56799411773682,
- "p99": 176.15999281406403
+ "p50": 619.3919777870178,
+ "p90": 629.8879981040955,
+ "p95": 634.1120004653931,
+ "p99": 641.1200165748596
},
"combine": {
- "p50": 108.38399827480316,
- "p90": 110.68800091743469,
- "p95": 111.96800321340561,
- "p99": 118.72000247240067
+ "p50": 1179.7120571136475,
+ "p90": 1187.4560117721558,
+ "p95": 1189.568042755127,
+ "p99": 1192.639946937561
},
"roundtrip": {
- "p50": 262.08001375198364,
- "p90": 266.30398631095886,
- "p95": 270.81599831581116,
- "p99": 283.6799919605255
+ "p50": 1733.8240146636963,
+ "p90": 1743.3279752731323,
+ "p95": 1746.2719678878784,
+ "p99": 1752.73597240448
},
"isolatedSum": {
- "p50": 268.22400093078613,
- "p90": 274.33599531650543,
- "p95": 277.5359973311424,
- "p99": 294.8799952864647
+ "p50": 1799.1040349006653,
+ "p90": 1817.3440098762512,
+ "p95": 1823.68004322052,
+ "p99": 1833.7599635124207
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
+ "dispatchLogicalBytes": 368062464,
+ "combineLogicalBytes": 368062464,
+ "fanoutMean": 1.5670166015625,
+ "recvTokensMax": 16384,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -15293,47 +15187,48 @@
]
},
{
- "id": "cx-67bd51f4",
- "identity": "b300|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "b300_d6fd14c3",
- "comparisonKey": "df0e0b78e56d7652",
+ "id": "cx-83202be8",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||0e6b07a25691d72",
+ "colorKey": "gb200_10fda6e8",
+ "comparisonKey": "36d11099a6c7305e",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:03.421071+00:00",
+ "generatedAt": "2026-06-29T13:51:26.745976+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_17",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "Kimi-K2",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-heavy+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "zipf-heavy",
+ "routingLabel": "zipf-heavy+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -15341,318 +15236,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "0e6b07a25691d72",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 7.38995361328125,
+ "eplbImbalanceAfter": 1.0000210716610862,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287503879",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503879",
- "createdAt": "2026-06-27T11:14:03.421071+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 56.63999915122986,
- "p90": 58.43200162053108,
- "p95": 59.167999774217606,
- "p99": 67.4239993095398
+ "p50": 125.5359947681427,
+ "p90": 379.2319893836975,
+ "p95": 407.99999237060547,
+ "p99": 436.2240135669708
},
"combine": {
- "p50": 59.67999994754791,
- "p90": 61.24800071120262,
- "p95": 62.463998794555664,
- "p99": 73.27999919652939
+ "p50": 125.11999905109406,
+ "p90": 138.40000331401825,
+ "p95": 396.06401324272156,
+ "p99": 415.583997964859
},
"roundtrip": {
- "p50": 119.80800330638885,
- "p90": 122.78400361537933,
- "p95": 129.63199615478516,
- "p99": 147.74399995803833
+ "p50": 222.30400145053864,
+ "p90": 448.4480023384094,
+ "p95": 486.9120121002197,
+ "p99": 527.7760028839111
},
"isolatedSum": {
- "p50": 116.31999909877777,
- "p90": 119.6800023317337,
- "p95": 121.63199856877327,
- "p99": 140.70399850606918
+ "p50": 250.65599381923676,
+ "p90": 517.6319926977158,
+ "p95": 804.064005613327,
+ "p99": 851.8080115318298
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 301056,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 79206400,
+ "combineLogicalBytes": 79206400,
+ "fanoutMean": 5.3955078125,
+ "recvTokensMax": 713,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 57.24800005555153,
- "p90": 59.23200026154518,
- "p95": 59.87200140953064,
- "p99": 67.55200028419495
+ "p50": 163.35999965667725,
+ "p90": 440.8319890499115,
+ "p95": 466.5600061416626,
+ "p99": 488.2560074329376
},
"combine": {
- "p50": 60.99199876189232,
- "p90": 62.880001962184906,
- "p95": 63.26399743556976,
- "p99": 65.43999910354614
+ "p50": 168.86399686336517,
+ "p90": 416.128009557724,
+ "p95": 441.8559968471527,
+ "p99": 466.94400906562805
},
"roundtrip": {
- "p50": 121.5360015630722,
- "p90": 123.87199699878693,
- "p95": 125.44000148773193,
- "p99": 139.74399864673615
+ "p50": 302.047997713089,
+ "p90": 568.3519840240479,
+ "p95": 588.096022605896,
+ "p99": 614.5280003547668
},
"isolatedSum": {
- "p50": 118.23999881744385,
- "p90": 122.11200222373009,
- "p95": 123.1359988451004,
- "p99": 132.9919993877411
+ "p50": 332.2239965200424,
+ "p90": 856.9599986076355,
+ "p95": 908.4160029888153,
+ "p99": 955.2000164985657
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 609280,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
+ "dispatchLogicalBytes": 159330304,
+ "combineLogicalBytes": 159330304,
+ "fanoutMean": 5.4267578125,
+ "recvTokensMax": 1436,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 59.23200026154518,
- "p90": 61.43999844789505,
- "p95": 62.3680017888546,
- "p99": 68.00000369548798
+ "p50": 231.55200481414795,
+ "p90": 276.63999795913696,
+ "p95": 530.1759839057922,
+ "p99": 586.4959955215454
},
"combine": {
- "p50": 65.05600363016129,
- "p90": 67.07199662923813,
- "p95": 67.391999065876,
- "p99": 71.55200093984604
+ "p50": 294.8800027370453,
+ "p90": 542.0479774475098,
+ "p95": 578.0159831047058,
+ "p99": 598.4640121459961
},
"roundtrip": {
- "p50": 128.03199887275696,
- "p90": 133.88800621032715,
- "p95": 158.36800634860992,
- "p99": 190.0160014629364
+ "p50": 472.31999039649963,
+ "p90": 489.9199903011322,
+ "p95": 774.4640111923218,
+ "p99": 814.079999923706
},
"isolatedSum": {
- "p50": 124.28800389170647,
- "p90": 128.51199507713318,
- "p95": 129.7600008547306,
- "p99": 139.55200463533401
+ "p50": 526.4320075511932,
+ "p90": 818.6879754066467,
+ "p95": 1108.191967010498,
+ "p99": 1184.9600076675415
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
+ "dispatchLogicalBytes": 319535104,
+ "combineLogicalBytes": 319535104,
+ "fanoutMean": 5.441650390625,
+ "recvTokensMax": 2897,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 59.776000678539276,
- "p90": 61.85600161552429,
- "p95": 62.72000074386597,
- "p99": 69.40799951553345
- },
- "combine": {
- "p50": 65.60000032186508,
- "p90": 67.45599955320358,
- "p95": 68.7360018491745,
- "p99": 75.80800354480743
- },
- "roundtrip": {
- "p50": 128.83199751377106,
- "p90": 131.71200454235077,
- "p95": 135.42400300502777,
- "p99": 151.0079950094223
- },
- "isolatedSum": {
- "p50": 125.37600100040436,
- "p90": 129.31200116872787,
- "p95": 131.45600259304047,
- "p99": 145.21600306034088
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2415616,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 61.184000223875046,
- "p90": 63.45599889755249,
- "p95": 65.47199934720993,
- "p99": 85.85599809885025
+ "p50": 361.7919981479645,
+ "p90": 375.61601400375366,
+ "p95": 677.344024181366,
+ "p99": 708.9920043945312
},
"combine": {
- "p50": 67.35999882221222,
- "p90": 69.40799951553345,
- "p95": 70.46400010585785,
- "p99": 73.79200309515
+ "p50": 503.10397148132324,
+ "p90": 519.2319750785828,
+ "p95": 810.2399706840515,
+ "p99": 847.4559783935547
},
"roundtrip": {
- "p50": 131.58400356769562,
- "p90": 134.11200046539307,
- "p95": 135.42400300502777,
- "p99": 145.53600549697876
+ "p50": 838.1440043449402,
+ "p90": 1135.0079774856567,
+ "p95": 1168.6079502105713,
+ "p99": 1194.815993309021
},
"isolatedSum": {
- "p50": 128.54399904608727,
- "p90": 132.86399841308594,
- "p95": 135.93599945306778,
- "p99": 159.64800119400024
+ "p50": 864.8959696292877,
+ "p90": 894.8479890823364,
+ "p95": 1487.5839948654175,
+ "p99": 1556.447982788086
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4924416,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
+ "dispatchLogicalBytes": 638410752,
+ "combineLogicalBytes": 638410752,
+ "fanoutMean": 5.43603515625,
+ "recvTokensMax": 5815,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 64.00000303983688,
- "p90": 65.60000032186508,
- "p95": 66.65600091218948,
- "p99": 75.74400305747986
- },
- "combine": {
- "p50": 73.27999919652939,
- "p90": 75.23199915885925,
- "p95": 75.55200159549713,
- "p99": 79.29600030183792
- },
- "roundtrip": {
- "p50": 144.48000490665436,
- "p90": 147.0080018043518,
- "p95": 147.93600142002106,
- "p99": 156.89599514007568
- },
- "isolatedSum": {
- "p50": 137.28000223636627,
- "p90": 140.83199948072433,
- "p95": 142.20800250768661,
- "p99": 155.04000335931778
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 72.38399982452393,
- "p90": 74.36800003051758,
- "p95": 75.29599964618683,
- "p99": 83.99999886751175
+ "p50": 623.6799955368042,
+ "p90": 637.0239853858948,
+ "p95": 947.8399753570557,
+ "p99": 981.4079999923706
},
"combine": {
- "p50": 88.22400122880936,
- "p90": 90.11200070381165,
- "p95": 91.58399701118469,
- "p99": 97.98400104045868
+ "p50": 895.2000141143799,
+ "p90": 907.263994216919,
+ "p95": 1206.112027168274,
+ "p99": 1238.5599613189697
},
"roundtrip": {
- "p50": 173.8239973783493,
- "p90": 176.54399573802948,
- "p95": 177.37600207328796,
- "p99": 183.67999792099
+ "p50": 1491.6479587554932,
+ "p90": 1501.9840002059937,
+ "p95": 1796.447992324829,
+ "p99": 1847.6159572601318
},
"isolatedSum": {
- "p50": 160.60800105333328,
- "p90": 164.48000073432922,
- "p95": 166.87999665737152,
- "p99": 181.98399990797043
+ "p50": 1518.880009651184,
+ "p90": 1544.2879796028137,
+ "p95": 2153.9520025253296,
+ "p99": 2219.9679613113403
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19418112,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
+ "dispatchLogicalBytes": 1275144192,
+ "combineLogicalBytes": 1275144192,
+ "fanoutMean": 5.42889404296875,
+ "recvTokensMax": 11606,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 86.36800199747086,
- "p90": 89.37600255012512,
- "p95": 90.52799642086029,
- "p99": 98.11200201511383
+ "p50": 1165.6320095062256,
+ "p90": 1447.4560022354126,
+ "p95": 1480.1599979400635,
+ "p99": 1516.0000324249268
},
"combine": {
- "p50": 108.70400071144104,
- "p90": 111.26399785280228,
- "p95": 112.35199868679047,
- "p99": 118.81600320339203
+ "p50": 1694.4639682769775,
+ "p90": 1978.559970855713,
+ "p95": 2011.904001235962,
+ "p99": 2036.03196144104
},
"roundtrip": {
- "p50": 222.59199619293213,
- "p90": 226.52800381183624,
- "p95": 229.5680046081543,
- "p99": 250.5599856376648
+ "p50": 2829.9200534820557,
+ "p90": 2843.2960510253906,
+ "p95": 3127.7120113372803,
+ "p99": 3177.40797996521
},
"isolatedSum": {
- "p50": 195.0720027089119,
- "p90": 200.6400004029274,
- "p95": 202.87999510765076,
- "p99": 216.92800521850586
+ "p50": 2860.095977783203,
+ "p90": 3426.0159730911255,
+ "p95": 3492.0639991760254,
+ "p99": 3552.031993865967
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2546374656,
+ "combineLogicalBytes": 2546374656,
+ "fanoutMean": 5.420562744140625,
+ "recvTokensMax": 23170,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -15660,47 +15481,48 @@
]
},
{
- "id": "cx-4e513884",
- "identity": "b300|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "cf47e1b064e2e435",
+ "id": "cx-2d323e00",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||a39eeb7c2dc6ca7",
+ "colorKey": "gb200_0cd6b029",
+ "comparisonKey": "21940cb240b28c01",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:51:14.282258+00:00",
+ "generatedAt": "2026-06-29T14:03:02.250790+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_05",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "Kimi-K2",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-mild",
+ "model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 256,
+ "routing": "zipf-mild",
+ "routingLabel": "zipf-mild",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -15708,318 +15530,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "a39eeb7c2dc6ca7",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285682409",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285682409",
- "createdAt": "2026-06-27T09:51:14.282258+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 117.50400066375732,
- "p90": 121.0239976644516,
- "p95": 123.03999811410904,
- "p99": 134.91199910640717
- },
- "combine": {
- "p50": 60.19200012087822,
- "p90": 61.72800064086914,
- "p95": 62.52799928188324,
- "p99": 66.94400310516357
- },
- "roundtrip": {
- "p50": 167.87199676036835,
- "p90": 171.55200242996216,
- "p95": 176.09600722789764,
- "p99": 186.0799938440323
- },
- "isolatedSum": {
- "p50": 177.69600078463554,
- "p90": 182.75199830532074,
- "p95": 185.56799739599228,
- "p99": 201.85600221157074
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 301056,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 118.9119964838028,
- "p90": 122.91199713945389,
- "p95": 125.2480000257492,
- "p99": 134.5279961824417
- },
- "combine": {
- "p50": 62.55999952554703,
- "p90": 64.4799992442131,
- "p95": 65.05600363016129,
- "p99": 74.33599978685379
- },
- "roundtrip": {
- "p50": 173.7920045852661,
- "p90": 177.0240068435669,
- "p95": 179.4240027666092,
- "p99": 209.56799387931824
- },
- "isolatedSum": {
- "p50": 181.47199600934982,
- "p90": 187.391996383667,
- "p95": 190.3040036559105,
- "p99": 208.8639959692955
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 609280,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 119.45600062608719,
- "p90": 124.38400089740753,
- "p95": 127.80800461769104,
- "p99": 167.77600347995758
+ "p50": 130.2720010280609,
+ "p90": 137.85600662231445,
+ "p95": 140.35199582576752,
+ "p99": 145.37599682807922
},
"combine": {
- "p50": 63.00800293684006,
- "p90": 64.86400216817856,
- "p95": 65.37599861621857,
- "p99": 77.27999985218048
+ "p50": 134.36800241470337,
+ "p90": 142.5279974937439,
+ "p95": 143.96800100803375,
+ "p99": 152.99199521541595
},
"roundtrip": {
- "p50": 173.98400604724884,
- "p90": 177.85599827766418,
- "p95": 180.38399517536163,
- "p99": 187.99999356269836
+ "p50": 237.37600445747375,
+ "p90": 244.89599466323853,
+ "p95": 248.57600033283234,
+ "p99": 255.74401021003723
},
"isolatedSum": {
- "p50": 182.46400356292725,
- "p90": 189.2480030655861,
- "p95": 193.1840032339096,
- "p99": 245.05600333213806
+ "p50": 264.6400034427643,
+ "p90": 280.38400411605835,
+ "p95": 284.31999683380127,
+ "p99": 298.3679920434952
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 70160384,
+ "combineLogicalBytes": 70160384,
+ "fanoutMean": 4.779296875,
+ "recvTokensMax": 987,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 121.5360015630722,
- "p90": 124.7360035777092,
- "p95": 126.46399438381195,
- "p99": 140.35199582576752
+ "p50": 169.11999881267548,
+ "p90": 177.3120015859604,
+ "p95": 180.09600043296814,
+ "p99": 185.63200533390045
},
"combine": {
- "p50": 66.68800115585327,
- "p90": 68.4799998998642,
- "p95": 69.24799829721451,
- "p99": 75.71200281381607
+ "p50": 187.74400651454926,
+ "p90": 192.89599359035492,
+ "p95": 194.07999515533447,
+ "p99": 200.00000298023224
},
"roundtrip": {
- "p50": 179.51999604701996,
- "p90": 184.12800133228302,
- "p95": 190.528005361557,
- "p99": 432.6399862766266
+ "p50": 323.68001341819763,
+ "p90": 331.07200264930725,
+ "p95": 333.95200967788696,
+ "p99": 338.9759957790375
},
"isolatedSum": {
- "p50": 188.22400271892548,
- "p90": 193.2160034775734,
- "p95": 195.71199268102646,
- "p99": 216.0639986395836
+ "p50": 356.86400532722473,
+ "p90": 370.2079951763153,
+ "p95": 374.1759955883026,
+ "p99": 385.6320083141327
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2415616,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 140879872,
+ "combineLogicalBytes": 140879872,
+ "fanoutMean": 4.79833984375,
+ "recvTokensMax": 1972,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 122.43200093507767,
- "p90": 126.0479986667633,
- "p95": 128.63999605178833,
- "p99": 147.71200716495514
+ "p50": 246.848002076149,
+ "p90": 255.93599677085876,
+ "p95": 259.10401344299316,
+ "p99": 264.76800441741943
},
"combine": {
- "p50": 67.64800101518631,
- "p90": 69.43999975919724,
- "p95": 71.29599899053574,
- "p99": 89.08800035715103
+ "p50": 354.8159897327423,
+ "p90": 363.1359934806824,
+ "p95": 365.02400040626526,
+ "p99": 371.2959885597229
},
"roundtrip": {
- "p50": 181.11999332904816,
- "p90": 185.12000143527985,
- "p95": 187.6160055398941,
- "p99": 205.28000593185425
+ "p50": 558.463990688324,
+ "p90": 566.9119954109192,
+ "p95": 570.2080130577087,
+ "p99": 574.8479962348938
},
"isolatedSum": {
- "p50": 190.08000195026398,
- "p90": 195.48799842596054,
- "p95": 199.93599504232407,
- "p99": 236.80000752210617
+ "p50": 601.6639918088913,
+ "p90": 619.0719902515411,
+ "p95": 624.1280138492584,
+ "p99": 636.0639929771423
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4924416,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 282333184,
+ "combineLogicalBytes": 282333184,
+ "fanoutMean": 4.80810546875,
+ "recvTokensMax": 3936,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 129.2800009250641,
- "p90": 132.4159950017929,
- "p95": 134.75200533866882,
- "p99": 156.80000185966492
+ "p50": 395.58398723602295,
+ "p90": 402.8159976005554,
+ "p95": 404.35200929641724,
+ "p99": 407.9360067844391
},
"combine": {
- "p50": 73.18399846553802,
- "p90": 75.19999891519547,
- "p95": 76.25599950551987,
- "p99": 83.45600217580795
+ "p50": 616.1919832229614,
+ "p90": 623.7120032310486,
+ "p95": 625.216007232666,
+ "p99": 628.4800171852112
},
"roundtrip": {
- "p50": 195.26399672031403,
- "p90": 199.0399956703186,
- "p95": 201.82399451732635,
- "p99": 220.12799978256226
+ "p50": 986.9760274887085,
+ "p90": 994.4319725036621,
+ "p95": 996.5119957923889,
+ "p99": 1004.6080350875854
},
"isolatedSum": {
- "p50": 202.4639993906021,
- "p90": 207.61599391698837,
- "p95": 211.0080048441887,
- "p99": 240.25600403547287
+ "p50": 1011.7759704589844,
+ "p90": 1026.528000831604,
+ "p95": 1029.5680165290833,
+ "p99": 1036.4160239696503
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 566716416,
+ "combineLogicalBytes": 566716416,
+ "fanoutMean": 4.8255615234375,
+ "recvTokensMax": 7855,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 142.4960047006607,
- "p90": 146.40000462532043,
- "p95": 149.53599870204926,
- "p99": 164.32000696659088
+ "p50": 704.7039866447449,
+ "p90": 715.391993522644,
+ "p95": 718.783974647522,
+ "p99": 727.1680235862732
},
"combine": {
- "p50": 88.79999816417694,
- "p90": 90.97599983215332,
- "p95": 91.839998960495,
- "p99": 101.50399804115295
+ "p50": 1126.911997795105,
+ "p90": 1131.6800117492676,
+ "p95": 1133.7920427322388,
+ "p99": 1140.9599781036377
},
"roundtrip": {
- "p50": 221.37600183486938,
- "p90": 225.95199942588806,
- "p95": 228.99200022220612,
- "p99": 238.17600309848785
+ "p50": 1802.4640083312988,
+ "p90": 1876.63996219635,
+ "p95": 1890.0799751281738,
+ "p99": 2370.176076889038
},
"isolatedSum": {
- "p50": 231.29600286483765,
- "p90": 237.37600445747375,
- "p95": 241.37599766254425,
- "p99": 265.82400500774384
+ "p50": 1831.6159844398499,
+ "p90": 1847.0720052719116,
+ "p95": 1852.5760173797607,
+ "p99": 1868.128001689911
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19418112,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1132285952,
+ "combineLogicalBytes": 1132285952,
+ "fanoutMean": 4.8206787109375,
+ "recvTokensMax": 15694,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 167.04000532627106,
- "p90": 171.23199999332428,
- "p95": 174.01599884033203,
- "p99": 183.1039935350418
+ "p50": 1379.3599605560303,
+ "p90": 1392.0639753341675,
+ "p95": 1394.752025604248,
+ "p99": 1403.007984161377
},
"combine": {
- "p50": 109.21599715948105,
- "p90": 111.455999314785,
- "p95": 112.70400136709213,
- "p99": 125.91999769210815
+ "p50": 2162.0800495147705,
+ "p90": 2171.583890914917,
+ "p95": 2175.0400066375732,
+ "p99": 2370.975971221924
},
"roundtrip": {
- "p50": 270.2080011367798,
- "p90": 274.2080092430115,
- "p95": 276.5119969844818,
- "p99": 297.7280020713806
+ "p50": 3512.063980102539,
+ "p90": 3529.439926147461,
+ "p95": 3536.57603263855,
+ "p99": 3583.51993560791
},
"isolatedSum": {
- "p50": 276.2560024857521,
- "p90": 282.6879993081093,
- "p95": 286.72000020742416,
- "p99": 309.02399122714996
+ "p50": 3541.440010070801,
+ "p90": 3563.6478662490845,
+ "p95": 3569.7920322418213,
+ "p99": 3773.983955383301
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2267840512,
+ "combineLogicalBytes": 2267840512,
+ "fanoutMean": 4.82763671875,
+ "recvTokensMax": 31357,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -16027,366 +15775,293 @@
]
},
{
- "id": "cx-1911c35d",
- "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "b300_eee29686",
- "comparisonKey": "37f5e47990ede677",
+ "id": "cx-fd383085",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||3eb2f0d7bdba0fe",
+ "colorKey": "gb200_4a0087e5",
+ "comparisonKey": "a10977d3e6692367",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T17:41:38.976776+00:00",
+ "generatedAt": "2026-06-29T14:03:32.180363+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8 (norm)",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-mild+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "zipf-mild",
+ "routingLabel": "zipf-mild+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "3eb2f0d7bdba0fe",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 2.545684814453125,
+ "eplbImbalanceAfter": 1.0001495361328125,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254479346",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346",
- "createdAt": "2026-06-26T17:41:38.976776+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 56.03199824690819,
- "p90": 58.240000158548355,
- "p95": 59.61599946022034,
- "p99": 69.56800073385239
- },
- "combine": {
- "p50": 61.40799820423126,
- "p90": 63.4239986538887,
- "p95": 64.35199826955795,
- "p99": 77.53600180149078
- },
- "roundtrip": {
- "p50": 121.18399888277054,
- "p90": 123.4240010380745,
- "p95": 124.64000284671783,
- "p99": 131.48799538612366
- },
- "isolatedSum": {
- "p50": 117.43999645113945,
- "p90": 121.66399881243706,
- "p95": 123.96799772977829,
- "p99": 147.10400253534317
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 56.92800134420395,
- "p90": 59.23200026154518,
- "p95": 60.19200012087822,
- "p99": 68.4799998998642
- },
- "combine": {
- "p50": 62.24000081419945,
- "p90": 64.19199705123901,
- "p95": 65.05600363016129,
- "p99": 69.69600170850754
- },
- "roundtrip": {
- "p50": 122.65600264072418,
- "p90": 124.79999661445618,
- "p95": 125.98399817943573,
- "p99": 135.1040005683899
- },
- "isolatedSum": {
- "p50": 119.1680021584034,
- "p90": 123.4239973127842,
- "p95": 125.2480037510395,
- "p99": 138.17600160837173
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 57.631999254226685,
- "p90": 60.03199890255928,
- "p95": 61.37600168585777,
- "p99": 67.16799736022949
+ "p50": 126.36800110340118,
+ "p90": 135.5839967727661,
+ "p95": 138.87999951839447,
+ "p99": 143.39199662208557
},
"combine": {
- "p50": 63.93600255250931,
- "p90": 65.43999910354614,
- "p95": 65.88800251483917,
- "p99": 69.023996591568
+ "p50": 130.36799430847168,
+ "p90": 134.14399325847626,
+ "p95": 139.39200341701508,
+ "p99": 144.25599575042725
},
"roundtrip": {
- "p50": 125.50400197505951,
- "p90": 128.51199507713318,
- "p95": 132.06399977207184,
- "p99": 143.10400187969208
+ "p50": 227.23199427127838,
+ "p90": 235.9679937362671,
+ "p95": 238.71999979019165,
+ "p99": 245.12000381946564
},
"isolatedSum": {
- "p50": 121.56800180673599,
- "p90": 125.47199800610542,
- "p95": 127.26400420069695,
- "p99": 136.19199395179749
+ "p50": 256.73599541187286,
+ "p90": 269.72799003124237,
+ "p95": 278.27200293540955,
+ "p99": 287.6479923725128
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 78159872,
+ "combineLogicalBytes": 78159872,
+ "fanoutMean": 5.32421875,
+ "recvTokensMax": 702,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 59.487998485565186,
- "p90": 61.88800185918808,
- "p95": 62.81600147485733,
- "p99": 73.2479989528656
+ "p50": 164.51199352741241,
+ "p90": 173.24799299240112,
+ "p95": 175.29599368572235,
+ "p99": 179.36000227928162
},
"combine": {
- "p50": 66.46399945020676,
- "p90": 67.80800223350525,
- "p95": 68.89600306749344,
- "p99": 71.71200215816498
+ "p50": 169.18399930000305,
+ "p90": 177.88800597190857,
+ "p95": 179.58399653434753,
+ "p99": 183.3920031785965
},
"roundtrip": {
- "p50": 128.60800325870514,
- "p90": 130.65600395202637,
- "p95": 131.80799782276154,
- "p99": 144.3520039319992
+ "p50": 307.8399896621704,
+ "p90": 315.64798951148987,
+ "p95": 318.91199946403503,
+ "p99": 328.0639946460724
},
"isolatedSum": {
- "p50": 125.95199793577194,
- "p90": 129.69600409269333,
- "p95": 131.71200454235077,
- "p99": 144.96000111103058
+ "p50": 333.69599282741547,
+ "p90": 351.1359989643097,
+ "p95": 354.8799902200699,
+ "p99": 362.7520054578781
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 156563456,
+ "combineLogicalBytes": 156563456,
+ "fanoutMean": 5.33251953125,
+ "recvTokensMax": 1393,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 60.67200005054474,
- "p90": 62.880001962184906,
- "p95": 63.74400109052658,
- "p99": 69.82400268316269
+ "p50": 235.71200668811798,
+ "p90": 243.0720031261444,
+ "p95": 245.44000625610352,
+ "p99": 252.16001272201538
},
"combine": {
- "p50": 67.64800101518631,
- "p90": 69.63200122117996,
- "p95": 70.91200351715088,
- "p99": 79.71200346946716
+ "p50": 289.92000222206116,
+ "p90": 293.92001032829285,
+ "p95": 296.09599709510803,
+ "p99": 301.9520044326782
},
"roundtrip": {
- "p50": 130.87999820709229,
- "p90": 133.15199315547943,
- "p95": 134.43200290203094,
- "p99": 141.88799262046814
+ "p50": 474.07999634742737,
+ "p90": 487.0400130748749,
+ "p95": 492.99201369285583,
+ "p99": 814.6880269050598
},
"isolatedSum": {
- "p50": 128.32000106573105,
- "p90": 132.51200318336487,
- "p95": 134.65600460767746,
- "p99": 149.53600615262985
+ "p50": 525.6320089101791,
+ "p90": 536.9920134544373,
+ "p95": 541.5360033512115,
+ "p99": 554.1120171546936
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
+ "dispatchLogicalBytes": 312410112,
+ "combineLogicalBytes": 312410112,
+ "fanoutMean": 5.3203125,
+ "recvTokensMax": 2773,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 63.61600011587143,
- "p90": 65.47199934720993,
- "p95": 66.23999774456024,
- "p99": 72.54400104284286
+ "p50": 360.51198840141296,
+ "p90": 368.19198727607727,
+ "p95": 371.10400199890137,
+ "p99": 379.2319893836975
},
"combine": {
- "p50": 72.31999933719635,
- "p90": 74.14399832487106,
- "p95": 75.23199915885925,
- "p99": 79.6160027384758
+ "p50": 496.89599871635437,
+ "p90": 502.01600790023804,
+ "p95": 503.2320022583008,
+ "p99": 508.67199897766113
},
"roundtrip": {
- "p50": 142.87999272346497,
- "p90": 145.85599303245544,
- "p95": 147.16799557209015,
- "p99": 155.29599785804749
+ "p50": 829.2480111122131,
+ "p90": 837.984025478363,
+ "p95": 840.4800295829773,
+ "p99": 844.3840146064758
},
"isolatedSum": {
- "p50": 135.93599945306778,
- "p90": 139.615997672081,
- "p95": 141.4719969034195,
- "p99": 152.16000378131866
+ "p50": 857.4079871177673,
+ "p90": 870.2079951763153,
+ "p95": 874.3360042572021,
+ "p99": 887.9039883613586
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 622712832,
+ "combineLogicalBytes": 622712832,
+ "fanoutMean": 5.3023681640625,
+ "recvTokensMax": 5498,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 72.64000177383423,
- "p90": 75.3600001335144,
- "p95": 76.51200145483017,
- "p99": 82.65600353479385
+ "p50": 622.7840185165405,
+ "p90": 630.5279731750488,
+ "p95": 632.5759887695312,
+ "p99": 638.4959816932678
},
"combine": {
- "p50": 87.90399879217148,
- "p90": 90.08000046014786,
- "p95": 90.84799885749817,
- "p99": 101.15200281143188
+ "p50": 882.8480243682861,
+ "p90": 891.7120099067688,
+ "p95": 894.1439986228943,
+ "p99": 896.6400027275085
},
"roundtrip": {
- "p50": 172.83199727535248,
- "p90": 175.4239946603775,
- "p95": 176.41599476337433,
- "p99": 181.43999576568604
+ "p50": 1476.3519763946533,
+ "p90": 1485.0560426712036,
+ "p95": 1487.8400564193726,
+ "p99": 1497.6320266723633
},
"isolatedSum": {
- "p50": 160.5440005660057,
- "p90": 165.44000059366226,
- "p95": 167.36000031232834,
- "p99": 183.80800634622574
+ "p50": 1505.6320428848267,
+ "p90": 1522.2399830818176,
+ "p95": 1526.7199873924255,
+ "p99": 1535.1359844207764
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1245038592,
+ "combineLogicalBytes": 1245038592,
+ "fanoutMean": 5.30072021484375,
+ "recvTokensMax": 10955,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 83.20000022649765,
- "p90": 86.07999980449677,
- "p95": 87.0399996638298,
- "p99": 90.17600119113922
+ "p50": 1150.048017501831,
+ "p90": 1156.6400527954102,
+ "p95": 1158.30397605896,
+ "p99": 1162.176012992859
},
"combine": {
- "p50": 108.70400071144104,
- "p90": 110.97600311040878,
- "p95": 112.06399649381638,
- "p99": 116.41599982976913
+ "p50": 1647.6800441741943,
+ "p90": 1657.8880548477173,
+ "p95": 1659.4560146331787,
+ "p99": 1667.7440404891968
},
"roundtrip": {
- "p50": 218.07999908924103,
- "p90": 221.343994140625,
- "p95": 222.97599911689758,
- "p99": 235.52000522613525
+ "p50": 2770.2720165252686,
+ "p90": 2780.384063720703,
+ "p95": 2783.1039428710938,
+ "p99": 2787.1360778808594
},
"isolatedSum": {
- "p50": 191.9040009379387,
- "p90": 197.05600291490555,
- "p95": 199.10399615764618,
- "p99": 206.59200102090836
+ "p50": 2797.7280616760254,
+ "p90": 2814.5281076431274,
+ "p95": 2817.7599906921387,
+ "p99": 2829.9200534820557
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2489460736,
+ "combineLogicalBytes": 2489460736,
+ "fanoutMean": 5.299407958984375,
+ "recvTokensMax": 21864,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -16394,366 +16069,293 @@
]
},
{
- "id": "cx-fe6f5351",
- "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "b300_84b10b26",
- "comparisonKey": "abf92acc41d9d301",
+ "id": "cx-e422e15d",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e",
+ "colorKey": "gb200_ff33b726",
+ "comparisonKey": "bb683080611997e2",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T18:10:48.557544+00:00",
+ "generatedAt": "2026-06-29T14:04:49.835497+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8 (norm) [cl]",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-moderate",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "zipf-moderate",
+ "routingLabel": "zipf-moderate",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "bfb01c61bdf926e",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254499301",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301",
- "createdAt": "2026-06-26T18:10:48.557544+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 49.56800118088722,
- "p90": 51.263999193906784,
- "p95": 51.83999985456467,
- "p99": 57.920001447200775
+ "p50": 125.791996717453,
+ "p90": 134.783998131752,
+ "p95": 136.9280070066452,
+ "p99": 143.10400187969208
},
"combine": {
- "p50": 62.24000081419945,
- "p90": 63.680000603199005,
- "p95": 64.51199948787689,
- "p99": 66.3679987192154
+ "p50": 133.760005235672,
+ "p90": 142.59199798107147,
+ "p95": 144.16000247001648,
+ "p99": 146.4959979057312
},
"roundtrip": {
- "p50": 114.78400230407715,
- "p90": 116.86400324106216,
- "p95": 118.01599711179733,
- "p99": 126.68800354003906
+ "p50": 233.95200073719025,
+ "p90": 240.09600281715393,
+ "p95": 242.43199825286865,
+ "p99": 246.33599817752838
},
"isolatedSum": {
- "p50": 111.80800199508667,
- "p90": 114.94399979710579,
- "p95": 116.35199934244156,
- "p99": 124.28800016641617
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 49.92000013589859,
- "p90": 51.80799961090088,
- "p95": 52.76799947023392,
- "p99": 58.9120015501976
- },
- "combine": {
- "p50": 63.040003180503845,
- "p90": 64.89600241184235,
- "p95": 65.24799764156342,
- "p99": 74.11199808120728
- },
- "roundtrip": {
- "p50": 116.64000153541565,
- "p90": 119.00799721479416,
- "p95": 121.08799815177917,
- "p99": 136.57599687576294
- },
- "isolatedSum": {
- "p50": 112.96000331640244,
- "p90": 116.70400202274323,
- "p95": 118.01599711179733,
- "p99": 133.02399963140488
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 51.42400041222572,
- "p90": 53.63199859857559,
- "p95": 54.655998945236206,
- "p99": 65.76000154018402
- },
- "combine": {
- "p50": 63.10400366783142,
- "p90": 64.96000289916992,
- "p95": 65.63200056552887,
- "p99": 75.93599706888199
- },
- "roundtrip": {
- "p50": 117.53600090742111,
- "p90": 119.87199634313583,
- "p95": 120.86399644613266,
- "p99": 132.192000746727
- },
- "isolatedSum": {
- "p50": 114.52800408005714,
- "p90": 118.59200149774551,
- "p95": 120.28799951076508,
- "p99": 141.695998609066
+ "p50": 259.552001953125,
+ "p90": 277.3759961128235,
+ "p95": 281.0880094766617,
+ "p99": 289.5999997854233
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 52.41600051522255,
- "p90": 54.496001452207565,
- "p95": 55.71199953556061,
- "p99": 60.5119988322258
+ "p50": 159.90400314331055,
+ "p90": 167.29600727558136,
+ "p95": 170.56000232696533,
+ "p99": 175.20000040531158
},
"combine": {
- "p50": 65.72800129652023,
- "p90": 67.48799979686737,
- "p95": 67.9360032081604,
- "p99": 73.21599870920181
+ "p50": 182.40000307559967,
+ "p90": 190.68799912929535,
+ "p95": 192.57600605487823,
+ "p99": 194.14399564266205
},
"roundtrip": {
- "p50": 122.04799801111221,
- "p90": 124.38400089740753,
- "p95": 126.52799487113953,
- "p99": 147.16799557209015
+ "p50": 316.51198863983154,
+ "p90": 323.4879970550537,
+ "p95": 325.98400115966797,
+ "p99": 331.167995929718
},
"isolatedSum": {
- "p50": 118.14400181174278,
- "p90": 121.98400124907494,
- "p95": 123.64800274372101,
- "p99": 133.7279975414276
+ "p50": 342.3040062189102,
+ "p90": 357.9840064048767,
+ "p95": 363.13600838184357,
+ "p99": 369.34399604797363
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 100509696,
+ "combineLogicalBytes": 100509696,
+ "fanoutMean": 3.42333984375,
+ "recvTokensMax": 2046,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 53.69599908590317,
- "p90": 55.84000051021576,
- "p95": 56.86400085687637,
- "p99": 65.0240033864975
+ "p50": 229.12000119686127,
+ "p90": 236.9920015335083,
+ "p95": 239.9359941482544,
+ "p99": 247.1040040254593
},
"combine": {
- "p50": 67.16799736022949,
- "p90": 68.9919963479042,
- "p95": 69.69600170850754,
- "p99": 77.98399776220322
+ "p50": 346.8160033226013,
+ "p90": 355.3600013256073,
+ "p95": 357.34400153160095,
+ "p99": 361.82400584220886
},
"roundtrip": {
- "p50": 123.36000055074692,
- "p90": 125.66399574279785,
- "p95": 127.16799974441528,
- "p99": 140.70400595664978
+ "p50": 545.7919836044312,
+ "p90": 553.4080266952515,
+ "p95": 556.7359924316406,
+ "p99": 567.0080184936523
},
"isolatedSum": {
- "p50": 120.86399644613266,
- "p90": 124.83199685811996,
- "p95": 126.56000256538391,
- "p99": 143.0080011487007
+ "p50": 575.9360045194626,
+ "p90": 592.3520028591156,
+ "p95": 597.2799956798553,
+ "p99": 608.9280098676682
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 201678848,
+ "combineLogicalBytes": 201678848,
+ "fanoutMean": 3.4345703125,
+ "recvTokensMax": 4094,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 57.66399949789047,
- "p90": 59.776000678539276,
- "p95": 60.63999980688095,
- "p99": 65.72800129652023
+ "p50": 373.9840090274811,
+ "p90": 382.81598687171936,
+ "p95": 386.04798913002014,
+ "p99": 393.15199851989746
},
"combine": {
- "p50": 72.89600372314453,
- "p90": 74.14399832487106,
- "p95": 75.55200159549713,
- "p99": 83.96799862384796
+ "p50": 634.7839832305908,
+ "p90": 638.4639739990234,
+ "p95": 639.8720145225525,
+ "p99": 646.1120247840881
},
"roundtrip": {
- "p50": 138.40000331401825,
- "p90": 140.60799777507782,
- "p95": 141.66399836540222,
- "p99": 149.53599870204926
+ "p50": 975.7440090179443,
+ "p90": 984.000027179718,
+ "p95": 986.624002456665,
+ "p99": 995.7759976387024
},
"isolatedSum": {
- "p50": 130.560003221035,
- "p90": 133.91999900341034,
- "p95": 136.19200140237808,
- "p99": 149.6959999203682
+ "p50": 1008.7679922580719,
+ "p90": 1021.2799608707428,
+ "p95": 1025.9200036525726,
+ "p99": 1039.2640233039856
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 405035008,
+ "combineLogicalBytes": 405035008,
+ "fanoutMean": 3.4488525390625,
+ "recvTokensMax": 8189,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 66.04799628257751,
- "p90": 68.15999746322632,
- "p95": 69.88800317049026,
- "p99": 74.91199672222137
+ "p50": 695.3920125961304,
+ "p90": 706.1120271682739,
+ "p95": 709.4720005989075,
+ "p99": 715.4560089111328
},
"combine": {
- "p50": 87.93599903583527,
- "p90": 90.08000046014786,
- "p95": 91.74399822950363,
- "p99": 98.24000298976898
+ "p50": 1147.0719575881958,
+ "p90": 1226.3360023498535,
+ "p95": 1237.9200458526611,
+ "p99": 1293.7599420547485
},
"roundtrip": {
- "p50": 164.76799547672272,
- "p90": 167.42399334907532,
- "p95": 169.3120002746582,
- "p99": 185.92000007629395
+ "p50": 1811.743974685669,
+ "p90": 1821.4720487594604,
+ "p95": 1824.8319625854492,
+ "p99": 1829.2800188064575
},
"isolatedSum": {
- "p50": 153.98399531841278,
- "p90": 158.23999792337418,
- "p95": 161.6320013999939,
- "p99": 173.15199971199036
+ "p50": 1842.4639701843262,
+ "p90": 1932.4480295181274,
+ "p95": 1947.3920464515686,
+ "p99": 2009.2159509658813
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 808822784,
+ "combineLogicalBytes": 808822784,
+ "fanoutMean": 3.44354248046875,
+ "recvTokensMax": 16380,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 77.63200253248215,
- "p90": 80.1599994301796,
- "p95": 81.50400221347809,
- "p99": 98.65599870681763
+ "p50": 1357.2479486465454,
+ "p90": 1370.751976966858,
+ "p95": 1375.615954399109,
+ "p99": 1384.4480514526367
},
"combine": {
- "p50": 108.35199803113937,
- "p90": 110.78400164842606,
- "p95": 111.84000223875046,
- "p99": 126.01600587368011
+ "p50": 2192.960023880005,
+ "p90": 2200.000047683716,
+ "p95": 2201.3440132141113,
+ "p99": 2209.887981414795
},
"roundtrip": {
- "p50": 211.42399311065674,
- "p90": 214.52799439430237,
- "p95": 215.87200462818146,
- "p99": 223.1999933719635
+ "p50": 3524.8639583587646,
+ "p90": 3537.760019302368,
+ "p95": 3541.248083114624,
+ "p99": 3546.5919971466064
},
"isolatedSum": {
- "p50": 185.98400056362152,
- "p90": 190.94400107860565,
- "p95": 193.34400445222855,
- "p99": 224.67200458049774
+ "p50": 3550.2079725265503,
+ "p90": 3570.7520246505737,
+ "p95": 3576.95996761322,
+ "p99": 3594.3360328674316
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1619795968,
+ "combineLogicalBytes": 1619795968,
+ "fanoutMean": 3.4481201171875,
+ "recvTokensMax": 32761,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -16761,47 +16363,48 @@
]
},
{
- "id": "cx-83d0a7b9",
- "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_49e66a7b",
- "comparisonKey": "0abec2edede4ab05",
+ "id": "cx-80278610",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef",
+ "colorKey": "gb200_acbc8de8",
+ "comparisonKey": "54864d16635426aa",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:47:10.185475+00:00",
+ "generatedAt": "2026-06-29T14:05:19.211189+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8 [cl]",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · bf16 · zipf-moderate+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "zipf-moderate",
+ "routingLabel": "zipf-moderate+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -16809,317 +16412,243 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "45b103b10fbcaef",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 4.895263671875,
+ "eplbImbalanceAfter": 1.0000902811686199,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285590577",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285590577",
- "createdAt": "2026-06-27T09:47:10.185475+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 49.247998744249344,
- "p90": 51.552001386880875,
- "p95": 52.799999713897705,
- "p99": 63.35999816656113
- },
- "combine": {
- "p50": 61.72800064086914,
- "p90": 63.45599889755249,
- "p95": 65.47199934720993,
- "p99": 80.86399734020233
- },
- "roundtrip": {
- "p50": 114.78400230407715,
- "p90": 117.40799993276596,
- "p95": 120.80000340938568,
- "p99": 136.83199882507324
- },
- "isolatedSum": {
- "p50": 110.97599938511848,
- "p90": 115.00800028443336,
- "p95": 118.27199906110764,
- "p99": 144.22399550676346
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 50.27199909090996,
- "p90": 52.5440014898777,
- "p95": 54.46400120854378,
- "p99": 61.08799949288368
+ "p50": 125.82400441169739,
+ "p90": 133.95200669765472,
+ "p95": 136.3839954137802,
+ "p99": 141.08799397945404
},
"combine": {
- "p50": 61.664000153541565,
- "p90": 63.551999628543854,
- "p95": 65.15199691057205,
- "p99": 74.01599735021591
+ "p50": 128.38399410247803,
+ "p90": 131.8719983100891,
+ "p95": 132.76800513267517,
+ "p99": 133.98399949073792
},
"roundtrip": {
- "p50": 116.12799763679504,
- "p90": 118.52800101041794,
- "p95": 121.66400253772736,
- "p99": 139.26400244235992
+ "p50": 224.2880016565323,
+ "p90": 231.99999332427979,
+ "p95": 234.047994017601,
+ "p99": 240.25599658489227
},
"isolatedSum": {
- "p50": 111.93599924445152,
- "p90": 116.09600111842155,
- "p95": 119.61599811911583,
- "p99": 135.1039968430996
+ "p50": 254.20799851417542,
+ "p90": 265.82400500774384,
+ "p95": 269.1520005464554,
+ "p99": 275.07199347019196
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
+ "dispatchLogicalBytes": 77385728,
+ "combineLogicalBytes": 77385728,
+ "fanoutMean": 5.271484375,
+ "recvTokensMax": 691,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 51.10400170087814,
- "p90": 53.279999643564224,
- "p95": 53.98400127887726,
- "p99": 61.08799949288368
- },
- "combine": {
- "p50": 62.880001962184906,
- "p90": 64.86400216817856,
- "p95": 65.63200056552887,
- "p99": 83.52000266313553
- },
- "roundtrip": {
- "p50": 117.91999638080597,
- "p90": 121.05599790811539,
- "p95": 122.81599640846252,
- "p99": 135.16800105571747
- },
- "isolatedSum": {
- "p50": 113.98400366306305,
- "p90": 118.14400181174278,
- "p95": 119.61600184440613,
- "p99": 144.6080021560192
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 52.38400027155876,
- "p90": 54.687999188899994,
- "p95": 55.71199953556061,
- "p99": 60.127999633550644
+ "p50": 163.13600540161133,
+ "p90": 171.00800573825836,
+ "p95": 173.50399494171143,
+ "p99": 179.19999361038208
},
"combine": {
- "p50": 66.91200286149979,
- "p90": 73.11999797821045,
- "p95": 75.19999891519547,
- "p99": 92.16000139713287
+ "p50": 168.99199783802032,
+ "p90": 173.88799786567688,
+ "p95": 178.01600694656372,
+ "p99": 181.40800297260284
},
"roundtrip": {
- "p50": 121.98399752378464,
- "p90": 124.38400089740753,
- "p95": 125.76000392436981,
- "p99": 145.9520012140274
+ "p50": 304.1279911994934,
+ "p90": 312.48000264167786,
+ "p95": 315.8400058746338,
+ "p99": 321.02400064468384
},
"isolatedSum": {
- "p50": 119.29600313305855,
- "p90": 127.80799716711044,
- "p95": 130.91199845075607,
- "p99": 152.28800103068352
+ "p50": 332.12800323963165,
+ "p90": 344.89600360393524,
+ "p95": 351.52000188827515,
+ "p99": 360.6079965829849
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
+ "dispatchLogicalBytes": 155172864,
+ "combineLogicalBytes": 155172864,
+ "fanoutMean": 5.28515625,
+ "recvTokensMax": 1378,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 53.37600037455559,
- "p90": 55.615998804569244,
- "p95": 56.63999915122986,
- "p99": 63.45599889755249
+ "p50": 232.5119972229004,
+ "p90": 240.79999327659607,
+ "p95": 244.00000274181366,
+ "p99": 253.2159984111786
},
"combine": {
- "p50": 67.87200272083282,
- "p90": 69.76000219583511,
- "p95": 70.52800059318542,
- "p99": 75.87199658155441
+ "p50": 289.15199637413025,
+ "p90": 294.14400458335876,
+ "p95": 297.21599817276,
+ "p99": 302.3039996623993
},
"roundtrip": {
- "p50": 124.38400089740753,
- "p90": 127.10399925708771,
- "p95": 128.76799702644348,
- "p99": 143.71199905872345
+ "p50": 470.0799882411957,
+ "p90": 477.85601019859314,
+ "p95": 479.93600368499756,
+ "p99": 483.90400409698486
},
"isolatedSum": {
- "p50": 121.24800309538841,
- "p90": 125.37600100040436,
- "p95": 127.16799974441528,
- "p99": 139.3279954791069
+ "p50": 521.6639935970306,
+ "p90": 534.9439978599548,
+ "p95": 541.2160009145737,
+ "p99": 555.5199980735779
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
+ "dispatchLogicalBytes": 310546432,
+ "combineLogicalBytes": 310546432,
+ "fanoutMean": 5.28857421875,
+ "recvTokensMax": 2745,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 57.0559985935688,
- "p90": 59.4559982419014,
- "p95": 61.47199869155884,
- "p99": 80.19199967384338
+ "p50": 359.8079979419708,
+ "p90": 366.5919899940491,
+ "p95": 369.79201436042786,
+ "p99": 380.511999130249
},
"combine": {
- "p50": 73.21599870920181,
- "p90": 75.32799988985062,
- "p95": 76.19199901819229,
- "p99": 83.52000266313553
+ "p50": 492.70400404930115,
+ "p90": 502.1759867668152,
+ "p95": 506.84797763824463,
+ "p99": 544.6400046348572
},
"roundtrip": {
- "p50": 137.56799697875977,
- "p90": 140.44800400733948,
- "p95": 143.51999759674072,
- "p99": 164.0319973230362
+ "p50": 823.0400085449219,
+ "p90": 830.1119804382324,
+ "p95": 832.3839902877808,
+ "p99": 837.1840119361877
},
"isolatedSum": {
- "p50": 130.27199730277061,
- "p90": 134.783998131752,
- "p95": 137.66399770975113,
- "p99": 163.7120023369789
+ "p50": 852.512001991272,
+ "p90": 868.7679767608643,
+ "p95": 876.6399919986725,
+ "p99": 925.1520037651062
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
+ "dispatchLogicalBytes": 620619776,
+ "combineLogicalBytes": 620619776,
+ "fanoutMean": 5.2845458984375,
+ "recvTokensMax": 5526,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 66.46399945020676,
- "p90": 69.05599683523178,
- "p95": 70.23999840021133,
- "p99": 80.9599980711937
+ "p50": 620.639979839325,
+ "p90": 628.2879710197449,
+ "p95": 630.4640173912048,
+ "p99": 638.0800008773804
},
"combine": {
- "p50": 87.20000088214874,
- "p90": 88.95999938249588,
- "p95": 89.82399851083755,
- "p99": 96.6079980134964
+ "p50": 887.2640132904053,
+ "p90": 895.8079814910889,
+ "p95": 897.7280259132385,
+ "p99": 902.1120071411133
},
"roundtrip": {
- "p50": 165.43999314308167,
- "p90": 167.7439957857132,
- "p95": 168.70400309562683,
- "p99": 188.9919936656952
+ "p50": 1480.2559614181519,
+ "p90": 1489.5039796829224,
+ "p95": 1492.192029953003,
+ "p99": 1496.5440034866333
},
"isolatedSum": {
- "p50": 153.6640003323555,
- "p90": 158.01599621772766,
- "p95": 160.0639969110489,
- "p99": 177.5679960846901
+ "p50": 1507.9039931297302,
+ "p90": 1524.0959525108337,
+ "p95": 1528.1920433044434,
+ "p99": 1540.1920080184937
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
+ "dispatchLogicalBytes": 1239175168,
+ "combineLogicalBytes": 1239175168,
+ "fanoutMean": 5.2757568359375,
+ "recvTokensMax": 11165,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 76.73600316047668,
- "p90": 79.23199981451035,
- "p95": 80.64000308513641,
- "p99": 87.52000331878662
+ "p50": 1147.4239826202393,
+ "p90": 1154.4640064239502,
+ "p95": 1156.1280488967896,
+ "p99": 1159.2639684677124
},
"combine": {
- "p50": 108.57599973678589,
- "p90": 111.10399663448334,
- "p95": 112.86400258541107,
- "p99": 119.6800023317337
+ "p50": 1648.0319499969482,
+ "p90": 1657.5679779052734,
+ "p95": 1660.3200435638428,
+ "p99": 1772.063970565796
},
"roundtrip": {
- "p50": 211.2320065498352,
- "p90": 214.27200734615326,
- "p95": 216.06400609016418,
- "p99": 229.8559993505478
+ "p50": 2771.8400955200195,
+ "p90": 2780.0960540771484,
+ "p95": 2783.3919525146484,
+ "p99": 2789.8240089416504
},
"isolatedSum": {
- "p50": 185.31200289726257,
- "p90": 190.33599644899368,
- "p95": 193.50400567054749,
- "p99": 207.20000565052032
+ "p50": 2795.4559326171875,
+ "p90": 2812.0319843292236,
+ "p95": 2816.4480924606323,
+ "p99": 2931.3279390335083
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
+ "dispatchLogicalBytes": 2481604608,
+ "combineLogicalBytes": 2481604608,
+ "fanoutMean": 5.282684326171875,
+ "recvTokensMax": 22165,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
@@ -17128,47 +16657,48 @@
]
},
{
- "id": "cx-567c4192",
- "identity": "b300|deepep|7168|8|256|fp8-directcast|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_8688ff74",
- "comparisonKey": "e2dc1b3bb397a94c",
+ "id": "cx-5fb2396a",
+ "identity": "gb200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef",
+ "colorKey": "gb200_be611b2a",
+ "comparisonKey": "70163b5f1158fbc8",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T15:56:03.746973+00:00",
+ "generatedAt": "2026-06-29T14:01:47.949072+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_09",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8-directcast",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · bf16 · zipf+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "zipf",
+ "routingLabel": "zipf+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8-directcast",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -17176,318 +16706,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "45b103b10fbcaef",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 4.895263671875,
+ "eplbImbalanceAfter": 1.0000902811686199,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28294160895",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294160895",
- "createdAt": "2026-06-27T15:56:03.746973+00:00",
- "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 57.08799883723259,
- "p90": 59.20000001788139,
- "p95": 61.37600168585777,
- "p99": 70.62400132417679
- },
- "combine": {
- "p50": 61.63199990987778,
- "p90": 63.391998410224915,
- "p95": 64.06400352716446,
- "p99": 67.61600077152252
- },
- "roundtrip": {
- "p50": 121.63200229406357,
- "p90": 123.77600371837616,
- "p95": 125.791996717453,
- "p99": 143.39199662208557
- },
- "isolatedSum": {
- "p50": 118.71999874711037,
- "p90": 122.59199842810631,
- "p95": 125.44000521302223,
- "p99": 138.2400020956993
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 57.5999990105629,
- "p90": 60.38400158286095,
- "p95": 61.85600161552429,
- "p99": 71.96799665689468
- },
- "combine": {
- "p50": 63.29599767923355,
- "p90": 65.15199691057205,
- "p95": 65.60000032186508,
- "p99": 68.96000355482101
- },
- "roundtrip": {
- "p50": 124.86399710178375,
- "p90": 127.13600695133209,
- "p95": 128.4479945898056,
- "p99": 137.15200126171112
- },
- "isolatedSum": {
- "p50": 120.89599668979645,
- "p90": 125.535998493433,
- "p95": 127.45600193738937,
- "p99": 140.9280002117157
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 59.84000116586685,
- "p90": 62.30400130152702,
- "p95": 63.32799792289734,
- "p99": 72.1919983625412
+ "p50": 124.64000284671783,
+ "p90": 132.47999548912048,
+ "p95": 135.96799969673157,
+ "p99": 145.50399780273438
},
"combine": {
- "p50": 64.38399851322174,
- "p90": 66.68800115585327,
- "p95": 67.48799979686737,
- "p99": 74.30399954319
+ "p50": 127.68000364303589,
+ "p90": 132.1280002593994,
+ "p95": 133.63200426101685,
+ "p99": 141.2159949541092
},
"roundtrip": {
- "p50": 127.29600071907043,
- "p90": 130.11200726032257,
- "p95": 132.47999548912048,
- "p99": 155.74400126934052
+ "p50": 223.4559953212738,
+ "p90": 230.9119999408722,
+ "p95": 233.66400599479675,
+ "p99": 240.63999950885773
},
"isolatedSum": {
- "p50": 124.22399967908859,
- "p90": 128.9920024573803,
- "p95": 130.8159977197647,
- "p99": 146.4959979057312
+ "p50": 252.32000648975372,
+ "p90": 264.6079957485199,
+ "p95": 269.6000039577484,
+ "p99": 286.71999275684357
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 77385728,
+ "combineLogicalBytes": 77385728,
+ "fanoutMean": 5.271484375,
+ "recvTokensMax": 691,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 60.5119988322258,
- "p90": 63.519999384880066,
- "p95": 66.20799750089645,
- "p99": 72.64000177383423
+ "p50": 162.27200627326965,
+ "p90": 171.00800573825836,
+ "p95": 173.95199835300446,
+ "p99": 182.68799781799316
},
"combine": {
- "p50": 67.87200272083282,
- "p90": 69.98399645090103,
- "p95": 71.42399996519089,
- "p99": 86.87999844551086
+ "p50": 168.38400065898895,
+ "p90": 175.64800381660461,
+ "p95": 177.85599827766418,
+ "p99": 181.95199966430664
},
"roundtrip": {
- "p50": 129.95199859142303,
- "p90": 132.22399353981018,
- "p95": 133.63200426101685,
- "p99": 141.37600362300873
+ "p50": 305.1519989967346,
+ "p90": 313.53598833084106,
+ "p95": 316.22400879859924,
+ "p99": 321.3759958744049
},
"isolatedSum": {
- "p50": 128.38400155305862,
- "p90": 133.5039958357811,
- "p95": 137.63199746608734,
- "p99": 159.5200002193451
+ "p50": 330.6560069322586,
+ "p90": 346.656009554863,
+ "p95": 351.80799663066864,
+ "p99": 364.6399974822998
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 155172864,
+ "combineLogicalBytes": 155172864,
+ "fanoutMean": 5.28515625,
+ "recvTokensMax": 1378,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 61.664000153541565,
- "p90": 64.03200328350067,
- "p95": 65.34399837255478,
- "p99": 70.62400132417679
+ "p50": 232.9919934272766,
+ "p90": 241.85599386692047,
+ "p95": 244.83199417591095,
+ "p99": 249.85599517822266
},
"combine": {
- "p50": 68.57600063085556,
- "p90": 70.23999840021133,
- "p95": 71.42399996519089,
- "p99": 81.63200318813324
+ "p50": 291.9999957084656,
+ "p90": 299.6160089969635,
+ "p95": 301.4400005340576,
+ "p99": 304.60798740386963
},
"roundtrip": {
- "p50": 132.38400220870972,
- "p90": 135.55200397968292,
- "p95": 136.99199259281158,
- "p99": 152.0639955997467
+ "p50": 468.7039852142334,
+ "p90": 476.6719937324524,
+ "p95": 479.64799404144287,
+ "p99": 486.7520034313202
},
"isolatedSum": {
- "p50": 130.24000078439713,
- "p90": 134.272001683712,
- "p95": 136.76799833774567,
- "p99": 152.25600451231003
+ "p50": 524.9919891357422,
+ "p90": 541.472002863884,
+ "p95": 546.2719947099686,
+ "p99": 554.4639825820923
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 310546432,
+ "combineLogicalBytes": 310546432,
+ "fanoutMean": 5.28857421875,
+ "recvTokensMax": 2745,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 64.7360011935234,
- "p90": 66.94400310516357,
- "p95": 68.06399673223495,
- "p99": 75.68000257015228
+ "p50": 358.0799996852875,
+ "p90": 365.4400110244751,
+ "p95": 368.6720132827759,
+ "p99": 376.8639862537384
},
"combine": {
- "p50": 73.66400212049484,
- "p90": 75.45600086450577,
- "p95": 75.93599706888199,
- "p99": 81.79199695587158
+ "p50": 495.168000459671,
+ "p90": 502.560019493103,
+ "p95": 503.77601385116577,
+ "p99": 510.43200492858887
},
"roundtrip": {
- "p50": 143.99999380111694,
- "p90": 146.55999839305878,
- "p95": 147.93600142002106,
- "p99": 157.82399475574493
+ "p50": 824.4159817695618,
+ "p90": 832.7999711036682,
+ "p95": 835.7120156288147,
+ "p99": 840.2559757232666
},
"isolatedSum": {
- "p50": 138.40000331401825,
- "p90": 142.40000396966934,
- "p95": 143.99999380111694,
- "p99": 157.47199952602386
+ "p50": 853.2480001449585,
+ "p90": 868.0000305175781,
+ "p95": 872.4480271339417,
+ "p99": 887.2959911823273
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 620619776,
+ "combineLogicalBytes": 620619776,
+ "fanoutMean": 5.2845458984375,
+ "recvTokensMax": 5526,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 73.37599992752075,
- "p90": 75.99999755620956,
- "p95": 77.72800326347351,
- "p99": 170.46399414539337
+ "p50": 623.0400204658508,
+ "p90": 629.7600269317627,
+ "p95": 631.9360136985779,
+ "p99": 637.2159719467163
},
"combine": {
- "p50": 87.99999952316284,
- "p90": 90.04800021648407,
- "p95": 90.87999910116196,
- "p99": 99.96800124645233
+ "p50": 893.6640024185181,
+ "p90": 899.071991443634,
+ "p95": 904.3840169906616,
+ "p99": 909.1839790344238
},
"roundtrip": {
- "p50": 171.26399278640747,
- "p90": 174.27200078964233,
- "p95": 175.9359985589981,
- "p99": 191.03999435901642
+ "p50": 1488.576054573059,
+ "p90": 1497.7600574493408,
+ "p95": 1500.5439519882202,
+ "p99": 1506.2079429626465
},
"isolatedSum": {
- "p50": 161.3759994506836,
- "p90": 166.04799777269363,
- "p95": 168.60800236463547,
- "p99": 270.4319953918457
+ "p50": 1516.704022884369,
+ "p90": 1528.8320183753967,
+ "p95": 1536.3200306892395,
+ "p99": 1546.3999509811401
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 1239175168,
+ "combineLogicalBytes": 1239175168,
+ "fanoutMean": 5.2757568359375,
+ "recvTokensMax": 11165,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 83.96799862384796,
- "p90": 86.68799698352814,
- "p95": 87.61599659919739,
- "p99": 95.87199985980988
+ "p50": 1147.5199460983276,
+ "p90": 1153.8879871368408,
+ "p95": 1157.088041305542,
+ "p99": 1161.471962928772
},
"combine": {
- "p50": 109.0560033917427,
- "p90": 112.0000034570694,
- "p95": 112.89600282907486,
- "p99": 120.99199742078781
+ "p50": 1648.8959789276123,
+ "p90": 1658.784031867981,
+ "p95": 1660.0960493087769,
+ "p99": 1665.503978729248
},
"roundtrip": {
- "p50": 219.00799870491028,
- "p90": 222.08000719547272,
- "p95": 224.16000068187714,
- "p99": 238.8480007648468
+ "p50": 2773.279905319214,
+ "p90": 2784.032106399536,
+ "p95": 2787.071943283081,
+ "p99": 2798.7520694732666
},
"isolatedSum": {
- "p50": 193.02400201559067,
- "p90": 198.68800044059753,
- "p95": 200.51199942827225,
- "p99": 216.8639972805977
+ "p50": 2796.41592502594,
+ "p90": 2812.672019004822,
+ "p95": 2817.184090614319,
+ "p99": 2826.97594165802
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2481604608,
+ "combineLogicalBytes": 2481604608,
+ "fanoutMean": 5.282684326171875,
+ "recvTokensMax": 22165,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -17495,28 +16951,28 @@
]
},
{
- "id": "cx-10314900",
- "identity": "b300|deepep|7168|8|256|fp8-pertoken|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_5b8a7672",
- "comparisonKey": "facc765e5a3b34b6",
+ "id": "cx-8c11501e",
+ "identity": "gb200|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb200_42130d21",
+ "comparisonKey": "ffdc9987b54494d5",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T15:56:09.517904+00:00",
+ "generatedAt": "2026-06-29T13:48:41.427518+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_13",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "cached-layout-comm-only-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8-pertoken",
+ "label": "GB200 EP8 · deepep · bf16 [cl]",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -17527,15 +16983,16 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8-pertoken",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -17543,317 +17000,243 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28294164589",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294164589",
- "createdAt": "2026-06-27T15:56:09.517904+00:00",
- "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 57.08799883723259,
- "p90": 59.51999872922897,
- "p95": 60.864001512527466,
- "p99": 74.65600222349167
+ "p50": 107.84000158309937,
+ "p90": 115.29599875211716,
+ "p95": 118.6240017414093,
+ "p99": 124.86399710178375
},
"combine": {
- "p50": 63.1679967045784,
- "p90": 65.11999666690826,
- "p95": 66.20799750089645,
- "p99": 72.4480003118515
+ "p50": 126.88000500202179,
+ "p90": 131.52000308036804,
+ "p95": 133.44000279903412,
+ "p99": 140.76800644397736
},
"roundtrip": {
- "p50": 124.83199685811996,
- "p90": 126.97599828243256,
- "p95": 128.48000228405,
- "p99": 136.73600554466248
+ "p50": 206.62400126457214,
+ "p90": 212.5760018825531,
+ "p95": 215.488001704216,
+ "p99": 222.6559966802597
},
"isolatedSum": {
- "p50": 120.25599554181099,
- "p90": 124.63999539613724,
- "p95": 127.07199901342392,
- "p99": 147.10400253534317
+ "p50": 234.72000658512115,
+ "p90": 246.8160018324852,
+ "p95": 252.06400454044342,
+ "p99": 265.6320035457611
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 57.72799998521805,
- "p90": 59.967998415231705,
- "p95": 60.99199876189232,
- "p99": 68.70400160551071
- },
- "combine": {
- "p50": 63.58399987220764,
- "p90": 65.18399715423584,
- "p95": 66.0799965262413,
- "p99": 72.25599884986877
- },
- "roundtrip": {
- "p50": 124.4800016283989,
- "p90": 127.07200646400452,
- "p95": 128.86400520801544,
- "p99": 151.5520066022873
- },
- "isolatedSum": {
- "p50": 121.31199985742569,
- "p90": 125.15199556946754,
- "p95": 127.07199528813362,
- "p99": 140.9600004553795
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 59.167999774217606,
- "p90": 61.02399900555611,
- "p95": 62.431998550891876,
- "p99": 81.40800148248672
- },
- "combine": {
- "p50": 63.87200206518173,
- "p90": 65.5359998345375,
- "p95": 66.49599969387054,
- "p99": 72.95999675989151
- },
- "roundtrip": {
- "p50": 125.21600723266602,
- "p90": 128.09599936008453,
- "p95": 130.23999333381653,
- "p99": 148.19200336933136
- },
- "isolatedSum": {
- "p50": 123.04000183939934,
- "p90": 126.55999884009361,
- "p95": 128.92799824476242,
- "p99": 154.36799824237823
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 61.08799949288368,
- "p90": 63.29599767923355,
- "p95": 64.57599997520447,
- "p99": 72.95999675989151
+ "p50": 145.88800072669983,
+ "p90": 154.94400262832642,
+ "p95": 158.01599621772766,
+ "p99": 165.8879965543747
},
"combine": {
- "p50": 67.58400052785873,
- "p90": 69.21599805355072,
- "p95": 69.63200122117996,
- "p99": 79.52000200748444
+ "p50": 167.55199432373047,
+ "p90": 171.4559942483902,
+ "p95": 175.58400332927704,
+ "p99": 180.16000092029572
},
"roundtrip": {
- "p50": 130.14400005340576,
- "p90": 133.18400084972382,
- "p95": 136.35200262069702,
- "p99": 151.8400013446808
+ "p50": 286.3680124282837,
+ "p90": 293.5679852962494,
+ "p95": 296.06398940086365,
+ "p99": 299.9039888381958
},
"isolatedSum": {
- "p50": 128.67200002074242,
- "p90": 132.51199573278427,
- "p95": 134.20800119638443,
- "p99": 152.47999876737595
+ "p50": 313.4399950504303,
+ "p90": 326.3999968767166,
+ "p95": 333.5999995470047,
+ "p99": 346.0479974746704
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
+ "dispatchLogicalBytes": 155889664,
+ "combineLogicalBytes": 155889664,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 61.792001128196716,
- "p90": 64.51199948787689,
- "p95": 65.5680000782013,
- "p99": 77.05599814653397
+ "p50": 218.4319943189621,
+ "p90": 224.44799542427063,
+ "p95": 227.00800001621246,
+ "p99": 231.9359928369522
},
"combine": {
- "p50": 68.80000233650208,
- "p90": 70.65600156784058,
- "p95": 71.29599899053574,
- "p99": 77.66400277614594
+ "p50": 292.38399863243103,
+ "p90": 300.4800081253052,
+ "p95": 302.2719919681549,
+ "p99": 305.6960105895996
},
"roundtrip": {
- "p50": 133.215993642807,
- "p90": 135.3919953107834,
- "p95": 136.99199259281158,
- "p99": 163.32800686359406
+ "p50": 457.2159945964813,
+ "p90": 466.17600321769714,
+ "p95": 468.76800060272217,
+ "p99": 473.7280011177063
},
"isolatedSum": {
- "p50": 130.5920034646988,
- "p90": 135.16800105571747,
- "p95": 136.86399906873703,
- "p99": 154.7200009226799
+ "p50": 510.8159929513931,
+ "p90": 524.9280035495758,
+ "p95": 529.2799919843674,
+ "p99": 537.6320034265518
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 312266752,
+ "combineLogicalBytes": 312266752,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 64.83200192451477,
- "p90": 66.91200286149979,
- "p95": 67.45599955320358,
- "p99": 70.78400254249573
+ "p50": 345.69600224494934,
+ "p90": 352.9599905014038,
+ "p95": 356.06399178504944,
+ "p99": 360.9279990196228
},
"combine": {
- "p50": 73.53600114583969,
- "p90": 75.58400183916092,
- "p95": 76.4480009675026,
- "p99": 98.11200201511383
+ "p50": 497.8559911251068,
+ "p90": 502.7199983596802,
+ "p95": 504.5440196990967,
+ "p99": 510.17600297927856
},
"roundtrip": {
- "p50": 145.24799585342407,
- "p90": 147.64800667762756,
- "p95": 148.95999431610107,
- "p99": 166.4000004529953
+ "p50": 810.2399706840515,
+ "p90": 817.2799944877625,
+ "p95": 819.2960023880005,
+ "p99": 823.7119913101196
},
"isolatedSum": {
- "p50": 138.36800307035446,
- "p90": 142.4960047006607,
- "p95": 143.90400052070618,
- "p99": 168.89600455760956
+ "p50": 843.5519933700562,
+ "p90": 855.679988861084,
+ "p95": 860.6080114841461,
+ "p99": 871.1040019989014
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 623443968,
+ "combineLogicalBytes": 623443968,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 73.53600114583969,
- "p90": 75.9039968252182,
- "p95": 76.73600316047668,
- "p99": 81.34400099515915
+ "p50": 605.791985988617,
+ "p90": 613.6320233345032,
+ "p95": 616.3840293884277,
+ "p99": 622.6879954338074
},
"combine": {
- "p50": 87.77599781751633,
- "p90": 89.91999924182892,
- "p95": 90.7839983701706,
- "p99": 105.0880029797554
+ "p50": 880.7039856910706,
+ "p90": 886.1119747161865,
+ "p95": 889.5679712295532,
+ "p99": 894.6560025215149
},
"roundtrip": {
- "p50": 173.08799922466278,
- "p90": 175.6799966096878,
- "p95": 177.37600207328796,
- "p99": 204.73599433898926
+ "p50": 1458.1760168075562,
+ "p90": 1466.8480157852173,
+ "p95": 1469.2800045013428,
+ "p99": 1474.8159646987915
},
"isolatedSum": {
- "p50": 161.31199896335602,
- "p90": 165.82399606704712,
- "p95": 167.52000153064728,
- "p99": 186.43200397491455
+ "p50": 1486.4959716796875,
+ "p90": 1499.7439980506897,
+ "p95": 1505.952000617981,
+ "p99": 1517.3439979553223
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
+ "dispatchLogicalBytes": 1243805696,
+ "combineLogicalBytes": 1243805696,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 84.19200032949448,
- "p90": 87.0399996638298,
- "p95": 88.86399865150452,
- "p99": 98.01600128412247
+ "p50": 1116.1279678344727,
+ "p90": 1123.9039897918701,
+ "p95": 1126.3680458068848,
+ "p99": 1129.7919750213623
},
"combine": {
- "p50": 109.40799862146378,
- "p90": 111.93600296974182,
- "p95": 112.67200112342834,
- "p99": 124.44800138473511
+ "p50": 1633.952021598816,
+ "p90": 1640.9920454025269,
+ "p95": 1644.2240476608276,
+ "p99": 1646.9119787216187
},
"roundtrip": {
- "p50": 219.7760045528412,
- "p90": 223.4240025281906,
- "p95": 225.2800017595291,
- "p99": 244.80000138282776
+ "p50": 2724.479913711548,
+ "p90": 2733.5360050201416,
+ "p95": 2735.6479167938232,
+ "p99": 2747.1680641174316
},
"isolatedSum": {
- "p50": 193.59999895095825,
- "p90": 198.97600263357162,
- "p95": 201.53599977493286,
- "p99": 222.46400266885757
+ "p50": 2750.0799894332886,
+ "p90": 2764.896035194397,
+ "p95": 2770.5920934677124,
+ "p99": 2776.703953742981
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
+ "dispatchLogicalBytes": 2487009280,
+ "combineLogicalBytes": 2487009280,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -17862,28 +17245,28 @@
]
},
{
- "id": "cx-5fc48052",
- "identity": "b300|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "b300_c9569580",
- "comparisonKey": "789db7396b5cd7a2",
+ "id": "cx-42f87ef0",
+ "identity": "gb200|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "047ebfa2d1bec960",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:23.346610+00:00",
+ "generatedAt": "2026-06-29T13:59:42.464730+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_02",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
+ "label": "GB200 EP8 · deepep · fp8",
"model": "Qwen3.5",
"shape": {
"hidden": 4096,
@@ -17894,15 +17277,16 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -17910,59 +17294,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "03f98832f76b043",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287508460",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287508460",
- "createdAt": "2026-06-27T11:14:23.346610+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 82.56000280380249,
- "p90": 85.02399921417236,
- "p95": 88.16000074148178,
- "p99": 96.3520035147667
+ "p50": 342.848002910614,
+ "p90": 364.3839955329895,
+ "p95": 600.7999777793884,
+ "p99": 661.1199975013733
},
"combine": {
- "p50": 91.48799628019333,
- "p90": 93.9520001411438,
- "p95": 94.55999732017517,
- "p99": 102.94400155544281
+ "p50": 100.28800368309021,
+ "p90": 337.8880023956299,
+ "p95": 369.1520094871521,
+ "p99": 395.6480026245117
},
"roundtrip": {
- "p50": 158.39999914169312,
- "p90": 166.24000668525696,
- "p95": 167.80799627304077,
- "p99": 184.4799965620041
+ "p50": 414.8479998111725,
+ "p90": 432.44799971580505,
+ "p95": 678.4960031509399,
+ "p99": 714.9760127067566
},
"isolatedSum": {
- "p50": 174.04799908399582,
- "p90": 178.97599935531616,
- "p95": 182.71999806165695,
- "p99": 199.2960050702095
+ "p50": 443.1360065937042,
+ "p90": 702.2719979286194,
+ "p95": 969.9519872665405,
+ "p99": 1056.768000125885
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
+ "dispatchLogicalBytes": 22282240,
"combineLogicalBytes": 44564480,
"fanoutMean": 5.3125,
"recvTokensMax": 699,
- "stragglerRank": 5,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -17971,35 +17355,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 124.70400333404541,
- "p90": 127.23200023174286,
- "p95": 128.7360042333603,
- "p99": 135.83999872207642
+ "p50": 349.69601035118103,
+ "p90": 376.51199102401733,
+ "p95": 667.680025100708,
+ "p99": 707.1999907493591
},
"combine": {
- "p50": 128.48000228405,
- "p90": 130.5920034646988,
- "p95": 131.45600259304047,
- "p99": 141.02399349212646
+ "p50": 139.8719996213913,
+ "p90": 378.9440095424652,
+ "p95": 420.54399847984314,
+ "p99": 448.1920003890991
},
"roundtrip": {
- "p50": 231.6800057888031,
- "p90": 237.95199394226074,
- "p95": 239.29600417613983,
- "p99": 251.52000784873962
+ "p50": 466.2719964981079,
+ "p90": 480.6720018386841,
+ "p95": 703.1999826431274,
+ "p99": 811.3279938697815
},
"isolatedSum": {
- "p50": 253.1840056180954,
- "p90": 257.82400369644165,
- "p95": 260.19200682640076,
- "p99": 276.8639922142029
+ "p50": 489.5680099725723,
+ "p90": 755.4560005664825,
+ "p95": 1088.2240235805511,
+ "p99": 1155.3919911384583
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 89726976,
+ "dispatchLogicalBytes": 44863488,
"combineLogicalBytes": 89726976,
"fanoutMean": 5.34814453125,
"recvTokensMax": 1385,
- "stragglerRank": 7,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18008,35 +17392,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 174.17599260807037,
- "p90": 177.21599340438843,
- "p95": 179.07199263572693,
- "p99": 195.0400024652481
+ "p50": 422.94400930404663,
+ "p90": 439.61599469184875,
+ "p95": 456.7680060863495,
+ "p99": 782.7200293540955
},
"combine": {
- "p50": 191.64800643920898,
- "p90": 201.02399587631226,
- "p95": 201.56799256801605,
- "p99": 213.6639952659607
+ "p50": 208.12800526618958,
+ "p90": 442.1440064907074,
+ "p95": 485.8880043029785,
+ "p99": 526.3360142707825
},
"roundtrip": {
- "p50": 346.8480110168457,
- "p90": 351.26399993896484,
- "p95": 352.86399722099304,
- "p99": 362.39999532699585
+ "p50": 609.9200248718262,
+ "p90": 626.0160207748413,
+ "p95": 633.8239908218384,
+ "p99": 962.3680114746094
},
"isolatedSum": {
- "p50": 365.82399904727936,
- "p90": 378.2399892807007,
- "p95": 380.639985203743,
- "p99": 408.7039977312088
+ "p50": 631.0720145702362,
+ "p90": 881.7600011825562,
+ "p95": 942.656010389328,
+ "p99": 1309.056043624878
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 179503104,
+ "dispatchLogicalBytes": 89751552,
"combineLogicalBytes": 179503104,
"fanoutMean": 5.349609375,
"recvTokensMax": 2772,
- "stragglerRank": 5,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18045,35 +17429,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 289.34401273727417,
- "p90": 292.4480140209198,
- "p95": 293.88800263404846,
- "p99": 305.34398555755615
+ "p50": 580.8320045471191,
+ "p90": 595.9039926528931,
+ "p95": 824.7680068016052,
+ "p99": 938.1440281867981
},
"combine": {
- "p50": 389.1200125217438,
- "p90": 398.5919952392578,
- "p95": 400.9599983692169,
- "p99": 410.1119935512543
+ "p50": 403.26398611068726,
+ "p90": 656.4159989356995,
+ "p95": 708.4479928016663,
+ "p99": 752.1920204162598
},
"roundtrip": {
- "p50": 597.5040197372437,
- "p90": 608.1600189208984,
- "p95": 612.7039790153503,
- "p99": 631.8399906158447
+ "p50": 972.8639721870422,
+ "p90": 1011.4239454269409,
+ "p95": 1198.0479955673218,
+ "p99": 1805.3120374679565
},
"isolatedSum": {
- "p50": 678.464025259018,
- "p90": 691.0400092601776,
- "p95": 694.8480010032654,
- "p99": 715.4559791088104
+ "p50": 984.0959906578064,
+ "p90": 1252.3199915885925,
+ "p95": 1533.2159996032715,
+ "p99": 1690.3360486030579
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 359022592,
+ "dispatchLogicalBytes": 179511296,
"combineLogicalBytes": 359022592,
"fanoutMean": 5.349853515625,
"recvTokensMax": 5558,
- "stragglerRank": 7,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18082,35 +17466,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 520.6720232963562,
- "p90": 525.439977645874,
- "p95": 530.9439897537231,
- "p99": 536.0640287399292
+ "p50": 897.3760008811951,
+ "p90": 911.5200042724609,
+ "p95": 921.1840033531189,
+ "p99": 1252.511978149414
},
"combine": {
- "p50": 754.9759745597839,
- "p90": 765.7920122146606,
- "p95": 766.9119834899902,
- "p99": 778.6880135536194
+ "p50": 813.2479786872864,
+ "p90": 1091.9359922409058,
+ "p95": 1143.1039571762085,
+ "p99": 1172.320008277893
},
"roundtrip": {
- "p50": 1255.5840015411377,
- "p90": 1263.7759447097778,
- "p95": 1268.1920528411865,
- "p99": 1274.8479843139648
+ "p50": 1658.1120491027832,
+ "p90": 1689.6320581436157,
+ "p95": 1707.5200080871582,
+ "p99": 1989.1200065612793
},
"isolatedSum": {
- "p50": 1275.6479978561401,
- "p90": 1291.2319898605347,
- "p95": 1297.8559732437134,
- "p99": 1314.7520422935486
+ "p50": 1710.6239795684814,
+ "p90": 2003.4559965133667,
+ "p95": 2064.2879605293274,
+ "p99": 2424.831986427307
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 716111872,
+ "dispatchLogicalBytes": 358055936,
"combineLogicalBytes": 716111872,
"fanoutMean": 5.33544921875,
"recvTokensMax": 10982,
- "stragglerRank": 7,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18119,31 +17503,31 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 979.5200228691101,
- "p90": 990.1760220527649,
- "p95": 992.0960068702698,
- "p99": 1001.5039443969727
+ "p50": 1547.2320318222046,
+ "p90": 1564.2240047454834,
+ "p95": 1583.3920240402222,
+ "p99": 1904.960036277771
},
"combine": {
- "p50": 1442.304015159607,
- "p90": 1454.1120529174805,
- "p95": 1455.1680088043213,
- "p99": 1493.7599897384644
+ "p50": 1536.8000268936157,
+ "p90": 1578.5280466079712,
+ "p95": 1848.512053489685,
+ "p99": 1890.239953994751
},
"roundtrip": {
- "p50": 2391.200065612793,
- "p90": 2402.9760360717773,
- "p95": 2407.7439308166504,
- "p99": 2476.6080379486084
+ "p50": 3048.896074295044,
+ "p90": 3067.8720474243164,
+ "p95": 3086.6239070892334,
+ "p99": 3380.000114440918
},
"isolatedSum": {
- "p50": 2421.824038028717,
- "p90": 2444.2880749702454,
- "p95": 2447.264015674591,
- "p99": 2495.263934135437
+ "p50": 3084.0320587158203,
+ "p90": 3142.7520513534546,
+ "p95": 3431.904077529907,
+ "p99": 3795.199990272522
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1432395776,
+ "dispatchLogicalBytes": 716197888,
"combineLogicalBytes": 1432395776,
"fanoutMean": 5.336090087890625,
"recvTokensMax": 21939,
@@ -18155,16 +17539,16 @@
]
},
{
- "id": "cx-65c7aa3e",
- "identity": "b300|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "b300_307ed708",
- "comparisonKey": "691973c29c59446c",
+ "id": "cx-e69d7792",
+ "identity": "gb200|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "003150d36349a329",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:52:08.477764+00:00",
+ "generatedAt": "2026-06-29T13:39:30.760982+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_06",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "prefill",
"mode": "normal",
@@ -18172,30 +17556,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "Qwen3.5",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "shape 5120/8/160",
"shape": {
- "hidden": 4096,
+ "hidden": 5120,
"topk": 8,
- "experts": 128,
+ "experts": 160,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -18203,59 +17588,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "a9df48e6438e77a",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285702163",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285702163",
- "createdAt": "2026-06-27T09:52:08.477764+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374321542",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374321542",
+ "createdAt": "2026-06-29T13:08:07Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 82.68799632787704,
- "p90": 86.30400151014328,
- "p95": 89.66399729251862,
- "p99": 95.29600292444229
+ "p50": 386.27201318740845,
+ "p90": 416.6719913482666,
+ "p95": 425.24799704551697,
+ "p99": 439.8080110549927
},
"combine": {
- "p50": 92.22400188446045,
- "p90": 94.43199634552002,
- "p95": 101.6319990158081,
- "p99": 103.96800190210342
+ "p50": 106.88000172376633,
+ "p90": 111.26399785280228,
+ "p95": 113.66400122642517,
+ "p99": 117.27999895811081
},
"roundtrip": {
- "p50": 159.9999964237213,
- "p90": 167.90400445461273,
- "p95": 170.49600183963776,
- "p99": 177.12000012397766
+ "p50": 466.97598695755005,
+ "p90": 492.2559857368469,
+ "p95": 498.49599599838257,
+ "p99": 509.3439817428589
},
"isolatedSum": {
- "p50": 174.9119982123375,
- "p90": 180.7359978556633,
- "p95": 191.29599630832672,
- "p99": 199.26400482654572
+ "p50": 493.1520149111748,
+ "p90": 527.9359892010689,
+ "p95": 538.9119982719421,
+ "p99": 557.0880100131035
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
+ "dispatchLogicalBytes": 27837440,
+ "combineLogicalBytes": 55674880,
+ "fanoutMean": 5.3095703125,
"recvTokensMax": 699,
- "stragglerRank": 7,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18264,35 +17649,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 123.29600006341934,
- "p90": 127.00800597667694,
- "p95": 128.22400033473969,
- "p99": 140.03199338912964
+ "p50": 381.6959857940674,
+ "p90": 408.6399972438812,
+ "p95": 415.96800088882446,
+ "p99": 430.4960072040558
},
"combine": {
- "p50": 127.9039978981018,
- "p90": 129.82399761676788,
- "p95": 131.9359987974167,
- "p99": 143.42400431632996
+ "p50": 146.04799449443817,
+ "p90": 151.74399316310883,
+ "p95": 154.08000349998474,
+ "p99": 158.01599621772766
},
"roundtrip": {
- "p50": 229.5999974012375,
- "p90": 235.83999276161194,
- "p95": 237.34399676322937,
- "p99": 241.60000681877136
+ "p50": 511.23201847076416,
+ "p90": 531.7440032958984,
+ "p95": 538.4640097618103,
+ "p99": 548.1600165367126
},
"isolatedSum": {
- "p50": 251.19999796152115,
- "p90": 256.8320035934448,
- "p95": 260.1599991321564,
- "p99": 283.4559977054596
+ "p50": 527.7439802885056,
+ "p90": 560.38399040699,
+ "p95": 570.0480043888092,
+ "p99": 588.5120034217834
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 89726976,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 55552000,
+ "combineLogicalBytes": 111104000,
+ "fanoutMean": 5.2978515625,
+ "recvTokensMax": 1387,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18301,35 +17686,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 173.75999689102173,
- "p90": 177.18400061130524,
- "p95": 178.6240041255951,
- "p99": 186.97600066661835
+ "p50": 473.7600088119507,
+ "p90": 490.01601338386536,
+ "p95": 495.9680140018463,
+ "p99": 504.5120120048523
},
"combine": {
- "p50": 191.64800643920898,
- "p90": 200.3519982099533,
- "p95": 200.8640021085739,
- "p99": 212.3199999332428
+ "p50": 221.82400524616241,
+ "p90": 228.19200158119202,
+ "p95": 230.24000227451324,
+ "p99": 236.54399812221527
},
"roundtrip": {
- "p50": 345.7599878311157,
- "p90": 350.816011428833,
- "p95": 352.6400029659271,
- "p99": 360.1279854774475
+ "p50": 696.3199973106384,
+ "p90": 716.2240147590637,
+ "p95": 720.9600210189819,
+ "p99": 728.6400198936462
},
"isolatedSum": {
- "p50": 365.4080033302307,
- "p90": 377.53599882125854,
- "p95": 379.488006234169,
- "p99": 399.29600059986115
+ "p50": 695.5840140581131,
+ "p90": 718.2080149650574,
+ "p95": 726.2080162763596,
+ "p99": 741.0560101270676
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 179503104,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 111549440,
+ "combineLogicalBytes": 223098880,
+ "fanoutMean": 5.319091796875,
+ "recvTokensMax": 2762,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18338,35 +17723,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 289.0560030937195,
- "p90": 292.86399483680725,
- "p95": 294.1119968891144,
- "p99": 311.71199679374695
+ "p50": 666.3359999656677,
+ "p90": 688.7999773025513,
+ "p95": 693.7280297279358,
+ "p99": 700.8000016212463
},
"combine": {
- "p50": 397.599995136261,
- "p90": 408.9280068874359,
- "p95": 410.0160002708435,
- "p99": 421.7279851436615
+ "p50": 473.7600088119507,
+ "p90": 480.25599122047424,
+ "p95": 482.4639856815338,
+ "p99": 487.4880015850067
},
"roundtrip": {
- "p50": 594.3359732627869,
- "p90": 600.6079912185669,
- "p95": 604.4480204582214,
- "p99": 610.5920076370239
+ "p50": 1111.4879846572876,
+ "p90": 1132.2239637374878,
+ "p95": 1135.8400583267212,
+ "p99": 1144.9600458145142
},
"isolatedSum": {
- "p50": 686.6559982299805,
- "p90": 701.7920017242432,
- "p95": 704.1279971599579,
- "p99": 733.4399819374084
+ "p50": 1140.0960087776184,
+ "p90": 1169.0559685230255,
+ "p95": 1176.1920154094696,
+ "p99": 1188.288003206253
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 359022592,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 223365120,
+ "combineLogicalBytes": 446730240,
+ "fanoutMean": 5.325439453125,
+ "recvTokensMax": 5518,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18375,35 +17760,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 521.3119983673096,
- "p90": 528.544008731842,
- "p95": 534.0480208396912,
- "p99": 546.8479990959167
+ "p50": 1043.4880256652832,
+ "p90": 1062.656044960022,
+ "p95": 1066.9759511947632,
+ "p99": 1077.8239965438843
},
"combine": {
- "p50": 755.2000284194946,
- "p90": 765.887975692749,
- "p95": 766.6559815406799,
- "p99": 781.5039753913879
+ "p50": 840.0959968566895,
+ "p90": 846.015989780426,
+ "p95": 847.711980342865,
+ "p99": 851.6479730606079
},
"roundtrip": {
- "p50": 1255.0400495529175,
- "p90": 1264.8320198059082,
- "p95": 1271.3279724121094,
- "p99": 1316.3199424743652
+ "p50": 1844.256043434143,
+ "p90": 1863.935947418213,
+ "p95": 1870.911955833435,
+ "p99": 1884.384036064148
},
"isolatedSum": {
- "p50": 1276.5120267868042,
- "p90": 1294.431984424591,
- "p95": 1300.704002380371,
- "p99": 1328.3519744873047
+ "p50": 1883.5840225219727,
+ "p90": 1908.672034740448,
+ "p95": 1914.6879315376282,
+ "p99": 1929.4719696044922
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 716111872,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 446817280,
+ "combineLogicalBytes": 893634560,
+ "fanoutMean": 5.32647705078125,
+ "recvTokensMax": 11032,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18412,35 +17797,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 980.0000190734863,
- "p90": 991.0719990730286,
- "p95": 995.0399994850159,
- "p99": 1015.1040554046631
+ "p50": 1834.496021270752,
+ "p90": 1857.9519987106323,
+ "p95": 1866.495966911316,
+ "p99": 2043.9679622650146
},
"combine": {
- "p50": 1441.856026649475,
- "p90": 1453.5679817199707,
- "p95": 1456.9599628448486,
- "p99": 1492.5119876861572
+ "p50": 1586.2400531768799,
+ "p90": 1593.1520462036133,
+ "p95": 1595.296025276184,
+ "p99": 1600.7360219955444
},
"roundtrip": {
- "p50": 2390.6240463256836,
- "p90": 2406.9759845733643,
- "p95": 2415.616035461426,
- "p99": 2474.3359088897705
+ "p50": 3375.391960144043,
+ "p90": 3391.871929168701,
+ "p95": 3397.887945175171,
+ "p99": 3405.280113220215
},
"isolatedSum": {
- "p50": 2421.8560457229614,
- "p90": 2444.6399807929993,
- "p95": 2451.9999623298645,
- "p99": 2507.6160430908203
+ "p50": 3420.736074447632,
+ "p90": 3451.1040449142456,
+ "p95": 3461.7919921875,
+ "p99": 3644.703984260559
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1432395776,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 893132800,
+ "combineLogicalBytes": 1786265600,
+ "fanoutMean": 5.323486328125,
+ "recvTokensMax": 21895,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18448,16 +17833,16 @@
]
},
{
- "id": "cx-ec7ecdcc",
- "identity": "b300|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42",
- "colorKey": "b300_307ed708",
- "comparisonKey": "03e634138c74f76f",
+ "id": "cx-e1f3cb9e",
+ "identity": "gb200|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "a99dfa04a87e0b18",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:52:35.993019+00:00",
+ "generatedAt": "2026-06-29T14:03:20.332386+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_13",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_1",
+ "sku": "gb200",
"backend": "deepep",
"phase": "prefill",
"mode": "normal",
@@ -18465,30 +17850,31 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "shape 5120/8/160",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "MiniMax-M3",
"shape": {
- "hidden": 5120,
+ "hidden": 6144,
"topk": 8,
- "experts": 160,
+ "experts": 256,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -18496,59 +17882,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "0c022a63bbcbf42",
- "workloadId": "set:6:28c0c09b13ff0acf",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285713494",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285713494",
- "createdAt": "2026-06-27T09:52:35.993019+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 92.51199662685394,
- "p90": 98.81599992513657,
- "p95": 100.12800246477127,
- "p99": 117.50400066375732
+ "p50": 415.8720076084137,
+ "p90": 446.5920031070709,
+ "p95": 601.9840240478516,
+ "p99": 737.6000285148621
},
"combine": {
- "p50": 103.13600301742554,
- "p90": 104.22399640083313,
- "p95": 104.96000200510025,
- "p99": 114.01599645614624
+ "p50": 114.68800157308578,
+ "p90": 122.6240023970604,
+ "p95": 171.39199376106262,
+ "p99": 422.6880073547363
},
"roundtrip": {
- "p50": 176.60799622535706,
- "p90": 182.8799992799759,
- "p95": 184.92799997329712,
- "p99": 195.5520063638687
+ "p50": 508.7360143661499,
+ "p90": 540.6079888343811,
+ "p95": 742.0799732208252,
+ "p99": 809.9200129508972
},
"isolatedSum": {
- "p50": 195.64799964427948,
- "p90": 203.0399963259697,
- "p95": 205.08800446987152,
- "p99": 231.51999711990356
+ "p50": 530.5600091814995,
+ "p90": 569.2160055041313,
+ "p95": 773.3760178089142,
+ "p99": 1160.2880358695984
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 55674880,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 33288192,
+ "combineLogicalBytes": 66576384,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18557,35 +17943,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 119.52000111341476,
- "p90": 124.95999783277512,
- "p95": 126.46399438381195,
- "p99": 136.25599443912506
+ "p50": 402.3999869823456,
+ "p90": 447.29599356651306,
+ "p95": 700.1280188560486,
+ "p99": 777.7919769287109
},
"combine": {
- "p50": 139.96799290180206,
- "p90": 141.37600362300873,
- "p95": 142.7839994430542,
- "p99": 151.48800611495972
+ "p50": 158.01599621772766,
+ "p90": 401.5679955482483,
+ "p95": 440.5440092086792,
+ "p99": 486.4000082015991
},
"roundtrip": {
- "p50": 244.54399943351746,
- "p90": 249.4720071554184,
- "p95": 251.10399723052979,
- "p99": 258.08000564575195
+ "p50": 540.0959849357605,
+ "p90": 579.2959928512573,
+ "p95": 596.992015838623,
+ "p99": 850.9119749069214
},
"isolatedSum": {
- "p50": 259.4879940152168,
- "p90": 266.33600145578384,
- "p95": 269.24799382686615,
- "p99": 287.7440005540848
+ "p50": 560.4159832000732,
+ "p90": 848.8639891147614,
+ "p95": 1140.6720280647278,
+ "p99": 1264.19198513031
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 111104000,
- "combineLogicalBytes": 111104000,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 1387,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 66809856,
+ "combineLogicalBytes": 133619712,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18594,35 +17980,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 185.85599958896637,
- "p90": 189.05599415302277,
- "p95": 191.48799777030945,
- "p99": 201.7280012369156
+ "p50": 528.4799933433533,
+ "p90": 556.5760135650635,
+ "p95": 668.0319905281067,
+ "p99": 878.5600066184998
},
"combine": {
- "p50": 214.62400257587433,
- "p90": 224.48000311851501,
- "p95": 225.43999552726746,
- "p99": 236.4799976348877
+ "p50": 261.9839906692505,
+ "p90": 510.55997610092163,
+ "p95": 548.5759973526001,
+ "p99": 573.4080076217651
},
"roundtrip": {
- "p50": 372.76801466941833,
- "p90": 379.2319893836975,
- "p95": 381.632000207901,
- "p99": 400.9599983692169
+ "p50": 754.144012928009,
+ "p90": 774.4960188865662,
+ "p95": 1018.6560153961182,
+ "p99": 1087.9679918289185
},
"isolatedSum": {
- "p50": 400.4800021648407,
- "p90": 413.5359972715378,
- "p95": 416.9279932975769,
- "p99": 438.2079988718033
+ "p50": 790.4639840126038,
+ "p90": 1067.135989665985,
+ "p95": 1216.6079878807068,
+ "p99": 1451.968014240265
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 223098880,
- "combineLogicalBytes": 223098880,
- "fanoutMean": 5.319091796875,
- "recvTokensMax": 2762,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 133828608,
+ "combineLogicalBytes": 267657216,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18631,35 +18017,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 303.0720055103302,
- "p90": 310.65601110458374,
- "p95": 313.1519854068756,
- "p99": 327.84000039100647
+ "p50": 750.2719759941101,
+ "p90": 798.7840175628662,
+ "p95": 878.3680200576782,
+ "p99": 1141.759991645813
},
"combine": {
- "p50": 436.2240135669708,
- "p90": 445.47200202941895,
- "p95": 445.8880126476288,
- "p99": 458.9439928531647
+ "p50": 475.19999742507935,
+ "p90": 747.6159930229187,
+ "p95": 801.0879755020142,
+ "p99": 840.9600257873535
},
"roundtrip": {
- "p50": 699.4240283966064,
- "p90": 707.9359889030457,
- "p95": 712.2560143470764,
- "p99": 739.520013332367
+ "p50": 1190.783977508545,
+ "p90": 1414.1440391540527,
+ "p95": 1574.6560096740723,
+ "p99": 5944.064140319824
},
"isolatedSum": {
- "p50": 739.296019077301,
- "p90": 756.1280131340027,
- "p95": 759.0399980545044,
- "p99": 786.7839932441711
+ "p50": 1225.4719734191895,
+ "p90": 1546.400010585785,
+ "p95": 1679.4559955596924,
+ "p99": 1982.7200174331665
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 446730240,
- "combineLogicalBytes": 446730240,
- "fanoutMean": 5.325439453125,
- "recvTokensMax": 5518,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 267190272,
+ "combineLogicalBytes": 534380544,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18668,35 +18054,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 548.9280223846436,
- "p90": 558.0160021781921,
- "p95": 559.7119927406311,
- "p99": 571.5199708938599
+ "p50": 1186.3679885864258,
+ "p90": 1213.215947151184,
+ "p95": 1304.7679662704468,
+ "p99": 1598.9760160446167
},
"combine": {
- "p50": 779.3279886245728,
- "p90": 790.4639840126038,
- "p95": 791.263997554779,
- "p99": 803.9360046386719
+ "p50": 857.7600121498108,
+ "p90": 1133.6959600448608,
+ "p95": 1182.8479766845703,
+ "p99": 1223.9680290222168
},
"roundtrip": {
- "p50": 1311.1679553985596,
- "p90": 1321.3759660720825,
- "p95": 1328.3519744873047,
- "p99": 1356.0960292816162
+ "p50": 2003.2639503479004,
+ "p90": 2130.496025085449,
+ "p95": 2245.8879947662354,
+ "p99": 2302.9119968414307
},
"isolatedSum": {
- "p50": 1328.2560110092163,
- "p90": 1348.479986190796,
- "p95": 1350.9759902954102,
- "p99": 1375.4559755325317
+ "p50": 2044.1280007362366,
+ "p90": 2346.911907196045,
+ "p95": 2487.615942955017,
+ "p99": 2822.9440450668335
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 893634560,
- "combineLogicalBytes": 893634560,
- "fanoutMean": 5.32647705078125,
- "recvTokensMax": 11032,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 533059584,
+ "combineLogicalBytes": 1066119168,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18705,35 +18091,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 1032.3200225830078,
- "p90": 1042.688012123108,
- "p95": 1046.6879606246948,
- "p99": 1057.919979095459
+ "p50": 2106.2400341033936,
+ "p90": 2129.6958923339844,
+ "p95": 2328.831911087036,
+ "p99": 2491.6160106658936
},
"combine": {
- "p50": 1477.4080514907837,
- "p90": 1481.4079999923706,
- "p95": 1490.9759759902954,
- "p99": 1538.9440059661865
+ "p50": 1610.0159883499146,
+ "p90": 1913.599967956543,
+ "p95": 1943.8079595565796,
+ "p99": 1971.168041229248
},
"roundtrip": {
- "p50": 2480.6079864501953,
- "p90": 2492.9919242858887,
- "p95": 2498.624086380005,
- "p99": 2541.7280197143555
+ "p50": 3669.4719791412354,
+ "p90": 3683.3600997924805,
+ "p95": 3691.3599967956543,
+ "p99": 3996.0319995880127
},
"isolatedSum": {
- "p50": 2509.7280740737915,
- "p90": 2524.0960121154785,
- "p95": 2537.6639366149902,
- "p99": 2596.8639850616455
+ "p50": 3716.256022453308,
+ "p90": 4043.2958602905273,
+ "p95": 4272.639870643616,
+ "p99": 4462.784051895142
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1786265600,
- "combineLogicalBytes": 1786265600,
- "fanoutMean": 5.323486328125,
- "recvTokensMax": 21895,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1065861120,
+ "combineLogicalBytes": 2131722240,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18741,16 +18127,16 @@
]
},
{
- "id": "cx-99771256",
- "identity": "b300|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_c9569580",
- "comparisonKey": "f9f9af4879f1b5f6",
+ "id": "cx-b9c2ee85",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb200_6d63c708",
+ "comparisonKey": "7d3b869c7fd78b55",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:49.871789+00:00",
+ "generatedAt": "2026-06-29T13:50:26.724922+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_06",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "prefill",
"mode": "normal",
@@ -18758,14 +18144,14 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 6144,
+ "hidden": 7168,
"topk": 8,
"experts": 256,
"routing": "uniform",
@@ -18773,15 +18159,16 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -18789,59 +18176,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287497246",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287497246",
- "createdAt": "2026-06-27T11:13:49.871789+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 99.29600358009338,
- "p90": 102.14400291442871,
- "p95": 102.9760017991066,
- "p99": 110.55999994277954
+ "p50": 112.06399649381638,
+ "p90": 119.61600184440613,
+ "p95": 122.75200337171555,
+ "p99": 128.7360042333603
},
"combine": {
- "p50": 105.69600015878677,
- "p90": 114.20799791812897,
- "p95": 114.62400108575821,
- "p99": 128.83199751377106
+ "p50": 119.74400281906128,
+ "p90": 124.86399710178375,
+ "p95": 128.09599936008453,
+ "p99": 136.73600554466248
},
"roundtrip": {
- "p50": 184.57600474357605,
- "p90": 188.83199989795685,
- "p95": 190.17599523067474,
- "p99": 198.08000326156616
+ "p50": 275.39199590682983,
+ "p90": 286.24001145362854,
+ "p95": 289.792001247406,
+ "p99": 294.3040132522583
},
"isolatedSum": {
- "p50": 204.99200373888016,
- "p90": 216.35200083255768,
- "p95": 217.6000028848648,
- "p99": 239.3919974565506
+ "p50": 231.80799931287766,
+ "p90": 244.47999894618988,
+ "p95": 250.84800273180008,
+ "p99": 265.47200977802277
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
"fanoutMean": 5.291015625,
"recvTokensMax": 723,
- "stragglerRank": 4,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18850,35 +18237,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 132.38400220870972,
- "p90": 137.472003698349,
- "p95": 139.42399621009827,
- "p99": 147.20000326633453
+ "p50": 143.93599331378937,
+ "p90": 150.84800124168396,
+ "p95": 153.28000485897064,
+ "p99": 157.98400342464447
},
"combine": {
- "p50": 150.14399588108063,
- "p90": 151.61600708961487,
- "p95": 151.7760008573532,
- "p99": 154.11199629306793
+ "p50": 161.98399662971497,
+ "p90": 166.78400337696075,
+ "p95": 168.83200407028198,
+ "p99": 172.7360039949417
},
"roundtrip": {
- "p50": 259.93600487709045,
- "p90": 264.0640139579773,
- "p95": 265.1520073413849,
- "p99": 282.81599283218384
+ "p50": 363.072007894516,
+ "p90": 370.9760010242462,
+ "p95": 374.1759955883026,
+ "p99": 381.3439905643463
},
"isolatedSum": {
- "p50": 282.52799808979034,
- "p90": 289.08801078796387,
- "p95": 291.1999970674515,
- "p99": 301.31199955940247
+ "p50": 305.91998994350433,
+ "p90": 317.6320046186447,
+ "p95": 322.1120089292526,
+ "p99": 330.7200074195862
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 133619712,
- "combineLogicalBytes": 133619712,
+ "dispatchLogicalBytes": 77944832,
+ "combineLogicalBytes": 155889664,
"fanoutMean": 5.3095703125,
"recvTokensMax": 1422,
- "stragglerRank": 0,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18887,35 +18274,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 197.37599790096283,
- "p90": 201.34399831295013,
- "p95": 202.36800611019135,
- "p99": 210.40000021457672
+ "p50": 199.39200580120087,
+ "p90": 207.07200467586517,
+ "p95": 210.4959934949875,
+ "p99": 215.39199352264404
},
"combine": {
- "p50": 238.81599307060242,
- "p90": 248.79999458789825,
- "p95": 249.85599517822266,
- "p99": 255.74401021003723
+ "p50": 284.5439910888672,
+ "p90": 290.20801186561584,
+ "p95": 292.1600043773651,
+ "p99": 296.03201150894165
},
"roundtrip": {
- "p50": 410.4959964752197,
- "p90": 417.7919924259186,
- "p95": 420.8959937095642,
- "p99": 438.01599740982056
+ "p50": 594.8479771614075,
+ "p90": 603.2639741897583,
+ "p95": 605.4080128669739,
+ "p99": 613.3120059967041
},
"isolatedSum": {
- "p50": 436.19199097156525,
- "p90": 450.1439929008484,
- "p95": 452.224001288414,
- "p99": 466.14401042461395
+ "p50": 483.93599689006805,
+ "p90": 497.280016541481,
+ "p95": 502.6559978723526,
+ "p99": 511.4240050315857
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 267657216,
- "combineLogicalBytes": 267657216,
+ "dispatchLogicalBytes": 156133376,
+ "combineLogicalBytes": 312266752,
"fanoutMean": 5.31787109375,
"recvTokensMax": 2779,
- "stragglerRank": 7,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18924,35 +18311,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 319.42400336265564,
- "p90": 327.5519907474518,
- "p95": 330.24001121520996,
- "p99": 346.94400429725647
+ "p50": 313.6959969997406,
+ "p90": 321.50399684906006,
+ "p95": 324.44798946380615,
+ "p99": 334.01599526405334
},
"combine": {
- "p50": 444.89601254463196,
- "p90": 447.61601090431213,
- "p95": 449.0880072116852,
- "p99": 458.3680033683777
+ "p50": 489.56799507141113,
+ "p90": 495.87199091911316,
+ "p95": 497.8240132331848,
+ "p99": 502.78401374816895
},
"roundtrip": {
- "p50": 742.464005947113,
- "p90": 748.960018157959,
- "p95": 751.6160011291504,
- "p99": 762.1440291404724
+ "p50": 1021.6319561004639,
+ "p90": 1028.607964515686,
+ "p95": 1031.008005142212,
+ "p99": 1035.1040363311768
},
"isolatedSum": {
- "p50": 764.3200159072876,
- "p90": 775.1680016517639,
- "p95": 779.3280184268951,
- "p99": 805.3120076656342
+ "p50": 803.2639920711517,
+ "p90": 817.3759877681732,
+ "p95": 822.272002696991,
+ "p99": 836.8000090122223
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 534380544,
- "combineLogicalBytes": 534380544,
+ "dispatchLogicalBytes": 311721984,
+ "combineLogicalBytes": 623443968,
"fanoutMean": 5.30859375,
"recvTokensMax": 5505,
- "stragglerRank": 4,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18961,35 +18348,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 568.2880282402039,
- "p90": 572.9600191116333,
- "p95": 574.176013469696,
- "p99": 603.4560203552246
+ "p50": 559.008002281189,
+ "p90": 567.903995513916,
+ "p95": 571.3919997215271,
+ "p99": 579.8079967498779
},
"combine": {
- "p50": 802.4640083312988,
- "p90": 813.7279748916626,
- "p95": 814.9759769439697,
- "p99": 830.847978591919
+ "p50": 875.1999735832214,
+ "p90": 881.7920088768005,
+ "p95": 884.2880129814148,
+ "p99": 888.0320191383362
},
"roundtrip": {
- "p50": 1348.5759496688843,
- "p90": 1358.5599660873413,
- "p95": 1367.3280477523804,
- "p99": 1390.0799751281738
+ "p50": 1877.8879642486572,
+ "p90": 1887.8079652786255,
+ "p95": 1891.1360502243042,
+ "p99": 1899.3279933929443
},
"isolatedSum": {
- "p50": 1370.7520365715027,
- "p90": 1386.687994003296,
- "p95": 1389.1519904136658,
- "p99": 1434.3039989471436
+ "p50": 1434.2079758644104,
+ "p90": 1449.6960043907166,
+ "p95": 1455.680012702942,
+ "p99": 1467.840015888214
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1066119168,
- "combineLogicalBytes": 1066119168,
+ "dispatchLogicalBytes": 621902848,
+ "combineLogicalBytes": 1243805696,
"fanoutMean": 5.29547119140625,
"recvTokensMax": 10952,
- "stragglerRank": 4,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -18998,35 +18385,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 1055.3920269012451,
- "p90": 1064.5760297775269,
- "p95": 1068.1920051574707,
- "p99": 1080.191969871521
+ "p50": 1040.1279926300049,
+ "p90": 1051.967978477478,
+ "p95": 1054.5599460601807,
+ "p99": 1064.2880201339722
},
"combine": {
- "p50": 1502.8799772262573,
- "p90": 1514.464020729065,
- "p95": 1516.8319940567017,
- "p99": 1539.6159887313843
+ "p50": 1631.55198097229,
+ "p90": 1638.1440162658691,
+ "p95": 1640.0320529937744,
+ "p99": 1649.3760347366333
},
"roundtrip": {
- "p50": 2540.4160022735596,
- "p90": 2552.6719093322754,
- "p95": 2560.512065887451,
- "p99": 2638.6559009552
+ "p50": 3564.192056655884,
+ "p90": 3574.78404045105,
+ "p95": 3577.2159099578857,
+ "p99": 3582.304000854492
},
"isolatedSum": {
- "p50": 2558.2720041275024,
- "p90": 2579.040050506592,
- "p95": 2585.0239992141724,
- "p99": 2619.8079586029053
+ "p50": 2671.679973602295,
+ "p90": 2690.111994743347,
+ "p95": 2694.591999053955,
+ "p99": 2713.6640548706055
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2131722240,
- "combineLogicalBytes": 2131722240,
+ "dispatchLogicalBytes": 1243504640,
+ "combineLogicalBytes": 2487009280,
"fanoutMean": 5.294189453125,
"recvTokensMax": 21781,
- "stragglerRank": 4,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19034,16 +18421,16 @@
]
},
{
- "id": "cx-46706f1e",
- "identity": "b300|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_307ed708",
- "comparisonKey": "b477f7e33cf027ec",
+ "id": "cx-e6a97375",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "e07c5ac7fb8068b5",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:53:05.143387+00:00",
+ "generatedAt": "2026-06-29T13:52:17.589100+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_05",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "prefill",
"mode": "normal",
@@ -19051,14 +18438,14 @@
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 6144,
+ "hidden": 7168,
"topk": 8,
"experts": 256,
"routing": "uniform",
@@ -19066,15 +18453,16 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -19082,59 +18470,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285723416",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285723416",
- "createdAt": "2026-06-27T09:53:05.143387+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 99.35999661684036,
- "p90": 101.72799974679947,
- "p95": 102.59199887514114,
- "p99": 109.0560033917427
+ "p50": 341.0559892654419,
+ "p90": 360.3839874267578,
+ "p95": 365.56801199913025,
+ "p99": 375.64799189567566
},
"combine": {
- "p50": 104.8320010304451,
- "p90": 113.88800293207169,
- "p95": 114.20799791812897,
- "p99": 117.34399944543839
+ "p50": 118.8800036907196,
+ "p90": 124.86399710178375,
+ "p95": 127.10399925708771,
+ "p99": 131.20000064373016
},
"roundtrip": {
- "p50": 185.15199422836304,
- "p90": 189.28000330924988,
- "p95": 191.23199582099915,
- "p99": 221.95200622081757
+ "p50": 435.61598658561707,
+ "p90": 448.41599464416504,
+ "p95": 452.32000946998596,
+ "p99": 458.5280120372772
},
"isolatedSum": {
- "p50": 204.19199764728546,
- "p90": 215.61600267887115,
- "p95": 216.7999967932701,
- "p99": 226.4000028371811
+ "p50": 459.9359929561615,
+ "p90": 485.24798452854156,
+ "p95": 492.67201125621796,
+ "p99": 506.8479925394058
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
"fanoutMean": 5.291015625,
"recvTokensMax": 723,
- "stragglerRank": 2,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19143,35 +18531,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 128.54400277137756,
- "p90": 136.00000739097595,
- "p95": 137.66400516033173,
- "p99": 154.08000349998474
+ "p50": 376.2879967689514,
+ "p90": 386.55999302864075,
+ "p95": 388.7360095977783,
+ "p99": 393.7920033931732
},
"combine": {
- "p50": 142.94399321079254,
- "p90": 152.0639955997467,
- "p95": 152.41600573062897,
- "p99": 176.35199427604675
+ "p50": 161.21600568294525,
+ "p90": 166.27199947834015,
+ "p95": 168.12799870967865,
+ "p99": 173.34400117397308
},
"roundtrip": {
- "p50": 259.64799523353577,
- "p90": 263.5200023651123,
- "p95": 265.9519910812378,
- "p99": 286.1120104789734
+ "p50": 527.2960066795349,
+ "p90": 537.8559827804565,
+ "p95": 540.3839945793152,
+ "p99": 544.3519949913025
},
"isolatedSum": {
- "p50": 271.4879959821701,
- "p90": 288.06400299072266,
- "p95": 290.0800108909607,
- "p99": 330.4319977760315
+ "p50": 537.5040024518967,
+ "p90": 552.8319925069809,
+ "p95": 556.864008307457,
+ "p99": 567.1360045671463
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 133619712,
- "combineLogicalBytes": 133619712,
+ "dispatchLogicalBytes": 77944832,
+ "combineLogicalBytes": 155889664,
"fanoutMean": 5.3095703125,
"recvTokensMax": 1422,
- "stragglerRank": 2,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19180,35 +18568,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 196.57599925994873,
- "p90": 200.3519982099533,
- "p95": 201.63199305534363,
- "p99": 223.23200106620789
+ "p50": 495.61598896980286,
+ "p90": 506.1119794845581,
+ "p95": 509.5360279083252,
+ "p99": 516.0959959030151
},
"combine": {
- "p50": 239.45599794387817,
- "p90": 249.34400618076324,
- "p95": 250.11199712753296,
- "p99": 262.4320089817047
+ "p50": 283.3600044250488,
+ "p90": 289.0239953994751,
+ "p95": 290.8799946308136,
+ "p99": 295.1360046863556
},
"roundtrip": {
- "p50": 409.40800309181213,
- "p90": 418.17599534988403,
- "p95": 426.144003868103,
- "p99": 449.7919976711273
+ "p50": 754.5920014381409,
+ "p90": 763.9999985694885,
+ "p95": 765.9839987754822,
+ "p99": 773.9840149879456
},
"isolatedSum": {
- "p50": 436.0319972038269,
- "p90": 449.69600439071655,
- "p95": 451.7439901828766,
- "p99": 485.6640100479126
+ "p50": 778.9759933948517,
+ "p90": 795.1359748840332,
+ "p95": 800.4160225391388,
+ "p99": 811.2320005893707
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 267657216,
- "combineLogicalBytes": 267657216,
+ "dispatchLogicalBytes": 156133376,
+ "combineLogicalBytes": 312266752,
"fanoutMean": 5.31787109375,
"recvTokensMax": 2779,
- "stragglerRank": 5,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19217,35 +18605,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 316.0000145435333,
- "p90": 321.6319978237152,
- "p95": 326.55999064445496,
- "p99": 339.1680121421814
+ "p50": 731.935977935791,
+ "p90": 743.4239983558655,
+ "p95": 746.4960217475891,
+ "p99": 752.3199915885925
},
"combine": {
- "p50": 445.15201449394226,
- "p90": 446.78398966789246,
- "p95": 448.60801100730896,
- "p99": 472.03201055526733
+ "p50": 489.0879988670349,
+ "p90": 494.30400133132935,
+ "p95": 496.44801020622253,
+ "p99": 499.35999512672424
},
"roundtrip": {
- "p50": 743.2000041007996,
- "p90": 750.0799894332886,
- "p95": 757.5039863586426,
- "p99": 775.7120132446289
+ "p50": 1187.7119541168213,
+ "p90": 1199.77605342865,
+ "p95": 1202.623963356018,
+ "p99": 1210.6239795684814
},
"isolatedSum": {
- "p50": 761.1520290374756,
- "p90": 768.4159874916077,
- "p95": 775.1680016517639,
- "p99": 811.2000226974487
+ "p50": 1221.023976802826,
+ "p90": 1237.7279996871948,
+ "p95": 1242.9440319538116,
+ "p99": 1251.6799867153168
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 534380544,
- "combineLogicalBytes": 534380544,
+ "dispatchLogicalBytes": 311721984,
+ "combineLogicalBytes": 623443968,
"fanoutMean": 5.30859375,
"recvTokensMax": 5505,
- "stragglerRank": 7,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19254,35 +18642,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 567.0719742774963,
- "p90": 570.9760189056396,
- "p95": 573.2799768447876,
- "p99": 593.5360193252563
+ "p50": 1204.8319578170776,
+ "p90": 1216.8960571289062,
+ "p95": 1223.3599424362183,
+ "p99": 1430.3359985351562
},
"combine": {
- "p50": 801.7920255661011,
- "p90": 805.8239817619324,
- "p95": 815.1040077209473,
- "p99": 850.6879806518555
+ "p50": 878.3680200576782,
+ "p90": 885.1839900016785,
+ "p95": 887.2640132904053,
+ "p99": 891.9360041618347
},
"roundtrip": {
- "p50": 1346.336007118225,
- "p90": 1356.7359447479248,
- "p95": 1364.0960454940796,
- "p99": 1429.535984992981
+ "p50": 2039.9041175842285,
+ "p90": 2050.784111022949,
+ "p95": 2054.7521114349365,
+ "p99": 2062.3679161071777
},
"isolatedSum": {
- "p50": 1368.8639998435974,
- "p90": 1376.800000667572,
- "p95": 1388.3839845657349,
- "p99": 1444.2239999771118
+ "p50": 2083.199977874756,
+ "p90": 2102.0800471305847,
+ "p95": 2110.6239557266235,
+ "p99": 2322.272002696991
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1066119168,
- "combineLogicalBytes": 1066119168,
+ "dispatchLogicalBytes": 621902848,
+ "combineLogicalBytes": 1243805696,
"fanoutMean": 5.29547119140625,
"recvTokensMax": 10952,
- "stragglerRank": 7,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19291,35 +18679,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 1061.4080429077148,
- "p90": 1067.039966583252,
- "p95": 1075.32799243927,
- "p99": 1103.9040088653564
+ "p50": 2268.8639163970947,
+ "p90": 2281.4719676971436,
+ "p95": 2284.0960025787354,
+ "p99": 2294.3360805511475
},
"combine": {
- "p50": 1503.2000541687012,
- "p90": 1515.2640342712402,
- "p95": 1526.9759893417358,
- "p99": 1554.8160076141357
+ "p50": 1629.0240287780762,
+ "p90": 1636.1600160598755,
+ "p95": 1638.8800144195557,
+ "p99": 1645.8239555358887
},
"roundtrip": {
- "p50": 2543.2960987091064,
- "p90": 2558.880090713501,
- "p95": 2570.847988128662,
- "p99": 2619.1680431365967
+ "p50": 3866.5599822998047,
+ "p90": 3876.192092895508,
+ "p95": 3881.216049194336,
+ "p99": 3893.2158946990967
},
"isolatedSum": {
- "p50": 2564.608097076416,
- "p90": 2582.304000854492,
- "p95": 2602.303981781006,
- "p99": 2658.720016479492
+ "p50": 3897.887945175171,
+ "p90": 3917.631983757019,
+ "p95": 3922.976016998291,
+ "p99": 3940.160036087036
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2131722240,
- "combineLogicalBytes": 2131722240,
+ "dispatchLogicalBytes": 1243504640,
+ "combineLogicalBytes": 2487009280,
"fanoutMean": 5.294189453125,
"recvTokensMax": 21781,
- "stragglerRank": 4,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19327,47 +18715,48 @@
]
},
{
- "id": "cx-238797ce",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63",
- "colorKey": "b300_c9569580",
- "comparisonKey": "c4fbb2dad9521e3e",
+ "id": "cx-54bf03e2",
+ "identity": "gb200|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8",
+ "colorKey": "gb200_b0118480",
+ "comparisonKey": "1434b75a5e7d7c2d",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:57:38.465863+00:00",
+ "generatedAt": "2026-06-29T13:53:16.382279+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_13",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
+ "label": "GB200 EP8 · deepep · fp8",
+ "model": "Kimi-K2",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
+ "experts": 384,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -19375,132 +18764,243 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "157ca81687ddb63",
- "workloadId": "set:3:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "fc08bf2f8d42ed8",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271869301",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271869301",
- "createdAt": "2026-06-26T23:57:38.465863+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 93.66399794816971,
- "p90": 99.42399710416794,
- "p95": 101.24800354242325,
- "p99": 112.15999722480774
+ "p50": 354.6240031719208,
+ "p90": 374.1439878940582,
+ "p95": 378.2080113887787,
+ "p99": 386.46399974823
},
"combine": {
- "p50": 115.7120019197464,
- "p90": 116.54400080442429,
- "p95": 117.47200042009354,
- "p99": 128.7039965391159
+ "p50": 121.5360015630722,
+ "p90": 125.95200538635254,
+ "p95": 128.38399410247803,
+ "p99": 133.12000036239624
},
"roundtrip": {
- "p50": 195.3279972076416,
- "p90": 199.072003364563,
- "p95": 200.57600736618042,
- "p99": 214.1440063714981
+ "p50": 448.96000623703003,
+ "p90": 460.7999920845032,
+ "p95": 465.11998772621155,
+ "p99": 484.25599932670593
},
"isolatedSum": {
- "p50": 209.3759998679161,
- "p90": 215.96799790859222,
- "p95": 218.72000396251678,
- "p99": 240.86399376392365
+ "p50": 476.160004734993,
+ "p90": 500.09599328041077,
+ "p95": 506.5920054912567,
+ "p99": 519.5840001106262
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38757376,
+ "combineLogicalBytes": 77514752,
+ "fanoutMean": 5.2802734375,
+ "recvTokensMax": 707,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 193.7599927186966,
- "p90": 200.3519982099533,
- "p95": 202.94399559497833,
- "p99": 209.75999534130096
+ "p50": 383.39200615882874,
+ "p90": 394.5919871330261,
+ "p95": 398.0160057544708,
+ "p99": 404.06399965286255
},
"combine": {
- "p50": 272.92799949645996,
- "p90": 275.04000067710876,
- "p95": 275.6800055503845,
- "p99": 289.4720137119293
+ "p50": 164.44799304008484,
+ "p90": 170.17599940299988,
+ "p95": 172.44799435138702,
+ "p99": 175.87199807167053
},
"roundtrip": {
- "p50": 434.5279932022095,
- "p90": 444.95999813079834,
- "p95": 448.1920003890991,
- "p99": 461.37601137161255
+ "p50": 534.3040227890015,
+ "p90": 545.4080104827881,
+ "p95": 548.6400127410889,
+ "p99": 564.2240047454834
},
"isolatedSum": {
- "p50": 466.68799221515656,
- "p90": 475.3919988870621,
- "p95": 478.62400114536285,
- "p99": 499.2320090532303
+ "p50": 547.8399991989136,
+ "p90": 564.767986536026,
+ "p95": 570.4640001058578,
+ "p99": 579.9359977245331
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
+ "dispatchLogicalBytes": 77285376,
+ "combineLogicalBytes": 154570752,
+ "fanoutMean": 5.2646484375,
+ "recvTokensMax": 1391,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 577.6960253715515,
- "p90": 582.6879739761353,
- "p95": 584.6400260925293,
- "p99": 595.7120060920715
+ "p50": 501.8240213394165,
+ "p90": 511.4240050315857,
+ "p95": 514.0159726142883,
+ "p99": 518.6880230903625
},
"combine": {
- "p50": 818.336009979248,
- "p90": 828.4479975700378,
- "p95": 838.3679986000061,
- "p99": 852.6399731636047
+ "p50": 285.95200181007385,
+ "p90": 291.7119860649109,
+ "p95": 293.8239872455597,
+ "p99": 298.2720136642456
},
"roundtrip": {
- "p50": 1377.7920007705688,
- "p90": 1387.3920440673828,
- "p95": 1397.2480297088623,
- "p99": 1410.4640483856201
+ "p50": 761.4719867706299,
+ "p90": 772.159993648529,
+ "p95": 774.8159766197205,
+ "p99": 784.4799757003784
},
"isolatedSum": {
- "p50": 1396.0320353507996,
- "p90": 1411.135971546173,
- "p95": 1423.0080246925354,
- "p99": 1448.3519792556763
+ "p50": 787.7760231494904,
+ "p90": 803.1359910964966,
+ "p95": 807.839959859848,
+ "p99": 816.9600367546082
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
+ "dispatchLogicalBytes": 154886144,
+ "combineLogicalBytes": 309772288,
+ "fanoutMean": 5.275390625,
+ "recvTokensMax": 2754,
+ "stragglerRank": 4,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
+ "dispatch": {
+ "p50": 739.2320036888123,
+ "p90": 750.976026058197,
+ "p95": 754.8159956932068,
+ "p99": 763.7119889259338
+ },
+ "combine": {
+ "p50": 492.48000979423523,
+ "p90": 498.52800369262695,
+ "p95": 501.0240077972412,
+ "p99": 504.83202934265137
+ },
+ "roundtrip": {
+ "p50": 1193.503975868225,
+ "p90": 1203.2320499420166,
+ "p95": 1207.0399522781372,
+ "p99": 1216.1279916763306
+ },
+ "isolatedSum": {
+ "p50": 1231.7120134830475,
+ "p90": 1249.504029750824,
+ "p95": 1255.840003490448,
+ "p99": 1268.5440182685852
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 309750784,
+ "combineLogicalBytes": 619501568,
+ "fanoutMean": 5.2750244140625,
+ "recvTokensMax": 5469,
+ "stragglerRank": 4,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
+ "dispatch": {
+ "p50": 1204.3520212173462,
+ "p90": 1216.5440320968628,
+ "p95": 1221.1840152740479,
+ "p99": 1232.7040433883667
+ },
+ "combine": {
+ "p50": 869.0879940986633,
+ "p90": 875.648021697998,
+ "p95": 877.1839737892151,
+ "p99": 883.7760090827942
+ },
+ "roundtrip": {
+ "p50": 2034.1439247131348,
+ "p90": 2047.1038818359375,
+ "p95": 2052.8318881988525,
+ "p99": 2060.7359409332275
+ },
+ "isolatedSum": {
+ "p50": 2073.4400153160095,
+ "p90": 2092.192053794861,
+ "p95": 2098.367989063263,
+ "p99": 2116.480052471161
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 619687936,
+ "combineLogicalBytes": 1239375872,
+ "fanoutMean": 5.276611328125,
+ "recvTokensMax": 10883,
+ "stragglerRank": 4,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
+ "dispatch": {
+ "p50": 2272.2880840301514,
+ "p90": 2281.6319465637207,
+ "p95": 2285.183906555176,
+ "p99": 2294.3038940429688
+ },
+ "combine": {
+ "p50": 1623.968005180359,
+ "p90": 1630.784034729004,
+ "p95": 1633.3119869232178,
+ "p99": 1637.8240585327148
+ },
+ "roundtrip": {
+ "p50": 3862.9438877105713,
+ "p90": 3873.2481002807617,
+ "p95": 3877.2799968719482,
+ "p99": 3888.0960941314697
+ },
+ "isolatedSum": {
+ "p50": 3896.2560892105103,
+ "p90": 3912.4159812927246,
+ "p95": 3918.4958934783936,
+ "p99": 3932.1279525756836
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 1239834624,
+ "combineLogicalBytes": 2479669248,
+ "fanoutMean": 5.278564453125,
+ "recvTokensMax": 21730,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -19509,28 +19009,28 @@
]
},
{
- "id": "cx-20a284d3",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_c9569580",
- "comparisonKey": "0484fdcbaa6c315c",
+ "id": "cx-0cf4ef81",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb200_7e970144",
+ "comparisonKey": "e8f405c383a7484e",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T10:26:05.756924+00:00",
+ "generatedAt": "2026-06-29T13:51:18.445065+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb200-nv_0",
+ "sku": "gb200",
"backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "cached-layout-comm-only-v1",
+ "topologyClass": "gb200-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
+ "label": "GB200 EP8 · deepep · fp8 [cl]",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -19541,15 +19041,16 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -19557,59 +19058,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28286434915",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286434915",
- "createdAt": "2026-06-27T10:26:05.756924+00:00",
- "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b"
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 94.33600306510925,
- "p90": 98.33600372076035,
- "p95": 101.40799731016159,
- "p99": 131.1040073633194
+ "p50": 95.51999717950821,
+ "p90": 102.75200009346008,
+ "p95": 105.8880016207695,
+ "p99": 121.79200351238251
},
"combine": {
- "p50": 115.99999666213989,
- "p90": 117.47200042009354,
- "p95": 118.6240017414093,
- "p99": 131.071999669075
+ "p50": 119.1679984331131,
+ "p90": 124.22399967908859,
+ "p95": 126.52799487113953,
+ "p99": 130.2720010280609
},
"roundtrip": {
- "p50": 194.4960057735443,
- "p90": 200.70399343967438,
- "p95": 203.3279985189438,
- "p99": 237.34399676322937
+ "p50": 258.0159902572632,
+ "p90": 269.0240144729614,
+ "p95": 271.07200026512146,
+ "p99": 281.2800109386444
},
"isolatedSum": {
- "p50": 210.33599972724915,
- "p90": 215.80800414085388,
- "p95": 220.0319990515709,
- "p99": 262.1760070323944
+ "p50": 214.6879956126213,
+ "p90": 226.97599977254868,
+ "p95": 232.41599649190903,
+ "p99": 252.06400454044342
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
+ "dispatchLogicalBytes": 38836224,
"combineLogicalBytes": 77672448,
"fanoutMean": 5.291015625,
"recvTokensMax": 723,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19618,35 +19119,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 136.63999736309052,
- "p90": 140.1599943637848,
- "p95": 141.53599739074707,
- "p99": 167.32800006866455
+ "p50": 127.87200510501862,
+ "p90": 133.59999656677246,
+ "p95": 136.73600554466248,
+ "p99": 140.3840035200119
},
"combine": {
- "p50": 156.70399367809296,
- "p90": 165.02399742603302,
- "p95": 165.6319946050644,
- "p99": 177.50400304794312
+ "p50": 161.6639941930771,
+ "p90": 166.97600483894348,
+ "p95": 169.0559983253479,
+ "p99": 172.06400632858276
},
"roundtrip": {
- "p50": 273.21600914001465,
- "p90": 279.4240117073059,
- "p95": 281.2480032444,
- "p99": 292.4160063266754
+ "p50": 347.03999757766724,
+ "p90": 354.52800989151,
+ "p95": 356.79998993873596,
+ "p99": 360.8640134334564
},
"isolatedSum": {
- "p50": 293.3439910411835,
- "p90": 305.1839917898178,
- "p95": 307.16799199581146,
- "p99": 344.83200311660767
+ "p50": 289.5359992980957,
+ "p90": 300.57600140571594,
+ "p95": 305.7920038700104,
+ "p99": 312.44800984859467
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
+ "dispatchLogicalBytes": 77944832,
"combineLogicalBytes": 155889664,
"fanoutMean": 5.3095703125,
"recvTokensMax": 1422,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19655,35 +19156,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 194.4960057735443,
- "p90": 202.81599462032318,
- "p95": 204.16000485420227,
- "p99": 231.455996632576
+ "p50": 183.29599499702454,
+ "p90": 189.5039975643158,
+ "p95": 192.3840045928955,
+ "p99": 198.88000190258026
},
"combine": {
- "p50": 266.59199595451355,
- "p90": 275.519996881485,
- "p95": 277.3759961128235,
- "p99": 302.3679852485657
+ "p50": 286.1439883708954,
+ "p90": 292.7039861679077,
+ "p95": 294.624000787735,
+ "p99": 298.46400022506714
},
"roundtrip": {
- "p50": 437.6319944858551,
- "p90": 447.9359984397888,
- "p95": 454.0480077266693,
- "p99": 517.6960229873657
+ "p50": 574.4320154190063,
+ "p90": 581.3440084457397,
+ "p95": 584.6719741821289,
+ "p99": 589.8240208625793
},
"isolatedSum": {
- "p50": 461.08800172805786,
- "p90": 478.33599150180817,
- "p95": 481.53600096702576,
- "p99": 533.8239818811417
+ "p50": 469.4399833679199,
+ "p90": 482.2079837322235,
+ "p95": 487.0080053806305,
+ "p99": 497.3440021276474
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
+ "dispatchLogicalBytes": 156133376,
"combineLogicalBytes": 312266752,
"fanoutMean": 5.31787109375,
"recvTokensMax": 2779,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19692,35 +19193,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 326.55999064445496,
- "p90": 330.3360044956207,
- "p95": 334.49599146842957,
- "p99": 353.15200686454773
+ "p50": 296.51200771331787,
+ "p90": 304.3839931488037,
+ "p95": 306.71998858451843,
+ "p99": 313.728004693985
},
"combine": {
- "p50": 459.3279957771301,
- "p90": 462.72000670433044,
- "p95": 471.0400104522705,
- "p99": 533.5680246353149
+ "p50": 488.6400103569031,
+ "p90": 494.81600522994995,
+ "p95": 496.12799286842346,
+ "p99": 500.3200173377991
},
"roundtrip": {
- "p50": 764.9279832839966,
- "p90": 773.1519937515259,
- "p95": 777.1520018577576,
- "p99": 811.0399842262268
+ "p50": 1004.4480562210083,
+ "p90": 1011.3279819488525,
+ "p95": 1014.6239995956421,
+ "p99": 1019.1359519958496
},
"isolatedSum": {
- "p50": 785.8879864215851,
- "p90": 793.0560111999512,
- "p95": 805.5360019207001,
- "p99": 886.7200314998627
+ "p50": 785.152018070221,
+ "p90": 799.1999983787537,
+ "p95": 802.8479814529419,
+ "p99": 814.0480220317841
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
+ "dispatchLogicalBytes": 311721984,
"combineLogicalBytes": 623443968,
"fanoutMean": 5.30859375,
"recvTokensMax": 5505,
- "stragglerRank": 7,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19729,35 +19230,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 574.9120116233826,
- "p90": 586.7840051651001,
- "p95": 597.0879793167114,
- "p99": 678.4639954566956
+ "p50": 540.3519868850708,
+ "p90": 550.4639744758606,
+ "p95": 553.0239939689636,
+ "p99": 558.8160157203674
},
"combine": {
- "p50": 818.2399868965149,
- "p90": 828.7360072135925,
- "p95": 832.7360153198242,
- "p99": 879.8080086708069
+ "p50": 878.7840008735657,
+ "p90": 919.2320108413696,
+ "p95": 928.76797914505,
+ "p99": 971.2960124015808
},
"roundtrip": {
- "p50": 1376.1279582977295,
- "p90": 1384.7039937973022,
- "p95": 1398.1120586395264,
- "p99": 1485.0879907608032
+ "p50": 1854.2720079421997,
+ "p90": 1866.528034210205,
+ "p95": 1895.7760334014893,
+ "p99": 1940.8960342407227
},
"isolatedSum": {
- "p50": 1393.1519985198975,
- "p90": 1415.5200123786926,
- "p95": 1429.8239946365356,
- "p99": 1558.2720041275024
+ "p50": 1419.1359877586365,
+ "p90": 1469.6959853172302,
+ "p95": 1481.7919731140137,
+ "p99": 1530.1120281219482
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
+ "dispatchLogicalBytes": 621902848,
"combineLogicalBytes": 1243805696,
"fanoutMean": 5.29547119140625,
"recvTokensMax": 10952,
- "stragglerRank": 6,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19766,35 +19267,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 1068.3519840240479,
- "p90": 1078.0800580978394,
- "p95": 1086.4640474319458,
- "p99": 1142.624020576477
+ "p50": 1003.2639503479004,
+ "p90": 1012.5119686126709,
+ "p95": 1016.0959959030151,
+ "p99": 1022.5600004196167
},
"combine": {
- "p50": 1529.47199344635,
- "p90": 1541.3119792938232,
- "p95": 1551.8079996109009,
- "p99": 1614.9120330810547
+ "p50": 1630.9759616851807,
+ "p90": 1637.1840238571167,
+ "p95": 1639.456033706665,
+ "p99": 1643.5199975967407
},
"roundtrip": {
- "p50": 2586.5280628204346,
- "p90": 2602.7839183807373,
- "p95": 2617.6319122314453,
- "p99": 2691.5199756622314
+ "p50": 3531.071901321411,
+ "p90": 3541.50390625,
+ "p95": 3545.4719066619873,
+ "p99": 3552.608013153076
},
"isolatedSum": {
- "p50": 2597.823977470398,
- "p90": 2619.3920373916626,
- "p95": 2638.2720470428467,
- "p99": 2757.5360536575317
+ "p50": 2634.239912033081,
+ "p90": 2649.6959924697876,
+ "p95": 2655.55202960968,
+ "p99": 2666.0799980163574
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
+ "dispatchLogicalBytes": 1243504640,
"combineLogicalBytes": 2487009280,
"fanoutMean": 5.294189453125,
"recvTokensMax": 21781,
- "stragglerRank": 6,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -19802,47 +19303,48 @@
]
},
{
- "id": "cx-330e7a0b",
- "identity": "b300|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_307ed708",
- "comparisonKey": "669ed990dbfd00e2",
+ "id": "cx-e1708e07",
+ "identity": "gb300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "c86d940414a55991",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:51:13.255714+00:00",
+ "generatedAt": "2026-06-29T14:03:30.906721+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_11",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16",
+ "model": "Qwen3.5",
"shape": {
- "hidden": 7168,
+ "hidden": 4096,
"topk": 8,
- "experts": 256,
+ "experts": 128,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -19850,244 +19352,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "ebe68878aa18bb0",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285680003",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285680003",
- "createdAt": "2026-06-27T09:51:13.255714+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 93.44000369310379,
- "p90": 96.54399752616882,
- "p95": 100.09600222110748,
- "p99": 102.94400155544281
+ "p50": 95.8079993724823,
+ "p90": 110.36799848079681,
+ "p95": 115.42399972677231,
+ "p99": 124.95999783277512
},
"combine": {
- "p50": 115.26399850845337,
- "p90": 116.09599739313126,
- "p95": 117.34399944543839,
- "p99": 127.77599692344666
+ "p50": 71.19999825954437,
+ "p90": 77.88799703121185,
+ "p95": 81.56800270080566,
+ "p99": 84.6719965338707
},
"roundtrip": {
- "p50": 192.06400215625763,
- "p90": 198.7520009279251,
- "p95": 199.71199333667755,
- "p99": 215.68000316619873
+ "p50": 142.56000518798828,
+ "p90": 155.68000078201294,
+ "p95": 160.8320027589798,
+ "p99": 169.95200514793396
},
"isolatedSum": {
- "p50": 208.70400220155716,
- "p90": 212.63999491930008,
- "p95": 217.44000166654587,
- "p99": 230.71999847888947
+ "p50": 167.00799763202667,
+ "p90": 188.25599551200867,
+ "p95": 196.99200242757797,
+ "p99": 209.6319943666458
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 344064,
+ "combineLogicalBytes": 344064,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 6,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 136.54400408267975,
- "p90": 139.29599523544312,
- "p95": 141.12000167369843,
- "p99": 151.10400319099426
+ "p50": 96.57599776983261,
+ "p90": 112.03200370073318,
+ "p95": 116.64000153541565,
+ "p99": 138.14400136470795
},
"combine": {
- "p50": 162.9440039396286,
- "p90": 164.60800170898438,
- "p95": 165.18400609493256,
- "p99": 178.52799594402313
+ "p50": 72.54400104284286,
+ "p90": 79.83999699354172,
+ "p95": 82.17599987983704,
+ "p99": 85.66399663686752
},
"roundtrip": {
- "p50": 271.84000611305237,
- "p90": 277.75999903678894,
- "p95": 280.0639867782593,
- "p99": 295.48799991607666
+ "p50": 144.54400539398193,
+ "p90": 157.3760062456131,
+ "p95": 161.15200519561768,
+ "p99": 173.8239973783493
},
"isolatedSum": {
- "p50": 299.48800802230835,
- "p90": 303.9039969444275,
- "p95": 306.304007768631,
- "p99": 329.6319991350174
+ "p50": 169.11999881267548,
+ "p90": 191.8720006942749,
+ "p95": 198.81600141525269,
+ "p99": 223.80799800157547
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 704512,
+ "combineLogicalBytes": 704512,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 12,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 192.7040070295334,
- "p90": 198.7520009279251,
- "p95": 200.95999538898468,
- "p99": 214.27200734615326
+ "p50": 96.22400254011154,
+ "p90": 111.26399785280228,
+ "p95": 117.08799749612808,
+ "p99": 128.67200374603271
},
"combine": {
- "p50": 264.8960053920746,
- "p90": 274.27199482917786,
- "p95": 274.87999200820923,
- "p99": 286.3039970397949
+ "p50": 72.51200079917908,
+ "p90": 80.48000186681747,
+ "p95": 83.90399813652039,
+ "p99": 97.31200337409973
},
"roundtrip": {
- "p50": 443.36000084877014,
- "p90": 448.86401295661926,
- "p95": 453.0560076236725,
- "p99": 460.640013217926
+ "p50": 145.88800072669983,
+ "p90": 159.67999398708344,
+ "p95": 165.21599888801575,
+ "p99": 186.52799725532532
},
"isolatedSum": {
- "p50": 457.60001242160797,
- "p90": 473.02399575710297,
- "p95": 475.8399873971939,
- "p99": 500.5760043859482
+ "p50": 168.73600333929062,
+ "p90": 191.74399971961975,
+ "p95": 200.99199563264847,
+ "p99": 225.98400712013245
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1384448,
+ "combineLogicalBytes": 1384448,
+ "fanoutMean": 5.28125,
+ "recvTokensMax": 26,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 326.84800028800964,
- "p90": 329.75998520851135,
- "p95": 331.36001229286194,
- "p99": 340.9599959850311
+ "p50": 97.47199714183807,
+ "p90": 113.21599781513214,
+ "p95": 118.6240017414093,
+ "p99": 129.2160004377365
},
"combine": {
- "p50": 458.97600054740906,
- "p90": 462.46400475502014,
- "p95": 470.335990190506,
- "p99": 474.36800599098206
+ "p50": 74.81600344181061,
+ "p90": 82.07999914884567,
+ "p95": 83.29600095748901,
+ "p99": 94.04800087213516
},
"roundtrip": {
- "p50": 764.2880082130432,
- "p90": 772.0639705657959,
- "p95": 773.5360264778137,
- "p99": 783.8079929351807
+ "p50": 146.7200070619583,
+ "p90": 159.29600596427917,
+ "p95": 163.455992937088,
+ "p99": 177.05599963665009
},
"isolatedSum": {
- "p50": 785.8240008354187,
- "p90": 792.2239899635315,
- "p95": 801.6960024833679,
- "p99": 815.3280019760132
+ "p50": 172.28800058364868,
+ "p90": 195.2959969639778,
+ "p95": 201.92000269889832,
+ "p99": 223.26400130987167
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2744320,
+ "combineLogicalBytes": 2744320,
+ "fanoutMean": 5.234375,
+ "recvTokensMax": 49,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 575.9680271148682,
- "p90": 583.1040143966675,
- "p95": 584.6719741821289,
- "p99": 595.4880118370056
+ "p50": 97.24800288677216,
+ "p90": 112.06399649381638,
+ "p95": 117.72800236940384,
+ "p99": 128.86400520801544
},
"combine": {
- "p50": 817.6640272140503,
- "p90": 827.7760148048401,
- "p95": 828.2240033149719,
- "p99": 840.1280045509338
+ "p50": 77.504001557827,
+ "p90": 82.94399827718735,
+ "p95": 85.21600067615509,
+ "p99": 94.81599926948547
},
"roundtrip": {
- "p50": 1376.7679929733276,
- "p90": 1384.5759630203247,
- "p95": 1390.3679847717285,
- "p99": 1429.6319484710693
+ "p50": 147.90399372577667,
+ "p90": 160.47999262809753,
+ "p95": 164.48000073432922,
+ "p99": 176.4480024576187
},
"isolatedSum": {
- "p50": 1393.6320543289185,
- "p90": 1410.8800292015076,
- "p95": 1412.8959774971008,
- "p99": 1435.6160163879395
+ "p50": 174.75200444459915,
+ "p90": 195.00799477100372,
+ "p95": 202.94400304555893,
+ "p99": 223.68000447750092
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 5464064,
+ "combineLogicalBytes": 5464064,
+ "fanoutMean": 5.2109375,
+ "recvTokensMax": 94,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1069.6320533752441,
- "p90": 1077.6959657669067,
- "p95": 1080.1600217819214,
- "p99": 1091.4560556411743
+ "p50": 98.78399968147278,
+ "p90": 111.35999858379364,
+ "p95": 117.95199662446976,
+ "p99": 131.9359987974167
},
"combine": {
- "p50": 1529.0240049362183,
- "p90": 1540.2239561080933,
- "p95": 1541.0560369491577,
- "p99": 1551.5199899673462
+ "p50": 80.32000064849854,
+ "p90": 84.95999872684479,
+ "p95": 86.87999844551086,
+ "p99": 95.16800194978714
},
"roundtrip": {
- "p50": 2583.616018295288,
- "p90": 2593.696117401123,
- "p95": 2599.3599891662598,
- "p99": 2626.4960765838623
+ "p50": 150.78400075435638,
+ "p90": 160.89600324630737,
+ "p95": 166.52800142765045,
+ "p99": 181.60000443458557
},
"isolatedSum": {
- "p50": 2598.6560583114624,
- "p90": 2617.919921875,
- "p95": 2621.216058731079,
- "p99": 2642.9760456085205
+ "p50": 179.1040003299713,
+ "p90": 196.31999731063843,
+ "p95": 204.83199506998062,
+ "p99": 227.10400074720383
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 11124736,
+ "combineLogicalBytes": 11124736,
+ "fanoutMean": 5.3046875,
+ "recvTokensMax": 186,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 104.00000214576721,
+ "p90": 115.35999923944473,
+ "p95": 122.20799922943115,
+ "p99": 134.8479986190796
+ },
+ "combine": {
+ "p50": 91.96799993515015,
+ "p90": 96.92800045013428,
+ "p95": 102.49599814414978,
+ "p99": 109.63200032711029
+ },
+ "roundtrip": {
+ "p50": 165.98400473594666,
+ "p90": 176.32000148296356,
+ "p95": 181.34400248527527,
+ "p99": 194.30400431156158
+ },
+ "isolatedSum": {
+ "p50": 195.96800208091736,
+ "p90": 212.287999689579,
+ "p95": 224.70399737358093,
+ "p99": 244.47999894618988
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 22192128,
+ "combineLogicalBytes": 22192128,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 358,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 116.5120005607605,
+ "p90": 125.11999905109406,
+ "p95": 129.2160004377365,
+ "p99": 139.71200585365295
+ },
+ "combine": {
+ "p50": 107.16799646615982,
+ "p90": 112.0000034570694,
+ "p95": 117.0559972524643,
+ "p99": 126.43200159072876
+ },
+ "roundtrip": {
+ "p50": 194.75199282169342,
+ "p90": 203.96800339221954,
+ "p95": 207.07200467586517,
+ "p99": 217.66400337219238
+ },
+ "isolatedSum": {
+ "p50": 223.67999702692032,
+ "p90": 237.12000250816345,
+ "p95": 246.2719976902008,
+ "p99": 266.1440074443817
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 44564480,
+ "combineLogicalBytes": 44564480,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 699,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -20095,47 +19671,48 @@
]
},
{
- "id": "cx-d4f1db50",
- "identity": "b300|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "b300_c9569580",
- "comparisonKey": "70142fedc425dd51",
+ "id": "cx-9e8c8650",
+ "identity": "gb300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "94583a6ef392e3d0",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:26.079004+00:00",
+ "generatedAt": "2026-06-29T14:07:58.912744+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "Kimi-K2",
+ "label": "GB300 EP8 · deepep · bf16",
+ "model": "shape 5120/8/160",
"shape": {
- "hidden": 7168,
+ "hidden": 5120,
"topk": 8,
- "experts": 384,
+ "experts": 160,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -20143,292 +19720,367 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "13e2b193b87a112",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287503016",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503016",
- "createdAt": "2026-06-27T11:14:26.079004+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 1799.6480464935303,
- "p90": 2024.2879390716553,
- "p95": 2855.3919792175293,
- "p99": 3412.2560024261475
+ "p50": 96.22400254011154,
+ "p90": 111.23199760913849,
+ "p95": 117.76000261306763,
+ "p99": 156.67200088500977
},
"combine": {
- "p50": 1812.8000497817993,
- "p90": 1949.5359659194946,
- "p95": 2620.09596824646,
- "p99": 2830.048084259033
+ "p50": 72.89600372314453,
+ "p90": 82.17599987983704,
+ "p95": 85.11999994516373,
+ "p99": 129.72800433635712
},
"roundtrip": {
- "p50": 1900.1920223236084,
- "p90": 2016.5760517120361,
- "p95": 2611.488103866577,
- "p99": 3049.344062805176
+ "p50": 146.84799313545227,
+ "p90": 160.60799360275269,
+ "p95": 170.71999609470367,
+ "p99": 226.8799990415573
},
"isolatedSum": {
- "p50": 3612.4480962753296,
- "p90": 3973.82390499115,
- "p95": 5475.487947463989,
- "p99": 6242.304086685181
+ "p50": 169.12000626325607,
+ "p90": 193.40799748897552,
+ "p95": 202.88000255823135,
+ "p99": 286.4000052213669
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 430080,
+ "combineLogicalBytes": 430080,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 8,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 1876.1919736862183,
- "p90": 2189.120054244995,
- "p95": 2922.816038131714,
- "p99": 3402.240037918091
+ "p50": 98.11200201511383,
+ "p90": 113.53600025177002,
+ "p95": 123.19999933242798,
+ "p99": 170.30400037765503
},
"combine": {
- "p50": 1860.6079816818237,
- "p90": 1970.52800655365,
- "p95": 2403.167963027954,
- "p99": 2977.8881072998047
+ "p50": 72.9919970035553,
+ "p90": 81.40800148248672,
+ "p95": 83.64800363779068,
+ "p99": 120.83200365304947
},
"roundtrip": {
- "p50": 1979.2640209197998,
- "p90": 2097.536087036133,
- "p95": 2794.1761016845703,
- "p99": 3157.9198837280273
+ "p50": 147.10399508476257,
+ "p90": 161.76000237464905,
+ "p95": 176.64000391960144,
+ "p99": 214.08000588417053
},
"isolatedSum": {
- "p50": 3736.799955368042,
- "p90": 4159.648060798645,
- "p95": 5325.984001159668,
- "p99": 6380.1281452178955
+ "p50": 171.10399901866913,
+ "p90": 194.94400173425674,
+ "p95": 206.84800297021866,
+ "p99": 291.1360040307045
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 154570752,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 880640,
+ "combineLogicalBytes": 880640,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 1976.0639667510986,
- "p90": 2366.368055343628,
- "p95": 2979.0399074554443,
- "p99": 3521.440029144287
+ "p50": 97.59999811649323,
+ "p90": 112.31999844312668,
+ "p95": 119.10399794578552,
+ "p99": 146.36799693107605
},
"combine": {
- "p50": 1994.1760301589966,
- "p90": 2153.6319255828857,
- "p95": 2808.351993560791,
- "p99": 3210.304021835327
+ "p50": 75.00799745321274,
+ "p90": 82.78399705886841,
+ "p95": 86.91199868917465,
+ "p99": 116.31999909877777
},
"roundtrip": {
- "p50": 2184.7360134124756,
- "p90": 2389.280080795288,
- "p95": 3086.7199897766113,
- "p99": 3524.319887161255
+ "p50": 149.1519957780838,
+ "p90": 160.38399934768677,
+ "p95": 166.04800522327423,
+ "p99": 194.33599710464478
},
"isolatedSum": {
- "p50": 3970.239996910095,
- "p90": 4519.999980926514,
- "p95": 5787.391901016235,
- "p99": 6731.744050979614
+ "p50": 172.60799556970596,
+ "p90": 195.1039955019951,
+ "p95": 206.01599663496017,
+ "p99": 262.6879960298538
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 309772288,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1740800,
+ "combineLogicalBytes": 1740800,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 25,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 2102.2400856018066,
- "p90": 2479.5520305633545,
- "p95": 3182.1439266204834,
- "p99": 4024.6081352233887
+ "p50": 98.55999797582626,
+ "p90": 113.37599903345108,
+ "p95": 121.18399888277054,
+ "p99": 164.15999829769135
},
"combine": {
- "p50": 2238.5919094085693,
- "p90": 2511.5840435028076,
- "p95": 3066.6239261627197,
- "p99": 3605.247974395752
+ "p50": 80.44800162315369,
+ "p90": 84.86399799585342,
+ "p95": 91.45600348711014,
+ "p99": 121.88799679279327
},
"roundtrip": {
- "p50": 2536.7679595947266,
- "p90": 2645.951986312866,
- "p95": 3478.5280227661133,
- "p99": 4007.6160430908203
+ "p50": 151.32799744606018,
+ "p90": 164.57599401474,
+ "p95": 175.90400576591492,
+ "p99": 238.5919988155365
},
"isolatedSum": {
- "p50": 4340.831995010376,
- "p90": 4991.136074066162,
- "p95": 6248.767852783203,
- "p99": 7629.856109619141
+ "p50": 179.00799959897995,
+ "p90": 198.2399970293045,
+ "p95": 212.64000236988068,
+ "p99": 286.0479950904846
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 619501568,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
+ "dispatchLogicalBytes": 3471360,
+ "combineLogicalBytes": 3471360,
+ "fanoutMean": 5.296875,
+ "recvTokensMax": 50,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 2352.7679443359375,
- "p90": 2601.088047027588,
- "p95": 3376.3840198516846,
- "p99": 4238.1439208984375
+ "p50": 98.33600372076035,
+ "p90": 111.93600296974182,
+ "p95": 117.60000139474869,
+ "p99": 157.4079990386963
},
"combine": {
- "p50": 2585.2479934692383,
- "p90": 2841.9840335845947,
- "p95": 3667.9999828338623,
- "p99": 4010.7522010803223
+ "p50": 80.89599758386612,
+ "p90": 85.1840004324913,
+ "p95": 89.79199826717377,
+ "p99": 119.10399794578552
},
"roundtrip": {
- "p50": 3136.607885360718,
- "p90": 3412.1599197387695,
- "p95": 4064.095973968506,
- "p99": 6203.680038452148
+ "p50": 152.96000242233276,
+ "p90": 166.6560024023056,
+ "p95": 172.28800058364868,
+ "p99": 221.50400280952454
},
"isolatedSum": {
- "p50": 4938.015937805176,
- "p90": 5443.072080612183,
- "p95": 7044.384002685547,
- "p99": 8248.89612197876
+ "p50": 179.23200130462646,
+ "p90": 197.12000340223312,
+ "p95": 207.39199966192245,
+ "p99": 276.5119969844818
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1239375872,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 6912000,
+ "combineLogicalBytes": 6912000,
+ "fanoutMean": 5.2734375,
+ "recvTokensMax": 93,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 2850.5918979644775,
- "p90": 3381.5360069274902,
- "p95": 3976.288080215454,
- "p99": 5621.503829956055
+ "p50": 98.78399968147278,
+ "p90": 113.21599781513214,
+ "p95": 124.54400211572647,
+ "p99": 163.07200491428375
},
"combine": {
- "p50": 3287.7440452575684,
- "p90": 3433.759927749634,
- "p95": 3676.8319606781006,
- "p99": 4466.11213684082
+ "p50": 83.36000144481659,
+ "p90": 91.87199920415878,
+ "p95": 95.36000341176987,
+ "p99": 141.15199446678162
},
"roundtrip": {
- "p50": 4338.784217834473,
- "p90": 4467.199802398682,
- "p95": 4870.207786560059,
- "p99": 5583.968162536621
+ "p50": 154.88000214099884,
+ "p90": 168.38400065898895,
+ "p95": 187.42400407791138,
+ "p99": 240.54400622844696
},
"isolatedSum": {
- "p50": 6138.335943222046,
- "p90": 6815.295934677124,
- "p95": 7653.120040893555,
- "p99": 10087.615966796875
+ "p50": 182.14400112628937,
+ "p90": 205.08799701929092,
+ "p95": 219.90400552749634,
+ "p99": 304.22399938106537
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2479669248,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 13977600,
+ "combineLogicalBytes": 13977600,
+ "fanoutMean": 5.33203125,
+ "recvTokensMax": 179,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
- }
- ]
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 108.89600217342377,
+ "p90": 119.77600306272507,
+ "p95": 126.78399682044983,
+ "p99": 179.71199750900269
+ },
+ "combine": {
+ "p50": 96.83199971914291,
+ "p90": 105.8880016207695,
+ "p95": 108.99200290441513,
+ "p99": 150.91200172901154
+ },
+ "roundtrip": {
+ "p50": 176.12800002098083,
+ "p90": 187.96800076961517,
+ "p95": 194.4960057735443,
+ "p99": 237.34399676322937
+ },
+ "isolatedSum": {
+ "p50": 205.72800189256668,
+ "p90": 225.66400468349457,
+ "p95": 235.77599972486496,
+ "p99": 330.6239992380142
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 27975680,
+ "combineLogicalBytes": 27975680,
+ "fanoutMean": 5.3359375,
+ "recvTokensMax": 355,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 122.81599640846252,
+ "p90": 132.89600610733032,
+ "p95": 138.8159990310669,
+ "p99": 169.11999881267548
+ },
+ "combine": {
+ "p50": 117.0559972524643,
+ "p90": 121.88799679279327,
+ "p95": 129.40800189971924,
+ "p99": 154.6880006790161
+ },
+ "roundtrip": {
+ "p50": 209.60000157356262,
+ "p90": 220.7999974489212,
+ "p95": 225.66400468349457,
+ "p99": 258.432000875473
+ },
+ "isolatedSum": {
+ "p50": 239.87199366092682,
+ "p90": 254.7840029001236,
+ "p95": 268.22400093078613,
+ "p99": 323.8079994916916
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 55674880,
+ "combineLogicalBytes": 55674880,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 699,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ }
+ ]
},
{
- "id": "cx-0ef62f98",
- "identity": "b300|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "b300_307ed708",
- "comparisonKey": "6ef04ab36d1b6989",
+ "id": "cx-7c993840",
+ "identity": "gb300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "1c929d1cf59e66d3",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:51:40.258532+00:00",
+ "generatedAt": "2026-06-29T14:12:18.029743+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_08",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16",
- "model": "Kimi-K2",
+ "label": "GB300 EP8 · deepep · bf16",
+ "model": "MiniMax-M3",
"shape": {
- "hidden": 7168,
+ "hidden": 6144,
"topk": 8,
- "experts": 384,
+ "experts": 256,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -20436,537 +20088,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285690957",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285690957",
- "createdAt": "2026-06-27T09:51:40.258532+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 94.14400160312653,
- "p90": 98.4639972448349,
- "p95": 102.94400155544281,
- "p99": 110.91200262308121
- },
- "combine": {
- "p50": 115.26399850845337,
- "p90": 116.12799763679504,
- "p95": 117.60000139474869,
- "p99": 127.23200023174286
- },
- "roundtrip": {
- "p50": 192.86400079727173,
- "p90": 199.45600628852844,
- "p95": 202.07999646663666,
- "p99": 214.78399634361267
- },
- "isolatedSum": {
- "p50": 209.4080001115799,
- "p90": 214.59199488162994,
- "p95": 220.5440029501915,
- "p99": 238.14400285482407
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 131.74399733543396,
- "p90": 138.7840062379837,
- "p95": 141.184002161026,
- "p99": 154.4319987297058
- },
- "combine": {
- "p50": 161.85599565505981,
- "p90": 164.2560064792633,
- "p95": 164.99200463294983,
- "p99": 175.04000663757324
- },
- "roundtrip": {
- "p50": 276.5760123729706,
- "p90": 284.31999683380127,
- "p95": 288.4159982204437,
- "p99": 299.80799555778503
- },
- "isolatedSum": {
- "p50": 293.5999929904938,
- "p90": 303.040012717247,
- "p95": 306.17600679397583,
- "p99": 329.47200536727905
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154570752,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 192.60799884796143,
- "p90": 199.52000677585602,
- "p95": 202.43200659751892,
- "p99": 214.23999965190887
- },
- "combine": {
- "p50": 265.28000831604004,
- "p90": 274.4640111923218,
- "p95": 275.1680016517639,
- "p99": 287.1679961681366
- },
- "roundtrip": {
- "p50": 434.7200095653534,
- "p90": 443.3920085430145,
- "p95": 447.1360146999359,
- "p99": 463.00798654556274
- },
- "isolatedSum": {
- "p50": 457.88800716400146,
- "p90": 473.9840179681778,
- "p95": 477.60000824928284,
- "p99": 501.40799582004547
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 309772288,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 326.9760012626648,
- "p90": 330.27198910713196,
- "p95": 331.36001229286194,
- "p99": 341.8239951133728
+ "p50": 102.55999863147736,
+ "p90": 120.64000219106674,
+ "p95": 138.84800672531128,
+ "p99": 175.4560023546219
},
"combine": {
- "p50": 458.3039879798889,
- "p90": 462.3039960861206,
- "p95": 470.2720046043396,
- "p99": 482.7840030193329
+ "p50": 80.54400235414505,
+ "p90": 85.82399785518646,
+ "p95": 94.11200135946274,
+ "p99": 132.60799646377563
},
"roundtrip": {
- "p50": 764.2560005187988,
- "p90": 772.1920013427734,
- "p95": 775.4560112953186,
- "p99": 788.320004940033
+ "p50": 155.39200603961945,
+ "p90": 169.8240041732788,
+ "p95": 194.0159946680069,
+ "p99": 232.70399868488312
},
"isolatedSum": {
- "p50": 785.2799892425537,
- "p90": 792.5759851932526,
- "p95": 801.6320168972015,
- "p99": 824.6079981327057
+ "p50": 183.1040009856224,
+ "p90": 206.4640000462532,
+ "p95": 232.96000808477402,
+ "p99": 308.0639988183975
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 619501568,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
+ "dispatchLogicalBytes": 540672,
+ "combineLogicalBytes": 540672,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 567.9680109024048,
- "p90": 572.2560286521912,
- "p95": 577.9520273208618,
- "p99": 588.7359976768494
+ "p50": 103.84000092744827,
+ "p90": 117.50400066375732,
+ "p95": 127.23200023174286,
+ "p99": 161.95200383663177
},
"combine": {
- "p50": 807.4560165405273,
- "p90": 816.864013671875,
- "p95": 826.2720108032227,
- "p99": 877.1520256996155
+ "p50": 80.9599980711937,
+ "p90": 85.28000116348267,
+ "p95": 91.26400202512741,
+ "p99": 131.71200454235077
},
"roundtrip": {
- "p50": 1359.0079545974731,
- "p90": 1367.6799535751343,
- "p95": 1373.7280368804932,
- "p99": 1425.5039691925049
+ "p50": 157.82399475574493,
+ "p90": 169.66399550437927,
+ "p95": 174.72000420093536,
+ "p99": 218.4000015258789
},
"isolatedSum": {
- "p50": 1375.4240274429321,
- "p90": 1389.1200423240662,
- "p95": 1404.2240381240845,
- "p99": 1465.8880233764648
+ "p50": 184.79999899864197,
+ "p90": 202.78400182724,
+ "p95": 218.49600225687027,
+ "p99": 293.66400837898254
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1239375872,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1056768,
+ "combineLogicalBytes": 1056768,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 1064.0640258789062,
- "p90": 1069.1200494766235,
- "p95": 1075.103998184204,
- "p99": 1101.088047027588
+ "p50": 103.61599922180176,
+ "p90": 115.32799899578094,
+ "p95": 122.04799801111221,
+ "p99": 142.5279974937439
},
"combine": {
- "p50": 1516.2559747695923,
- "p90": 1527.4560451507568,
- "p95": 1529.4400453567505,
- "p99": 1576.3520002365112
+ "p50": 82.24000036716461,
+ "p90": 85.82399785518646,
+ "p95": 89.75999802350998,
+ "p99": 97.21600264310837
},
"roundtrip": {
- "p50": 2562.78395652771,
- "p90": 2572.5440979003906,
- "p95": 2577.984094619751,
- "p99": 2608.351945877075
+ "p50": 159.10400450229645,
+ "p90": 170.81600427627563,
+ "p95": 175.26400089263916,
+ "p99": 192.3840045928955
},
"isolatedSum": {
- "p50": 2580.3200006484985,
- "p90": 2596.5760946273804,
- "p95": 2604.5440435409546,
- "p99": 2677.440047264099
+ "p50": 185.85599958896637,
+ "p90": 201.1519968509674,
+ "p95": 211.8079960346222,
+ "p99": 239.74400013685226
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2479669248,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
+ "dispatchLogicalBytes": 2125824,
+ "combineLogicalBytes": 2125824,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
- }
- ]
- },
- {
- "id": "cx-1f1575ee",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39",
- "colorKey": "b300_77566238",
- "comparisonKey": "89f8d104edbb2508",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T09:48:40.157886+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_16",
- "sku": "b300",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "0a3064a2af0dd39",
- "workloadId": "set:6:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28285615307",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285615307",
- "createdAt": "2026-06-27T09:48:40.157886+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 1811.2640380859375,
- "p90": 2052.7360439300537,
- "p95": 2767.9359912872314,
- "p99": 3486.1440658569336
- },
- "combine": {
- "p50": 1848.6720323562622,
- "p90": 1981.9200038909912,
- "p95": 2632.8959465026855,
- "p99": 3014.080047607422
- },
- "roundtrip": {
- "p50": 1926.3039827346802,
- "p90": 2019.2639827728271,
- "p95": 2607.0079803466797,
- "p99": 3037.4081134796143
- },
- "isolatedSum": {
- "p50": 3659.9360704421997,
- "p90": 4034.656047821045,
- "p95": 5400.831937789917,
- "p99": 6500.2241134643555
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 1909.9199771881104,
- "p90": 2291.3599014282227,
- "p95": 2951.96795463562,
- "p99": 4049.7918128967285
+ "p50": 104.41599786281586,
+ "p90": 119.29599940776825,
+ "p95": 129.43999469280243,
+ "p99": 184.7359985113144
},
"combine": {
- "p50": 1909.9839925765991,
- "p90": 2116.7359352111816,
- "p95": 2735.680103302002,
- "p99": 3026.4639854431152
+ "p50": 83.71199667453766,
+ "p90": 92.67199784517288,
+ "p95": 94.84799951314926,
+ "p99": 107.80800133943558
},
"roundtrip": {
- "p50": 2060.3199005126953,
- "p90": 2157.792091369629,
- "p95": 2832.7999114990234,
- "p99": 3228.3198833465576
+ "p50": 161.0880047082901,
+ "p90": 172.70399630069733,
+ "p95": 182.43199586868286,
+ "p99": 230.04800081253052
},
"isolatedSum": {
- "p50": 3819.9039697647095,
- "p90": 4408.095836639404,
- "p95": 5687.648057937622,
- "p99": 7076.255798339844
+ "p50": 188.12799453735352,
+ "p90": 211.96799725294113,
+ "p95": 224.2879942059517,
+ "p99": 292.54399985074997
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 8,
- "recvTokensMax": 2048,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 4263936,
+ "combineLogicalBytes": 4263936,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 2026.7200469970703,
- "p90": 2262.399911880493,
- "p95": 2992.89608001709,
- "p99": 3506.0160160064697
+ "p50": 102.59199887514114,
+ "p90": 116.28799885511398,
+ "p95": 122.94399738311768,
+ "p99": 169.0559983253479
},
"combine": {
- "p50": 2108.9279651641846,
- "p90": 2252.255916595459,
- "p95": 2964.672088623047,
- "p99": 3763.808012008667
+ "p50": 84.41600203514099,
+ "p90": 92.8959995508194,
+ "p95": 95.23200243711472,
+ "p99": 119.03999745845795
},
"roundtrip": {
- "p50": 2335.0400924682617,
- "p90": 2459.1360092163086,
- "p95": 3039.2000675201416,
- "p99": 3627.135992050171
+ "p50": 161.85599565505981,
+ "p90": 174.112007021904,
+ "p95": 179.9039989709854,
+ "p99": 247.51999974250793
},
"isolatedSum": {
- "p50": 4135.648012161255,
- "p90": 4514.655828475952,
- "p95": 5957.568168640137,
- "p99": 7269.824028015137
+ "p50": 187.00800091028214,
+ "p90": 209.18399840593338,
+ "p95": 218.1759998202324,
+ "p99": 288.09599578380585
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 8,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 8503296,
+ "combineLogicalBytes": 8503296,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 2215.167999267578,
- "p90": 2474.047899246216,
- "p95": 2963.9999866485596,
- "p99": 3755.0079822540283
+ "p50": 103.93600165843964,
+ "p90": 116.7680025100708,
+ "p95": 121.0239976644516,
+ "p99": 162.04799711704254
},
"combine": {
- "p50": 2386.8160247802734,
- "p90": 2521.951913833618,
- "p95": 3310.7199668884277,
- "p99": 3616.895914077759
+ "p50": 88.0960002541542,
+ "p90": 95.8079993724823,
+ "p95": 98.75199943780899,
+ "p99": 142.43200421333313
},
"roundtrip": {
- "p50": 2777.695894241333,
- "p90": 2873.3439445495605,
- "p95": 3295.2001094818115,
- "p99": 4089.024066925049
+ "p50": 166.24000668525696,
+ "p90": 177.98399925231934,
+ "p95": 183.20000171661377,
+ "p99": 235.55199801921844
},
"isolatedSum": {
- "p50": 4601.984024047852,
- "p90": 4995.999813079834,
- "p95": 6274.719953536987,
- "p99": 7371.903896331787
+ "p50": 192.03200191259384,
+ "p90": 212.5760018825531,
+ "p95": 219.7759971022606,
+ "p99": 304.48000133037567
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 939524096,
- "combineLogicalBytes": 939524096,
- "fanoutMean": 8,
- "recvTokensMax": 8192,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 16908288,
+ "combineLogicalBytes": 16908288,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 2534.015893936157,
- "p90": 2614.207983016968,
- "p95": 3331.199884414673,
- "p99": 3946.6240406036377
+ "p50": 113.88800293207169,
+ "p90": 121.40800058841705,
+ "p95": 124.51200187206268,
+ "p99": 134.0160071849823
},
"combine": {
- "p50": 2894.8159217834473,
- "p90": 2969.0239429473877,
- "p95": 3296.128034591675,
- "p99": 4143.392086029053
+ "p50": 102.88000106811523,
+ "p90": 108.47999900579453,
+ "p95": 109.69600081443787,
+ "p99": 116.89600348472595
},
"roundtrip": {
- "p50": 3649.6639251708984,
- "p90": 3799.5200157165527,
- "p95": 4219.871997833252,
- "p99": 4852.320194244385
+ "p50": 186.39999628067017,
+ "p90": 195.71200013160706,
+ "p95": 198.5280066728592,
+ "p99": 208.8959962129593
},
"isolatedSum": {
- "p50": 5428.8318157196045,
- "p90": 5583.2319259643555,
- "p95": 6627.327919006348,
- "p99": 8090.01612663269
+ "p50": 216.76800400018692,
+ "p90": 229.88799959421158,
+ "p95": 234.20800268650055,
+ "p99": 250.91201066970825
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1879048192,
- "combineLogicalBytes": 1879048192,
- "fanoutMean": 8,
- "recvTokensMax": 16384,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 33423360,
+ "combineLogicalBytes": 33423360,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 3252.351999282837,
- "p90": 3331.104040145874,
- "p95": 3698.4639167785645,
- "p99": 4560.927867889404
+ "p50": 125.791996717453,
+ "p90": 133.59999656677246,
+ "p95": 136.1600011587143,
+ "p99": 144.6080058813095
},
"combine": {
- "p50": 3938.591957092285,
- "p90": 4131.968021392822,
- "p95": 4414.432048797607,
- "p99": 5301.055908203125
+ "p50": 122.23999947309494,
+ "p90": 130.62399625778198,
+ "p95": 132.35199451446533,
+ "p99": 137.88799941539764
},
"roundtrip": {
- "p50": 5385.6000900268555,
- "p90": 5495.0079917907715,
- "p95": 6258.880138397217,
- "p99": 6821.216106414795
+ "p50": 218.4319943189621,
+ "p90": 226.78400576114655,
+ "p95": 229.44000363349915,
+ "p99": 236.32000386714935
},
"isolatedSum": {
- "p50": 7190.943956375122,
- "p90": 7463.072061538696,
- "p95": 8112.895965576172,
- "p99": 9861.98377609253
+ "p50": 248.03199619054794,
+ "p90": 264.22399282455444,
+ "p95": 268.5119956731796,
+ "p99": 282.49600529670715
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3758096384,
- "combineLogicalBytes": 3758096384,
- "fanoutMean": 8,
- "recvTokensMax": 32768,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 66576384,
+ "combineLogicalBytes": 66576384,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -20974,47 +20407,48 @@
]
},
{
- "id": "cx-a989dada",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8",
- "colorKey": "b300_77566238",
- "comparisonKey": "0cdc743c580a47d3",
+ "id": "cx-07f80259",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||00df46ebb2988d7",
+ "colorKey": "gb300_74218200",
+ "comparisonKey": "771769a5e7987ff5",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:19.169974+00:00",
+ "generatedAt": "2026-06-29T13:43:34.234497+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · balanced",
+ "label": "GB300 EP8 · deepep · bf16",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -21022,133 +20456,96 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "9e6ac678a09f7f8",
- "workloadId": "set:3:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "00df46ebb2988d7",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271876366",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271876366",
- "createdAt": "2026-06-26T23:58:19.169974+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 1816.2239789962769,
- "p90": 2297.152042388916,
- "p95": 2896.320104598999,
- "p99": 3506.6559314727783
- },
- "combine": {
- "p50": 1859.1680526733398,
- "p90": 2047.4560260772705,
- "p95": 2707.1681022644043,
- "p99": 3027.2960662841797
- },
- "roundtrip": {
- "p50": 1932.8960180282593,
- "p90": 2138.335943222046,
- "p95": 2772.9599475860596,
- "p99": 3193.279981613159
- },
- "isolatedSum": {
- "p50": 3675.3920316696167,
- "p90": 4344.6080684661865,
- "p95": 5603.488206863403,
- "p99": 6533.951997756958
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 2029.6320915222168,
- "p90": 2355.0078868865967,
- "p95": 3023.6799716949463,
- "p99": 3532.543897628784
+ "p50": 93.9520001411438,
+ "p90": 107.42399841547012,
+ "p95": 111.55200004577637,
+ "p99": 120.35199999809265
},
"combine": {
- "p50": 2128.671884536743,
- "p90": 2460.576057434082,
- "p95": 3003.5200119018555,
- "p99": 3345.4079627990723
+ "p50": 83.23200047016144,
+ "p90": 89.91999924182892,
+ "p95": 92.03200042247772,
+ "p99": 97.88800030946732
},
"roundtrip": {
- "p50": 2337.8241062164307,
- "p90": 2708.159923553467,
- "p95": 3375.744104385376,
- "p99": 3673.952102661133
+ "p50": 155.35999834537506,
+ "p90": 164.0319973230362,
+ "p95": 167.1680063009262,
+ "p99": 173.95199835300446
},
"isolatedSum": {
- "p50": 4158.30397605896,
- "p90": 4815.583944320679,
- "p95": 6027.199983596802,
- "p99": 6877.951860427856
+ "p50": 177.18400061130524,
+ "p90": 197.34399765729904,
+ "p95": 203.5840004682541,
+ "p99": 218.24000030755997
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 8,
- "recvTokensMax": 4096,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 2545.1838970184326,
- "p90": 2883.19993019104,
- "p95": 3424.1280555725098,
- "p99": 3852.544069290161
+ "p50": 110.20799726247787,
+ "p90": 118.17599833011627,
+ "p95": 121.24799937009811,
+ "p99": 127.77599692344666
},
"combine": {
- "p50": 2903.520107269287,
- "p90": 3124.959945678711,
- "p95": 3718.2400226593018,
- "p99": 4377.791881561279
+ "p50": 105.05600273609161,
+ "p90": 109.24799740314484,
+ "p95": 111.1999973654747,
+ "p99": 117.76000261306763
},
"roundtrip": {
- "p50": 3660.6719493865967,
- "p90": 3928.3199310302734,
- "p95": 4631.743907928467,
- "p99": 5148.064136505127
+ "p50": 185.92000007629395,
+ "p90": 193.08799505233765,
+ "p95": 196.6399997472763,
+ "p99": 203.64800095558167
},
"isolatedSum": {
- "p50": 5448.70400428772,
- "p90": 6008.159875869751,
- "p95": 7142.3680782318115,
- "p99": 8230.33595085144
+ "p50": 215.2639999985695,
+ "p90": 227.4239957332611,
+ "p95": 232.44799673557281,
+ "p99": 245.53599953651428
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1879048192,
- "combineLogicalBytes": 1879048192,
- "fanoutMean": 8,
- "recvTokensMax": 16384,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -21156,474 +20553,367 @@
]
},
{
- "id": "cx-092ff174",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9",
- "colorKey": "b300_a314501b",
- "comparisonKey": "c51826952291f0ba",
+ "id": "cx-7324ba0b",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_74218200",
+ "comparisonKey": "771769a5e7987ff5",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:57:58.409823+00:00",
+ "generatedAt": "2026-06-29T13:37:36.702477+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_14",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · balanced-rank-local",
+ "label": "GB300 EP8 · deepep · bf16",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "balanced-rank-local",
- "routingLabel": "balanced-rank-local",
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
"paretoEligible": false
},
"placement": {
- "kind": "packed",
- "nodes": 1,
+ "kind": "adversarial",
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "7aa44c7b86748b9",
- "workloadId": "set:3:388ff74baef05c72",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271883343",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271883343",
- "createdAt": "2026-06-26T23:57:58.409823+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 69.37599927186966,
- "p90": 71.03999704122543,
- "p95": 73.37599992752075,
- "p99": 81.69600367546082
+ "p50": 98.84800016880035,
+ "p90": 110.72000116109848,
+ "p95": 117.34399944543839,
+ "p99": 128.89599800109863
},
"combine": {
- "p50": 67.61600077152252,
- "p90": 69.60000097751617,
- "p95": 77.02399790287018,
- "p99": 83.39200168848038
+ "p50": 82.40000158548355,
+ "p90": 89.34400230646133,
+ "p95": 92.70399808883667,
+ "p99": 98.75199943780899
},
"roundtrip": {
- "p50": 119.93599683046341,
- "p90": 126.01600587368011,
- "p95": 128.48000228405,
- "p99": 135.55200397968292
+ "p50": 155.93600273132324,
+ "p90": 167.07199811935425,
+ "p95": 170.1440066099167,
+ "p99": 179.1040003299713
},
"isolatedSum": {
- "p50": 136.99200004339218,
- "p90": 140.6399980187416,
- "p95": 150.39999783039093,
- "p99": 165.0880053639412
+ "p50": 181.2480017542839,
+ "p90": 200.06400346755981,
+ "p95": 210.04799753427505,
+ "p99": 227.64799743890762
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 1,
- "recvTokensMax": 128,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 93.98400038480759,
- "p90": 98.68799895048141,
- "p95": 100.28800368309021,
- "p99": 105.72800040245056
+ "p50": 98.24000298976898,
+ "p90": 110.6560006737709,
+ "p95": 115.87200313806534,
+ "p99": 126.14400684833527
},
"combine": {
- "p50": 115.52000045776367,
- "p90": 116.5120005607605,
- "p95": 116.73600226640701,
- "p99": 123.48800152540207
+ "p50": 82.46400207281113,
+ "p90": 87.67999708652496,
+ "p95": 92.51199662685394,
+ "p99": 98.14400225877762
},
"roundtrip": {
- "p50": 193.08799505233765,
- "p90": 197.88800179958344,
- "p95": 198.59200716018677,
- "p99": 204.0960043668747
+ "p50": 155.8080017566681,
+ "p90": 165.95199704170227,
+ "p95": 169.27999258041382,
+ "p99": 177.76000499725342
},
"isolatedSum": {
- "p50": 209.50400084257126,
- "p90": 215.1999995112419,
- "p95": 217.02400594949722,
- "p99": 229.21600192785263
+ "p50": 180.7040050625801,
+ "p90": 198.33599776029587,
+ "p95": 208.38399976491928,
+ "p99": 224.28800910711288
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 1,
- "recvTokensMax": 512,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 197.37599790096283,
- "p90": 199.96799528598785,
- "p95": 200.80000162124634,
- "p99": 207.10399746894836
- },
- "combine": {
- "p50": 248.1600046157837,
- "p90": 249.9839961528778,
- "p95": 250.68798661231995,
- "p99": 253.79198789596558
- },
- "roundtrip": {
- "p50": 429.8880100250244,
- "p90": 434.30399894714355,
- "p95": 436.2879991531372,
- "p99": 442.84799695014954
- },
- "isolatedSum": {
- "p50": 445.5360025167465,
- "p90": 449.95199143886566,
- "p95": 451.4879882335663,
- "p99": 460.89598536491394
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 1,
- "recvTokensMax": 2048,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-91ac2845",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71",
- "colorKey": "b300_592e9a16",
- "comparisonKey": "0a480d3d40419b1c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T09:48:29.790713+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_03",
- "sku": "b300",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · balanced+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "balanced",
- "routingLabel": "balanced+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "df54a9510825f71",
- "workloadId": "set:6:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1,
- "eplbImbalanceAfter": 1,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28285617940",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285617940",
- "createdAt": "2026-06-27T09:48:29.790713+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 84.25600081682205,
- "p90": 88.16000074148178,
- "p95": 89.21600133180618,
- "p99": 96.03200107812881
+ "p50": 98.59199821949005,
+ "p90": 111.96800321340561,
+ "p95": 118.75200271606445,
+ "p99": 129.2160004377365
},
"combine": {
- "p50": 82.2720006108284,
- "p90": 90.71999788284302,
- "p95": 90.97599983215332,
- "p99": 102.49599814414978
+ "p50": 84.54400300979614,
+ "p90": 90.94399958848953,
+ "p95": 93.50399672985077,
+ "p99": 99.87200051546097
},
"roundtrip": {
- "p50": 146.40000462532043,
- "p90": 149.1200029850006,
- "p95": 150.68799257278442,
- "p99": 157.31200575828552
+ "p50": 158.91200304031372,
+ "p90": 170.6559956073761,
+ "p95": 173.47200214862823,
+ "p99": 182.5920045375824
},
"isolatedSum": {
- "p50": 166.52800142765045,
- "p90": 178.8799986243248,
- "p95": 180.1920011639595,
- "p99": 198.5279992222786
+ "p50": 183.1360012292862,
+ "p90": 202.91200280189514,
+ "p95": 212.25599944591522,
+ "p99": 229.08800095319748
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 2,
- "recvTokensMax": 384,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 96.12800180912018,
- "p90": 98.59199821949005,
- "p95": 100.44799745082855,
- "p99": 120.12799829244614
+ "p50": 99.93600100278854,
+ "p90": 111.29599809646606,
+ "p95": 116.99199676513672,
+ "p99": 128.4160017967224
},
"combine": {
- "p50": 104.92800176143646,
- "p90": 113.92000317573547,
- "p95": 114.43199962377548,
- "p99": 116.38399958610535
+ "p50": 85.02399921417236,
+ "p90": 92.22400188446045,
+ "p95": 94.65599805116653,
+ "p99": 99.64799880981445
},
"roundtrip": {
- "p50": 184.28799510002136,
- "p90": 191.74399971961975,
- "p95": 194.14399564266205,
- "p99": 206.01600408554077
+ "p50": 160.99199652671814,
+ "p90": 170.59199512004852,
+ "p95": 174.55999553203583,
+ "p99": 186.91200017929077
},
"isolatedSum": {
- "p50": 201.05600357055664,
- "p90": 212.51200139522552,
- "p95": 214.87999707460403,
- "p99": 236.51199787855148
+ "p50": 184.9600002169609,
+ "p90": 203.5199999809265,
+ "p95": 211.64799481630325,
+ "p99": 228.06400060653687
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 2,
- "recvTokensMax": 768,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 131.55199587345123,
- "p90": 138.91200721263885,
- "p95": 140.19200205802917,
- "p99": 149.85600113868713
+ "p50": 99.58399832248688,
+ "p90": 111.29599809646606,
+ "p95": 114.52800035476685,
+ "p99": 123.07199835777283
},
"combine": {
- "p50": 142.65599846839905,
- "p90": 151.90400183200836,
- "p95": 152.41600573062897,
- "p99": 164.09599781036377
+ "p50": 87.0399996638298,
+ "p90": 94.11200135946274,
+ "p95": 96.76799923181534,
+ "p99": 101.50399804115295
},
"roundtrip": {
- "p50": 258.59200954437256,
- "p90": 264.6400034427643,
- "p95": 268.38400959968567,
- "p99": 282.943993806839
+ "p50": 162.33600676059723,
+ "p90": 172.28800058364868,
+ "p95": 175.84000527858734,
+ "p99": 188.31999599933624
},
"isolatedSum": {
- "p50": 274.2079943418503,
- "p90": 290.8160090446472,
- "p95": 292.60800778865814,
- "p99": 313.9519989490509
+ "p50": 186.62399798631668,
+ "p90": 205.4079994559288,
+ "p95": 211.29599958658218,
+ "p99": 224.57599639892578
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 2,
- "recvTokensMax": 1536,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 199.8399943113327,
- "p90": 207.10399746894836,
- "p95": 214.36800062656403,
- "p99": 236.4799976348877
+ "p50": 103.07200253009796,
+ "p90": 114.62400108575821,
+ "p95": 118.94399672746658,
+ "p99": 134.0479999780655
},
"combine": {
- "p50": 262.36799359321594,
- "p90": 262.9759907722473,
- "p95": 263.35999369621277,
- "p99": 272.5119888782501
+ "p50": 93.05600076913834,
+ "p90": 98.2080027461052,
+ "p95": 100.22400319576263,
+ "p99": 107.84000158309937
},
"roundtrip": {
- "p50": 435.5199933052063,
- "p90": 441.9200122356415,
- "p95": 445.4079866409302,
- "p99": 463.29599618911743
+ "p50": 167.29600727558136,
+ "p90": 178.81600558757782,
+ "p95": 182.3039948940277,
+ "p99": 192.09599494934082
},
"isolatedSum": {
- "p50": 462.20798790454865,
- "p90": 470.0799882411957,
- "p95": 477.7279943227768,
- "p99": 508.9919865131378
+ "p50": 196.1280032992363,
+ "p90": 212.8320038318634,
+ "p95": 219.16799992322922,
+ "p99": 241.88800156116486
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 2,
- "recvTokensMax": 3072,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 345.7599878311157,
- "p90": 352.7039885520935,
- "p95": 355.9040129184723,
- "p99": 390.3999924659729
+ "p50": 112.8000020980835,
+ "p90": 122.46400117874146,
+ "p95": 125.31200051307678,
+ "p99": 132.79999792575836
},
"combine": {
- "p50": 459.55199003219604,
- "p90": 462.911993265152,
- "p95": 470.8159863948822,
- "p99": 483.6159944534302
+ "p50": 106.46399855613708,
+ "p90": 113.24799805879593,
+ "p95": 117.79200285673141,
+ "p99": 129.98400628566742
},
"roundtrip": {
- "p50": 786.9439721107483,
- "p90": 792.8640246391296,
- "p95": 797.5040078163147,
- "p99": 829.7920227050781
+ "p50": 190.33600389957428,
+ "p90": 199.35999810695648,
+ "p95": 202.01599597930908,
+ "p99": 218.62399578094482
},
"isolatedSum": {
- "p50": 805.3119778633118,
- "p90": 815.6159818172455,
- "p95": 826.7199993133545,
- "p99": 874.0159869194031
+ "p50": 219.26400065422058,
+ "p90": 235.71199923753738,
+ "p95": 243.1040033698082,
+ "p99": 262.7840042114258
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 2,
- "recvTokensMax": 6144,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 648.5120058059692,
- "p90": 655.6479930877686,
- "p95": 660.5439782142639,
- "p99": 667.7119731903076
+ "p50": 128.38399410247803,
+ "p90": 137.95199990272522,
+ "p95": 141.50400459766388,
+ "p99": 148.51200580596924
},
"combine": {
- "p50": 828.0959725379944,
- "p90": 838.4320139884949,
- "p95": 840.6400084495544,
- "p99": 855.0400137901306
+ "p50": 126.08000636100769,
+ "p90": 132.79999792575836,
+ "p95": 134.5600038766861,
+ "p99": 145.47200500965118
},
"roundtrip": {
- "p50": 1455.3279876708984,
- "p90": 1466.5919542312622,
- "p95": 1471.0079431533813,
- "p99": 1482.4320077896118
+ "p50": 226.4000028371811,
+ "p90": 236.03199422359467,
+ "p95": 239.96800184249878,
+ "p99": 246.20799720287323
},
"isolatedSum": {
- "p50": 1476.6079783439636,
- "p90": 1494.0800070762634,
- "p95": 1501.1839866638184,
- "p99": 1522.7519869804382
+ "p50": 254.46400046348572,
+ "p90": 270.7519978284836,
+ "p95": 276.06400847435,
+ "p99": 293.9840108156204
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 939524096,
- "combineLogicalBytes": 939524096,
- "fanoutMean": 2,
- "recvTokensMax": 12288,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -21631,47 +20921,48 @@
]
},
{
- "id": "cx-eac6e215",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32",
- "colorKey": "b300_5b993222",
- "comparisonKey": "d3d6cc25fee96bc7",
+ "id": "cx-0d0d8f23",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "61b32b843c8fbec1",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:52.035249+00:00",
+ "generatedAt": "2026-06-29T13:47:31.111489+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_09",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · hotspot-single",
+ "label": "GB300 EP8 · deepep · bf16",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -21679,426 +20970,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "38fd0bcf7109c32",
- "workloadId": "set:3:b952d4a43d688b50",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271903494",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271903494",
- "createdAt": "2026-06-26T23:58:52.035249+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 106.08000308275223,
- "p90": 107.87200182676315,
- "p95": 109.15199667215347,
- "p99": 120.28799951076508
+ "p50": 89.02399986982346,
+ "p90": 103.29599678516388,
+ "p95": 107.64800012111664,
+ "p99": 119.39200013875961
},
"combine": {
- "p50": 127.83999741077423,
- "p90": 129.85600531101227,
- "p95": 130.97600638866425,
- "p99": 139.5840048789978
+ "p50": 79.55200225114822,
+ "p90": 84.70399677753448,
+ "p95": 87.36000210046768,
+ "p99": 95.16800194978714
},
"roundtrip": {
- "p50": 219.39200162887573,
- "p90": 224.16000068187714,
- "p95": 225.055992603302,
- "p99": 235.35999655723572
+ "p50": 149.85600113868713,
+ "p90": 162.4639928340912,
+ "p95": 166.81599617004395,
+ "p99": 180.35200238227844
},
"isolatedSum": {
- "p50": 233.92000049352646,
- "p90": 237.72800713777542,
- "p95": 240.12800306081772,
- "p99": 259.8720043897629
+ "p50": 168.57600212097168,
+ "p90": 187.99999356269836,
+ "p95": 195.00800222158432,
+ "p99": 214.56000208854675
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 217.95199811458588,
- "p90": 224.03199970722198,
- "p95": 228.83200645446777,
- "p99": 252.70399451255798
+ "p50": 89.85599875450134,
+ "p90": 103.29599678516388,
+ "p95": 109.31199789047241,
+ "p99": 124.70400333404541
},
"combine": {
- "p50": 336.38399839401245,
- "p90": 338.49599957466125,
- "p95": 339.9040102958679,
- "p99": 348.4160006046295
+ "p50": 81.34400099515915,
+ "p90": 86.14400029182434,
+ "p95": 89.63199704885483,
+ "p99": 95.64799815416336
},
"roundtrip": {
- "p50": 535.8399748802185,
- "p90": 546.0159778594971,
- "p95": 551.3280034065247,
- "p99": 558.3680272102356
+ "p50": 148.95999431610107,
+ "p90": 160.22400557994843,
+ "p95": 163.42400014400482,
+ "p99": 171.48800194263458
},
"isolatedSum": {
- "p50": 554.3359965085983,
- "p90": 562.5279992818832,
- "p95": 568.7360167503357,
- "p99": 601.1199951171875
+ "p50": 171.1999997496605,
+ "p90": 189.43999707698822,
+ "p95": 198.94399493932724,
+ "p99": 220.35200148820877
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311091200,
- "combineLogicalBytes": 311091200,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 685.0559711456299,
- "p90": 694.5599913597107,
- "p95": 696.3199973106384,
- "p99": 705.3760290145874
- },
- "combine": {
- "p50": 1085.4400396347046,
- "p90": 1086.3360166549683,
- "p95": 1087.6480340957642,
- "p99": 1096.7680215835571
- },
- "roundtrip": {
- "p50": 1752.511978149414,
- "p90": 1760.3199481964111,
- "p95": 1762.0480060577393,
- "p99": 1772.6080417633057
- },
- "isolatedSum": {
- "p50": 1770.4960107803345,
- "p90": 1780.896008014679,
- "p95": 1783.9680314064026,
- "p99": 1802.1440505981445
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1241511936,
- "combineLogicalBytes": 1241511936,
- "fanoutMean": 5.28570556640625,
- "recvTokensMax": 16384,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b38b286e",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c",
- "colorKey": "b300_5b993222",
- "comparisonKey": "acefe503588b8e8a",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T09:50:40.107682+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_13",
- "sku": "b300",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "bfbb64a166e9f1c",
- "workloadId": "set:6:b952d4a43d688b50",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28285666343",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285666343",
- "createdAt": "2026-06-27T09:50:40.107682+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 104.12800312042236,
- "p90": 105.79200088977814,
- "p95": 106.65600001811981,
- "p99": 124.79999661445618
+ "p50": 90.08000046014786,
+ "p90": 105.43999820947647,
+ "p95": 111.26399785280228,
+ "p99": 127.26399302482605
},
"combine": {
- "p50": 128.9599984884262,
- "p90": 138.59200477600098,
- "p95": 139.42399621009827,
- "p99": 144.16000247001648
+ "p50": 82.71999657154083,
+ "p90": 87.2960016131401,
+ "p95": 91.0400003194809,
+ "p99": 97.08800166845322
},
"roundtrip": {
- "p50": 217.3759937286377,
- "p90": 224.0000069141388,
- "p95": 225.055992603302,
- "p99": 228.89600694179535
+ "p50": 151.74399316310883,
+ "p90": 164.76799547672272,
+ "p95": 169.72799599170685,
+ "p99": 210.14399826526642
},
"isolatedSum": {
- "p50": 233.08800160884857,
- "p90": 244.3840056657791,
- "p95": 246.07999622821808,
- "p99": 268.95999908447266
+ "p50": 172.7999970316887,
+ "p90": 192.73599982261658,
+ "p95": 202.30399817228317,
+ "p99": 224.35199469327927
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 141.76000654697418,
- "p90": 143.61600577831268,
- "p95": 145.53600549697876,
- "p99": 164.44799304008484
+ "p50": 92.41600334644318,
+ "p90": 105.56799918413162,
+ "p95": 109.6000000834465,
+ "p99": 120.86399644613266
},
"combine": {
- "p50": 188.38399648666382,
- "p90": 190.17599523067474,
- "p95": 192.00000166893005,
- "p99": 201.9840031862259
+ "p50": 83.99999886751175,
+ "p90": 88.03199976682663,
+ "p95": 92.38400310277939,
+ "p99": 98.14400225877762
},
"roundtrip": {
- "p50": 318.11198592185974,
- "p90": 323.64800572395325,
- "p95": 325.0240087509155,
- "p99": 335.3919982910156
+ "p50": 154.30399775505066,
+ "p90": 166.75199568271637,
+ "p95": 170.17599940299988,
+ "p99": 179.9039989709854
},
"isolatedSum": {
- "p50": 330.144003033638,
- "p90": 333.7920010089874,
- "p95": 337.5360071659088,
- "p99": 366.43199622631073
+ "p50": 176.41600221395493,
+ "p90": 193.59999895095825,
+ "p95": 201.9840031862259,
+ "p99": 219.00799870491028
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 156090368,
- "combineLogicalBytes": 156090368,
- "fanoutMean": 5.31640625,
- "recvTokensMax": 2048,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 216.8319970369339,
- "p90": 220.0320065021515,
- "p95": 223.32799434661865,
- "p99": 231.29600286483765
+ "p50": 93.53599697351456,
+ "p90": 105.85600137710571,
+ "p95": 110.11199653148651,
+ "p99": 121.21599912643433
},
"combine": {
- "p50": 336.5760147571564,
- "p90": 338.20798993110657,
- "p95": 339.6799862384796,
- "p99": 351.23199224472046
+ "p50": 85.63199639320374,
+ "p90": 92.79999881982803,
+ "p95": 96.41599655151367,
+ "p99": 102.4319976568222
},
"roundtrip": {
- "p50": 534.6879959106445,
- "p90": 541.5040254592896,
- "p95": 543.8399910926819,
- "p99": 547.327995300293
+ "p50": 156.92800283432007,
+ "p90": 169.3120002746582,
+ "p95": 173.92000555992126,
+ "p99": 184.7040057182312
},
"isolatedSum": {
- "p50": 553.4080117940903,
- "p90": 558.239996433258,
- "p95": 563.0079805850983,
- "p99": 582.5279951095581
+ "p50": 179.1679933667183,
+ "p90": 198.65600019693375,
+ "p95": 206.52799308300018,
+ "p99": 223.64799678325653
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311091200,
- "combineLogicalBytes": 311091200,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 4096,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 369.1520094871521,
- "p90": 377.1199882030487,
- "p95": 378.62399220466614,
- "p99": 388.35200667381287
+ "p50": 98.4639972448349,
+ "p90": 108.86400192975998,
+ "p95": 112.76800185441971,
+ "p99": 120.67200243473053
},
"combine": {
- "p50": 580.5119872093201,
- "p90": 582.1120142936707,
- "p95": 582.5920104980469,
- "p99": 585.3760242462158
+ "p50": 89.15200084447861,
+ "p90": 96.6079980134964,
+ "p95": 98.9760011434555,
+ "p99": 104.89600151777267
},
"roundtrip": {
- "p50": 939.1679763793945,
- "p90": 944.2880153656006,
- "p95": 945.9840059280396,
- "p99": 958.079993724823
+ "p50": 161.98399662971497,
+ "p90": 172.7360039949417,
+ "p95": 176.60799622535706,
+ "p99": 186.3359957933426
},
"isolatedSum": {
- "p50": 949.6639966964722,
- "p90": 959.2320024967194,
- "p95": 961.216002702713,
- "p99": 973.7280309200287
+ "p50": 187.6159980893135,
+ "p90": 205.47199994325638,
+ "p95": 211.7440029978752,
+ "p99": 225.5680039525032
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 620648448,
- "combineLogicalBytes": 620648448,
- "fanoutMean": 5.2847900390625,
- "recvTokensMax": 8192,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 684.1279864311218,
- "p90": 693.3119893074036,
- "p95": 694.6560144424438,
- "p99": 802.5919795036316
+ "p50": 110.6560006737709,
+ "p90": 119.23199892044067,
+ "p95": 123.23199957609177,
+ "p99": 131.99999928474426
},
"combine": {
- "p50": 1085.15202999115,
- "p90": 1086.7520570755005,
- "p95": 1087.3279571533203,
- "p99": 1098.9760160446167
+ "p50": 104.63999956846237,
+ "p90": 110.33599823713303,
+ "p95": 112.5440001487732,
+ "p99": 121.50400131940842
},
"roundtrip": {
- "p50": 1750.656008720398,
- "p90": 1759.071946144104,
- "p95": 1762.7840042114258,
- "p99": 1789.2800569534302
+ "p50": 186.11200153827667,
+ "p90": 193.85600090026855,
+ "p95": 197.85599410533905,
+ "p99": 204.22400534152985
},
"isolatedSum": {
- "p50": 1769.2800164222717,
- "p90": 1780.064046382904,
- "p95": 1781.9839715957642,
- "p99": 1901.5679955482483
+ "p50": 215.29600024223328,
+ "p90": 229.5679971575737,
+ "p95": 235.77599972486496,
+ "p99": 253.50400060415268
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1241511936,
- "combineLogicalBytes": 1241511936,
- "fanoutMean": 5.28570556640625,
- "recvTokensMax": 16384,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
"stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 1323.3599662780762,
- "p90": 1332.1599960327148,
- "p95": 1336.5440368652344,
- "p99": 1345.3439474105835
+ "p50": 124.22399967908859,
+ "p90": 132.9919993877411,
+ "p95": 136.09600067138672,
+ "p99": 145.21600306034088
},
"combine": {
- "p50": 2080.22403717041,
- "p90": 2082.0159912109375,
- "p95": 2084.0959548950195,
- "p99": 2094.655990600586
+ "p50": 123.4240010380745,
+ "p90": 131.26400113105774,
+ "p95": 134.20799374580383,
+ "p99": 136.89599931240082
},
"roundtrip": {
- "p50": 3382.688045501709,
- "p90": 3391.9999599456787,
- "p95": 3396.4478969573975,
- "p99": 3412.480115890503
+ "p50": 221.98399901390076,
+ "p90": 231.58399760723114,
+ "p95": 234.9119931459427,
+ "p99": 247.99999594688416
},
"isolatedSum": {
- "p50": 3403.5840034484863,
- "p90": 3414.1759872436523,
- "p95": 3420.639991760254,
- "p99": 3439.9999380111694
+ "p50": 247.6480007171631,
+ "p90": 264.2560005187988,
+ "p95": 270.30399441719055,
+ "p99": 282.1120023727417
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2484242432,
- "combineLogicalBytes": 2484242432,
- "fanoutMean": 5.288299560546875,
- "recvTokensMax": 32768,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -22106,47 +21289,48 @@
]
},
{
- "id": "cx-6ace94e5",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8",
- "colorKey": "b300_39a5906c",
- "comparisonKey": "4191eeca9b95da96",
+ "id": "cx-cb8753e8",
+ "identity": "gb300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "e0f3959bcbc3fc9a",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:50:47.306052+00:00",
+ "generatedAt": "2026-06-29T13:57:22.452311+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_16",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · hotspot-single+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
+ "label": "GB300 EP8 · deepep · bf16",
+ "model": "Kimi-K2",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 288,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single+eplb",
+ "experts": 384,
+ "routing": "uniform",
+ "routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": true,
+ "eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -22154,244 +21338,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "29ae5ace13636f8",
- "workloadId": "set:6:b952d4a43d688b50",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.8466796875,
- "eplbImbalanceAfter": 1.0002700343276514,
- "backendVersion": "1.2.1",
+ "traceSignature": "6d507ec2ec8998f",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285668831",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285668831",
- "createdAt": "2026-06-27T09:50:47.306052+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 95.74399888515472,
- "p90": 99.84000027179718,
- "p95": 101.31199657917023,
- "p99": 107.42399841547012
+ "p50": 106.59199953079224,
+ "p90": 121.15199863910675,
+ "p95": 125.47199428081512,
+ "p99": 157.3439985513687
},
"combine": {
- "p50": 115.26399850845337,
- "p90": 116.35199934244156,
- "p95": 117.5680011510849,
- "p99": 131.77600502967834
+ "p50": 82.91199803352356,
+ "p90": 87.26400136947632,
+ "p95": 92.3520028591156,
+ "p99": 99.58399832248688
},
"roundtrip": {
- "p50": 194.14399564266205,
- "p90": 199.52000677585602,
- "p95": 200.54399967193604,
- "p99": 206.68800175189972
+ "p50": 162.9440039396286,
+ "p90": 173.7920045852661,
+ "p95": 178.0800074338913,
+ "p99": 187.23200261592865
},
"isolatedSum": {
- "p50": 211.0079973936081,
- "p90": 216.19199961423874,
- "p95": 218.87999773025513,
- "p99": 239.20000344514847
+ "p50": 189.5039975643158,
+ "p90": 208.41600000858307,
+ "p95": 217.82399713993073,
+ "p99": 256.9279968738556
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77701120,
- "combineLogicalBytes": 77701120,
- "fanoutMean": 5.29296875,
- "recvTokensMax": 697,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 602112,
+ "combineLogicalBytes": 602112,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 8,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 134.5919966697693,
- "p90": 139.8400068283081,
- "p95": 141.34399592876434,
- "p99": 147.77599275112152
+ "p50": 106.46399855613708,
+ "p90": 119.77600306272507,
+ "p95": 126.62400305271149,
+ "p99": 169.5680022239685
},
"combine": {
- "p50": 155.87200224399567,
- "p90": 165.27999937534332,
- "p95": 170.43200135231018,
- "p99": 176.7680048942566
+ "p50": 84.60800349712372,
+ "p90": 92.70399808883667,
+ "p95": 96.83199971914291,
+ "p99": 122.27199971675873
},
"roundtrip": {
- "p50": 273.27999472618103,
- "p90": 280.5120050907135,
- "p95": 281.72799944877625,
- "p99": 288.35201263427734
+ "p50": 166.9120043516159,
+ "p90": 178.1120002269745,
+ "p95": 182.6239973306656,
+ "p99": 194.87999379634857
},
"isolatedSum": {
- "p50": 290.46399891376495,
- "p90": 305.1200062036514,
- "p95": 311.7759972810745,
- "p99": 324.5439976453781
+ "p50": 191.0720020532608,
+ "p90": 212.48000115156174,
+ "p95": 223.4560027718544,
+ "p99": 291.84000194072723
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155187200,
- "combineLogicalBytes": 155187200,
- "fanoutMean": 5.28564453125,
- "recvTokensMax": 1372,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 1218560,
+ "combineLogicalBytes": 1218560,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 14,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 194.46399807929993,
- "p90": 200.83199441432953,
- "p95": 203.64800095558167,
- "p99": 213.24799954891205
+ "p50": 107.71200060844421,
+ "p90": 122.52800166606903,
+ "p95": 128.7039965391159,
+ "p99": 157.50400722026825
},
"combine": {
- "p50": 265.3760015964508,
- "p90": 274.3679881095886,
- "p95": 274.84801411628723,
- "p99": 277.75999903678894
+ "p50": 85.05599945783615,
+ "p90": 93.82399916648865,
+ "p95": 96.89600020647049,
+ "p99": 120.89599668979645
},
"roundtrip": {
- "p50": 444.19199228286743,
- "p90": 448.67199659347534,
- "p95": 450.27199387550354,
- "p99": 476.0960042476654
+ "p50": 165.40800034999847,
+ "p90": 177.15199291706085,
+ "p95": 181.92000687122345,
+ "p99": 244.57600712776184
},
"isolatedSum": {
- "p50": 459.83999967575073,
- "p90": 475.19998252391815,
- "p95": 478.4960150718689,
- "p99": 491.007998585701
+ "p50": 192.76800006628036,
+ "p90": 216.35200083255768,
+ "p95": 225.5999967455864,
+ "p99": 278.4000039100647
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311162880,
- "combineLogicalBytes": 311162880,
- "fanoutMean": 5.299072265625,
- "recvTokensMax": 2761,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2408448,
+ "combineLogicalBytes": 2408448,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 26,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 326.6560137271881,
- "p90": 330.3999900817871,
- "p95": 331.29599690437317,
- "p99": 342.8800106048584
+ "p50": 109.02400314807892,
+ "p90": 121.18399888277054,
+ "p95": 125.56800246238708,
+ "p99": 141.31200313568115
},
"combine": {
- "p50": 461.88798546791077,
- "p90": 470.94398736953735,
- "p95": 471.45599126815796,
- "p99": 483.2639992237091
+ "p50": 89.91999924182892,
+ "p90": 96.6079980134964,
+ "p95": 98.49599748849869,
+ "p99": 108.09600353240967
},
"roundtrip": {
- "p50": 770.4960107803345,
- "p90": 775.3599882125854,
- "p95": 777.5999903678894,
- "p99": 795.9039807319641
+ "p50": 168.73599588871002,
+ "p90": 180.89599907398224,
+ "p95": 185.37600338459015,
+ "p99": 204.48000729084015
},
"isolatedSum": {
- "p50": 788.5439991950989,
- "p90": 801.3439774513245,
- "p95": 802.7519881725311,
- "p99": 826.1440098285675
+ "p50": 198.94400238990784,
+ "p90": 217.79199689626694,
+ "p95": 224.06399995088577,
+ "p99": 249.40800666809082
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 619974656,
- "combineLogicalBytes": 619974656,
- "fanoutMean": 5.279052734375,
- "recvTokensMax": 5481,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4831232,
+ "combineLogicalBytes": 4831232,
+ "fanoutMean": 5.265625,
+ "recvTokensMax": 48,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 570.1760053634644,
- "p90": 577.567994594574,
- "p95": 579.5199871063232,
- "p99": 643.8400149345398
+ "p50": 107.13600367307663,
+ "p90": 119.71200257539749,
+ "p95": 124.79999661445618,
+ "p99": 148.3200043439865
},
"combine": {
- "p50": 815.8400058746338,
- "p90": 826.5600204467773,
- "p95": 827.5840282440186,
- "p99": 830.8159708976746
+ "p50": 91.80799871683121,
+ "p90": 97.05600142478943,
+ "p95": 99.74399954080582,
+ "p99": 116.19199812412262
},
"roundtrip": {
- "p50": 1370.9759712219238,
- "p90": 1381.0559511184692,
- "p95": 1383.8720321655273,
- "p99": 1396.672010421753
+ "p50": 169.5680022239685,
+ "p90": 180.09600043296814,
+ "p95": 184.64000523090363,
+ "p99": 193.24800372123718
},
"isolatedSum": {
- "p50": 1386.0160112380981,
- "p90": 1404.1280150413513,
- "p95": 1407.1040153503418,
- "p99": 1474.6559858322144
+ "p50": 198.94400238990784,
+ "p90": 216.76800400018692,
+ "p95": 224.543996155262,
+ "p99": 264.51200246810913
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1240020992,
- "combineLogicalBytes": 1240020992,
- "fanoutMean": 5.27935791015625,
- "recvTokensMax": 10883,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 9848832,
+ "combineLogicalBytes": 9848832,
+ "fanoutMean": 5.3671875,
+ "recvTokensMax": 91,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1064.7039413452148,
- "p90": 1068.4479475021362,
- "p95": 1071.8079805374146,
- "p99": 1093.3760404586792
+ "p50": 108.0000028014183,
+ "p90": 120.31999975442886,
+ "p95": 125.82400441169739,
+ "p99": 133.82400572299957
},
"combine": {
- "p50": 1526.2080430984497,
- "p90": 1530.56001663208,
- "p95": 1539.3919944763184,
- "p99": 1604.8959493637085
+ "p50": 95.83999961614609,
+ "p90": 100.5759984254837,
+ "p95": 106.46399855613708,
+ "p99": 133.7919980287552
},
"roundtrip": {
- "p50": 2567.7759647369385,
- "p90": 2580.415964126587,
- "p95": 2587.8400802612305,
- "p99": 2656.8961143493652
+ "p50": 178.1120002269745,
+ "p90": 189.08800184726715,
+ "p95": 193.53599846363068,
+ "p99": 213.21600675582886
},
"isolatedSum": {
- "p50": 2590.9119844436646,
- "p90": 2599.0079641342163,
- "p95": 2611.199975013733,
- "p99": 2698.2719898223877
+ "p50": 203.8400024175644,
+ "p90": 220.89599817991257,
+ "p95": 232.28800296783447,
+ "p99": 267.61600375175476
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480414720,
- "combineLogicalBytes": 2480414720,
- "fanoutMean": 5.2801513671875,
- "recvTokensMax": 21702,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 19496960,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 178,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 119.35999989509583,
+ "p90": 130.5599957704544,
+ "p95": 135.3919953107834,
+ "p99": 178.81600558757782
+ },
+ "combine": {
+ "p50": 109.50399935245514,
+ "p90": 117.76000261306763,
+ "p95": 120.60800194740295,
+ "p99": 131.67999684810638
+ },
+ "roundtrip": {
+ "p50": 197.40800559520721,
+ "p90": 206.84799551963806,
+ "p95": 210.81599593162537,
+ "p99": 228.15999388694763
+ },
+ "isolatedSum": {
+ "p50": 228.86399924755096,
+ "p90": 248.31999838352203,
+ "p95": 255.99999725818634,
+ "p99": 310.4960024356842
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 38836224,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 372,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 134.8160058259964,
+ "p90": 146.7839926481247,
+ "p95": 156.8640023469925,
+ "p99": 189.5039975643158
+ },
+ "combine": {
+ "p50": 131.32800161838531,
+ "p90": 136.1279934644699,
+ "p95": 141.02399349212646,
+ "p99": 171.32799327373505
+ },
+ "roundtrip": {
+ "p50": 233.95200073719025,
+ "p90": 242.97599494457245,
+ "p95": 247.3279982805252,
+ "p99": 270.52798867225647
+ },
+ "isolatedSum": {
+ "p50": 266.1440074443817,
+ "p90": 282.9119861125946,
+ "p95": 297.88799583911896,
+ "p99": 360.83199083805084
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 77514752,
+ "combineLogicalBytes": 77514752,
+ "fanoutMean": 5.2802734375,
+ "recvTokensMax": 707,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -22399,47 +21657,48 @@
]
},
{
- "id": "cx-f0a8ca82",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d",
- "colorKey": "b300_e3d449ce",
- "comparisonKey": "5a2fc26356c2c7bc",
+ "id": "cx-ea3485e1",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||af0b2d2a9119979",
+ "colorKey": "gb300_d4c8afb8",
+ "comparisonKey": "947bc78137c317bf",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:47:59.202782+00:00",
+ "generatedAt": "2026-06-29T13:41:20.865867+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_10",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · uniform+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
+ "label": "GB300 EP8 · deepep · bf16 · balanced",
+ "model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 288,
- "routing": "uniform",
- "routingLabel": "uniform+eplb",
+ "experts": 256,
+ "routing": "balanced",
+ "routingLabel": "balanced",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": true,
+ "eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -22447,244 +21706,170 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "2225dbbdab9bf2d",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.006072998046875,
- "eplbImbalanceAfter": 1.0000152587890625,
- "backendVersion": "1.2.1",
+ "traceSignature": "af0b2d2a9119979",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285607618",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285607618",
- "createdAt": "2026-06-27T09:47:59.202782+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 94.46399658918381,
- "p90": 100.35199671983719,
- "p95": 103.2319962978363,
- "p99": 128.4160017967224
- },
- "combine": {
- "p50": 115.03999680280685,
- "p90": 115.80800265073776,
- "p95": 116.7680025100708,
- "p99": 120.99199742078781
- },
- "roundtrip": {
- "p50": 193.4400051832199,
- "p90": 200.1280039548874,
- "p95": 201.9840031862259,
- "p99": 223.1999933719635
- },
- "isolatedSum": {
- "p50": 209.50399339199066,
- "p90": 216.15999937057495,
- "p95": 219.9999988079071,
- "p99": 249.40799921751022
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77041664,
- "combineLogicalBytes": 77041664,
- "fanoutMean": 5.248046875,
- "recvTokensMax": 686,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 135.3919953107834,
- "p90": 139.80799913406372,
- "p95": 141.4719969034195,
- "p99": 152.6080071926117
- },
- "combine": {
- "p50": 153.9199948310852,
- "p90": 163.7440025806427,
- "p95": 164.22399878501892,
- "p99": 176.67199671268463
- },
- "roundtrip": {
- "p50": 270.4319953918457,
- "p90": 275.4560112953186,
- "p95": 277.47198939323425,
- "p99": 282.4000120162964
- },
- "isolatedSum": {
- "p50": 289.3119901418686,
- "p90": 303.5520017147064,
- "p95": 305.6959956884384,
- "p99": 329.2800039052963
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154542080,
- "combineLogicalBytes": 154542080,
- "fanoutMean": 5.263671875,
- "recvTokensMax": 1365,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 195.0719952583313,
- "p90": 202.91200280189514,
- "p95": 204.76800203323364,
- "p99": 211.5519940853119
+ "p50": 93.28000247478485,
+ "p90": 106.9440022110939,
+ "p95": 113.21599781513214,
+ "p99": 137.43999600410461
},
"combine": {
- "p50": 273.75999093055725,
- "p90": 275.4560112953186,
- "p95": 276.70401334762573,
- "p99": 286.8480086326599
+ "p50": 82.56000280380249,
+ "p90": 86.81599795818329,
+ "p95": 91.00800007581711,
+ "p99": 94.55999732017517
},
"roundtrip": {
- "p50": 438.33601474761963,
- "p90": 447.6799964904785,
- "p95": 457.2800099849701,
- "p99": 516.0959959030151
+ "p50": 152.12799608707428,
+ "p90": 164.73600268363953,
+ "p95": 168.12799870967865,
+ "p99": 181.34400248527527
},
"isolatedSum": {
- "p50": 468.83198618888855,
- "p90": 478.36801409721375,
- "p95": 481.4720153808594,
- "p99": 498.4000027179718
+ "p50": 175.84000527858734,
+ "p90": 193.7600001692772,
+ "p95": 204.22399789094925,
+ "p99": 231.99999332427979
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 310589440,
- "combineLogicalBytes": 310589440,
- "fanoutMean": 5.289306640625,
- "recvTokensMax": 2746,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 917504,
+ "combineLogicalBytes": 917504,
+ "fanoutMean": 8,
+ "recvTokensMax": 8,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 325.408011674881,
- "p90": 328.99200916290283,
- "p95": 330.2080035209656,
- "p99": 342.0160114765167
+ "p50": 96.12800180912018,
+ "p90": 108.70400071144104,
+ "p95": 112.2559979557991,
+ "p99": 135.68000495433807
},
"combine": {
- "p50": 459.48800444602966,
- "p90": 470.46399116516113,
- "p95": 470.94398736953735,
- "p99": 482.87999629974365
+ "p50": 85.95199882984161,
+ "p90": 93.88799965381622,
+ "p95": 95.51999717950821,
+ "p99": 106.46399855613708
},
"roundtrip": {
- "p50": 764.959990978241,
- "p90": 773.792028427124,
- "p95": 783.456027507782,
- "p99": 817.8880214691162
+ "p50": 158.59200060367584,
+ "p90": 168.16000640392303,
+ "p95": 171.74400389194489,
+ "p99": 180.92800676822662
},
"isolatedSum": {
- "p50": 784.8960161209106,
- "p90": 799.456000328064,
- "p95": 801.1519908905029,
- "p99": 824.8960077762604
+ "p50": 182.0800006389618,
+ "p90": 202.59200036525726,
+ "p95": 207.7759951353073,
+ "p99": 242.14400351047516
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 619171840,
- "combineLogicalBytes": 619171840,
- "fanoutMean": 5.272216796875,
- "recvTokensMax": 5467,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 7340032,
+ "combineLogicalBytes": 7340032,
+ "fanoutMean": 8,
+ "recvTokensMax": 64,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 568.9600110054016,
- "p90": 572.8960037231445,
- "p95": 575.8079886436462,
- "p99": 665.9200191497803
+ "p50": 103.4879982471466,
+ "p90": 114.3999993801117,
+ "p95": 118.46400052309036,
+ "p99": 127.80800461769104
},
"combine": {
- "p50": 814.0159845352173,
- "p90": 815.6480193138123,
- "p95": 817.8880214691162,
- "p99": 888.8959884643555
+ "p50": 93.82399916648865,
+ "p90": 98.01600128412247,
+ "p95": 103.7760004401207,
+ "p99": 111.26399785280228
},
"roundtrip": {
- "p50": 1359.7760200500488,
- "p90": 1370.0480461120605,
- "p95": 1375.8080005645752,
- "p99": 1418.239951133728
+ "p50": 169.50400173664093,
+ "p90": 178.01600694656372,
+ "p95": 182.01600015163422,
+ "p99": 187.26399540901184
},
"isolatedSum": {
- "p50": 1382.975995540619,
- "p90": 1388.5440230369568,
- "p95": 1393.6960101127625,
- "p99": 1554.8160076141357
+ "p50": 197.31199741363525,
+ "p90": 212.41600066423416,
+ "p95": 222.24000096321106,
+ "p99": 239.07200247049332
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1238945792,
- "combineLogicalBytes": 1238945792,
- "fanoutMean": 5.2747802734375,
- "recvTokensMax": 10913,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 29360128,
+ "combineLogicalBytes": 29360128,
+ "fanoutMean": 8,
+ "recvTokensMax": 256,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 1064.2880201339722,
- "p90": 1069.823980331421,
- "p95": 1076.5119791030884,
- "p99": 1097.6639986038208
+ "p50": 138.40000331401825,
+ "p90": 148.54399859905243,
+ "p95": 152.3520052433014,
+ "p99": 162.08000481128693
},
"combine": {
- "p50": 1516.8960094451904,
- "p90": 1527.9040336608887,
- "p95": 1529.8240184783936,
- "p99": 1575.8399963378906
+ "p50": 144.76799964904785,
+ "p90": 153.3759981393814,
+ "p95": 155.2319973707199,
+ "p99": 158.55999290943146
},
"roundtrip": {
- "p50": 2567.840099334717,
- "p90": 2580.9600353240967,
- "p95": 2591.4878845214844,
- "p99": 2632.960081100464
+ "p50": 255.48800826072693,
+ "p90": 264.3199861049652,
+ "p95": 267.520010471344,
+ "p99": 273.6319899559021
},
"isolatedSum": {
- "p50": 2581.1840295791626,
- "p90": 2597.7280139923096,
- "p95": 2606.335997581482,
- "p99": 2673.5039949417114
+ "p50": 283.1680029630661,
+ "p90": 301.91999673843384,
+ "p95": 307.5840026140213,
+ "p99": 320.6399977207184
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2481747968,
- "combineLogicalBytes": 2481747968,
- "fanoutMean": 5.282989501953125,
- "recvTokensMax": 21789,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 117440512,
+ "combineLogicalBytes": 117440512,
+ "fanoutMean": 8,
+ "recvTokensMax": 1024,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -22692,47 +21877,48 @@
]
},
{
- "id": "cx-4cb883eb",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786",
- "colorKey": "b300_8d2811e3",
- "comparisonKey": "c2361bc487e04e6e",
+ "id": "cx-8068f2a4",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||f0bc700e9998f70",
+ "colorKey": "gb300_f163949b",
+ "comparisonKey": "13efb5d3604f8176",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:36.475166+00:00",
+ "generatedAt": "2026-06-29T13:43:17.120318+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf",
+ "label": "GB300 EP8 · deepep · bf16 · balanced-rank-local",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
+ "routing": "balanced-rank-local",
+ "routingLabel": "balanced-rank-local",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -22740,133 +21926,170 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "4caecd33bedf786",
- "workloadId": "set:3:830e36e88869e222",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "f0bc700e9998f70",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271889990",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271889990",
- "createdAt": "2026-06-26T23:58:36.475166+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 103.87200117111206,
- "p90": 106.88000172376633,
- "p95": 109.3439981341362,
- "p99": 126.62400305271149
+ "p50": 110.68800091743469,
+ "p90": 150.751993060112,
+ "p95": 157.31200575828552,
+ "p99": 168.60799491405487
},
"combine": {
- "p50": 126.91199779510498,
- "p90": 128.1919926404953,
- "p95": 128.57599556446075,
- "p99": 139.615997672081
+ "p50": 71.29599899053574,
+ "p90": 105.59999942779541,
+ "p95": 115.9679964184761,
+ "p99": 147.32800424098969
},
"roundtrip": {
- "p50": 209.6640020608902,
- "p90": 213.95200490951538,
- "p95": 215.488001704216,
- "p99": 220.47999501228333
+ "p50": 144.99199390411377,
+ "p90": 185.31200289726257,
+ "p95": 193.66399943828583,
+ "p99": 216.95999801158905
},
"isolatedSum": {
- "p50": 230.78399896621704,
- "p90": 235.07199436426163,
- "p95": 237.91999369859695,
- "p99": 266.2400007247925
+ "p50": 181.98399990797043,
+ "p90": 256.3519924879074,
+ "p95": 273.2800021767616,
+ "p99": 315.93599915504456
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 114688,
+ "combineLogicalBytes": 114688,
+ "fanoutMean": 1,
+ "recvTokensMax": 4,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 204.73599433898926,
- "p90": 212.44800090789795,
- "p95": 213.98399770259857,
- "p99": 221.02400660514832
+ "p50": 109.95200276374817,
+ "p90": 151.32799744606018,
+ "p95": 158.01599621772766,
+ "p99": 171.39199376106262
},
"combine": {
- "p50": 325.28001070022583,
- "p90": 336.41600608825684,
- "p95": 336.70398592948914,
- "p99": 340.4799997806549
+ "p50": 78.91199737787247,
+ "p90": 109.31199789047241,
+ "p95": 119.64800208806992,
+ "p99": 140.1280015707016
},
"roundtrip": {
- "p50": 510.528028011322,
- "p90": 517.087996006012,
- "p95": 519.1680192947388,
- "p99": 526.4639854431152
+ "p50": 149.1840034723282,
+ "p90": 187.3600035905838,
+ "p95": 195.6160068511963,
+ "p99": 217.75999665260315
},
"isolatedSum": {
- "p50": 530.0160050392151,
- "p90": 548.8640069961548,
- "p95": 550.6879836320877,
- "p99": 561.5040063858032
+ "p50": 188.86400014162064,
+ "p90": 260.6399953365326,
+ "p95": 277.6639983057976,
+ "p99": 311.5199953317642
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 917504,
+ "combineLogicalBytes": 917504,
+ "fanoutMean": 1,
+ "recvTokensMax": 8,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 648.1919884681702,
- "p90": 659.0080261230469,
- "p95": 662.6240015029907,
- "p99": 672.5760102272034
+ "p50": 129.98400628566742,
+ "p90": 154.27200496196747,
+ "p95": 159.39199924468994,
+ "p99": 175.9359985589981
},
"combine": {
- "p50": 1063.8400316238403,
- "p90": 1073.248028755188,
- "p95": 1073.6639499664307,
- "p99": 1096.60804271698
+ "p50": 85.40800213813782,
+ "p90": 122.17599898576736,
+ "p95": 139.80799913406372,
+ "p99": 144.67200636863708
},
"roundtrip": {
- "p50": 1698.815941810608,
- "p90": 1708.1600427627563,
- "p95": 1712.4799489974976,
- "p99": 1786.7519855499268
+ "p50": 169.72799599170685,
+ "p90": 203.87199521064758,
+ "p95": 213.76000344753265,
+ "p99": 228.5120040178299
},
"isolatedSum": {
- "p50": 1712.0320200920105,
- "p90": 1732.2560548782349,
- "p95": 1736.2879514694214,
- "p99": 1769.1840529441833
+ "p50": 215.39200842380524,
+ "p90": 276.44800394773483,
+ "p95": 299.19999837875366,
+ "p99": 320.6080049276352
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 3670016,
+ "combineLogicalBytes": 3670016,
+ "fanoutMean": 1,
+ "recvTokensMax": 32,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 119.4240003824234,
+ "p90": 152.19199657440186,
+ "p95": 156.41599893569946,
+ "p99": 169.72799599170685
+ },
+ "combine": {
+ "p50": 90.55999666452408,
+ "p90": 122.78400361537933,
+ "p95": 137.28000223636627,
+ "p99": 153.50399911403656
+ },
+ "roundtrip": {
+ "p50": 163.71199488639832,
+ "p90": 192.25600361824036,
+ "p95": 204.48000729084015,
+ "p99": 222.04799950122833
+ },
+ "isolatedSum": {
+ "p50": 209.98399704694748,
+ "p90": 274.9760001897812,
+ "p95": 293.69600117206573,
+ "p99": 323.2319951057434
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 14680064,
+ "combineLogicalBytes": 14680064,
+ "fanoutMean": 1,
+ "recvTokensMax": 128,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -22874,47 +22097,48 @@
]
},
{
- "id": "cx-2d848061",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86",
- "colorKey": "b300_8d2811e3",
- "comparisonKey": "572a75005556e63b",
+ "id": "cx-e180de44",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||0456df9778e5c0f",
+ "colorKey": "gb300_c93e2296",
+ "comparisonKey": "657a9fa446798c99",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:48:48.610470+00:00",
+ "generatedAt": "2026-06-29T13:38:59.609788+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_06",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16 · balanced+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
+ "experts": 288,
+ "routing": "balanced",
+ "routingLabel": "balanced+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -22922,244 +22146,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "b5217e990b95f86",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "0456df9778e5c0f",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1,
+ "eplbImbalanceAfter": 1,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285625501",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285625501",
- "createdAt": "2026-06-27T09:48:48.610470+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 100.19200295209885,
- "p90": 104.2879968881607,
- "p95": 107.35999792814255,
- "p99": 113.69600147008896
+ "p50": 92.0960009098053,
+ "p90": 109.98400300741196,
+ "p95": 123.74400347471237,
+ "p99": 163.80800306797028
},
"combine": {
- "p50": 118.43200027942657,
- "p90": 127.03999876976013,
- "p95": 127.51999497413635,
- "p99": 129.2479932308197
+ "p50": 71.10399752855301,
+ "p90": 78.52800190448761,
+ "p95": 83.61600339412689,
+ "p99": 118.01599711179733
},
"roundtrip": {
- "p50": 207.58399367332458,
- "p90": 212.54399418830872,
- "p95": 213.82400393486023,
- "p99": 217.0879989862442
+ "p50": 141.6960060596466,
+ "p90": 160.64000129699707,
+ "p95": 192.9280012845993,
+ "p99": 237.31200397014618
},
"isolatedSum": {
- "p50": 218.62400323152542,
- "p90": 231.32799565792084,
- "p95": 234.8799929022789,
- "p99": 242.94399470090866
+ "p50": 163.1999984383583,
+ "p90": 188.51200491189957,
+ "p95": 207.36000686883926,
+ "p99": 281.8240001797676
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
+ "dispatchLogicalBytes": 229376,
+ "combineLogicalBytes": 229376,
+ "fanoutMean": 2,
+ "recvTokensMax": 3,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 130.40000200271606,
- "p90": 137.40800321102142,
- "p95": 138.84800672531128,
- "p99": 147.93600142002106
+ "p50": 92.67199784517288,
+ "p90": 109.02400314807892,
+ "p95": 121.40800058841705,
+ "p99": 166.20799899101257
},
"combine": {
- "p50": 176.28799378871918,
- "p90": 178.3359944820404,
- "p95": 179.87200617790222,
- "p99": 189.91999328136444
+ "p50": 72.41600006818771,
+ "p90": 80.79999685287476,
+ "p95": 83.61600339412689,
+ "p99": 94.11200135946274
},
"roundtrip": {
- "p50": 294.5280075073242,
- "p90": 299.77598786354065,
- "p95": 301.56800150871277,
- "p99": 312.22400069236755
+ "p50": 145.37599682807922,
+ "p90": 161.76000237464905,
+ "p95": 179.967999458313,
+ "p99": 224.57599639892578
},
"isolatedSum": {
- "p50": 306.68799579143524,
- "p90": 315.74399769306183,
- "p95": 318.7200129032135,
- "p99": 337.8559947013855
+ "p50": 165.0879979133606,
+ "p90": 189.82400000095367,
+ "p95": 205.02400398254395,
+ "p99": 260.3200003504753
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 100509696,
- "combineLogicalBytes": 100509696,
- "fanoutMean": 3.42333984375,
- "recvTokensMax": 2046,
+ "dispatchLogicalBytes": 458752,
+ "combineLogicalBytes": 458752,
+ "fanoutMean": 2,
+ "recvTokensMax": 6,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 207.13600516319275,
- "p90": 211.776003241539,
- "p95": 213.24799954891205,
- "p99": 220.99199891090393
+ "p50": 92.57599711418152,
+ "p90": 105.95200210809708,
+ "p95": 110.55999994277954,
+ "p99": 130.91200590133667
},
"combine": {
- "p50": 324.8960077762604,
- "p90": 334.9440097808838,
- "p95": 335.61599254608154,
- "p99": 338.46399188041687
+ "p50": 73.27999919652939,
+ "p90": 83.45600217580795,
+ "p95": 92.12800115346909,
+ "p99": 132.64000415802002
},
"roundtrip": {
- "p50": 504.12797927856445,
- "p90": 511.03997230529785,
- "p95": 513.2480263710022,
- "p99": 517.5359845161438
+ "p50": 145.50399780273438,
+ "p90": 158.720001578331,
+ "p95": 166.59200191497803,
+ "p99": 232.16000199317932
},
"isolatedSum": {
- "p50": 532.0320129394531,
- "p90": 546.7200130224228,
- "p95": 548.8639920949936,
- "p99": 559.4559907913208
+ "p50": 165.8559963107109,
+ "p90": 189.40800428390503,
+ "p95": 202.68800109624863,
+ "p99": 263.5520100593567
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 917504,
+ "combineLogicalBytes": 917504,
+ "fanoutMean": 2,
+ "recvTokensMax": 12,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 347.6479947566986,
- "p90": 353.08799147605896,
- "p95": 354.8479974269867,
- "p99": 364.4160032272339
+ "p50": 95.67999839782715,
+ "p90": 114.20799791812897,
+ "p95": 131.74399733543396,
+ "p99": 174.20800030231476
},
"combine": {
- "p50": 582.751989364624,
- "p90": 592.8320288658142,
- "p95": 593.4399962425232,
- "p99": 599.7120141983032
+ "p50": 75.23199915885925,
+ "p90": 84.57600325345993,
+ "p95": 94.94400024414062,
+ "p99": 132.89600610733032
},
"roundtrip": {
- "p50": 909.4719886779785,
- "p90": 917.248010635376,
- "p95": 919.2320108413696,
- "p99": 935.0079894065857
+ "p50": 147.32800424098969,
+ "p90": 164.44799304008484,
+ "p95": 190.46400487422943,
+ "p99": 226.623997092247
},
"isolatedSum": {
- "p50": 930.3999841213226,
- "p90": 945.9200203418732,
- "p95": 948.2879936695099,
- "p99": 964.1280174255371
+ "p50": 170.9119975566864,
+ "p90": 198.7840011715889,
+ "p95": 226.68799757957458,
+ "p99": 307.1040064096451
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 405035008,
- "combineLogicalBytes": 405035008,
- "fanoutMean": 3.4488525390625,
- "recvTokensMax": 8189,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1835008,
+ "combineLogicalBytes": 1835008,
+ "fanoutMean": 2,
+ "recvTokensMax": 24,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
+ "dispatch": {
+ "p50": 95.74399888515472,
+ "p90": 111.51999980211258,
+ "p95": 126.01600587368011,
+ "p99": 166.04800522327423
+ },
+ "combine": {
+ "p50": 78.72000336647034,
+ "p90": 85.28000116348267,
+ "p95": 94.24000233411789,
+ "p99": 134.68800485134125
+ },
+ "roundtrip": {
+ "p50": 148.0959951877594,
+ "p90": 163.35999965667725,
+ "p95": 186.27199530601501,
+ "p99": 248.09600412845612
+ },
+ "isolatedSum": {
+ "p50": 174.46400225162506,
+ "p90": 196.80000096559525,
+ "p95": 220.256008207798,
+ "p99": 300.7360100746155
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 3670016,
+ "combineLogicalBytes": 3670016,
+ "fanoutMean": 2,
+ "recvTokensMax": 48,
+ "stragglerRank": 6,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 641.152024269104,
- "p90": 652.0000100135803,
- "p95": 655.2960276603699,
- "p99": 747.6480007171631
+ "p50": 95.77599912881851,
+ "p90": 107.39199817180634,
+ "p95": 113.18399757146835,
+ "p99": 154.14400398731232
},
"combine": {
- "p50": 1062.0479583740234,
- "p90": 1072.0640420913696,
- "p95": 1072.6079940795898,
- "p99": 1096.5440273284912
+ "p50": 81.24800026416779,
+ "p90": 86.71999722719193,
+ "p95": 96.25600278377533,
+ "p99": 136.09600067138672
},
"roundtrip": {
- "p50": 1689.9199485778809,
- "p90": 1699.0079879760742,
- "p95": 1702.5599479675293,
- "p99": 1800.9920120239258
+ "p50": 149.08799529075623,
+ "p90": 160.44799983501434,
+ "p95": 163.7759953737259,
+ "p99": 207.87200331687927
},
"isolatedSum": {
- "p50": 1703.1999826431274,
- "p90": 1724.06405210495,
- "p95": 1727.9040217399597,
- "p99": 1844.1920280456543
+ "p50": 177.0239993929863,
+ "p90": 194.11199539899826,
+ "p95": 209.44000035524368,
+ "p99": 290.24000465869904
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 7340032,
+ "combineLogicalBytes": 7340032,
+ "fanoutMean": 2,
+ "recvTokensMax": 96,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 1252.0320415496826,
- "p90": 1263.424038887024,
- "p95": 1268.7360048294067,
- "p99": 1281.2479734420776
+ "p50": 99.07200187444687,
+ "p90": 113.02399635314941,
+ "p95": 124.76799637079239,
+ "p99": 159.16800498962402
},
"combine": {
- "p50": 2043.8721179962158,
- "p90": 2046.015977859497,
- "p95": 2054.464101791382,
- "p99": 2093.503952026367
+ "p50": 82.8159973025322,
+ "p90": 89.40800279378891,
+ "p95": 94.11200135946274,
+ "p99": 130.65600395202637
},
"roundtrip": {
- "p50": 3286.976099014282,
- "p90": 3298.5599040985107,
- "p95": 3302.432060241699,
- "p99": 3373.823881149292
+ "p50": 153.50399911403656,
+ "p90": 169.76000368595123,
+ "p95": 184.25600230693817,
+ "p99": 212.64000236988068
},
"isolatedSum": {
- "p50": 3295.9041595458984,
- "p90": 3309.440016746521,
- "p95": 3323.2001066207886,
- "p99": 3374.751925468445
+ "p50": 181.88799917697906,
+ "p90": 202.43199914693832,
+ "p95": 218.87999773025513,
+ "p99": 289.8240089416504
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1619795968,
- "combineLogicalBytes": 1619795968,
- "fanoutMean": 3.4481201171875,
- "recvTokensMax": 32761,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 14680064,
+ "combineLogicalBytes": 14680064,
+ "fanoutMean": 2,
+ "recvTokensMax": 192,
+ "stragglerRank": 6,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 104.92800176143646,
+ "p90": 116.15999788045883,
+ "p95": 120.2239990234375,
+ "p99": 151.7760008573532
+ },
+ "combine": {
+ "p50": 96.67199850082397,
+ "p90": 104.92800176143646,
+ "p95": 107.71200060844421,
+ "p99": 122.43200093507767
+ },
+ "roundtrip": {
+ "p50": 176.03200674057007,
+ "p90": 186.3040030002594,
+ "p95": 190.17599523067474,
+ "p99": 249.05599653720856
+ },
+ "isolatedSum": {
+ "p50": 201.60000026226044,
+ "p90": 221.0879996418953,
+ "p95": 227.9359996318817,
+ "p99": 274.2080017924309
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 29360128,
+ "combineLogicalBytes": 29360128,
+ "fanoutMean": 2,
+ "recvTokensMax": 384,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -23167,47 +22465,48 @@
]
},
{
- "id": "cx-f7ec6aaf",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3",
- "colorKey": "b300_2e44c039",
- "comparisonKey": "b198376a27b75c7f",
+ "id": "cx-61745319",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||d0eaac3a0f0ae8c",
+ "colorKey": "gb300_440d13a2",
+ "comparisonKey": "aa2d44f964843de7",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:40.218743+00:00",
+ "generatedAt": "2026-06-29T13:47:56.420171+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_11",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-heavy",
+ "label": "GB300 EP8 · deepep · bf16 · hotspot-single",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
+ "routing": "hotspot-single",
+ "routingLabel": "hotspot-single",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -23215,133 +22514,170 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "3dd868cb33839a3",
- "workloadId": "set:3:1ca614e23cc66be1",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "d0eaac3a0f0ae8c",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271897134",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271897134",
- "createdAt": "2026-06-26T23:58:40.218743+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 92.3520028591156,
- "p90": 95.90400010347366,
- "p95": 98.78399968147278,
- "p99": 113.34399878978729
+ "p50": 95.51999717950821,
+ "p90": 110.36799848079681,
+ "p95": 114.9120032787323,
+ "p99": 127.96799838542938
},
"combine": {
- "p50": 116.19199812412262,
- "p90": 120.2239990234375,
- "p95": 126.39999389648438,
- "p99": 127.68000364303589
+ "p50": 80.1599994301796,
+ "p90": 85.05599945783615,
+ "p95": 88.83199840784073,
+ "p99": 94.84799951314926
},
"roundtrip": {
- "p50": 194.5279985666275,
- "p90": 202.43200659751892,
- "p95": 204.22400534152985,
- "p99": 214.23999965190887
+ "p50": 149.3760049343109,
+ "p90": 161.0880047082901,
+ "p95": 165.56799411773682,
+ "p99": 176.15999281406403
},
"isolatedSum": {
- "p50": 208.54400098323822,
- "p90": 216.12799912691116,
- "p95": 225.18399357795715,
- "p99": 241.02400243282318
+ "p50": 175.6799966096878,
+ "p90": 195.42399793863297,
+ "p95": 203.74400168657303,
+ "p99": 222.81599789857864
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 602112,
+ "combineLogicalBytes": 602112,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 8,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 180.09600043296814,
- "p90": 188.6720061302185,
- "p95": 190.46400487422943,
- "p99": 204.83200252056122
+ "p50": 98.4639972448349,
+ "p90": 113.47199976444244,
+ "p95": 118.75200271606445,
+ "p99": 128.31999361515045
},
"combine": {
- "p50": 302.94400453567505,
- "p90": 311.42398715019226,
- "p95": 311.67998909950256,
- "p99": 315.16799330711365
+ "p50": 84.06399935483932,
+ "p90": 92.67199784517288,
+ "p95": 95.48799693584442,
+ "p99": 107.29599744081497
},
"roundtrip": {
- "p50": 473.1520116329193,
- "p90": 481.6960096359253,
- "p95": 485.0560128688812,
- "p99": 493.696004152298
+ "p50": 157.1200042963028,
+ "p90": 169.91999745368958,
+ "p95": 174.46400225162506,
+ "p99": 187.71199882030487
},
"isolatedSum": {
- "p50": 483.0400049686432,
- "p90": 500.09599328041077,
- "p95": 502.143993973732,
- "p99": 519.9999958276749
+ "p50": 182.52799659967422,
+ "p90": 206.14399760961533,
+ "p95": 214.23999965190887,
+ "p99": 235.61599105596542
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 91521024,
- "combineLogicalBytes": 91521024,
- "fanoutMean": 1.55859375,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4859904,
+ "combineLogicalBytes": 4859904,
+ "fanoutMean": 5.296875,
+ "recvTokensMax": 64,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 570.0479745864868,
- "p90": 580.4160237312317,
- "p95": 583.7439894676208,
- "p99": 621.0560202598572
+ "p50": 101.95200145244598,
+ "p90": 114.9120032787323,
+ "p95": 119.87199634313583,
+ "p99": 123.90399724245071
},
"combine": {
- "p50": 1098.7199544906616,
- "p90": 1109.1840267181396,
- "p95": 1109.663963317871,
- "p99": 1124.4159936904907
+ "p50": 92.73599833250046,
+ "p90": 98.68799895048141,
+ "p95": 104.22399640083313,
+ "p99": 115.42399972677231
},
"roundtrip": {
- "p50": 1622.8159666061401,
- "p90": 1629.3760538101196,
- "p95": 1632.2239637374878,
- "p99": 1643.3279514312744
+ "p50": 165.40800034999847,
+ "p90": 177.69600450992584,
+ "p95": 182.14400112628937,
+ "p99": 190.11199474334717
},
"isolatedSum": {
- "p50": 1668.7679290771484,
- "p90": 1689.6000504493713,
- "p95": 1693.407952785492,
- "p99": 1745.472013950348
+ "p50": 194.68799978494644,
+ "p90": 213.60000222921371,
+ "p95": 224.09599274396896,
+ "p99": 239.32799696922302
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 368062464,
- "combineLogicalBytes": 368062464,
- "fanoutMean": 1.5670166015625,
- "recvTokensMax": 16384,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19525632,
+ "combineLogicalBytes": 19525632,
+ "fanoutMean": 5.3203125,
+ "recvTokensMax": 256,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 131.77600502967834,
+ "p90": 142.04800128936768,
+ "p95": 146.14400267601013,
+ "p99": 155.13600409030914
+ },
+ "combine": {
+ "p50": 139.3280029296875,
+ "p90": 146.33600413799286,
+ "p95": 147.96799421310425,
+ "p99": 158.1760048866272
+ },
+ "roundtrip": {
+ "p50": 245.34399807453156,
+ "p90": 254.62400913238525,
+ "p95": 258.5600018501282,
+ "p99": 269.0559923648834
+ },
+ "isolatedSum": {
+ "p50": 271.10400795936584,
+ "p90": 288.38400542736053,
+ "p95": 294.1119968891144,
+ "p99": 313.31200897693634
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 78102528,
+ "combineLogicalBytes": 78102528,
+ "fanoutMean": 5.3203125,
+ "recvTokensMax": 1024,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -23349,47 +22685,48 @@
]
},
{
- "id": "cx-3f3c8c0f",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe",
- "colorKey": "b300_2e44c039",
- "comparisonKey": "5c8a1b2520d6dc6d",
+ "id": "cx-419170bd",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||5793a02d08aaa9c",
+ "colorKey": "gb300_87f4d4ec",
+ "comparisonKey": "fd229a6aff63668c",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:50:12.421760+00:00",
+ "generatedAt": "2026-06-29T13:55:00.278129+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_04",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16 · hotspot-single+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
+ "experts": 288,
+ "routing": "hotspot-single",
+ "routingLabel": "hotspot-single+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -23397,244 +22734,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "bbcd1d9d8d1e4fe",
- "workloadId": "set:6:1ca614e23cc66be1",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "5793a02d08aaa9c",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1.830078125,
+ "eplbImbalanceAfter": 1.0007595486111112,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285656632",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285656632",
- "createdAt": "2026-06-27T09:50:12.421760+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 93.47199648618698,
- "p90": 95.67999839782715,
- "p95": 98.65599870681763,
- "p99": 132.1599930524826
+ "p50": 98.04800152778625,
+ "p90": 109.02400314807892,
+ "p95": 113.02399635314941,
+ "p99": 145.02400159835815
},
"combine": {
- "p50": 116.83200299739838,
- "p90": 126.30400061607361,
- "p95": 126.88000500202179,
- "p99": 138.047993183136
+ "p50": 80.64000308513641,
+ "p90": 85.4400023818016,
+ "p95": 90.7519981265068,
+ "p99": 95.71199864149094
},
"roundtrip": {
- "p50": 196.51199877262115,
- "p90": 204.25599813461304,
- "p95": 207.5520008802414,
- "p99": 222.71999716758728
+ "p50": 153.43999862670898,
+ "p90": 163.7440025806427,
+ "p95": 167.13599860668182,
+ "p99": 178.6240041255951
},
"isolatedSum": {
- "p50": 210.30399948358536,
- "p90": 221.98399901390076,
- "p95": 225.53600370883942,
- "p99": 270.2079862356186
+ "p50": 178.68800461292267,
+ "p90": 194.46400552988052,
+ "p95": 203.77599447965622,
+ "p99": 240.7360002398491
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 127.6479959487915,
- "p90": 131.1040073633194,
- "p95": 133.95200669765472,
- "p99": 144.57599818706512
+ "p50": 97.59999811649323,
+ "p90": 108.5439994931221,
+ "p95": 112.5440001487732,
+ "p99": 121.11999839544296
},
"combine": {
- "p50": 174.55999553203583,
- "p90": 176.7680048942566,
- "p95": 177.279993891716,
- "p99": 179.32799458503723
+ "p50": 81.53600245714188,
+ "p90": 88.79999816417694,
+ "p95": 92.47999638319016,
+ "p99": 101.6639992594719
},
"roundtrip": {
- "p50": 283.29598903656006,
- "p90": 288.12798857688904,
- "p95": 290.1439964771271,
- "p99": 312.73600459098816
+ "p50": 154.62400019168854,
+ "p90": 164.92800414562225,
+ "p95": 168.09600591659546,
+ "p99": 176.35199427604675
},
"isolatedSum": {
- "p50": 302.20799148082733,
- "p90": 307.872012257576,
- "p95": 311.2320005893707,
- "p99": 323.90399277210236
+ "p50": 179.1360005736351,
+ "p90": 197.34399765729904,
+ "p95": 205.02399653196335,
+ "p99": 222.78399765491486
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 45688832,
- "combineLogicalBytes": 45688832,
- "fanoutMean": 1.55615234375,
- "recvTokensMax": 2048,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1175552,
+ "combineLogicalBytes": 1175552,
+ "fanoutMean": 5.125,
+ "recvTokensMax": 12,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 186.65599822998047,
- "p90": 191.16799533367157,
- "p95": 194.97600197792053,
- "p99": 212.44800090789795
+ "p50": 98.81599992513657,
+ "p90": 110.97600311040878,
+ "p95": 115.84000289440155,
+ "p99": 136.28800213336945
},
"combine": {
- "p50": 311.3279938697815,
- "p90": 313.24800848960876,
- "p95": 314.7520124912262,
- "p99": 326.911985874176
+ "p50": 82.17599987983704,
+ "p90": 87.87199854850769,
+ "p95": 90.36800265312195,
+ "p99": 96.00000083446503
},
"roundtrip": {
- "p50": 479.74398732185364,
- "p90": 486.7520034313202,
- "p95": 488.8960123062134,
- "p99": 497.79200553894043
+ "p50": 156.92800283432007,
+ "p90": 167.04000532627106,
+ "p95": 170.43200135231018,
+ "p99": 186.88000738620758
},
"isolatedSum": {
- "p50": 497.98399209976196,
- "p90": 504.41600382328033,
- "p95": 509.72801446914673,
- "p99": 539.359986782074
+ "p50": 180.9919998049736,
+ "p90": 198.84800165891647,
+ "p95": 206.2080055475235,
+ "p99": 232.28800296783447
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 91521024,
- "combineLogicalBytes": 91521024,
- "fanoutMean": 1.55859375,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2451456,
+ "combineLogicalBytes": 2451456,
+ "fanoutMean": 5.34375,
+ "recvTokensMax": 23,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 311.42398715019226,
- "p90": 318.62398982048035,
- "p95": 323.13600182533264,
- "p99": 337.15200424194336
+ "p50": 98.24000298976898,
+ "p90": 108.0000028014183,
+ "p95": 111.51999980211258,
+ "p99": 117.76000261306763
},
"combine": {
- "p50": 583.6480259895325,
- "p90": 594.3679809570312,
- "p95": 596.671998500824,
- "p99": 632.6079964637756
+ "p50": 83.93599838018417,
+ "p90": 91.39200299978256,
+ "p95": 93.47199648618698,
+ "p99": 103.29599678516388
},
"roundtrip": {
- "p50": 887.4239921569824,
- "p90": 891.9680118560791,
- "p95": 893.6960101127625,
- "p99": 918.4960126876831
+ "p50": 159.67999398708344,
+ "p90": 169.53599452972412,
+ "p95": 172.67200350761414,
+ "p99": 178.65599691867828
},
"isolatedSum": {
- "p50": 895.0720131397247,
- "p90": 912.9919707775116,
- "p95": 919.8080003261566,
- "p99": 969.760000705719
+ "p50": 182.17600136995316,
+ "p90": 199.39200580120087,
+ "p95": 204.99199628829956,
+ "p99": 221.0559993982315
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 183916544,
- "combineLogicalBytes": 183916544,
- "fanoutMean": 1.5660400390625,
- "recvTokensMax": 8192,
+ "dispatchLogicalBytes": 4730880,
+ "combineLogicalBytes": 4730880,
+ "fanoutMean": 5.15625,
+ "recvTokensMax": 44,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
+ "dispatch": {
+ "p50": 98.59199821949005,
+ "p90": 109.21599715948105,
+ "p95": 111.77600175142288,
+ "p99": 117.50400066375732
+ },
+ "combine": {
+ "p50": 85.82399785518646,
+ "p90": 92.73599833250046,
+ "p95": 94.01600062847137,
+ "p99": 99.04000163078308
+ },
+ "roundtrip": {
+ "p50": 162.1759980916977,
+ "p90": 171.74400389194489,
+ "p95": 175.48799514770508,
+ "p99": 185.95199286937714
+ },
+ "isolatedSum": {
+ "p50": 184.4159960746765,
+ "p90": 201.9519954919815,
+ "p95": 205.79200237989426,
+ "p99": 216.5440022945404
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 9691136,
+ "combineLogicalBytes": 9691136,
+ "fanoutMean": 5.28125,
+ "recvTokensMax": 88,
+ "stragglerRank": 5,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 568.2560205459595,
- "p90": 577.0559906959534,
- "p95": 585.6639742851257,
- "p99": 622.0160126686096
+ "p50": 103.20000350475311,
+ "p90": 112.38399893045425,
+ "p95": 115.77600240707397,
+ "p99": 126.20800733566284
},
"combine": {
- "p50": 1099.1679430007935,
- "p90": 1110.0800037384033,
- "p95": 1111.1040115356445,
- "p99": 1136.8639469146729
+ "p50": 92.76799857616425,
+ "p90": 96.79999947547913,
+ "p95": 101.34399682283401,
+ "p99": 105.24799674749374
},
"roundtrip": {
- "p50": 1613.2479906082153,
- "p90": 1620.7040548324585,
- "p95": 1624.2239475250244,
- "p99": 1674.720048904419
+ "p50": 166.97600483894348,
+ "p90": 175.55199563503265,
+ "p95": 179.36000227928162,
+ "p99": 195.77600061893463
},
"isolatedSum": {
- "p50": 1667.423963546753,
- "p90": 1687.1359944343567,
- "p95": 1696.7679858207703,
- "p99": 1758.8799595832825
+ "p50": 195.96800208091736,
+ "p90": 209.18399840593338,
+ "p95": 217.119999229908,
+ "p99": 231.45600408315659
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 368062464,
- "combineLogicalBytes": 368062464,
- "fanoutMean": 1.5670166015625,
- "recvTokensMax": 16384,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19568640,
+ "combineLogicalBytes": 19568640,
+ "fanoutMean": 5.33203125,
+ "recvTokensMax": 179,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 1112.2239828109741,
- "p90": 1126.8800497055054,
- "p95": 1135.7439756393433,
- "p99": 1233.247995376587
+ "p50": 112.28799819946289,
+ "p90": 121.2799996137619,
+ "p95": 125.44000148773193,
+ "p99": 132.1280002593994
},
"combine": {
- "p50": 2068.864107131958,
- "p90": 2072.096109390259,
- "p95": 2080.4800987243652,
- "p99": 2143.2321071624756
+ "p50": 106.4319983124733,
+ "p90": 113.56800049543381,
+ "p95": 116.19199812412262,
+ "p99": 122.27199971675873
},
"roundtrip": {
- "p50": 3127.5839805603027,
- "p90": 3139.359951019287,
- "p95": 3147.6480960845947,
- "p99": 3192.70396232605
+ "p50": 189.88800048828125,
+ "p90": 199.5840072631836,
+ "p95": 203.64800095558167,
+ "p99": 214.4320011138916
},
"isolatedSum": {
- "p50": 3181.088089942932,
- "p90": 3198.976159095764,
- "p95": 3216.2240743637085,
- "p99": 3376.4801025390625
+ "p50": 218.7199965119362,
+ "p90": 234.8480001091957,
+ "p95": 241.63199961185455,
+ "p99": 254.39999997615814
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 734720000,
- "combineLogicalBytes": 734720000,
- "fanoutMean": 1.56402587890625,
- "recvTokensMax": 32768,
- "stragglerRank": 0,
+ "dispatchLogicalBytes": 38750208,
+ "combineLogicalBytes": 38750208,
+ "fanoutMean": 5.279296875,
+ "recvTokensMax": 348,
+ "stragglerRank": 5,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 128.67200374603271,
+ "p90": 137.5039964914322,
+ "p95": 141.12000167369843,
+ "p99": 153.47200632095337
+ },
+ "combine": {
+ "p50": 127.68000364303589,
+ "p90": 131.96800649166107,
+ "p95": 133.56800377368927,
+ "p99": 139.77600634098053
+ },
+ "roundtrip": {
+ "p50": 226.1440008878708,
+ "p90": 234.0800017118454,
+ "p95": 237.7600073814392,
+ "p99": 245.15199661254883
+ },
+ "isolatedSum": {
+ "p50": 256.3520073890686,
+ "p90": 269.47200298309326,
+ "p95": 274.6880054473877,
+ "p99": 293.2480126619339
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 77342720,
+ "combineLogicalBytes": 77342720,
+ "fanoutMean": 5.2685546875,
+ "recvTokensMax": 687,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -23642,47 +23053,48 @@
]
},
{
- "id": "cx-861c4f52",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb",
- "colorKey": "b300_6d2e4735",
- "comparisonKey": "e4e20084a0948dac",
+ "id": "cx-c886abc0",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||a572344820478f0",
+ "colorKey": "gb300_8b7def4e",
+ "comparisonKey": "8adbe858ea6e1f63",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:50:16.626677+00:00",
+ "generatedAt": "2026-06-29T13:36:56.578195+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_10",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-heavy+eplb",
+ "label": "GB300 EP8 · deepep · bf16 · uniform+eplb",
"model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 288,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy+eplb",
+ "routing": "uniform",
+ "routingLabel": "uniform+eplb",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -23690,244 +23102,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "46855e7fa6754eb",
- "workloadId": "set:6:1ca614e23cc66be1",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 7.38995361328125,
- "eplbImbalanceAfter": 1.0000210716610862,
- "backendVersion": "1.2.1",
+ "traceSignature": "a572344820478f0",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1.078125,
+ "eplbImbalanceAfter": 1.00048828125,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285658973",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285658973",
- "createdAt": "2026-06-27T09:50:16.626677+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 94.30400282144547,
- "p90": 96.03200107812881,
- "p95": 97.88800030946732,
- "p99": 106.55999928712845
+ "p50": 95.04000097513199,
+ "p90": 109.27999764680862,
+ "p95": 115.1999980211258,
+ "p99": 145.08800208568573
},
"combine": {
- "p50": 114.75200206041336,
- "p90": 115.77600240707397,
- "p95": 116.54400080442429,
- "p99": 125.98399817943573
+ "p50": 78.36800068616867,
+ "p90": 83.52000266313553,
+ "p95": 85.66399663686752,
+ "p99": 93.56799721717834
},
"roundtrip": {
- "p50": 192.25600361824036,
- "p90": 196.1279958486557,
- "p95": 198.11199605464935,
- "p99": 216.19200706481934
+ "p50": 147.39200472831726,
+ "p90": 158.55999290943146,
+ "p95": 162.27200627326965,
+ "p99": 170.56000232696533
},
"isolatedSum": {
- "p50": 209.05600488185883,
- "p90": 211.8080034852028,
- "p95": 214.4320011138916,
- "p99": 232.54399746656418
+ "p50": 173.40800166130066,
+ "p90": 192.80000030994415,
+ "p95": 200.86399465799332,
+ "p99": 238.65599930286407
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 79206400,
- "combineLogicalBytes": 79206400,
- "fanoutMean": 5.3955078125,
- "recvTokensMax": 713,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 516096,
+ "combineLogicalBytes": 516096,
+ "fanoutMean": 4.5,
+ "recvTokensMax": 6,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 132.83200562000275,
- "p90": 138.49599659442902,
- "p95": 140.6400054693222,
- "p99": 152.6080071926117
+ "p50": 97.24800288677216,
+ "p90": 111.61600053310394,
+ "p95": 117.08799749612808,
+ "p99": 130.8480054140091
},
"combine": {
- "p50": 155.20000457763672,
- "p90": 163.83999586105347,
- "p95": 164.19200599193573,
- "p99": 166.9439971446991
+ "p50": 79.74400371313095,
+ "p90": 84.3840017914772,
+ "p95": 86.2400010228157,
+ "p99": 95.67999839782715
},
"roundtrip": {
- "p50": 272.4800109863281,
- "p90": 279.87200021743774,
- "p95": 287.1040105819702,
- "p99": 306.5919876098633
+ "p50": 149.75999295711517,
+ "p90": 161.3759994506836,
+ "p95": 164.000004529953,
+ "p99": 171.48800194263458
},
"isolatedSum": {
- "p50": 288.03201019763947,
- "p90": 302.3359924554825,
- "p95": 304.83201146125793,
- "p99": 319.5520043373108
+ "p50": 176.9920065999031,
+ "p90": 196.00000232458115,
+ "p95": 203.3279985189438,
+ "p99": 226.52800381183624
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 159330304,
- "combineLogicalBytes": 159330304,
- "fanoutMean": 5.4267578125,
- "recvTokensMax": 1436,
+ "dispatchLogicalBytes": 1089536,
+ "combineLogicalBytes": 1089536,
+ "fanoutMean": 4.75,
+ "recvTokensMax": 11,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 192.671999335289,
- "p90": 199.48799908161163,
- "p95": 201.05600357055664,
- "p99": 214.27200734615326
+ "p50": 97.37599641084671,
+ "p90": 109.24799740314484,
+ "p95": 113.92000317573547,
+ "p99": 123.58400225639343
},
"combine": {
- "p50": 274.2080092430115,
- "p90": 277.5680124759674,
- "p95": 285.95200181007385,
- "p99": 298.335999250412
+ "p50": 82.84799754619598,
+ "p90": 86.84799820184708,
+ "p95": 91.13600105047226,
+ "p99": 98.01600128412247
},
"roundtrip": {
- "p50": 444.0639913082123,
- "p90": 448.63998889923096,
- "p95": 450.9119987487793,
- "p99": 470.91200947761536
+ "p50": 154.11199629306793,
+ "p90": 165.18400609493256,
+ "p95": 168.5120016336441,
+ "p99": 175.4560023546219
},
"isolatedSum": {
- "p50": 466.8800085783005,
- "p90": 477.05601155757904,
- "p95": 487.0080053806305,
- "p99": 512.6080065965652
+ "p50": 180.2239939570427,
+ "p90": 196.0959956049919,
+ "p95": 205.05600422620773,
+ "p99": 221.6000035405159
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 319535104,
- "combineLogicalBytes": 319535104,
- "fanoutMean": 5.441650390625,
- "recvTokensMax": 2897,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2207744,
+ "combineLogicalBytes": 2207744,
+ "fanoutMean": 4.8125,
+ "recvTokensMax": 23,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
+ "dispatch": {
+ "p50": 96.73599898815155,
+ "p90": 109.21599715948105,
+ "p95": 113.76000195741653,
+ "p99": 125.50400197505951
+ },
+ "combine": {
+ "p50": 83.39200168848038,
+ "p90": 88.35200220346451,
+ "p95": 94.01600062847137,
+ "p99": 97.02400118112564
+ },
+ "roundtrip": {
+ "p50": 155.7759940624237,
+ "p90": 167.07199811935425,
+ "p95": 171.26399278640747,
+ "p99": 178.78399789333344
+ },
+ "isolatedSum": {
+ "p50": 180.12800067663193,
+ "p90": 197.56799936294556,
+ "p95": 207.7760025858879,
+ "p99": 222.52800315618515
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 4558848,
+ "combineLogicalBytes": 4558848,
+ "fanoutMean": 4.96875,
+ "recvTokensMax": 46,
+ "stragglerRank": 6,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 326.84800028800964,
- "p90": 330.6240141391754,
- "p95": 334.3679904937744,
- "p99": 394.3359851837158
+ "p50": 96.70399874448776,
+ "p90": 108.31999778747559,
+ "p95": 112.83200234174728,
+ "p99": 127.80800461769104
},
"combine": {
- "p50": 469.63199973106384,
- "p90": 471.1039960384369,
- "p95": 472.7039933204651,
- "p99": 483.13599824905396
+ "p50": 83.74399691820145,
+ "p90": 91.10400080680847,
+ "p95": 94.04800087213516,
+ "p99": 97.53599762916565
},
"roundtrip": {
- "p50": 772.4480032920837,
- "p90": 781.7919850349426,
- "p95": 785.2159738540649,
- "p99": 801.2480139732361
+ "p50": 156.99200332164764,
+ "p90": 168.89600455760956,
+ "p95": 172.992005944252,
+ "p99": 182.0800006389618
},
"isolatedSum": {
- "p50": 796.4800000190735,
- "p90": 801.7280101776123,
- "p95": 807.0719838142395,
- "p99": 877.4719834327698
+ "p50": 180.4479956626892,
+ "p90": 199.42399859428406,
+ "p95": 206.88000321388245,
+ "p99": 225.3440022468567
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 638410752,
- "combineLogicalBytes": 638410752,
- "fanoutMean": 5.43603515625,
- "recvTokensMax": 5815,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 9347072,
+ "combineLogicalBytes": 9347072,
+ "fanoutMean": 5.09375,
+ "recvTokensMax": 86,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 581.6959738731384,
- "p90": 585.9519839286804,
- "p95": 589.792013168335,
- "p99": 671.8400120735168
+ "p50": 99.48799759149551,
+ "p90": 110.04800349473953,
+ "p95": 113.66400122642517,
+ "p99": 123.71200323104858
},
"combine": {
- "p50": 828.2240033149719,
- "p90": 838.8159871101379,
- "p95": 839.6160006523132,
- "p99": 850.8480191230774
+ "p50": 90.55999666452408,
+ "p90": 96.25600278377533,
+ "p95": 98.11200201511383,
+ "p99": 105.0880029797554
},
"roundtrip": {
- "p50": 1393.4400081634521,
- "p90": 1402.4319648742676,
- "p95": 1406.6879749298096,
- "p99": 1428.1920194625854
+ "p50": 164.38399255275726,
+ "p90": 173.40800166130066,
+ "p95": 176.67199671268463,
+ "p99": 183.9040070772171
},
"isolatedSum": {
- "p50": 1409.9199771881104,
- "p90": 1424.7679710388184,
- "p95": 1429.4080138206482,
- "p99": 1522.6880311965942
+ "p50": 190.0479942560196,
+ "p90": 206.30400627851486,
+ "p95": 211.776003241539,
+ "p99": 228.80000621080399
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1275144192,
- "combineLogicalBytes": 1275144192,
- "fanoutMean": 5.42889404296875,
- "recvTokensMax": 11606,
+ "dispatchLogicalBytes": 18995200,
+ "combineLogicalBytes": 18995200,
+ "fanoutMean": 5.17578125,
+ "recvTokensMax": 178,
+ "stragglerRank": 6,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 110.944002866745,
+ "p90": 118.46400052309036,
+ "p95": 121.5360015630722,
+ "p99": 127.83999741077423
+ },
+ "combine": {
+ "p50": 106.6880002617836,
+ "p90": 111.26399785280228,
+ "p95": 115.52000045776367,
+ "p99": 120.19199877977371
+ },
+ "roundtrip": {
+ "p50": 188.1600022315979,
+ "p90": 198.36799800395966,
+ "p95": 201.88799500465393,
+ "p99": 214.62400257587433
+ },
+ "isolatedSum": {
+ "p50": 217.6320031285286,
+ "p90": 229.72799837589264,
+ "p95": 237.05600202083588,
+ "p99": 248.03199619054794
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38291456,
+ "combineLogicalBytes": 38291456,
+ "fanoutMean": 5.216796875,
+ "recvTokensMax": 348,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 1084.3839645385742,
- "p90": 1092.7679538726807,
- "p95": 1101.5679836273193,
- "p99": 1113.6959791183472
+ "p50": 127.00800597667694,
+ "p90": 135.68000495433807,
+ "p95": 140.6400054693222,
+ "p99": 147.48799800872803
},
"combine": {
- "p50": 1567.4560070037842,
- "p90": 1576.8959522247314,
- "p95": 1578.976035118103,
- "p99": 1629.3120384216309
+ "p50": 124.09599870443344,
+ "p90": 132.51200318336487,
+ "p95": 133.95200669765472,
+ "p99": 138.84800672531128
},
"roundtrip": {
- "p50": 2638.4639739990234,
- "p90": 2648.47993850708,
- "p95": 2653.088092803955,
- "p99": 2690.3679370880127
+ "p50": 224.2880016565323,
+ "p90": 231.9359928369522,
+ "p95": 235.29599606990814,
+ "p99": 241.18399620056152
},
"isolatedSum": {
- "p50": 2651.8399715423584,
- "p90": 2669.663906097412,
- "p95": 2680.5440187454224,
- "p99": 2743.008017539978
+ "p50": 251.10400468111038,
+ "p90": 268.19200813770294,
+ "p95": 274.59201216697693,
+ "p99": 286.3360047340393
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2546374656,
- "combineLogicalBytes": 2546374656,
- "fanoutMean": 5.420562744140625,
- "recvTokensMax": 23170,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 77113344,
+ "combineLogicalBytes": 77113344,
+ "fanoutMean": 5.2529296875,
+ "recvTokensMax": 685,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -23935,292 +23421,367 @@
]
},
{
- "id": "cx-cae00445",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428",
- "colorKey": "b300_7ab35d34",
- "comparisonKey": "d9d28463325111a5",
+ "id": "cx-e3eecced",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db",
+ "colorKey": "gb300_b3a88763",
+ "comparisonKey": "1521f576cce519c9",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:49:16.226066+00:00",
+ "generatedAt": "2026-06-29T13:40:26.333111+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_11",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-mild",
+ "label": "GB300 EP8 · deepep · bf16 · zipf",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild",
+ "routing": "zipf",
+ "routingLabel": "zipf",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
"paretoEligible": false
},
"placement": {
- "kind": "packed",
- "nodes": 1,
+ "kind": "adversarial",
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "cf93f8f6b52e428",
- "workloadId": "set:6:a224603e5a1640b8",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "1093cd76c9cd2db",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285635254",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285635254",
- "createdAt": "2026-06-27T09:49:16.226066+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 102.78400033712387,
- "p90": 104.80000078678131,
- "p95": 105.66399991512299,
- "p99": 110.23999750614166
+ "p50": 94.40000355243683,
+ "p90": 106.39999806880951,
+ "p95": 112.64000087976456,
+ "p99": 123.00799787044525
},
"combine": {
- "p50": 126.65599584579468,
- "p90": 128.09599936008453,
- "p95": 128.89599800109863,
- "p99": 141.85599982738495
+ "p50": 81.53600245714188,
+ "p90": 85.11999994516373,
+ "p95": 91.71199798583984,
+ "p99": 96.03200107812881
},
"roundtrip": {
- "p50": 205.85599541664124,
- "p90": 213.15200626850128,
- "p95": 215.55200219154358,
- "p99": 228.15999388694763
+ "p50": 149.59999918937683,
+ "p90": 160.3199988603592,
+ "p95": 165.18400609493256,
+ "p99": 179.32799458503723
},
"isolatedSum": {
- "p50": 229.43999618291855,
- "p90": 232.89600014686584,
- "p95": 234.55999791622162,
- "p99": 252.0959973335266
+ "p50": 175.9360060095787,
+ "p90": 191.51999801397324,
+ "p95": 204.3519988656044,
+ "p99": 219.03999894857407
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 70160384,
- "combineLogicalBytes": 70160384,
- "fanoutMean": 4.779296875,
- "recvTokensMax": 987,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 444416,
+ "combineLogicalBytes": 444416,
+ "fanoutMean": 3.875,
+ "recvTokensMax": 8,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 139.8719996213913,
- "p90": 141.63200557231903,
- "p95": 143.23200285434723,
- "p99": 152.8320014476776
+ "p50": 95.20000219345093,
+ "p90": 107.26399719715118,
+ "p95": 111.455999314785,
+ "p99": 117.34399944543839
},
"combine": {
- "p50": 176.9919991493225,
- "p90": 186.8479996919632,
- "p95": 187.96800076961517,
- "p99": 201.05600357055664
+ "p50": 78.65600287914276,
+ "p90": 84.25600081682205,
+ "p95": 86.17600053548813,
+ "p99": 94.01600062847137
},
"roundtrip": {
- "p50": 305.5039942264557,
- "p90": 311.2640082836151,
- "p95": 312.1599853038788,
- "p99": 315.8400058746338
+ "p50": 150.7200002670288,
+ "p90": 162.08000481128693,
+ "p95": 164.92800414562225,
+ "p99": 174.97600615024567
},
"isolatedSum": {
- "p50": 316.8639987707138,
- "p90": 328.4800052642822,
- "p95": 331.2000036239624,
- "p99": 353.88800501823425
+ "p50": 173.8560050725937,
+ "p90": 191.51999801397324,
+ "p95": 197.63199985027313,
+ "p99": 211.36000007390976
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 140879872,
- "combineLogicalBytes": 140879872,
- "fanoutMean": 4.79833984375,
- "recvTokensMax": 1972,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 845824,
+ "combineLogicalBytes": 845824,
+ "fanoutMean": 3.6875,
+ "recvTokensMax": 16,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 213.6639952659607,
- "p90": 221.88800573349,
- "p95": 222.91199862957,
- "p99": 224.63999688625336
+ "p50": 97.37599641084671,
+ "p90": 109.76000130176544,
+ "p95": 113.8560026884079,
+ "p99": 125.5359947681427
},
"combine": {
- "p50": 326.4960050582886,
- "p90": 335.55200695991516,
- "p95": 336.8639945983887,
- "p99": 396.9919979572296
+ "p50": 81.727996468544,
+ "p90": 87.16800063848495,
+ "p95": 93.66399794816971,
+ "p99": 105.02400249242783
},
"roundtrip": {
- "p50": 522.2079753875732,
- "p90": 529.8879742622375,
- "p95": 531.4239859580994,
- "p99": 539.2640233039856
+ "p50": 152.38399803638458,
+ "p90": 164.8319959640503,
+ "p95": 168.12799870967865,
+ "p99": 175.90400576591492
},
"isolatedSum": {
- "p50": 540.1600003242493,
- "p90": 557.4400126934052,
- "p95": 559.7759932279587,
- "p99": 621.631994843483
+ "p50": 179.10399287939072,
+ "p90": 196.9280019402504,
+ "p95": 207.5200006365776,
+ "p99": 230.55999726057053
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 282333184,
- "combineLogicalBytes": 282333184,
- "fanoutMean": 4.80810546875,
- "recvTokensMax": 3936,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1691648,
+ "combineLogicalBytes": 1691648,
+ "fanoutMean": 3.6875,
+ "recvTokensMax": 32,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 355.6160032749176,
- "p90": 362.9760146141052,
- "p95": 364.8639917373657,
- "p99": 368.3519959449768
+ "p50": 98.1760025024414,
+ "p90": 108.38399827480316,
+ "p95": 112.70400136709213,
+ "p99": 125.95200538635254
},
"combine": {
- "p50": 569.4720149040222,
- "p90": 572.7360248565674,
- "p95": 580.7039737701416,
- "p99": 594.1759943962097
+ "p50": 83.03999900817871,
+ "p90": 86.91199868917465,
+ "p95": 92.3520028591156,
+ "p99": 97.18400239944458
},
"roundtrip": {
- "p50": 920.2880263328552,
- "p90": 929.0239810943604,
- "p95": 936.7679953575134,
- "p99": 955.5839896202087
+ "p50": 155.10399639606476,
+ "p90": 167.1680063009262,
+ "p95": 170.30400037765503,
+ "p99": 176.64000391960144
},
"isolatedSum": {
- "p50": 925.0880181789398,
- "p90": 935.7120394706726,
- "p95": 945.5679655075073,
- "p99": 962.5279903411865
+ "p50": 181.21600151062012,
+ "p90": 195.2959969639778,
+ "p95": 205.05600422620773,
+ "p99": 223.13600778579712
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 566716416,
- "combineLogicalBytes": 566716416,
- "fanoutMean": 4.8255615234375,
- "recvTokensMax": 7855,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 3354624,
+ "combineLogicalBytes": 3354624,
+ "fanoutMean": 3.65625,
+ "recvTokensMax": 64,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 658.6880087852478,
- "p90": 667.9679751396179,
- "p95": 669.2479848861694,
- "p99": 679.6159744262695
+ "p50": 97.95200079679489,
+ "p90": 109.02400314807892,
+ "p95": 112.22399771213531,
+ "p99": 118.01599711179733
},
"combine": {
- "p50": 1048.1280088424683,
- "p90": 1052.191972732544,
- "p95": 1061.1519813537598,
- "p99": 1171.712040901184
+ "p50": 83.29600095748901,
+ "p90": 91.36000275611877,
+ "p95": 94.01600062847137,
+ "p99": 98.9760011434555
},
"roundtrip": {
- "p50": 1691.648006439209,
- "p90": 1700.1279592514038,
- "p95": 1703.5839557647705,
- "p99": 1764.7039890289307
+ "p50": 156.00000321865082,
+ "p90": 168.5120016336441,
+ "p95": 172.06400632858276,
+ "p99": 178.3359944820404
},
"isolatedSum": {
- "p50": 1706.816017627716,
- "p90": 1720.1599478721619,
- "p95": 1730.3999662399292,
- "p99": 1851.3280153274536
+ "p50": 181.2480017542839,
+ "p90": 200.3840059041977,
+ "p95": 206.2399983406067,
+ "p99": 216.99199825525284
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1132285952,
- "combineLogicalBytes": 1132285952,
- "fanoutMean": 4.8206787109375,
- "recvTokensMax": 15694,
- "stragglerRank": 0,
+ "dispatchLogicalBytes": 6537216,
+ "combineLogicalBytes": 6537216,
+ "fanoutMean": 3.5625,
+ "recvTokensMax": 127,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1285.375952720642,
- "p90": 1298.6559867858887,
- "p95": 1301.2160062789917,
- "p99": 1428.063988685608
+ "p50": 100.03200173377991,
+ "p90": 111.55200004577637,
+ "p95": 115.42399972677231,
+ "p99": 124.1919994354248
},
"combine": {
- "p50": 2018.496036529541,
- "p90": 2022.7839946746826,
- "p95": 2031.3599109649658,
- "p99": 2082.4639797210693
+ "p50": 85.82399785518646,
+ "p90": 94.81599926948547,
+ "p95": 96.76799923181534,
+ "p99": 106.81600123643875
},
"roundtrip": {
- "p50": 3294.048070907593,
- "p90": 3308.799982070923,
- "p95": 3315.9360885620117,
- "p99": 3368.2239055633545
+ "p50": 162.11199760437012,
+ "p90": 172.19200730323792,
+ "p95": 175.20000040531158,
+ "p99": 180.57599663734436
},
"isolatedSum": {
- "p50": 3303.871989250183,
- "p90": 3321.4399814605713,
- "p95": 3332.5759172439575,
- "p99": 3510.5279684066772
+ "p50": 185.85599958896637,
+ "p90": 206.36799931526184,
+ "p95": 212.19199895858765,
+ "p99": 231.00800067186356
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2267840512,
- "combineLogicalBytes": 2267840512,
- "fanoutMean": 4.82763671875,
- "recvTokensMax": 31357,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 12859392,
+ "combineLogicalBytes": 12859392,
+ "fanoutMean": 3.50390625,
+ "recvTokensMax": 255,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 110.36799848079681,
+ "p90": 119.93599683046341,
+ "p95": 122.46400117874146,
+ "p99": 131.74399733543396
+ },
+ "combine": {
+ "p50": 103.58399897813797,
+ "p90": 108.76800119876862,
+ "p95": 110.75200140476227,
+ "p99": 119.90399658679962
+ },
+ "roundtrip": {
+ "p50": 187.29600310325623,
+ "p90": 197.4399983882904,
+ "p95": 201.88799500465393,
+ "p99": 228.19200158119202
+ },
+ "isolatedSum": {
+ "p50": 213.95199745893478,
+ "p90": 228.70399802923203,
+ "p95": 233.21600258350372,
+ "p99": 251.64799392223358
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 25145344,
+ "combineLogicalBytes": 25145344,
+ "fanoutMean": 3.42578125,
+ "recvTokensMax": 510,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 128.60800325870514,
+ "p90": 138.68799805641174,
+ "p95": 141.9840008020401,
+ "p99": 149.1519957780838
+ },
+ "combine": {
+ "p50": 134.0160071849823,
+ "p90": 142.91200041770935,
+ "p95": 144.57599818706512,
+ "p99": 154.84799444675446
+ },
+ "roundtrip": {
+ "p50": 235.00800132751465,
+ "p90": 243.20000410079956,
+ "p95": 245.9840029478073,
+ "p99": 251.64800882339478
+ },
+ "isolatedSum": {
+ "p50": 262.62401044368744,
+ "p90": 281.5999984741211,
+ "p95": 286.5599989891052,
+ "p99": 303.99999022483826
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -24228,47 +23789,48 @@
]
},
{
- "id": "cx-17599843",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9",
- "colorKey": "b300_5e3d915a",
- "comparisonKey": "0397aa2abeee044f",
+ "id": "cx-c0b8c5b4",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||7eace9164e82cd6",
+ "colorKey": "gb300_961589b9",
+ "comparisonKey": "484727a851531c1a",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:49:19.827351+00:00",
+ "generatedAt": "2026-06-29T13:46:03.238873+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_05",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-mild+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-heavy",
+ "model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 288,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild+eplb",
+ "experts": 256,
+ "routing": "zipf-heavy",
+ "routingLabel": "zipf-heavy",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": true,
+ "eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -24276,292 +23838,219 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "27ddc85ded0add9",
- "workloadId": "set:6:a224603e5a1640b8",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 2.545684814453125,
- "eplbImbalanceAfter": 1.0001495361328125,
- "backendVersion": "1.2.1",
+ "traceSignature": "7eace9164e82cd6",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285637742",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285637742",
- "createdAt": "2026-06-27T09:49:19.827351+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 94.94400024414062,
- "p90": 96.89600020647049,
- "p95": 99.16800260543823,
- "p99": 108.99200290441513
+ "p50": 91.39200299978256,
+ "p90": 108.76800119876862,
+ "p95": 117.63200163841248,
+ "p99": 187.99999356269836
},
"combine": {
- "p50": 115.4559999704361,
- "p90": 116.80000275373459,
- "p95": 117.76000261306763,
- "p99": 127.6479959487915
+ "p50": 70.23999840021133,
+ "p90": 74.94399696588516,
+ "p95": 81.11999928951263,
+ "p99": 110.46399921178818
},
"roundtrip": {
- "p50": 193.6960071325302,
- "p90": 199.2959976196289,
- "p95": 201.75999402999878,
- "p99": 233.11999440193176
+ "p50": 139.16799426078796,
+ "p90": 152.67199277877808,
+ "p95": 158.33599865436554,
+ "p99": 228.70400547981262
},
"isolatedSum": {
- "p50": 210.40000021457672,
- "p90": 213.69600296020508,
- "p95": 216.92800521850586,
- "p99": 236.63999885320663
+ "p50": 161.6320013999939,
+ "p90": 183.71199816465378,
+ "p95": 198.7520009279251,
+ "p99": 298.46399277448654
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 78159872,
- "combineLogicalBytes": 78159872,
- "fanoutMean": 5.32421875,
- "recvTokensMax": 702,
+ "dispatchLogicalBytes": 172032,
+ "combineLogicalBytes": 172032,
+ "fanoutMean": 1.5,
+ "recvTokensMax": 8,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 132.25600123405457,
- "p90": 138.62399756908417,
- "p95": 140.06400108337402,
- "p99": 148.28799664974213
+ "p50": 96.67199850082397,
+ "p90": 112.96000331640244,
+ "p95": 138.11199367046356,
+ "p99": 177.91999876499176
},
"combine": {
- "p50": 163.4880006313324,
- "p90": 164.73600268363953,
- "p95": 165.53600132465363,
- "p99": 188.48000466823578
+ "p50": 73.88799637556076,
+ "p90": 83.23200047016144,
+ "p95": 85.37600189447403,
+ "p99": 131.9359987974167
},
"roundtrip": {
- "p50": 273.3759880065918,
- "p90": 280.89600801467896,
- "p95": 283.4239900112152,
- "p99": 295.0400114059448
+ "p50": 146.7519998550415,
+ "p90": 161.3440066576004,
+ "p95": 187.00799345970154,
+ "p99": 256.0960054397583
},
"isolatedSum": {
- "p50": 295.74400186538696,
- "p90": 303.3600002527237,
- "p95": 305.60000240802765,
- "p99": 336.7680013179779
+ "p50": 170.55999487638474,
+ "p90": 196.19200378656387,
+ "p95": 223.4879955649376,
+ "p99": 309.85599756240845
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 156563456,
- "combineLogicalBytes": 156563456,
- "fanoutMean": 5.33251953125,
- "recvTokensMax": 1393,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1376256,
+ "combineLogicalBytes": 1376256,
+ "fanoutMean": 1.5,
+ "recvTokensMax": 64,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 193.63200664520264,
- "p90": 196.83200120925903,
- "p95": 202.55999267101288,
- "p99": 238.27199637889862
+ "p50": 100.44799745082855,
+ "p90": 114.04799669981003,
+ "p95": 135.16800105571747,
+ "p99": 194.36800479888916
},
"combine": {
- "p50": 264.384001493454,
- "p90": 274.2399871349335,
- "p95": 274.9119997024536,
- "p99": 299.6160089969635
+ "p50": 82.87999778985977,
+ "p90": 90.17600119113922,
+ "p95": 94.4959968328476,
+ "p99": 146.04799449443817
},
"roundtrip": {
- "p50": 442.78401136398315,
- "p90": 448.4800100326538,
- "p95": 453.8559913635254,
- "p99": 481.1199903488159
+ "p50": 155.87200224399567,
+ "p90": 170.59199512004852,
+ "p95": 186.17600202560425,
+ "p99": 261.4400088787079
},
"isolatedSum": {
- "p50": 458.0160081386566,
- "p90": 471.0719883441925,
- "p95": 477.4719923734665,
- "p99": 537.8880053758621
+ "p50": 183.32799524068832,
+ "p90": 204.22399789094925,
+ "p95": 229.66399788856506,
+ "p99": 340.41599929332733
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 312410112,
- "combineLogicalBytes": 312410112,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 2773,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 5533696,
+ "combineLogicalBytes": 5533696,
+ "fanoutMean": 1.5078125,
+ "recvTokensMax": 256,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 326.33599638938904,
- "p90": 329.21600341796875,
- "p95": 330.1120102405548,
- "p99": 349.08801317214966
+ "p50": 123.45600128173828,
+ "p90": 139.8719996213913,
+ "p95": 160.70400178432465,
+ "p99": 219.32800114154816
},
"combine": {
- "p50": 459.26401019096375,
- "p90": 470.43201327323914,
- "p95": 471.48799896240234,
- "p99": 483.68000984191895
+ "p50": 130.40000200271606,
+ "p90": 135.3279948234558,
+ "p95": 142.20799505710602,
+ "p99": 169.76000368595123
},
"roundtrip": {
- "p50": 768.5440182685852,
- "p90": 775.3919959068298,
- "p95": 784.3199968338013,
- "p99": 826.6239762306213
+ "p50": 225.21600127220154,
+ "p90": 238.78400027751923,
+ "p95": 248.35200607776642,
+ "p99": 308.7039887905121
},
"isolatedSum": {
- "p50": 785.6000065803528,
- "p90": 799.6480166912079,
- "p95": 801.6000092029572,
- "p99": 832.7680230140686
+ "p50": 253.85600328445435,
+ "p90": 275.1999944448471,
+ "p95": 302.91199684143066,
+ "p99": 389.0880048274994
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 622712832,
- "combineLogicalBytes": 622712832,
- "fanoutMean": 5.3023681640625,
- "recvTokensMax": 5498,
+ "dispatchLogicalBytes": 22650880,
+ "combineLogicalBytes": 22650880,
+ "fanoutMean": 1.54296875,
+ "recvTokensMax": 1024,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 565.6960010528564,
- "p90": 575.6480097770691,
- "p95": 583.6799740791321,
- "p99": 610.4320287704468
- },
- "combine": {
- "p50": 815.7439827919006,
- "p90": 827.8719782829285,
- "p95": 830.6559920310974,
- "p99": 852.6080250740051
- },
- "roundtrip": {
- "p50": 1371.8719482421875,
- "p90": 1386.7199420928955,
- "p95": 1397.7919816970825,
- "p99": 1450.4319429397583
- },
- "isolatedSum": {
- "p50": 1381.439983844757,
- "p90": 1403.5199880599976,
- "p95": 1414.3359661102295,
- "p99": 1463.040053844452
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1245038592,
- "combineLogicalBytes": 1245038592,
- "fanoutMean": 5.30072021484375,
- "recvTokensMax": 10955,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1069.599986076355,
- "p90": 1080.8639526367188,
- "p95": 1088.8639688491821,
- "p99": 1109.0879440307617
- },
- "combine": {
- "p50": 1531.3600301742554,
- "p90": 1552.8000593185425,
- "p95": 1564.2240047454834,
- "p99": 1616.8960332870483
- },
- "roundtrip": {
- "p50": 2586.0159397125244,
- "p90": 2608.6719036102295,
- "p95": 2621.151924133301,
- "p99": 2671.7441082000732
- },
- "isolatedSum": {
- "p50": 2600.9600162506104,
- "p90": 2633.6640119552612,
- "p95": 2653.0879735946655,
- "p99": 2725.98397731781
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2489460736,
- "combineLogicalBytes": 2489460736,
- "fanoutMean": 5.299407958984375,
- "recvTokensMax": 21864,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
}
]
},
{
- "id": "cx-4c124953",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86",
- "colorKey": "b300_fdf55523",
- "comparisonKey": "61f6ca66d0cc490b",
+ "id": "cx-33e9cd0d",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||39778bd75f046da",
+ "colorKey": "gb300_db9a43b5",
+ "comparisonKey": "d24055c7960098e6",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:49:50.578369+00:00",
+ "generatedAt": "2026-06-29T13:52:52.053215+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-moderate",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-heavy+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate",
+ "experts": 288,
+ "routing": "zipf-heavy",
+ "routingLabel": "zipf-heavy+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -24569,244 +24058,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "b5217e990b95f86",
- "workloadId": "set:6:6709a02c31933a9f",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "39778bd75f046da",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 7.40625,
+ "eplbImbalanceAfter": 1.0004417782738093,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285646148",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285646148",
- "createdAt": "2026-06-27T09:49:50.578369+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 100.73599964380264,
- "p90": 103.16800326108932,
- "p95": 105.12000322341919,
- "p99": 109.53599959611893
+ "p50": 98.49599748849869,
+ "p90": 111.64800077676773,
+ "p95": 115.03999680280685,
+ "p99": 126.91199779510498
},
"combine": {
- "p50": 126.20800733566284,
- "p90": 127.71199643611908,
- "p95": 128.25599312782288,
- "p99": 138.65600526332855
+ "p50": 76.51200145483017,
+ "p90": 84.70399677753448,
+ "p95": 86.27200126647949,
+ "p99": 92.32000261545181
},
"roundtrip": {
- "p50": 208.3200067281723,
- "p90": 212.70400285720825,
- "p95": 213.50400149822235,
- "p99": 231.04000091552734
+ "p50": 154.2080044746399,
+ "p90": 165.0560051202774,
+ "p95": 169.11999881267548,
+ "p99": 225.24799406528473
},
"isolatedSum": {
- "p50": 226.94400697946548,
- "p90": 230.8799996972084,
- "p95": 233.37599635124207,
- "p99": 248.19200485944748
+ "p50": 175.00799894332886,
+ "p90": 196.35199755430222,
+ "p95": 201.31199806928635,
+ "p99": 219.2320004105568
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 559104,
+ "combineLogicalBytes": 559104,
+ "fanoutMean": 4.875,
+ "recvTokensMax": 6,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 135.93600690364838,
- "p90": 139.39200341701508,
- "p95": 140.83200693130493,
- "p99": 151.7760008573532
+ "p50": 98.39999675750732,
+ "p90": 111.64800077676773,
+ "p95": 116.83200299739838,
+ "p99": 131.9040060043335
},
"combine": {
- "p50": 176.86399817466736,
- "p90": 179.07199263572693,
- "p95": 180.03199994564056,
- "p99": 189.63199853897095
+ "p50": 81.88799768686295,
+ "p90": 86.84799820184708,
+ "p95": 90.71999788284302,
+ "p99": 97.02400118112564
},
"roundtrip": {
- "p50": 297.63200879096985,
- "p90": 303.3599853515625,
- "p95": 305.63199520111084,
- "p99": 315.71200489997864
+ "p50": 156.22399747371674,
+ "p90": 167.64800250530243,
+ "p95": 170.49600183963776,
+ "p99": 186.94399297237396
},
"isolatedSum": {
- "p50": 312.80000507831573,
- "p90": 318.463996052742,
- "p95": 320.8640068769455,
- "p99": 341.40799939632416
+ "p50": 180.28799444437027,
+ "p90": 198.4959989786148,
+ "p95": 207.5520008802414,
+ "p99": 228.92800718545914
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 100509696,
- "combineLogicalBytes": 100509696,
- "fanoutMean": 3.42333984375,
- "recvTokensMax": 2046,
- "stragglerRank": 3,
+ "dispatchLogicalBytes": 1175552,
+ "combineLogicalBytes": 1175552,
+ "fanoutMean": 5.125,
+ "recvTokensMax": 12,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 202.4639993906021,
- "p90": 210.207998752594,
- "p95": 211.2320065498352,
- "p99": 216.76799654960632
+ "p50": 99.45599734783173,
+ "p90": 111.77600175142288,
+ "p95": 116.99199676513672,
+ "p99": 135.83999872207642
},
"combine": {
- "p50": 325.1520097255707,
- "p90": 335.07201075553894,
- "p95": 335.7760012149811,
- "p99": 359.23200845718384
+ "p50": 84.03199911117554,
+ "p90": 87.67999708652496,
+ "p95": 92.51199662685394,
+ "p99": 100.96000134944916
},
"roundtrip": {
- "p50": 506.84797763824463,
- "p90": 513.5999917984009,
- "p95": 517.7599787712097,
- "p99": 538.4640097618103
+ "p50": 160.64000129699707,
+ "p90": 170.78399658203125,
+ "p95": 175.04000663757324,
+ "p99": 184.35199558734894
},
"isolatedSum": {
- "p50": 527.6160091161728,
- "p90": 545.2800095081329,
- "p95": 547.0080077648163,
- "p99": 576.0000050067902
+ "p50": 183.48799645900726,
+ "p90": 199.45599883794785,
+ "p95": 209.50399339199066,
+ "p99": 236.80000007152557
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2465792,
+ "combineLogicalBytes": 2465792,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 25,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 347.55200147628784,
- "p90": 352.9599905014038,
- "p95": 354.0799915790558,
- "p99": 366.2079870700836
+ "p50": 100.35199671983719,
+ "p90": 112.92800307273865,
+ "p95": 116.41599982976913,
+ "p99": 124.95999783277512
},
"combine": {
- "p50": 582.6560258865356,
- "p90": 592.3839807510376,
- "p95": 593.4720039367676,
- "p99": 617.0560121536255
+ "p50": 85.11999994516373,
+ "p90": 90.14400094747543,
+ "p95": 94.7519987821579,
+ "p99": 106.08000308275223
},
"roundtrip": {
- "p50": 910.431981086731,
- "p90": 917.8879857063293,
- "p95": 920.0000166893005,
- "p99": 955.6159973144531
+ "p50": 162.30399906635284,
+ "p90": 172.2559928894043,
+ "p95": 175.84000527858734,
+ "p99": 182.75199830532074
},
"isolatedSum": {
- "p50": 930.2080273628235,
- "p90": 945.3439712524414,
- "p95": 947.5519955158234,
- "p99": 983.2639992237091
+ "p50": 185.47199666500092,
+ "p90": 203.07200402021408,
+ "p95": 211.16799861192703,
+ "p99": 231.04000091552734
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 405035008,
- "combineLogicalBytes": 405035008,
- "fanoutMean": 3.4488525390625,
- "recvTokensMax": 8189,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4988928,
+ "combineLogicalBytes": 4988928,
+ "fanoutMean": 5.4375,
+ "recvTokensMax": 47,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 640.7679915428162,
- "p90": 647.4559903144836,
- "p95": 654.7520160675049,
- "p99": 680.7680130004883
+ "p50": 102.52799838781357,
+ "p90": 113.79200220108032,
+ "p95": 118.07999759912491,
+ "p99": 128.80000472068787
},
"combine": {
- "p50": 1063.1359815597534,
- "p90": 1072.8960037231445,
- "p95": 1073.6639499664307,
- "p99": 1096.384048461914
+ "p50": 86.11200004816055,
+ "p90": 94.30400282144547,
+ "p95": 97.08800166845322,
+ "p99": 108.25599730014801
},
"roundtrip": {
- "p50": 1693.8879489898682,
- "p90": 1702.7519941329956,
- "p95": 1707.0399522781372,
- "p99": 1791.648030281067
+ "p50": 163.2000058889389,
+ "p90": 172.83199727535248,
+ "p95": 176.256000995636,
+ "p99": 183.1039935350418
},
"isolatedSum": {
- "p50": 1703.9039731025696,
- "p90": 1720.3519940376282,
- "p95": 1728.4159660339355,
- "p99": 1777.1520614624023
+ "p50": 188.63999843597412,
+ "p90": 208.0960050225258,
+ "p95": 215.16799926757812,
+ "p99": 237.05600202083588
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
+ "dispatchLogicalBytes": 9791488,
+ "combineLogicalBytes": 9791488,
+ "fanoutMean": 5.3359375,
+ "recvTokensMax": 94,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1251.9999742507935,
- "p90": 1263.10396194458,
- "p95": 1265.504002571106,
- "p99": 1327.9999494552612
+ "p50": 104.92800176143646,
+ "p90": 115.87200313806534,
+ "p95": 122.65600264072418,
+ "p99": 137.11999356746674
},
"combine": {
- "p50": 2043.5841083526611,
- "p90": 2046.623945236206,
- "p95": 2055.6159019470215,
- "p99": 2118.272066116333
+ "p50": 92.03200042247772,
+ "p90": 97.69599884748459,
+ "p95": 99.32799637317657,
+ "p99": 105.18400371074677
},
"roundtrip": {
- "p50": 3285.952091217041,
- "p90": 3299.0078926086426,
- "p95": 3308.896064758301,
- "p99": 3355.7119369506836
+ "p50": 168.67199540138245,
+ "p90": 179.71199750900269,
+ "p95": 183.3920031785965,
+ "p99": 199.39200580120087
},
"isolatedSum": {
- "p50": 3295.5840826034546,
- "p90": 3309.727907180786,
- "p95": 3321.1199045181274,
- "p99": 3446.2720155715942
+ "p50": 196.96000218391418,
+ "p90": 213.56800198554993,
+ "p95": 221.98399901390076,
+ "p99": 242.3039972782135
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1619795968,
- "combineLogicalBytes": 1619795968,
- "fanoutMean": 3.4481201171875,
- "recvTokensMax": 32761,
- "stragglerRank": 3,
+ "dispatchLogicalBytes": 19410944,
+ "combineLogicalBytes": 19410944,
+ "fanoutMean": 5.2890625,
+ "recvTokensMax": 178,
+ "stragglerRank": 4,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 112.73600161075592,
+ "p90": 122.04799801111221,
+ "p95": 127.26399302482605,
+ "p99": 146.40000462532043
+ },
+ "combine": {
+ "p50": 107.19999670982361,
+ "p90": 111.80800199508667,
+ "p95": 114.81600254774094,
+ "p99": 120.95999717712402
+ },
+ "roundtrip": {
+ "p50": 189.79200720787048,
+ "p90": 197.56799936294556,
+ "p95": 200.95999538898468,
+ "p99": 207.8399956226349
+ },
+ "isolatedSum": {
+ "p50": 219.93599832057953,
+ "p90": 233.85600000619888,
+ "p95": 242.079995572567,
+ "p99": 267.36000180244446
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38678528,
+ "combineLogicalBytes": 38678528,
+ "fanoutMean": 5.26953125,
+ "recvTokensMax": 360,
+ "stragglerRank": 4,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 129.5360028743744,
+ "p90": 137.7280056476593,
+ "p95": 141.15199446678162,
+ "p99": 153.3759981393814
+ },
+ "combine": {
+ "p50": 124.25599992275238,
+ "p90": 132.1599930524826,
+ "p95": 134.14399325847626,
+ "p99": 137.63199746608734
+ },
+ "roundtrip": {
+ "p50": 226.55999660491943,
+ "p90": 236.25600337982178,
+ "p95": 239.3600046634674,
+ "p99": 247.3279982805252
+ },
+ "isolatedSum": {
+ "p50": 253.79200279712677,
+ "p90": 269.8879987001419,
+ "p95": 275.2959877252579,
+ "p99": 291.00799560546875
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 77285376,
+ "combineLogicalBytes": 77285376,
+ "fanoutMean": 5.2646484375,
+ "recvTokensMax": 704,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -24814,47 +24377,48 @@
]
},
{
- "id": "cx-5c56d46f",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39",
- "colorKey": "b300_4eade0db",
- "comparisonKey": "0fc5df79c3e0429b",
+ "id": "cx-f55a7c17",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||a3b13bb200bb717",
+ "colorKey": "gb300_15a35db4",
+ "comparisonKey": "2d8b83ad658760e4",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:49:49.297184+00:00",
+ "generatedAt": "2026-06-29T13:43:00.976664+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_08",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf-moderate+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-mild",
+ "model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 288,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate+eplb",
+ "experts": 256,
+ "routing": "zipf-mild",
+ "routingLabel": "zipf-mild",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": true,
+ "eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -24862,244 +24426,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "2b57a75d27f5b39",
- "workloadId": "set:6:6709a02c31933a9f",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.895263671875,
- "eplbImbalanceAfter": 1.0000902811686199,
- "backendVersion": "1.2.1",
+ "traceSignature": "a3b13bb200bb717",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285648797",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285648797",
- "createdAt": "2026-06-27T09:49:49.297184+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 95.0080007314682,
- "p90": 97.59999811649323,
- "p95": 99.61599856615067,
- "p99": 111.16799712181091
+ "p50": 90.04800021648407,
+ "p90": 101.40799731016159,
+ "p95": 105.24799674749374,
+ "p99": 114.14399743080139
},
"combine": {
- "p50": 115.29599875211716,
- "p90": 116.95999652147293,
- "p95": 118.8800036907196,
- "p99": 139.52000439167023
+ "p50": 79.48800176382065,
+ "p90": 83.77599716186523,
+ "p95": 86.01599931716919,
+ "p99": 100.09600222110748
},
"roundtrip": {
- "p50": 193.24800372123718,
- "p90": 199.42399859428406,
- "p95": 200.70399343967438,
- "p99": 229.08799350261688
+ "p50": 146.464005112648,
+ "p90": 156.89599514007568,
+ "p95": 161.72799468040466,
+ "p99": 172.2240000963211
},
"isolatedSum": {
- "p50": 210.30399948358536,
- "p90": 214.55999463796616,
- "p95": 218.49600225687027,
- "p99": 250.68800151348114
+ "p50": 169.53600198030472,
+ "p90": 185.18399447202682,
+ "p95": 191.26399606466293,
+ "p99": 214.23999965190887
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77385728,
- "combineLogicalBytes": 77385728,
- "fanoutMean": 5.271484375,
- "recvTokensMax": 691,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 587776,
+ "combineLogicalBytes": 587776,
+ "fanoutMean": 5.125,
+ "recvTokensMax": 8,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 137.56799697875977,
- "p90": 140.35199582576752,
- "p95": 142.2400027513504,
- "p99": 159.58400070667267
+ "p50": 91.16800129413605,
+ "p90": 101.9200012087822,
+ "p95": 106.6880002617836,
+ "p99": 114.33599889278412
},
"combine": {
- "p50": 154.7520011663437,
- "p90": 163.93600404262543,
- "p95": 164.32000696659088,
- "p99": 166.4000004529953
+ "p50": 81.7599967122078,
+ "p90": 86.46400272846222,
+ "p95": 93.02400052547455,
+ "p99": 96.38399630784988
},
"roundtrip": {
- "p50": 272.2559869289398,
- "p90": 278.01600098609924,
- "p95": 280.64000606536865,
- "p99": 293.66400837898254
+ "p50": 148.99200201034546,
+ "p90": 158.11200439929962,
+ "p95": 161.56800091266632,
+ "p99": 169.08800601959229
},
"isolatedSum": {
- "p50": 292.31999814510345,
- "p90": 304.28799986839294,
- "p95": 306.5600097179413,
- "p99": 325.98400115966797
+ "p50": 172.92799800634384,
+ "p90": 188.38400393724442,
+ "p95": 199.71200078725815,
+ "p99": 210.719995200634
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155172864,
- "combineLogicalBytes": 155172864,
- "fanoutMean": 5.28515625,
- "recvTokensMax": 1378,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1103872,
+ "combineLogicalBytes": 1103872,
+ "fanoutMean": 4.8125,
+ "recvTokensMax": 16,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 193.4719979763031,
- "p90": 199.23199713230133,
- "p95": 200.54399967193604,
- "p99": 207.48800039291382
+ "p50": 92.16000139713287,
+ "p90": 104.76800054311752,
+ "p95": 109.3439981341362,
+ "p99": 124.67200309038162
},
"combine": {
- "p50": 265.79201221466064,
- "p90": 274.52799677848816,
- "p95": 274.9119997024536,
- "p99": 285.8879864215851
+ "p50": 81.91999793052673,
+ "p90": 87.67999708652496,
+ "p95": 91.90399944782257,
+ "p99": 96.3520035147667
},
"roundtrip": {
- "p50": 444.2239999771118,
- "p90": 450.5600035190582,
- "p95": 459.3920111656189,
- "p99": 474.016010761261
+ "p50": 150.59199929237366,
+ "p90": 162.59199380874634,
+ "p95": 166.33599996566772,
+ "p99": 178.6240041255951
},
"isolatedSum": {
- "p50": 459.26401019096375,
- "p90": 473.7599939107895,
- "p95": 475.45599937438965,
- "p99": 493.3759868144989
+ "p50": 174.0799993276596,
+ "p90": 192.4479976296425,
+ "p95": 201.24799758195877,
+ "p99": 221.02400660514832
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 310546432,
- "combineLogicalBytes": 310546432,
- "fanoutMean": 5.28857421875,
- "recvTokensMax": 2745,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2250752,
+ "combineLogicalBytes": 2250752,
+ "fanoutMean": 4.90625,
+ "recvTokensMax": 31,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 325.0240087509155,
- "p90": 329.8240005970001,
- "p95": 331.0079872608185,
- "p99": 350.14399886131287
+ "p50": 93.47199648618698,
+ "p90": 103.93600165843964,
+ "p95": 108.0000028014183,
+ "p99": 115.90400338172913
},
"combine": {
- "p50": 457.7920138835907,
- "p90": 459.4239890575409,
- "p95": 461.95200085639954,
- "p99": 473.66398572921753
+ "p50": 82.87999778985977,
+ "p90": 90.7519981265068,
+ "p95": 93.21600198745728,
+ "p99": 100.89600086212158
},
"roundtrip": {
- "p50": 760.479986667633,
- "p90": 767.7119970321655,
- "p95": 772.2240090370178,
- "p99": 781.9520235061646
+ "p50": 153.6960005760193,
+ "p90": 163.5199934244156,
+ "p95": 168.03200542926788,
+ "p99": 176.32000148296356
},
"isolatedSum": {
- "p50": 782.8160226345062,
- "p90": 789.247989654541,
- "p95": 792.959988117218,
- "p99": 823.8079845905304
+ "p50": 176.35199427604675,
+ "p90": 194.68799978494644,
+ "p95": 201.21600478887558,
+ "p99": 216.8000042438507
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 620619776,
- "combineLogicalBytes": 620619776,
- "fanoutMean": 5.2845458984375,
- "recvTokensMax": 5526,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4472832,
+ "combineLogicalBytes": 4472832,
+ "fanoutMean": 4.875,
+ "recvTokensMax": 62,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 574.9120116233826,
- "p90": 582.2719931602478,
- "p95": 583.9999914169312,
- "p99": 640.3840184211731
+ "p50": 94.30400282144547,
+ "p90": 105.47199845314026,
+ "p95": 109.02400314807892,
+ "p99": 117.0239970088005
},
"combine": {
- "p50": 830.016016960144,
- "p90": 839.9360179901123,
- "p95": 840.287983417511,
- "p99": 852.4479866027832
+ "p50": 84.44800227880478,
+ "p90": 91.45600348711014,
+ "p95": 93.28000247478485,
+ "p99": 98.88000041246414
},
"roundtrip": {
- "p50": 1387.8079652786255,
- "p90": 1396.7679738998413,
- "p95": 1398.9759683609009,
- "p99": 1455.1680088043213
+ "p50": 155.2640050649643,
+ "p90": 166.4000004529953,
+ "p95": 169.40799355506897,
+ "p99": 179.6800047159195
},
"isolatedSum": {
- "p50": 1404.9280285835266,
- "p90": 1422.20801115036,
- "p95": 1424.2879748344421,
- "p99": 1492.8320050239563
+ "p50": 178.75200510025024,
+ "p90": 196.9280019402504,
+ "p95": 202.30400562286377,
+ "p99": 215.90399742126465
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1239175168,
- "combineLogicalBytes": 1239175168,
- "fanoutMean": 5.2757568359375,
- "recvTokensMax": 11165,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 8888320,
+ "combineLogicalBytes": 8888320,
+ "fanoutMean": 4.84375,
+ "recvTokensMax": 124,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1065.2480125427246,
- "p90": 1072.6079940795898,
- "p95": 1079.2319774627686,
- "p99": 1102.720022201538
+ "p50": 99.07200187444687,
+ "p90": 109.6000000834465,
+ "p95": 113.21599781513214,
+ "p99": 132.4159950017929
},
"combine": {
- "p50": 1539.5840406417847,
- "p90": 1542.464017868042,
- "p95": 1552.2559881210327,
- "p99": 1614.7840023040771
+ "p50": 93.24800223112106,
+ "p90": 97.08800166845322,
+ "p95": 101.6319990158081,
+ "p99": 108.67200046777725
},
"roundtrip": {
- "p50": 2586.3358974456787,
- "p90": 2598.720073699951,
- "p95": 2605.4399013519287,
- "p99": 2665.247917175293
+ "p50": 163.455992937088,
+ "p90": 172.12800681591034,
+ "p95": 175.00799894332886,
+ "p99": 187.16800212860107
},
"isolatedSum": {
- "p50": 2604.8320531845093,
- "p90": 2615.072011947632,
- "p95": 2631.4879655838013,
- "p99": 2717.5040245056152
+ "p50": 192.32000410556793,
+ "p90": 206.68800175189972,
+ "p95": 214.84799683094025,
+ "p99": 241.08799546957016
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2481604608,
- "combineLogicalBytes": 2481604608,
- "fanoutMean": 5.282684326171875,
- "recvTokensMax": 22165,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 17733632,
+ "combineLogicalBytes": 17733632,
+ "fanoutMean": 4.83203125,
+ "recvTokensMax": 248,
+ "stragglerRank": 6,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 110.20799726247787,
+ "p90": 117.66400188207626,
+ "p95": 121.0239976644516,
+ "p99": 127.13600695133209
+ },
+ "combine": {
+ "p50": 105.85600137710571,
+ "p90": 110.01600325107574,
+ "p95": 115.13599753379822,
+ "p99": 120.54400146007538
+ },
+ "roundtrip": {
+ "p50": 187.83999979496002,
+ "p90": 196.31999731063843,
+ "p95": 199.48799908161163,
+ "p99": 208.639994263649
+ },
+ "isolatedSum": {
+ "p50": 216.0639986395836,
+ "p90": 227.680005133152,
+ "p95": 236.15999519824982,
+ "p99": 247.68000841140747
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 35424256,
+ "combineLogicalBytes": 35424256,
+ "fanoutMean": 4.826171875,
+ "recvTokensMax": 492,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 128.51199507713318,
+ "p90": 136.99199259281158,
+ "p95": 139.42399621009827,
+ "p99": 148.28799664974213
+ },
+ "combine": {
+ "p50": 134.20799374580383,
+ "p90": 142.46399700641632,
+ "p95": 144.51199769973755,
+ "p99": 151.90400183200836
+ },
+ "roundtrip": {
+ "p50": 238.5600060224533,
+ "p90": 246.2719976902008,
+ "p95": 249.63200092315674,
+ "p99": 260.1599991321564
+ },
+ "isolatedSum": {
+ "p50": 262.719988822937,
+ "p90": 279.4559895992279,
+ "p95": 283.9359939098358,
+ "p99": 300.1919984817505
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 70160384,
+ "combineLogicalBytes": 70160384,
+ "fanoutMean": 4.779296875,
+ "recvTokensMax": 987,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -25107,47 +24745,48 @@
]
},
{
- "id": "cx-fb4f7eef",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39",
- "colorKey": "b300_f1ea991b",
- "comparisonKey": "c5288b3181a71a36",
+ "id": "cx-9d14c709",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||ab982093c4eac2b",
+ "colorKey": "gb300_46b172da",
+ "comparisonKey": "23a6c8c598f2838f",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:48:56.789691+00:00",
+ "generatedAt": "2026-06-29T13:43:59.087832+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_13",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 · zipf+eplb",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-mild+eplb",
"model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
+ "routing": "zipf-mild",
+ "routingLabel": "zipf-mild+eplb",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -25155,243 +24794,317 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "2b57a75d27f5b39",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.895263671875,
- "eplbImbalanceAfter": 1.0000902811686199,
- "backendVersion": "1.2.1",
+ "traceSignature": "ab982093c4eac2b",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 2.61328125,
+ "eplbImbalanceAfter": 1.0009114583333334,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285627928",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285627928",
- "createdAt": "2026-06-27T09:48:56.789691+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 94.43199634552002,
- "p90": 96.73599898815155,
- "p95": 99.0080013871193,
- "p99": 112.5119999051094
+ "p50": 98.36799651384354,
+ "p90": 110.52799969911575,
+ "p95": 113.6000007390976,
+ "p99": 123.48800152540207
},
"combine": {
- "p50": 115.35999923944473,
- "p90": 116.22399836778641,
- "p95": 117.37599968910217,
- "p99": 128.4160017967224
+ "p50": 81.24800026416779,
+ "p90": 85.15200018882751,
+ "p95": 87.96799927949905,
+ "p99": 99.58399832248688
},
"roundtrip": {
- "p50": 195.71200013160706,
- "p90": 200.51200687885284,
- "p95": 201.31200551986694,
- "p99": 211.61599457263947
+ "p50": 154.88000214099884,
+ "p90": 166.33599996566772,
+ "p95": 170.9119975566864,
+ "p99": 182.43199586868286
},
"isolatedSum": {
- "p50": 209.79199558496475,
- "p90": 212.95999735593796,
- "p95": 216.38400107622147,
- "p99": 240.92800170183182
+ "p50": 179.61599677801132,
+ "p90": 195.67999988794327,
+ "p95": 201.56800001859665,
+ "p99": 223.07199984788895
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77385728,
- "combineLogicalBytes": 77385728,
- "fanoutMean": 5.271484375,
- "recvTokensMax": 691,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 602112,
+ "combineLogicalBytes": 602112,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 7,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 137.40800321102142,
- "p90": 139.90400731563568,
- "p95": 140.83200693130493,
- "p99": 150.176003575325
- },
- "combine": {
- "p50": 153.85599434375763,
- "p90": 163.2319986820221,
- "p95": 163.7440025806427,
- "p99": 175.6799966096878
+ "p50": 101.15200281143188,
+ "p90": 114.01599645614624,
+ "p95": 119.48800086975098,
+ "p99": 142.59199798107147
+ },
+ "combine": {
+ "p50": 83.8719978928566,
+ "p90": 89.37600255012512,
+ "p95": 92.92799979448318,
+ "p99": 98.65599870681763
},
"roundtrip": {
- "p50": 272.09600806236267,
- "p90": 277.15200185775757,
- "p95": 278.4639894962311,
- "p99": 286.46400570869446
+ "p50": 158.24000537395477,
+ "p90": 171.10399901866913,
+ "p95": 174.8799979686737,
+ "p99": 182.72000551223755
},
"isolatedSum": {
- "p50": 291.26399755477905,
- "p90": 303.1360059976578,
- "p95": 304.57600951194763,
- "p99": 325.8560001850128
+ "p50": 185.02400070428848,
+ "p90": 203.39199900627136,
+ "p95": 212.41600066423416,
+ "p99": 241.2479966878891
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155172864,
- "combineLogicalBytes": 155172864,
- "fanoutMean": 5.28515625,
- "recvTokensMax": 1378,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1189888,
+ "combineLogicalBytes": 1189888,
+ "fanoutMean": 5.1875,
+ "recvTokensMax": 12,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 193.53599846363068,
- "p90": 200.8959949016571,
- "p95": 202.2079974412918,
- "p99": 214.84799683094025
+ "p50": 99.5199978351593,
+ "p90": 112.03200370073318,
+ "p95": 116.38399958610535,
+ "p99": 130.3039938211441
},
"combine": {
- "p50": 265.1839852333069,
- "p90": 274.1760015487671,
- "p95": 274.78399872779846,
- "p99": 279.04000878334045
+ "p50": 84.03199911117554,
+ "p90": 91.16800129413605,
+ "p95": 93.91999989748001,
+ "p99": 103.84000092744827
},
"roundtrip": {
- "p50": 440.8000111579895,
- "p90": 447.7440118789673,
- "p95": 449.15199279785156,
- "p99": 459.03998613357544
+ "p50": 159.58400070667267,
+ "p90": 172.60800302028656,
+ "p95": 175.74399709701538,
+ "p99": 189.2479956150055
},
"isolatedSum": {
- "p50": 458.71998369693756,
- "p90": 475.0719964504242,
- "p95": 476.99199616909027,
- "p99": 493.8880056142807
+ "p50": 183.55199694633484,
+ "p90": 203.20000499486923,
+ "p95": 210.30399948358536,
+ "p99": 234.14399474859238
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 310546432,
- "combineLogicalBytes": 310546432,
- "fanoutMean": 5.28857421875,
- "recvTokensMax": 2745,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2408448,
+ "combineLogicalBytes": 2408448,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 23,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 325.6959915161133,
- "p90": 330.3680121898651,
- "p95": 331.84000849723816,
- "p99": 342.72000193595886
+ "p50": 100.09600222110748,
+ "p90": 112.92800307273865,
+ "p95": 118.27199906110764,
+ "p99": 129.56799566745758
},
"combine": {
- "p50": 450.3360092639923,
- "p90": 459.3920111656189,
- "p95": 460.4479968547821,
- "p99": 472.6080000400543
+ "p50": 84.95999872684479,
+ "p90": 93.72799843549728,
+ "p95": 96.47999703884125,
+ "p99": 103.35999727249146
},
"roundtrip": {
- "p50": 759.4239711761475,
- "p90": 766.2720084190369,
- "p95": 770.3679800033569,
- "p99": 786.6560220718384
+ "p50": 162.49600052833557,
+ "p90": 172.2559928894043,
+ "p95": 175.64800381660461,
+ "p99": 183.61599743366241
},
"isolatedSum": {
- "p50": 776.0320007801056,
- "p90": 789.760023355484,
- "p95": 792.2880053520203,
- "p99": 815.3280019760132
+ "p50": 185.05600094795227,
+ "p90": 206.65600150823593,
+ "p95": 214.75199609994888,
+ "p99": 232.92799293994904
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 620619776,
- "combineLogicalBytes": 620619776,
- "fanoutMean": 5.2845458984375,
- "recvTokensMax": 5526,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4859904,
+ "combineLogicalBytes": 4859904,
+ "fanoutMean": 5.296875,
+ "recvTokensMax": 47,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 574.4640231132507,
- "p90": 581.9839835166931,
- "p95": 583.9359760284424,
- "p99": 595.9039926528931
+ "p50": 100.70399940013885,
+ "p90": 112.89600282907486,
+ "p95": 116.64000153541565,
+ "p99": 131.071999669075
},
"combine": {
- "p50": 828.9600014686584,
- "p90": 839.9680256843567,
- "p95": 840.4160141944885,
- "p99": 851.9359827041626
+ "p50": 86.04799956083298,
+ "p90": 94.71999853849411,
+ "p95": 96.38399630784988,
+ "p99": 100.99200159311295
},
"roundtrip": {
- "p50": 1387.0079517364502,
- "p90": 1396.83198928833,
- "p95": 1399.5200395584106,
- "p99": 1415.1999950408936
+ "p50": 164.99200463294983,
+ "p90": 175.77600479125977,
+ "p95": 179.71199750900269,
+ "p99": 195.51999866962433
},
"isolatedSum": {
- "p50": 1403.4240245819092,
- "p90": 1421.9520092010498,
- "p95": 1424.351990222931,
- "p99": 1447.8399753570557
+ "p50": 186.75199896097183,
+ "p90": 207.61600136756897,
+ "p95": 213.02399784326553,
+ "p99": 232.06400126218796
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1239175168,
- "combineLogicalBytes": 1239175168,
- "fanoutMean": 5.2757568359375,
- "recvTokensMax": 11165,
+ "dispatchLogicalBytes": 9605120,
+ "combineLogicalBytes": 9605120,
+ "fanoutMean": 5.234375,
+ "recvTokensMax": 93,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1063.904047012329,
- "p90": 1069.216012954712,
- "p95": 1072.543978691101,
- "p99": 1081.1200141906738
+ "p50": 103.74400019645691,
+ "p90": 114.72000181674957,
+ "p95": 117.88800358772278,
+ "p99": 125.5359947681427
},
"combine": {
- "p50": 1530.303955078125,
- "p90": 1540.4800176620483,
- "p95": 1541.9520139694214,
- "p99": 1576.799988746643
+ "p50": 94.27200257778168,
+ "p90": 98.81599992513657,
+ "p95": 102.62399911880493,
+ "p99": 115.39199948310852
},
"roundtrip": {
- "p50": 2580.832004547119,
- "p90": 2592.2560691833496,
- "p95": 2598.8481044769287,
- "p99": 2691.8399333953857
+ "p50": 170.23999989032745,
+ "p90": 180.7360053062439,
+ "p95": 184.12800133228302,
+ "p99": 192.89599359035492
},
"isolatedSum": {
- "p50": 2594.208002090454,
- "p90": 2609.6960306167603,
- "p95": 2614.4959926605225,
- "p99": 2657.920002937317
+ "p50": 198.0160027742386,
+ "p90": 213.53600174188614,
+ "p95": 220.5120027065277,
+ "p99": 240.92799425125122
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2481604608,
- "combineLogicalBytes": 2481604608,
- "fanoutMean": 5.282684326171875,
- "recvTokensMax": 22165,
+ "dispatchLogicalBytes": 19367936,
+ "combineLogicalBytes": 19367936,
+ "fanoutMean": 5.27734375,
+ "recvTokensMax": 182,
+ "stragglerRank": 6,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 113.27999830245972,
+ "p90": 122.01599776744843,
+ "p95": 124.60800260305405,
+ "p99": 134.5279961824417
+ },
+ "combine": {
+ "p50": 108.03200304508209,
+ "p90": 111.90400272607803,
+ "p95": 116.41599982976913,
+ "p99": 121.95199728012085
+ },
+ "roundtrip": {
+ "p50": 191.00800156593323,
+ "p90": 200.95999538898468,
+ "p95": 204.79999482631683,
+ "p99": 214.20800685882568
+ },
+ "isolatedSum": {
+ "p50": 221.3120013475418,
+ "p90": 233.92000049352646,
+ "p95": 241.02400243282318,
+ "p99": 256.47999346256256
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38535168,
+ "combineLogicalBytes": 38535168,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 358,
+ "stragglerRank": 6,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 129.18399274349213,
+ "p90": 138.047993183136,
+ "p95": 141.4400041103363,
+ "p99": 150.04800260066986
+ },
+ "combine": {
+ "p50": 123.90399724245071,
+ "p90": 132.6719969511032,
+ "p95": 134.46399569511414,
+ "p99": 142.0159935951233
+ },
+ "roundtrip": {
+ "p50": 227.7120053768158,
+ "p90": 236.35199666023254,
+ "p95": 238.8480007648468,
+ "p99": 244.32000517845154
+ },
+ "isolatedSum": {
+ "p50": 253.08798998594284,
+ "p90": 270.7199901342392,
+ "p95": 275.90399980545044,
+ "p99": 292.06399619579315
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 76869632,
+ "combineLogicalBytes": 76869632,
+ "fanoutMean": 5.236328125,
+ "recvTokensMax": 688,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
@@ -25400,878 +25113,735 @@
]
},
{
- "id": "cx-e7727ce9",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "b300_c1ad910f",
- "comparisonKey": "9532205a80f3d757",
+ "id": "cx-e40c9223",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db",
+ "colorKey": "gb300_6e04dda3",
+ "comparisonKey": "a225bda519f2d24b",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T17:38:48.516779+00:00",
+ "generatedAt": "2026-06-29T13:47:11.009762+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm)",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-moderate",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "zipf-moderate",
+ "routingLabel": "zipf-moderate",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "1093cd76c9cd2db",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254469772",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772",
- "createdAt": "2026-06-26T17:38:48.516779+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 94.11200135946274,
- "p90": 98.9760011434555,
- "p95": 100.54399818181992,
- "p99": 116.44800007343292
+ "p50": 91.93599969148636,
+ "p90": 106.33599758148193,
+ "p95": 110.944002866745,
+ "p99": 121.0239976644516
},
"combine": {
- "p50": 115.1999980211258,
- "p90": 115.9679964184761,
- "p95": 116.89600348472595,
- "p99": 129.02399897575378
+ "p50": 77.2479996085167,
+ "p90": 83.39200168848038,
+ "p95": 85.24800091981888,
+ "p99": 94.7519987821579
},
"roundtrip": {
- "p50": 193.2159960269928,
- "p90": 198.43199849128723,
- "p95": 199.8080015182495,
- "p99": 217.50399470329285
+ "p50": 148.51200580596924,
+ "p90": 160.16000509262085,
+ "p95": 163.7440025806427,
+ "p99": 172.89599776268005
},
"isolatedSum": {
- "p50": 209.31199938058853,
- "p90": 214.9439975619316,
- "p95": 217.44000166654587,
- "p99": 245.4719990491867
+ "p50": 169.18399930000305,
+ "p90": 189.7279992699623,
+ "p95": 196.19200378656387,
+ "p99": 215.7759964466095
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 444416,
+ "combineLogicalBytes": 444416,
+ "fanoutMean": 3.875,
+ "recvTokensMax": 8,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 135.42400300502777,
- "p90": 138.75199854373932,
- "p95": 141.184002161026,
- "p99": 151.0079950094223
+ "p50": 92.25600212812424,
+ "p90": 105.05600273609161,
+ "p95": 109.79200154542923,
+ "p99": 123.00799787044525
},
"combine": {
- "p50": 154.59200739860535,
- "p90": 163.90399634838104,
- "p95": 164.5440012216568,
- "p99": 176.54399573802948
+ "p50": 80.19199967384338,
+ "p90": 83.96799862384796,
+ "p95": 86.33600175380707,
+ "p99": 95.74399888515472
},
"roundtrip": {
- "p50": 271.67999744415283,
- "p90": 277.6319980621338,
- "p95": 280.70399165153503,
- "p99": 291.3599908351898
+ "p50": 147.77599275112152,
+ "p90": 159.36000645160675,
+ "p95": 164.38399255275726,
+ "p99": 175.23199319839478
},
"isolatedSum": {
- "p50": 290.0160104036331,
- "p90": 302.65599489212036,
- "p95": 305.7280033826828,
- "p99": 327.5519907474518
+ "p50": 172.44800180196762,
+ "p90": 189.02400135993958,
+ "p95": 196.1280032992363,
+ "p99": 218.75199675559998
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 845824,
+ "combineLogicalBytes": 845824,
+ "fanoutMean": 3.6875,
+ "recvTokensMax": 16,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 193.24800372123718,
- "p90": 199.61600005626678,
- "p95": 200.80000162124634,
- "p99": 206.68800175189972
+ "p50": 94.33600306510925,
+ "p90": 107.2319969534874,
+ "p95": 113.47199976444244,
+ "p99": 124.7360035777092
},
"combine": {
- "p50": 265.8880054950714,
- "p90": 274.59201216697693,
- "p95": 275.2000093460083,
- "p99": 286.78399324417114
+ "p50": 80.99199831485748,
+ "p90": 84.95999872684479,
+ "p95": 89.31200206279755,
+ "p99": 120.12799829244614
},
"roundtrip": {
- "p50": 442.59199500083923,
- "p90": 448.96000623703003,
- "p95": 455.00800013542175,
- "p99": 461.40798926353455
+ "p50": 150.07999539375305,
+ "p90": 161.0880047082901,
+ "p95": 164.19200599193573,
+ "p99": 173.98400604724884
},
"isolatedSum": {
- "p50": 459.1360092163086,
- "p90": 474.2080122232437,
- "p95": 476.00001096725464,
- "p99": 493.47199499607086
+ "p50": 175.32800137996674,
+ "p90": 192.19199568033218,
+ "p95": 202.78400182724,
+ "p99": 244.86400187015533
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1691648,
+ "combineLogicalBytes": 1691648,
+ "fanoutMean": 3.6875,
+ "recvTokensMax": 32,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 326.2079954147339,
- "p90": 329.75998520851135,
- "p95": 331.6799998283386,
- "p99": 341.6000008583069
+ "p50": 95.74399888515472,
+ "p90": 107.35999792814255,
+ "p95": 112.44799941778183,
+ "p99": 123.96799772977829
},
"combine": {
- "p50": 457.66401290893555,
- "p90": 459.77601408958435,
- "p95": 469.760000705719,
- "p99": 473.7600088119507
+ "p50": 82.40000158548355,
+ "p90": 87.0399996638298,
+ "p95": 92.76799857616425,
+ "p99": 97.37599641084671
},
"roundtrip": {
- "p50": 762.5920176506042,
- "p90": 771.7440128326416,
- "p95": 774.2080092430115,
- "p99": 789.6320223808289
+ "p50": 151.48800611495972,
+ "p90": 164.000004529953,
+ "p95": 167.9999977350235,
+ "p99": 179.00800704956055
},
"isolatedSum": {
- "p50": 783.8720083236694,
- "p90": 789.5359992980957,
- "p95": 801.4400005340576,
- "p99": 815.3600096702576
+ "p50": 178.14400047063828,
+ "p90": 194.39999759197235,
+ "p95": 205.21599799394608,
+ "p99": 221.343994140625
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 3354624,
+ "combineLogicalBytes": 3354624,
+ "fanoutMean": 3.65625,
+ "recvTokensMax": 64,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 577.1200060844421,
- "p90": 582.5920104980469,
- "p95": 583.5520029067993,
- "p99": 591.2960171699524
+ "p50": 96.3200032711029,
+ "p90": 108.15999656915665,
+ "p95": 113.34399878978729,
+ "p99": 134.5600038766861
},
"combine": {
- "p50": 817.2799944877625,
- "p90": 828.4159898757935,
- "p95": 831.8719863891602,
- "p99": 913.4079813957214
+ "p50": 83.45600217580795,
+ "p90": 87.71199733018875,
+ "p95": 92.6079973578453,
+ "p99": 97.08800166845322
},
"roundtrip": {
- "p50": 1376.9279718399048,
- "p90": 1386.9119882583618,
- "p95": 1392.7680253982544,
- "p99": 1453.8240432739258
+ "p50": 155.61600029468536,
+ "p90": 167.04000532627106,
+ "p95": 171.36000096797943,
+ "p99": 181.69599771499634
},
"isolatedSum": {
- "p50": 1394.4000005722046,
- "p90": 1411.0080003738403,
- "p95": 1415.4239892959595,
- "p99": 1504.7039985656738
+ "p50": 179.77600544691086,
+ "p90": 195.8719938993454,
+ "p95": 205.9519961476326,
+ "p99": 231.6480055451393
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 6537216,
+ "combineLogicalBytes": 6537216,
+ "fanoutMean": 3.5625,
+ "recvTokensMax": 127,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1069.5040225982666,
- "p90": 1078.0160427093506,
- "p95": 1080.2559852600098,
- "p99": 1090.880036354065
+ "p50": 97.21600264310837,
+ "p90": 107.42399841547012,
+ "p95": 110.81600189208984,
+ "p99": 119.32799965143204
},
"combine": {
- "p50": 1528.8959741592407,
- "p90": 1540.4479503631592,
- "p95": 1542.688012123108,
- "p99": 1554.751992225647
+ "p50": 87.52000331878662,
+ "p90": 95.90400010347366,
+ "p95": 97.47199714183807,
+ "p99": 105.6319996714592
},
"roundtrip": {
- "p50": 2581.9520950317383,
- "p90": 2594.6240425109863,
- "p95": 2602.303981781006,
- "p99": 2637.9199028015137
+ "p50": 162.08000481128693,
+ "p90": 172.57599532604218,
+ "p95": 176.09600722789764,
+ "p99": 182.01600015163422
},
"isolatedSum": {
- "p50": 2598.3999967575073,
- "p90": 2618.4639930725098,
- "p95": 2622.9439973831177,
- "p99": 2645.632028579712
+ "p50": 184.736005961895,
+ "p90": 203.3279985189438,
+ "p95": 208.28799903392792,
+ "p99": 224.95999932289124
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
+ "dispatchLogicalBytes": 12859392,
+ "combineLogicalBytes": 12859392,
+ "fanoutMean": 3.50390625,
+ "recvTokensMax": 255,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 104.47999835014343,
+ "p90": 113.69600147008896,
+ "p95": 117.11999773979187,
+ "p99": 125.08800625801086
+ },
+ "combine": {
+ "p50": 103.07200253009796,
+ "p90": 108.76800119876862,
+ "p95": 110.36799848079681,
+ "p99": 120.99199742078781
+ },
+ "roundtrip": {
+ "p50": 186.52799725532532,
+ "p90": 194.7840005159378,
+ "p95": 197.37599790096283,
+ "p99": 208.99200439453125
+ },
+ "isolatedSum": {
+ "p50": 207.5520008802414,
+ "p90": 222.46400266885757,
+ "p95": 227.48799622058868,
+ "p99": 246.08000367879868
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 25145344,
+ "combineLogicalBytes": 25145344,
+ "fanoutMean": 3.42578125,
+ "recvTokensMax": 510,
+ "stragglerRank": 0,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 125.47199428081512,
+ "p90": 134.11200046539307,
+ "p95": 136.63999736309052,
+ "p99": 144.44799721240997
+ },
+ "combine": {
+ "p50": 133.760005235672,
+ "p90": 142.14399456977844,
+ "p95": 144.896000623703,
+ "p99": 150.87999403476715
+ },
+ "roundtrip": {
+ "p50": 236.60799860954285,
+ "p90": 243.96799504756927,
+ "p95": 247.26399779319763,
+ "p99": 255.0719976425171
+ },
+ "isolatedSum": {
+ "p50": 259.2319995164871,
+ "p90": 276.2559950351715,
+ "p95": 281.5359979867935,
+ "p99": 295.3279912471771
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
+ "stragglerRank": 7,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
}
]
},
{
- "id": "cx-5fd5a06c",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39",
- "colorKey": "b300_0622d929",
- "comparisonKey": "8c83b99af9d27709",
+ "id": "cx-3ab662a4",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524",
+ "colorKey": "gb300_04de5a5b",
+ "comparisonKey": "3a5f0bb6e0d0b96c",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T18:11:00.153293+00:00",
+ "generatedAt": "2026-06-29T13:48:08.393602+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_10",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm) · balanced",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-moderate+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
+ "experts": 288,
+ "routing": "zipf-moderate",
+ "routingLabel": "zipf-moderate+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "0a3064a2af0dd39",
- "workloadId": "set:6:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "431e44245dd1524",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 4.927734375,
+ "eplbImbalanceAfter": 1.0006103515625,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254508907",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254508907",
- "createdAt": "2026-06-26T18:11:00.153293+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 105.79200088977814,
- "p90": 108.83200168609619,
- "p95": 111.00800335407257,
- "p99": 118.9119964838028
+ "p50": 96.6079980134964,
+ "p90": 120.19199877977371,
+ "p95": 128.09599936008453,
+ "p99": 145.28000354766846
},
"combine": {
- "p50": 130.0159990787506,
- "p90": 139.20000195503235,
- "p95": 139.74399864673615,
- "p99": 150.84800124168396
+ "p50": 80.4160013794899,
+ "p90": 84.76799726486206,
+ "p95": 86.97599917650223,
+ "p99": 96.3520035147667
},
"roundtrip": {
- "p50": 228.38400304317474,
- "p90": 234.65600609779358,
- "p95": 235.61599850654602,
- "p99": 252.28801369667053
+ "p50": 151.13599598407745,
+ "p90": 162.84799575805664,
+ "p95": 167.61599481105804,
+ "p99": 174.97600615024567
},
"isolatedSum": {
- "p50": 235.80799996852875,
- "p90": 248.03200364112854,
- "p95": 250.75200200080872,
- "p99": 269.75999772548676
+ "p50": 177.0239993929863,
+ "p90": 204.95999604463577,
+ "p95": 215.07199853658676,
+ "p99": 241.63200706243515
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 616448,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 7,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 159.36000645160675,
- "p90": 162.56000101566315,
- "p95": 163.90399634838104,
- "p99": 170.59199512004852
+ "p50": 93.53599697351456,
+ "p90": 108.57599973678589,
+ "p95": 112.5440001487732,
+ "p99": 122.17599898576736
},
"combine": {
- "p50": 201.34399831295013,
- "p90": 203.96800339221954,
- "p95": 211.45600080490112,
- "p99": 224.86400604248047
+ "p50": 82.8159973025322,
+ "p90": 88.3840024471283,
+ "p95": 94.91200000047684,
+ "p99": 96.99200093746185
},
"roundtrip": {
- "p50": 334.879994392395,
- "p90": 340.03201127052307,
- "p95": 342.0479893684387,
- "p99": 360.28799414634705
+ "p50": 152.79999375343323,
+ "p90": 172.28800058364868,
+ "p95": 193.82399320602417,
+ "p99": 212.6079946756363
},
"isolatedSum": {
- "p50": 360.7040047645569,
- "p90": 366.5280044078827,
- "p95": 375.35999715328217,
- "p99": 395.456001162529
+ "p50": 176.35199427604675,
+ "p90": 196.96000218391418,
+ "p95": 207.45600014925003,
+ "p99": 219.16799992322922
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 8,
- "recvTokensMax": 2048,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1204224,
+ "combineLogicalBytes": 1204224,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 14,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 232.67200589179993,
- "p90": 240.76800048351288,
- "p95": 244.60799992084503,
- "p99": 252.22399830818176
+ "p50": 93.79199892282486,
+ "p90": 107.42399841547012,
+ "p95": 110.01600325107574,
+ "p99": 119.35999989509583
},
"combine": {
- "p50": 338.01600337028503,
- "p90": 347.8719890117645,
- "p95": 348.7040102481842,
- "p99": 361.407995223999
+ "p50": 83.03999900817871,
+ "p90": 87.71199733018875,
+ "p95": 93.79199892282486,
+ "p99": 99.32799637317657
},
"roundtrip": {
- "p50": 553.9519786834717,
- "p90": 560.2239966392517,
- "p95": 564.3839836120605,
- "p99": 589.8879766464233
+ "p50": 154.27200496196747,
+ "p90": 167.4560010433197,
+ "p95": 171.10399901866913,
+ "p99": 189.82400000095367
},
"isolatedSum": {
- "p50": 570.688009262085,
- "p90": 588.6399894952774,
- "p95": 593.3120101690292,
- "p99": 613.6319935321808
+ "p50": 176.83199793100357,
+ "p90": 195.13599574565887,
+ "p95": 203.8080021739006,
+ "p99": 218.6879962682724
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 8,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2394112,
+ "combineLogicalBytes": 2394112,
+ "fanoutMean": 5.21875,
+ "recvTokensMax": 24,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 409.5360040664673,
- "p90": 415.0719940662384,
- "p95": 416.76801443099976,
- "p99": 433.50398540496826
+ "p50": 97.18400239944458,
+ "p90": 109.8880022764206,
+ "p95": 113.40799927711487,
+ "p99": 124.32000041007996
},
"combine": {
- "p50": 594.3359732627869,
- "p90": 599.7120141983032,
- "p95": 606.2399744987488,
- "p99": 619.2640066146851
+ "p50": 84.76799726486206,
+ "p90": 93.91999989748001,
+ "p95": 96.09600156545639,
+ "p99": 104.44799810647964
},
"roundtrip": {
- "p50": 986.1119985580444,
- "p90": 993.5680031776428,
- "p95": 998.8160133361816,
- "p99": 1015.8400535583496
+ "p50": 158.27199816703796,
+ "p90": 171.03999853134155,
+ "p95": 175.35999417304993,
+ "p99": 183.67999792099
},
"isolatedSum": {
- "p50": 1003.8719773292542,
- "p90": 1014.7840082645416,
- "p95": 1023.0079889297485,
- "p99": 1052.7679920196533
+ "p50": 181.95199966430664,
+ "p90": 203.8080021739006,
+ "p95": 209.50400084257126,
+ "p99": 228.7679985165596
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 939524096,
- "combineLogicalBytes": 939524096,
- "fanoutMean": 8,
- "recvTokensMax": 8192,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4630528,
+ "combineLogicalBytes": 4630528,
+ "fanoutMean": 5.046875,
+ "recvTokensMax": 45,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 756.384015083313,
- "p90": 767.3280239105225,
- "p95": 769.6639895439148,
- "p99": 787.7439856529236
+ "p50": 97.69599884748459,
+ "p90": 109.92000252008438,
+ "p95": 114.75200206041336,
+ "p99": 137.11999356746674
},
"combine": {
- "p50": 1112.671971321106,
- "p90": 1122.8480339050293,
- "p95": 1133.6640119552612,
- "p99": 1208.4800004959106
+ "p50": 85.08799970149994,
+ "p90": 92.44800359010696,
+ "p95": 95.13600170612335,
+ "p99": 105.98400235176086
},
"roundtrip": {
- "p50": 1856.0960292816162,
- "p90": 1870.6879615783691,
- "p95": 1877.087950706482,
- "p99": 1941.5040016174316
+ "p50": 159.61599349975586,
+ "p90": 171.58399522304535,
+ "p95": 174.94399845600128,
+ "p99": 185.08799374103546
},
"isolatedSum": {
- "p50": 1869.055986404419,
- "p90": 1890.1760578155518,
- "p95": 1903.328001499176,
- "p99": 1996.2239861488342
+ "p50": 182.78399854898453,
+ "p90": 202.36800611019135,
+ "p95": 209.8880037665367,
+ "p99": 243.1039959192276
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1879048192,
- "combineLogicalBytes": 1879048192,
- "fanoutMean": 8,
- "recvTokensMax": 16384,
+ "dispatchLogicalBytes": 9447424,
+ "combineLogicalBytes": 9447424,
+ "fanoutMean": 5.1484375,
+ "recvTokensMax": 91,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1458.8799476623535,
- "p90": 1475.0720262527466,
- "p95": 1481.4079999923706,
- "p99": 1536.8640422821045
- },
- "combine": {
- "p50": 2142.047882080078,
- "p90": 2154.560089111328,
- "p95": 2158.9438915252686,
- "p99": 2215.9039974212646
- },
- "roundtrip": {
- "p50": 3584.160089492798,
- "p90": 3605.760097503662,
- "p95": 3613.152027130127,
- "p99": 3669.503927230835
- },
- "isolatedSum": {
- "p50": 3600.9278297424316,
- "p90": 3629.6321153640747,
- "p95": 3640.351891517639,
- "p99": 3752.768039703369
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3758096384,
- "combineLogicalBytes": 3758096384,
- "fanoutMean": 8,
- "recvTokensMax": 32768,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-6620cae5",
- "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86",
- "colorKey": "b300_01ab5b1a",
- "comparisonKey": "5702bf02b3927f32",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:38:15.541333+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_06",
- "sku": "b300",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm) · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b5217e990b95f86",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271231753",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271231753",
- "createdAt": "2026-06-26T23:38:15.541333+00:00",
- "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 101.3759970664978,
- "p90": 104.76800054311752,
- "p95": 106.01600259542465,
- "p99": 111.90400272607803
- },
- "combine": {
- "p50": 126.11199915409088,
- "p90": 127.3919939994812,
- "p95": 127.83999741077423,
- "p99": 129.18399274349213
- },
- "roundtrip": {
- "p50": 207.8080028295517,
- "p90": 212.6079946756363,
- "p95": 213.69600296020508,
- "p99": 224.2559939622879
- },
- "isolatedSum": {
- "p50": 227.48799622058868,
- "p90": 232.15999454259872,
- "p95": 233.85600000619888,
- "p99": 241.08799546957016
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 136.31999492645264,
- "p90": 139.80799913406372,
- "p95": 140.86399972438812,
- "p99": 150.43200552463531
- },
- "combine": {
- "p50": 176.35199427604675,
- "p90": 178.78399789333344,
- "p95": 180.03199994564056,
- "p99": 188.60800564289093
- },
- "roundtrip": {
- "p50": 297.5679934024811,
- "p90": 303.45600843429565,
- "p95": 306.46398663520813,
- "p99": 319.2960023880005
- },
- "isolatedSum": {
- "p50": 312.6719892024994,
- "p90": 318.59199702739716,
- "p95": 320.8959996700287,
- "p99": 339.04001116752625
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 100509696,
- "combineLogicalBytes": 100509696,
- "fanoutMean": 3.42333984375,
- "recvTokensMax": 2046,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 203.90400290489197,
- "p90": 211.58400177955627,
- "p95": 212.51200139522552,
- "p99": 223.32799434661865
- },
- "combine": {
- "p50": 325.1839876174927,
- "p90": 335.55200695991516,
- "p95": 335.80800890922546,
- "p99": 337.8559947013855
- },
- "roundtrip": {
- "p50": 506.20800256729126,
- "p90": 514.4960284233093,
- "p95": 519.7759866714478,
- "p99": 534.0160131454468
- },
- "isolatedSum": {
- "p50": 529.0879905223846,
- "p90": 547.1360087394714,
- "p95": 548.320010304451,
- "p99": 561.1839890480042
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 348.86398911476135,
- "p90": 353.40800881385803,
- "p95": 354.7520041465759,
- "p99": 364.22398686408997
+ "p50": 101.6319990158081,
+ "p90": 112.96000331640244,
+ "p95": 116.44800007343292,
+ "p99": 121.85599654912949
},
"combine": {
- "p50": 582.4000239372253,
- "p90": 585.9519839286804,
- "p95": 593.0879712104797,
- "p99": 594.5919752120972
+ "p50": 93.02400052547455,
+ "p90": 97.50399738550186,
+ "p95": 100.63999891281128,
+ "p99": 108.0000028014183
},
"roundtrip": {
- "p50": 909.5680117607117,
- "p90": 917.2160029411316,
- "p95": 918.5600280761719,
- "p99": 924.127995967865
+ "p50": 163.93600404262543,
+ "p90": 175.55199563503265,
+ "p95": 179.71199750900269,
+ "p99": 188.48000466823578
},
"isolatedSum": {
- "p50": 931.2640130519867,
- "p90": 939.3599927425385,
- "p95": 947.8399753570557,
- "p99": 958.8159620761871
+ "p50": 194.65599954128265,
+ "p90": 210.4640007019043,
+ "p95": 217.0879989862442,
+ "p99": 229.8559993505478
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 405035008,
- "combineLogicalBytes": 405035008,
- "fanoutMean": 3.4488525390625,
- "recvTokensMax": 8189,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19023872,
+ "combineLogicalBytes": 19023872,
+ "fanoutMean": 5.18359375,
+ "recvTokensMax": 178,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 641.8560147285461,
- "p90": 648.639976978302,
- "p95": 655.135989189148,
- "p99": 660.256028175354
+ "p50": 111.23199760913849,
+ "p90": 121.85599654912949,
+ "p95": 126.8479973077774,
+ "p99": 160.35200655460358
},
"combine": {
- "p50": 1062.7520084381104,
- "p90": 1072.7039575576782,
- "p95": 1073.4080076217651,
- "p99": 1076.5119791030884
+ "p50": 107.58399963378906,
+ "p90": 115.39199948310852,
+ "p95": 118.8800036907196,
+ "p99": 122.52800166606903
},
"roundtrip": {
- "p50": 1693.343997001648,
- "p90": 1700.6080150604248,
- "p95": 1702.847957611084,
- "p99": 1706.6559791564941
+ "p50": 189.15200233459473,
+ "p90": 197.66399264335632,
+ "p95": 201.1519968509674,
+ "p99": 207.61600136756897
},
"isolatedSum": {
- "p50": 1704.6080231666565,
- "p90": 1721.3439345359802,
- "p95": 1728.543996810913,
- "p99": 1736.7680072784424
+ "p50": 218.81599724292755,
+ "p90": 237.247996032238,
+ "p95": 245.728000998497,
+ "p99": 282.8800082206726
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 38148096,
+ "combineLogicalBytes": 38148096,
+ "fanoutMean": 5.197265625,
+ "recvTokensMax": 350,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 1252.1920204162598,
- "p90": 1262.719988822937,
- "p95": 1264.7360563278198,
- "p99": 1276.8640518188477
+ "p50": 126.46399438381195,
+ "p90": 136.9280070066452,
+ "p95": 140.00000059604645,
+ "p99": 148.03199470043182
},
"combine": {
- "p50": 2043.4560775756836,
- "p90": 2045.151948928833,
- "p95": 2047.1999645233154,
- "p99": 2067.392110824585
+ "p50": 124.03199821710587,
+ "p90": 132.7359974384308,
+ "p95": 133.63200426101685,
+ "p99": 135.5839967727661
},
"roundtrip": {
- "p50": 3284.6720218658447,
- "p90": 3295.1040267944336,
- "p95": 3299.0400791168213,
- "p99": 3313.3440017700195
+ "p50": 225.92000663280487,
+ "p90": 235.20000278949738,
+ "p95": 240.31999707221985,
+ "p99": 262.81601190567017
},
"isolatedSum": {
- "p50": 3295.6480979919434,
- "p90": 3307.87193775177,
- "p95": 3311.9360208511353,
- "p99": 3344.2561626434326
+ "p50": 250.49599260091782,
+ "p90": 269.664004445076,
+ "p95": 273.6320048570633,
+ "p99": 283.61599147319794
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1619795968,
- "combineLogicalBytes": 1619795968,
- "fanoutMean": 3.4481201171875,
- "recvTokensMax": 32761,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 76955648,
+ "combineLogicalBytes": 76955648,
+ "fanoutMean": 5.2421875,
+ "recvTokensMax": 687,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -26279,28 +25849,28 @@
]
},
{
- "id": "cx-9b7dbfc5",
- "identity": "b300|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39",
- "colorKey": "b300_085c12d4",
- "comparisonKey": "afb8d29f702ca3c1",
+ "id": "cx-48c02d24",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524",
+ "colorKey": "gb300_8cda999b",
+ "comparisonKey": "f43e80b5c2df2021",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T18:21:45.459593+00:00",
+ "generatedAt": "2026-06-29T13:40:58.816700+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_16",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm) · zipf+eplb",
+ "label": "GB300 EP8 · deepep · bf16 · zipf+eplb",
"model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
@@ -26312,288 +25882,363 @@
"unevenTokens": "none",
"eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "2b57a75d27f5b39",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.895263671875,
- "eplbImbalanceAfter": 1.0000902811686199,
- "backendVersion": "1.2.1",
+ "traceSignature": "431e44245dd1524",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 4.927734375,
+ "eplbImbalanceAfter": 1.0006103515625,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28255311146",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255311146",
- "createdAt": "2026-06-26T18:21:45.459593+00:00",
- "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 93.28000247478485,
- "p90": 96.16000205278397,
- "p95": 98.78399968147278,
- "p99": 129.2479932308197
+ "p50": 88.60799670219421,
+ "p90": 101.6319990158081,
+ "p95": 104.8320010304451,
+ "p99": 119.74400281906128
},
"combine": {
- "p50": 114.94400352239609,
- "p90": 115.55200070142746,
- "p95": 115.93600362539291,
- "p99": 126.3359934091568
+ "p50": 74.11199808120728,
+ "p90": 81.91999793052673,
+ "p95": 83.64800363779068,
+ "p99": 92.32000261545181
},
"roundtrip": {
- "p50": 195.6160068511963,
- "p90": 199.42399859428406,
- "p95": 200.83199441432953,
- "p99": 215.16799926757812
+ "p50": 144.3520039319992,
+ "p90": 154.27200496196747,
+ "p95": 158.2079976797104,
+ "p99": 168.2880073785782
},
"isolatedSum": {
- "p50": 208.22400599718094,
- "p90": 211.71200275421143,
- "p95": 214.7200033068657,
- "p99": 255.5839866399765
+ "p50": 162.7199947834015,
+ "p90": 183.55199694633484,
+ "p95": 188.48000466823578,
+ "p99": 212.0640054345131
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77385728,
- "combineLogicalBytes": 77385728,
- "fanoutMean": 5.271484375,
- "recvTokensMax": 691,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 616448,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 7,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 136.25599443912506,
- "p90": 139.00800049304962,
- "p95": 141.50400459766388,
- "p99": 155.03999590873718
+ "p50": 89.37600255012512,
+ "p90": 105.12000322341919,
+ "p95": 113.63200098276138,
+ "p99": 151.99999511241913
},
"combine": {
- "p50": 153.72799336910248,
- "p90": 163.2319986820221,
- "p95": 163.80800306797028,
- "p99": 167.67999529838562
+ "p50": 80.64000308513641,
+ "p90": 85.4400023818016,
+ "p95": 92.86399930715561,
+ "p99": 119.6800023317337
},
"roundtrip": {
- "p50": 269.9199914932251,
- "p90": 275.64799785614014,
- "p95": 276.92800760269165,
- "p99": 291.77600145339966
+ "p50": 148.22399616241455,
+ "p90": 164.63999450206757,
+ "p95": 176.79999768733978,
+ "p99": 213.6639952659607
},
"isolatedSum": {
- "p50": 289.98398780822754,
- "p90": 302.2399991750717,
- "p95": 305.31200766563416,
- "p99": 322.7199912071228
+ "p50": 170.01600563526154,
+ "p90": 190.5600056052208,
+ "p95": 206.496000289917,
+ "p99": 271.67999744415283
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155172864,
- "combineLogicalBytes": 155172864,
- "fanoutMean": 5.28515625,
- "recvTokensMax": 1378,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1204224,
+ "combineLogicalBytes": 1204224,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 14,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 192.671999335289,
- "p90": 200.095996260643,
- "p95": 201.1840045452118,
- "p99": 211.99999749660492
+ "p50": 88.76799792051315,
+ "p90": 100.89600086212158,
+ "p95": 105.24799674749374,
+ "p99": 112.73600161075592
},
"combine": {
- "p50": 264.70398902893066,
- "p90": 274.2399871349335,
- "p95": 274.9119997024536,
- "p99": 286.3999903202057
+ "p50": 80.92799782752991,
+ "p90": 84.86399799585342,
+ "p95": 87.5839963555336,
+ "p99": 96.89600020647049
},
"roundtrip": {
- "p50": 439.7439956665039,
- "p90": 445.279985666275,
- "p95": 447.519987821579,
- "p99": 459.9039852619171
+ "p50": 147.90399372577667,
+ "p90": 158.01599621772766,
+ "p95": 161.8880033493042,
+ "p99": 186.49600446224213
},
"isolatedSum": {
- "p50": 457.37598836421967,
- "p90": 474.3359833955765,
- "p95": 476.0960042476654,
- "p99": 498.3999878168106
+ "p50": 169.69599574804306,
+ "p90": 185.759998857975,
+ "p95": 192.83199310302734,
+ "p99": 209.6320018172264
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 310546432,
- "combineLogicalBytes": 310546432,
- "fanoutMean": 5.28857421875,
- "recvTokensMax": 2745,
+ "dispatchLogicalBytes": 2394112,
+ "combineLogicalBytes": 2394112,
+ "fanoutMean": 5.21875,
+ "recvTokensMax": 24,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 319.90399956703186,
- "p90": 325.8560001850128,
- "p95": 327.1999955177307,
- "p99": 333.44000577926636
+ "p50": 90.65599739551544,
+ "p90": 104.92800176143646,
+ "p95": 109.50399935245514,
+ "p99": 151.90400183200836
},
"combine": {
- "p50": 450.78399777412415,
- "p90": 458.8800072669983,
- "p95": 459.77601408958435,
- "p99": 482.87999629974365
+ "p50": 83.10399949550629,
+ "p90": 88.35200220346451,
+ "p95": 94.52799707651138,
+ "p99": 108.22399705648422
},
"roundtrip": {
- "p50": 756.1600208282471,
- "p90": 761.5039944648743,
- "p95": 763.5840177536011,
- "p99": 783.5519909858704
+ "p50": 151.8400013446808,
+ "p90": 162.33600676059723,
+ "p95": 167.84000396728516,
+ "p99": 185.15199422836304
},
"isolatedSum": {
- "p50": 770.687997341156,
- "p90": 784.7360074520111,
- "p95": 786.9760096073151,
- "p99": 816.32000207901
+ "p50": 173.75999689102173,
+ "p90": 193.28000396490097,
+ "p95": 204.03199642896652,
+ "p99": 260.1279988884926
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 620619776,
- "combineLogicalBytes": 620619776,
- "fanoutMean": 5.2845458984375,
- "recvTokensMax": 5526,
+ "dispatchLogicalBytes": 4630528,
+ "combineLogicalBytes": 4630528,
+ "fanoutMean": 5.046875,
+ "recvTokensMax": 45,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 573.0559825897217,
- "p90": 581.6959738731384,
- "p95": 583.7119817733765,
- "p99": 671.4879870414734
+ "p50": 93.47199648618698,
+ "p90": 104.12800312042236,
+ "p95": 108.25599730014801,
+ "p99": 117.50400066375732
},
"combine": {
- "p50": 827.4880051612854,
- "p90": 838.6240005493164,
- "p95": 839.9040102958679,
- "p99": 863.4560108184814
+ "p50": 83.96799862384796,
+ "p90": 88.70399743318558,
+ "p95": 93.75999867916107,
+ "p99": 97.95200079679489
},
"roundtrip": {
- "p50": 1382.9760551452637,
- "p90": 1392.9920196533203,
- "p95": 1396.8960046768188,
- "p99": 1428.1599521636963
+ "p50": 154.2080044746399,
+ "p90": 165.56799411773682,
+ "p95": 169.18399930000305,
+ "p99": 176.86399817466736
},
"isolatedSum": {
- "p50": 1400.543987751007,
- "p90": 1420.3199744224548,
- "p95": 1423.6159920692444,
- "p99": 1534.9439978599548
+ "p50": 177.43999511003494,
+ "p90": 192.83200055360794,
+ "p95": 202.01599597930908,
+ "p99": 215.45600146055222
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1239175168,
- "combineLogicalBytes": 1239175168,
- "fanoutMean": 5.2757568359375,
- "recvTokensMax": 11165,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 9447424,
+ "combineLogicalBytes": 9447424,
+ "fanoutMean": 5.1484375,
+ "recvTokensMax": 91,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1061.8879795074463,
- "p90": 1068.7040090560913,
- "p95": 1075.9040117263794,
- "p99": 1094.048023223877
+ "p50": 98.1760025024414,
+ "p90": 111.61600053310394,
+ "p95": 123.32800030708313,
+ "p99": 153.85599434375763
},
"combine": {
- "p50": 1530.2079916000366,
- "p90": 1540.7040119171143,
- "p95": 1551.2640476226807,
- "p99": 1662.6559495925903
+ "p50": 91.00800007581711,
+ "p90": 96.73599898815155,
+ "p95": 104.51199859380722,
+ "p99": 131.3599944114685
},
"roundtrip": {
- "p50": 2579.9999237060547,
- "p90": 2593.7600135803223,
- "p95": 2600.543975830078,
- "p99": 2645.440101623535
+ "p50": 161.43999993801117,
+ "p90": 174.20800030231476,
+ "p95": 183.16799402236938,
+ "p99": 232.44799673557281
},
"isolatedSum": {
- "p50": 2592.095971107483,
- "p90": 2609.4080209732056,
- "p95": 2627.16805934906,
- "p99": 2756.7039728164673
+ "p50": 189.18400257825851,
+ "p90": 208.3519995212555,
+ "p95": 227.83999890089035,
+ "p99": 285.21598875522614
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2481604608,
- "combineLogicalBytes": 2481604608,
- "fanoutMean": 5.282684326171875,
- "recvTokensMax": 22165,
+ "dispatchLogicalBytes": 19023872,
+ "combineLogicalBytes": 19023872,
+ "fanoutMean": 5.18359375,
+ "recvTokensMax": 178,
+ "stragglerRank": 7,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 108.60799998044968,
+ "p90": 114.3999993801117,
+ "p95": 117.8240031003952,
+ "p99": 126.11199915409088
+ },
+ "combine": {
+ "p50": 106.175996363163,
+ "p90": 109.82400178909302,
+ "p95": 112.73600161075592,
+ "p99": 119.80800330638885
+ },
+ "roundtrip": {
+ "p50": 185.72799861431122,
+ "p90": 193.27999651432037,
+ "p95": 196.73599302768707,
+ "p99": 210.11200547218323
+ },
+ "isolatedSum": {
+ "p50": 214.78399634361267,
+ "p90": 224.2240011692047,
+ "p95": 230.56000471115112,
+ "p99": 245.92000246047974
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38148096,
+ "combineLogicalBytes": 38148096,
+ "fanoutMean": 5.197265625,
+ "recvTokensMax": 350,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 123.07199835777283,
+ "p90": 132.79999792575836,
+ "p95": 137.9839926958084,
+ "p99": 175.7120043039322
+ },
+ "combine": {
+ "p50": 122.72000312805176,
+ "p90": 131.48799538612366,
+ "p95": 133.91999900341034,
+ "p99": 151.8079936504364
+ },
+ "roundtrip": {
+ "p50": 223.00800681114197,
+ "p90": 230.6559979915619,
+ "p95": 234.49599742889404,
+ "p99": 251.71199440956116
+ },
+ "isolatedSum": {
+ "p50": 245.79200148582458,
+ "p90": 264.287993311882,
+ "p95": 271.90399169921875,
+ "p99": 327.5199979543686
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 76955648,
+ "combineLogicalBytes": 76955648,
+ "fanoutMean": 5.2421875,
+ "recvTokensMax": 687,
+ "stragglerRank": 0,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
}
]
},
{
- "id": "cx-07a9b9e5",
- "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "b300_63f1354f",
- "comparisonKey": "e1e888fe005f12d0",
+ "id": "cx-99af315f",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_20de545c",
+ "comparisonKey": "fcd0e10182ca372c",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T17:43:21.918392+00:00",
+ "generatedAt": "2026-06-29T13:45:32.504465+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 (norm) [cl]",
+ "label": "GB300 EP8 · deepep · bf16 [cl]",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -26605,259 +26250,334 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254489726",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254489726",
- "createdAt": "2026-06-26T17:43:21.918392+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 86.43200248479843,
- "p90": 88.95999938249588,
- "p95": 91.58399701118469,
- "p99": 99.55199807882309
+ "p50": 76.09599828720093,
+ "p90": 89.79199826717377,
+ "p95": 93.82399916648865,
+ "p99": 100.03200173377991
},
"combine": {
- "p50": 115.35999923944473,
- "p90": 116.03199690580368,
- "p95": 116.38399958610535,
- "p99": 121.56800180673599
+ "p50": 82.0159986615181,
+ "p90": 87.52000331878662,
+ "p95": 93.72799843549728,
+ "p99": 97.18400239944458
},
"roundtrip": {
- "p50": 186.8479996919632,
- "p90": 192.47999787330627,
- "p95": 193.31200420856476,
- "p99": 215.45599400997162
+ "p50": 137.11999356746674,
+ "p90": 149.47199821472168,
+ "p95": 152.8639942407608,
+ "p99": 170.30400037765503
},
"isolatedSum": {
- "p50": 201.79200172424316,
- "p90": 204.99199628829956,
- "p95": 207.96799659729004,
- "p99": 221.11999988555908
+ "p50": 158.11199694871902,
+ "p90": 177.3120015859604,
+ "p95": 187.55199760198593,
+ "p99": 197.2160041332245
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 128.60800325870514,
- "p90": 131.48799538612366,
- "p95": 132.79999792575836,
- "p99": 147.20000326633453
+ "p50": 78.59200239181519,
+ "p90": 91.90399944782257,
+ "p95": 98.49599748849869,
+ "p99": 109.24799740314484
},
"combine": {
- "p50": 156.19200468063354,
- "p90": 164.48000073432922,
- "p95": 164.76799547672272,
- "p99": 167.71200299263
+ "p50": 82.84799754619598,
+ "p90": 87.93599903583527,
+ "p95": 92.32000261545181,
+ "p99": 96.92800045013428
},
"roundtrip": {
- "p50": 264.8000121116638,
- "p90": 271.232008934021,
- "p95": 274.6239900588989,
- "p99": 307.20001459121704
+ "p50": 140.9599930047989,
+ "p90": 152.3520052433014,
+ "p95": 156.44800662994385,
+ "p99": 162.78399527072906
},
"isolatedSum": {
- "p50": 284.8000079393387,
- "p90": 295.9679961204529,
- "p95": 297.5679934024811,
- "p99": 314.91200625896454
+ "p50": 161.43999993801117,
+ "p90": 179.83999848365784,
+ "p95": 190.8160001039505,
+ "p99": 206.1759978532791
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 186.46399676799774,
- "p90": 192.86400079727173,
- "p95": 195.360004901886,
- "p99": 208.3200067281723
+ "p50": 80.09599894285202,
+ "p90": 93.59999746084213,
+ "p95": 99.93600100278854,
+ "p99": 114.97599631547928
},
"combine": {
- "p50": 266.6879892349243,
- "p90": 274.78399872779846,
- "p95": 275.2639949321747,
- "p99": 287.1359884738922
+ "p50": 83.00799876451492,
+ "p90": 90.11200070381165,
+ "p95": 94.7519987821579,
+ "p99": 104.3199971318245
},
"roundtrip": {
- "p50": 437.4080002307892,
- "p90": 442.30398535728455,
- "p95": 445.6320106983185,
- "p99": 468.51199865341187
+ "p50": 140.76800644397736,
+ "p90": 152.73599326610565,
+ "p95": 155.7759940624237,
+ "p99": 162.84799575805664
},
"isolatedSum": {
- "p50": 453.15198600292206,
- "p90": 467.6479995250702,
- "p95": 470.62399983406067,
- "p99": 495.4559952020645
+ "p50": 163.10399770736694,
+ "p90": 183.71199816465378,
+ "p95": 194.68799978494644,
+ "p99": 219.29599344730377
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 316.0319924354553,
- "p90": 319.16800141334534,
- "p95": 320.99199295043945,
- "p99": 330.01598715782166
- },
- "combine": {
- "p50": 458.8479995727539,
- "p90": 461.66399121284485,
- "p95": 470.20798921585083,
- "p99": 483.39200019836426
+ "p50": 80.92799782752991,
+ "p90": 91.61599725484848,
+ "p95": 95.77599912881851,
+ "p99": 104.38399761915207
+ },
+ "combine": {
+ "p50": 84.3840017914772,
+ "p90": 91.61599725484848,
+ "p95": 93.9520001411438,
+ "p99": 97.21600264310837
},
"roundtrip": {
- "p50": 752.0639896392822,
- "p90": 761.3440155982971,
- "p95": 763.6799812316895,
- "p99": 787.6480221748352
+ "p50": 143.45599710941315,
+ "p90": 155.2640050649643,
+ "p95": 159.10400450229645,
+ "p99": 169.66399550437927
},
"isolatedSum": {
- "p50": 774.8799920082092,
- "p90": 780.8319926261902,
- "p95": 791.1999821662903,
- "p99": 813.4079873561859
+ "p50": 165.3119996190071,
+ "p90": 183.23199450969696,
+ "p95": 189.7279992699623,
+ "p99": 201.60000026226044
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 557.2800040245056,
- "p90": 565.0240182876587,
- "p95": 566.3679838180542,
- "p99": 600.0319719314575
+ "p50": 81.28000050783157,
+ "p90": 91.23200178146362,
+ "p95": 94.08000111579895,
+ "p99": 107.64800012111664
},
"combine": {
- "p50": 817.4399733543396,
- "p90": 827.8399705886841,
- "p95": 832.0639729499817,
- "p99": 854.3999791145325
+ "p50": 86.14400029182434,
+ "p90": 94.01600062847137,
+ "p95": 96.22400254011154,
+ "p99": 103.87200117111206
},
"roundtrip": {
- "p50": 1359.328031539917,
- "p90": 1370.911955833435,
- "p95": 1380.5760145187378,
- "p99": 1444.640040397644
+ "p50": 145.63199877738953,
+ "p90": 156.99200332164764,
+ "p95": 160.7999950647354,
+ "p99": 173.47200214862823
},
"isolatedSum": {
- "p50": 1374.7199773788452,
- "p90": 1392.8639888763428,
- "p95": 1398.431956768036,
- "p99": 1454.43195104599
+ "p50": 167.42400079965591,
+ "p90": 185.248002409935,
+ "p95": 190.3040036559105,
+ "p99": 211.5200012922287
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1037.4079942703247,
- "p90": 1044.800043106079,
- "p95": 1047.4879741668701,
- "p99": 1074.3039846420288
+ "p50": 85.50400286912918,
+ "p90": 95.32800316810608,
+ "p95": 98.62399846315384,
+ "p99": 109.0560033917427
},
"combine": {
- "p50": 1529.6319723129272,
- "p90": 1541.375994682312,
- "p95": 1552.0639419555664,
- "p99": 1577.1199464797974
+ "p50": 93.05600076913834,
+ "p90": 98.04800152778625,
+ "p95": 100.41599720716476,
+ "p99": 107.744000852108
},
"roundtrip": {
- "p50": 2550.9119033813477,
- "p90": 2564.2240047454834,
- "p95": 2571.199893951416,
- "p99": 2613.2800579071045
+ "p50": 149.56800639629364,
+ "p90": 161.15200519561768,
+ "p95": 164.41600024700165,
+ "p99": 173.47200214862823
},
"isolatedSum": {
- "p50": 2567.039966583252,
- "p90": 2586.176037788391,
- "p95": 2599.5519161224365,
- "p99": 2651.423931121826
+ "p50": 178.56000363826752,
+ "p90": 193.37600469589233,
+ "p95": 199.0399956703186,
+ "p99": 216.8000042438507
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 0,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 97.05600142478943,
+ "p90": 107.26399719715118,
+ "p95": 111.35999858379364,
+ "p99": 125.88800489902496
+ },
+ "combine": {
+ "p50": 106.81600123643875,
+ "p90": 112.06399649381638,
+ "p95": 116.44800007343292,
+ "p99": 120.4800009727478
+ },
+ "roundtrip": {
+ "p50": 174.17599260807037,
+ "p90": 182.97599256038666,
+ "p95": 186.24000251293182,
+ "p99": 202.04800367355347
+ },
+ "isolatedSum": {
+ "p50": 203.87200266122818,
+ "p90": 219.32799369096756,
+ "p95": 227.80799865722656,
+ "p99": 246.36800587177277
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 0,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 110.52799969911575,
+ "p90": 118.65600198507309,
+ "p95": 122.079998254776,
+ "p99": 128.35200130939484
+ },
+ "combine": {
+ "p50": 124.60800260305405,
+ "p90": 132.32000172138214,
+ "p95": 134.65599715709686,
+ "p99": 141.95199310779572
+ },
+ "roundtrip": {
+ "p50": 208.8319957256317,
+ "p90": 217.3440009355545,
+ "p95": 220.22399306297302,
+ "p99": 224.89599883556366
+ },
+ "isolatedSum": {
+ "p50": 235.1360023021698,
+ "p90": 250.97600370645523,
+ "p95": 256.73599541187286,
+ "p99": 270.30399441719055
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -26865,28 +26585,28 @@
]
},
{
- "id": "cx-179c0247",
- "identity": "b300|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_33311fdc",
- "comparisonKey": "6deb8b087f7b728f",
+ "id": "cx-bd5b38a4",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_8d02a479",
+ "comparisonKey": "661dd1b497fcaeac",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:46:35.384079+00:00",
+ "generatedAt": "2026-06-29T13:51:03.850938+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_09",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
+ "phase": "decode",
+ "mode": "ll",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · bf16 [cl]",
+ "label": "GB300 EP8 · deepep · bf16 LL",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -26898,259 +26618,334 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
+ "achievedFraction": null,
+ "configuredUnits": null,
+ "deviceUnits": 152,
+ "resourceClass": "fixed-kernel",
+ "conformanceClass": "not-applicable",
+ "fixedKernel": true,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285576352",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285576352",
- "createdAt": "2026-06-27T09:46:35.384079+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 87.26400136947632,
- "p90": 88.8959988951683,
- "p95": 91.20000153779984,
- "p99": 106.08000308275223
+ "p50": 61.76000088453293,
+ "p90": 65.98400324583054,
+ "p95": 67.32799857854843,
+ "p99": 73.27999919652939
},
"combine": {
- "p50": 115.55200070142746,
- "p90": 116.80000275373459,
- "p95": 117.60000139474869,
- "p99": 140.32000303268433
+ "p50": 60.095999389886856,
+ "p90": 63.77600133419037,
+ "p95": 68.70400160551071,
+ "p99": 72.86400347948074
},
"roundtrip": {
- "p50": 186.17600202560425,
- "p90": 192.76799261569977,
- "p95": 193.82399320602417,
- "p99": 217.75999665260315
+ "p50": 93.79199892282486,
+ "p90": 99.7759997844696,
+ "p95": 101.18400305509567,
+ "p99": 104.44799810647964
},
"isolatedSum": {
- "p50": 202.81600207090378,
- "p90": 205.6960016489029,
- "p95": 208.80000293254852,
- "p99": 246.40000611543655
+ "p50": 121.85600027441978,
+ "p90": 129.7600045800209,
+ "p95": 136.03200018405914,
+ "p99": 146.14400267601013
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 14,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 130.20800054073334,
- "p90": 132.35199451446533,
- "p95": 133.34399461746216,
- "p99": 138.7840062379837
+ "p50": 64.09599632024765,
+ "p90": 68.35199892520905,
+ "p95": 73.15199822187424,
+ "p99": 77.91999727487564
},
"combine": {
- "p50": 155.45600652694702,
- "p90": 164.51199352741241,
- "p95": 164.8319959640503,
- "p99": 176.83200538158417
+ "p50": 60.63999980688095,
+ "p90": 69.11999732255936,
+ "p95": 70.49600034952164,
+ "p99": 74.17599856853485
},
"roundtrip": {
- "p50": 266.4639949798584,
- "p90": 271.61601185798645,
- "p95": 274.59201216697693,
- "p99": 283.3600044250488
+ "p50": 98.43199700117111,
+ "p90": 102.30399668216705,
+ "p95": 103.32799702882767,
+ "p99": 106.62399977445602
},
"isolatedSum": {
- "p50": 285.66400706768036,
- "p90": 296.86398804187775,
- "p95": 298.17599058151245,
- "p99": 315.61601161956787
+ "p50": 124.7359961271286,
+ "p90": 137.4719962477684,
+ "p95": 143.64799857139587,
+ "p99": 152.0959958434105
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 1,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 21,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 186.81600689888,
- "p90": 192.83199310302734,
- "p95": 195.0400024652481,
- "p99": 202.7519941329956
+ "p50": 65.0240033864975,
+ "p90": 74.07999783754349,
+ "p95": 75.71200281381607,
+ "p99": 78.97599786520004
},
"combine": {
- "p50": 274.52799677848816,
- "p90": 275.64799785614014,
- "p95": 276.5760123729706,
- "p99": 286.624014377594
+ "p50": 62.17600032687187,
+ "p90": 71.52000069618225,
+ "p95": 72.35199958086014,
+ "p99": 75.03999769687653
},
"roundtrip": {
- "p50": 440.064013004303,
- "p90": 445.3119933605194,
- "p95": 451.61598920822144,
- "p99": 459.77601408958435
+ "p50": 101.79200023412704,
+ "p90": 106.175996363163,
+ "p95": 108.22399705648422,
+ "p99": 114.14399743080139
},
"isolatedSum": {
- "p50": 461.34400367736816,
- "p90": 468.4799909591675,
- "p95": 471.6160148382187,
- "p99": 489.3760085105896
+ "p50": 127.20000371336937,
+ "p90": 145.59999853372574,
+ "p95": 148.0640023946762,
+ "p99": 154.01599556207657
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 39,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 316.25598669052124,
- "p90": 319.68000531196594,
- "p95": 321.1199939250946,
- "p99": 329.120010137558
+ "p50": 67.77600198984146,
+ "p90": 76.73600316047668,
+ "p95": 77.79199630022049,
+ "p99": 80.60800284147263
},
"combine": {
- "p50": 459.00800824165344,
- "p90": 461.0239863395691,
- "p95": 462.5920057296753,
- "p99": 473.66398572921753
+ "p50": 69.023996591568,
+ "p90": 72.92799651622772,
+ "p95": 74.01599735021591,
+ "p99": 80.44800162315369
},
"roundtrip": {
- "p50": 752.5119781494141,
- "p90": 760.9919905662537,
- "p95": 763.3919715881348,
- "p99": 770.4640030860901
+ "p50": 110.01600325107574,
+ "p90": 113.79200220108032,
+ "p95": 115.48800021409988,
+ "p99": 120.12799829244614
},
"isolatedSum": {
- "p50": 775.2639949321747,
- "p90": 780.703991651535,
- "p95": 783.7119996547699,
- "p99": 802.7839958667755
+ "p50": 136.79999858140945,
+ "p90": 149.6639996767044,
+ "p95": 151.8079936504364,
+ "p99": 161.0560044646263
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 0,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 74,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 558.5920214653015,
- "p90": 565.3759837150574,
- "p95": 566.9119954109192,
- "p99": 578.7839889526367
+ "p50": 76.64000242948532,
+ "p90": 79.8719972372055,
+ "p95": 81.4720019698143,
+ "p99": 88.67199718952179
},
"combine": {
- "p50": 819.0079927444458,
- "p90": 828.4800052642822,
- "p95": 830.9760093688965,
- "p99": 844.8960185050964
+ "p50": 74.5600014925003,
+ "p90": 83.26400071382523,
+ "p95": 84.54400300979614,
+ "p99": 87.90399879217148
},
"roundtrip": {
- "p50": 1360.640048980713,
- "p90": 1367.583990097046,
- "p95": 1372.320055961609,
- "p99": 1414.1119718551636
+ "p50": 127.3919939994812,
+ "p90": 132.47999548912048,
+ "p95": 136.25599443912506,
+ "p99": 144.19199526309967
},
"isolatedSum": {
- "p50": 1377.6000142097473,
- "p90": 1393.8559889793396,
- "p95": 1397.8880047798157,
- "p99": 1423.6800074577332
+ "p50": 151.20000392198563,
+ "p90": 163.13599795103073,
+ "p95": 166.01600497961044,
+ "p99": 176.57599598169327
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 145,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1036.255955696106,
- "p90": 1045.151948928833,
- "p95": 1047.584056854248,
- "p99": 1086.5919589996338
+ "p50": 104.51199859380722,
+ "p90": 112.5119999051094,
+ "p95": 113.8560026884079,
+ "p99": 116.89600348472595
},
"combine": {
- "p50": 1528.480052947998,
- "p90": 1540.544033050537,
- "p95": 1543.2319641113281,
- "p99": 1555.2959442138672
+ "p50": 105.34399747848511,
+ "p90": 110.88000237941742,
+ "p95": 111.68000102043152,
+ "p99": 113.24799805879593
},
"roundtrip": {
- "p50": 2546.976089477539,
- "p90": 2557.1839809417725,
- "p95": 2563.4560585021973,
- "p99": 2601.2799739837646
+ "p50": 187.1040016412735,
+ "p90": 191.71200692653656,
+ "p95": 193.53599846363068,
+ "p99": 199.77599382400513
},
"isolatedSum": {
- "p50": 2564.736008644104,
- "p90": 2585.69598197937,
- "p95": 2590.816020965576,
- "p99": 2641.887903213501
+ "p50": 209.85599607229233,
+ "p90": 223.39200228452682,
+ "p95": 225.53600370883942,
+ "p99": 230.14400154352188
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 287,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 187.80800700187683,
+ "p90": 198.71999323368073,
+ "p95": 200.6399929523468,
+ "p99": 204.6079933643341
+ },
+ "combine": {
+ "p50": 193.15199553966522,
+ "p90": 315.5840039253235,
+ "p95": 317.50398874282837,
+ "p99": 320.3200101852417
+ },
+ "roundtrip": {
+ "p50": 344.2560136318207,
+ "p90": 355.0400137901306,
+ "p95": 360.28799414634705,
+ "p99": 370.2720105648041
+ },
+ "isolatedSum": {
+ "p50": 380.96000254154205,
+ "p90": 514.3039971590042,
+ "p95": 518.1439816951752,
+ "p99": 524.9280035495758
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 564,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 363.0400002002716,
+ "p90": 542.2400236129761,
+ "p95": 544.6400046348572,
+ "p99": 555.1360249519348
+ },
+ "combine": {
+ "p50": 535.3279709815979,
+ "p90": 541.5359735488892,
+ "p95": 543.008029460907,
+ "p99": 549.7919917106628
+ },
+ "roundtrip": {
+ "p50": 585.5039954185486,
+ "p90": 642.9759860038757,
+ "p95": 653.760015964508,
+ "p99": 675.7760047912598
+ },
+ "isolatedSum": {
+ "p50": 898.3679711818695,
+ "p90": 1083.7759971618652,
+ "p95": 1087.6480340957642,
+ "p99": 1104.9280166625977
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 1104,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -27158,321 +26953,396 @@
]
},
{
- "id": "cx-d90a63c5",
- "identity": "b300|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "b300_d6fd14c3",
- "comparisonKey": "0a8b502bd3614965",
+ "id": "cx-7b4b7034",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_99f44a59",
+ "comparisonKey": "b22da9163d34e85f",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:26.432170+00:00",
+ "generatedAt": "2026-06-29T13:52:16.884650+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_01",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
+ "phase": "decode",
+ "mode": "ll",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "Qwen3.5",
+ "label": "GB300 EP8 · deepep · bf16 LL",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 4096,
+ "hidden": 7168,
"topk": 8,
- "experts": 128,
+ "experts": 256,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
+ "achievedFraction": null,
+ "configuredUnits": null,
+ "deviceUnits": 152,
+ "resourceClass": "fixed-kernel",
+ "conformanceClass": "not-applicable",
+ "fixedKernel": true,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287509502",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287509502",
- "createdAt": "2026-06-27T11:14:26.432170+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 80.60800284147263,
- "p90": 83.45600217580795,
- "p95": 84.6719965338707,
- "p99": 93.56799721717834
+ "p50": 61.344001442193985,
+ "p90": 64.64000046253204,
+ "p95": 66.11199676990509,
+ "p99": 73.18399846553802
},
"combine": {
- "p50": 89.82399851083755,
- "p90": 92.12800115346909,
- "p95": 93.40800344944,
- "p99": 99.84000027179718
+ "p50": 59.61599946022034,
+ "p90": 67.16799736022949,
+ "p95": 69.31199878454208,
+ "p99": 71.58400118350983
},
"roundtrip": {
- "p50": 183.4239959716797,
- "p90": 186.46399676799774,
- "p95": 187.68000602722168,
- "p99": 198.17599654197693
+ "p50": 92.44800359010696,
+ "p90": 98.7199991941452,
+ "p95": 100.09600222110748,
+ "p99": 104.35199737548828
},
"isolatedSum": {
- "p50": 170.43200135231018,
- "p90": 175.58400332927704,
- "p95": 178.0799999833107,
- "p99": 193.40799748897552
+ "p50": 120.96000090241432,
+ "p90": 131.80799782276154,
+ "p95": 135.42399555444717,
+ "p99": 144.76799964904785
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 630784,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 14,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 108.15999656915665,
- "p90": 112.19199746847153,
- "p95": 116.60800129175186,
- "p99": 134.62400436401367
+ "p50": 63.64800035953522,
+ "p90": 67.32799857854843,
+ "p95": 71.87200337648392,
+ "p99": 77.08799839019775
},
"combine": {
- "p50": 124.92799758911133,
- "p90": 128.4479945898056,
- "p95": 129.98400628566742,
- "p99": 134.91199910640717
+ "p50": 60.06399914622307,
+ "p90": 69.023996591568,
+ "p95": 70.04799693822861,
+ "p99": 72.06399738788605
},
"roundtrip": {
- "p50": 259.99999046325684,
- "p90": 264.3519937992096,
- "p95": 266.07999205589294,
- "p99": 281.6320061683655
+ "p50": 97.50399738550186,
+ "p90": 102.01600193977356,
+ "p95": 104.16000336408615,
+ "p99": 111.00800335407257
},
"isolatedSum": {
- "p50": 233.08799415826797,
- "p90": 240.63999205827713,
- "p95": 246.59200757741928,
- "p99": 269.53600347042084
+ "p50": 123.71199950575829,
+ "p90": 136.35199517011642,
+ "p95": 141.92000031471252,
+ "p99": 149.1519957780838
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 44863488,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1232896,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 21,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 152.79999375343323,
- "p90": 157.02399611473083,
- "p95": 159.2639982700348,
- "p99": 168.16000640392303
+ "p50": 64.25599753856659,
+ "p90": 71.07199728488922,
+ "p95": 74.52800124883652,
+ "p99": 77.40800082683563
},
"combine": {
- "p50": 189.60000574588776,
- "p90": 192.7040070295334,
- "p95": 194.46399807929993,
- "p99": 207.71199464797974
+ "p50": 61.47199869155884,
+ "p90": 70.88000327348709,
+ "p95": 71.84000313282013,
+ "p99": 74.11199808120728
},
"roundtrip": {
- "p50": 395.04000544548035,
- "p90": 400.2879858016968,
- "p95": 402.3360013961792,
- "p99": 415.6799912452698
+ "p50": 101.02400183677673,
+ "p90": 105.21599650382996,
+ "p95": 107.00800269842148,
+ "p99": 113.0559965968132
},
"isolatedSum": {
- "p50": 342.399999499321,
- "p90": 349.7280031442642,
- "p95": 353.7279963493347,
- "p99": 375.87200105190277
+ "p50": 125.72799623012543,
+ "p90": 141.9520005583763,
+ "p95": 146.36800438165665,
+ "p99": 151.5199989080429
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 89751552,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2480128,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 39,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 245.92000246047974,
- "p90": 251.3599991798401,
- "p95": 252.99200415611267,
- "p99": 264.0640139579773
+ "p50": 66.43199920654297,
+ "p90": 75.68000257015228,
+ "p95": 77.40800082683563,
+ "p99": 87.00799942016602
},
"combine": {
- "p50": 390.75198769569397,
- "p90": 398.9120125770569,
- "p95": 401.66398882865906,
- "p99": 409.0240001678467
+ "p50": 68.89600306749344,
+ "p90": 73.21599870920181,
+ "p95": 74.14399832487106,
+ "p99": 76.06399804353714
},
"roundtrip": {
- "p50": 774.0160226821899,
- "p90": 781.9200158119202,
- "p95": 786.4320278167725,
- "p99": 796.3839769363403
+ "p50": 110.3999987244606,
+ "p90": 114.23999816179276,
+ "p95": 116.28799885511398,
+ "p99": 123.96799772977829
},
"isolatedSum": {
- "p50": 636.6719901561737,
- "p90": 650.272011756897,
- "p95": 654.6559929847717,
- "p99": 673.088014125824
+ "p50": 135.3280022740364,
+ "p90": 148.8960012793541,
+ "p95": 151.5519991517067,
+ "p99": 163.07199746370316
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 179511296,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4974592,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 74,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 438.1119906902313,
- "p90": 448.09600710868835,
- "p95": 452.4799883365631,
- "p99": 461.63201332092285
+ "p50": 75.80800354480743,
+ "p90": 78.17599922418594,
+ "p95": 79.1039988398552,
+ "p99": 86.04799956083298
},
"combine": {
- "p50": 750.6240010261536,
- "p90": 756.4160227775574,
- "p95": 758.2399845123291,
- "p99": 767.0400142669678
+ "p50": 72.64000177383423,
+ "p90": 81.53600245714188,
+ "p95": 83.10399949550629,
+ "p99": 84.6719965338707
},
"roundtrip": {
- "p50": 1456.3839435577393,
- "p90": 1466.4959907531738,
- "p95": 1470.3359603881836,
- "p99": 1482.3039770126343
+ "p50": 128.7039965391159,
+ "p90": 136.1279934644699,
+ "p95": 137.69599795341492,
+ "p99": 141.82400703430176
},
"isolatedSum": {
- "p50": 1188.735991716385,
- "p90": 1204.5120298862457,
- "p95": 1210.7199728488922,
- "p99": 1228.6720275878906
+ "p50": 148.44800531864166,
+ "p90": 159.71200168132782,
+ "p95": 162.20799833536148,
+ "p99": 170.71999609470367
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 358055936,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 9920512,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 145,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 854.6559810638428,
- "p90": 867.7120208740234,
- "p95": 873.2159733772278,
- "p99": 887.8080248832703
+ "p50": 103.58399897813797,
+ "p90": 112.8000020980835,
+ "p95": 114.30399864912033,
+ "p99": 124.41600114107132
},
"combine": {
- "p50": 1436.5119934082031,
- "p90": 1444.5120096206665,
- "p95": 1448.3519792556763,
- "p99": 1471.9359874725342
+ "p50": 101.43999755382538,
+ "p90": 109.11999642848969,
+ "p95": 110.3999987244606,
+ "p99": 120.06399780511856
},
"roundtrip": {
- "p50": 2809.664011001587,
- "p90": 2821.1519718170166,
- "p95": 2827.1679878234863,
- "p99": 2873.1839656829834
+ "p50": 186.81600689888,
+ "p90": 192.83199310302734,
+ "p95": 196.76800072193146,
+ "p99": 202.78400182724
},
"isolatedSum": {
- "p50": 2291.167974472046,
- "p90": 2312.22403049469,
- "p95": 2321.567952632904,
- "p99": 2359.7440123558044
+ "p50": 205.02399653196335,
+ "p90": 221.91999852657318,
+ "p95": 224.70399737358093,
+ "p99": 244.47999894618988
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 716197888,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 7,
- "correct": true,
+ "dispatchLogicalBytes": 19726336,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 287,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 186.49600446224213,
+ "p90": 195.96800208091736,
+ "p95": 199.3280053138733,
+ "p99": 204.92799580097198
+ },
+ "combine": {
+ "p50": 183.58400464057922,
+ "p90": 196.83200120925903,
+ "p95": 200.03199577331543,
+ "p99": 208.064004778862
+ },
+ "roundtrip": {
+ "p50": 342.72000193595886,
+ "p90": 358.17599296569824,
+ "p95": 361.63198947906494,
+ "p99": 370.33599615097046
+ },
+ "isolatedSum": {
+ "p50": 370.08000910282135,
+ "p90": 392.8000032901764,
+ "p95": 399.3600010871887,
+ "p99": 412.992000579834
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38993920,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 564,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 337.2479975223541,
+ "p90": 365.34398794174194,
+ "p95": 374.752014875412,
+ "p99": 394.81601119041443
+ },
+ "combine": {
+ "p50": 529.2800068855286,
+ "p90": 540.6079888343811,
+ "p95": 541.9840216636658,
+ "p99": 545.2160239219666
+ },
+ "roundtrip": {
+ "p50": 585.9519839286804,
+ "p90": 642.3360109329224,
+ "p95": 655.9680104255676,
+ "p99": 683.1039786338806
+ },
+ "isolatedSum": {
+ "p50": 866.5280044078827,
+ "p90": 905.951976776123,
+ "p95": 916.7360365390778,
+ "p99": 940.032035112381
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 1104,
+ "stragglerRank": 4,
+ "correct": true,
"samplesPooled": 600,
"trials": 3
}
]
},
{
- "id": "cx-acd7c4ed",
- "identity": "b300|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "31714ccd7ce96f8f",
+ "id": "cx-1a41c2ea",
+ "identity": "gb300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "100b396b86e03573",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:52:16.802838+00:00",
+ "generatedAt": "2026-06-29T14:01:21.648224+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
+ "label": "GB300 EP8 · deepep · fp8",
"model": "Qwen3.5",
"shape": {
"hidden": 4096,
@@ -27484,14 +27354,15 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -27499,244 +27370,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "ebe68878aa18bb0",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285696261",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285696261",
- "createdAt": "2026-06-27T09:52:16.802838+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 1875.615954399109,
- "p90": 2675.9040355682373,
- "p95": 2804.8319816589355,
- "p99": 3161.2160205841064
+ "p50": 408.8959991931915,
+ "p90": 433.1839978694916,
+ "p95": 441.18401408195496,
+ "p99": 458.624005317688
},
"combine": {
- "p50": 1791.424036026001,
- "p90": 2183.648109436035,
- "p95": 2710.495948791504,
- "p99": 2984.6720695495605
+ "p50": 65.72800129652023,
+ "p90": 71.52000069618225,
+ "p95": 74.17599856853485,
+ "p99": 82.91199803352356
},
"roundtrip": {
- "p50": 1945.6959962844849,
- "p90": 2103.775978088379,
- "p95": 2727.839946746826,
- "p99": 3128.959894180298
+ "p50": 453.66400480270386,
+ "p90": 478.36801409721375,
+ "p95": 484.8639965057373,
+ "p99": 517.6960229873657
},
"isolatedSum": {
- "p50": 3667.03999042511,
- "p90": 4859.5521450042725,
- "p95": 5515.327930450439,
- "p99": 6145.888090133667
+ "p50": 474.62400048971176,
+ "p90": 504.7039985656738,
+ "p95": 515.3600126504898,
+ "p99": 541.5360033512115
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 172032,
+ "combineLogicalBytes": 344064,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 6,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 1994.4000244140625,
- "p90": 2822.2079277038574,
- "p95": 3089.344024658203,
- "p99": 4134.687900543213
+ "p50": 402.5599956512451,
+ "p90": 429.6639859676361,
+ "p95": 439.9360120296478,
+ "p99": 457.63200521469116
},
"combine": {
- "p50": 1834.3039751052856,
- "p90": 2468.640089035034,
- "p95": 2714.9438858032227,
- "p99": 3004.672050476074
+ "p50": 67.61600077152252,
+ "p90": 72.80000299215317,
+ "p95": 76.38400048017502,
+ "p99": 82.62400329113007
},
"roundtrip": {
- "p50": 2093.0240154266357,
- "p90": 2329.024076461792,
- "p95": 2922.7840900421143,
- "p99": 3284.0960025787354
+ "p50": 440.12799859046936,
+ "p90": 471.2640047073364,
+ "p95": 478.84801030158997,
+ "p99": 491.456001996994
},
"isolatedSum": {
- "p50": 3828.703999519348,
- "p90": 5290.848016738892,
- "p95": 5804.287910461426,
- "p99": 7139.359951019287
+ "p50": 470.17599642276764,
+ "p90": 502.4639889597893,
+ "p95": 516.3200125098228,
+ "p99": 540.2560085058212
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 44863488,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 352256,
+ "combineLogicalBytes": 704512,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 12,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 2082.495927810669,
- "p90": 2575.5200386047363,
- "p95": 3061.8879795074463,
- "p99": 3882.4000358581543
+ "p50": 399.7119963169098,
+ "p90": 434.84801054000854,
+ "p95": 495.2000081539154,
+ "p99": 603.8399934768677
},
"combine": {
- "p50": 1895.7120180130005,
- "p90": 2081.5999507904053,
- "p95": 2722.0799922943115,
- "p99": 3054.0480613708496
+ "p50": 68.09599697589874,
+ "p90": 74.94399696588516,
+ "p95": 79.83999699354172,
+ "p99": 108.70400071144104
},
"roundtrip": {
- "p50": 2248.447895050049,
- "p90": 2507.391929626465,
- "p95": 3178.4000396728516,
- "p99": 3517.632007598877
+ "p50": 439.9679899215698,
+ "p90": 469.08798813819885,
+ "p95": 509.8239779472351,
+ "p99": 644.5440053939819
},
"isolatedSum": {
- "p50": 3978.2079458236694,
- "p90": 4657.119989395142,
- "p95": 5783.967971801758,
- "p99": 6936.448097229004
+ "p50": 467.80799329280853,
+ "p90": 509.7920075058937,
+ "p95": 575.0400051474571,
+ "p99": 712.5439941883087
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 89751552,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 692224,
+ "combineLogicalBytes": 1384448,
+ "fanoutMean": 5.28125,
+ "recvTokensMax": 26,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 2266.335964202881,
- "p90": 2846.816062927246,
- "p95": 3325.5679607391357,
- "p99": 3900.8638858795166
+ "p50": 410.6239974498749,
+ "p90": 436.8000030517578,
+ "p95": 446.4319944381714,
+ "p99": 454.46398854255676
},
"combine": {
- "p50": 2154.8800468444824,
- "p90": 2735.584020614624,
- "p95": 3072.096109390259,
- "p99": 3418.11203956604
+ "p50": 70.52800059318542,
+ "p90": 76.19199901819229,
+ "p95": 78.68800312280655,
+ "p99": 89.28000181913376
},
"roundtrip": {
- "p50": 2644.864082336426,
- "p90": 3269.08802986145,
- "p95": 3706.2718868255615,
- "p99": 6074.7199058532715
+ "p50": 451.29600167274475,
+ "p90": 476.25601291656494,
+ "p95": 482.40000009536743,
+ "p99": 499.7119903564453
},
"isolatedSum": {
- "p50": 4421.216011047363,
- "p90": 5582.40008354187,
- "p95": 6397.6640701293945,
- "p99": 7318.975925445557
+ "p50": 481.1519980430603,
+ "p90": 512.9920020699501,
+ "p95": 525.1199975609779,
+ "p99": 543.7439903616905
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 179511296,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 1372160,
+ "combineLogicalBytes": 2744320,
+ "fanoutMean": 5.234375,
+ "recvTokensMax": 49,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 2653.8240909576416,
- "p90": 2906.4319133758545,
- "p95": 3221.951961517334,
- "p99": 3759.3278884887695
+ "p50": 407.99999237060547,
+ "p90": 434.112012386322,
+ "p95": 444.38400864601135,
+ "p99": 462.72000670433044
},
"combine": {
- "p50": 2523.5838890075684,
- "p90": 2799.743890762329,
- "p95": 3378.0479431152344,
- "p99": 3780.8001041412354
+ "p50": 72.35199958086014,
+ "p90": 77.34400033950806,
+ "p95": 79.93599772453308,
+ "p99": 86.84799820184708
},
"roundtrip": {
- "p50": 3350.048065185547,
- "p90": 3693.056106567383,
- "p95": 4236.576080322266,
- "p99": 4646.240234375
+ "p50": 450.46401023864746,
+ "p90": 475.13601183891296,
+ "p95": 482.7519953250885,
+ "p99": 500.19198656082153
},
"isolatedSum": {
- "p50": 5177.40797996521,
- "p90": 5706.175804138184,
- "p95": 6599.999904632568,
- "p99": 7540.127992630005
+ "p50": 480.3519919514656,
+ "p90": 511.4560127258301,
+ "p95": 524.3200063705444,
+ "p99": 549.5680049061775
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 358055936,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 2732032,
+ "combineLogicalBytes": 5464064,
+ "fanoutMean": 5.2109375,
+ "recvTokensMax": 94,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 3389.280080795288,
- "p90": 3919.840097427368,
- "p95": 4479.1998863220215,
- "p99": 6919.424057006836
+ "p50": 401.6000032424927,
+ "p90": 423.8719940185547,
+ "p95": 436.352014541626,
+ "p99": 457.92001485824585
},
"combine": {
- "p50": 3219.4879055023193,
- "p90": 3500.704050064087,
- "p95": 4088.6402130126953,
- "p99": 4587.488174438477
+ "p50": 73.98399710655212,
+ "p90": 78.87999713420868,
+ "p95": 81.15199953317642,
+ "p99": 87.87199854850769
},
"roundtrip": {
- "p50": 4788.127899169922,
- "p90": 4992.767810821533,
- "p95": 5423.679828643799,
- "p99": 6249.695777893066
+ "p50": 444.8640048503876,
+ "p90": 467.7119851112366,
+ "p95": 476.73600912094116,
+ "p99": 497.47198820114136
},
"isolatedSum": {
- "p50": 6608.767986297607,
- "p90": 7420.544147491455,
- "p95": 8567.840099334717,
- "p99": 11506.912231445312
+ "p50": 475.5840003490448,
+ "p90": 502.75199115276337,
+ "p95": 517.5040140748024,
+ "p99": 545.7920134067535
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 716197888,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 5562368,
+ "combineLogicalBytes": 11124736,
+ "fanoutMean": 5.3046875,
+ "recvTokensMax": 186,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 404.5119881629944,
+ "p90": 432.96000361442566,
+ "p95": 440.41600823402405,
+ "p99": 459.77601408958435
+ },
+ "combine": {
+ "p50": 85.88799834251404,
+ "p90": 92.86399930715561,
+ "p95": 97.08800166845322,
+ "p99": 130.94399869441986
+ },
+ "roundtrip": {
+ "p50": 451.61598920822144,
+ "p90": 477.88798809051514,
+ "p95": 501.0560154914856,
+ "p99": 640.2559876441956
+ },
+ "isolatedSum": {
+ "p50": 490.3999865055084,
+ "p90": 525.8240029215813,
+ "p95": 537.5040099024773,
+ "p99": 590.7200127840042
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 11096064,
+ "combineLogicalBytes": 22192128,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 358,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 403.29599380493164,
+ "p90": 433.53599309921265,
+ "p95": 451.9039988517761,
+ "p99": 577.0879983901978
+ },
+ "combine": {
+ "p50": 101.9200012087822,
+ "p90": 108.83200168609619,
+ "p95": 112.64000087976456,
+ "p99": 144.44799721240997
+ },
+ "roundtrip": {
+ "p50": 468.83198618888855,
+ "p90": 489.8880124092102,
+ "p95": 501.4079809188843,
+ "p99": 646.4959979057312
+ },
+ "isolatedSum": {
+ "p50": 505.21599501371384,
+ "p90": 542.3679947853088,
+ "p95": 564.5439997315407,
+ "p99": 721.5359956026077
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 22282240,
+ "combineLogicalBytes": 44564480,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 699,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -27744,28 +27689,28 @@
]
},
{
- "id": "cx-a725beb5",
- "identity": "b300|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "9a5b239287748a0a",
+ "id": "cx-214b01d3",
+ "identity": "gb300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "6b1e52df2e686455",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:52:25.584381+00:00",
+ "generatedAt": "2026-06-29T14:05:39.896445+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_12",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
+ "label": "GB300 EP8 · deepep · fp8",
"model": "shape 5120/8/160",
"shape": {
"hidden": 5120,
@@ -27777,14 +27722,15 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -27792,244 +27738,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "0c022a63bbcbf42",
- "workloadId": "set:6:28c0c09b13ff0acf",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "13e2b193b87a112",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285707789",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285707789",
- "createdAt": "2026-06-27T09:52:25.584381+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 155.20000457763672,
- "p90": 159.2320054769516,
- "p95": 161.9199961423874,
- "p99": 180.83199858665466
+ "p50": 432.5439929962158,
+ "p90": 458.3680033683777,
+ "p95": 472.896009683609,
+ "p99": 526.4000296592712
},
"combine": {
- "p50": 95.74399888515472,
- "p90": 98.14400225877762,
- "p95": 99.10400211811066,
- "p99": 110.36799848079681
+ "p50": 69.56800073385239,
+ "p90": 78.68800312280655,
+ "p95": 82.20800012350082,
+ "p99": 96.92800045013428
},
"roundtrip": {
- "p50": 242.5920069217682,
- "p90": 246.20799720287323,
- "p95": 248.28800559043884,
- "p99": 264.0959918498993
+ "p50": 474.2079973220825,
+ "p90": 498.6560046672821,
+ "p95": 505.3759813308716,
+ "p99": 537.2160077095032
},
"isolatedSum": {
- "p50": 250.94400346279144,
- "p90": 257.3760077357292,
- "p95": 261.02399826049805,
- "p99": 291.1999970674515
+ "p50": 502.1119937300682,
+ "p90": 537.0560064911842,
+ "p95": 555.1040098071098,
+ "p99": 623.3280301094055
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 27837440,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 215040,
+ "combineLogicalBytes": 430080,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 8,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 201.1519968509674,
- "p90": 206.2080055475235,
- "p95": 208.00000429153442,
- "p99": 217.69599616527557
+ "p50": 433.9520037174225,
+ "p90": 462.4960124492645,
+ "p95": 474.3039906024933,
+ "p99": 519.7759866714478
},
"combine": {
- "p50": 131.84000551700592,
- "p90": 134.65599715709686,
- "p95": 135.77599823474884,
- "p99": 142.46399700641632
+ "p50": 70.27199864387512,
+ "p90": 77.08799839019775,
+ "p95": 81.82399719953537,
+ "p99": 91.90399944782257
},
"roundtrip": {
- "p50": 328.3199965953827,
- "p90": 334.56000685691833,
- "p95": 336.8000090122223,
- "p99": 351.77600383758545
+ "p50": 475.6479859352112,
+ "p90": 499.61599707603455,
+ "p95": 515.7120227813721,
+ "p99": 560.7360005378723
},
"isolatedSum": {
- "p50": 332.9920023679733,
- "p90": 340.86400270462036,
- "p95": 343.77600252628326,
- "p99": 360.1599931716919
+ "p50": 504.2240023612976,
+ "p90": 539.5840108394623,
+ "p95": 556.1279878020287,
+ "p99": 611.6799861192703
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 55552000,
- "combineLogicalBytes": 111104000,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 1387,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 440320,
+ "combineLogicalBytes": 880640,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 302.94400453567505,
- "p90": 307.2640001773834,
- "p95": 309.82398986816406,
- "p99": 326.07999444007874
+ "p50": 431.3279986381531,
+ "p90": 458.49600434303284,
+ "p95": 470.14400362968445,
+ "p99": 529.9199819564819
},
"combine": {
- "p50": 206.4639925956726,
- "p90": 211.71200275421143,
- "p95": 213.24799954891205,
- "p99": 225.8879989385605
+ "p50": 72.41600006818771,
+ "p90": 79.42400127649307,
+ "p95": 84.1279998421669,
+ "p99": 115.167997777462
},
"roundtrip": {
- "p50": 523.4879851341248,
- "p90": 529.8240184783936,
- "p95": 533.3120226860046,
- "p99": 555.6480288505554
+ "p50": 474.43199157714844,
+ "p90": 499.07198548316956,
+ "p95": 509.7600221633911,
+ "p99": 560.2239966392517
},
"isolatedSum": {
- "p50": 509.40799713134766,
- "p90": 518.9760029315948,
- "p95": 523.0719894170761,
- "p99": 551.9679933786392
+ "p50": 503.7439987063408,
+ "p90": 537.9200056195259,
+ "p95": 554.2720034718513,
+ "p99": 645.0879797339439
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 111549440,
- "combineLogicalBytes": 223098880,
- "fanoutMean": 5.319091796875,
- "recvTokensMax": 2762,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 870400,
+ "combineLogicalBytes": 1740800,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 25,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 526.5600085258484,
- "p90": 532.480001449585,
- "p95": 534.8799824714661,
- "p99": 544.8639988899231
+ "p50": 434.9760115146637,
+ "p90": 463.55199813842773,
+ "p95": 471.1039960384369,
+ "p99": 509.0559720993042
},
"combine": {
- "p50": 429.8880100250244,
- "p90": 435.232013463974,
- "p95": 437.855988740921,
- "p99": 454.0480077266693
+ "p50": 74.43200051784515,
+ "p90": 82.59200304746628,
+ "p95": 86.56000345945358,
+ "p99": 113.24799805879593
},
"roundtrip": {
- "p50": 936.2559914588928,
- "p90": 944.0320134162903,
- "p95": 946.6879963874817,
- "p99": 960.096001625061
+ "p50": 475.0080108642578,
+ "p90": 496.41600251197815,
+ "p95": 506.20800256729126,
+ "p99": 553.056001663208
},
"isolatedSum": {
- "p50": 956.4480185508728,
- "p90": 967.712014913559,
- "p95": 972.7359712123871,
- "p99": 998.9120066165924
+ "p50": 509.40801203250885,
+ "p90": 546.144001185894,
+ "p95": 557.6639994978905,
+ "p99": 622.3039701581001
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 223365120,
- "combineLogicalBytes": 446730240,
- "fanoutMean": 5.325439453125,
- "recvTokensMax": 5518,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1735680,
+ "combineLogicalBytes": 3471360,
+ "fanoutMean": 5.296875,
+ "recvTokensMax": 50,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 967.9359793663025,
- "p90": 977.728009223938,
- "p95": 980.7999730110168,
- "p99": 989.5679950714111
+ "p50": 432.2560131549835,
+ "p90": 459.7119987010956,
+ "p95": 472.76800870895386,
+ "p99": 531.4239859580994
},
"combine": {
- "p50": 777.8559923171997,
- "p90": 783.9679718017578,
- "p95": 787.1999740600586,
- "p99": 800.000011920929
+ "p50": 75.6160020828247,
+ "p90": 82.43200182914734,
+ "p95": 85.37600189447403,
+ "p99": 93.85599941015244
},
"roundtrip": {
- "p50": 1729.024052619934,
- "p90": 1740.5760288238525,
- "p95": 1744.0320253372192,
- "p99": 1758.9759826660156
+ "p50": 472.9920029640198,
+ "p90": 491.9680058956146,
+ "p95": 498.9440143108368,
+ "p99": 548.192024230957
},
"isolatedSum": {
- "p50": 1745.7919716835022,
- "p90": 1761.6959810256958,
- "p95": 1767.9999470710754,
- "p99": 1789.56800699234
+ "p50": 507.8720152378082,
+ "p90": 542.1440005302429,
+ "p95": 558.1440106034279,
+ "p99": 625.2799853682518
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 446817280,
- "combineLogicalBytes": 893634560,
- "fanoutMean": 5.32647705078125,
- "recvTokensMax": 11032,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 3456000,
+ "combineLogicalBytes": 6912000,
+ "fanoutMean": 5.2734375,
+ "recvTokensMax": 93,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1878.9119720458984,
- "p90": 1891.3919925689697,
- "p95": 1897.055983543396,
- "p99": 1933.2799911499023
+ "p50": 430.88001012802124,
+ "p90": 466.0159945487976,
+ "p95": 493.151992559433,
+ "p99": 558.9439868927002
},
"combine": {
- "p50": 1474.8159646987915,
- "p90": 1484.8320484161377,
- "p95": 1491.3280010223389,
- "p99": 1509.2159509658813
+ "p50": 79.1039988398552,
+ "p90": 86.84799820184708,
+ "p95": 92.51199662685394,
+ "p99": 112.70400136709213
},
"roundtrip": {
- "p50": 3333.631992340088,
- "p90": 3347.424030303955,
- "p95": 3355.1039695739746,
- "p99": 3383.3279609680176
+ "p50": 476.063996553421,
+ "p90": 496.3200092315674,
+ "p95": 506.335973739624,
+ "p99": 553.8560152053833
},
"isolatedSum": {
- "p50": 3353.72793674469,
- "p90": 3376.2240409851074,
- "p95": 3388.383984565735,
- "p99": 3442.4959421157837
+ "p50": 509.98400896787643,
+ "p90": 552.8639927506447,
+ "p95": 585.6639891862869,
+ "p99": 671.6479882597923
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 893132800,
- "combineLogicalBytes": 1786265600,
- "fanoutMean": 5.323486328125,
- "recvTokensMax": 21895,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 6988800,
+ "combineLogicalBytes": 13977600,
+ "fanoutMean": 5.33203125,
+ "recvTokensMax": 179,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 430.88001012802124,
+ "p90": 456.1919867992401,
+ "p95": 464.54399824142456,
+ "p99": 488.6400103569031
+ },
+ "combine": {
+ "p50": 93.98400038480759,
+ "p90": 101.27999633550644,
+ "p95": 104.67199981212616,
+ "p99": 110.944002866745
+ },
+ "roundtrip": {
+ "p50": 486.55998706817627,
+ "p90": 509.0240240097046,
+ "p95": 521.5680003166199,
+ "p99": 577.5359869003296
+ },
+ "isolatedSum": {
+ "p50": 524.8640105128288,
+ "p90": 557.4719831347466,
+ "p95": 569.2159980535507,
+ "p99": 599.5840132236481
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 13987840,
+ "combineLogicalBytes": 27975680,
+ "fanoutMean": 5.3359375,
+ "recvTokensMax": 355,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 433.98401141166687,
+ "p90": 461.5359902381897,
+ "p95": 473.4399914741516,
+ "p99": 519.5840001106262
+ },
+ "combine": {
+ "p50": 110.49599945545197,
+ "p90": 118.33599954843521,
+ "p95": 121.15199863910675,
+ "p99": 134.8479986190796
+ },
+ "roundtrip": {
+ "p50": 508.54402780532837,
+ "p90": 526.9759893417358,
+ "p95": 534.8160266876221,
+ "p99": 572.2560286521912
+ },
+ "isolatedSum": {
+ "p50": 544.4800108671188,
+ "p90": 579.8719897866249,
+ "p95": 594.5919901132584,
+ "p99": 654.4319987297058
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 27837440,
+ "combineLogicalBytes": 55674880,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 699,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -28037,28 +28057,28 @@
]
},
{
- "id": "cx-a5fb5961",
- "identity": "b300|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_d6fd14c3",
- "comparisonKey": "6214ef692f2daf2b",
+ "id": "cx-5095ae79",
+ "identity": "gb300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "e0aaecfc18971490",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:24.890661+00:00",
+ "generatedAt": "2026-06-29T14:10:09.168558+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_03",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
+ "label": "GB300 EP8 · deepep · fp8",
"model": "MiniMax-M3",
"shape": {
"hidden": 6144,
@@ -28070,14 +28090,15 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -28085,276 +28106,350 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287498289",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287498289",
- "createdAt": "2026-06-27T11:14:24.890661+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 81.18399977684021,
- "p90": 83.83999764919281,
- "p95": 84.95999872684479,
- "p99": 91.90399944782257
+ "p50": 427.67998576164246,
+ "p90": 451.84001326560974,
+ "p95": 460.1280093193054,
+ "p99": 497.24799394607544
},
"combine": {
- "p50": 102.27199643850327,
- "p90": 105.40799796581268,
- "p95": 106.36799782514572,
- "p99": 112.99200356006622
+ "p50": 71.3919997215271,
+ "p90": 76.12799853086472,
+ "p95": 79.19999957084656,
+ "p99": 83.00799876451492
},
"roundtrip": {
- "p50": 204.96000349521637,
- "p90": 208.41600000858307,
- "p95": 210.30400693416595,
- "p99": 227.743998169899
+ "p50": 471.52000665664673,
+ "p90": 494.1120147705078,
+ "p95": 502.1119713783264,
+ "p99": 534.8160266876221
},
"isolatedSum": {
- "p50": 183.45599621534348,
- "p90": 189.2479956150055,
- "p95": 191.3279965519905,
- "p99": 204.8960030078888
+ "p50": 499.07198548316956,
+ "p90": 527.9680117964745,
+ "p95": 539.328008890152,
+ "p99": 580.2559927105904
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 270336,
+ "combineLogicalBytes": 540672,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 113.40799927711487,
- "p90": 116.70400202274323,
- "p95": 118.6240017414093,
- "p99": 128.80000472068787
+ "p50": 431.90398812294006,
+ "p90": 452.9919922351837,
+ "p95": 459.6799910068512,
+ "p99": 501.9199848175049
},
"combine": {
- "p50": 140.9599930047989,
- "p90": 143.93599331378937,
- "p95": 145.31199634075165,
- "p99": 153.4080058336258
+ "p50": 70.68800181150436,
+ "p90": 76.4160007238388,
+ "p95": 79.00799810886383,
+ "p99": 88.3840024471283
},
"roundtrip": {
- "p50": 306.0159981250763,
- "p90": 310.8159899711609,
- "p95": 313.2160007953644,
- "p99": 340.5759930610657
+ "p50": 478.5600006580353,
+ "p90": 510.591983795166,
+ "p95": 535.5200171470642,
+ "p99": 614.687979221344
},
"isolatedSum": {
- "p50": 254.36799228191376,
- "p90": 260.6399953365326,
- "p95": 263.93599808216095,
- "p99": 282.20801055431366
+ "p50": 502.5919899344444,
+ "p90": 529.4079929590225,
+ "p95": 538.687989115715,
+ "p99": 590.3039872646332
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 66809856,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 528384,
+ "combineLogicalBytes": 1056768,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 173.43999445438385,
- "p90": 178.30400168895721,
- "p95": 179.967999458313,
- "p99": 184.54399704933167
+ "p50": 431.4880073070526,
+ "p90": 456.959992647171,
+ "p95": 466.2080109119415,
+ "p99": 506.6559910774231
},
"combine": {
- "p50": 239.04000222682953,
- "p90": 245.27999758720398,
- "p95": 247.23200500011444,
- "p99": 258.59200954437256
+ "p50": 73.31199944019318,
+ "p90": 79.03999835252762,
+ "p95": 83.03999900817871,
+ "p99": 87.26400136947632
},
"roundtrip": {
- "p50": 515.4240131378174,
- "p90": 521.5039849281311,
- "p95": 525.0880122184753,
- "p99": 547.4560260772705
+ "p50": 477.728009223938,
+ "p90": 497.72799015045166,
+ "p95": 505.21600246429443,
+ "p99": 524.5440006256104
},
"isolatedSum": {
- "p50": 412.4799966812134,
- "p90": 423.5839992761612,
- "p95": 427.20000445842743,
- "p99": 443.1360065937042
+ "p50": 504.8000067472458,
+ "p90": 535.9999909996986,
+ "p95": 549.2480099201202,
+ "p99": 593.9199924468994
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 133828608,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1062912,
+ "combineLogicalBytes": 2125824,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 290.52799940109253,
- "p90": 296.35199904441833,
- "p95": 299.1040050983429,
- "p99": 307.93601274490356
+ "p50": 427.90400981903076,
+ "p90": 453.18400859832764,
+ "p95": 460.4479968547821,
+ "p99": 525.3440141677856
},
"combine": {
- "p50": 438.6560022830963,
- "p90": 443.4239864349365,
- "p95": 447.00801372528076,
- "p99": 467.3919975757599
+ "p50": 74.8480036854744,
+ "p90": 81.44000172615051,
+ "p95": 86.36800199747086,
+ "p99": 95.83999961614609
},
"roundtrip": {
- "p50": 922.2720265388489,
- "p90": 931.007981300354,
- "p95": 934.719979763031,
- "p99": 978.7840247154236
+ "p50": 472.28801250457764,
+ "p90": 500.8000135421753,
+ "p95": 514.4000053405762,
+ "p99": 534.9119901657104
},
"isolatedSum": {
- "p50": 729.1840016841888,
- "p90": 739.7759854793549,
- "p95": 746.1120188236237,
- "p99": 775.3280103206635
+ "p50": 502.75201350450516,
+ "p90": 534.6240103244781,
+ "p95": 546.815998852253,
+ "p99": 621.1840137839317
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 267190272,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2131968,
+ "combineLogicalBytes": 4263936,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 532.3519706726074,
- "p90": 539.2640233039856,
- "p95": 543.5519814491272,
- "p99": 568.0000185966492
+ "p50": 428.19198966026306,
+ "p90": 454.912006855011,
+ "p95": 463.4239971637726,
+ "p99": 491.90399050712585
},
"combine": {
- "p50": 796.064019203186,
- "p90": 802.4960160255432,
- "p95": 809.5679879188538,
- "p99": 841.3119912147522
+ "p50": 76.51200145483017,
+ "p90": 82.94399827718735,
+ "p95": 85.95199882984161,
+ "p99": 92.3520028591156
},
"roundtrip": {
- "p50": 1721.9840288162231,
- "p90": 1732.4479818344116,
- "p95": 1739.743947982788,
- "p99": 1767.4560546875
+ "p50": 472.351998090744,
+ "p90": 494.4640100002289,
+ "p95": 501.8879771232605,
+ "p99": 524.9599814414978
},
"isolatedSum": {
- "p50": 1328.4159898757935,
- "p90": 1341.7600393295288,
- "p95": 1353.119969367981,
- "p99": 1409.3120098114014
+ "p50": 504.70399111509323,
+ "p90": 537.8560051321983,
+ "p95": 549.3759959936142,
+ "p99": 584.2559933662415
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 533059584,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 4251648,
+ "combineLogicalBytes": 8503296,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1001.7600059509277,
- "p90": 1012.6080513000488,
- "p95": 1018.2720422744751,
- "p99": 1040.3200387954712
+ "p50": 432.6080083847046,
+ "p90": 458.9439928531647,
+ "p95": 469.08798813819885,
+ "p99": 504.57602739334106
},
"combine": {
- "p50": 1498.3359575271606,
- "p90": 1507.5520277023315,
- "p95": 1513.983964920044,
- "p99": 1537.0559692382812
+ "p50": 80.64000308513641,
+ "p90": 86.43200248479843,
+ "p95": 91.10400080680847,
+ "p99": 96.70399874448776
},
"roundtrip": {
- "p50": 3295.1040267944336,
- "p90": 3310.1439476013184,
- "p95": 3322.4000930786133,
- "p99": 3358.4959506988525
+ "p50": 472.0959961414337,
+ "p90": 492.15999245643616,
+ "p95": 499.7439980506897,
+ "p99": 512.2560262680054
},
"isolatedSum": {
- "p50": 2500.0959634780884,
- "p90": 2520.1600790023804,
- "p95": 2532.256007194519,
- "p99": 2577.3760080337524
+ "p50": 513.248011469841,
+ "p90": 545.3759953379631,
+ "p95": 560.1919889450073,
+ "p99": 601.2800261378288
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1065861120,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 8454144,
+ "combineLogicalBytes": 16908288,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
- }
- ]
- },
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 430.7839870452881,
+ "p90": 454.43201065063477,
+ "p95": 462.20800280570984,
+ "p99": 486.33599281311035
+ },
+ "combine": {
+ "p50": 94.14400160312653,
+ "p90": 100.41599720716476,
+ "p95": 103.74400019645691,
+ "p99": 108.92800241708755
+ },
+ "roundtrip": {
+ "p50": 484.3519926071167,
+ "p90": 501.21599435806274,
+ "p95": 507.32797384262085,
+ "p99": 533.951997756958
+ },
+ "isolatedSum": {
+ "p50": 524.9279886484146,
+ "p90": 554.8480078577995,
+ "p95": 565.9520030021667,
+ "p99": 595.2639952301979
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 16711680,
+ "combineLogicalBytes": 33423360,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 6,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 433.27999114990234,
+ "p90": 455.1039934158325,
+ "p95": 462.94400095939636,
+ "p99": 492.2240078449249
+ },
+ "combine": {
+ "p50": 114.68800157308578,
+ "p90": 121.15199863910675,
+ "p95": 124.03199821710587,
+ "p99": 128.63999605178833
+ },
+ "roundtrip": {
+ "p50": 505.2800178527832,
+ "p90": 527.679979801178,
+ "p95": 535.1359844207764,
+ "p99": 585.312008857727
+ },
+ "isolatedSum": {
+ "p50": 547.9679927229881,
+ "p90": 576.2559920549393,
+ "p95": 586.9759991765022,
+ "p99": 620.8640038967133
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 33288192,
+ "combineLogicalBytes": 66576384,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 7,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ }
+ ]
+ },
{
- "id": "cx-fba134bd",
- "identity": "b300|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "690e54d4fc20f43e",
+ "id": "cx-d75d6ecc",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_c4ac4643",
+ "comparisonKey": "817cf09679b30bf0",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:52:55.540924+00:00",
+ "generatedAt": "2026-06-29T13:49:31.371479+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_11",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
+ "label": "GB300 EP8 · deepep · fp8",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 6144,
+ "hidden": 7168,
"topk": 8,
"experts": 256,
"routing": "uniform",
@@ -28363,14 +28458,15 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -28378,244 +28474,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285718802",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285718802",
- "createdAt": "2026-06-27T09:52:55.540924+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 156.67200088500977,
- "p90": 160.863995552063,
- "p95": 164.48000073432922,
- "p99": 179.03999984264374
+ "p50": 94.78399902582169,
+ "p90": 109.31199789047241,
+ "p95": 112.70400136709213,
+ "p99": 122.94399738311768
},
"combine": {
- "p50": 101.6639992594719,
- "p90": 103.67999970912933,
- "p95": 104.3199971318245,
- "p99": 107.26399719715118
+ "p50": 73.05599749088287,
+ "p90": 80.03199845552444,
+ "p95": 83.42400193214417,
+ "p99": 89.66399729251862
},
"roundtrip": {
- "p50": 251.3599991798401,
- "p90": 255.23200631141663,
- "p95": 258.87998938560486,
- "p99": 285.7919931411743
+ "p50": 249.34400618076324,
+ "p90": 270.30399441719055,
+ "p95": 275.90399980545044,
+ "p99": 288.8000011444092
},
"isolatedSum": {
- "p50": 258.33600014448166,
- "p90": 264.5439952611923,
- "p95": 268.7999978661537,
- "p99": 286.3039970397949
+ "p50": 167.83999651670456,
+ "p90": 189.34399634599686,
+ "p95": 196.1280032992363,
+ "p99": 212.6079946756363
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 315392,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 210.40000021457672,
- "p90": 215.07200598716736,
- "p95": 218.9439982175827,
- "p99": 230.880007147789
+ "p50": 96.25600278377533,
+ "p90": 109.15199667215347,
+ "p95": 114.68800157308578,
+ "p99": 125.34399330615997
},
"combine": {
- "p50": 140.86399972438812,
- "p90": 144.6080058813095,
- "p95": 145.7280069589615,
- "p99": 171.29600048065186
+ "p50": 75.80800354480743,
+ "p90": 81.727996468544,
+ "p95": 84.25600081682205,
+ "p99": 91.87199920415878
},
"roundtrip": {
- "p50": 349.95201230049133,
- "p90": 354.2720079421997,
- "p95": 357.91999101638794,
- "p99": 378.62399220466614
+ "p50": 253.4399926662445,
+ "p90": 272.3200023174286,
+ "p95": 277.536004781723,
+ "p99": 288.60801458358765
},
"isolatedSum": {
- "p50": 351.26399993896484,
- "p90": 359.68001186847687,
- "p95": 364.6720051765442,
- "p99": 402.17600762844086
+ "p50": 172.06400632858276,
+ "p90": 190.87999314069748,
+ "p95": 198.94400238990784,
+ "p99": 217.21599251031876
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 66809856,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 335.58401465415955,
- "p90": 340.12800455093384,
- "p95": 341.15201234817505,
- "p99": 352.28800773620605
+ "p50": 94.14400160312653,
+ "p90": 108.89600217342377,
+ "p95": 114.27199840545654,
+ "p99": 136.4479959011078
},
"combine": {
- "p50": 239.1359955072403,
- "p90": 245.02399563789368,
- "p95": 247.13599681854248,
- "p99": 252.70399451255798
+ "p50": 74.68800246715546,
+ "p90": 80.60800284147263,
+ "p95": 84.16000008583069,
+ "p99": 87.07199990749359
},
"roundtrip": {
- "p50": 575.872004032135,
- "p90": 582.8160047531128,
- "p95": 585.4079723358154,
- "p99": 596.6399908065796
+ "p50": 248.28800559043884,
+ "p90": 267.10399985313416,
+ "p95": 275.6800055503845,
+ "p99": 288.7359857559204
},
"isolatedSum": {
- "p50": 574.7200101613998,
- "p90": 585.1520001888275,
- "p95": 588.2880091667175,
- "p99": 604.992002248764
+ "p50": 168.83200407028198,
+ "p90": 189.5040050148964,
+ "p95": 198.43199849128723,
+ "p99": 223.51999580860138
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 133828608,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1240064,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 584.1599702835083,
- "p90": 589.8879766464233,
- "p95": 592.2240018844604,
- "p99": 601.472020149231
+ "p50": 94.84799951314926,
+ "p90": 107.07200318574905,
+ "p95": 111.55200004577637,
+ "p99": 122.72000312805176
},
"combine": {
- "p50": 437.5999867916107,
- "p90": 441.8880045413971,
- "p95": 445.43999433517456,
- "p99": 456.7039906978607
+ "p50": 77.56800204515457,
+ "p90": 83.45600217580795,
+ "p95": 86.27200126647949,
+ "p99": 91.2960022687912
},
"roundtrip": {
- "p50": 1006.943941116333,
- "p90": 1015.2640342712402,
- "p95": 1019.10400390625,
- "p99": 1030.9120416641235
+ "p50": 251.23199820518494,
+ "p90": 268.92799139022827,
+ "p95": 274.3360102176666,
+ "p99": 284.2879891395569
},
"isolatedSum": {
- "p50": 1021.759957075119,
- "p90": 1031.7759811878204,
- "p95": 1037.663996219635,
- "p99": 1058.1760108470917
+ "p50": 172.41600155830383,
+ "p90": 190.528005361557,
+ "p95": 197.82400131225586,
+ "p99": 214.01600539684296
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 267190272,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2487296,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 1078.8160562515259,
- "p90": 1086.2720012664795,
- "p95": 1088.8639688491821,
- "p99": 1102.6240587234497
+ "p50": 95.90400010347366,
+ "p90": 108.73600095510483,
+ "p95": 114.04799669981003,
+ "p99": 122.52800166606903
},
"combine": {
- "p50": 797.0240116119385,
- "p90": 804.7360181808472,
- "p95": 809.9200129508972,
- "p99": 828.2560110092163
+ "p50": 79.26400005817413,
+ "p90": 85.40800213813782,
+ "p95": 89.1840010881424,
+ "p99": 94.87999975681305
},
"roundtrip": {
- "p50": 1859.071969985962,
- "p90": 1870.6560134887695,
- "p95": 1876.1919736862183,
- "p99": 1887.3920440673828
+ "p50": 251.67998671531677,
+ "p90": 271.93599939346313,
+ "p95": 277.47198939323425,
+ "p99": 294.97599601745605
},
"isolatedSum": {
- "p50": 1875.8400678634644,
- "p90": 1891.0080194473267,
- "p95": 1898.7839818000793,
- "p99": 1930.880069732666
+ "p50": 175.1680001616478,
+ "p90": 194.14400309324265,
+ "p95": 203.23199778795242,
+ "p99": 217.40800142288208
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 533059584,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4960256,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 2078.239917755127,
- "p90": 2087.264060974121,
- "p95": 2091.3920402526855,
- "p99": 2107.840061187744
+ "p50": 95.83999961614609,
+ "p90": 108.5439994931221,
+ "p95": 113.24799805879593,
+ "p99": 121.56800180673599
},
"combine": {
- "p50": 1500.2559423446655,
- "p90": 1509.8240375518799,
- "p95": 1514.6880149841309,
- "p99": 1528.4160375595093
+ "p50": 83.77599716186523,
+ "p90": 90.2400016784668,
+ "p95": 92.99200028181076,
+ "p99": 99.29600358009338
},
"roundtrip": {
- "p50": 3560.703992843628,
- "p90": 3572.9920864105225,
- "p95": 3578.847885131836,
- "p99": 3600.7680892944336
+ "p50": 259.16799902915955,
+ "p90": 278.52800488471985,
+ "p95": 283.4239900112152,
+ "p99": 288.5119915008545
},
"isolatedSum": {
- "p50": 3578.4958600997925,
- "p90": 3597.088098526001,
- "p95": 3606.0800552368164,
- "p99": 3636.2560987472534
+ "p50": 179.61599677801132,
+ "p90": 198.7840011715889,
+ "p95": 206.2399983406067,
+ "p99": 220.86400538682938
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1065861120,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 9863168,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 99.61599856615067,
+ "p90": 111.64800077676773,
+ "p95": 114.94400352239609,
+ "p99": 127.29600071907043
+ },
+ "combine": {
+ "p50": 98.36799651384354,
+ "p90": 105.05600273609161,
+ "p95": 107.80800133943558,
+ "p99": 112.57600039243698
+ },
+ "roundtrip": {
+ "p50": 271.7440128326416,
+ "p90": 290.336012840271,
+ "p95": 294.14400458335876,
+ "p99": 308.8639974594116
+ },
+ "isolatedSum": {
+ "p50": 197.9839950799942,
+ "p90": 216.70400351285934,
+ "p95": 222.75200486183167,
+ "p99": 239.87200111150742
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 110.78400164842606,
+ "p90": 119.64800208806992,
+ "p95": 122.78400361537933,
+ "p99": 135.5839967727661
+ },
+ "combine": {
+ "p50": 118.23999881744385,
+ "p90": 124.12799894809723,
+ "p95": 127.10399925708771,
+ "p99": 132.09599256515503
+ },
+ "roundtrip": {
+ "p50": 294.65600848197937,
+ "p90": 311.2959861755371,
+ "p95": 316.5439963340759,
+ "p99": 326.3680040836334
+ },
+ "isolatedSum": {
+ "p50": 229.0240004658699,
+ "p90": 243.77600103616714,
+ "p95": 249.88800287246704,
+ "p99": 267.67998933792114
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -28623,28 +28793,28 @@
]
},
{
- "id": "cx-67e5feea",
- "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_d6fd14c3",
- "comparisonKey": "ff71982761f18df0",
+ "id": "cx-7733ba4c",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "44ee0b05a8b4a1e8",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T10:26:31.663724+00:00",
+ "generatedAt": "2026-06-29T13:53:40.208752+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_14",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
+ "label": "GB300 EP8 · deepep · fp8",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -28656,14 +28826,15 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -28671,244 +28842,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28286436120",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286436120",
- "createdAt": "2026-06-27T10:26:31.663724+00:00",
- "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 1799.5840311050415,
- "p90": 2587.9039764404297,
- "p95": 2896.159887313843,
- "p99": 3459.968090057373
+ "p50": 437.50399351119995,
+ "p90": 466.1119878292084,
+ "p95": 474.8480021953583,
+ "p99": 494.1120147705078
},
"combine": {
- "p50": 1817.7920579910278,
- "p90": 2162.816047668457,
- "p95": 2672.192096710205,
- "p99": 2924.3199825286865
+ "p50": 77.02399790287018,
+ "p90": 82.97599852085114,
+ "p95": 85.69599688053131,
+ "p99": 88.92799913883209
},
"roundtrip": {
- "p50": 1977.4080514907837,
- "p90": 2173.4719276428223,
- "p95": 2860.5120182037354,
- "p99": 3130.8159828186035
+ "p50": 484.8960041999817,
+ "p90": 512.9280090332031,
+ "p95": 523.4240293502808,
+ "p99": 548.6720204353333
},
"isolatedSum": {
- "p50": 3617.3760890960693,
- "p90": 4750.720024108887,
- "p95": 5568.351984024048,
- "p99": 6384.28807258606
+ "p50": 514.5279914140701,
+ "p90": 549.0879863500595,
+ "p95": 560.5439990758896,
+ "p99": 583.0400139093399
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 315392,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 1846.0479974746704,
- "p90": 2604.5761108398438,
- "p95": 2895.456075668335,
- "p99": 3439.487934112549
+ "p50": 449.5680034160614,
+ "p90": 479.99998927116394,
+ "p95": 490.1440143585205,
+ "p99": 515.3279900550842
},
"combine": {
- "p50": 1870.6239461898804,
- "p90": 2174.5920181274414,
- "p95": 2705.2159309387207,
- "p99": 3008.8319778442383
+ "p50": 78.14399898052216,
+ "p90": 86.496002972126,
+ "p95": 91.48799628019333,
+ "p99": 133.31200182437897
},
"roundtrip": {
- "p50": 2121.920108795166,
- "p90": 2273.087978363037,
- "p95": 2978.7840843200684,
- "p99": 3390.048027038574
+ "p50": 495.10401487350464,
+ "p90": 525.1200199127197,
+ "p95": 533.8559746742249,
+ "p99": 566.4960145950317
},
"isolatedSum": {
- "p50": 3716.671943664551,
- "p90": 4779.168128967285,
- "p95": 5600.672006607056,
- "p99": 6448.319911956787
+ "p50": 527.7120023965836,
+ "p90": 566.49599224329,
+ "p95": 581.6320106387138,
+ "p99": 648.6399918794632
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 1958.3040475845337,
- "p90": 2819.5838928222656,
- "p95": 3096.895933151245,
- "p99": 5452.991962432861
+ "p50": 440.5759871006012,
+ "p90": 470.43201327323914,
+ "p95": 480.9280037879944,
+ "p99": 524.5760083198547
},
"combine": {
- "p50": 1994.7839975357056,
- "p90": 2250.5600452423096,
- "p95": 2893.791913986206,
- "p99": 3337.984085083008
+ "p50": 78.78399640321732,
+ "p90": 85.79199761152267,
+ "p95": 88.60799670219421,
+ "p99": 95.29600292444229
},
"roundtrip": {
- "p50": 2347.584009170532,
- "p90": 2880.44810295105,
- "p95": 3284.991979598999,
- "p99": 3777.6639461517334
+ "p50": 491.2959933280945,
+ "p90": 518.9759731292725,
+ "p95": 524.7359871864319,
+ "p99": 553.2799959182739
},
"isolatedSum": {
- "p50": 3953.0880451202393,
- "p90": 5070.143938064575,
- "p95": 5990.687847137451,
- "p99": 8790.97604751587
+ "p50": 519.3599835038185,
+ "p90": 556.2240108847618,
+ "p95": 569.5360004901886,
+ "p99": 619.872011244297
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
+ "dispatchLogicalBytes": 1240064,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
"stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
+ "dispatch": {
+ "p50": 443.83999705314636,
+ "p90": 471.6480076313019,
+ "p95": 480.6079864501953,
+ "p99": 500.19198656082153
+ },
+ "combine": {
+ "p50": 81.37600123882294,
+ "p90": 88.28800171613693,
+ "p95": 92.6399976015091,
+ "p99": 108.92800241708755
+ },
+ "roundtrip": {
+ "p50": 490.84800481796265,
+ "p90": 514.303982257843,
+ "p95": 522.5279927253723,
+ "p99": 539.2320156097412
+ },
+ "isolatedSum": {
+ "p50": 525.2159982919693,
+ "p90": 559.9360093474388,
+ "p95": 573.2479840517044,
+ "p99": 609.1199889779091
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 2487296,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 0,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 2067.199945449829,
- "p90": 2887.7758979797363,
- "p95": 3118.6559200286865,
- "p99": 3810.5599880218506
+ "p50": 438.2399916648865,
+ "p90": 472.6719856262207,
+ "p95": 483.90400409698486,
+ "p99": 500.0320076942444
},
"combine": {
- "p50": 2245.4400062561035,
- "p90": 2792.095899581909,
- "p95": 3188.5440349578857,
- "p99": 3587.552070617676
+ "p50": 83.83999764919281,
+ "p90": 90.36800265312195,
+ "p95": 94.40000355243683,
+ "p99": 98.49599748849869
},
"roundtrip": {
- "p50": 2770.080089569092,
- "p90": 2971.872091293335,
- "p95": 3523.7441062927246,
- "p99": 3988.640069961548
+ "p50": 487.45599389076233,
+ "p90": 519.8400020599365,
+ "p95": 528.2559990882874,
+ "p99": 541.0240292549133
},
"isolatedSum": {
- "p50": 4312.639951705933,
- "p90": 5679.8717975616455,
- "p95": 6307.199954986572,
- "p99": 7398.112058639526
+ "p50": 522.0799893140793,
+ "p90": 563.0399882793427,
+ "p95": 578.3040076494217,
+ "p99": 598.5280051827431
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4960256,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 2318.943977355957,
- "p90": 2845.599889755249,
- "p95": 3288.3200645446777,
- "p99": 3567.9359436035156
+ "p50": 443.36000084877014,
+ "p90": 471.5520143508911,
+ "p95": 478.4959852695465,
+ "p99": 490.3999865055084
},
"combine": {
- "p50": 2601.759910583496,
- "p90": 2804.192066192627,
- "p95": 3261.3439559936523,
- "p99": 3862.2400760650635
+ "p50": 87.77599781751633,
+ "p90": 94.33600306510925,
+ "p95": 96.73599898815155,
+ "p99": 104.99200224876404
},
"roundtrip": {
- "p50": 3612.5121116638184,
- "p90": 4097.760200500488,
- "p95": 4626.783847808838,
- "p99": 6537.69588470459
+ "p50": 496.832013130188,
+ "p90": 522.271990776062,
+ "p95": 527.9039740562439,
+ "p99": 549.3760108947754
},
"isolatedSum": {
- "p50": 4920.703887939453,
- "p90": 5649.791955947876,
- "p95": 6549.66402053833,
- "p99": 7430.176019668579
+ "p50": 531.1359986662865,
+ "p90": 565.8880174160004,
+ "p95": 575.2319842576981,
+ "p99": 595.3919887542725
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 9863168,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 2815.4239654541016,
- "p90": 3583.904027938843,
- "p95": 3803.584098815918,
- "p99": 4226.624011993408
+ "p50": 442.49600172042847,
+ "p90": 474.2720127105713,
+ "p95": 480.3520143032074,
+ "p99": 500.5760192871094
},
"combine": {
- "p50": 3305.2799701690674,
- "p90": 3407.8400135040283,
- "p95": 3562.688112258911,
- "p99": 4382.976055145264
+ "p50": 102.39999741315842,
+ "p90": 109.21599715948105,
+ "p95": 113.27999830245972,
+ "p99": 117.72800236940384
},
"roundtrip": {
- "p50": 5279.6478271484375,
- "p90": 5909.920215606689,
- "p95": 6326.015949249268,
- "p99": 6807.90376663208
+ "p50": 505.9199929237366,
+ "p90": 532.3839783668518,
+ "p95": 538.8799905776978,
+ "p99": 555.2319884300232
},
"isolatedSum": {
- "p50": 6120.703935623169,
- "p90": 6991.744041442871,
- "p95": 7366.272211074829,
- "p99": 8609.600067138672
+ "p50": 544.8959991335869,
+ "p90": 583.4880098700523,
+ "p95": 593.6320126056671,
+ "p99": 618.3040216565132
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 443.83999705314636,
+ "p90": 475.67999362945557,
+ "p95": 483.5839867591858,
+ "p99": 507.423996925354
+ },
+ "combine": {
+ "p50": 122.56000190973282,
+ "p90": 128.22400033473969,
+ "p95": 130.5920034646988,
+ "p99": 135.0719928741455
+ },
+ "roundtrip": {
+ "p50": 527.9359817504883,
+ "p90": 553.4719824790955,
+ "p95": 560.4479908943176,
+ "p99": 581.1840295791626
+ },
+ "isolatedSum": {
+ "p50": 566.3999989628792,
+ "p90": 603.9039939641953,
+ "p95": 614.1759902238846,
+ "p99": 642.4959897994995
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -28916,47 +29161,48 @@
]
},
{
- "id": "cx-45b4616a",
- "identity": "b300|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "56fe7b02fd8e6b1a",
+ "id": "cx-8eb16503",
+ "identity": "gb300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "b2acbf95773921f9",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:51:04.276703+00:00",
+ "generatedAt": "2026-06-29T13:55:48.622903+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · fp8",
+ "model": "Kimi-K2",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
+ "experts": 384,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -28964,244 +29210,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "6d507ec2ec8998f",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285674665",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285674665",
- "createdAt": "2026-06-27T09:51:04.276703+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 158.01599621772766,
- "p90": 162.6559942960739,
- "p95": 164.48000073432922,
- "p99": 177.88800597190857
+ "p50": 442.30398535728455,
+ "p90": 470.335990190506,
+ "p95": 489.9199903011322,
+ "p99": 534.1439843177795
},
"combine": {
- "p50": 108.35199803113937,
- "p90": 110.43199896812439,
- "p95": 111.455999314785,
- "p99": 118.56000125408173
+ "p50": 73.56800138950348,
+ "p90": 80.6720033288002,
+ "p95": 85.69599688053131,
+ "p99": 125.02400577068329
},
"roundtrip": {
- "p50": 259.39199328422546,
- "p90": 266.36800169944763,
- "p95": 268.22400093078613,
- "p99": 283.55199098587036
+ "p50": 482.04800486564636,
+ "p90": 516.0639882087708,
+ "p95": 532.6399803161621,
+ "p99": 585.8240127563477
},
"isolatedSum": {
- "p50": 266.36799424886703,
- "p90": 273.0879932641983,
- "p95": 275.9360000491142,
- "p99": 296.4480072259903
+ "p50": 515.871986746788,
+ "p90": 551.0079935193062,
+ "p95": 575.6159871816635,
+ "p99": 659.1679900884628
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
+ "dispatchLogicalBytes": 301056,
+ "combineLogicalBytes": 602112,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 8,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 219.32800114154816,
- "p90": 222.52799570560455,
- "p95": 223.55200350284576,
- "p99": 230.335995554924
+ "p50": 446.9760060310364,
+ "p90": 470.97599506378174,
+ "p95": 484.25599932670593,
+ "p99": 524.6719717979431
},
"combine": {
- "p50": 152.28800475597382,
- "p90": 155.74400126934052,
- "p95": 157.4079990386963,
- "p99": 167.64800250530243
+ "p50": 75.32799988985062,
+ "p90": 84.03199911117554,
+ "p95": 88.25600147247314,
+ "p99": 120.35199999809265
},
"roundtrip": {
- "p50": 371.36000394821167,
- "p90": 375.5199909210205,
- "p95": 376.99198722839355,
- "p99": 389.0239894390106
+ "p50": 489.79198932647705,
+ "p90": 513.375997543335,
+ "p95": 520.9919810295105,
+ "p99": 565.8559799194336
},
"isolatedSum": {
- "p50": 371.616005897522,
- "p90": 378.27199697494507,
- "p95": 380.96000254154205,
- "p99": 397.98399806022644
+ "p50": 522.304005920887,
+ "p90": 555.0079941749573,
+ "p95": 572.5120007991791,
+ "p99": 645.0239717960358
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 609280,
+ "combineLogicalBytes": 1218560,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 14,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 360.9600067138672,
- "p90": 365.1840090751648,
- "p95": 367.23199486732483,
- "p99": 394.9120044708252
+ "p50": 426.2720048427582,
+ "p90": 454.367995262146,
+ "p95": 460.57599782943726,
+ "p99": 502.52801179885864
},
"combine": {
- "p50": 264.5759880542755,
- "p90": 268.7680125236511,
- "p95": 271.232008934021,
- "p99": 281.76000714302063
+ "p50": 75.42400062084198,
+ "p90": 83.03999900817871,
+ "p95": 87.13600039482117,
+ "p99": 133.18400084972382
},
"roundtrip": {
- "p50": 614.7840023040771,
- "p90": 620.9279894828796,
- "p95": 623.6799955368042,
- "p99": 633.4720253944397
+ "p50": 470.2399969100952,
+ "p90": 500.5760192871094,
+ "p95": 513.4080052375793,
+ "p99": 566.9119954109192
},
"isolatedSum": {
- "p50": 625.5359947681427,
- "p90": 633.9520215988159,
- "p95": 638.4640038013458,
- "p99": 676.6720116138458
+ "p50": 501.69600546360016,
+ "p90": 537.4079942703247,
+ "p95": 547.7119982242584,
+ "p99": 635.7120126485825
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1204224,
+ "combineLogicalBytes": 2408448,
+ "fanoutMean": 5.25,
+ "recvTokensMax": 26,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 627.9360055923462,
- "p90": 633.5999965667725,
- "p95": 635.8720064163208,
- "p99": 650.111973285675
+ "p50": 447.1360146999359,
+ "p90": 469.05601024627686,
+ "p95": 477.4399995803833,
+ "p99": 518.1440114974976
},
"combine": {
- "p50": 453.0239999294281,
- "p90": 457.66401290893555,
- "p95": 460.31999588012695,
- "p99": 473.56799244880676
+ "p50": 79.32800054550171,
+ "p90": 86.46400272846222,
+ "p95": 89.37600255012512,
+ "p99": 105.27999699115753
},
"roundtrip": {
- "p50": 1066.5600299835205,
- "p90": 1073.6639499664307,
- "p95": 1077.5359869003296,
- "p99": 1090.1119709014893
+ "p50": 490.1440143585205,
+ "p90": 511.74402236938477,
+ "p95": 549.6000051498413,
+ "p99": 588.0320072174072
},
"isolatedSum": {
- "p50": 1080.9600055217743,
- "p90": 1091.264009475708,
- "p95": 1096.1920022964478,
- "p99": 1123.6799657344818
+ "p50": 526.4640152454376,
+ "p90": 555.5200129747391,
+ "p95": 566.8160021305084,
+ "p99": 623.4240084886551
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 2415616,
+ "combineLogicalBytes": 4831232,
+ "fanoutMean": 5.265625,
+ "recvTokensMax": 48,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 1180.3840398788452,
- "p90": 1187.1999502182007,
- "p95": 1190.4640197753906,
- "p99": 1241.3439750671387
+ "p50": 439.7760033607483,
+ "p90": 462.5599980354309,
+ "p95": 470.2399969100952,
+ "p99": 483.0079972743988
},
"combine": {
- "p50": 815.3600096702576,
- "p90": 822.5280046463013,
- "p95": 825.8879780769348,
- "p99": 834.2080116271973
+ "p50": 80.54400235414505,
+ "p90": 86.91199868917465,
+ "p95": 91.39200299978256,
+ "p99": 125.98399817943573
},
"roundtrip": {
- "p50": 1978.0479669570923,
- "p90": 1988.8639450073242,
- "p95": 1993.8240051269531,
- "p99": 2242.1441078186035
+ "p50": 485.56798696517944,
+ "p90": 505.0240159034729,
+ "p95": 513.1840109825134,
+ "p99": 553.2799959182739
},
"isolatedSum": {
- "p50": 1995.7440495491028,
- "p90": 2009.727954864502,
- "p95": 2016.3519978523254,
- "p99": 2075.551986694336
+ "p50": 520.3200057148933,
+ "p90": 549.4719967246056,
+ "p95": 561.6319999098778,
+ "p99": 608.9919954538345
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
+ "dispatchLogicalBytes": 4924416,
+ "combineLogicalBytes": 9848832,
+ "fanoutMean": 5.3671875,
+ "recvTokensMax": 91,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 2262.399911880493,
- "p90": 2271.775960922241,
- "p95": 2276.8959999084473,
- "p99": 2323.2638835906982
+ "p50": 438.62399458885193,
+ "p90": 462.43199706077576,
+ "p95": 473.31199049949646,
+ "p99": 510.24001836776733
},
"combine": {
- "p50": 1527.232050895691,
- "p90": 1535.6800556182861,
- "p95": 1539.29603099823,
- "p99": 1596.2879657745361
+ "p50": 85.75999736785889,
+ "p90": 92.25600212812424,
+ "p95": 98.62399846315384,
+ "p99": 112.22399771213531
},
"roundtrip": {
- "p50": 3780.895948410034,
- "p90": 3792.6719188690186,
- "p95": 3798.464059829712,
- "p99": 3837.4719619750977
+ "p50": 490.6559884548187,
+ "p90": 509.7600221633911,
+ "p95": 517.087996006012,
+ "p99": 546.7519760131836
},
"isolatedSum": {
- "p50": 3789.631962776184,
- "p90": 3807.4560165405273,
- "p95": 3816.1920309066772,
- "p99": 3919.5518493652344
+ "p50": 524.3839919567108,
+ "p90": 554.6879991889,
+ "p95": 571.9359889626503,
+ "p99": 622.4640160799026
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 9748480,
+ "combineLogicalBytes": 19496960,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 178,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 439.6800100803375,
+ "p90": 462.5599980354309,
+ "p95": 470.14400362968445,
+ "p99": 541.920006275177
+ },
+ "combine": {
+ "p50": 100.12800246477127,
+ "p90": 108.64000022411346,
+ "p95": 110.81600189208984,
+ "p99": 137.92000710964203
+ },
+ "roundtrip": {
+ "p50": 502.6879906654358,
+ "p90": 521.7919945716858,
+ "p95": 529.151976108551,
+ "p99": 575.7120251655579
+ },
+ "isolatedSum": {
+ "p50": 539.8080125451088,
+ "p90": 571.1999982595444,
+ "p95": 580.9600055217743,
+ "p99": 679.840013384819
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 19418112,
+ "combineLogicalBytes": 38836224,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 372,
+ "stragglerRank": 7,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 438.7199878692627,
+ "p90": 461.91999316215515,
+ "p95": 477.05599665641785,
+ "p99": 517.3439979553223
+ },
+ "combine": {
+ "p50": 120.09599804878235,
+ "p90": 128.7039965391159,
+ "p95": 133.34399461746216,
+ "p99": 160.863995552063
+ },
+ "roundtrip": {
+ "p50": 527.1360278129578,
+ "p90": 547.2319722175598,
+ "p95": 557.5680136680603,
+ "p99": 616.927981376648
+ },
+ "isolatedSum": {
+ "p50": 558.815985918045,
+ "p90": 590.6239897012711,
+ "p95": 610.39999127388,
+ "p99": 678.2079935073853
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38757376,
+ "combineLogicalBytes": 77514752,
+ "fanoutMean": 5.2802734375,
+ "recvTokensMax": 707,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -29209,47 +29529,48 @@
]
},
{
- "id": "cx-d208a3bd",
- "identity": "b300|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "b300_d6fd14c3",
- "comparisonKey": "5ca15c20f75abaa9",
+ "id": "cx-f16587a8",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_b1b733fb",
+ "comparisonKey": "1ce91864f23d9173",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:09.340656+00:00",
+ "generatedAt": "2026-06-29T13:51:35.154469+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
+ "phase": "decode",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "cached-layout-comm-only-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "Kimi-K2",
+ "label": "GB300 EP8 · deepep · fp8 [cl]",
+ "model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 384,
+ "experts": 256,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -29257,244 +29578,318 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287503879",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287503879",
- "createdAt": "2026-06-27T11:14:09.340656+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 86.27200126647949,
- "p90": 89.12000060081482,
- "p95": 90.30400216579437,
- "p99": 98.30400347709656
+ "p50": 78.23999971151352,
+ "p90": 109.24799740314484,
+ "p95": 128.4479945898056,
+ "p99": 150.62400698661804
},
"combine": {
- "p50": 108.86400192975998,
- "p90": 110.97600311040878,
- "p95": 112.2559979557991,
- "p99": 117.76000261306763
+ "p50": 75.45600086450577,
+ "p90": 111.64800077676773,
+ "p95": 116.31999909877777,
+ "p99": 137.85600662231445
},
"roundtrip": {
- "p50": 221.18400037288666,
- "p90": 224.99200701713562,
- "p95": 226.68799757957458,
- "p99": 240.12799561023712
+ "p50": 230.04800081253052,
+ "p90": 263.0079984664917,
+ "p95": 269.567996263504,
+ "p99": 295.199990272522
},
"isolatedSum": {
- "p50": 195.13600319623947,
- "p90": 200.0960037112236,
- "p95": 202.56000012159348,
- "p99": 216.06400609016418
+ "p50": 153.6960005760193,
+ "p90": 220.89599817991257,
+ "p95": 244.76799368858337,
+ "p99": 288.4800136089325
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 315392,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 7,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 118.04799735546112,
- "p90": 121.08799815177917,
- "p95": 122.3360002040863,
- "p99": 129.85600531101227
+ "p50": 88.44800293445587,
+ "p90": 124.60800260305405,
+ "p95": 130.68799674510956,
+ "p99": 139.8719996213913
},
"combine": {
- "p50": 157.05600380897522,
- "p90": 161.15200519561768,
- "p95": 161.95200383663177,
- "p99": 169.8240041732788
+ "p50": 77.82399654388428,
+ "p90": 114.94400352239609,
+ "p95": 131.9040060043335,
+ "p99": 143.74400675296783
},
"roundtrip": {
- "p50": 329.3440043926239,
- "p90": 333.5680067539215,
- "p95": 335.32801270484924,
- "p99": 343.58400106430054
+ "p50": 230.56000471115112,
+ "p90": 258.432000875473,
+ "p95": 270.27198672294617,
+ "p99": 287.9999876022339
},
"isolatedSum": {
- "p50": 275.10400116443634,
- "p90": 282.24000334739685,
- "p95": 284.2880040407181,
- "p99": 299.6800094842911
+ "p50": 166.27199947834015,
+ "p90": 239.55200612545013,
+ "p95": 262.59200274944305,
+ "p99": 283.61600637435913
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77285376,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 13,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 176.92799866199493,
- "p90": 180.67200481891632,
- "p95": 182.46400356292725,
- "p99": 189.60000574588776
+ "p50": 81.56800270080566,
+ "p90": 114.43199962377548,
+ "p95": 130.3360015153885,
+ "p99": 151.5520066022873
+ },
+ "combine": {
+ "p50": 76.09599828720093,
+ "p90": 96.67199850082397,
+ "p95": 118.1119978427887,
+ "p99": 140.9280002117157
+ },
+ "roundtrip": {
+ "p50": 231.455996632576,
+ "p90": 267.2959864139557,
+ "p95": 275.64799785614014,
+ "p99": 290.6560003757477
+ },
+ "isolatedSum": {
+ "p50": 157.6640009880066,
+ "p90": 211.10399812459946,
+ "p95": 248.44799935817719,
+ "p99": 292.480006814003
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 1240064,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 29,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 8,
+ "globalTokens": 64,
+ "dispatch": {
+ "p50": 78.36800068616867,
+ "p90": 105.3759977221489,
+ "p95": 122.49600142240524,
+ "p99": 148.00000190734863
},
"combine": {
- "p50": 266.975998878479,
- "p90": 271.87201380729675,
- "p95": 273.6319899559021,
- "p99": 285.3119969367981
+ "p50": 78.015998005867,
+ "p90": 105.76000064611435,
+ "p95": 120.57600170373917,
+ "p99": 147.039994597435
},
"roundtrip": {
- "p50": 550.2079725265503,
- "p90": 556.6719770431519,
- "p95": 559.328019618988,
- "p99": 570.8479881286621
+ "p50": 227.26400196552277,
+ "p90": 254.5279860496521,
+ "p95": 263.4879946708679,
+ "p99": 288.38399052619934
},
"isolatedSum": {
- "p50": 443.90399754047394,
- "p90": 452.5440186262131,
- "p95": 456.09599351882935,
- "p99": 474.91200268268585
+ "p50": 156.38399869203568,
+ "p90": 211.13599836826324,
+ "p95": 243.0720031261444,
+ "p99": 295.03999650478363
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 154886144,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2487296,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 47,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 299.26401376724243,
- "p90": 304.57600951194763,
- "p95": 306.40000104904175,
- "p99": 312.9279911518097
+ "p50": 80.48000186681747,
+ "p90": 118.81600320339203,
+ "p95": 131.8719983100891,
+ "p99": 171.74400389194489
},
"combine": {
- "p50": 455.9360146522522,
- "p90": 462.0479941368103,
- "p95": 467.6479995250702,
- "p99": 488.5759949684143
+ "p50": 81.44000172615051,
+ "p90": 115.74400216341019,
+ "p95": 128.25599312782288,
+ "p99": 149.47199821472168
},
"roundtrip": {
- "p50": 977.5360226631165,
- "p90": 984.0959906578064,
- "p95": 988.3840084075928,
- "p99": 1000.1920461654663
+ "p50": 231.1359941959381,
+ "p90": 265.82399010658264,
+ "p95": 274.2399871349335,
+ "p99": 290.75199365615845
},
"isolatedSum": {
- "p50": 755.2000284194946,
- "p90": 766.6240036487579,
- "p95": 774.0480005741119,
- "p99": 801.503986120224
+ "p50": 161.920003592968,
+ "p90": 234.56000536680222,
+ "p95": 260.127991437912,
+ "p99": 321.21600210666656
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 309750784,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 4960256,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 92,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 535.0720286369324,
- "p90": 539.3919944763184,
- "p95": 542.3679947853088,
- "p99": 553.6320209503174
+ "p50": 83.0719992518425,
+ "p90": 118.367999792099,
+ "p95": 134.14399325847626,
+ "p99": 146.81600034236908
},
"combine": {
- "p50": 812.1280074119568,
- "p90": 818.4319734573364,
- "p95": 821.120023727417,
- "p99": 830.8799862861633
+ "p50": 85.4720026254654,
+ "p90": 123.52000176906586,
+ "p95": 145.31199634075165,
+ "p99": 153.24799716472626
},
"roundtrip": {
- "p50": 1807.520031929016,
- "p90": 1816.864013671875,
- "p95": 1821.1840391159058,
- "p99": 1864.832043647766
+ "p50": 234.40000414848328,
+ "p90": 270.04799246788025,
+ "p95": 280.86400032043457,
+ "p99": 294.94398832321167
},
"isolatedSum": {
- "p50": 1347.2000360488892,
- "p90": 1357.8239679336548,
- "p95": 1363.4880185127258,
- "p99": 1384.5120072364807
+ "p50": 168.5440018773079,
+ "p90": 241.88800156116486,
+ "p95": 279.4559895992279,
+ "p99": 300.06399750709534
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 619687936,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 9863168,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 182,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 64,
+ "globalTokens": 512,
"dispatch": {
- "p50": 1011.9999647140503,
- "p90": 1018.9759731292725,
- "p95": 1022.5919485092163,
- "p99": 1036.6719961166382
+ "p50": 84.99199897050858,
+ "p90": 112.03200370073318,
+ "p95": 129.37599420547485,
+ "p99": 150.81599354743958
},
"combine": {
- "p50": 1512.671947479248,
- "p90": 1519.5200443267822,
- "p95": 1524.0000486373901,
- "p99": 1541.6959524154663
+ "p50": 101.34399682283401,
+ "p90": 148.03199470043182,
+ "p95": 153.79199385643005,
+ "p99": 163.2319986820221
},
"roundtrip": {
- "p50": 3455.4879665374756,
- "p90": 3466.2721157073975,
- "p95": 3470.144033432007,
- "p99": 3507.744073867798
+ "p50": 254.55999374389648,
+ "p90": 292.7680015563965,
+ "p95": 306.68801069259644,
+ "p99": 338.75200152397156
},
"isolatedSum": {
- "p50": 2524.6719121932983,
- "p90": 2538.4960174560547,
- "p95": 2546.5919971466064,
- "p99": 2578.3679485321045
+ "p50": 186.3359957933426,
+ "p90": 260.063998401165,
+ "p95": 283.1679880619049,
+ "p99": 314.04799222946167
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1239834624,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 367,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 97.9200005531311,
+ "p90": 147.8399932384491,
+ "p95": 152.19199657440186,
+ "p99": 160.8320027589798
+ },
+ "combine": {
+ "p50": 123.03999811410904,
+ "p90": 149.98400211334229,
+ "p95": 158.36800634860992,
+ "p99": 185.47199666500092
+ },
+ "roundtrip": {
+ "p50": 272.96000719070435,
+ "p90": 310.4639947414398,
+ "p95": 322.81601428985596,
+ "p99": 345.34400701522827
+ },
+ "isolatedSum": {
+ "p50": 220.95999866724014,
+ "p90": 297.8239953517914,
+ "p95": 310.5600029230118,
+ "p99": 346.3039994239807
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -29502,292 +29897,367 @@
]
},
{
- "id": "cx-252efc4d",
- "identity": "b300|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "b300_c4c63f07",
- "comparisonKey": "d0265daf2fea0a3e",
+ "id": "cx-d1adb2c7",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_4ebffb62",
+ "comparisonKey": "b12bc00db050e57a",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:51:32.842462+00:00",
+ "generatedAt": "2026-06-29T13:53:46.301476+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_17",
- "sku": "b300",
+ "publicationStatus": "diagnostic",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
+ "phase": "decode",
+ "mode": "ll",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8",
- "model": "Kimi-K2",
+ "label": "GB300 EP8 · deepep · fp8 LL",
+ "model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 384,
+ "experts": 256,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
- "configuredUnits": 20,
- "deviceUnits": 148,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
+ "achievedFraction": null,
+ "configuredUnits": null,
+ "deviceUnits": 152,
+ "resourceClass": "fixed-kernel",
+ "conformanceClass": "not-applicable",
+ "fixedKernel": true,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285685489",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285685489",
- "createdAt": "2026-06-27T09:51:32.842462+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 164.86400365829468,
- "p90": 169.855996966362,
- "p95": 173.69599640369415,
- "p99": 187.26399540901184
+ "p50": 58.27200040221214,
+ "p90": 63.90400230884552,
+ "p95": 67.32799857854843,
+ "p99": 103.7760004401207
},
"combine": {
- "p50": 108.2879975438118,
- "p90": 110.68800091743469,
- "p95": 112.15999722480774,
- "p99": 124.64000284671783
+ "p50": 73.11999797821045,
+ "p90": 79.68000322580338,
+ "p95": 83.67999643087387,
+ "p99": 113.02399635314941
},
"roundtrip": {
- "p50": 267.1999931335449,
- "p90": 272.0000147819519,
- "p95": 274.7200131416321,
- "p99": 301.472008228302
+ "p50": 1549.1520166397095,
+ "p90": 1555.7760000228882,
+ "p95": 1558.4640502929688,
+ "p99": 1581.6960334777832
},
"isolatedSum": {
- "p50": 273.1520012021065,
- "p90": 280.5439978837967,
- "p95": 285.8559936285019,
- "p99": 311.9039982557297
+ "p50": 131.3919983804226,
+ "p90": 143.5840055346489,
+ "p95": 151.0079950094223,
+ "p99": 216.7999967932701
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 315392,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 14,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 221.95200622081757,
- "p90": 225.0880002975464,
- "p95": 227.84000635147095,
- "p99": 246.11200392246246
+ "p50": 60.7680007815361,
+ "p90": 66.30399823188782,
+ "p95": 67.90400296449661,
+ "p99": 71.29599899053574
},
"combine": {
- "p50": 153.3759981393814,
- "p90": 157.0879966020584,
- "p95": 158.33599865436554,
- "p99": 163.5199934244156
+ "p50": 73.53600114583969,
+ "p90": 79.8719972372055,
+ "p95": 81.37600123882294,
+ "p99": 88.57599645853043
},
"roundtrip": {
- "p50": 374.87998604774475,
- "p90": 379.61599230766296,
- "p95": 385.72800159454346,
- "p99": 410.2720022201538
+ "p50": 1551.6159534454346,
+ "p90": 1555.999994277954,
+ "p95": 1557.6319694519043,
+ "p99": 1560.6399774551392
},
"isolatedSum": {
- "p50": 375.328004360199,
- "p90": 382.1759968996048,
- "p95": 386.1760050058365,
- "p99": 409.63199734687805
+ "p50": 134.3040019273758,
+ "p90": 146.17599546909332,
+ "p95": 149.28000420331955,
+ "p99": 159.87199544906616
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77285376,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 21,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 358.271986246109,
- "p90": 362.43200302124023,
- "p95": 364.8639917373657,
- "p99": 389.44000005722046
+ "p50": 61.59999966621399,
+ "p90": 67.19999760389328,
+ "p95": 69.18399780988693,
+ "p99": 72.09599763154984
},
"combine": {
- "p50": 265.4399871826172,
- "p90": 270.6559896469116,
- "p95": 273.8560140132904,
- "p99": 306.68801069259644
+ "p50": 73.85600358247757,
+ "p90": 80.57600259780884,
+ "p95": 82.59200304746628,
+ "p99": 86.7839977145195
},
"roundtrip": {
- "p50": 616.159975528717,
- "p90": 622.8799819946289,
- "p95": 628.063976764679,
- "p99": 656.4800143241882
+ "p50": 1556.2880039215088,
+ "p90": 1561.2800121307373,
+ "p95": 1564.3199682235718,
+ "p99": 1581.503987312317
},
"isolatedSum": {
- "p50": 623.7119734287262,
- "p90": 633.0879926681519,
- "p95": 638.7200057506561,
- "p99": 696.1280107498169
+ "p50": 135.45600324869156,
+ "p90": 147.77600020170212,
+ "p95": 151.7760008573532,
+ "p99": 158.87999534606934
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 154886144,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1240064,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 39,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 629.1840076446533,
- "p90": 634.1760158538818,
- "p95": 637.2799873352051,
- "p99": 658.3679914474487
+ "p50": 64.19199705123901,
+ "p90": 70.11199742555618,
+ "p95": 72.25599884986877,
+ "p99": 75.83999633789062
},
"combine": {
- "p50": 454.912006855011,
- "p90": 460.1280093193054,
- "p95": 465.2479887008667,
- "p99": 487.61600255966187
+ "p50": 82.24000036716461,
+ "p90": 90.04800021648407,
+ "p95": 91.93599969148636,
+ "p99": 96.57599776983261
},
"roundtrip": {
- "p50": 1072.5760459899902,
- "p90": 1080.7360410690308,
- "p95": 1090.3040170669556,
- "p99": 1124.351978302002
+ "p50": 1561.7599487304688,
+ "p90": 1566.4960145950317,
+ "p95": 1568.2239532470703,
+ "p99": 1570.7520246505737
},
"isolatedSum": {
- "p50": 1084.0960144996643,
- "p90": 1094.3040251731873,
- "p95": 1102.5279760360718,
- "p99": 1145.9839940071106
+ "p50": 146.43199741840363,
+ "p90": 160.15999764204025,
+ "p95": 164.19199854135513,
+ "p99": 172.41599410772324
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 309750784,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 2487296,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 74,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 1168.992042541504,
- "p90": 1176.31995677948,
- "p95": 1185.5360269546509,
- "p99": 1203.6160230636597
+ "p50": 68.25599819421768,
+ "p90": 73.82400333881378,
+ "p95": 75.71200281381607,
+ "p99": 78.87999713420868
},
"combine": {
- "p50": 810.2719783782959,
- "p90": 818.943977355957,
- "p95": 826.1759877204895,
- "p99": 878.6560297012329
+ "p50": 89.79199826717377,
+ "p90": 97.59999811649323,
+ "p95": 100.54399818181992,
+ "p99": 109.56799983978271
},
"roundtrip": {
- "p50": 1966.6880369186401,
- "p90": 1979.6799421310425,
- "p95": 1991.487979888916,
- "p99": 2013.6001110076904
+ "p50": 1575.32799243927,
+ "p90": 1579.8399448394775,
+ "p95": 1582.3999643325806,
+ "p99": 1598.6239910125732
},
"isolatedSum": {
- "p50": 1979.2640209197998,
- "p90": 1995.263934135437,
- "p95": 2011.7120146751404,
- "p99": 2082.2720527648926
+ "p50": 158.04799646139145,
+ "p90": 171.424001455307,
+ "p95": 176.256000995636,
+ "p99": 188.4479969739914
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 619687936,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4960256,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 145,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 2255.136013031006,
- "p90": 2265.471935272217,
- "p95": 2275.2959728240967,
- "p99": 2326.7838954925537
+ "p50": 92.47999638319016,
+ "p90": 96.99200093746185,
+ "p95": 98.2080027461052,
+ "p99": 101.50399804115295
},
"combine": {
- "p50": 1510.5600357055664,
- "p90": 1526.144027709961,
- "p95": 1534.656047821045,
- "p99": 1569.7920322418213
+ "p50": 111.42399907112122,
+ "p90": 114.97599631547928,
+ "p95": 115.99999666213989,
+ "p99": 118.78400295972824
},
"roundtrip": {
- "p50": 3753.2479763031006,
- "p90": 3776.5119075775146,
- "p95": 3788.383960723877,
- "p99": 3816.6720867156982
+ "p50": 1624.7680187225342,
+ "p90": 1630.3999423980713,
+ "p95": 1632.5119733810425,
+ "p99": 1636.1600160598755
},
"isolatedSum": {
- "p50": 3765.6960487365723,
- "p90": 3791.6159629821777,
- "p95": 3809.9520206451416,
- "p99": 3896.575927734375
+ "p50": 203.90399545431137,
+ "p90": 211.96799725294113,
+ "p95": 214.2079994082451,
+ "p99": 220.2880010008812
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1239834624,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 9863168,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 287,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 152.12799608707428,
+ "p90": 168.35199296474457,
+ "p95": 173.2800006866455,
+ "p99": 187.71199882030487
+ },
+ "combine": {
+ "p50": 191.8720006942749,
+ "p90": 202.59200036525726,
+ "p95": 206.56000077724457,
+ "p99": 214.49600160121918
+ },
+ "roundtrip": {
+ "p50": 1760.4479789733887,
+ "p90": 1772.4159955978394,
+ "p95": 1775.264024734497,
+ "p99": 1783.4559679031372
+ },
+ "isolatedSum": {
+ "p50": 343.9999967813492,
+ "p90": 370.94399333000183,
+ "p95": 379.8400014638901,
+ "p99": 402.20800042152405
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 564,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 252.83199548721313,
+ "p90": 273.9520072937012,
+ "p95": 282.6240062713623,
+ "p99": 301.85601115226746
+ },
+ "combine": {
+ "p50": 332.63999223709106,
+ "p90": 359.20000076293945,
+ "p95": 367.2960102558136,
+ "p99": 384.768009185791
+ },
+ "roundtrip": {
+ "p50": 2012.7360820770264,
+ "p90": 2045.7279682159424,
+ "p95": 2053.7281036376953,
+ "p99": 2084.736108779907
+ },
+ "isolatedSum": {
+ "p50": 585.4719877243042,
+ "p90": 633.1520080566406,
+ "p95": 649.9200165271759,
+ "p99": 686.6240203380585
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 1104,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -29795,28 +30265,28 @@
]
},
{
- "id": "cx-c8d1506e",
- "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "b300_eee29686",
- "comparisonKey": "efab2d3670b24be2",
+ "id": "cx-ef83f327",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "colorKey": "gb300_b8af531e",
+ "comparisonKey": "71503b9e265e42a9",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T17:42:54.702578+00:00",
+ "generatedAt": "2026-06-29T13:55:19.539361+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "diagnostic",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "phase": "decode",
+ "mode": "ll",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8 (norm)",
+ "label": "GB300 EP8 · deepep · fp8 LL",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -29828,259 +30298,334 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
+ "requestedFraction": null,
+ "achievedFraction": null,
+ "configuredUnits": null,
+ "deviceUnits": 152,
+ "resourceClass": "fixed-kernel",
+ "conformanceClass": "not-applicable",
+ "fixedKernel": true,
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "74444524b5db510",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254479346",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254479346",
- "createdAt": "2026-06-26T17:42:54.702578+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1,
+ "globalTokens": 8,
"dispatch": {
- "p50": 83.45600217580795,
- "p90": 86.14400029182434,
- "p95": 87.2960016131401,
- "p99": 102.08000242710114
+ "p50": 58.111999183893204,
+ "p90": 71.84000313282013,
+ "p95": 78.015998005867,
+ "p99": 89.72799777984619
},
"combine": {
- "p50": 108.38399827480316,
- "p90": 110.75200140476227,
- "p95": 111.61600053310394,
- "p99": 114.9120032787323
+ "p50": 75.96799731254578,
+ "p90": 83.03999900817871,
+ "p95": 86.04799956083298,
+ "p99": 90.84799885749817
},
"roundtrip": {
- "p50": 218.33600103855133,
- "p90": 221.6320037841797,
- "p95": 222.84799814224243,
- "p99": 235.23199558258057
+ "p50": 1549.6000051498413,
+ "p90": 1556.5439462661743,
+ "p95": 1560.703992843628,
+ "p99": 1568.0639743804932
},
"isolatedSum": {
- "p50": 191.84000045061111,
- "p90": 196.8960016965866,
- "p95": 198.91200214624405,
- "p99": 216.99200570583344
+ "p50": 134.07999649643898,
+ "p90": 154.88000214099884,
+ "p95": 164.06399756669998,
+ "p99": 180.57599663734436
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 315392,
+ "combineLogicalBytes": 630784,
+ "fanoutMean": 5.5,
+ "recvTokensMax": 14,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 2,
+ "globalTokens": 16,
"dispatch": {
- "p50": 116.70400202274323,
- "p90": 119.64800208806992,
- "p95": 121.15199863910675,
- "p99": 135.3600025177002
+ "p50": 60.447998344898224,
+ "p90": 66.3359984755516,
+ "p95": 68.2239979505539,
+ "p99": 75.48800110816956
},
"combine": {
- "p50": 155.29599785804749,
- "p90": 167.4560010433197,
- "p95": 176.60799622535706,
- "p99": 184.1599941253662
+ "p50": 77.2479996085167,
+ "p90": 85.56800335645676,
+ "p95": 87.71199733018875,
+ "p99": 94.27200257778168
},
"roundtrip": {
- "p50": 324.47999715805054,
- "p90": 328.19199562072754,
- "p95": 330.04799485206604,
- "p99": 345.40799260139465
+ "p50": 1554.1759729385376,
+ "p90": 1560.6720447540283,
+ "p95": 1563.7120008468628,
+ "p99": 1572.5120306015015
},
"isolatedSum": {
- "p50": 271.9999998807907,
- "p90": 287.1040031313896,
- "p95": 297.7599948644638,
- "p99": 319.5199966430664
+ "p50": 137.69599795341492,
+ "p90": 151.90400183200836,
+ "p95": 155.93599528074265,
+ "p99": 169.76000368595123
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 616448,
+ "combineLogicalBytes": 1232896,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 21,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 4,
+ "globalTokens": 32,
"dispatch": {
- "p50": 177.7919977903366,
- "p90": 182.27200210094452,
- "p95": 183.9040070772171,
- "p99": 191.103994846344
+ "p50": 63.77600133419037,
+ "p90": 78.84799689054489,
+ "p95": 94.91200000047684,
+ "p99": 107.93600231409073
},
"combine": {
- "p50": 267.520010471344,
- "p90": 270.81599831581116,
- "p95": 272.0640003681183,
- "p99": 275.4879891872406
+ "p50": 78.65600287914276,
+ "p90": 87.90399879217148,
+ "p95": 90.84799885749817,
+ "p99": 95.32800316810608
},
"roundtrip": {
- "p50": 550.8480072021484,
- "p90": 556.9599866867065,
- "p95": 560.2560043334961,
- "p99": 578.3360004425049
+ "p50": 1556.0640096664429,
+ "p90": 1562.399983406067,
+ "p95": 1564.6400451660156,
+ "p99": 1574.1440057754517
},
"isolatedSum": {
- "p50": 445.3120082616806,
- "p90": 453.0880004167557,
- "p95": 455.9680074453354,
- "p99": 466.5919840335846
+ "p50": 142.43200421333313,
+ "p90": 166.75199568271637,
+ "p95": 185.759998857975,
+ "p99": 203.2640054821968
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1240064,
+ "combineLogicalBytes": 2480128,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 39,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 8,
+ "globalTokens": 64,
"dispatch": {
- "p50": 298.0160117149353,
- "p90": 302.4959862232208,
- "p95": 304.4799864292145,
- "p99": 319.07200813293457
+ "p50": 67.77600198984146,
+ "p90": 79.83999699354172,
+ "p95": 87.99999952316284,
+ "p99": 112.86400258541107
},
"combine": {
- "p50": 452.1920084953308,
- "p90": 456.6720128059387,
- "p95": 458.624005317688,
- "p99": 467.9360091686249
+ "p50": 83.45600217580795,
+ "p90": 93.08800101280212,
+ "p95": 95.87199985980988,
+ "p99": 100.80000013113022
},
"roundtrip": {
- "p50": 976.5759706497192,
- "p90": 983.8719964027405,
- "p95": 991.5199875831604,
- "p99": 1023.3279466629028
+ "p50": 1566.1439895629883,
+ "p90": 1593.6000347137451,
+ "p95": 1602.560043334961,
+ "p99": 1620.2880144119263
},
"isolatedSum": {
- "p50": 750.2080202102661,
- "p90": 759.1679990291595,
- "p95": 763.1039917469025,
- "p99": 787.0080173015594
+ "p50": 151.2320041656494,
+ "p90": 172.92799800634384,
+ "p95": 183.87199938297272,
+ "p99": 213.6640027165413
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2487296,
+ "combineLogicalBytes": 4974592,
+ "fanoutMean": 5.421875,
+ "recvTokensMax": 74,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 16,
+ "globalTokens": 128,
"dispatch": {
- "p50": 541.4720177650452,
- "p90": 546.7519760131836,
- "p95": 549.4080185890198,
- "p99": 557.7920079231262
+ "p50": 69.08799707889557,
+ "p90": 75.26399940252304,
+ "p95": 77.98399776220322,
+ "p99": 290.8799946308136
},
"combine": {
- "p50": 814.7199749946594,
- "p90": 820.8320140838623,
- "p95": 824.0640163421631,
- "p99": 847.2959995269775
+ "p50": 86.11200004816055,
+ "p90": 92.6079973578453,
+ "p95": 95.77599912881851,
+ "p99": 100.92800110578537
},
"roundtrip": {
- "p50": 1818.0160522460938,
- "p90": 1827.712059020996,
- "p95": 1832.0000171661377,
- "p99": 1889.5679712295532
+ "p50": 1577.9839754104614,
+ "p90": 1589.311957359314,
+ "p95": 1604.383945465088,
+ "p99": 1625.3440380096436
},
"isolatedSum": {
- "p50": 1356.1919927597046,
- "p90": 1367.583990097046,
- "p95": 1373.4720349311829,
- "p99": 1405.0880074501038
+ "p50": 155.19999712705612,
+ "p90": 167.87199676036835,
+ "p95": 173.75999689102173,
+ "p99": 391.80799573659897
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 4960256,
+ "combineLogicalBytes": 9920512,
+ "fanoutMean": 5.40625,
+ "recvTokensMax": 145,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 32,
+ "globalTokens": 256,
"dispatch": {
- "p50": 1019.6160078048706,
- "p90": 1027.9040336608887,
- "p95": 1031.391978263855,
- "p99": 1045.2799797058105
+ "p50": 92.54399687051773,
+ "p90": 98.14400225877762,
+ "p95": 101.08800232410431,
+ "p99": 138.11199367046356
},
"combine": {
- "p50": 1529.4400453567505,
- "p90": 1537.2480154037476,
- "p95": 1540.8639907836914,
- "p99": 1614.6240234375
+ "p50": 111.93600296974182,
+ "p90": 118.49600076675415,
+ "p95": 121.2799996137619,
+ "p99": 125.37600100040436
},
"roundtrip": {
- "p50": 3477.3120880126953,
- "p90": 3490.272045135498,
- "p95": 3495.3598976135254,
- "p99": 3531.3920974731445
+ "p50": 1626.3359785079956,
+ "p90": 1636.0960006713867,
+ "p95": 1640.5760049819946,
+ "p99": 1658.400058746338
},
"isolatedSum": {
- "p50": 2549.056053161621,
- "p90": 2565.1520490646362,
- "p95": 2572.2559690475464,
- "p99": 2659.9040031433105
+ "p50": 204.47999984025955,
+ "p90": 216.64000302553177,
+ "p95": 222.3680019378662,
+ "p99": 263.4879946708679
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 9863168,
+ "combineLogicalBytes": 19726336,
+ "fanoutMean": 5.375,
+ "recvTokensMax": 287,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 64,
+ "globalTokens": 512,
+ "dispatch": {
+ "p50": 151.19999647140503,
+ "p90": 165.75999557971954,
+ "p95": 170.27199268341064,
+ "p99": 181.0240000486374
+ },
+ "combine": {
+ "p50": 191.26400351524353,
+ "p90": 201.53599977493286,
+ "p95": 205.37599921226501,
+ "p99": 212.8320038318634
+ },
+ "roundtrip": {
+ "p50": 1757.9200267791748,
+ "p90": 1771.2639570236206,
+ "p95": 1776.0319709777832,
+ "p99": 1783.2000255584717
+ },
+ "isolatedSum": {
+ "p50": 342.46399998664856,
+ "p90": 367.2959953546524,
+ "p95": 375.64799189567566,
+ "p99": 393.8560038805008
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 19496960,
+ "combineLogicalBytes": 38993920,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 564,
+ "stragglerRank": 2,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 257.9199969768524,
+ "p90": 279.9359858036041,
+ "p95": 285.21600365638733,
+ "p99": 299.80799555778503
+ },
+ "combine": {
+ "p50": 336.2880051136017,
+ "p90": 367.64800548553467,
+ "p95": 376.5760064125061,
+ "p99": 390.04799723625183
+ },
+ "roundtrip": {
+ "p50": 2014.3039226531982,
+ "p90": 2045.85599899292,
+ "p95": 2056.9920539855957,
+ "p99": 2080.3520679473877
+ },
+ "isolatedSum": {
+ "p50": 594.2080020904541,
+ "p90": 647.5839912891388,
+ "p95": 661.7920100688934,
+ "p99": 689.8559927940369
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 1104,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30088,107 +30633,108 @@
]
},
{
- "id": "cx-9971d342",
- "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "b300_84b10b26",
- "comparisonKey": "1c850249e23e1e8c",
+ "id": "cx-7e3a1c52",
+ "identity": "gb300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "c1d0b67251736b2c",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T18:09:25.013454+00:00",
+ "generatedAt": "2026-06-29T14:04:35.790815+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
"phase": "prefill",
"mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16",
+ "model": "Qwen3.5",
"shape": {
- "hidden": 7168,
+ "hidden": 4096,
"topk": 8,
- "experts": 256,
+ "experts": 128,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1824,
- "configuredUnits": 27,
- "deviceUnits": 148,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
"fixedKernel": false,
- "paretoEligible": true
+ "paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "03f98832f76b043",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28254499301",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254499301",
- "createdAt": "2026-06-26T18:09:25.013454+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 77.02399790287018,
- "p90": 79.48800176382065,
- "p95": 80.89599758386612,
- "p99": 85.28000116348267
+ "p50": 115.93600362539291,
+ "p90": 125.56800246238708,
+ "p95": 129.31199371814728,
+ "p99": 139.42399621009827
},
"combine": {
- "p50": 108.5439994931221,
- "p90": 111.29599809646606,
- "p95": 112.35199868679047,
- "p99": 124.41600114107132
+ "p50": 106.9440022110939,
+ "p90": 111.93600296974182,
+ "p95": 117.11999773979187,
+ "p99": 121.31199985742569
},
"roundtrip": {
- "p50": 211.74399554729462,
- "p90": 214.4320011138916,
- "p95": 216.0000056028366,
- "p99": 233.15200209617615
+ "p50": 193.37600469589233,
+ "p90": 202.55999267101288,
+ "p95": 205.05599677562714,
+ "p99": 213.47199380397797
},
"isolatedSum": {
- "p50": 185.56799739599228,
- "p90": 190.7839998602867,
- "p95": 193.24799627065659,
- "p99": 209.69600230455399
+ "p50": 222.88000583648682,
+ "p90": 237.5040054321289,
+ "p95": 246.43199145793915,
+ "p99": 260.73599606752396
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 44564480,
+ "combineLogicalBytes": 44564480,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 699,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30197,35 +30743,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 110.1439967751503,
- "p90": 113.53600025177002,
- "p95": 115.90400338172913,
- "p99": 132.6719969511032
+ "p50": 145.82400023937225,
+ "p90": 155.2640050649643,
+ "p95": 158.55999290943146,
+ "p99": 168.2240068912506
},
"combine": {
- "p50": 153.3759981393814,
- "p90": 157.60000050067902,
- "p95": 159.32799875736237,
- "p99": 173.69599640369415
+ "p50": 143.8080072402954,
+ "p90": 148.51200580596924,
+ "p95": 154.11199629306793,
+ "p99": 158.01599621772766
},
"roundtrip": {
- "p50": 318.30400228500366,
- "p90": 322.52800464630127,
- "p95": 325.408011674881,
- "p99": 346.49598598480225
+ "p50": 260.5440020561218,
+ "p90": 269.1200077533722,
+ "p95": 272.352010011673,
+ "p99": 280.5120050907135
},
"isolatedSum": {
- "p50": 263.5199949145317,
- "p90": 271.13600075244904,
- "p95": 275.2320021390915,
- "p99": 306.36799335479736
+ "p50": 289.63200747966766,
+ "p90": 303.77601087093353,
+ "p95": 312.6719892024994,
+ "p99": 326.24000310897827
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 89726976,
+ "combineLogicalBytes": 89726976,
+ "fanoutMean": 5.34814453125,
+ "recvTokensMax": 1385,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30234,35 +30780,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 171.26399278640747,
- "p90": 176.15999281406403,
- "p95": 178.6240041255951,
- "p99": 194.815993309021
+ "p50": 198.68800044059753,
+ "p90": 217.0879989862442,
+ "p95": 230.335995554924,
+ "p99": 242.68800020217896
},
"combine": {
- "p50": 268.2879865169525,
- "p90": 273.0560004711151,
- "p95": 275.64799785614014,
- "p99": 283.58399868011475
+ "p50": 214.75200355052948,
+ "p90": 228.5120040178299,
+ "p95": 233.21600258350372,
+ "p99": 253.6959946155548
},
"roundtrip": {
- "p50": 543.7120199203491,
- "p90": 550.6880283355713,
- "p95": 554.1120171546936,
- "p99": 576.0639905929565
+ "p50": 381.632000207901,
+ "p90": 396.4479863643646,
+ "p95": 404.4159948825836,
+ "p99": 415.3600037097931
},
"isolatedSum": {
- "p50": 439.55197930336,
- "p90": 449.21599328517914,
- "p95": 454.27200198173523,
- "p99": 478.39999198913574
+ "p50": 413.440003991127,
+ "p90": 445.6000030040741,
+ "p95": 463.55199813842773,
+ "p99": 496.38399481773376
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 179503104,
+ "combineLogicalBytes": 179503104,
+ "fanoutMean": 5.349609375,
+ "recvTokensMax": 2772,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30271,35 +30817,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 288.92800211906433,
- "p90": 294.0160036087036,
- "p95": 297.791987657547,
- "p99": 315.3280019760132
+ "p50": 296.79998755455017,
+ "p90": 312.9599988460541,
+ "p95": 322.7519989013672,
+ "p99": 346.8480110168457
},
"combine": {
- "p50": 452.09598541259766,
- "p90": 457.37600326538086,
- "p95": 461.7280066013336,
- "p99": 471.74400091171265
+ "p50": 379.96798753738403,
+ "p90": 390.0800049304962,
+ "p95": 391.64799451828003,
+ "p99": 400.57599544525146
},
"roundtrip": {
- "p50": 967.1040177345276,
- "p90": 974.62397813797,
- "p95": 977.5360226631165,
- "p99": 995.6160187721252
+ "p50": 616.5760159492493,
+ "p90": 625.5360245704651,
+ "p95": 629.9840211868286,
+ "p99": 637.8239989280701
},
"isolatedSum": {
- "p50": 741.023987531662,
- "p90": 751.3920068740845,
- "p95": 759.5199942588806,
- "p99": 787.0720028877258
+ "p50": 676.7679750919342,
+ "p90": 703.0400037765503,
+ "p95": 714.3999934196472,
+ "p99": 747.4240064620972
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 359022592,
+ "combineLogicalBytes": 359022592,
+ "fanoutMean": 5.349853515625,
+ "recvTokensMax": 5558,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30308,35 +30854,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 523.3920216560364,
- "p90": 529.2800068855286,
- "p95": 533.3439707756042,
- "p99": 550.1120090484619
+ "p50": 505.7600140571594,
+ "p90": 518.559992313385,
+ "p95": 523.6160159111023,
+ "p99": 532.0000052452087
},
"combine": {
- "p50": 816.32000207901,
- "p90": 824.9599933624268,
- "p95": 831.1359882354736,
- "p99": 855.135977268219
+ "p50": 800.5120158195496,
+ "p90": 810.5279803276062,
+ "p95": 811.743974685669,
+ "p99": 820.4159736633301
},
"roundtrip": {
- "p50": 1800.096035003662,
- "p90": 1811.743974685669,
- "p95": 1825.7919549942017,
- "p99": 1866.8160438537598
+ "p50": 1271.83997631073,
+ "p90": 1283.2319736480713,
+ "p95": 1286.5279912948608,
+ "p99": 1292.9279804229736
},
"isolatedSum": {
- "p50": 1339.7120237350464,
- "p90": 1354.2400002479553,
- "p95": 1364.4799590110779,
- "p99": 1405.247986316681
+ "p50": 1306.272029876709,
+ "p90": 1329.0879726409912,
+ "p95": 1335.3599905967712,
+ "p99": 1352.4159789085388
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 716111872,
+ "combineLogicalBytes": 716111872,
+ "fanoutMean": 5.33544921875,
+ "recvTokensMax": 10982,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30345,35 +30891,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 984.9280118942261,
- "p90": 992.2239780426025,
- "p95": 996.5760111808777,
- "p99": 1026.9759893417358
+ "p50": 932.4480295181274,
+ "p90": 948.2880234718323,
+ "p95": 953.8879990577698,
+ "p99": 972.2239971160889
},
"combine": {
- "p50": 1529.312014579773,
- "p90": 1539.1039848327637,
- "p95": 1548.0320453643799,
- "p99": 1564.3839836120605
+ "p50": 1506.4959526062012,
+ "p90": 1514.3359899520874,
+ "p95": 1520.3839540481567,
+ "p99": 1524.8960256576538
},
"roundtrip": {
- "p50": 3440.864086151123,
- "p90": 3457.6640129089355,
- "p95": 3468.832015991211,
- "p99": 3514.2080783843994
+ "p50": 2408.128023147583,
+ "p90": 2428.4160137176514,
+ "p95": 2437.8559589385986,
+ "p99": 2452.7359008789062
},
"isolatedSum": {
- "p50": 2514.240026473999,
- "p90": 2531.327962875366,
- "p95": 2544.6080565452576,
- "p99": 2591.3599729537964
+ "p50": 2438.9439821243286,
+ "p90": 2462.6240134239197,
+ "p95": 2474.2719531059265,
+ "p99": 2497.1200227737427
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1432395776,
+ "combineLogicalBytes": 1432395776,
+ "fanoutMean": 5.336090087890625,
+ "recvTokensMax": 21939,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30381,47 +30927,48 @@
]
},
{
- "id": "cx-3feaa006",
- "identity": "b300|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_49e66a7b",
- "comparisonKey": "5b68240330e760fc",
+ "id": "cx-0457a436",
+ "identity": "gb300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "e008e386a7e2bc41",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T09:47:42.062998+00:00",
+ "generatedAt": "2026-06-29T14:09:04.614868+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_11",
- "sku": "b300",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep · fp8 [cl]",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16",
+ "model": "shape 5120/8/160",
"shape": {
- "hidden": 7168,
+ "hidden": 5120,
"topk": 8,
- "experts": 256,
+ "experts": 160,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -30429,59 +30976,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "a9df48e6438e77a",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28285593016",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285593016",
- "createdAt": "2026-06-27T09:47:42.062998+00:00",
- "sha": "149586650dbed5b7579537347e9489d5b41543c1"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 76.9599974155426,
- "p90": 79.64800298213959,
- "p95": 80.92799782752991,
- "p99": 92.3520028591156
+ "p50": 117.34399944543839,
+ "p90": 126.5919953584671,
+ "p95": 130.3360015153885,
+ "p99": 138.62399756908417
},
"combine": {
- "p50": 108.51199924945831,
- "p90": 111.42399907112122,
- "p95": 112.41599917411804,
- "p99": 122.40000069141388
+ "p50": 113.53600025177002,
+ "p90": 120.7360029220581,
+ "p95": 122.81599640846252,
+ "p99": 132.83200562000275
},
"roundtrip": {
- "p50": 210.78400313854218,
- "p90": 213.85599672794342,
- "p95": 216.5759950876236,
- "p99": 232.9919934272766
+ "p50": 203.45599949359894,
+ "p90": 211.35999262332916,
+ "p95": 214.27200734615326,
+ "p99": 223.1680005788803
},
"isolatedSum": {
- "p50": 185.47199666500092,
- "p90": 191.0720020532608,
- "p95": 193.34399700164795,
- "p99": 214.75200355052948
+ "p50": 230.8799996972084,
+ "p90": 247.3279982805252,
+ "p95": 253.151997923851,
+ "p99": 271.4560031890869
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 55674880,
+ "combineLogicalBytes": 55674880,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 699,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30490,35 +31037,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 109.95200276374817,
- "p90": 113.43999952077866,
- "p95": 115.26399850845337,
- "p99": 126.62400305271149
+ "p50": 148.3200043439865,
+ "p90": 169.40799355506897,
+ "p95": 187.00799345970154,
+ "p99": 236.25600337982178
},
"combine": {
- "p50": 154.91199493408203,
- "p90": 159.04000401496887,
- "p95": 161.18399798870087,
- "p99": 169.88800466060638
+ "p50": 150.68799257278442,
+ "p90": 172.89599776268005,
+ "p95": 190.0479942560196,
+ "p99": 229.69600558280945
},
"roundtrip": {
- "p50": 318.30400228500366,
- "p90": 323.3279883861542,
- "p95": 325.21599531173706,
- "p99": 336.70398592948914
+ "p50": 270.687997341156,
+ "p90": 281.3439965248108,
+ "p95": 305.34398555755615,
+ "p99": 322.2079873085022
},
"isolatedSum": {
- "p50": 264.8639976978302,
- "p90": 272.4800035357475,
- "p95": 276.44799649715424,
- "p99": 296.51200771331787
+ "p50": 299.00799691677094,
+ "p90": 342.303991317749,
+ "p95": 377.05598771572113,
+ "p99": 465.9520089626312
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 111104000,
+ "combineLogicalBytes": 111104000,
+ "fanoutMean": 5.2978515625,
+ "recvTokensMax": 1387,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30527,35 +31074,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 172.09599912166595,
- "p90": 176.38400197029114,
- "p95": 178.56000363826752,
- "p99": 194.07999515533447
+ "p50": 204.54399287700653,
+ "p90": 227.32800245285034,
+ "p95": 239.1359955072403,
+ "p99": 334.27199721336365
},
"combine": {
- "p50": 265.79201221466064,
- "p90": 270.112007856369,
- "p95": 272.0319926738739,
- "p99": 286.655992269516
+ "p50": 241.18399620056152,
+ "p90": 248.09600412845612,
+ "p95": 256.19199872016907,
+ "p99": 278.78400683403015
},
"roundtrip": {
- "p50": 542.7200198173523,
- "p90": 548.8640069961548,
- "p95": 551.967978477478,
- "p99": 560.8000159263611
+ "p50": 399.4559943675995,
+ "p90": 425.79200863838196,
+ "p95": 449.3440091609955,
+ "p99": 484.3200147151947
},
"isolatedSum": {
- "p50": 437.8880113363266,
- "p90": 446.49600982666016,
- "p95": 450.5919963121414,
- "p99": 480.73598742485046
+ "p50": 445.72798907756805,
+ "p90": 475.42400658130646,
+ "p95": 495.32799422740936,
+ "p99": 613.0560040473938
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 223098880,
+ "combineLogicalBytes": 223098880,
+ "fanoutMean": 5.319091796875,
+ "recvTokensMax": 2762,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30564,35 +31111,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 286.8160009384155,
- "p90": 291.9040024280548,
- "p95": 294.65600848197937,
- "p99": 305.184006690979
+ "p50": 317.7280128002167,
+ "p90": 331.4560055732727,
+ "p95": 345.7280099391937,
+ "p99": 370.2720105648041
},
"combine": {
- "p50": 452.2559940814972,
- "p90": 458.0160081386566,
- "p95": 460.7360064983368,
- "p99": 470.5919921398163
+ "p50": 465.88799357414246,
+ "p90": 487.5839948654175,
+ "p95": 502.560019493103,
+ "p99": 514.9120092391968
},
"roundtrip": {
- "p50": 964.1919732093811,
- "p90": 972.320020198822,
- "p95": 979.5200228691101,
- "p99": 989.8560047149658
+ "p50": 714.3039703369141,
+ "p90": 724.2559790611267,
+ "p95": 728.16002368927,
+ "p99": 738.0800247192383
},
"isolatedSum": {
- "p50": 739.0719950199127,
- "p90": 749.9200105667114,
- "p95": 755.3920149803162,
- "p99": 775.7759988307953
+ "p50": 783.6160063743591,
+ "p90": 819.0400004386902,
+ "p95": 848.2880294322968,
+ "p99": 885.1840198040009
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 446730240,
+ "combineLogicalBytes": 446730240,
+ "fanoutMean": 5.325439453125,
+ "recvTokensMax": 5518,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30601,35 +31148,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 523.1040120124817,
- "p90": 529.7920107841492,
- "p95": 532.2239995002747,
- "p99": 541.5999889373779
+ "p50": 551.967978477478,
+ "p90": 561.3440275192261,
+ "p95": 564.3200278282166,
+ "p99": 576.1920213699341
},
"combine": {
- "p50": 814.2399787902832,
- "p90": 822.5280046463013,
- "p95": 828.8639783859253,
- "p99": 839.9360179901123
+ "p50": 835.8719944953918,
+ "p90": 845.9839820861816,
+ "p95": 848.3840227127075,
+ "p99": 855.8719754219055
},
"roundtrip": {
- "p50": 1798.4319925308228,
- "p90": 1811.8720054626465,
- "p95": 1817.9839849472046,
- "p99": 1887.3599767684937
+ "p50": 1357.0560216903687,
+ "p90": 1370.3999519348145,
+ "p95": 1381.8880319595337,
+ "p99": 1400.7680416107178
},
"isolatedSum": {
- "p50": 1337.343990802765,
- "p90": 1352.3200154304504,
- "p95": 1361.0879778862,
- "p99": 1381.5360069274902
+ "p50": 1387.8399729728699,
+ "p90": 1407.3280096054077,
+ "p95": 1412.704050540924,
+ "p99": 1432.0639967918396
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 893634560,
+ "combineLogicalBytes": 893634560,
+ "fanoutMean": 5.32647705078125,
+ "recvTokensMax": 11032,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30638,35 +31185,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 989.2160296440125,
- "p90": 998.3360171318054,
- "p95": 1003.7120580673218,
- "p99": 1014.8160457611084
+ "p50": 1023.0720043182373,
+ "p90": 1034.5280170440674,
+ "p95": 1038.4960174560547,
+ "p99": 1047.1359491348267
},
"combine": {
- "p50": 1527.8079509735107,
- "p90": 1537.376046180725,
- "p95": 1542.8800582885742,
- "p99": 1555.7119846343994
+ "p50": 1569.85604763031,
+ "p90": 1575.7440328598022,
+ "p95": 1580.8639526367188,
+ "p99": 1587.2960090637207
},
"roundtrip": {
- "p50": 3446.592092514038,
- "p90": 3460.416078567505,
- "p95": 3467.77606010437,
- "p99": 3511.4240646362305
+ "p50": 2552.4160861968994,
+ "p90": 2566.272020339966,
+ "p95": 2571.7759132385254,
+ "p99": 2589.53595161438
},
"isolatedSum": {
- "p50": 2517.023980617523,
- "p90": 2535.7120633125305,
- "p95": 2546.592116355896,
- "p99": 2570.528030395508
+ "p50": 2592.9280519485474,
+ "p90": 2610.2720499038696,
+ "p95": 2619.3599700927734,
+ "p99": 2634.4319581985474
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1786265600,
+ "combineLogicalBytes": 1786265600,
+ "fanoutMean": 5.323486328125,
+ "recvTokensMax": 21895,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -30674,31 +31221,31 @@
]
},
{
- "id": "cx-7cddf11f",
- "identity": "b300|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_9bdf2cf9",
- "comparisonKey": "6cb3f1841938f6d9",
+ "id": "cx-529dcc68",
+ "identity": "gb300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "0958f6765b1be546",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T02:32:30.085872+00:00",
+ "generatedAt": "2026-06-29T14:13:22.285501+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_09",
- "sku": "b300",
- "backend": "deepep-hybrid",
- "phase": "decode",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep-hybrid · bf16",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16",
+ "model": "MiniMax-M3",
"shape": {
- "hidden": 7168,
+ "hidden": 6144,
"topk": 8,
"experts": 256,
"routing": "uniform",
@@ -30707,333 +31254,260 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "hybrid-e0a5b1d",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28308873989",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308873989",
- "createdAt": "2026-06-28T02:32:30.085872+00:00",
- "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 119.07199770212173,
- "p90": 122.17599898576736,
- "p95": 123.36000055074692,
- "p99": 126.94400548934937
- },
- "combine": {
- "p50": 36.51199862360954,
- "p90": 38.015998899936676,
- "p95": 38.816001266241074,
- "p99": 39.872001856565475
- },
- "roundtrip": {
- "p50": 151.48800611495972,
- "p90": 155.8080017566681,
- "p95": 157.98400342464447,
- "p99": 166.52800142765045
- },
- "isolatedSum": {
- "p50": 155.58399632573128,
- "p90": 160.19199788570404,
- "p95": 162.176001816988,
- "p99": 166.81600734591484
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 122.20799922943115,
- "p90": 125.34399330615997,
- "p95": 126.75200402736664,
- "p99": 132.54399597644806
+ "p50": 122.11199849843979,
+ "p90": 133.760005235672,
+ "p95": 147.10399508476257,
+ "p99": 180.83199858665466
},
"combine": {
- "p50": 37.63199970126152,
- "p90": 39.5519994199276,
- "p95": 39.84000161290169,
- "p99": 49.984000623226166
+ "p50": 119.90399658679962,
+ "p90": 128.92800569534302,
+ "p95": 132.79999792575836,
+ "p99": 160.76800227165222
},
"roundtrip": {
- "p50": 154.55999970436096,
- "p90": 157.72800147533417,
- "p95": 158.65600109100342,
- "p99": 166.143998503685
+ "p50": 212.64000236988068,
+ "p90": 223.07200729846954,
+ "p95": 232.54400491714478,
+ "p99": 283.03998708724976
},
"isolatedSum": {
- "p50": 159.83999893069267,
- "p90": 164.89599272608757,
- "p95": 166.59200564026833,
- "p99": 182.52799659967422
+ "p50": 242.0159950852394,
+ "p90": 262.688010931015,
+ "p95": 279.90399301052094,
+ "p99": 341.6000008583069
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 66576384,
+ "combineLogicalBytes": 66576384,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 125.02400577068329,
- "p90": 128.76799702644348,
- "p95": 130.11200726032257,
- "p99": 150.39999783039093
+ "p50": 156.38400614261627,
+ "p90": 166.59200191497803,
+ "p95": 170.33599317073822,
+ "p99": 182.72000551223755
},
"combine": {
- "p50": 39.99999910593033,
- "p90": 41.88799858093262,
- "p95": 42.65600070357323,
- "p99": 50.97600072622299
+ "p50": 159.9999964237213,
+ "p90": 168.32000017166138,
+ "p95": 170.1440066099167,
+ "p99": 184.1920018196106
},
"roundtrip": {
- "p50": 159.67999398708344,
- "p90": 162.88000345230103,
- "p95": 163.96799683570862,
- "p99": 178.5919964313507
+ "p50": 288.7679934501648,
+ "p90": 298.0160117149353,
+ "p95": 302.14399099349976,
+ "p99": 324.0320086479187
},
"isolatedSum": {
- "p50": 165.02400487661362,
- "p90": 170.6559956073761,
- "p95": 172.7680079638958,
- "p99": 201.37599855661392
+ "p50": 316.3840025663376,
+ "p90": 334.9120020866394,
+ "p95": 340.4799997806549,
+ "p99": 366.91200733184814
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 133619712,
+ "combineLogicalBytes": 133619712,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 130.49599528312683,
- "p90": 133.7279975414276,
- "p95": 135.04000008106232,
- "p99": 140.1599943637848
+ "p50": 221.69600427150726,
+ "p90": 230.81600666046143,
+ "p95": 234.65600609779358,
+ "p99": 257.9840123653412
},
"combine": {
- "p50": 40.863998234272,
- "p90": 42.367998510599136,
- "p95": 43.327998369932175,
- "p99": 46.30399867892265
+ "p50": 257.4079930782318,
+ "p90": 265.53601026535034,
+ "p95": 267.5839960575104,
+ "p99": 280.09599447250366
},
"roundtrip": {
- "p50": 167.4560010433197,
- "p90": 170.78399658203125,
- "p95": 172.19200730323792,
- "p99": 184.51200425624847
+ "p50": 426.30401253700256,
+ "p90": 436.0960125923157,
+ "p95": 439.7439956665039,
+ "p99": 474.8480021953583
},
"isolatedSum": {
- "p50": 171.35999351739883,
- "p90": 176.09599605202675,
- "p95": 178.3679984509945,
- "p99": 186.46399304270744
+ "p50": 479.1039973497391,
+ "p90": 496.35201692581177,
+ "p95": 502.24000215530396,
+ "p99": 538.0800068378448
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 267657216,
+ "combineLogicalBytes": 267657216,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 142.65599846839905,
- "p90": 145.6959992647171,
- "p95": 147.35999703407288,
- "p99": 153.60000729560852
+ "p50": 344.0319895744324,
+ "p90": 352.80001163482666,
+ "p95": 355.74400424957275,
+ "p99": 374.55999851226807
},
"combine": {
- "p50": 44.79999840259552,
- "p90": 46.720001846551895,
- "p95": 47.42399975657463,
- "p99": 48.54400083422661
+ "p50": 472.4479913711548,
+ "p90": 478.36801409721375,
+ "p95": 481.59998655319214,
+ "p99": 518.9759731292725
},
"roundtrip": {
- "p50": 185.47199666500092,
- "p90": 188.960000872612,
- "p95": 191.8720006942749,
- "p99": 211.29600703716278
+ "p50": 779.9040079116821,
+ "p90": 790.5279994010925,
+ "p95": 797.3759770393372,
+ "p99": 827.1039724349976
},
"isolatedSum": {
- "p50": 187.45599687099457,
- "p90": 192.416001111269,
- "p95": 194.7839967906475,
- "p99": 202.14400812983513
+ "p50": 816.4799809455872,
+ "p90": 831.1680257320404,
+ "p95": 837.3439908027649,
+ "p99": 893.5359716415405
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 534380544,
+ "combineLogicalBytes": 534380544,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 163.83999586105347,
- "p90": 166.52800142765045,
- "p95": 167.64800250530243,
- "p99": 172.41600155830383
+ "p50": 600.1920104026794,
+ "p90": 609.0559959411621,
+ "p95": 613.9839887619019,
+ "p99": 639.9359703063965
},
"combine": {
- "p50": 44.544000178575516,
- "p90": 46.33599892258644,
- "p95": 46.751998364925385,
- "p99": 54.336000233888626
+ "p50": 850.2399921417236,
+ "p90": 860.0000143051147,
+ "p95": 861.5999817848206,
+ "p99": 882.0160031318665
},
"roundtrip": {
- "p50": 207.7759951353073,
- "p90": 211.13599836826324,
- "p95": 213.02400529384613,
- "p99": 223.4240025281906
+ "p50": 1419.6159839630127,
+ "p90": 1429.5680522918701,
+ "p95": 1432.6080083847046,
+ "p99": 1440.832018852234
},
"isolatedSum": {
- "p50": 208.38399603962898,
- "p90": 212.8640003502369,
- "p95": 214.4000008702278,
- "p99": 226.75200179219246
+ "p50": 1450.432002544403,
+ "p90": 1469.0560102462769,
+ "p95": 1475.5839705467224,
+ "p99": 1521.951973438263
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1066119168,
+ "combineLogicalBytes": 1066119168,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 210.9760046005249,
- "p90": 214.65599536895752,
- "p95": 216.12800657749176,
- "p99": 231.455996632576
+ "p50": 1100.000023841858,
+ "p90": 1110.0159883499146,
+ "p95": 1115.231990814209,
+ "p99": 1144.0320014953613
},
"combine": {
- "p50": 48.51200059056282,
- "p90": 50.27199909090996,
- "p95": 50.84799975156784,
- "p99": 52.41600051522255
+ "p50": 1596.5759754180908,
+ "p90": 1604.9920320510864,
+ "p95": 1608.3199977874756,
+ "p99": 1615.5200004577637
},
"roundtrip": {
- "p50": 253.76001000404358,
- "p90": 258.0159902572632,
- "p95": 260.09601354599,
- "p99": 319.2639946937561
+ "p50": 2659.872055053711,
+ "p90": 2672.640085220337,
+ "p95": 2677.2799491882324,
+ "p99": 2690.0479793548584
},
"isolatedSum": {
- "p50": 259.4880051910877,
- "p90": 264.9279944598675,
- "p95": 266.9760063290596,
- "p99": 283.87199714779854
+ "p50": 2696.5759992599487,
+ "p90": 2715.008020401001,
+ "p95": 2723.5519886016846,
+ "p99": 2759.552001953125
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 209.6640020608902,
- "p90": 212.79999613761902,
- "p95": 214.33599293231964,
- "p99": 221.69600427150726
- },
- "combine": {
- "p50": 56.352000683546066,
- "p90": 59.007998555898666,
- "p95": 59.967998415231705,
- "p99": 61.24800071120262
- },
- "roundtrip": {
- "p50": 261.7279887199402,
- "p90": 264.70398902893066,
- "p95": 266.2079930305481,
- "p99": 278.8800001144409
- },
- "isolatedSum": {
- "p50": 266.01600274443626,
- "p90": 271.8079946935177,
- "p95": 274.30399134755135,
- "p99": 282.9440049827099
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2131722240,
+ "combineLogicalBytes": 2131722240,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -31041,28 +31515,28 @@
]
},
{
- "id": "cx-4a0e300c",
- "identity": "b300|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_9bdf2cf9",
- "comparisonKey": "e35b7ffee4d4fef7",
+ "id": "cx-e8b502a1",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb300_74218200",
+ "comparisonKey": "2870a44c1f8d758b",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T02:33:00.515887+00:00",
+ "generatedAt": "2026-06-29T13:58:42.564954+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_03",
- "sku": "b300",
- "backend": "deepep-hybrid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · deepep-hybrid · bf16",
+ "label": "GB300 EP8 · deepep · bf16",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -31074,74 +31548,75 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
- "kind": "packed",
- "nodes": 1,
+ "kind": "adversarial",
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "hybrid-e0a5b1d",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28308873989",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308873989",
- "createdAt": "2026-06-28T02:33:00.515887+00:00",
- "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 207.7759951353073,
- "p90": 211.2639993429184,
- "p95": 212.96000480651855,
- "p99": 220.15999257564545
+ "p50": 125.31200051307678,
+ "p90": 141.59999787807465,
+ "p95": 155.13600409030914,
+ "p99": 183.87199938297272
},
"combine": {
- "p50": 57.88800120353699,
- "p90": 59.23200026154518,
- "p95": 59.99999865889549,
- "p99": 67.84000247716904
+ "p50": 126.5919953584671,
+ "p90": 144.28800344467163,
+ "p95": 156.00000321865082,
+ "p99": 190.91199338436127
},
"roundtrip": {
- "p50": 260.44800877571106,
- "p90": 263.61599564552307,
- "p95": 266.36800169944763,
- "p99": 274.7200131416321
+ "p50": 223.7119972705841,
+ "p90": 243.20000410079956,
+ "p95": 256.51198625564575,
+ "p99": 284.35200452804565
},
"isolatedSum": {
- "p50": 265.6639963388443,
- "p90": 270.4959996044636,
- "p95": 272.96000346541405,
- "p99": 287.9999950528145
+ "p50": 251.90399587154388,
+ "p90": 285.8880013227463,
+ "p95": 311.13600730895996,
+ "p99": 374.783992767334
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 77672448,
"combineLogicalBytes": 77672448,
"fanoutMean": 5.291015625,
"recvTokensMax": 723,
- "stragglerRank": 6,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -31150,35 +31625,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 216.44799411296844,
- "p90": 220.60799598693848,
- "p95": 222.33599424362183,
- "p99": 248.73599410057068
+ "p50": 161.82400286197662,
+ "p90": 170.97599804401398,
+ "p95": 174.52800273895264,
+ "p99": 207.16799795627594
},
"combine": {
- "p50": 74.23999905586243,
- "p90": 76.38400048017502,
- "p95": 76.80000364780426,
- "p99": 87.20000088214874
+ "p50": 167.87199676036835,
+ "p90": 172.8000044822693,
+ "p95": 179.1040003299713,
+ "p99": 211.99999749660492
},
"roundtrip": {
- "p50": 283.87200832366943,
- "p90": 287.32800483703613,
- "p95": 289.5039916038513,
- "p99": 305.27999997138977
+ "p50": 298.43199253082275,
+ "p90": 307.71198868751526,
+ "p95": 315.90399146080017,
+ "p99": 358.4960103034973
},
"isolatedSum": {
- "p50": 290.6879931688309,
- "p90": 296.9919964671135,
- "p95": 299.1359978914261,
- "p99": 335.9359949827194
+ "p50": 329.69599962234497,
+ "p90": 343.77600252628326,
+ "p95": 353.63200306892395,
+ "p99": 419.16799545288086
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 155889664,
"combineLogicalBytes": 155889664,
"fanoutMean": 5.3095703125,
"recvTokensMax": 1422,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -31187,35 +31662,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 226.3679951429367,
- "p90": 230.24000227451324,
- "p95": 231.77599906921387,
- "p99": 250.2399981021881
+ "p50": 231.87200725078583,
+ "p90": 259.8719894886017,
+ "p95": 273.3120024204254,
+ "p99": 294.5599853992462
},
"combine": {
- "p50": 107.64800012111664,
- "p90": 109.79200154542923,
- "p95": 110.62400043010712,
- "p99": 118.75200271606445
+ "p50": 282.9119861125946,
+ "p90": 296.7039942741394,
+ "p95": 315.90399146080017,
+ "p99": 331.36001229286194
},
"roundtrip": {
- "p50": 329.47200536727905,
- "p90": 333.6319923400879,
- "p95": 335.6800079345703,
- "p99": 362.2719943523407
+ "p50": 458.75200629234314,
+ "p90": 481.7599952220917,
+ "p95": 489.6959960460663,
+ "p99": 510.43200492858887
},
"isolatedSum": {
- "p50": 334.01599526405334,
- "p90": 340.0320038199425,
- "p95": 342.399999499321,
- "p99": 368.99200081825256
+ "p50": 514.7839933633804,
+ "p90": 556.5759837627411,
+ "p95": 589.2159938812256,
+ "p99": 625.9199976921082
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 312266752,
"combineLogicalBytes": 312266752,
"fanoutMean": 5.31787109375,
"recvTokensMax": 2779,
- "stragglerRank": 6,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -31224,35 +31699,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 286.01598739624023,
- "p90": 289.34401273727417,
- "p95": 290.5600070953369,
- "p99": 304.3519854545593
+ "p50": 361.85601353645325,
+ "p90": 388.0319893360138,
+ "p95": 397.5040018558502,
+ "p99": 415.3920114040375
},
"combine": {
- "p50": 185.15199422836304,
- "p90": 187.8719925880432,
- "p95": 188.54400515556335,
- "p99": 190.8160001039505
+ "p50": 489.1520142555237,
+ "p90": 513.696014881134,
+ "p95": 525.4719853401184,
+ "p99": 546.3359951972961
},
"roundtrip": {
- "p50": 467.0400023460388,
- "p90": 471.48799896240234,
- "p95": 473.91998767852783,
- "p99": 505.3759813308716
+ "p50": 815.936028957367,
+ "p90": 848.6400246620178,
+ "p95": 862.2400164604187,
+ "p99": 877.9839873313904
},
"isolatedSum": {
- "p50": 471.16798162460327,
- "p90": 477.2160053253174,
- "p95": 479.10401225090027,
- "p99": 495.1679855585098
+ "p50": 851.0080277919769,
+ "p90": 901.7280042171478,
+ "p95": 922.9759871959686,
+ "p99": 961.7280066013336
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 623443968,
"combineLogicalBytes": 623443968,
"fanoutMean": 5.30859375,
"recvTokensMax": 5505,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -31261,35 +31736,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 458.40001106262207,
- "p90": 461.60000562667847,
- "p95": 463.1040096282959,
- "p99": 475.93599557876587
+ "p50": 615.6479716300964,
+ "p90": 625.823974609375,
+ "p95": 631.0719847679138,
+ "p99": 670.9439754486084
},
"combine": {
- "p50": 320.76799869537354,
- "p90": 323.93598556518555,
- "p95": 325.0240087509155,
- "p99": 332.5760066509247
+ "p50": 870.464026927948,
+ "p90": 877.7920007705688,
+ "p95": 882.8160166740417,
+ "p99": 902.4959802627563
},
"roundtrip": {
- "p50": 776.095986366272,
- "p90": 781.216025352478,
- "p95": 785.9200239181519,
- "p99": 819.5199966430664
+ "p50": 1457.4400186538696,
+ "p90": 1474.176049232483,
+ "p95": 1483.7440252304077,
+ "p99": 1505.5359601974487
},
"isolatedSum": {
- "p50": 779.1680097579956,
- "p90": 785.535991191864,
- "p95": 788.1280183792114,
- "p99": 808.5120022296906
+ "p50": 1486.1119985580444,
+ "p90": 1503.6159753799438,
+ "p95": 1513.8880014419556,
+ "p99": 1573.4399557113647
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 1243805696,
"combineLogicalBytes": 1243805696,
"fanoutMean": 5.29547119140625,
"recvTokensMax": 10952,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -31298,35 +31773,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 741.0560250282288,
- "p90": 745.2800273895264,
- "p95": 748.8639950752258,
- "p99": 812.4160170555115
+ "p50": 1129.7600269317627,
+ "p90": 1146.5599536895752,
+ "p95": 1158.9759588241577,
+ "p99": 1175.9040355682373
},
"combine": {
- "p50": 593.1199789047241,
- "p90": 595.3599810600281,
- "p95": 596.1599946022034,
- "p99": 601.5999913215637
+ "p50": 1619.104027748108,
+ "p90": 1635.6159448623657,
+ "p95": 1645.18404006958,
+ "p99": 1662.5280380249023
},
"roundtrip": {
- "p50": 1334.2399597167969,
- "p90": 1338.528037071228,
- "p95": 1340.000033378601,
- "p99": 1418.4319972991943
+ "p50": 2731.6160202026367,
+ "p90": 2748.863935470581,
+ "p95": 2757.9519748687744,
+ "p99": 2776.8640518188477
},
"isolatedSum": {
- "p50": 1334.1760039329529,
- "p90": 1340.6400084495544,
- "p95": 1345.0239896774292,
- "p99": 1414.0160083770752
+ "p50": 2748.8640546798706,
+ "p90": 2782.175898551941,
+ "p95": 2804.159998893738,
+ "p99": 2838.4320735931396
},
"roundtripMeasured": true,
"dispatchLogicalBytes": 2487009280,
"combineLogicalBytes": 2487009280,
"fanoutMean": 5.294189453125,
"recvTokensMax": 21781,
- "stragglerRank": 4,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -31334,28 +31809,28 @@
]
},
{
- "id": "cx-6136a9d3",
- "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|fp8|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_5ec8473f",
- "comparisonKey": "be2ec236ee21b030",
+ "id": "cx-ba4423d9",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "02cbe60d1402a41c",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T05:40:56.109359+00:00",
+ "generatedAt": "2026-06-29T13:48:34.462312+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "decode",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · bf16",
+ "label": "GB300 EP8 · deepep · bf16",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -31367,333 +31842,260 @@
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
- "combineQuantMode": "fp8"
+ "combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28312753674",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28312753674",
- "createdAt": "2026-06-28T05:40:56.109359+00:00",
- "sha": "85273c67789913421295080d1d06daacdc027a4a"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 54.59199845790863,
- "p90": 56.60799890756607,
- "p95": 57.472001761198044,
- "p99": 65.66400080919266
- },
- "combine": {
- "p50": 54.59199845790863,
- "p90": 56.60799890756607,
- "p95": 57.472001761198044,
- "p99": 65.66400080919266
- },
- "roundtrip": {
- "p50": 54.59199845790863,
- "p90": 56.60799890756607,
- "p95": 57.472001761198044,
- "p99": 65.66400080919266
- },
- "isolatedSum": {
- "p50": 109.18399691581726,
- "p90": 113.21599781513214,
- "p95": 114.94400352239609,
- "p99": 131.32800161838531
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 55.36000058054924,
- "p90": 57.440001517534256,
- "p95": 58.9120015501976,
- "p99": 68.15999746322632
- },
- "combine": {
- "p50": 55.36000058054924,
- "p90": 57.440001517534256,
- "p95": 58.9120015501976,
- "p99": 68.15999746322632
- },
- "roundtrip": {
- "p50": 55.36000058054924,
- "p90": 57.440001517534256,
- "p95": 58.9120015501976,
- "p99": 68.15999746322632
- },
- "isolatedSum": {
- "p50": 110.72000116109848,
- "p90": 114.88000303506851,
- "p95": 117.8240031003952,
- "p99": 136.31999492645264
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 57.18399956822395,
- "p90": 59.74400043487549,
- "p95": 60.736000537872314,
- "p99": 74.23999905586243
+ "p50": 123.77600371837616,
+ "p90": 134.36800241470337,
+ "p95": 155.2640050649643,
+ "p99": 180.63999712467194
},
"combine": {
- "p50": 57.18399956822395,
- "p90": 59.74400043487549,
- "p95": 60.736000537872314,
- "p99": 74.23999905586243
+ "p50": 123.64800274372101,
+ "p90": 131.96800649166107,
+ "p95": 139.93600010871887,
+ "p99": 164.99200463294983
},
"roundtrip": {
- "p50": 57.18399956822395,
- "p90": 59.74400043487549,
- "p95": 60.736000537872314,
- "p99": 74.23999905586243
+ "p50": 221.53599560260773,
+ "p90": 233.15200209617615,
+ "p95": 243.00800263881683,
+ "p99": 297.4399924278259
},
"isolatedSum": {
- "p50": 114.3679991364479,
- "p90": 119.48800086975098,
- "p95": 121.47200107574463,
- "p99": 148.47999811172485
+ "p50": 247.42400646209717,
+ "p90": 266.33600890636444,
+ "p95": 295.20000517368317,
+ "p99": 345.63200175762177
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 57.50399827957153,
- "p90": 60.06399914622307,
- "p95": 61.85600161552429,
- "p99": 68.7360018491745
+ "p50": 160.47999262809753,
+ "p90": 168.16000640392303,
+ "p95": 170.6240028142929,
+ "p99": 176.09600722789764
},
"combine": {
- "p50": 57.50399827957153,
- "p90": 60.06399914622307,
- "p95": 61.85600161552429,
- "p99": 68.7360018491745
+ "p50": 166.6879951953888,
+ "p90": 171.36000096797943,
+ "p95": 173.0239987373352,
+ "p99": 180.31999468803406
},
"roundtrip": {
- "p50": 57.50399827957153,
- "p90": 60.06399914622307,
- "p95": 61.85600161552429,
- "p99": 68.7360018491745
+ "p50": 297.0240116119385,
+ "p90": 305.08801341056824,
+ "p95": 307.9040050506592,
+ "p99": 314.6879971027374
},
"isolatedSum": {
- "p50": 115.00799655914307,
- "p90": 120.12799829244614,
- "p95": 123.71200323104858,
- "p99": 137.472003698349
+ "p50": 327.1679878234863,
+ "p90": 339.52000737190247,
+ "p95": 343.6480015516281,
+ "p99": 356.4160019159317
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 155889664,
+ "combineLogicalBytes": 155889664,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 58.20799991488457,
- "p90": 60.35200133919716,
- "p95": 61.824001371860504,
- "p99": 74.17599856853485
+ "p50": 227.55199670791626,
+ "p90": 237.95199394226074,
+ "p95": 241.15200340747833,
+ "p99": 287.03999519348145
},
"combine": {
- "p50": 58.20799991488457,
- "p90": 60.35200133919716,
- "p95": 61.824001371860504,
- "p99": 74.17599856853485
+ "p50": 284.41599011421204,
+ "p90": 292.1600043773651,
+ "p95": 294.20799016952515,
+ "p99": 301.85601115226746
},
"roundtrip": {
- "p50": 58.20799991488457,
- "p90": 60.35200133919716,
- "p95": 61.824001371860504,
- "p99": 74.17599856853485
+ "p50": 461.8239998817444,
+ "p90": 471.2960124015808,
+ "p95": 474.8159945011139,
+ "p99": 501.5680193901062
},
"isolatedSum": {
- "p50": 116.41599982976913,
- "p90": 120.70400267839432,
- "p95": 123.64800274372101,
- "p99": 148.3519971370697
+ "p50": 511.9679868221283,
+ "p90": 530.1119983196259,
+ "p95": 535.3599935770035,
+ "p99": 588.8960063457489
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 312266752,
+ "combineLogicalBytes": 312266752,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 59.328000992536545,
- "p90": 61.59999966621399,
- "p95": 62.72000074386597,
- "p99": 75.6160020828247
+ "p50": 360.54399609565735,
+ "p90": 367.8080141544342,
+ "p95": 371.13600969314575,
+ "p99": 404.1920006275177
},
"combine": {
- "p50": 59.328000992536545,
- "p90": 61.59999966621399,
- "p95": 62.72000074386597,
- "p99": 75.6160020828247
+ "p50": 487.87200450897217,
+ "p90": 493.6639964580536,
+ "p95": 496.44801020622253,
+ "p99": 506.1759948730469
},
"roundtrip": {
- "p50": 59.328000992536545,
- "p90": 61.59999966621399,
- "p95": 62.72000074386597,
- "p99": 75.6160020828247
+ "p50": 816.1600232124329,
+ "p90": 824.2560029029846,
+ "p95": 827.072024345398,
+ "p99": 852.895975112915
},
"isolatedSum": {
- "p50": 118.65600198507309,
- "p90": 123.19999933242798,
- "p95": 125.44000148773193,
- "p99": 151.2320041656494
+ "p50": 848.4160006046295,
+ "p90": 861.4720106124878,
+ "p95": 867.5840198993683,
+ "p99": 910.3679955005646
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 623443968,
+ "combineLogicalBytes": 623443968,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 62.78400123119354,
- "p90": 65.21599739789963,
- "p95": 66.17599725723267,
- "p99": 76.67200267314911
+ "p50": 615.1040196418762,
+ "p90": 624.9600052833557,
+ "p95": 628.9920210838318,
+ "p99": 655.4880142211914
},
"combine": {
- "p50": 62.78400123119354,
- "p90": 65.21599739789963,
- "p95": 66.17599725723267,
- "p99": 76.67200267314911
+ "p50": 870.6560134887695,
+ "p90": 879.4559836387634,
+ "p95": 884.447991847992,
+ "p99": 908.6080193519592
},
"roundtrip": {
- "p50": 62.78400123119354,
- "p90": 65.21599739789963,
- "p95": 66.17599725723267,
- "p99": 76.67200267314911
+ "p50": 1458.240032196045,
+ "p90": 1469.9840545654297,
+ "p95": 1476.0639667510986,
+ "p99": 1511.8720531463623
},
"isolatedSum": {
- "p50": 125.56800246238708,
- "p90": 130.43199479579926,
- "p95": 132.35199451446533,
- "p99": 153.34400534629822
+ "p50": 1485.7600331306458,
+ "p90": 1504.4159889221191,
+ "p95": 1513.4400129318237,
+ "p99": 1564.0960335731506
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1243805696,
+ "combineLogicalBytes": 1243805696,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 71.07199728488922,
- "p90": 72.9919970035553,
- "p95": 73.63200187683105,
- "p99": 87.36000210046768
+ "p50": 1133.0879926681519,
+ "p90": 1140.6079530715942,
+ "p95": 1143.5840129852295,
+ "p99": 1149.1520404815674
},
"combine": {
- "p50": 71.07199728488922,
- "p90": 72.9919970035553,
- "p95": 73.63200187683105,
- "p99": 87.36000210046768
+ "p50": 1619.264006614685,
+ "p90": 1625.440001487732,
+ "p95": 1628.0319690704346,
+ "p99": 1635.6480121612549
},
"roundtrip": {
- "p50": 71.07199728488922,
- "p90": 72.9919970035553,
- "p95": 73.63200187683105,
- "p99": 87.36000210046768
+ "p50": 2728.7039756774902,
+ "p90": 2738.1439208984375,
+ "p95": 2741.1201000213623,
+ "p99": 2750.080108642578
},
"isolatedSum": {
- "p50": 142.14399456977844,
- "p90": 145.9839940071106,
- "p95": 147.2640037536621,
- "p99": 174.72000420093536
+ "p50": 2752.351999282837,
+ "p90": 2766.047954559326,
+ "p95": 2771.615982055664,
+ "p99": 2784.8000526428223
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 2487009280,
+ "combineLogicalBytes": 2487009280,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -31701,366 +32103,476 @@
]
},
{
- "id": "cx-4e6a4685",
- "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_5ec8473f",
- "comparisonKey": "0f567db5f9c07223",
+ "id": "cx-fdc9a7c6",
+ "identity": "gb300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8",
+ "colorKey": "gb300_b97bfb88",
+ "comparisonKey": "1dc982986bf98728",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T17:26:58.425220+00:00",
+ "generatedAt": "2026-06-29T13:18:54.160154+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "decode",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · bf16",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16",
+ "model": "Kimi-K2",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
+ "experts": 384,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "fc08bf2f8d42ed8",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28296434249",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296434249",
- "createdAt": "2026-06-27T17:26:58.425220+00:00",
- "sha": "2ebeba9134a8c84f7a80ac87742d57f7cdf1cf18"
+ "id": "28374328313",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374328313",
+ "createdAt": "2026-06-29T13:08:14Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 48.576001077890396,
- "p90": 49.855999648571014,
- "p95": 50.81599950790405,
- "p99": 64.4799992442131
+ "p50": 124.41600114107132,
+ "p90": 131.3599944114685,
+ "p95": 134.49600338935852,
+ "p99": 152.48000621795654
},
"combine": {
- "p50": 48.576001077890396,
- "p90": 49.855999648571014,
- "p95": 50.81599950790405,
- "p99": 64.4799992442131
+ "p50": 122.49600142240524,
+ "p90": 131.48799538612366,
+ "p95": 133.02400708198547,
+ "p99": 143.71199905872345
},
"roundtrip": {
- "p50": 48.576001077890396,
- "p90": 49.855999648571014,
- "p95": 50.81599950790405,
- "p99": 64.4799992442131
+ "p50": 223.7119972705841,
+ "p90": 232.06399381160736,
+ "p95": 234.46400463581085,
+ "p99": 241.31199717521667
},
"isolatedSum": {
- "p50": 97.15200215578079,
- "p90": 99.71199929714203,
- "p95": 101.6319990158081,
- "p99": 128.9599984884262
+ "p50": 246.91200256347656,
+ "p90": 262.84798979759216,
+ "p95": 267.520010471344,
+ "p99": 296.19200527668
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 77514752,
+ "combineLogicalBytes": 77514752,
+ "fanoutMean": 5.2802734375,
+ "recvTokensMax": 707,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 49.44000020623207,
- "p90": 50.912000238895416,
- "p95": 51.552001386880875,
- "p99": 56.543998420238495
+ "p50": 163.29599916934967,
+ "p90": 180.57599663734436,
+ "p95": 196.60800695419312,
+ "p99": 224.67200458049774
},
"combine": {
- "p50": 49.44000020623207,
- "p90": 50.912000238895416,
- "p95": 51.552001386880875,
- "p99": 56.543998420238495
+ "p50": 169.0240055322647,
+ "p90": 201.7280012369156,
+ "p95": 208.44799280166626,
+ "p99": 229.5359969139099
},
"roundtrip": {
- "p50": 49.44000020623207,
- "p90": 50.912000238895416,
- "p95": 51.552001386880875,
- "p99": 56.543998420238495
+ "p50": 301.56800150871277,
+ "p90": 311.39200925827026,
+ "p95": 314.59200382232666,
+ "p99": 321.5999901294708
},
"isolatedSum": {
- "p50": 98.88000041246414,
- "p90": 101.82400047779083,
- "p95": 103.10400277376175,
- "p99": 113.08799684047699
+ "p50": 332.3200047016144,
+ "p90": 382.30399787425995,
+ "p95": 405.0559997558594,
+ "p99": 454.20800149440765
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 154570752,
+ "combineLogicalBytes": 154570752,
+ "fanoutMean": 5.2646484375,
+ "recvTokensMax": 1391,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 51.552001386880875,
- "p90": 53.888000547885895,
- "p95": 54.976001381874084,
- "p99": 66.72000139951706
+ "p50": 229.63200509548187,
+ "p90": 245.88799476623535,
+ "p95": 262.1760070323944,
+ "p99": 281.3760042190552
},
"combine": {
- "p50": 51.552001386880875,
- "p90": 53.888000547885895,
- "p95": 54.976001381874084,
- "p99": 66.72000139951706
+ "p50": 289.63199257850647,
+ "p90": 312.25600838661194,
+ "p95": 320.67200541496277,
+ "p99": 339.55198526382446
},
"roundtrip": {
- "p50": 51.552001386880875,
- "p90": 53.888000547885895,
- "p95": 54.976001381874084,
- "p99": 66.72000139951706
+ "p50": 465.05600214004517,
+ "p90": 485.9200119972229,
+ "p95": 495.712012052536,
+ "p99": 518.2399749755859
},
"isolatedSum": {
- "p50": 103.10400277376175,
- "p90": 107.77600109577179,
- "p95": 109.95200276374817,
- "p99": 133.44000279903412
+ "p50": 519.2639976739883,
+ "p90": 558.1440031528473,
+ "p95": 582.8480124473572,
+ "p99": 620.9279894828796
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 309772288,
+ "combineLogicalBytes": 309772288,
+ "fanoutMean": 5.275390625,
+ "recvTokensMax": 2754,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 51.7439991235733,
- "p90": 54.43200096487999,
- "p95": 55.07199838757515,
- "p99": 61.664000153541565
+ "p50": 359.42399501800537,
+ "p90": 385.50400733947754,
+ "p95": 399.29598569869995,
+ "p99": 414.0160083770752
},
"combine": {
- "p50": 51.7439991235733,
- "p90": 54.43200096487999,
- "p95": 55.07199838757515,
- "p99": 61.664000153541565
+ "p50": 490.01601338386536,
+ "p90": 505.7920217514038,
+ "p95": 515.8079862594604,
+ "p99": 541.8239831924438
},
"roundtrip": {
- "p50": 51.7439991235733,
- "p90": 54.43200096487999,
- "p95": 55.07199838757515,
- "p99": 61.664000153541565
+ "p50": 817.8240060806274,
+ "p90": 834.272027015686,
+ "p95": 856.000006198883,
+ "p99": 874.5920062065125
},
"isolatedSum": {
- "p50": 103.4879982471466,
- "p90": 108.86400192975998,
- "p95": 110.1439967751503,
- "p99": 123.32800030708313
+ "p50": 849.4400084018707,
+ "p90": 891.2960290908813,
+ "p95": 915.1039719581604,
+ "p99": 955.839991569519
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 619501568,
+ "combineLogicalBytes": 619501568,
+ "fanoutMean": 5.2750244140625,
+ "recvTokensMax": 5469,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 54.207999259233475,
- "p90": 56.0000017285347,
- "p95": 56.703999638557434,
- "p99": 63.13599646091461
+ "p50": 608.4480285644531,
+ "p90": 615.231990814209,
+ "p95": 618.6239719390869,
+ "p99": 633.5359811782837
},
"combine": {
- "p50": 54.207999259233475,
- "p90": 56.0000017285347,
- "p95": 56.703999638557434,
- "p99": 63.13599646091461
+ "p50": 857.7280044555664,
+ "p90": 862.8479838371277,
+ "p95": 865.9200072288513,
+ "p99": 871.7439770698547
},
"roundtrip": {
- "p50": 54.207999259233475,
- "p90": 56.0000017285347,
- "p95": 56.703999638557434,
- "p99": 63.13599646091461
+ "p50": 1434.0800046920776,
+ "p90": 1442.4959421157837,
+ "p95": 1445.7600116729736,
+ "p99": 1471.8719720840454
},
"isolatedSum": {
- "p50": 108.41599851846695,
- "p90": 112.0000034570694,
- "p95": 113.40799927711487,
- "p99": 126.27199292182922
+ "p50": 1466.1760330200195,
+ "p90": 1478.0799746513367,
+ "p95": 1484.5439791679382,
+ "p99": 1505.2799582481384
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 1239375872,
+ "combineLogicalBytes": 1239375872,
+ "fanoutMean": 5.276611328125,
+ "recvTokensMax": 10883,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 54.976001381874084,
- "p90": 57.312000542879105,
- "p95": 58.04799869656563,
- "p99": 60.7680007815361
+ "p50": 1127.3280382156372,
+ "p90": 1133.7920427322388,
+ "p95": 1135.8720064163208,
+ "p99": 1139.4239664077759
},
"combine": {
- "p50": 54.976001381874084,
- "p90": 57.312000542879105,
- "p95": 58.04799869656563,
- "p99": 60.7680007815361
+ "p50": 1611.4239692687988,
+ "p90": 1621.5039491653442,
+ "p95": 1624.1919994354248,
+ "p99": 1664.9600267410278
},
"roundtrip": {
- "p50": 54.976001381874084,
- "p90": 57.312000542879105,
- "p95": 58.04799869656563,
- "p99": 60.7680007815361
+ "p50": 2713.792085647583,
+ "p90": 2722.9440212249756,
+ "p95": 2724.9600887298584,
+ "p99": 2731.4560413360596
},
"isolatedSum": {
- "p50": 109.95200276374817,
- "p90": 114.62400108575821,
- "p95": 116.09599739313126,
- "p99": 121.5360015630722
+ "p50": 2738.752007484436,
+ "p90": 2755.295991897583,
+ "p95": 2760.0640058517456,
+ "p99": 2804.3839931488037
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 2479669248,
+ "combineLogicalBytes": 2479669248,
+ "fanoutMean": 5.278564453125,
+ "recvTokensMax": 21730,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ }
+ ]
+ },
+ {
+ "id": "cx-8c3cdccb",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||8bf55e36863f028",
+ "colorKey": "gb300_d4c8afb8",
+ "comparisonKey": "7c5d1ae307d82bca",
+ "schemaVersion": 3,
+ "generatedAt": "2026-06-29T13:42:20.458799+00:00",
+ "status": "valid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
+ "mode": "normal",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
+ "worldSize": 8,
+ "epSize": 8,
+ "label": "GB300 EP8 · deepep · bf16 · balanced",
+ "model": "DeepSeek-V3/V4",
+ "shape": {
+ "hidden": 7168,
+ "topk": 8,
+ "experts": 256,
+ "routing": "balanced",
+ "routingLabel": "balanced",
+ "routingStep": 0,
+ "unevenTokens": "none",
+ "eplbEnabled": false,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
+ "combineQuantMode": "none"
+ },
+ "resourceProfile": {
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
+ "paretoEligible": false
+ },
+ "placement": {
+ "kind": "packed",
+ "nodes": 2,
+ "gpusPerNode": 8,
+ "scaleUpDomain": 8
+ },
+ "routingConsistent": true,
+ "traceSignature": "8bf55e36863f028",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
+ "repository": "SemiAnalysisAI/InferenceX",
+ "run": {
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ },
+ "rows": [
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 141.4400041103363,
+ "p90": 151.10400319099426,
+ "p95": 154.84799444675446,
+ "p99": 160.73599457740784
+ },
+ "combine": {
+ "p50": 146.36799693107605,
+ "p90": 156.19200468063354,
+ "p95": 158.11200439929962,
+ "p99": 167.39200055599213
+ },
+ "roundtrip": {
+ "p50": 259.2960000038147,
+ "p90": 268.3199942111969,
+ "p95": 271.4560031890869,
+ "p99": 277.24799513816833
+ },
+ "isolatedSum": {
+ "p50": 287.80800104141235,
+ "p90": 307.2960078716278,
+ "p95": 312.9599988460541,
+ "p99": 328.12799513339996
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 117440512,
+ "combineLogicalBytes": 117440512,
+ "fanoutMean": 8,
+ "recvTokensMax": 1024,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 59.29600074887276,
- "p90": 61.47199869155884,
- "p95": 62.912002205848694,
- "p99": 81.31200075149536
+ "p50": 279.07198667526245,
+ "p90": 288.9919877052307,
+ "p95": 292.4799919128418,
+ "p99": 327.90398597717285
},
"combine": {
- "p50": 59.29600074887276,
- "p90": 61.47199869155884,
- "p95": 62.912002205848694,
- "p99": 81.31200075149536
+ "p50": 368.00000071525574,
+ "p90": 376.70400738716125,
+ "p95": 378.08001041412354,
+ "p99": 390.5920088291168
},
"roundtrip": {
- "p50": 59.29600074887276,
- "p90": 61.47199869155884,
- "p95": 62.912002205848694,
- "p99": 81.31200075149536
+ "p50": 606.4959764480591,
+ "p90": 617.0240044593811,
+ "p95": 620.8320260047913,
+ "p99": 659.6480011940002
},
"isolatedSum": {
- "p50": 118.59200149774551,
- "p90": 122.94399738311768,
- "p95": 125.82400441169739,
- "p99": 162.62400150299072
+ "p50": 647.0719873905182,
+ "p90": 665.695995092392,
+ "p95": 670.5600023269653,
+ "p99": 718.4959948062897
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 469762048,
+ "combineLogicalBytes": 469762048,
+ "fanoutMean": 8,
+ "recvTokensMax": 4096,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 69.2799985408783,
- "p90": 71.00799679756165,
- "p95": 71.29599899053574,
- "p99": 78.97599786520004
+ "p50": 808.9600205421448,
+ "p90": 823.2319951057434,
+ "p95": 826.6559839248657,
+ "p99": 860.8959913253784
},
"combine": {
- "p50": 69.2799985408783,
- "p90": 71.00799679756165,
- "p95": 71.29599899053574,
- "p99": 78.97599786520004
+ "p50": 1192.863941192627,
+ "p90": 1201.1200189590454,
+ "p95": 1202.623963356018,
+ "p99": 1207.6159715652466
},
"roundtrip": {
- "p50": 69.2799985408783,
- "p90": 71.00799679756165,
- "p95": 71.29599899053574,
- "p99": 78.97599786520004
+ "p50": 1971.0719585418701,
+ "p90": 1987.712025642395,
+ "p95": 1993.183970451355,
+ "p99": 2004.2240619659424
},
"isolatedSum": {
- "p50": 138.5599970817566,
- "p90": 142.0159935951233,
- "p95": 142.59199798107147,
- "p99": 157.95199573040009
+ "p50": 2001.8239617347717,
+ "p90": 2024.3520140647888,
+ "p95": 2029.2799472808838,
+ "p99": 2068.511962890625
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1879048192,
+ "combineLogicalBytes": 1879048192,
+ "fanoutMean": 8,
+ "recvTokensMax": 16384,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -32068,366 +32580,476 @@
]
},
{
- "id": "cx-c4d51897",
- "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|nvfp4|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_5ec8473f",
- "comparisonKey": "fcbe4c54041214ff",
+ "id": "cx-5f85a462",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7f1ea4cf569d12c",
+ "colorKey": "gb300_f163949b",
+ "comparisonKey": "255dfa9bd7173c73",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T06:30:40.335883+00:00",
+ "generatedAt": "2026-06-29T13:44:11.777684+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "decode",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · bf16",
+ "label": "GB300 EP8 · deepep · bf16 · balanced-rank-local",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "balanced-rank-local",
+ "routingLabel": "balanced-rank-local",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
- "combineQuantMode": "nvfp4"
+ "combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "7f1ea4cf569d12c",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28313781903",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28313781903",
- "createdAt": "2026-06-28T06:30:40.335883+00:00",
- "sha": "0e61ac1009cdb939b811e283f71ad6306241d3dd"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 55.64799904823303,
- "p90": 57.88800120353699,
- "p95": 60.127999633550644,
- "p99": 76.86399668455124
+ "p50": 100.19200295209885,
+ "p90": 110.17599701881409,
+ "p95": 114.68800157308578,
+ "p99": 122.65600264072418
},
"combine": {
- "p50": 55.64799904823303,
- "p90": 57.88800120353699,
- "p95": 60.127999633550644,
- "p99": 76.86399668455124
+ "p50": 81.91999793052673,
+ "p90": 85.9839990735054,
+ "p95": 92.28800237178802,
+ "p99": 96.3520035147667
},
"roundtrip": {
- "p50": 55.64799904823303,
- "p90": 57.88800120353699,
- "p95": 60.127999633550644,
- "p99": 76.86399668455124
+ "p50": 155.29599785804749,
+ "p90": 171.36000096797943,
+ "p95": 193.95199418067932,
+ "p99": 208.28799903392792
},
"isolatedSum": {
- "p50": 111.29599809646606,
- "p90": 115.77600240707397,
- "p95": 120.25599926710129,
- "p99": 153.72799336910248
+ "p50": 182.11200088262558,
+ "p90": 196.1599960923195,
+ "p95": 206.9760039448738,
+ "p99": 219.00800615549088
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 14680064,
+ "combineLogicalBytes": 14680064,
+ "fanoutMean": 1,
+ "recvTokensMax": 128,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 56.51199817657471,
- "p90": 58.52799862623215,
- "p95": 59.61599946022034,
- "p99": 66.3679987192154
+ "p50": 122.84799665212631,
+ "p90": 131.45600259304047,
+ "p95": 135.23200154304504,
+ "p99": 144.76799964904785
},
"combine": {
- "p50": 56.51199817657471,
- "p90": 58.52799862623215,
- "p95": 59.61599946022034,
- "p99": 66.3679987192154
+ "p50": 124.12799894809723,
+ "p90": 131.8719983100891,
+ "p95": 133.85599851608276,
+ "p99": 146.91199362277985
},
"roundtrip": {
- "p50": 56.51199817657471,
- "p90": 58.52799862623215,
- "p95": 59.61599946022034,
- "p99": 66.3679987192154
+ "p50": 220.60799598693848,
+ "p90": 228.4799963235855,
+ "p95": 232.60800540447235,
+ "p99": 247.23200500011444
},
"isolatedSum": {
- "p50": 113.02399635314941,
- "p90": 117.0559972524643,
- "p95": 119.23199892044067,
- "p99": 132.7359974384308
+ "p50": 246.97599560022354,
+ "p90": 263.3280009031296,
+ "p95": 269.0880000591278,
+ "p99": 291.6799932718277
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 58720256,
+ "combineLogicalBytes": 58720256,
+ "fanoutMean": 1,
+ "recvTokensMax": 512,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 57.5999990105629,
- "p90": 59.67999994754791,
- "p95": 60.92799827456474,
- "p99": 67.1359971165657
+ "p50": 210.36800742149353,
+ "p90": 219.7439968585968,
+ "p95": 222.20799326896667,
+ "p99": 242.2720044851303
},
"combine": {
- "p50": 57.5999990105629,
- "p90": 59.67999994754791,
- "p95": 60.92799827456474,
- "p99": 67.1359971165657
+ "p50": 256.0960054397583,
+ "p90": 263.13599944114685,
+ "p95": 266.6560113430023,
+ "p99": 283.29598903656006
},
"roundtrip": {
- "p50": 57.5999990105629,
- "p90": 59.67999994754791,
- "p95": 60.92799827456474,
- "p99": 67.1359971165657
+ "p50": 447.90399074554443,
+ "p90": 455.4559886455536,
+ "p95": 457.7600061893463,
+ "p99": 465.6960070133209
},
"isolatedSum": {
- "p50": 115.1999980211258,
- "p90": 119.35999989509583,
- "p95": 121.85599654912949,
- "p99": 134.2719942331314
+ "p50": 466.46401286125183,
+ "p90": 482.87999629974365,
+ "p95": 488.864004611969,
+ "p99": 525.5679935216904
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 234881024,
+ "combineLogicalBytes": 234881024,
+ "fanoutMean": 1,
+ "recvTokensMax": 2048,
+ "stragglerRank": 3,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ }
+ ]
+ },
+ {
+ "id": "cx-ba690ae0",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||7ac30b0a39b1405",
+ "colorKey": "gb300_c93e2296",
+ "comparisonKey": "2ca11a784293be10",
+ "schemaVersion": 3,
+ "generatedAt": "2026-06-29T13:40:00.700440+00:00",
+ "status": "valid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
+ "mode": "normal",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
+ "worldSize": 8,
+ "epSize": 8,
+ "label": "GB300 EP8 · deepep · bf16 · balanced+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
+ "shape": {
+ "hidden": 7168,
+ "topk": 8,
+ "experts": 288,
+ "routing": "balanced",
+ "routingLabel": "balanced+eplb",
+ "routingStep": 0,
+ "unevenTokens": "none",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
+ "combineQuantMode": "none"
+ },
+ "resourceProfile": {
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
+ "paretoEligible": false
+ },
+ "placement": {
+ "kind": "packed",
+ "nodes": 2,
+ "gpusPerNode": 8,
+ "scaleUpDomain": 8
+ },
+ "routingConsistent": true,
+ "traceSignature": "7ac30b0a39b1405",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1,
+ "eplbImbalanceAfter": 1,
+ "backendVersion": "1.1.0+814e508",
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
+ "repository": "SemiAnalysisAI/InferenceX",
+ "run": {
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ },
+ "rows": [
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 108.64000022411346,
+ "p90": 116.64000153541565,
+ "p95": 119.9679970741272,
+ "p99": 128.92800569534302
+ },
+ "combine": {
+ "p50": 96.70399874448776,
+ "p90": 103.7760004401207,
+ "p95": 106.65600001811981,
+ "p99": 110.04800349473953
+ },
+ "roundtrip": {
+ "p50": 176.57600343227386,
+ "p90": 184.9920004606247,
+ "p95": 187.29600310325623,
+ "p99": 195.360004901886
+ },
+ "isolatedSum": {
+ "p50": 205.34399896860123,
+ "p90": 220.41600197553635,
+ "p95": 226.623997092247,
+ "p99": 238.97600919008255
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 29360128,
+ "combineLogicalBytes": 29360128,
+ "fanoutMean": 2,
+ "recvTokensMax": 384,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 57.82400071620941,
- "p90": 59.87200140953064,
- "p95": 60.7680007815361,
- "p99": 70.0799971818924
+ "p50": 125.63200294971466,
+ "p90": 133.85599851608276,
+ "p95": 137.7599984407425,
+ "p99": 141.92000031471252
},
"combine": {
- "p50": 57.82400071620941,
- "p90": 59.87200140953064,
- "p95": 60.7680007815361,
- "p99": 70.0799971818924
+ "p50": 120.06399780511856,
+ "p90": 124.09599870443344,
+ "p95": 128.86400520801544,
+ "p99": 138.2399946451187
},
"roundtrip": {
- "p50": 57.82400071620941,
- "p90": 59.87200140953064,
- "p95": 60.7680007815361,
- "p99": 70.0799971818924
+ "p50": 216.19200706481934,
+ "p90": 224.60800409317017,
+ "p95": 228.12800109386444,
+ "p99": 236.12800240516663
},
"isolatedSum": {
- "p50": 115.64800143241882,
- "p90": 119.74400281906128,
- "p95": 121.5360015630722,
- "p99": 140.1599943637848
+ "p50": 245.69600075483322,
+ "p90": 257.9519972205162,
+ "p95": 266.62400364875793,
+ "p99": 280.15999495983124
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 58720256,
+ "combineLogicalBytes": 58720256,
+ "fanoutMean": 2,
+ "recvTokensMax": 768,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 59.39200147986412,
- "p90": 61.63199990987778,
- "p95": 63.35999816656113,
- "p99": 71.48800045251846
+ "p50": 161.21600568294525,
+ "p90": 170.01600563526154,
+ "p95": 172.41600155830383,
+ "p99": 177.37600207328796
},
"combine": {
- "p50": 59.39200147986412,
- "p90": 61.63199990987778,
- "p95": 63.35999816656113,
- "p99": 71.48800045251846
+ "p50": 155.16799688339233,
+ "p90": 159.2320054769516,
+ "p95": 160.64000129699707,
+ "p99": 168.83200407028198
},
"roundtrip": {
- "p50": 59.39200147986412,
- "p90": 61.63199990987778,
- "p95": 63.35999816656113,
- "p99": 71.48800045251846
+ "p50": 286.49601340293884,
+ "p90": 293.37599873542786,
+ "p95": 295.77600955963135,
+ "p99": 300.6080090999603
},
"isolatedSum": {
- "p50": 118.78400295972824,
- "p90": 123.26399981975555,
- "p95": 126.71999633312225,
- "p99": 142.97600090503693
+ "p50": 316.3840025663376,
+ "p90": 329.24801111221313,
+ "p95": 333.0560028553009,
+ "p99": 346.20800614356995
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 117440512,
+ "combineLogicalBytes": 117440512,
+ "fanoutMean": 2,
+ "recvTokensMax": 1536,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 60.35200133919716,
- "p90": 62.463998794555664,
- "p95": 64.19199705123901,
- "p99": 72.35199958086014
+ "p50": 229.88800704479218,
+ "p90": 240.03200232982635,
+ "p95": 244.9920028448105,
+ "p99": 291.29600524902344
},
"combine": {
- "p50": 60.35200133919716,
- "p90": 62.463998794555664,
- "p95": 64.19199705123901,
- "p99": 72.35199958086014
+ "p50": 280.95999360084534,
+ "p90": 287.4560058116913,
+ "p95": 291.00799560546875,
+ "p99": 312.00000643730164
},
"roundtrip": {
- "p50": 60.35200133919716,
- "p90": 62.463998794555664,
- "p95": 64.19199705123901,
- "p99": 72.35199958086014
+ "p50": 480.73598742485046,
+ "p90": 491.5519952774048,
+ "p95": 498.27200174331665,
+ "p99": 536.4159941673279
},
"isolatedSum": {
- "p50": 120.70400267839432,
- "p90": 124.92799758911133,
- "p95": 128.38399410247803,
- "p99": 144.70399916172028
+ "p50": 510.8480006456375,
+ "p90": 527.4880081415176,
+ "p95": 535.9999984502792,
+ "p99": 603.2960116863251
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 234881024,
+ "combineLogicalBytes": 234881024,
+ "fanoutMean": 2,
+ "recvTokensMax": 3072,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 62.97600269317627,
- "p90": 65.76000154018402,
- "p95": 66.14399701356888,
- "p99": 75.03999769687653
+ "p50": 368.3519959449768,
+ "p90": 378.36799025535583,
+ "p95": 381.056010723114,
+ "p99": 384.6080005168915
},
"combine": {
- "p50": 62.97600269317627,
- "p90": 65.76000154018402,
- "p95": 66.14399701356888,
- "p99": 75.03999769687653
+ "p50": 491.32800102233887,
+ "p90": 495.87199091911316,
+ "p95": 499.87199902534485,
+ "p99": 507.1679949760437
},
"roundtrip": {
- "p50": 62.97600269317627,
- "p90": 65.76000154018402,
- "p95": 66.14399701356888,
- "p99": 75.03999769687653
+ "p50": 830.3359746932983,
+ "p90": 837.1840119361877,
+ "p95": 839.3599987030029,
+ "p99": 843.8720107078552
},
"isolatedSum": {
- "p50": 125.95200538635254,
- "p90": 131.52000308036804,
- "p95": 132.28799402713776,
- "p99": 150.07999539375305
+ "p50": 859.6799969673157,
+ "p90": 874.239981174469,
+ "p95": 880.9280097484589,
+ "p99": 891.7759954929352
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 8,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 469762048,
+ "combineLogicalBytes": 469762048,
+ "fanoutMean": 2,
+ "recvTokensMax": 6144,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 70.8480030298233,
- "p90": 72.92799651622772,
- "p95": 74.07999783754349,
- "p99": 86.27200126647949
+ "p50": 663.2639765739441,
+ "p90": 672.1280217170715,
+ "p95": 677.0560145378113,
+ "p99": 696.4160203933716
},
"combine": {
- "p50": 70.8480030298233,
- "p90": 72.92799651622772,
- "p95": 74.07999783754349,
- "p99": 86.27200126647949
+ "p50": 886.5280151367188,
+ "p90": 895.2640295028687,
+ "p95": 897.9200124740601,
+ "p99": 910.0800156593323
},
"roundtrip": {
- "p50": 70.8480030298233,
- "p90": 72.92799651622772,
- "p95": 74.07999783754349,
- "p99": 86.27200126647949
+ "p50": 1518.5279846191406,
+ "p90": 1526.8160104751587,
+ "p95": 1530.3360223770142,
+ "p99": 1536.7679595947266
},
"isolatedSum": {
- "p50": 141.6960060596466,
- "p90": 145.85599303245544,
- "p95": 148.15999567508698,
- "p99": 172.54400253295898
+ "p50": 1549.7919917106628,
+ "p90": 1567.3920512199402,
+ "p95": 1574.9760270118713,
+ "p99": 1606.4960360527039
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 939524096,
+ "combineLogicalBytes": 939524096,
+ "fanoutMean": 2,
+ "recvTokensMax": 12288,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -32435,366 +33057,476 @@
]
},
{
- "id": "cx-30070070",
- "identity": "b300|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_a52edb56",
- "comparisonKey": "46230412bf8dc722",
+ "id": "cx-c6f5f498",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||eb906a106a6cb71",
+ "colorKey": "gb300_440d13a2",
+ "comparisonKey": "e5ffbcf044278e38",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T01:38:11.748195+00:00",
+ "generatedAt": "2026-06-29T13:48:54.490576+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_12",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "decode",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · fp8",
+ "label": "GB300 EP8 · deepep · bf16 · hotspot-single",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "hotspot-single",
+ "routingLabel": "hotspot-single",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "eb906a106a6cb71",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28307775342",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307775342",
- "createdAt": "2026-06-28T01:38:11.748195+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 50.49600079655647,
- "p90": 52.000001072883606,
- "p95": 53.21599915623665,
- "p99": 66.3359984755516
+ "p50": 130.5920034646988,
+ "p90": 142.11200177669525,
+ "p95": 145.82400023937225,
+ "p99": 173.0239987373352
},
"combine": {
- "p50": 50.49600079655647,
- "p90": 52.000001072883606,
- "p95": 53.21599915623665,
- "p99": 66.3359984755516
+ "p50": 139.3599957227707,
+ "p90": 146.7200070619583,
+ "p95": 154.14400398731232,
+ "p99": 182.20800161361694
},
"roundtrip": {
- "p50": 50.49600079655647,
- "p90": 52.000001072883606,
- "p95": 53.21599915623665,
- "p99": 66.3359984755516
+ "p50": 245.60000002384186,
+ "p90": 254.84800338745117,
+ "p95": 259.2639923095703,
+ "p99": 282.0799946784973
},
"isolatedSum": {
- "p50": 100.99200159311295,
- "p90": 104.00000214576721,
- "p95": 106.4319983124733,
- "p99": 132.6719969511032
+ "p50": 269.9519991874695,
+ "p90": 288.83200883865356,
+ "p95": 299.96800422668457,
+ "p99": 355.23200035095215
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 78102528,
+ "combineLogicalBytes": 78102528,
+ "fanoutMean": 5.3203125,
+ "recvTokensMax": 1024,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 53.21599915623665,
- "p90": 55.23199960589409,
- "p95": 56.12799897789955,
- "p99": 60.32000109553337
+ "p50": 255.13601303100586,
+ "p90": 284.8320007324219,
+ "p95": 298.0799973011017,
+ "p99": 321.4080035686493
},
"combine": {
- "p50": 53.21599915623665,
- "p90": 55.23199960589409,
- "p95": 56.12799897789955,
- "p99": 60.32000109553337
+ "p50": 364.80000615119934,
+ "p90": 381.82398676872253,
+ "p95": 394.6560025215149,
+ "p99": 412.8960072994232
},
"roundtrip": {
- "p50": 53.21599915623665,
- "p90": 55.23199960589409,
- "p95": 56.12799897789955,
- "p99": 60.32000109553337
+ "p50": 578.0479907989502,
+ "p90": 592.8959846496582,
+ "p95": 611.9359731674194,
+ "p99": 636.0960006713867
},
"isolatedSum": {
- "p50": 106.4319983124733,
- "p90": 110.46399921178818,
- "p95": 112.2559979557991,
- "p99": 120.64000219106674
+ "p50": 619.9360191822052,
+ "p90": 666.6559875011444,
+ "p95": 692.7359998226166,
+ "p99": 734.3040108680725
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 311091200,
+ "combineLogicalBytes": 311091200,
+ "fanoutMean": 5.2978515625,
+ "recvTokensMax": 4096,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 54.976001381874084,
- "p90": 57.40800127387047,
- "p95": 59.20000001788139,
- "p99": 64.96000289916992
+ "p50": 720.9920287132263,
+ "p90": 748.3199834823608,
+ "p95": 759.9359750747681,
+ "p99": 828.6719918251038
},
"combine": {
- "p50": 54.976001381874084,
- "p90": 57.40800127387047,
- "p95": 59.20000001788139,
- "p99": 64.96000289916992
+ "p50": 1155.4239988327026,
+ "p90": 1177.183985710144,
+ "p95": 1192.479968070984,
+ "p99": 1226.5599966049194
},
"roundtrip": {
- "p50": 54.976001381874084,
- "p90": 57.40800127387047,
- "p95": 59.20000001788139,
- "p99": 64.96000289916992
+ "p50": 1848.1279611587524,
+ "p90": 1863.8720512390137,
+ "p95": 1877.1840333938599,
+ "p99": 1896.83198928833
},
"isolatedSum": {
- "p50": 109.95200276374817,
- "p90": 114.81600254774094,
- "p95": 118.40000003576279,
- "p99": 129.92000579833984
+ "p50": 1876.416027545929,
+ "p90": 1925.5039691925049,
+ "p95": 1952.415943145752,
+ "p99": 2055.231988430023
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 1241511936,
+ "combineLogicalBytes": 1241511936,
+ "fanoutMean": 5.28570556640625,
+ "recvTokensMax": 16384,
+ "stragglerRank": 4,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ }
+ ]
+ },
+ {
+ "id": "cx-5f0c4166",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||6248b19ef786add",
+ "colorKey": "gb300_87f4d4ec",
+ "comparisonKey": "6a684c17508b8933",
+ "schemaVersion": 3,
+ "generatedAt": "2026-06-29T13:56:09.280253+00:00",
+ "status": "valid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
+ "mode": "normal",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
+ "worldSize": 8,
+ "epSize": 8,
+ "label": "GB300 EP8 · deepep · bf16 · hotspot-single+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
+ "shape": {
+ "hidden": 7168,
+ "topk": 8,
+ "experts": 288,
+ "routing": "hotspot-single",
+ "routingLabel": "hotspot-single+eplb",
+ "routingStep": 0,
+ "unevenTokens": "none",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
+ "combineQuantMode": "none"
+ },
+ "resourceProfile": {
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
+ "paretoEligible": false
+ },
+ "placement": {
+ "kind": "packed",
+ "nodes": 2,
+ "gpusPerNode": 8,
+ "scaleUpDomain": 8
+ },
+ "routingConsistent": true,
+ "traceSignature": "6248b19ef786add",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1.8466796875,
+ "eplbImbalanceAfter": 1.0002700343276514,
+ "backendVersion": "1.1.0+814e508",
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
+ "repository": "SemiAnalysisAI/InferenceX",
+ "run": {
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ },
+ "rows": [
+ {
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
+ "dispatch": {
+ "p50": 127.3919939994812,
+ "p90": 135.42400300502777,
+ "p95": 138.14400136470795,
+ "p99": 149.08799529075623
+ },
+ "combine": {
+ "p50": 126.17599964141846,
+ "p90": 131.9040060043335,
+ "p95": 133.44000279903412,
+ "p99": 144.25599575042725
+ },
+ "roundtrip": {
+ "p50": 225.8239984512329,
+ "p90": 234.9119931459427,
+ "p95": 238.01599442958832,
+ "p99": 244.28799748420715
+ },
+ "isolatedSum": {
+ "p50": 253.56799364089966,
+ "p90": 267.32800900936127,
+ "p95": 271.58400416374207,
+ "p99": 293.3439910411835
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 77701120,
+ "combineLogicalBytes": 77701120,
+ "fanoutMean": 5.29296875,
+ "recvTokensMax": 697,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 54.9440011382103,
- "p90": 58.111999183893204,
- "p95": 59.20000001788139,
- "p99": 68.86400282382965
+ "p50": 164.70399498939514,
+ "p90": 173.66400361061096,
+ "p95": 176.60799622535706,
+ "p99": 182.20800161361694
},
"combine": {
- "p50": 54.9440011382103,
- "p90": 58.111999183893204,
- "p95": 59.20000001788139,
- "p99": 68.86400282382965
+ "p50": 169.08800601959229,
+ "p90": 176.06399953365326,
+ "p95": 178.81600558757782,
+ "p99": 181.8239986896515
},
"roundtrip": {
- "p50": 54.9440011382103,
- "p90": 58.111999183893204,
- "p95": 59.20000001788139,
- "p99": 68.86400282382965
+ "p50": 305.5039942264557,
+ "p90": 314.91199135780334,
+ "p95": 317.7280128002167,
+ "p99": 328.12801003456116
},
"isolatedSum": {
- "p50": 109.8880022764206,
- "p90": 116.22399836778641,
- "p95": 118.40000003576279,
- "p99": 137.7280056476593
+ "p50": 333.7920010089874,
+ "p90": 349.7280031442642,
+ "p95": 355.4240018129349,
+ "p99": 364.03200030326843
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 155187200,
+ "combineLogicalBytes": 155187200,
+ "fanoutMean": 5.28564453125,
+ "recvTokensMax": 1372,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 56.0000017285347,
- "p90": 58.33600088953972,
- "p95": 59.167999774217606,
- "p99": 64.15999680757523
+ "p50": 231.9359928369522,
+ "p90": 239.74399268627167,
+ "p95": 242.91199445724487,
+ "p99": 254.88001108169556
},
"combine": {
- "p50": 56.0000017285347,
- "p90": 58.33600088953972,
- "p95": 59.167999774217606,
- "p99": 64.15999680757523
+ "p50": 284.86400842666626,
+ "p90": 292.7680015563965,
+ "p95": 294.5919930934906,
+ "p99": 303.0399978160858
},
"roundtrip": {
- "p50": 56.0000017285347,
- "p90": 58.33600088953972,
- "p95": 59.167999774217606,
- "p99": 64.15999680757523
+ "p50": 462.8799855709076,
+ "p90": 471.8720018863678,
+ "p95": 475.1040041446686,
+ "p99": 479.93600368499756
},
"isolatedSum": {
- "p50": 112.0000034570694,
- "p90": 116.67200177907944,
- "p95": 118.33599954843521,
- "p99": 128.31999361515045
+ "p50": 516.8000012636185,
+ "p90": 532.5119942426682,
+ "p95": 537.5039875507355,
+ "p99": 557.9200088977814
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 311162880,
+ "combineLogicalBytes": 311162880,
+ "fanoutMean": 5.299072265625,
+ "recvTokensMax": 2761,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 56.76800012588501,
- "p90": 58.94400179386139,
- "p95": 60.15999987721443,
- "p99": 69.69600170850754
+ "p50": 362.65599727630615,
+ "p90": 372.8320002555847,
+ "p95": 377.3120045661926,
+ "p99": 408.2239866256714
},
"combine": {
- "p50": 56.76800012588501,
- "p90": 58.94400179386139,
- "p95": 60.15999987721443,
- "p99": 69.69600170850754
+ "p50": 490.911990404129,
+ "p90": 499.7119903564453,
+ "p95": 501.8560290336609,
+ "p99": 520.7679867744446
},
"roundtrip": {
- "p50": 56.76800012588501,
- "p90": 58.94400179386139,
- "p95": 60.15999987721443,
- "p99": 69.69600170850754
+ "p50": 823.8720297813416,
+ "p90": 832.1920037269592,
+ "p95": 836.9280099868774,
+ "p99": 854.8480272293091
},
"isolatedSum": {
- "p50": 113.53600025177002,
- "p90": 117.88800358772278,
- "p95": 120.31999975442886,
- "p99": 139.39200341701508
+ "p50": 853.5679876804352,
+ "p90": 872.54399061203,
+ "p95": 879.1680335998535,
+ "p99": 928.991973400116
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 619974656,
+ "combineLogicalBytes": 619974656,
+ "fanoutMean": 5.279052734375,
+ "recvTokensMax": 5481,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 58.52799862623215,
- "p90": 61.24800071120262,
- "p95": 62.65600025653839,
- "p99": 74.49600100517273
+ "p50": 612.3200058937073,
+ "p90": 618.943989276886,
+ "p95": 622.3359704017639,
+ "p99": 626.6239881515503
},
"combine": {
- "p50": 58.52799862623215,
- "p90": 61.24800071120262,
- "p95": 62.65600025653839,
- "p99": 74.49600100517273
+ "p50": 863.2000088691711,
+ "p90": 870.8480000495911,
+ "p95": 872.7359771728516,
+ "p99": 876.9919872283936
},
"roundtrip": {
- "p50": 58.52799862623215,
- "p90": 61.24800071120262,
- "p95": 62.65600025653839,
- "p99": 74.49600100517273
+ "p50": 1445.4400539398193,
+ "p90": 1454.0159702301025,
+ "p95": 1455.8720588684082,
+ "p99": 1462.6879692077637
},
"isolatedSum": {
- "p50": 117.0559972524643,
- "p90": 122.49600142240524,
- "p95": 125.31200051307678,
- "p99": 148.99200201034546
+ "p50": 1475.5200147628784,
+ "p90": 1489.791989326477,
+ "p95": 1495.0719475746155,
+ "p99": 1503.6159753799438
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 1240020992,
+ "combineLogicalBytes": 1240020992,
+ "fanoutMean": 5.27935791015625,
+ "recvTokensMax": 10883,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 65.08799642324448,
- "p90": 67.26399809122086,
- "p95": 67.87200272083282,
- "p99": 74.43200051784515
+ "p50": 1125.440001487732,
+ "p90": 1132.256031036377,
+ "p95": 1134.6559524536133,
+ "p99": 1146.6879844665527
},
"combine": {
- "p50": 65.08799642324448,
- "p90": 67.26399809122086,
- "p95": 67.87200272083282,
- "p99": 74.43200051784515
+ "p50": 1601.0240316390991,
+ "p90": 1609.8239421844482,
+ "p95": 1611.6479635238647,
+ "p99": 1616.5440082550049
},
"roundtrip": {
- "p50": 65.08799642324448,
- "p90": 67.26399809122086,
- "p95": 67.87200272083282,
- "p99": 74.43200051784515
+ "p50": 2701.9519805908203,
+ "p90": 2711.711883544922,
+ "p95": 2714.303970336914,
+ "p99": 2723.871946334839
},
"isolatedSum": {
- "p50": 130.17599284648895,
- "p90": 134.5279961824417,
- "p95": 135.74400544166565,
- "p99": 148.8640010356903
+ "p50": 2726.464033126831,
+ "p90": 2742.079973220825,
+ "p95": 2746.303915977478,
+ "p99": 2763.2319927215576
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 2480414720,
+ "combineLogicalBytes": 2480414720,
+ "fanoutMean": 5.2801513671875,
+ "recvTokensMax": 21702,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -32802,366 +33534,293 @@
]
},
{
- "id": "cx-9a73b5f5",
- "identity": "b300|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_6af1abcd",
- "comparisonKey": "227468e11845c947",
+ "id": "cx-5a5f4e18",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||291e5ce62735286",
+ "colorKey": "gb300_8b7def4e",
+ "comparisonKey": "e430694c35257860",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T01:38:16.371741+00:00",
+ "generatedAt": "2026-06-29T13:38:02.057307+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_06",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "decode",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · mxfp8",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16 · uniform+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
+ "experts": 288,
"routing": "uniform",
- "routingLabel": "uniform",
+ "routingLabel": "uniform+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "mxfp8",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
+ "traceSignature": "291e5ce62735286",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 1.006072998046875,
+ "eplbImbalanceAfter": 1.0000152587890625,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28307776684",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307776684",
- "createdAt": "2026-06-28T01:38:16.371741+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 50.464000552892685,
- "p90": 52.2879995405674,
- "p95": 53.21599915623665,
- "p99": 66.81600213050842
+ "p50": 122.94399738311768,
+ "p90": 130.20800054073334,
+ "p95": 133.18400084972382,
+ "p99": 146.08000218868256
},
"combine": {
- "p50": 50.464000552892685,
- "p90": 52.2879995405674,
- "p95": 53.21599915623665,
- "p99": 66.81600213050842
+ "p50": 122.49600142240524,
+ "p90": 131.42399489879608,
+ "p95": 132.7359974384308,
+ "p99": 143.90400052070618
},
"roundtrip": {
- "p50": 50.464000552892685,
- "p90": 52.2879995405674,
- "p95": 53.21599915623665,
- "p99": 66.81600213050842
+ "p50": 220.35199403762817,
+ "p90": 228.83200645446777,
+ "p95": 231.83999955654144,
+ "p99": 239.29600417613983
},
"isolatedSum": {
- "p50": 100.92800110578537,
- "p90": 104.5759990811348,
- "p95": 106.4319983124733,
- "p99": 133.63200426101685
+ "p50": 245.43999880552292,
+ "p90": 261.6319954395294,
+ "p95": 265.9199982881546,
+ "p99": 289.98400270938873
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 77041664,
+ "combineLogicalBytes": 77041664,
+ "fanoutMean": 5.248046875,
+ "recvTokensMax": 686,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 51.90400034189224,
- "p90": 53.568001836538315,
- "p95": 54.46400120854378,
- "p99": 58.36800113320351
+ "p50": 160.41600704193115,
+ "p90": 168.99199783802032,
+ "p95": 171.61600291728973,
+ "p99": 179.80800569057465
},
"combine": {
- "p50": 51.90400034189224,
- "p90": 53.568001836538315,
- "p95": 54.46400120854378,
- "p99": 58.36800113320351
+ "p50": 167.77600347995758,
+ "p90": 172.03199863433838,
+ "p95": 177.69600450992584,
+ "p99": 182.65600502490997
},
"roundtrip": {
- "p50": 51.90400034189224,
- "p90": 53.568001836538315,
- "p95": 54.46400120854378,
- "p99": 58.36800113320351
+ "p50": 298.880010843277,
+ "p90": 306.8160116672516,
+ "p95": 310.016006231308,
+ "p99": 319.2319869995117
},
"isolatedSum": {
- "p50": 103.80800068378448,
- "p90": 107.13600367307663,
- "p95": 108.92800241708755,
- "p99": 116.73600226640701
+ "p50": 328.19201052188873,
+ "p90": 341.0239964723587,
+ "p95": 349.3120074272156,
+ "p99": 362.4640107154846
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 53.63199859857559,
- "p90": 55.39200082421303,
- "p95": 56.28800019621849,
- "p99": 68.80000233650208
- },
- "combine": {
- "p50": 53.63199859857559,
- "p90": 55.39200082421303,
- "p95": 56.28800019621849,
- "p99": 68.80000233650208
- },
- "roundtrip": {
- "p50": 53.63199859857559,
- "p90": 55.39200082421303,
- "p95": 56.28800019621849,
- "p99": 68.80000233650208
- },
- "isolatedSum": {
- "p50": 107.26399719715118,
- "p90": 110.78400164842606,
- "p95": 112.57600039243698,
- "p99": 137.60000467300415
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 53.727999329566956,
- "p90": 55.776000022888184,
- "p95": 56.89600110054016,
- "p99": 65.63200056552887
- },
- "combine": {
- "p50": 53.727999329566956,
- "p90": 55.776000022888184,
- "p95": 56.89600110054016,
- "p99": 65.63200056552887
- },
- "roundtrip": {
- "p50": 53.727999329566956,
- "p90": 55.776000022888184,
- "p95": 56.89600110054016,
- "p99": 65.63200056552887
- },
- "isolatedSum": {
- "p50": 107.45599865913391,
- "p90": 111.55200004577637,
- "p95": 113.79200220108032,
- "p99": 131.26400113105774
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 154542080,
+ "combineLogicalBytes": 154542080,
+ "fanoutMean": 5.263671875,
+ "recvTokensMax": 1365,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 55.135998874902725,
- "p90": 56.92800134420395,
- "p95": 57.920001447200775,
- "p99": 66.3359984755516
+ "p50": 228.96000742912292,
+ "p90": 238.0480021238327,
+ "p95": 242.01600253582,
+ "p99": 250.71999430656433
},
"combine": {
- "p50": 55.135998874902725,
- "p90": 56.92800134420395,
- "p95": 57.920001447200775,
- "p99": 66.3359984755516
+ "p50": 282.46399760246277,
+ "p90": 291.26399755477905,
+ "p95": 292.60799288749695,
+ "p99": 294.40000653266907
},
"roundtrip": {
- "p50": 55.135998874902725,
- "p90": 56.92800134420395,
- "p95": 57.920001447200775,
- "p99": 66.3359984755516
+ "p50": 465.7599925994873,
+ "p90": 475.74400901794434,
+ "p95": 478.8160026073456,
+ "p99": 491.90399050712585
},
"isolatedSum": {
- "p50": 110.27199774980545,
- "p90": 113.8560026884079,
- "p95": 115.84000289440155,
- "p99": 132.6719969511032
+ "p50": 511.4240050315857,
+ "p90": 529.3119996786118,
+ "p95": 534.623995423317,
+ "p99": 545.1200008392334
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 310589440,
+ "combineLogicalBytes": 310589440,
+ "fanoutMean": 5.289306640625,
+ "recvTokensMax": 2746,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 55.55199831724167,
- "p90": 57.37600103020668,
- "p95": 58.04799869656563,
- "p99": 64.51199948787689
+ "p50": 355.0400137901306,
+ "p90": 362.4640107154846,
+ "p95": 364.54400420188904,
+ "p99": 369.24800276756287
},
"combine": {
- "p50": 55.55199831724167,
- "p90": 57.37600103020668,
- "p95": 58.04799869656563,
- "p99": 64.51199948787689
+ "p50": 488.70399594306946,
+ "p90": 492.73601174354553,
+ "p95": 495.7759976387024,
+ "p99": 500.4799962043762
},
"roundtrip": {
- "p50": 55.55199831724167,
- "p90": 57.37600103020668,
- "p95": 58.04799869656563,
- "p99": 64.51199948787689
+ "p50": 815.3280019760132,
+ "p90": 822.9759931564331,
+ "p95": 825.6000280380249,
+ "p99": 832.6399922370911
},
"isolatedSum": {
- "p50": 111.10399663448334,
- "p90": 114.75200206041336,
- "p95": 116.09599739313126,
- "p99": 129.02399897575378
+ "p50": 843.7440097332001,
+ "p90": 855.2000224590302,
+ "p95": 860.3200018405914,
+ "p99": 869.7279989719391
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 619171840,
+ "combineLogicalBytes": 619171840,
+ "fanoutMean": 5.272216796875,
+ "recvTokensMax": 5467,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 57.40800127387047,
- "p90": 59.51999872922897,
- "p95": 60.22400036454201,
- "p99": 71.35999947786331
+ "p50": 607.807993888855,
+ "p90": 615.1360273361206,
+ "p95": 617.2159910202026,
+ "p99": 620.9279894828796
},
"combine": {
- "p50": 57.40800127387047,
- "p90": 59.51999872922897,
- "p95": 60.22400036454201,
- "p99": 71.35999947786331
+ "p50": 857.088029384613,
+ "p90": 861.2800240516663,
+ "p95": 862.2720241546631,
+ "p99": 871.1040019989014
},
"roundtrip": {
- "p50": 57.40800127387047,
- "p90": 59.51999872922897,
- "p95": 60.22400036454201,
- "p99": 71.35999947786331
+ "p50": 1434.656023979187,
+ "p90": 1442.6239728927612,
+ "p95": 1445.95205783844,
+ "p99": 1453.4399509429932
},
"isolatedSum": {
- "p50": 114.81600254774094,
- "p90": 119.03999745845795,
- "p95": 120.44800072908401,
- "p99": 142.71999895572662
+ "p50": 1464.896023273468,
+ "p90": 1476.4160513877869,
+ "p95": 1479.4880151748657,
+ "p99": 1492.031991481781
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1238945792,
+ "combineLogicalBytes": 1238945792,
+ "fanoutMean": 5.2747802734375,
+ "recvTokensMax": 10913,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 63.840001821517944,
- "p90": 65.69600105285645,
- "p95": 66.84800237417221,
- "p99": 73.79200309515
+ "p50": 1124.4479417800903,
+ "p90": 1131.8399906158447,
+ "p95": 1135.2640390396118,
+ "p99": 1138.7519836425781
},
"combine": {
- "p50": 63.840001821517944,
- "p90": 65.69600105285645,
- "p95": 66.84800237417221,
- "p99": 73.79200309515
+ "p50": 1602.5279760360718,
+ "p90": 1611.1359596252441,
+ "p95": 1612.8640174865723,
+ "p99": 1620.736002922058
},
"roundtrip": {
- "p50": 63.840001821517944,
- "p90": 65.69600105285645,
- "p95": 66.84800237417221,
- "p99": 73.79200309515
+ "p50": 2700.9921073913574,
+ "p90": 2710.047960281372,
+ "p95": 2712.671995162964,
+ "p99": 2718.91188621521
},
"isolatedSum": {
- "p50": 127.68000364303589,
- "p90": 131.3920021057129,
- "p95": 133.69600474834442,
- "p99": 147.5840061903
+ "p50": 2726.975917816162,
+ "p90": 2742.975950241089,
+ "p95": 2748.128056526184,
+ "p99": 2759.4879865646362
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 2481747968,
+ "combineLogicalBytes": 2481747968,
+ "fanoutMean": 5.282989501953125,
+ "recvTokensMax": 21789,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -33169,366 +33828,293 @@
]
},
{
- "id": "cx-1cb033e4",
- "identity": "b300|flashinfer|7168|8|256|nvfp4|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_f0247ae6",
- "comparisonKey": "0025025816a64ee6",
+ "id": "cx-939b56bc",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e",
+ "colorKey": "gb300_b3a88763",
+ "comparisonKey": "92dc80df4affb401",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T01:38:24.335990+00:00",
+ "generatedAt": "2026-06-29T14:00:52.364979+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_11",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "decode",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · nvfp4",
+ "label": "GB300 EP8 · deepep · bf16 · zipf",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "zipf",
+ "routingLabel": "zipf",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "nvfp4",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
- "kind": "packed",
- "nodes": 1,
+ "kind": "adversarial",
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "bfb01c61bdf926e",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28307777849",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307777849",
- "createdAt": "2026-06-28T01:38:24.335990+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 1746.3040351867676,
- "p90": 1957.1199417114258,
- "p95": 2725.9199619293213,
- "p99": 3350.719928741455
- },
- "combine": {
- "p50": 1746.3040351867676,
- "p90": 1957.1199417114258,
- "p95": 2725.9199619293213,
- "p99": 3350.719928741455
- },
- "roundtrip": {
- "p50": 1746.3040351867676,
- "p90": 1957.1199417114258,
- "p95": 2725.9199619293213,
- "p99": 3350.719928741455
- },
- "isolatedSum": {
- "p50": 3492.608070373535,
- "p90": 3914.2398834228516,
- "p95": 5451.839923858643,
- "p99": 6701.43985748291
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 157696,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 1747.3280429840088,
- "p90": 2064.6719932556152,
- "p95": 2780.8001041412354,
- "p99": 3415.9998893737793
- },
- "combine": {
- "p50": 1747.3280429840088,
- "p90": 2064.6719932556152,
- "p95": 2780.8001041412354,
- "p99": 3415.9998893737793
- },
- "roundtrip": {
- "p50": 1747.3280429840088,
- "p90": 2064.6719932556152,
- "p95": 2780.8001041412354,
- "p99": 3415.9998893737793
- },
- "isolatedSum": {
- "p50": 3494.6560859680176,
- "p90": 4129.3439865112305,
- "p95": 5561.600208282471,
- "p99": 6831.999778747559
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 308224,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 1755.903959274292,
- "p90": 1934.3680143356323,
- "p95": 2666.5918827056885,
- "p99": 3387.00795173645
+ "p50": 125.47199428081512,
+ "p90": 134.20799374580383,
+ "p95": 138.91200721263885,
+ "p99": 150.4960060119629
},
"combine": {
- "p50": 1755.903959274292,
- "p90": 1934.3680143356323,
- "p95": 2666.5918827056885,
- "p99": 3387.00795173645
+ "p50": 134.14399325847626,
+ "p90": 143.0719941854477,
+ "p95": 144.896000623703,
+ "p99": 148.95999431610107
},
"roundtrip": {
- "p50": 1755.903959274292,
- "p90": 1934.3680143356323,
- "p95": 2666.5918827056885,
- "p99": 3387.00795173645
+ "p50": 234.8479926586151,
+ "p90": 242.08000302314758,
+ "p95": 244.7039932012558,
+ "p99": 250.65600872039795
},
"isolatedSum": {
- "p50": 3511.807918548584,
- "p90": 3868.7360286712646,
- "p95": 5333.183765411377,
- "p99": 6774.0159034729
+ "p50": 259.6159875392914,
+ "p90": 277.2799879312515,
+ "p95": 283.80800783634186,
+ "p99": 299.45600032806396
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 620032,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 1760.9599828720093,
- "p90": 2005.1519870758057,
- "p95": 2768.415927886963,
- "p99": 3292.3200130462646
+ "p50": 161.02400422096252,
+ "p90": 184.38400328159332,
+ "p95": 192.47999787330627,
+ "p99": 212.47999370098114
},
"combine": {
- "p50": 1760.9599828720093,
- "p90": 2005.1519870758057,
- "p95": 2768.415927886963,
- "p99": 3292.3200130462646
+ "p50": 184.1599941253662,
+ "p90": 206.01600408554077,
+ "p95": 230.78399896621704,
+ "p99": 240.9600019454956
},
"roundtrip": {
- "p50": 1760.9599828720093,
- "p90": 2005.1519870758057,
- "p95": 2768.415927886963,
- "p99": 3292.3200130462646
+ "p50": 318.36798787117004,
+ "p90": 339.3920063972473,
+ "p95": 351.00799798965454,
+ "p99": 374.65599179267883
},
"isolatedSum": {
- "p50": 3521.9199657440186,
- "p90": 4010.3039741516113,
- "p95": 5536.831855773926,
- "p99": 6584.640026092529
+ "p50": 345.18399834632874,
+ "p90": 390.4000073671341,
+ "p95": 423.2639968395233,
+ "p99": 453.43999564647675
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243648,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 100509696,
+ "combineLogicalBytes": 100509696,
+ "fanoutMean": 3.42333984375,
+ "recvTokensMax": 2046,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 1761.0559463500977,
- "p90": 2489.1200065612793,
- "p95": 2836.575984954834,
- "p99": 4053.1201362609863
+ "p50": 231.77599906921387,
+ "p90": 241.98399484157562,
+ "p95": 250.40000677108765,
+ "p99": 270.01601457595825
},
"combine": {
- "p50": 1761.0559463500977,
- "p90": 2489.1200065612793,
- "p95": 2836.575984954834,
- "p99": 4053.1201362609863
+ "p50": 345.95200419425964,
+ "p90": 367.16800928115845,
+ "p95": 391.4879858493805,
+ "p99": 408.86399149894714
},
"roundtrip": {
- "p50": 1761.0559463500977,
- "p90": 2489.1200065612793,
- "p95": 2836.575984954834,
- "p99": 4053.1201362609863
+ "p50": 544.6079969406128,
+ "p90": 556.6400289535522,
+ "p95": 564.0959739685059,
+ "p99": 585.6320261955261
},
"isolatedSum": {
- "p50": 3522.1118927001953,
- "p90": 4978.240013122559,
- "p95": 5673.151969909668,
- "p99": 8106.240272521973
+ "p50": 577.7280032634735,
+ "p90": 609.1520041227341,
+ "p95": 641.8879926204681,
+ "p99": 678.8800060749054
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 201678848,
+ "combineLogicalBytes": 201678848,
+ "fanoutMean": 3.4345703125,
+ "recvTokensMax": 4094,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 1755.552053451538,
- "p90": 1923.6479997634888,
- "p95": 2723.328113555908,
- "p99": 3401.18408203125
+ "p50": 371.71199917793274,
+ "p90": 382.207989692688,
+ "p95": 386.3680064678192,
+ "p99": 416.3520038127899
},
"combine": {
- "p50": 1755.552053451538,
- "p90": 1923.6479997634888,
- "p95": 2723.328113555908,
- "p99": 3401.18408203125
+ "p50": 624.5759725570679,
+ "p90": 629.6319961547852,
+ "p95": 633.9840292930603,
+ "p99": 647.9039788246155
},
"roundtrip": {
- "p50": 1755.552053451538,
- "p90": 1923.6479997634888,
- "p95": 2723.328113555908,
- "p99": 3401.18408203125
+ "p50": 955.1039934158325,
+ "p90": 964.8640155792236,
+ "p95": 969.8240160942078,
+ "p99": 981.2800288200378
},
"isolatedSum": {
- "p50": 3511.104106903076,
- "p90": 3847.2959995269775,
- "p95": 5446.656227111816,
- "p99": 6802.3681640625
+ "p50": 996.2879717350006,
+ "p90": 1011.8399858474731,
+ "p95": 1020.3520357608795,
+ "p99": 1064.2559826374054
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4931584,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 405035008,
+ "combineLogicalBytes": 405035008,
+ "fanoutMean": 3.4488525390625,
+ "recvTokensMax": 8189,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 1759.071946144104,
- "p90": 2116.3198947906494,
- "p95": 2758.9120864868164,
- "p99": 3519.9038982391357
+ "p50": 686.240017414093,
+ "p90": 698.6240148544312,
+ "p95": 704.479992389679,
+ "p99": 723.6800193786621
},
"combine": {
- "p50": 1759.071946144104,
- "p90": 2116.3198947906494,
- "p95": 2758.9120864868164,
- "p99": 3519.9038982391357
+ "p50": 1130.6560039520264,
+ "p90": 1135.807991027832,
+ "p95": 1141.5679454803467,
+ "p99": 1145.9519863128662
},
"roundtrip": {
- "p50": 1759.071946144104,
- "p90": 2116.3198947906494,
- "p95": 2758.9120864868164,
- "p99": 3519.9038982391357
+ "p50": 1787.4239683151245,
+ "p90": 1797.2160577774048,
+ "p95": 1799.839973449707,
+ "p99": 1810.304045677185
},
"isolatedSum": {
- "p50": 3518.143892288208,
- "p90": 4232.639789581299,
- "p95": 5517.824172973633,
- "p99": 7039.8077964782715
+ "p50": 1816.8960213661194,
+ "p90": 1834.4320058822632,
+ "p95": 1846.0479378700256,
+ "p99": 1869.6320056915283
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 808822784,
+ "combineLogicalBytes": 808822784,
+ "fanoutMean": 3.44354248046875,
+ "recvTokensMax": 16380,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 1765.4720544815063,
- "p90": 2013.4079456329346,
- "p95": 2776.8959999084473,
- "p99": 3300.992012023926
+ "p50": 1334.8480463027954,
+ "p90": 1348.031997680664,
+ "p95": 1351.8719673156738,
+ "p99": 1359.231948852539
},
"combine": {
- "p50": 1765.4720544815063,
- "p90": 2013.4079456329346,
- "p95": 2776.8959999084473,
- "p99": 3300.992012023926
+ "p50": 2164.1600131988525,
+ "p90": 2191.135883331299,
+ "p95": 2203.552007675171,
+ "p99": 2225.4080772399902
},
"roundtrip": {
- "p50": 1765.4720544815063,
- "p90": 2013.4079456329346,
- "p95": 2776.8959999084473,
- "p99": 3300.992012023926
+ "p50": 3478.6880016326904,
+ "p90": 3500.6399154663086,
+ "p95": 3511.0080242156982,
+ "p99": 3533.087968826294
},
"isolatedSum": {
- "p50": 3530.9441089630127,
- "p90": 4026.815891265869,
- "p95": 5553.7919998168945,
- "p99": 6601.984024047852
+ "p50": 3499.008059501648,
+ "p90": 3539.167881011963,
+ "p95": 3555.4239749908447,
+ "p99": 3584.6400260925293
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19418112,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1619795968,
+ "combineLogicalBytes": 1619795968,
+ "fanoutMean": 3.4481201171875,
+ "recvTokensMax": 32761,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -33536,291 +34122,364 @@
]
},
{
- "id": "cx-207d8ef2",
- "identity": "b300|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_5ec8473f",
- "comparisonKey": "01804e6d9a96754e",
+ "id": "cx-3904ac00",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||e47f9de18e6cabe",
+ "colorKey": "gb300_b3a88763",
+ "comparisonKey": "92dc80df4affb401",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T17:26:59.581224+00:00",
+ "generatedAt": "2026-06-29T13:45:08.590637+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_03",
- "sku": "b300",
- "backend": "flashinfer",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · bf16",
+ "label": "GB300 EP8 · deepep · bf16 · zipf",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "zipf",
+ "routingLabel": "zipf",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "e47f9de18e6cabe",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28296434249",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296434249",
- "createdAt": "2026-06-27T17:26:59.581224+00:00",
- "sha": "2ebeba9134a8c84f7a80ac87742d57f7cdf1cf18"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 69.31199878454208,
- "p90": 70.91200351715088,
- "p95": 71.58400118350983,
- "p99": 75.42400062084198
+ "p50": 127.77599692344666,
+ "p90": 135.5839967727661,
+ "p95": 139.67999815940857,
+ "p99": 148.67199957370758
},
"combine": {
- "p50": 69.31199878454208,
- "p90": 70.91200351715088,
- "p95": 71.58400118350983,
- "p99": 75.42400062084198
+ "p50": 134.8479986190796,
+ "p90": 144.31999623775482,
+ "p95": 145.91999351978302,
+ "p99": 156.09599649906158
},
"roundtrip": {
- "p50": 69.31199878454208,
- "p90": 70.91200351715088,
- "p95": 71.58400118350983,
- "p99": 75.42400062084198
+ "p50": 237.18400299549103,
+ "p90": 246.7840015888214,
+ "p95": 249.28000569343567,
+ "p99": 256.415992975235
},
"isolatedSum": {
- "p50": 138.62399756908417,
- "p90": 141.82400703430176,
- "p95": 143.16800236701965,
- "p99": 150.84800124168396
+ "p50": 262.62399554252625,
+ "p90": 279.90399301052094,
+ "p95": 285.5999916791916,
+ "p99": 304.76799607276917
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 256,
- "globalTokens": 2048,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 96.22400254011154,
- "p90": 98.2080027461052,
- "p95": 99.10400211811066,
- "p99": 110.75200140476227
+ "p50": 232.67200589179993,
+ "p90": 241.37599766254425,
+ "p95": 246.0159957408905,
+ "p99": 254.04798984527588
},
"combine": {
- "p50": 96.22400254011154,
- "p90": 98.2080027461052,
- "p95": 99.10400211811066,
- "p99": 110.75200140476227
+ "p50": 347.7120101451874,
+ "p90": 356.25600814819336,
+ "p95": 358.3360016345978,
+ "p99": 368.1280016899109
},
"roundtrip": {
- "p50": 96.22400254011154,
- "p90": 98.2080027461052,
- "p95": 99.10400211811066,
- "p99": 110.75200140476227
+ "p50": 542.1119928359985,
+ "p90": 551.9359707832336,
+ "p95": 553.9199709892273,
+ "p99": 563.3599758148193
},
"isolatedSum": {
- "p50": 192.44800508022308,
- "p90": 196.4160054922104,
- "p95": 198.2080042362213,
- "p99": 221.50400280952454
+ "p50": 580.3840160369873,
+ "p90": 597.6320058107376,
+ "p95": 604.3519973754883,
+ "p99": 622.1759915351868
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 201678848,
+ "combineLogicalBytes": 201678848,
+ "fanoutMean": 3.4345703125,
+ "recvTokensMax": 4094,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 512,
- "globalTokens": 4096,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 153.08800339698792,
- "p90": 155.42399883270264,
- "p95": 156.47999942302704,
- "p99": 161.79199516773224
+ "p50": 694.5279836654663,
+ "p90": 704.7680020332336,
+ "p95": 707.8080177307129,
+ "p99": 722.3680019378662
},
"combine": {
- "p50": 153.08800339698792,
- "p90": 155.42399883270264,
- "p95": 156.47999942302704,
- "p99": 161.79199516773224
+ "p50": 1134.6240043640137,
+ "p90": 1143.1039571762085,
+ "p95": 1144.5759534835815,
+ "p99": 1158.944010734558
},
"roundtrip": {
- "p50": 153.08800339698792,
- "p90": 155.42399883270264,
- "p95": 156.47999942302704,
- "p99": 161.79199516773224
+ "p50": 1801.3440370559692,
+ "p90": 1811.6159439086914,
+ "p95": 1815.4560327529907,
+ "p99": 1830.399990081787
},
"isolatedSum": {
- "p50": 306.17600679397583,
- "p90": 310.8479976654053,
- "p95": 312.9599988460541,
- "p99": 323.5839903354645
+ "p50": 1829.15198802948,
+ "p90": 1847.8719592094421,
+ "p95": 1852.3839712142944,
+ "p99": 1881.3120126724243
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 808822784,
+ "combineLogicalBytes": 808822784,
+ "fanoutMean": 3.44354248046875,
+ "recvTokensMax": 16380,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
- },
+ }
+ ]
+ },
+ {
+ "id": "cx-6da8b67d",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||9014f8b812bd39e",
+ "colorKey": "gb300_961589b9",
+ "comparisonKey": "796f3c416772b90a",
+ "schemaVersion": 3,
+ "generatedAt": "2026-06-29T13:47:00.191579+00:00",
+ "status": "valid",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
+ "mode": "normal",
+ "resourceMode": "tuned",
+ "suite": "backend-default",
+ "comparisonClass": "standardized",
+ "measurementContract": "layout-and-dispatch-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
+ "worldSize": 8,
+ "epSize": 8,
+ "label": "GB300 EP8 · deepep · bf16 · zipf-heavy",
+ "model": "DeepSeek-V3/V4",
+ "shape": {
+ "hidden": 7168,
+ "topk": 8,
+ "experts": 256,
+ "routing": "zipf-heavy",
+ "routingLabel": "zipf-heavy",
+ "routingStep": 0,
+ "unevenTokens": "none",
+ "eplbEnabled": false,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
+ "combineQuantMode": "none"
+ },
+ "resourceProfile": {
+ "requestedFraction": null,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
+ "paretoEligible": false
+ },
+ "placement": {
+ "kind": "packed",
+ "nodes": 2,
+ "gpusPerNode": 8,
+ "scaleUpDomain": 8
+ },
+ "routingConsistent": true,
+ "traceSignature": "9014f8b812bd39e",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": null,
+ "eplbImbalanceAfter": null,
+ "backendVersion": "1.1.0+814e508",
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
+ "repository": "SemiAnalysisAI/InferenceX",
+ "run": {
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ },
+ "rows": [
{
- "tokensPerRank": 1024,
- "globalTokens": 8192,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 270.6240117549896,
- "p90": 273.6319899559021,
- "p95": 275.07200837135315,
- "p99": 281.3119888305664
+ "p50": 124.15999919176102,
+ "p90": 132.6719969511032,
+ "p95": 136.89599931240082,
+ "p99": 145.7280069589615
},
"combine": {
- "p50": 270.6240117549896,
- "p90": 273.6319899559021,
- "p95": 275.07200837135315,
- "p99": 281.3119888305664
+ "p50": 130.14400005340576,
+ "p90": 134.97599959373474,
+ "p95": 139.23199474811554,
+ "p99": 145.47200500965118
},
"roundtrip": {
- "p50": 270.6240117549896,
- "p90": 273.6319899559021,
- "p95": 275.07200837135315,
- "p99": 281.3119888305664
+ "p50": 227.00800001621246,
+ "p90": 238.36800456047058,
+ "p95": 241.82400107383728,
+ "p99": 252.00000405311584
},
"isolatedSum": {
- "p50": 541.2480235099792,
- "p90": 547.2639799118042,
- "p95": 550.1440167427063,
- "p99": 562.6239776611328
+ "p50": 254.30399924516678,
+ "p90": 267.64799654483795,
+ "p95": 276.12799406051636,
+ "p99": 291.20001196861267
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 22650880,
+ "combineLogicalBytes": 22650880,
+ "fanoutMean": 1.54296875,
+ "recvTokensMax": 1024,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2048,
- "globalTokens": 16384,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 498.6560046672821,
- "p90": 501.6000270843506,
- "p95": 502.6879906654358,
- "p99": 510.3679895401001
+ "p50": 212.19199895858765,
+ "p90": 220.47999501228333,
+ "p95": 222.88000583648682,
+ "p99": 228.12800109386444
},
"combine": {
- "p50": 498.6560046672821,
- "p90": 501.6000270843506,
- "p95": 502.6879906654358,
- "p99": 510.3679895401001
+ "p50": 314.62401151657104,
+ "p90": 324.6400058269501,
+ "p95": 327.7760148048401,
+ "p99": 331.6799998283386
},
"roundtrip": {
- "p50": 498.6560046672821,
- "p90": 501.6000270843506,
- "p95": 502.6879906654358,
- "p99": 510.3679895401001
+ "p50": 499.55201148986816,
+ "p90": 511.23201847076416,
+ "p95": 515.1039958000183,
+ "p99": 522.4000215530396
},
"isolatedSum": {
- "p50": 997.3120093345642,
- "p90": 1003.2000541687012,
- "p95": 1005.3759813308716,
- "p99": 1020.7359790802002
+ "p50": 526.8160104751587,
+ "p90": 545.1200008392334,
+ "p95": 550.6560206413269,
+ "p99": 559.8080009222031
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 91521024,
+ "combineLogicalBytes": 91521024,
+ "fanoutMean": 1.55859375,
+ "recvTokensMax": 4096,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4096,
- "globalTokens": 32768,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 943.9679980278015,
- "p90": 947.4560022354126,
- "p95": 949.0879774093628,
- "p99": 961.6640210151672
+ "p50": 614.1120195388794,
+ "p90": 625.1199841499329,
+ "p95": 628.5439729690552,
+ "p99": 638.3360028266907
},
"combine": {
- "p50": 943.9679980278015,
- "p90": 947.4560022354126,
- "p95": 949.0879774093628,
- "p99": 961.6640210151672
+ "p50": 1166.1440134048462,
+ "p90": 1170.9760427474976,
+ "p95": 1172.9919910430908,
+ "p99": 1179.5519590377808
},
"roundtrip": {
- "p50": 943.9679980278015,
- "p90": 947.4560022354126,
- "p95": 949.0879774093628,
- "p99": 961.6640210151672
+ "p50": 1718.7199592590332,
+ "p90": 1729.0879487991333,
+ "p95": 1731.5839529037476,
+ "p99": 1738.976001739502
},
"isolatedSum": {
- "p50": 1887.935996055603,
- "p90": 1894.9120044708252,
- "p95": 1898.1759548187256,
- "p99": 1923.3280420303345
+ "p50": 1780.2560329437256,
+ "p90": 1796.0960268974304,
+ "p95": 1801.535964012146,
+ "p99": 1817.8879618644714
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 368062464,
+ "combineLogicalBytes": 368062464,
+ "fanoutMean": 1.5670166015625,
+ "recvTokensMax": 16384,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -33829,107 +34488,108 @@
]
},
{
- "id": "cx-ae942e6d",
- "identity": "b300|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_a52edb56",
- "comparisonKey": "e30791951192637e",
+ "id": "cx-37cf5d77",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||0e6b07a25691d72",
+ "colorKey": "gb300_db9a43b5",
+ "comparisonKey": "8cb163d8db9bc0c6",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T01:38:12.335801+00:00",
+ "generatedAt": "2026-06-29T13:54:01.125432+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_15",
- "sku": "b300",
- "backend": "flashinfer",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · fp8",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-heavy+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "zipf-heavy",
+ "routingLabel": "zipf-heavy+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
+ "traceSignature": "0e6b07a25691d72",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 7.38995361328125,
+ "eplbImbalanceAfter": 1.0000210716610862,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28307775342",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307775342",
- "createdAt": "2026-06-28T01:38:12.335801+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 63.93600255250931,
- "p90": 65.76000154018402,
- "p95": 66.6240006685257,
- "p99": 72.73600250482559
+ "p50": 128.09599936008453,
+ "p90": 137.95199990272522,
+ "p95": 142.14399456977844,
+ "p99": 166.46400094032288
},
"combine": {
- "p50": 63.93600255250931,
- "p90": 65.76000154018402,
- "p95": 66.6240006685257,
- "p99": 72.73600250482559
+ "p50": 125.95200538635254,
+ "p90": 132.38400220870972,
+ "p95": 134.94400680065155,
+ "p99": 147.07200229167938
},
"roundtrip": {
- "p50": 63.93600255250931,
- "p90": 65.76000154018402,
- "p95": 66.6240006685257,
- "p99": 72.73600250482559
+ "p50": 224.8000055551529,
+ "p90": 233.7920069694519,
+ "p95": 238.24000358581543,
+ "p99": 269.9199914932251
},
"isolatedSum": {
- "p50": 127.87200510501862,
- "p90": 131.52000308036804,
- "p95": 133.2480013370514,
- "p99": 145.47200500965118
+ "p50": 254.04800474643707,
+ "p90": 270.33600211143494,
+ "p95": 277.08800137043,
+ "p99": 313.53600323200226
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 79206400,
+ "combineLogicalBytes": 79206400,
+ "fanoutMean": 5.3955078125,
+ "recvTokensMax": 713,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -33938,35 +34598,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 84.03199911117554,
- "p90": 86.11200004816055,
- "p95": 86.56000345945358,
- "p99": 92.83199906349182
+ "p50": 166.20799899101257,
+ "p90": 173.43999445438385,
+ "p95": 176.96000635623932,
+ "p99": 193.50400567054749
},
"combine": {
- "p50": 84.03199911117554,
- "p90": 86.11200004816055,
- "p95": 86.56000345945358,
- "p99": 92.83199906349182
+ "p50": 169.0240055322647,
+ "p90": 175.77600479125977,
+ "p95": 179.58399653434753,
+ "p99": 203.8400024175644
},
"roundtrip": {
- "p50": 84.03199911117554,
- "p90": 86.11200004816055,
- "p95": 86.56000345945358,
- "p99": 92.83199906349182
+ "p50": 304.80000376701355,
+ "p90": 313.82399797439575,
+ "p95": 317.8560137748718,
+ "p99": 350.71998834609985
},
"isolatedSum": {
- "p50": 168.06399822235107,
- "p90": 172.2240000963211,
- "p95": 173.12000691890717,
- "p99": 185.66399812698364
+ "p50": 335.2320045232773,
+ "p90": 349.2159992456436,
+ "p95": 356.54400289058685,
+ "p99": 397.3440080881119
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 159330304,
+ "combineLogicalBytes": 159330304,
+ "fanoutMean": 5.4267578125,
+ "recvTokensMax": 1436,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -33975,35 +34635,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 130.87999820709229,
- "p90": 133.56800377368927,
- "p95": 135.26399433612823,
- "p99": 155.5200070142746
+ "p50": 233.98399353027344,
+ "p90": 243.55199933052063,
+ "p95": 246.17600440979004,
+ "p99": 277.0560085773468
},
"combine": {
- "p50": 130.87999820709229,
- "p90": 133.56800377368927,
- "p95": 135.26399433612823,
- "p99": 155.5200070142746
+ "p50": 293.5679852962494,
+ "p90": 300.86401104927063,
+ "p95": 304.4160008430481,
+ "p99": 320.73599100112915
},
"roundtrip": {
- "p50": 130.87999820709229,
- "p90": 133.56800377368927,
- "p95": 135.26399433612823,
- "p99": 155.5200070142746
+ "p50": 469.5039987564087,
+ "p90": 478.2400131225586,
+ "p95": 483.99999737739563,
+ "p99": 497.72799015045166
},
"isolatedSum": {
- "p50": 261.75999641418457,
- "p90": 267.13600754737854,
- "p95": 270.52798867225647,
- "p99": 311.0400140285492
+ "p50": 527.5519788265228,
+ "p90": 544.4160103797913,
+ "p95": 550.5920052528381,
+ "p99": 597.791999578476
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 319535104,
+ "combineLogicalBytes": 319535104,
+ "fanoutMean": 5.441650390625,
+ "recvTokensMax": 2897,
+ "stragglerRank": 5,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34012,35 +34672,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 229.40799593925476,
- "p90": 233.11999440193176,
- "p95": 234.27200317382812,
- "p99": 247.48800694942474
+ "p50": 364.25599455833435,
+ "p90": 375.328004360199,
+ "p95": 380.95998764038086,
+ "p99": 411.23199462890625
},
"combine": {
- "p50": 229.40799593925476,
- "p90": 233.11999440193176,
- "p95": 234.27200317382812,
- "p99": 247.48800694942474
+ "p50": 496.6079890727997,
+ "p90": 505.2480101585388,
+ "p95": 509.0879797935486,
+ "p99": 533.6959958076477
},
"roundtrip": {
- "p50": 229.40799593925476,
- "p90": 233.11999440193176,
- "p95": 234.27200317382812,
- "p99": 247.48800694942474
+ "p50": 832.0320248603821,
+ "p90": 843.2000279426575,
+ "p95": 850.9439826011658,
+ "p99": 886.4319920539856
},
"isolatedSum": {
- "p50": 458.8159918785095,
- "p90": 466.2399888038635,
- "p95": 468.54400634765625,
- "p99": 494.9760138988495
+ "p50": 860.863983631134,
+ "p90": 880.5760145187378,
+ "p95": 890.0479674339294,
+ "p99": 944.927990436554
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 8,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 638410752,
+ "combineLogicalBytes": 638410752,
+ "fanoutMean": 5.43603515625,
+ "recvTokensMax": 5815,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34049,35 +34709,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 404.992014169693,
- "p90": 407.9360067844391,
- "p95": 409.63199734687805,
- "p99": 412.6720130443573
+ "p50": 623.2640147209167,
+ "p90": 633.3119869232178,
+ "p95": 638.3039951324463,
+ "p99": 663.1680130958557
},
"combine": {
- "p50": 404.992014169693,
- "p90": 407.9360067844391,
- "p95": 409.63199734687805,
- "p99": 412.6720130443573
+ "p50": 895.5519795417786,
+ "p90": 901.7599821090698,
+ "p95": 906.2079787254333,
+ "p99": 914.5280122756958
},
"roundtrip": {
- "p50": 404.992014169693,
- "p90": 407.9360067844391,
- "p95": 409.63199734687805,
- "p99": 412.6720130443573
+ "p50": 1488.2240295410156,
+ "p90": 1498.304009437561,
+ "p95": 1500.991940498352,
+ "p99": 1510.7200145721436
},
"isolatedSum": {
- "p50": 809.984028339386,
- "p90": 815.8720135688782,
- "p95": 819.2639946937561,
- "p99": 825.3440260887146
+ "p50": 1518.8159942626953,
+ "p90": 1535.0719690322876,
+ "p95": 1544.5119738578796,
+ "p99": 1577.6960253715515
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1275144192,
+ "combineLogicalBytes": 1275144192,
+ "fanoutMean": 5.42889404296875,
+ "recvTokensMax": 11606,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34086,34 +34746,34 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 756.1280131340027,
- "p90": 759.6480250358582,
- "p95": 760.703980922699,
- "p99": 774.944007396698
+ "p50": 1152.1919965744019,
+ "p90": 1162.6240015029907,
+ "p95": 1166.7200326919556,
+ "p99": 1189.95201587677
},
"combine": {
- "p50": 756.1280131340027,
- "p90": 759.6480250358582,
- "p95": 760.703980922699,
- "p99": 774.944007396698
+ "p50": 1676.7679452896118,
+ "p90": 1686.2080097198486,
+ "p95": 1689.2160177230835,
+ "p99": 1704.7679424285889
},
"roundtrip": {
- "p50": 756.1280131340027,
- "p90": 759.6480250358582,
- "p95": 760.703980922699,
- "p99": 774.944007396698
+ "p50": 2801.2800216674805,
+ "p90": 2810.976028442383,
+ "p95": 2813.568115234375,
+ "p99": 2820.9919929504395
},
"isolatedSum": {
- "p50": 1512.2560262680054,
- "p90": 1519.2960500717163,
- "p95": 1521.407961845398,
- "p99": 1549.888014793396
+ "p50": 2828.9599418640137,
+ "p90": 2848.8320112228394,
+ "p95": 2855.936050415039,
+ "p99": 2894.719958305359
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 2546374656,
+ "combineLogicalBytes": 2546374656,
+ "fanoutMean": 5.420562744140625,
+ "recvTokensMax": 23170,
"stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
@@ -34122,106 +34782,107 @@
]
},
{
- "id": "cx-dede56e2",
- "identity": "b300|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_6af1abcd",
- "comparisonKey": "26534c8239f2bdd1",
+ "id": "cx-2c8de23f",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||a39eeb7c2dc6ca7",
+ "colorKey": "gb300_15a35db4",
+ "comparisonKey": "ce656c1689809360",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T01:38:47.923344+00:00",
+ "generatedAt": "2026-06-29T13:45:06.518011+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_14",
- "sku": "b300",
- "backend": "flashinfer",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · mxfp8",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-mild",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "zipf-mild",
+ "routingLabel": "zipf-mild",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "mxfp8",
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "a39eeb7c2dc6ca7",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28307776684",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307776684",
- "createdAt": "2026-06-28T01:38:47.923344+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 64.7680014371872,
- "p90": 66.39999896287918,
- "p95": 67.32799857854843,
- "p99": 76.4160007238388
+ "p50": 126.91199779510498,
+ "p90": 136.63999736309052,
+ "p95": 144.96000111103058,
+ "p99": 181.7599982023239
},
"combine": {
- "p50": 64.7680014371872,
- "p90": 66.39999896287918,
- "p95": 67.32799857854843,
- "p99": 76.4160007238388
+ "p50": 133.69600474834442,
+ "p90": 151.0400027036667,
+ "p95": 164.57599401474,
+ "p99": 193.50400567054749
},
"roundtrip": {
- "p50": 64.7680014371872,
- "p90": 66.39999896287918,
- "p95": 67.32799857854843,
- "p99": 76.4160007238388
+ "p50": 237.34399676322937,
+ "p90": 254.94399666786194,
+ "p95": 272.6080119609833,
+ "p99": 294.49599981307983
},
"isolatedSum": {
- "p50": 129.5360028743744,
- "p90": 132.79999792575836,
- "p95": 134.65599715709686,
- "p99": 152.8320014476776
+ "p50": 260.6080025434494,
+ "p90": 287.6800000667572,
+ "p95": 309.53599512577057,
+ "p99": 375.2640038728714
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 70160384,
+ "combineLogicalBytes": 70160384,
+ "fanoutMean": 4.779296875,
+ "recvTokensMax": 987,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
@@ -34231,35 +34892,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 84.28800106048584,
- "p90": 85.40800213813782,
- "p95": 86.07999980449677,
- "p99": 86.91199868917465
+ "p50": 165.24800658226013,
+ "p90": 173.88799786567688,
+ "p95": 176.67199671268463,
+ "p99": 182.5920045375824
},
"combine": {
- "p50": 84.28800106048584,
- "p90": 85.40800213813782,
- "p95": 86.07999980449677,
- "p99": 86.91199868917465
+ "p50": 181.85600638389587,
+ "p90": 190.17599523067474,
+ "p95": 192.22399592399597,
+ "p99": 194.5279985666275
},
"roundtrip": {
- "p50": 84.28800106048584,
- "p90": 85.40800213813782,
- "p95": 86.07999980449677,
- "p99": 86.91199868917465
+ "p50": 323.743999004364,
+ "p90": 331.64799213409424,
+ "p95": 335.2000117301941,
+ "p99": 340.60800075531006
},
"isolatedSum": {
- "p50": 168.57600212097168,
- "p90": 170.81600427627563,
- "p95": 172.15999960899353,
- "p99": 173.8239973783493
+ "p50": 347.104012966156,
+ "p90": 364.0639930963516,
+ "p95": 368.8959926366806,
+ "p99": 377.1200031042099
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 140879872,
+ "combineLogicalBytes": 140879872,
+ "fanoutMean": 4.79833984375,
+ "recvTokensMax": 1972,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34268,34 +34929,34 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 131.3920021057129,
- "p90": 133.91999900341034,
- "p95": 134.91199910640717,
- "p99": 136.63999736309052
+ "p50": 240.12799561023712,
+ "p90": 247.99999594688416,
+ "p95": 251.74400210380554,
+ "p99": 255.74401021003723
},
"combine": {
- "p50": 131.3920021057129,
- "p90": 133.91999900341034,
- "p95": 134.91199910640717,
- "p99": 136.63999736309052
+ "p50": 351.99999809265137,
+ "p90": 356.3520014286041,
+ "p95": 361.7280125617981,
+ "p99": 372.8320002555847
},
"roundtrip": {
- "p50": 131.3920021057129,
- "p90": 133.91999900341034,
- "p95": 134.91199910640717,
- "p99": 136.63999736309052
+ "p50": 553.1200170516968,
+ "p90": 562.8160238265991,
+ "p95": 566.2720203399658,
+ "p99": 577.6000022888184
},
"isolatedSum": {
- "p50": 262.7840042114258,
- "p90": 267.8399980068207,
- "p95": 269.82399821281433,
- "p99": 273.27999472618103
+ "p50": 592.1279937028885,
+ "p90": 604.3519973754883,
+ "p95": 613.4720146656036,
+ "p99": 628.576010465622
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 282333184,
+ "combineLogicalBytes": 282333184,
+ "fanoutMean": 4.80810546875,
+ "recvTokensMax": 3936,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
@@ -34305,35 +34966,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 229.72799837589264,
- "p90": 232.7679991722107,
- "p95": 233.95200073719025,
- "p99": 236.32000386714935
+ "p50": 388.89598846435547,
+ "p90": 396.92801237106323,
+ "p95": 399.77601170539856,
+ "p99": 405.7919979095459
},
"combine": {
- "p50": 229.72799837589264,
- "p90": 232.7679991722107,
- "p95": 233.95200073719025,
- "p99": 236.32000386714935
+ "p50": 608.0960035324097,
+ "p90": 613.2479906082153,
+ "p95": 615.0720119476318,
+ "p99": 623.7760186195374
},
"roundtrip": {
- "p50": 229.72799837589264,
- "p90": 232.7679991722107,
- "p95": 233.95200073719025,
- "p99": 236.32000386714935
+ "p50": 973.4399914741516,
+ "p90": 981.7600250244141,
+ "p95": 984.9920272827148,
+ "p99": 990.9759759902954
},
"isolatedSum": {
- "p50": 459.4559967517853,
- "p90": 465.5359983444214,
- "p95": 467.9040014743805,
- "p99": 472.6400077342987
+ "p50": 996.9919919967651,
+ "p90": 1010.1760029792786,
+ "p95": 1014.8480236530304,
+ "p99": 1029.5680165290833
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 566716416,
+ "combineLogicalBytes": 566716416,
+ "fanoutMean": 4.8255615234375,
+ "recvTokensMax": 7855,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34342,35 +35003,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 404.7999978065491,
- "p90": 407.1680009365082,
- "p95": 407.99999237060547,
- "p99": 411.3920032978058
+ "p50": 692.2879815101624,
+ "p90": 703.0079960823059,
+ "p95": 705.7279944419861,
+ "p99": 715.008020401001
},
"combine": {
- "p50": 404.7999978065491,
- "p90": 407.1680009365082,
- "p95": 407.99999237060547,
- "p99": 411.3920032978058
+ "p50": 1107.7120304107666,
+ "p90": 1117.8560256958008,
+ "p95": 1119.488000869751,
+ "p99": 1128.8319826126099
},
"roundtrip": {
- "p50": 404.7999978065491,
- "p90": 407.1680009365082,
- "p95": 407.99999237060547,
- "p99": 411.3920032978058
+ "p50": 1777.1519422531128,
+ "p90": 1790.6240224838257,
+ "p95": 1798.1120347976685,
+ "p99": 1832.1280479431152
},
"isolatedSum": {
- "p50": 809.5999956130981,
- "p90": 814.3360018730164,
- "p95": 815.9999847412109,
- "p99": 822.7840065956116
+ "p50": 1800.000011920929,
+ "p90": 1820.8640217781067,
+ "p95": 1825.215995311737,
+ "p99": 1843.8400030136108
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1132285952,
+ "combineLogicalBytes": 1132285952,
+ "fanoutMean": 4.8206787109375,
+ "recvTokensMax": 15694,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34379,35 +35040,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 755.6480169296265,
- "p90": 758.9439749717712,
- "p95": 759.7439885139465,
- "p99": 764.1919851303101
+ "p50": 1350.208044052124,
+ "p90": 1361.791968345642,
+ "p95": 1364.7040128707886,
+ "p99": 1372.7359771728516
},
"combine": {
- "p50": 755.6480169296265,
- "p90": 758.9439749717712,
- "p95": 759.7439885139465,
- "p99": 764.1919851303101
+ "p50": 2127.1679401397705,
+ "p90": 2136.8319988250732,
+ "p95": 2138.5281085968018,
+ "p99": 2141.2479877471924
},
"roundtrip": {
- "p50": 755.6480169296265,
- "p90": 758.9439749717712,
- "p95": 759.7439885139465,
- "p99": 764.1919851303101
+ "p50": 3460.7040882110596,
+ "p90": 3473.088026046753,
+ "p95": 3476.736068725586,
+ "p99": 3485.503911972046
},
"isolatedSum": {
- "p50": 1511.296033859253,
- "p90": 1517.8879499435425,
- "p95": 1519.487977027893,
- "p99": 1528.3839702606201
+ "p50": 3477.3759841918945,
+ "p90": 3498.6239671707153,
+ "p95": 3503.2321214675903,
+ "p99": 3513.983964920044
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2267840512,
+ "combineLogicalBytes": 2267840512,
+ "fanoutMean": 4.82763671875,
+ "recvTokensMax": 31357,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34415,107 +35076,108 @@
]
},
{
- "id": "cx-85dec801",
- "identity": "b300|flashinfer|7168|8|256|nvfp4|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_f0247ae6",
- "comparisonKey": "eb4126aa6cf3bfca",
+ "id": "cx-6461e658",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||3eb2f0d7bdba0fe",
+ "colorKey": "gb300_46b172da",
+ "comparisonKey": "398178595fe92367",
"schemaVersion": 3,
- "generatedAt": "2026-06-28T01:38:25.905345+00:00",
+ "generatedAt": "2026-06-29T13:46:12.216671+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_04",
- "sku": "b300",
- "backend": "flashinfer",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · flashinfer · nvfp4",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-mild+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "zipf-mild",
+ "routingLabel": "zipf-mild+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "nvfp4",
+ "eplbEnabled": true,
+ "dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 148,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
+ "achievedFraction": 0.1316,
+ "configuredUnits": 20,
+ "deviceUnits": 152,
+ "resourceClass": "backend-tuned",
+ "conformanceClass": "backend-default",
+ "fixedKernel": false,
"paretoEligible": false
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
+ "traceSignature": "3eb2f0d7bdba0fe",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 2.545684814453125,
+ "eplbImbalanceAfter": 1.0001495361328125,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28307777849",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307777849",
- "createdAt": "2026-06-28T01:38:25.905345+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 62.68800050020218,
- "p90": 64.64000046253204,
- "p95": 65.66400080919266,
- "p99": 75.99999755620956
+ "p50": 123.45600128173828,
+ "p90": 137.05599308013916,
+ "p95": 143.0719941854477,
+ "p99": 186.27199530601501
},
"combine": {
- "p50": 62.68800050020218,
- "p90": 64.64000046253204,
- "p95": 65.66400080919266,
- "p99": 75.99999755620956
+ "p50": 128.00000607967377,
+ "p90": 150.36800503730774,
+ "p95": 155.39200603961945,
+ "p99": 177.72799730300903
},
"roundtrip": {
- "p50": 62.68800050020218,
- "p90": 64.64000046253204,
- "p95": 65.66400080919266,
- "p99": 75.99999755620956
+ "p50": 222.01600670814514,
+ "p90": 235.58400571346283,
+ "p95": 246.46399915218353,
+ "p99": 272.0640003681183
},
"isolatedSum": {
- "p50": 125.37600100040436,
- "p90": 129.2800009250641,
- "p95": 131.32800161838531,
- "p99": 151.99999511241913
+ "p50": 251.45600736141205,
+ "p90": 287.4239981174469,
+ "p95": 298.46400022506714,
+ "p99": 363.99999260902405
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19418112,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 78159872,
+ "combineLogicalBytes": 78159872,
+ "fanoutMean": 5.32421875,
+ "recvTokensMax": 702,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34524,34 +35186,34 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 78.36800068616867,
- "p90": 80.25600016117096,
- "p95": 81.05599880218506,
- "p99": 84.95999872684479
+ "p50": 160.99199652671814,
+ "p90": 170.33599317073822,
+ "p95": 173.15199971199036,
+ "p99": 181.85600638389587
},
"combine": {
- "p50": 78.36800068616867,
- "p90": 80.25600016117096,
- "p95": 81.05599880218506,
- "p99": 84.95999872684479
+ "p50": 168.03200542926788,
+ "p90": 171.87200486660004,
+ "p95": 177.44000256061554,
+ "p99": 185.98400056362152
},
"roundtrip": {
- "p50": 78.36800068616867,
- "p90": 80.25600016117096,
- "p95": 81.05599880218506,
- "p99": 84.95999872684479
+ "p50": 299.3920147418976,
+ "p90": 308.19201469421387,
+ "p95": 311.0400140285492,
+ "p99": 319.2319869995117
},
"isolatedSum": {
- "p50": 156.73600137233734,
- "p90": 160.51200032234192,
- "p95": 162.11199760437012,
- "p99": 169.91999745368958
+ "p50": 329.024001955986,
+ "p90": 342.20799803733826,
+ "p95": 350.5920022726059,
+ "p99": 367.8400069475174
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38972416,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 156563456,
+ "combineLogicalBytes": 156563456,
+ "fanoutMean": 5.33251953125,
+ "recvTokensMax": 1393,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
@@ -34561,35 +35223,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 118.04799735546112,
- "p90": 120.60800194740295,
- "p95": 121.34400010108948,
- "p99": 124.79999661445618
+ "p50": 229.66399788856506,
+ "p90": 238.39999735355377,
+ "p95": 240.89600145816803,
+ "p99": 251.26400589942932
},
"combine": {
- "p50": 118.04799735546112,
- "p90": 120.60800194740295,
- "p95": 121.34400010108948,
- "p99": 124.79999661445618
+ "p50": 286.52799129486084,
+ "p90": 293.11999678611755,
+ "p95": 295.00800371170044,
+ "p99": 302.4640083312988
},
"roundtrip": {
- "p50": 118.04799735546112,
- "p90": 120.60800194740295,
- "p95": 121.34400010108948,
- "p99": 124.79999661445618
+ "p50": 466.97598695755005,
+ "p90": 476.73600912094116,
+ "p95": 480.70400953292847,
+ "p99": 490.27198553085327
},
"isolatedSum": {
- "p50": 236.09599471092224,
- "p90": 241.2160038948059,
- "p95": 242.68800020217896,
- "p99": 249.59999322891235
+ "p50": 516.1919891834259,
+ "p90": 531.5199941396713,
+ "p95": 535.9040051698685,
+ "p99": 553.7280142307281
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 78066688,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 312410112,
+ "combineLogicalBytes": 312410112,
+ "fanoutMean": 5.3203125,
+ "recvTokensMax": 2773,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34598,34 +35260,34 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 204.83200252056122,
- "p90": 208.48000049591064,
- "p95": 209.1200053691864,
- "p99": 221.76000475883484
+ "p50": 357.5040102005005,
+ "p90": 365.664005279541,
+ "p95": 367.8080141544342,
+ "p99": 374.4960129261017
},
"combine": {
- "p50": 204.83200252056122,
- "p90": 208.48000049591064,
- "p95": 209.1200053691864,
- "p99": 221.76000475883484
+ "p50": 489.02401328086853,
+ "p90": 494.30400133132935,
+ "p95": 498.9120066165924,
+ "p99": 502.20799446105957
},
"roundtrip": {
- "p50": 204.83200252056122,
- "p90": 208.48000049591064,
- "p95": 209.1200053691864,
- "p99": 221.76000475883484
+ "p50": 816.7999982833862,
+ "p90": 826.304018497467,
+ "p95": 830.1759958267212,
+ "p99": 842.2719836235046
},
"isolatedSum": {
- "p50": 409.66400504112244,
- "p90": 416.9600009918213,
- "p95": 418.2400107383728,
- "p99": 443.5200095176697
+ "p50": 846.528023481369,
+ "p90": 859.9680066108704,
+ "p95": 866.7200207710266,
+ "p99": 876.7040073871613
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155860992,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 8,
+ "dispatchLogicalBytes": 622712832,
+ "combineLogicalBytes": 622712832,
+ "fanoutMean": 5.3023681640625,
+ "recvTokensMax": 5498,
"stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
@@ -34635,35 +35297,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 359.51998829841614,
- "p90": 362.5600039958954,
- "p95": 363.77599835395813,
- "p99": 371.16798758506775
+ "p50": 613.53600025177,
+ "p90": 623.3599781990051,
+ "p95": 627.232015132904,
+ "p99": 638.1760239601135
},
"combine": {
- "p50": 359.51998829841614,
- "p90": 362.5600039958954,
- "p95": 363.77599835395813,
- "p99": 371.16798758506775
+ "p50": 868.9280152320862,
+ "p90": 882.6239705085754,
+ "p95": 892.5759792327881,
+ "p99": 912.2560024261475
},
"roundtrip": {
- "p50": 359.51998829841614,
- "p90": 362.5600039958954,
- "p95": 363.77599835395813,
- "p99": 371.16798758506775
+ "p50": 1452.1280527114868,
+ "p90": 1467.360019683838,
+ "p95": 1480.1280498504639,
+ "p99": 1500.8959770202637
},
"isolatedSum": {
- "p50": 719.0399765968323,
- "p90": 725.1200079917908,
- "p95": 727.5519967079163,
- "p99": 742.3359751701355
+ "p50": 1482.4640154838562,
+ "p90": 1505.9839487075806,
+ "p95": 1519.8079943656921,
+ "p99": 1550.432026386261
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 310951424,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 8,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 1245038592,
+ "combineLogicalBytes": 1245038592,
+ "fanoutMean": 5.30072021484375,
+ "recvTokensMax": 10955,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34672,35 +35334,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 662.0479822158813,
- "p90": 664.9919748306274,
- "p95": 665.9520268440247,
- "p99": 692.6400065422058
+ "p50": 1130.4320096969604,
+ "p90": 1137.7919912338257,
+ "p95": 1140.9920454025269,
+ "p99": 1147.1680402755737
},
"combine": {
- "p50": 662.0479822158813,
- "p90": 664.9919748306274,
- "p95": 665.9520268440247,
- "p99": 692.6400065422058
+ "p50": 1613.0239963531494,
+ "p90": 1622.1120357513428,
+ "p95": 1623.9999532699585,
+ "p99": 1631.9040060043335
},
"roundtrip": {
- "p50": 662.0479822158813,
- "p90": 664.9919748306274,
- "p95": 665.9520268440247,
- "p99": 692.6400065422058
+ "p50": 2719.4559574127197,
+ "p90": 2728.447914123535,
+ "p95": 2731.3599586486816,
+ "p99": 2740.0639057159424
},
"isolatedSum": {
- "p50": 1324.0959644317627,
- "p90": 1329.9839496612549,
- "p95": 1331.9040536880493,
- "p99": 1385.2800130844116
+ "p50": 2743.45600605011,
+ "p90": 2759.9040269851685,
+ "p95": 2764.9919986724854,
+ "p99": 2779.072046279907
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 621752320,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 8,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 2489460736,
+ "combineLogicalBytes": 2489460736,
+ "fanoutMean": 5.299407958984375,
+ "recvTokensMax": 21864,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -34708,47 +35370,48 @@
]
},
{
- "id": "cx-2fdde1de",
- "identity": "b300|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "b300_e952b5c0",
- "comparisonKey": "97ed86fe35a5b2af",
+ "id": "cx-a4c44aca",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e",
+ "colorKey": "gb300_6e04dda3",
+ "comparisonKey": "1dcefebf80b3425d",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T17:36:18.907415+00:00",
+ "generatedAt": "2026-06-29T13:49:15.844464+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_16",
- "sku": "b300",
- "backend": "uccl",
- "phase": "decode",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · uccl · bf16",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-moderate",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
"topk": 8,
"experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "routing": "zipf-moderate",
+ "routingLabel": "zipf-moderate",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -34756,318 +35419,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "bfb01c61bdf926e",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": null,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28296669967",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296669967",
- "createdAt": "2026-06-27T17:36:18.907415+00:00",
- "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 81.24800026416779,
- "p90": 87.45600283145905,
- "p95": 88.41600269079208,
- "p99": 92.99200028181076
- },
- "combine": {
- "p50": 74.23999905586243,
- "p90": 76.48000121116638,
- "p95": 77.11999863386154,
- "p99": 85.66399663686752
- },
- "roundtrip": {
- "p50": 140.19200205802917,
- "p90": 144.31999623775482,
- "p95": 145.37599682807922,
- "p99": 151.93599462509155
- },
- "isolatedSum": {
- "p50": 155.4879993200302,
- "p90": 163.93600404262543,
- "p95": 165.53600132465363,
- "p99": 178.65599691867828
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 80.92799782752991,
- "p90": 85.37600189447403,
- "p95": 86.2400010228157,
- "p99": 89.75999802350998
- },
- "combine": {
- "p50": 74.30399954319,
- "p90": 76.25599950551987,
- "p95": 77.18399912118912,
- "p99": 84.44800227880478
- },
- "roundtrip": {
- "p50": 141.6960060596466,
- "p90": 145.50399780273438,
- "p95": 146.7200070619583,
- "p99": 162.27200627326965
- },
- "isolatedSum": {
- "p50": 155.2319973707199,
- "p90": 161.6320013999939,
- "p95": 163.42400014400482,
- "p99": 174.20800030231476
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 81.40800148248672,
- "p90": 84.44800227880478,
- "p95": 87.77599781751633,
- "p99": 107.04000294208527
+ "p50": 122.20799922943115,
+ "p90": 128.25599312782288,
+ "p95": 132.76800513267517,
+ "p99": 143.51999759674072
},
"combine": {
- "p50": 75.6480023264885,
- "p90": 84.73599702119827,
- "p95": 85.21600067615509,
- "p99": 96.38399630784988
+ "p50": 132.57600367069244,
+ "p90": 140.22399485111237,
+ "p95": 142.4960047006607,
+ "p99": 144.896000623703
},
"roundtrip": {
- "p50": 138.2720023393631,
- "p90": 141.66399836540222,
- "p95": 143.51999759674072,
- "p99": 149.21599626541138
+ "p50": 232.4800044298172,
+ "p90": 239.55200612545013,
+ "p95": 242.23999679088593,
+ "p99": 250.07998943328857
},
"isolatedSum": {
- "p50": 157.05600380897522,
- "p90": 169.18399930000305,
- "p95": 172.99199849367142,
- "p99": 203.42399924993515
+ "p50": 254.7840029001236,
+ "p90": 268.47998797893524,
+ "p95": 275.2640098333359,
+ "p99": 288.4159982204437
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 49946624,
+ "combineLogicalBytes": 49946624,
+ "fanoutMean": 3.40234375,
+ "recvTokensMax": 1022,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 81.40800148248672,
- "p90": 83.93599838018417,
- "p95": 85.28000116348267,
- "p99": 93.28000247478485
+ "p50": 155.4879993200302,
+ "p90": 179.83999848365784,
+ "p95": 191.03999435901642,
+ "p99": 217.8879976272583
},
"combine": {
- "p50": 84.54400300979614,
- "p90": 85.66399663686752,
- "p95": 86.01599931716919,
- "p99": 88.70399743318558
+ "p50": 181.60000443458557,
+ "p90": 204.70400154590607,
+ "p95": 230.68800568580627,
+ "p99": 242.5280064344406
},
"roundtrip": {
- "p50": 138.84800672531128,
- "p90": 143.99999380111694,
- "p95": 147.0080018043518,
- "p99": 160.22400557994843
+ "p50": 316.0000145435333,
+ "p90": 329.5679986476898,
+ "p95": 340.4160141944885,
+ "p99": 372.48000502586365
},
"isolatedSum": {
- "p50": 165.95200449228287,
- "p90": 169.5999950170517,
- "p95": 171.29600048065186,
- "p99": 181.98399990797043
+ "p50": 337.0880037546158,
+ "p90": 384.5440000295639,
+ "p95": 421.7280000448227,
+ "p99": 460.4160040616989
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 100509696,
+ "combineLogicalBytes": 100509696,
+ "fanoutMean": 3.42333984375,
+ "recvTokensMax": 2046,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 82.84799754619598,
- "p90": 84.99199897050858,
- "p95": 87.10400015115738,
- "p99": 97.37599641084671
+ "p50": 227.00800001621246,
+ "p90": 234.8479926586151,
+ "p95": 237.69600689411163,
+ "p99": 246.97600305080414
},
"combine": {
- "p50": 84.95999872684479,
- "p90": 85.88799834251404,
- "p95": 86.2400010228157,
- "p99": 92.22400188446045
+ "p50": 342.75200963020325,
+ "p90": 357.85600543022156,
+ "p95": 372.25601077079773,
+ "p99": 393.2799994945526
},
"roundtrip": {
- "p50": 140.25600254535675,
- "p90": 147.20000326633453,
- "p95": 148.5760062932968,
- "p99": 155.07200360298157
+ "p50": 544.0959930419922,
+ "p90": 556.8959712982178,
+ "p95": 565.7600164413452,
+ "p99": 583.1040143966675
},
"isolatedSum": {
- "p50": 167.80799627304077,
- "p90": 170.8799973130226,
- "p95": 173.34400117397308,
- "p99": 189.59999829530716
+ "p50": 569.7600096464157,
+ "p90": 592.7039980888367,
+ "p95": 609.9520176649094,
+ "p99": 640.2560025453568
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 201678848,
+ "combineLogicalBytes": 201678848,
+ "fanoutMean": 3.4345703125,
+ "recvTokensMax": 4094,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 99.84000027179718,
- "p90": 102.11200267076492,
- "p95": 103.58399897813797,
- "p99": 106.84800148010254
+ "p50": 367.0080006122589,
+ "p90": 380.92800974845886,
+ "p95": 395.52000164985657,
+ "p99": 414.0479862689972
},
"combine": {
- "p50": 85.85599809885025,
- "p90": 87.26400136947632,
- "p95": 88.48000317811966,
- "p99": 96.41599655151367
+ "p50": 622.8160262107849,
+ "p90": 648.7039923667908,
+ "p95": 659.2000126838684,
+ "p99": 677.40797996521
},
"roundtrip": {
- "p50": 150.59199929237366,
- "p90": 153.56799960136414,
- "p95": 155.008003115654,
- "p99": 163.93600404262543
+ "p50": 954.4960260391235,
+ "p90": 977.4399995803833,
+ "p95": 991.5199875831604,
+ "p99": 1010.0480318069458
},
"isolatedSum": {
- "p50": 185.69599837064743,
- "p90": 189.37600404024124,
- "p95": 192.06400215625763,
- "p99": 203.2639980316162
+ "p50": 989.8240268230438,
+ "p90": 1029.6320021152496,
+ "p95": 1054.720014333725,
+ "p99": 1091.4559662342072
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 405035008,
+ "combineLogicalBytes": 405035008,
+ "fanoutMean": 3.4488525390625,
+ "recvTokensMax": 8189,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 111.10399663448334,
- "p90": 114.30399864912033,
- "p95": 115.35999923944473,
- "p99": 118.97599697113037
+ "p50": 683.135986328125,
+ "p90": 693.2799816131592,
+ "p95": 696.7039704322815,
+ "p99": 702.5920152664185
},
"combine": {
- "p50": 99.04000163078308,
- "p90": 102.24000364542007,
- "p95": 109.0560033917427,
- "p99": 112.35199868679047
+ "p50": 1128.607988357544,
+ "p90": 1132.0960521697998,
+ "p95": 1133.3119869232178,
+ "p99": 1142.016053199768
},
"roundtrip": {
- "p50": 177.2480010986328,
- "p90": 184.35199558734894,
- "p95": 186.27199530601501,
- "p99": 194.36800479888916
+ "p50": 1789.5679473876953,
+ "p90": 1799.8080253601074,
+ "p95": 1802.2719621658325,
+ "p99": 1810.2400302886963
},
"isolatedSum": {
- "p50": 210.14399826526642,
- "p90": 216.5440022945404,
- "p95": 224.41600263118744,
- "p99": 231.32799565792084
+ "p50": 1811.743974685669,
+ "p90": 1825.376033782959,
+ "p95": 1830.0159573554993,
+ "p99": 1844.6080684661865
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 808822784,
+ "combineLogicalBytes": 808822784,
+ "fanoutMean": 3.44354248046875,
+ "recvTokensMax": 16380,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 124.51200187206268,
- "p90": 127.3919939994812,
- "p95": 128.54400277137756,
- "p99": 138.7840062379837
+ "p50": 1342.8479433059692,
+ "p90": 1354.207992553711,
+ "p95": 1358.1119775772095,
+ "p99": 1363.2639646530151
},
"combine": {
- "p50": 122.43200093507767,
- "p90": 123.32800030708313,
- "p95": 124.54400211572647,
- "p99": 133.69600474834442
+ "p50": 2158.720016479492,
+ "p90": 2175.8079528808594,
+ "p95": 2198.591947555542,
+ "p99": 2221.280097961426
},
"roundtrip": {
- "p50": 212.2880071401596,
- "p90": 216.25599265098572,
- "p95": 218.46400201320648,
- "p99": 222.46399521827698
+ "p50": 3481.8880558013916,
+ "p90": 3498.975992202759,
+ "p95": 3513.8559341430664,
+ "p99": 3535.583972930908
},
"isolatedSum": {
- "p50": 246.94400280714035,
- "p90": 250.71999430656433,
- "p95": 253.08800488710403,
- "p99": 272.4800109863281
+ "p50": 3501.5679597854614,
+ "p90": 3530.0159454345703,
+ "p95": 3556.7039251327515,
+ "p99": 3584.544062614441
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 1619795968,
+ "combineLogicalBytes": 1619795968,
+ "fanoutMean": 3.4481201171875,
+ "recvTokensMax": 32761,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -35075,47 +35664,48 @@
]
},
{
- "id": "cx-8d828593",
- "identity": "b300|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "b300_e952b5c0",
- "comparisonKey": "6e0e03618d466091",
+ "id": "cx-3622f171",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef",
+ "colorKey": "gb300_04de5a5b",
+ "comparisonKey": "173fe7343d391895",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T17:36:27.427420+00:00",
+ "generatedAt": "2026-06-29T13:50:21.485150+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "b300-nv_07",
- "sku": "b300",
- "backend": "uccl",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
+ "backend": "deepep",
"phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "B300 EP8 · uccl · bf16",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · bf16 · zipf-moderate+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "zipf-moderate",
+ "routingLabel": "zipf-moderate+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1351,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 148,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -35123,59 +35713,59 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
+ "traceSignature": "45b103b10fbcaef",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 4.895263671875,
+ "eplbImbalanceAfter": 1.0000902811686199,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28296669967",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296669967",
- "createdAt": "2026-06-27T17:36:27.427420+00:00",
- "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 125.2799928188324,
- "p90": 127.9039978981018,
- "p95": 128.9599984884262,
- "p99": 135.51999628543854
+ "p50": 123.83999675512314,
+ "p90": 131.48799538612366,
+ "p95": 135.29600203037262,
+ "p99": 144.99199390411377
},
"combine": {
- "p50": 122.6240023970604,
- "p90": 123.52000176906586,
- "p95": 124.4800016283989,
- "p99": 126.62400305271149
+ "p50": 122.94399738311768,
+ "p90": 132.1599930524826,
+ "p95": 134.43200290203094,
+ "p99": 143.42400431632996
},
"roundtrip": {
- "p50": 212.6079946756363,
- "p90": 216.35200083255768,
- "p95": 218.62399578094482,
- "p99": 233.72800648212433
+ "p50": 221.95200622081757,
+ "p90": 229.5680046081543,
+ "p95": 231.87200725078583,
+ "p99": 237.95199394226074
},
"isolatedSum": {
- "p50": 247.9039952158928,
- "p90": 251.42399966716766,
- "p95": 253.4400001168251,
- "p99": 262.14399933815
+ "p50": 246.7839941382408,
+ "p90": 263.64798843860626,
+ "p95": 269.72800493240356,
+ "p99": 288.4159982204437
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 77385728,
+ "combineLogicalBytes": 77385728,
+ "fanoutMean": 5.271484375,
+ "recvTokensMax": 691,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -35184,35 +35774,35 @@
"tokensPerRank": 256,
"globalTokens": 2048,
"dispatch": {
- "p50": 157.3439985513687,
- "p90": 162.9759967327118,
- "p95": 164.12800550460815,
- "p99": 174.43199455738068
+ "p50": 161.69600188732147,
+ "p90": 170.46399414539337,
+ "p95": 173.40800166130066,
+ "p99": 177.7919977903366
},
"combine": {
- "p50": 160.38399934768677,
- "p90": 169.98399794101715,
- "p95": 170.81600427627563,
- "p99": 174.6560037136078
+ "p50": 167.1999990940094,
+ "p90": 170.75200378894806,
+ "p95": 172.06400632858276,
+ "p99": 180.09600043296814
},
"roundtrip": {
- "p50": 290.5920147895813,
- "p90": 297.5040078163147,
- "p95": 300.927996635437,
- "p99": 310.91201305389404
+ "p50": 298.8480031490326,
+ "p90": 307.2640001773834,
+ "p95": 310.2079927921295,
+ "p99": 317.6319897174835
},
"isolatedSum": {
- "p50": 317.7279978990555,
- "p90": 332.95999467372894,
- "p95": 334.9440097808838,
- "p99": 349.08799827098846
+ "p50": 328.8960009813309,
+ "p90": 341.21599793434143,
+ "p95": 345.4720079898834,
+ "p99": 357.88799822330475
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 155172864,
+ "combineLogicalBytes": 155172864,
+ "fanoutMean": 5.28515625,
+ "recvTokensMax": 1378,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -35221,35 +35811,35 @@
"tokensPerRank": 512,
"globalTokens": 4096,
"dispatch": {
- "p50": 222.3999947309494,
- "p90": 224.95999932289124,
- "p95": 226.30399465560913,
- "p99": 234.68799889087677
+ "p50": 232.09600150585175,
+ "p90": 255.10400533676147,
+ "p95": 262.2080147266388,
+ "p99": 287.84000873565674
},
"combine": {
- "p50": 272.2240090370178,
- "p90": 281.66401386260986,
- "p95": 282.24000334739685,
- "p99": 296.3840067386627
+ "p50": 282.55999088287354,
+ "p90": 305.34398555755615,
+ "p95": 319.7759985923767,
+ "p99": 340.31999111175537
},
"roundtrip": {
- "p50": 466.7840003967285,
- "p90": 473.66398572921753,
- "p95": 476.73600912094116,
- "p99": 491.93599820137024
+ "p50": 463.9680087566376,
+ "p90": 481.471985578537,
+ "p95": 493.9199984073639,
+ "p99": 512.3839974403381
},
"isolatedSum": {
- "p50": 494.6240037679672,
- "p90": 506.6240131855011,
- "p95": 508.543998003006,
- "p99": 531.0720056295395
+ "p50": 514.6559923887253,
+ "p90": 560.4479908943176,
+ "p95": 581.9840133190155,
+ "p99": 628.1599998474121
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 310546432,
+ "combineLogicalBytes": 310546432,
+ "fanoutMean": 5.28857421875,
+ "recvTokensMax": 2745,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -35258,35 +35848,35 @@
"tokensPerRank": 1024,
"globalTokens": 8192,
"dispatch": {
- "p50": 346.015989780426,
- "p90": 350.0800132751465,
- "p95": 351.6800105571747,
- "p99": 360.76799035072327
+ "p50": 353.0240058898926,
+ "p90": 360.4159951210022,
+ "p95": 363.647997379303,
+ "p99": 370.4639971256256
},
"combine": {
- "p50": 466.2719964981079,
- "p90": 469.5360064506531,
- "p95": 477.63198614120483,
- "p99": 491.8079972267151
+ "p50": 481.31200671195984,
+ "p90": 489.439994096756,
+ "p95": 491.07199907302856,
+ "p99": 493.9520061016083
},
"roundtrip": {
- "p50": 785.152018070221,
- "p90": 791.1360263824463,
- "p95": 795.2319979667664,
- "p99": 807.9360127449036
+ "p50": 807.0719838142395,
+ "p90": 815.0079846382141,
+ "p95": 816.9599771499634,
+ "p99": 822.2399950027466
},
"isolatedSum": {
- "p50": 812.2879862785339,
- "p90": 819.6160197257996,
- "p95": 829.3119966983795,
- "p99": 852.5759875774384
+ "p50": 834.3360126018524,
+ "p90": 849.8559892177582,
+ "p95": 854.7199964523315,
+ "p99": 864.4160032272339
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 620619776,
+ "combineLogicalBytes": 620619776,
+ "fanoutMean": 5.2845458984375,
+ "recvTokensMax": 5526,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -35295,35 +35885,35 @@
"tokensPerRank": 2048,
"globalTokens": 16384,
"dispatch": {
- "p50": 592.0000076293945,
- "p90": 598.8479852676392,
- "p95": 602.3359894752502,
- "p99": 609.5679998397827
+ "p50": 613.1200194358826,
+ "p90": 632.0000290870667,
+ "p95": 650.4639983177185,
+ "p99": 669.4080233573914
},
"combine": {
- "p50": 826.9439935684204,
- "p90": 835.9040021896362,
- "p95": 838.1119966506958,
- "p99": 860.6079816818237
+ "p50": 874.6880292892456,
+ "p90": 885.4719996452332,
+ "p95": 890.7840251922607,
+ "p99": 911.2319946289062
},
"roundtrip": {
- "p50": 1397.760033607483,
- "p90": 1407.039999961853,
- "p95": 1411.2639427185059,
- "p99": 1424.3839979171753
+ "p50": 1460.4159593582153,
+ "p90": 1475.6159782409668,
+ "p95": 1487.0400428771973,
+ "p99": 1511.6159915924072
},
"isolatedSum": {
- "p50": 1418.944001197815,
- "p90": 1434.7519874572754,
- "p95": 1440.447986125946,
- "p99": 1470.1759815216064
+ "p50": 1487.8080487251282,
+ "p90": 1517.4720287322998,
+ "p95": 1541.2480235099792,
+ "p99": 1580.6400179862976
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1239175168,
+ "combineLogicalBytes": 1239175168,
+ "fanoutMean": 5.2757568359375,
+ "recvTokensMax": 11165,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -35332,35 +35922,35 @@
"tokensPerRank": 4096,
"globalTokens": 32768,
"dispatch": {
- "p50": 1092.576026916504,
- "p90": 1101.0559797286987,
- "p95": 1106.4640283584595,
- "p99": 1123.5840320587158
+ "p50": 1129.3120384216309,
+ "p90": 1136.031985282898,
+ "p95": 1138.11194896698,
+ "p99": 1143.1679725646973
},
"combine": {
- "p50": 1536.7679595947266,
- "p90": 1549.2479801177979,
- "p95": 1559.775948524475,
- "p99": 1572.0000267028809
+ "p50": 1624.2560148239136,
+ "p90": 1634.559988975525,
+ "p95": 1636.2240314483643,
+ "p99": 1642.2719955444336
},
"roundtrip": {
- "p50": 2607.2959899902344,
- "p90": 2621.471881866455,
- "p95": 2628.511905670166,
- "p99": 2641.5040493011475
+ "p50": 2725.600004196167,
+ "p90": 2734.1439723968506,
+ "p95": 2737.6959323883057,
+ "p99": 2742.1441078186035
},
"isolatedSum": {
- "p50": 2629.3439865112305,
- "p90": 2650.3039598464966,
- "p95": 2666.2399768829346,
- "p99": 2695.5840587615967
+ "p50": 2753.5680532455444,
+ "p90": 2770.591974258423,
+ "p95": 2774.3359804153442,
+ "p99": 2785.439968109131
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 2481604608,
+ "combineLogicalBytes": 2481604608,
+ "fanoutMean": 5.282684326171875,
+ "recvTokensMax": 22165,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -35368,47 +35958,48 @@
]
},
{
- "id": "cx-32323f85",
- "identity": "h100|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "h100_42947950",
- "comparisonKey": "13b620ce9b7928e9",
+ "id": "cx-cf8cb8f1",
+ "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef",
+ "colorKey": "gb300_8cda999b",
+ "comparisonKey": "f8887e85df9ef186",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:16:12.750378+00:00",
+ "generatedAt": "2026-06-29T13:42:04.489904+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_00",
- "sku": "h100",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "Qwen3.5",
+ "label": "GB300 EP8 · deepep · bf16 · zipf+eplb",
+ "model": "DeepSeek-V3 (EPLB physical)",
"shape": {
- "hidden": 4096,
+ "hidden": 7168,
"topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
+ "experts": 288,
+ "routing": "zipf",
+ "routingLabel": "zipf+eplb",
"routingStep": 0,
"unevenTokens": "none",
- "eplbEnabled": false,
+ "eplbEnabled": true,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1515,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 132,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -35416,318 +36007,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "traceSignature": "45b103b10fbcaef",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
+ "eplbImbalanceBefore": 4.895263671875,
+ "eplbImbalanceAfter": 1.0000902811686199,
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287504962",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287504962",
- "createdAt": "2026-06-27T11:16:12.750378+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.44799679517746,
- "p90": 104.54399883747101,
- "p95": 106.27199709415436,
- "p99": 110.07999628782272
- },
- "combine": {
- "p50": 71.32799923419952,
- "p90": 73.34399968385696,
- "p95": 73.88799637556076,
- "p99": 79.68000322580338
- },
- "roundtrip": {
- "p50": 136.80000603199005,
- "p90": 143.74400675296783,
- "p95": 145.50399780273438,
- "p99": 150.78400075435638
- },
- "isolatedSum": {
- "p50": 167.77599602937698,
- "p90": 177.88799852132797,
- "p95": 180.15999346971512,
- "p99": 189.7599995136261
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 344064,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 68.9919963479042,
- "p90": 101.6319990158081,
- "p95": 103.5199984908104,
- "p99": 110.91200262308121
- },
- "combine": {
- "p50": 63.45599889755249,
- "p90": 72.95999675989151,
- "p95": 73.27999919652939,
- "p99": 78.87999713420868
- },
- "roundtrip": {
- "p50": 116.28799885511398,
- "p90": 142.2719955444336,
- "p95": 144.57599818706512,
- "p99": 150.43200552463531
- },
- "isolatedSum": {
- "p50": 132.4479952454567,
- "p90": 174.59199577569962,
- "p95": 176.79999768733978,
- "p99": 189.7919997572899
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 704512,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 69.21599805355072,
- "p90": 134.20799374580383,
- "p95": 137.43999600410461,
- "p99": 141.34399592876434
+ "p50": 124.64000284671783,
+ "p90": 132.4480026960373,
+ "p95": 136.25599443912506,
+ "p99": 147.10399508476257
},
"combine": {
- "p50": 63.58399987220764,
- "p90": 86.97599917650223,
- "p95": 87.8399983048439,
- "p99": 162.36799955368042
+ "p50": 123.00799787044525,
+ "p90": 132.1599930524826,
+ "p95": 133.7279975414276,
+ "p99": 140.76800644397736
},
"roundtrip": {
- "p50": 116.80000275373459,
- "p90": 144.28800344467163,
- "p95": 147.10399508476257,
- "p99": 151.39199793338776
+ "p50": 223.00800681114197,
+ "p90": 230.5919975042343,
+ "p95": 233.98399353027344,
+ "p99": 241.18399620056152
},
"isolatedSum": {
- "p50": 132.79999792575836,
- "p90": 221.18399292230606,
- "p95": 225.27999430894852,
- "p99": 303.71199548244476
+ "p50": 247.6480007171631,
+ "p90": 264.6079957485199,
+ "p95": 269.9839919805527,
+ "p99": 287.87200152873993
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1384448,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 77385728,
+ "combineLogicalBytes": 77385728,
+ "fanoutMean": 5.271484375,
+ "recvTokensMax": 691,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 83.52000266313553,
- "p90": 104.60799932479858,
- "p95": 113.21599781513214,
- "p99": 352.54400968551636
+ "p50": 162.01600432395935,
+ "p90": 169.37600076198578,
+ "p95": 172.03199863433838,
+ "p99": 182.11199343204498
},
"combine": {
- "p50": 64.64000046253204,
- "p90": 72.83200323581696,
- "p95": 73.18399846553802,
- "p99": 77.98399776220322
+ "p50": 167.61599481105804,
+ "p90": 171.1679995059967,
+ "p95": 173.0239987373352,
+ "p99": 179.80800569057465
},
"roundtrip": {
- "p50": 117.53600090742111,
- "p90": 146.30399644374847,
- "p95": 149.34399724006653,
- "p99": 153.60000729560852
+ "p50": 299.26401376724243,
+ "p90": 307.23199248313904,
+ "p95": 310.9759986400604,
+ "p99": 318.015992641449
},
"isolatedSum": {
- "p50": 148.16000312566757,
- "p90": 177.44000256061554,
- "p95": 186.39999628067017,
- "p99": 430.5280074477196
+ "p50": 329.6319991350174,
+ "p90": 340.5440002679825,
+ "p95": 345.0559973716736,
+ "p99": 361.91999912261963
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2744320,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 3,
+ "dispatchLogicalBytes": 155172864,
+ "combineLogicalBytes": 155172864,
+ "fanoutMean": 5.28515625,
+ "recvTokensMax": 1378,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 96.3200032711029,
- "p90": 102.14400291442871,
- "p95": 104.47999835014343,
- "p99": 109.56799983978271
+ "p50": 229.5359969139099,
+ "p90": 238.97600173950195,
+ "p95": 241.69600009918213,
+ "p99": 249.28000569343567
},
"combine": {
- "p50": 71.80800288915634,
- "p90": 74.11199808120728,
- "p95": 78.8159966468811,
- "p99": 80.19199967384338
+ "p50": 281.2800109386444,
+ "p90": 287.4560058116913,
+ "p95": 290.6560003757477,
+ "p99": 294.14400458335876
},
"roundtrip": {
- "p50": 143.71199905872345,
- "p90": 151.39199793338776,
- "p95": 153.02400290966034,
- "p99": 157.95199573040009
+ "p50": 463.3280038833618,
+ "p90": 472.22399711608887,
+ "p95": 475.8400022983551,
+ "p99": 481.440007686615
},
"isolatedSum": {
- "p50": 168.12800616025925,
- "p90": 176.256000995636,
- "p95": 183.29599499702454,
- "p99": 189.7599995136261
+ "p50": 510.8160078525543,
+ "p90": 526.4320075511932,
+ "p95": 532.3520004749298,
+ "p99": 543.4240102767944
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 5464064,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 310546432,
+ "combineLogicalBytes": 310546432,
+ "fanoutMean": 5.28857421875,
+ "recvTokensMax": 2745,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 78.11199873685837,
- "p90": 98.65599870681763,
- "p95": 103.32799702882767,
- "p99": 114.3999993801117
+ "p50": 352.1279990673065,
+ "p90": 360.25598645210266,
+ "p95": 363.0400002002716,
+ "p99": 368.73599886894226
},
"combine": {
- "p50": 65.92000275850296,
- "p90": 79.29600030183792,
- "p95": 80.44800162315369,
- "p99": 81.31200075149536
+ "p50": 481.5039932727814,
+ "p90": 489.8560047149658,
+ "p95": 491.67999625205994,
+ "p99": 493.75998973846436
},
"roundtrip": {
- "p50": 117.53600090742111,
- "p90": 150.36800503730774,
- "p95": 152.63999998569489,
- "p99": 155.7759940624237
+ "p50": 807.744026184082,
+ "p90": 815.0720000267029,
+ "p95": 818.015992641449,
+ "p99": 824.7680068016052
},
"isolatedSum": {
- "p50": 144.03200149536133,
- "p90": 177.95199900865555,
- "p95": 183.77599865198135,
- "p99": 195.71200013160706
+ "p50": 833.6319923400879,
+ "p90": 850.1119911670685,
+ "p95": 854.7199964523315,
+ "p99": 862.4959886074066
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 11124736,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 620619776,
+ "combineLogicalBytes": 620619776,
+ "fanoutMean": 5.2845458984375,
+ "recvTokensMax": 5526,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 88.35200220346451,
- "p90": 136.1279934644699,
- "p95": 138.91200721263885,
- "p99": 147.2959965467453
+ "p50": 611.6480231285095,
+ "p90": 620.2239990234375,
+ "p95": 622.5280165672302,
+ "p99": 630.6560039520264
},
"combine": {
- "p50": 74.0479975938797,
- "p90": 96.3200032711029,
- "p95": 102.11200267076492,
- "p99": 104.3199971318245
+ "p50": 874.6560215950012,
+ "p90": 883.7440013885498,
+ "p95": 885.3759765625,
+ "p99": 891.327977180481
},
"roundtrip": {
- "p50": 133.760005235672,
- "p90": 191.16799533367157,
- "p95": 192.73599982261658,
- "p99": 197.9839950799942
+ "p50": 1460.7360363006592,
+ "p90": 1469.5359468460083,
+ "p95": 1472.0959663391113,
+ "p99": 1478.335976600647
},
"isolatedSum": {
- "p50": 162.3999997973442,
- "p90": 232.44799673557281,
- "p95": 241.02400988340378,
- "p99": 251.6159936785698
+ "p50": 1486.3040447235107,
+ "p90": 1503.9680004119873,
+ "p95": 1507.9039931297302,
+ "p99": 1521.9839811325073
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22192128,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 1,
+ "dispatchLogicalBytes": 1239175168,
+ "combineLogicalBytes": 1239175168,
+ "fanoutMean": 5.2757568359375,
+ "recvTokensMax": 11165,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 95.83999961614609,
- "p90": 113.6000007390976,
- "p95": 117.88800358772278,
- "p99": 121.79200351238251
+ "p50": 1128.5760402679443,
+ "p90": 1135.424017906189,
+ "p95": 1137.1519565582275,
+ "p99": 1142.3360109329224
},
"combine": {
- "p50": 88.28800171613693,
- "p90": 96.16000205278397,
- "p95": 96.6079980134964,
- "p99": 104.09600287675858
+ "p50": 1623.2000589370728,
+ "p90": 1631.6479444503784,
+ "p95": 1634.6880197525024,
+ "p99": 1638.2720470428467
},
"roundtrip": {
- "p50": 159.42400693893433,
- "p90": 173.3119934797287,
- "p95": 175.135999917984,
- "p99": 178.01600694656372
+ "p50": 2724.6720790863037,
+ "p90": 2733.407974243164,
+ "p95": 2735.5520725250244,
+ "p99": 2739.936113357544
},
"isolatedSum": {
- "p50": 184.12800133228302,
- "p90": 209.76000279188156,
- "p95": 214.49600160121918,
- "p99": 225.88800638914108
+ "p50": 2751.776099205017,
+ "p90": 2767.0719623565674,
+ "p95": 2771.83997631073,
+ "p99": 2780.608057975769
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 2,
+ "dispatchLogicalBytes": 2481604608,
+ "combineLogicalBytes": 2481604608,
+ "fanoutMean": 5.282684326171875,
+ "recvTokensMax": 22165,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -35735,47 +36252,48 @@
]
},
{
- "id": "cx-1c34e3d1",
- "identity": "h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "ad5ebda2342035d4",
+ "id": "cx-eb796145",
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb300_20de545c",
+ "comparisonKey": "c081f7cbf8991063",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:21.600015+00:00",
+ "generatedAt": "2026-06-29T13:46:36.139153+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_04",
- "sku": "h100",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "cached-layout-comm-only-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "Qwen3.5",
+ "label": "GB300 EP8 · deepep · bf16 [cl]",
+ "model": "DeepSeek-V3/V4",
"shape": {
- "hidden": 4096,
+ "hidden": 7168,
"topk": 8,
- "experts": 128,
+ "experts": 256,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
"dispatchDtype": "bf16",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1515,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 132,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -35783,318 +36301,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271684428",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271684428",
- "createdAt": "2026-06-26T23:51:21.600015+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 98.4639972448349,
- "p90": 106.52799904346466,
- "p95": 128.12800705432892,
- "p99": 158.87999534606934
+ "p50": 106.52799904346466,
+ "p90": 112.92800307273865,
+ "p95": 116.06399714946747,
+ "p99": 122.11199849843979
},
"combine": {
- "p50": 66.52799993753433,
- "p90": 73.34399968385696,
- "p95": 81.34400099515915,
- "p99": 91.96799993515015
+ "p50": 122.20799922943115,
+ "p90": 129.85600531101227,
+ "p95": 132.1599930524826,
+ "p99": 135.13599336147308
},
"roundtrip": {
- "p50": 139.42399621009827,
- "p90": 146.84799313545227,
- "p95": 150.56000649929047,
- "p99": 186.81600689888
+ "p50": 205.56800067424774,
+ "p90": 211.16800606250763,
+ "p95": 213.28000724315643,
+ "p99": 217.66400337219238
},
"isolatedSum": {
- "p50": 164.99199718236923,
- "p90": 179.87199872732162,
- "p95": 209.47200804948807,
- "p99": 250.84799528121948
+ "p50": 228.7359982728958,
+ "p90": 242.78400838375092,
+ "p95": 248.22399020195007,
+ "p99": 257.2479918599129
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 344064,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
+ "dispatchLogicalBytes": 77672448,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 71.58400118350983,
- "p90": 103.45599800348282,
- "p95": 106.39999806880951,
- "p99": 124.67200309038162
- },
- "combine": {
- "p50": 64.06400352716446,
- "p90": 72.92799651622772,
- "p95": 73.31199944019318,
- "p99": 74.43200051784515
- },
- "roundtrip": {
- "p50": 117.53600090742111,
- "p90": 144.41600441932678,
- "p95": 147.71200716495514,
- "p99": 173.5360026359558
- },
- "isolatedSum": {
- "p50": 135.6480047106743,
- "p90": 176.38399451971054,
- "p95": 179.71199750900269,
- "p99": 199.10400360822678
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 704512,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 92.70399808883667,
- "p90": 100.47999769449234,
- "p95": 102.75200009346008,
- "p99": 106.23999685049057
+ "p50": 145.7280069589615,
+ "p90": 177.18400061130524,
+ "p95": 185.85599958896637,
+ "p99": 197.50399887561798
},
"combine": {
- "p50": 66.01600348949432,
- "p90": 72.38399982452393,
- "p95": 72.86400347948074,
- "p99": 75.6160020828247
+ "p50": 168.7680035829544,
+ "p90": 190.08000195026398,
+ "p95": 205.56800067424774,
+ "p99": 230.49600422382355
},
"roundtrip": {
- "p50": 134.33599472045898,
- "p90": 143.77599954605103,
- "p95": 146.08000218868256,
- "p99": 149.82399344444275
+ "p50": 283.6799919605255,
+ "p90": 305.7279884815216,
+ "p95": 316.3839876651764,
+ "p99": 342.8800106048584
},
"isolatedSum": {
- "p50": 158.720001578331,
- "p90": 172.86399751901627,
- "p95": 175.61600357294083,
- "p99": 181.85599893331528
+ "p50": 314.4960105419159,
+ "p90": 367.2640025615692,
+ "p95": 391.4240002632141,
+ "p99": 428.0000030994415
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1384448,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
+ "dispatchLogicalBytes": 155889664,
+ "combineLogicalBytes": 155889664,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 97.75999933481216,
- "p90": 105.8880016207695,
- "p95": 129.66400384902954,
- "p99": 177.44000256061554
+ "p50": 213.24799954891205,
+ "p90": 221.44000232219696,
+ "p95": 223.80800545215607,
+ "p99": 230.5919975042343
},
"combine": {
- "p50": 71.32799923419952,
- "p90": 74.65600222349167,
- "p95": 81.53600245714188,
- "p99": 92.00000017881393
+ "p50": 284.12801027297974,
+ "p90": 293.503999710083,
+ "p95": 295.48799991607666,
+ "p99": 302.4959862232208
},
"roundtrip": {
- "p50": 140.99200069904327,
- "p90": 149.6960073709488,
- "p95": 159.19999778270721,
- "p99": 189.43999707698822
+ "p50": 451.35998725891113,
+ "p90": 479.0079891681671,
+ "p95": 489.21599984169006,
+ "p99": 509.66399908065796
},
"isolatedSum": {
- "p50": 169.0879985690117,
- "p90": 180.54400384426117,
- "p95": 211.20000630617142,
- "p99": 269.4400027394295
+ "p50": 497.3760098218918,
+ "p90": 514.94400203228,
+ "p95": 519.2960053682327,
+ "p99": 533.0879837274551
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2744320,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
+ "dispatchLogicalBytes": 312266752,
+ "combineLogicalBytes": 312266752,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 97.56799787282944,
- "p90": 100.99200159311295,
- "p95": 104.3199971318245,
- "p99": 107.42399841547012
+ "p50": 344.7679877281189,
+ "p90": 366.87999963760376,
+ "p95": 396.12799882888794,
+ "p99": 411.74399852752686
},
"combine": {
- "p50": 71.6480016708374,
- "p90": 73.7600028514862,
- "p95": 75.00799745321274,
- "p99": 80.92799782752991
+ "p50": 488.0320131778717,
+ "p90": 507.04002380371094,
+ "p95": 523.360013961792,
+ "p99": 549.4080185890198
},
"roundtrip": {
- "p50": 142.68800616264343,
- "p90": 150.30400454998016,
- "p95": 154.2080044746399,
- "p99": 156.09599649906158
+ "p50": 800.927996635437,
+ "p90": 814.7519826889038,
+ "p95": 830.0480246543884,
+ "p99": 856.3839793205261
},
"isolatedSum": {
- "p50": 169.21599954366684,
- "p90": 174.75200444459915,
- "p95": 179.32799458503723,
- "p99": 188.35199624300003
+ "p50": 832.8000009059906,
+ "p90": 873.9200234413147,
+ "p95": 919.4880127906799,
+ "p99": 961.1520171165466
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 5464064,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
+ "dispatchLogicalBytes": 623443968,
+ "combineLogicalBytes": 623443968,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 79.80799674987793,
- "p90": 99.55199807882309,
- "p95": 101.27999633550644,
- "p99": 106.08000308275223
- },
- "combine": {
- "p50": 66.68800115585327,
- "p90": 76.03199779987335,
- "p95": 80.38400113582611,
- "p99": 81.31200075149536
- },
- "roundtrip": {
- "p50": 123.87199699878693,
- "p90": 150.27199685573578,
- "p95": 152.16000378131866,
- "p99": 155.4879993200302
- },
- "isolatedSum": {
- "p50": 146.4959979057312,
- "p90": 175.58399587869644,
- "p95": 181.66399747133255,
- "p99": 187.3920038342476
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 11124736,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 100.28800368309021,
- "p90": 107.96800255775452,
- "p95": 109.47199910879135,
- "p99": 119.90399658679962
+ "p50": 599.8079776763916,
+ "p90": 618.4319853782654,
+ "p95": 636.4799737930298,
+ "p99": 653.9520025253296
},
"combine": {
- "p50": 81.11999928951263,
- "p90": 87.71199733018875,
- "p95": 89.1840010881424,
- "p99": 90.14400094747543
+ "p50": 870.527982711792,
+ "p90": 875.8400082588196,
+ "p95": 880.6719779968262,
+ "p99": 887.55202293396
},
"roundtrip": {
- "p50": 151.8079936504364,
- "p90": 162.59199380874634,
- "p95": 164.06400501728058,
- "p99": 168.57600212097168
+ "p50": 1442.3359632492065,
+ "p90": 1450.5599737167358,
+ "p95": 1453.7279605865479,
+ "p99": 1460.70396900177
},
"isolatedSum": {
- "p50": 181.40800297260284,
- "p90": 195.67999988794327,
- "p95": 198.65600019693375,
- "p99": 210.04799753427505
+ "p50": 1470.3359603881836,
+ "p90": 1494.271993637085,
+ "p95": 1517.151951789856,
+ "p99": 1541.5040254592896
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 22192128,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
+ "dispatchLogicalBytes": 1243805696,
+ "combineLogicalBytes": 1243805696,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
"stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 105.69600015878677,
- "p90": 115.99999666213989,
- "p95": 118.1119978427887,
- "p99": 124.83199685811996
+ "p50": 1104.2560338974,
+ "p90": 1115.1039600372314,
+ "p95": 1133.6640119552612,
+ "p99": 1154.3680429458618
},
"combine": {
- "p50": 88.3840024471283,
- "p90": 97.4079966545105,
- "p95": 97.88800030946732,
- "p99": 100.38399696350098
+ "p50": 1622.879981994629,
+ "p90": 1637.9200220108032,
+ "p95": 1651.2000560760498,
+ "p99": 1676.7679452896118
},
"roundtrip": {
- "p50": 161.72799468040466,
- "p90": 177.2480010986328,
- "p95": 181.15200102329254,
- "p99": 415.48800468444824
+ "p50": 2707.711935043335,
+ "p90": 2723.3920097351074,
+ "p95": 2733.920097351074,
+ "p99": 2751.1041164398193
},
"isolatedSum": {
- "p50": 194.08000260591507,
- "p90": 213.4079933166504,
- "p95": 215.999998152256,
- "p99": 225.21599382162094
+ "p50": 2727.136015892029,
+ "p90": 2753.0239820480347,
+ "p95": 2784.864068031311,
+ "p99": 2831.1359882354736
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 2487009280,
+ "combineLogicalBytes": 2487009280,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 6,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -36102,47 +36546,48 @@
]
},
{
- "id": "cx-8988cd24",
- "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "c91a22e0dde262e4",
+ "id": "cx-ddfe8a4d",
+ "identity": "gb300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "ed367f24667806d7",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:51.137960+00:00",
+ "generatedAt": "2026-06-29T14:02:32.068698+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_18",
- "sku": "h100",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "shape 5120/8/160",
+ "label": "GB300 EP8 · deepep · fp8",
+ "model": "Qwen3.5",
"shape": {
- "hidden": 5120,
+ "hidden": 4096,
"topk": 8,
- "experts": 160,
+ "experts": 128,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1515,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 132,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -36150,318 +36595,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "be1b44a963bd4ef",
- "workloadId": "set:8:34e5874082f8ea8f",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "03f98832f76b043",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271699258",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271699258",
- "createdAt": "2026-06-26T23:51:51.137960+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 95.551997423172,
- "p90": 107.04000294208527,
- "p95": 120.38400024175644,
- "p99": 156.00000321865082
+ "p50": 419.20000314712524,
+ "p90": 444.70399618148804,
+ "p95": 450.6239891052246,
+ "p99": 467.3919975757599
},
"combine": {
- "p50": 71.19999825954437,
- "p90": 78.84799689054489,
- "p95": 81.15199953317642,
- "p99": 97.56799787282944
+ "p50": 100.89600086212158,
+ "p90": 107.29599744081497,
+ "p95": 111.455999314785,
+ "p99": 118.46400052309036
},
"roundtrip": {
- "p50": 140.25600254535675,
- "p90": 152.319997549057,
- "p95": 169.8240041732788,
- "p99": 207.68000185489655
+ "p50": 488.0639910697937,
+ "p90": 510.8479857444763,
+ "p95": 516.3840055465698,
+ "p99": 548.1280088424683
},
"isolatedSum": {
- "p50": 166.75199568271637,
- "p90": 185.88799983263016,
- "p95": 201.53599977493286,
- "p99": 253.56800109148026
+ "p50": 520.0960040092468,
+ "p90": 551.999993622303,
+ "p95": 562.0799884200096,
+ "p99": 585.8559980988503
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 430080,
- "combineLogicalBytes": 430080,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 22282240,
+ "combineLogicalBytes": 44564480,
+ "fanoutMean": 5.3125,
+ "recvTokensMax": 699,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 68.4799998998642,
- "p90": 104.12800312042236,
- "p95": 121.69600278139114,
- "p99": 155.13600409030914
+ "p50": 408.32000970840454,
+ "p90": 433.9199960231781,
+ "p95": 440.5120015144348,
+ "p99": 452.2880017757416
},
"combine": {
- "p50": 64.80000168085098,
- "p90": 79.00799810886383,
- "p95": 88.06400001049042,
- "p99": 103.39199751615524
+ "p50": 139.39200341701508,
+ "p90": 146.94400131702423,
+ "p95": 149.9200016260147,
+ "p99": 159.58400070667267
},
"roundtrip": {
- "p50": 119.6800023317337,
- "p90": 147.32800424098969,
- "p95": 149.08799529075623,
- "p99": 153.888002038002
+ "p50": 522.3039984703064,
+ "p90": 547.9679703712463,
+ "p95": 554.0480017662048,
+ "p99": 570.2080130577087
},
"isolatedSum": {
- "p50": 133.28000158071518,
- "p90": 183.1360012292862,
- "p95": 209.76000279188156,
- "p99": 258.5280016064644
+ "p50": 547.7120131254196,
+ "p90": 580.8639973402023,
+ "p95": 590.4320031404495,
+ "p99": 611.8720024824142
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 880640,
- "combineLogicalBytes": 880640,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 44863488,
+ "combineLogicalBytes": 89726976,
+ "fanoutMean": 5.34814453125,
+ "recvTokensMax": 1385,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 70.01599669456482,
- "p90": 98.27200323343277,
- "p95": 101.47199779748917,
- "p99": 114.33599889278412
+ "p50": 469.7279930114746,
+ "p90": 483.0079972743988,
+ "p95": 487.5200092792511,
+ "p99": 502.4319887161255
},
"combine": {
- "p50": 65.08799642324448,
- "p90": 78.8159966468811,
- "p95": 79.23199981451035,
- "p99": 85.95199882984161
+ "p50": 208.15999805927277,
+ "p90": 214.20800685882568,
+ "p95": 217.056006193161,
+ "p99": 224.2559939622879
},
"roundtrip": {
- "p50": 119.03999745845795,
- "p90": 149.98400211334229,
- "p95": 151.8079936504364,
- "p99": 158.33599865436554
+ "p50": 659.0719819068909,
+ "p90": 674.5280027389526,
+ "p95": 680.832028388977,
+ "p99": 701.0560035705566
},
"isolatedSum": {
- "p50": 135.1039931178093,
- "p90": 177.08799988031387,
- "p95": 180.7039976119995,
- "p99": 200.28799772262573
+ "p50": 677.8879910707474,
+ "p90": 697.2160041332245,
+ "p95": 704.5760154724121,
+ "p99": 726.6879826784134
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1740800,
- "combineLogicalBytes": 1740800,
- "fanoutMean": 5.3125,
- "recvTokensMax": 25,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 89751552,
+ "combineLogicalBytes": 179503104,
+ "fanoutMean": 5.349609375,
+ "recvTokensMax": 2772,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 70.23999840021133,
- "p90": 97.79199957847595,
- "p95": 102.01600193977356,
- "p99": 116.67200177907944
+ "p50": 624.2560148239136,
+ "p90": 642.4959897994995,
+ "p95": 648.3520269393921,
+ "p99": 666.1120057106018
},
"combine": {
- "p50": 65.47199934720993,
- "p90": 79.0719985961914,
- "p95": 79.64800298213959,
- "p99": 87.67999708652496
+ "p50": 381.21598958969116,
+ "p90": 390.6880021095276,
+ "p95": 393.75999569892883,
+ "p99": 401.91999077796936
},
"roundtrip": {
- "p50": 118.367999792099,
- "p90": 150.4639983177185,
- "p95": 155.68000078201294,
- "p99": 188.25599551200867
+ "p50": 1005.952000617981,
+ "p90": 1020.6079483032227,
+ "p95": 1025.439977645874,
+ "p99": 1037.984013557434
},
"isolatedSum": {
- "p50": 135.71199774742126,
- "p90": 176.86399817466736,
- "p95": 181.66400492191315,
- "p99": 204.3519988656044
+ "p50": 1005.4720044136047,
+ "p90": 1033.183991909027,
+ "p95": 1042.112022638321,
+ "p99": 1068.0319964885712
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 3471360,
- "combineLogicalBytes": 3471360,
- "fanoutMean": 5.296875,
- "recvTokensMax": 50,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 179511296,
+ "combineLogicalBytes": 359022592,
+ "fanoutMean": 5.349853515625,
+ "recvTokensMax": 5558,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 95.10400146245956,
- "p90": 101.34399682283401,
- "p95": 105.6319996714592,
- "p99": 117.11999773979187
+ "p50": 938.7199878692627,
+ "p90": 958.5599899291992,
+ "p95": 966.9439792633057,
+ "p99": 1001.4079809188843
},
"combine": {
- "p50": 69.11999732255936,
- "p90": 79.42400127649307,
- "p95": 80.03199845552444,
- "p99": 86.87999844551086
+ "p50": 801.5360236167908,
+ "p90": 809.1840147972107,
+ "p95": 812.7040266990662,
+ "p99": 824.1919875144958
},
"roundtrip": {
- "p50": 120.03199756145477,
- "p90": 147.039994597435,
- "p95": 149.72800016403198,
- "p99": 158.55999290943146
+ "p50": 1702.3040056228638,
+ "p90": 1729.9840450286865,
+ "p95": 1735.80801486969,
+ "p99": 1754.080057144165
},
"isolatedSum": {
- "p50": 164.22399878501892,
- "p90": 180.7679980993271,
- "p95": 185.66399812698364,
- "p99": 203.99999618530273
+ "p50": 1740.2560114860535,
+ "p90": 1767.74400472641,
+ "p95": 1779.6480059623718,
+ "p99": 1825.5999684333801
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 6912000,
- "combineLogicalBytes": 6912000,
- "fanoutMean": 5.2734375,
- "recvTokensMax": 93,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 77.47200131416321,
- "p90": 103.16800326108932,
- "p95": 109.72800105810165,
- "p99": 237.37600445747375
- },
- "combine": {
- "p50": 71.99999690055847,
- "p90": 87.13600039482117,
- "p95": 95.20000219345093,
- "p99": 104.16000336408615
- },
- "roundtrip": {
- "p50": 146.14400267601013,
- "p90": 166.52800142765045,
- "p95": 171.1679995059967,
- "p99": 366.0160005092621
- },
- "isolatedSum": {
- "p50": 149.47199821472168,
- "p90": 190.3040036559105,
- "p95": 204.92800325155258,
- "p99": 341.5360078215599
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13977600,
- "combineLogicalBytes": 13977600,
- "fanoutMean": 5.33203125,
- "recvTokensMax": 179,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 92.67199784517288,
- "p90": 111.04000359773636,
- "p95": 113.79200220108032,
- "p99": 126.68800354003906
- },
- "combine": {
- "p50": 81.08799904584885,
- "p90": 88.67199718952179,
- "p95": 95.45599669218063,
- "p99": 96.28800302743912
- },
- "roundtrip": {
- "p50": 147.5840061903,
- "p90": 168.96000504493713,
- "p95": 170.9440052509308,
- "p99": 174.9120056629181
- },
- "isolatedSum": {
- "p50": 173.75999689102173,
- "p90": 199.71200078725815,
- "p95": 209.24799889326096,
- "p99": 222.97600656747818
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 27975680,
- "combineLogicalBytes": 27975680,
- "fanoutMean": 5.3359375,
- "recvTokensMax": 355,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 358055936,
+ "combineLogicalBytes": 716111872,
+ "fanoutMean": 5.33544921875,
+ "recvTokensMax": 10982,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 105.15200346708298,
- "p90": 120.7680031657219,
- "p95": 122.68800288438797,
- "p99": 131.29599392414093
+ "p50": 1547.935962677002,
+ "p90": 1566.2399530410767,
+ "p95": 1573.9200115203857,
+ "p99": 1590.9119844436646
},
"combine": {
- "p50": 95.90400010347366,
- "p90": 104.67199981212616,
- "p95": 112.60800063610077,
- "p99": 267.5839960575104
+ "p50": 1503.3600330352783,
+ "p90": 1512.5759840011597,
+ "p95": 1514.6559476852417,
+ "p99": 1518.720030784607
},
"roundtrip": {
- "p50": 173.0239987373352,
- "p90": 194.17600333690643,
- "p95": 195.90400159358978,
- "p99": 308.351993560791
+ "p50": 3009.4399452209473,
+ "p90": 3032.543897628784,
+ "p95": 3040.3521060943604,
+ "p99": 3060.2879524230957
},
"isolatedSum": {
- "p50": 201.05600357055664,
- "p90": 225.44000297784805,
- "p95": 235.29600352048874,
- "p99": 398.8799899816513
+ "p50": 3051.2959957122803,
+ "p90": 3078.8159370422363,
+ "p95": 3088.5759592056274,
+ "p99": 3109.6320152282715
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 55674880,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 2,
+ "dispatchLogicalBytes": 716197888,
+ "combineLogicalBytes": 1432395776,
+ "fanoutMean": 5.336090087890625,
+ "recvTokensMax": 21939,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -36469,47 +36840,48 @@
]
},
{
- "id": "cx-1d6bf339",
- "identity": "h100|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_42947950",
- "comparisonKey": "4f849813bdf740d5",
+ "id": "cx-7de7dc87",
+ "identity": "gb300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "6233cb31a6511067",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:11.578821+00:00",
+ "generatedAt": "2026-06-29T14:06:51.104687+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_11",
- "sku": "h100",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
+ "label": "GB300 EP8 · deepep · fp8",
+ "model": "shape 5120/8/160",
"shape": {
- "hidden": 6144,
+ "hidden": 5120,
"topk": 8,
- "experts": 256,
+ "experts": 160,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1515,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 132,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -36517,318 +36889,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "a9df48e6438e77a",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28287492752",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287492752",
- "createdAt": "2026-06-27T11:13:11.578821+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.47999703884125,
- "p90": 105.56799918413162,
- "p95": 109.15199667215347,
- "p99": 129.66400384902954
- },
- "combine": {
- "p50": 74.46400076150894,
- "p90": 80.73599636554718,
- "p95": 81.37600123882294,
- "p99": 85.05599945783615
- },
- "roundtrip": {
- "p50": 146.40000462532043,
- "p90": 152.73599326610565,
- "p95": 154.52800691127777,
- "p99": 157.79200196266174
- },
- "isolatedSum": {
- "p50": 170.9439978003502,
- "p90": 186.3039955496788,
- "p95": 190.5279979109764,
- "p99": 214.7200033068657
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 540672,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 97.31200337409973,
- "p90": 103.32799702882767,
- "p95": 104.89600151777267,
- "p99": 109.27999764680862
- },
- "combine": {
- "p50": 74.8480036854744,
- "p90": 80.28800040483475,
- "p95": 81.69600367546082,
- "p99": 86.33600175380707
- },
- "roundtrip": {
- "p50": 144.16000247001648,
- "p90": 152.19199657440186,
- "p95": 154.52800691127777,
- "p99": 164.8319959640503
- },
- "isolatedSum": {
- "p50": 172.16000705957413,
- "p90": 183.61599743366241,
- "p95": 186.5920051932335,
- "p99": 195.6159994006157
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1056768,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 95.96800059080124,
- "p90": 102.39999741315842,
- "p95": 104.8320010304451,
- "p99": 110.88000237941742
+ "p50": 426.62400007247925,
+ "p90": 460.7999920845032,
+ "p95": 468.9919948577881,
+ "p99": 490.81599712371826
},
"combine": {
- "p50": 74.46400076150894,
- "p90": 79.77599650621414,
- "p95": 81.216000020504,
- "p99": 83.96799862384796
+ "p50": 110.46399921178818,
+ "p90": 117.08799749612808,
+ "p95": 121.95199728012085,
+ "p99": 140.44800400733948
},
"roundtrip": {
- "p50": 144.16000247001648,
- "p90": 152.28800475597382,
- "p95": 155.03999590873718,
- "p99": 161.40800714492798
+ "p50": 481.6319942474365,
+ "p90": 513.7919783592224,
+ "p95": 521.6320157051086,
+ "p99": 537.8559827804565
},
"isolatedSum": {
- "p50": 170.43200135231018,
- "p90": 182.17599391937256,
- "p95": 186.0480010509491,
- "p99": 194.84800100326538
+ "p50": 537.0879992842674,
+ "p90": 577.8879895806313,
+ "p95": 590.9439921379089,
+ "p99": 631.2640011310577
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2125824,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 27837440,
+ "combineLogicalBytes": 55674880,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 699,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 95.58399766683578,
- "p90": 103.58399897813797,
- "p95": 106.49599879980087,
- "p99": 112.2559979557991
+ "p50": 367.1039938926697,
+ "p90": 381.47199153900146,
+ "p95": 387.1999979019165,
+ "p99": 459.4239890575409
},
"combine": {
- "p50": 75.6160020828247,
- "p90": 81.60000294446945,
- "p95": 82.65600353479385,
- "p99": 87.00799942016602
+ "p50": 144.67200636863708,
+ "p90": 150.94399452209473,
+ "p95": 154.08000349998474,
+ "p99": 159.71200168132782
},
"roundtrip": {
- "p50": 147.2959965467453,
- "p90": 154.23999726772308,
- "p95": 156.44800662994385,
- "p99": 160.73599457740784
+ "p50": 487.96799778938293,
+ "p90": 501.72799825668335,
+ "p95": 506.0160160064697,
+ "p99": 574.1119980812073
},
"isolatedSum": {
- "p50": 171.1999997496605,
- "p90": 185.18400192260742,
- "p95": 189.15200233459473,
- "p99": 199.26399737596512
+ "p50": 511.77600026130676,
+ "p90": 532.4159860610962,
+ "p95": 541.2800014019012,
+ "p99": 619.1359907388687
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4263936,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 55552000,
+ "combineLogicalBytes": 111104000,
+ "fanoutMean": 5.2978515625,
+ "recvTokensMax": 1387,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 96.0640013217926,
- "p90": 102.30399668216705,
- "p95": 105.59999942779541,
- "p99": 112.47999966144562
+ "p50": 450.655996799469,
+ "p90": 463.5840058326721,
+ "p95": 468.35198998451233,
+ "p99": 482.7840030193329
},
"combine": {
- "p50": 78.68800312280655,
- "p90": 81.95199817419052,
- "p95": 83.71199667453766,
- "p99": 89.47200328111649
+ "p50": 221.18400037288666,
+ "p90": 227.2000014781952,
+ "p95": 229.44000363349915,
+ "p99": 233.2800030708313
},
"roundtrip": {
- "p50": 149.05600249767303,
- "p90": 155.42399883270264,
- "p95": 158.84800255298615,
- "p99": 165.6319946050644
+ "p50": 660.9920263290405,
+ "p90": 672.4479794502258,
+ "p95": 676.9919991493225,
+ "p99": 694.0479874610901
},
"isolatedSum": {
- "p50": 174.75200444459915,
- "p90": 184.25599485635757,
- "p95": 189.31199610233307,
- "p99": 201.9520029425621
+ "p50": 671.8399971723557,
+ "p90": 690.7840073108673,
+ "p95": 697.7919936180115,
+ "p99": 716.0640060901642
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 8503296,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 111549440,
+ "combineLogicalBytes": 223098880,
+ "fanoutMean": 5.319091796875,
+ "recvTokensMax": 2762,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 96.0640013217926,
- "p90": 104.16000336408615,
- "p95": 105.6319996714592,
- "p99": 113.27999830245972
+ "p50": 631.4240097999573,
+ "p90": 645.6320285797119,
+ "p95": 649.6959924697876,
+ "p99": 663.0399823188782
},
"combine": {
- "p50": 82.71999657154083,
- "p90": 87.99999952316284,
- "p95": 88.99199962615967,
- "p99": 91.20000153779984
+ "p50": 459.77601408958435,
+ "p90": 465.5359983444214,
+ "p95": 467.2960042953491,
+ "p99": 473.7600088119507
},
"roundtrip": {
- "p50": 150.27199685573578,
- "p90": 159.8079949617386,
- "p95": 162.08000481128693,
- "p99": 168.92799735069275
+ "p50": 1042.8160429000854,
+ "p90": 1055.7440519332886,
+ "p95": 1059.648036956787,
+ "p99": 1105.2160263061523
},
"isolatedSum": {
- "p50": 178.78399789333344,
- "p90": 192.160002887249,
- "p95": 194.62399929761887,
- "p99": 204.47999984025955
+ "p50": 1091.2000238895416,
+ "p90": 1111.1680269241333,
+ "p95": 1116.9919967651367,
+ "p99": 1136.7999911308289
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 16908288,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 223365120,
+ "combineLogicalBytes": 446730240,
+ "fanoutMean": 5.325439453125,
+ "recvTokensMax": 5518,
+ "stragglerRank": 4,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 107.744000852108,
- "p90": 114.9120032787323,
- "p95": 116.22399836778641,
- "p99": 124.06399846076965
+ "p50": 1002.9120445251465,
+ "p90": 1051.4240264892578,
+ "p95": 1057.1839809417725,
+ "p99": 1065.4079914093018
},
"combine": {
- "p50": 91.96799993515015,
- "p90": 96.3520035147667,
- "p95": 97.6639986038208,
- "p99": 103.61599922180176
+ "p50": 834.1439962387085,
+ "p90": 841.2479758262634,
+ "p95": 844.2559838294983,
+ "p99": 850.4319787025452
},
"roundtrip": {
- "p50": 164.60800170898438,
- "p90": 177.40799486637115,
- "p95": 179.26399409770966,
- "p99": 182.3360025882721
+ "p50": 1799.7119426727295,
+ "p90": 1841.69602394104,
+ "p95": 1851.4560461044312,
+ "p99": 1875.3600120544434
},
"isolatedSum": {
- "p50": 199.71200078725815,
- "p90": 211.264006793499,
- "p95": 213.8879969716072,
- "p99": 227.6799976825714
+ "p50": 1837.056040763855,
+ "p90": 1892.6720023155212,
+ "p95": 1901.4399647712708,
+ "p99": 1915.839970111847
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 33423360,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 446817280,
+ "combineLogicalBytes": 893634560,
+ "fanoutMean": 5.32647705078125,
+ "recvTokensMax": 11032,
+ "stragglerRank": 2,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 120.86399644613266,
- "p90": 128.92800569534302,
- "p95": 130.65600395202637,
- "p99": 136.7039978504181
+ "p50": 1781.8880081176758,
+ "p90": 1797.4720001220703,
+ "p95": 1803.1680583953857,
+ "p99": 1816.5760040283203
},
"combine": {
- "p50": 107.16799646615982,
- "p90": 112.12799698114395,
- "p95": 112.99200356006622,
- "p99": 115.29599875211716
+ "p50": 1567.3600435256958,
+ "p90": 1573.855996131897,
+ "p95": 1575.5200386047363,
+ "p99": 1583.6479663848877
},
"roundtrip": {
- "p50": 197.76000082492828,
- "p90": 205.4080069065094,
- "p95": 210.4959934949875,
- "p99": 479.45600748062134
+ "p50": 3305.3760528564453,
+ "p90": 3318.0160522460938,
+ "p95": 3323.0080604553223,
+ "p99": 3348.479986190796
},
"isolatedSum": {
- "p50": 228.03199291229248,
- "p90": 241.05600267648697,
- "p95": 243.6480075120926,
- "p99": 251.99999660253525
+ "p50": 3349.2480516433716,
+ "p90": 3371.3279962539673,
+ "p95": 3378.688097000122,
+ "p99": 3400.223970413208
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 893132800,
+ "combineLogicalBytes": 1786265600,
+ "fanoutMean": 5.323486328125,
+ "recvTokensMax": 21895,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -36836,28 +37134,28 @@
]
},
{
- "id": "cx-d5af8f11",
- "identity": "h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "bb40f1d7fb8ef5bf",
+ "id": "cx-d318914f",
+ "identity": "gb300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "6304da2c595b352d",
"schemaVersion": 3,
- "generatedAt": "2026-06-26T23:52:15.657129+00:00",
+ "generatedAt": "2026-06-29T14:11:20.066461+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_00",
- "sku": "h100",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
+ "label": "GB300 EP8 · deepep · fp8",
"model": "MiniMax-M3",
"shape": {
"hidden": 6144,
@@ -36868,15 +37166,16 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1515,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 132,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -36884,318 +37183,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28271714089",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271714089",
- "createdAt": "2026-06-26T23:52:15.657129+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 95.51999717950821,
- "p90": 104.99200224876404,
- "p95": 123.16799908876419,
- "p99": 153.05599570274353
- },
- "combine": {
- "p50": 74.0479975938797,
- "p90": 82.36800134181976,
- "p95": 90.65599739551544,
- "p99": 115.13599753379822
- },
- "roundtrip": {
- "p50": 144.73600685596466,
- "p90": 151.7760008573532,
- "p95": 153.9199948310852,
- "p99": 191.74399971961975
- },
- "isolatedSum": {
- "p50": 169.5679947733879,
- "p90": 187.3600035905838,
- "p95": 213.82399648427963,
- "p99": 268.19199323654175
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 540672,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 71.26399874687195,
- "p90": 102.4319976568222,
- "p95": 104.47999835014343,
- "p99": 143.48800480365753
+ "p50": 428.6719858646393,
+ "p90": 454.8160135746002,
+ "p95": 462.3039960861206,
+ "p99": 486.62400245666504
},
"combine": {
- "p50": 67.77600198984146,
- "p90": 81.15199953317642,
- "p95": 81.727996468544,
- "p99": 87.71199733018875
+ "p50": 117.18399822711945,
+ "p90": 123.03999811410904,
+ "p95": 125.95200538635254,
+ "p99": 131.42399489879608
},
"roundtrip": {
- "p50": 124.03199821710587,
- "p90": 153.02400290966034,
- "p95": 154.94400262832642,
- "p99": 158.36800634860992
+ "p50": 506.5600275993347,
+ "p90": 525.5680084228516,
+ "p95": 531.1999917030334,
+ "p99": 541.8239831924438
},
"isolatedSum": {
- "p50": 139.0400007367134,
- "p90": 183.58399718999863,
- "p95": 186.20799481868744,
- "p99": 231.20000213384628
+ "p50": 545.8559840917587,
+ "p90": 577.8560116887093,
+ "p95": 588.2560014724731,
+ "p99": 618.0479973554611
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1056768,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
+ "dispatchLogicalBytes": 33288192,
+ "combineLogicalBytes": 66576384,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
"stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 91.58399701118469,
- "p90": 103.20000350475311,
- "p95": 104.032002389431,
- "p99": 107.58399963378906
- },
- "combine": {
- "p50": 74.20799881219864,
- "p90": 80.64000308513641,
- "p95": 81.31200075149536,
- "p99": 82.49600231647491
- },
- "roundtrip": {
- "p50": 145.79200744628906,
- "p90": 152.38399803638458,
- "p95": 154.55999970436096,
- "p99": 172.38399386405945
- },
- "isolatedSum": {
- "p50": 165.79199582338333,
- "p90": 183.84000658988953,
- "p95": 185.34400314092636,
- "p99": 190.08000195026398
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2125824,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 92.6079973578453,
- "p90": 103.00800204277039,
- "p95": 114.46399986743927,
- "p99": 149.98400211334229
+ "p50": 423.0720102787018,
+ "p90": 441.9519901275635,
+ "p95": 446.46400213241577,
+ "p99": 457.8559994697571
},
"combine": {
- "p50": 76.1599987745285,
- "p90": 82.49600231647491,
- "p95": 86.68799698352814,
- "p99": 95.77599912881851
+ "p50": 156.2879979610443,
+ "p90": 162.84799575805664,
+ "p95": 165.69599509239197,
+ "p99": 172.83199727535248
},
"roundtrip": {
- "p50": 146.84799313545227,
- "p90": 161.40800714492798,
- "p95": 192.09599494934082,
- "p99": 203.74399423599243
+ "p50": 552.5439977645874,
+ "p90": 569.2160129547119,
+ "p95": 574.2719769477844,
+ "p99": 587.6160264015198
},
"isolatedSum": {
- "p50": 168.7679961323738,
- "p90": 185.5040043592453,
- "p95": 201.1519968509674,
- "p99": 245.7600012421608
+ "p50": 579.3600082397461,
+ "p90": 604.7999858856201,
+ "p95": 612.1599972248077,
+ "p99": 630.6879967451096
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4263936,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 0,
+ "dispatchLogicalBytes": 66809856,
+ "combineLogicalBytes": 133619712,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 91.2960022687912,
- "p90": 99.16800260543823,
- "p95": 101.1200025677681,
- "p99": 107.68000036478043
+ "p50": 524.1600275039673,
+ "p90": 540.4800176620483,
+ "p95": 544.8960065841675,
+ "p99": 551.6800284385681
},
"combine": {
- "p50": 77.37600058317184,
- "p90": 81.53600245714188,
- "p95": 82.24000036716461,
- "p99": 87.13600039482117
+ "p50": 257.56800174713135,
+ "p90": 265.28000831604004,
+ "p95": 267.87200570106506,
+ "p99": 273.69600534439087
},
"roundtrip": {
- "p50": 150.30400454998016,
- "p90": 157.05600380897522,
- "p95": 158.9760035276413,
- "p99": 162.49600052833557
+ "p50": 758.6560249328613,
+ "p90": 776.3839960098267,
+ "p95": 781.6960215568542,
+ "p99": 798.6239790916443
},
"isolatedSum": {
- "p50": 168.67200285196304,
- "p90": 180.7040050625801,
- "p95": 183.3600029349327,
- "p99": 194.8160007596016
+ "p50": 781.7280292510986,
+ "p90": 805.7600259780884,
+ "p95": 812.7680122852325,
+ "p99": 825.376033782959
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 8503296,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 133828608,
+ "combineLogicalBytes": 267657216,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 96.03200107812881,
- "p90": 103.90400141477585,
- "p95": 107.68000036478043,
- "p99": 194.815993309021
+ "p50": 736.3520264625549,
+ "p90": 751.9999742507935,
+ "p95": 755.3279995918274,
+ "p99": 762.2399926185608
},
"combine": {
- "p50": 80.51200211048126,
- "p90": 87.00799942016602,
- "p95": 90.55999666452408,
- "p99": 383.7119936943054
+ "p50": 467.23198890686035,
+ "p90": 473.05598855018616,
+ "p95": 476.063996553421,
+ "p99": 483.5520088672638
},
"roundtrip": {
- "p50": 134.97599959373474,
- "p90": 158.27199816703796,
- "p95": 171.36000096797943,
- "p99": 204.0960043668747
+ "p50": 1161.8560552597046,
+ "p90": 1176.1280298233032,
+ "p95": 1179.4240474700928,
+ "p99": 1195.520043373108
},
"isolatedSum": {
- "p50": 176.54400318861008,
- "p90": 190.91200083494186,
- "p95": 198.2399970293045,
- "p99": 578.5279870033264
+ "p50": 1203.5840153694153,
+ "p90": 1225.0559628009796,
+ "p95": 1231.3919961452484,
+ "p99": 1245.7920014858246
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 16908288,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 267190272,
+ "combineLogicalBytes": 534380544,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 107.61599987745285,
- "p90": 114.49600011110306,
- "p95": 116.35199934244156,
- "p99": 122.84799665212631
+ "p50": 1161.6319417953491,
+ "p90": 1181.663990020752,
+ "p95": 1189.4079446792603,
+ "p99": 1212.5439643859863
},
"combine": {
- "p50": 92.06400066614151,
- "p90": 98.2080027461052,
- "p95": 98.68799895048141,
- "p99": 102.46399790048599
+ "p50": 848.2239842414856,
+ "p90": 855.135977268219,
+ "p95": 856.768012046814,
+ "p99": 861.7280125617981
},
"roundtrip": {
- "p50": 167.84000396728516,
- "p90": 173.567995429039,
- "p95": 175.90400576591492,
- "p99": 179.4240027666092
+ "p50": 1978.816032409668,
+ "p90": 2005.0559043884277,
+ "p95": 2013.0879878997803,
+ "p99": 2033.6639881134033
},
"isolatedSum": {
- "p50": 199.68000054359436,
- "p90": 212.70400285720825,
- "p95": 215.03999829292297,
- "p99": 225.3119945526123
+ "p50": 2009.8559260368347,
+ "p90": 2036.799967288971,
+ "p95": 2046.1759567260742,
+ "p99": 2074.2719769477844
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 33423360,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 533059584,
+ "combineLogicalBytes": 1066119168,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 123.26399981975555,
- "p90": 130.68799674510956,
- "p95": 132.83200562000275,
- "p99": 148.0959951877594
+ "p50": 2040.6720638275146,
+ "p90": 2056.096076965332,
+ "p95": 2061.8560314178467,
+ "p99": 2084.480047225952
},
"combine": {
- "p50": 106.6880002617836,
- "p90": 114.23999816179276,
- "p95": 115.23199826478958,
- "p99": 137.85600662231445
+ "p50": 1591.647982597351,
+ "p90": 1599.552035331726,
+ "p95": 1603.327989578247,
+ "p99": 1609.055995941162
},
"roundtrip": {
- "p50": 197.60000705718994,
- "p90": 204.8639953136444,
- "p95": 207.07200467586517,
- "p99": 225.8879989385605
+ "p50": 3593.087911605835,
+ "p90": 3605.5679321289062,
+ "p95": 3608.8318824768066,
+ "p99": 3622.080087661743
},
"isolatedSum": {
- "p50": 229.95200008153915,
- "p90": 244.9279949069023,
- "p95": 248.06400388479233,
- "p99": 285.95200181007385
+ "p50": 3632.3200464248657,
+ "p90": 3655.648112297058,
+ "p95": 3665.1840209960938,
+ "p99": 3693.5360431671143
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 2,
+ "dispatchLogicalBytes": 1065861120,
+ "combineLogicalBytes": 2131722240,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 3,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -37203,28 +37428,28 @@
]
},
{
- "id": "cx-7171c240",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "h100_42947950",
- "comparisonKey": "1fe2184d83233e7e",
+ "id": "cx-6fe76bb4",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb300_c4ac4643",
+ "comparisonKey": "9bdacb5bc6bbc14d",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:32.898956+00:00",
+ "generatedAt": "2026-06-29T13:50:37.611513+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_03",
- "sku": "h100",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
"measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
+ "label": "GB300 EP8 · deepep · fp8",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -37235,15 +37460,16 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "fp8-saturation",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1515,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 132,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -37251,199 +37477,273 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28272125238",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272125238",
- "createdAt": "2026-06-27T00:05:32.898956+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 94.7519987821579,
- "p90": 118.49600076675415,
- "p95": 129.60000336170197,
- "p99": 144.31999623775482
+ "p50": 111.39199882745743,
+ "p90": 120.89599668979645,
+ "p95": 123.6800029873848,
+ "p99": 132.54399597644806
},
"combine": {
- "p50": 76.64000242948532,
- "p90": 87.2960016131401,
- "p95": 90.52799642086029,
- "p99": 103.10400277376175
+ "p50": 119.6800023317337,
+ "p90": 126.14400684833527,
+ "p95": 129.66400384902954,
+ "p99": 136.51199638843536
},
"roundtrip": {
- "p50": 147.2640037536621,
- "p90": 170.30400037765503,
- "p95": 184.89600718021393,
- "p99": 195.6160068511963
+ "p50": 280.64000606536865,
+ "p90": 298.335999250412,
+ "p95": 303.5520017147064,
+ "p99": 315.3280019760132
},
"isolatedSum": {
- "p50": 171.39200121164322,
- "p90": 205.79200237989426,
- "p95": 220.12799978256226,
- "p99": 247.42399901151657
+ "p50": 231.07200115919113,
+ "p90": 247.0400035381317,
+ "p95": 253.34400683641434,
+ "p99": 269.0559923648834
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 98.68799895048141,
- "p90": 122.17599898576736,
- "p95": 138.7840062379837,
- "p99": 191.9039934873581
+ "p50": 141.88799262046814,
+ "p90": 160.76800227165222,
+ "p95": 182.23999440670013,
+ "p99": 200.3519982099533
},
"combine": {
- "p50": 81.31200075149536,
- "p90": 89.72799777984619,
- "p95": 97.08800166845322,
- "p99": 106.62399977445602
+ "p50": 161.28000617027283,
+ "p90": 182.91200697422028,
+ "p95": 202.14399695396423,
+ "p99": 226.33600234985352
},
"roundtrip": {
- "p50": 152.70400047302246,
- "p90": 174.9120056629181,
- "p95": 184.03199315071106,
- "p99": 195.51999866962433
+ "p50": 356.51201009750366,
+ "p90": 385.5679929256439,
+ "p95": 394.8799967765808,
+ "p99": 411.6480052471161
},
"isolatedSum": {
- "p50": 179.99999970197678,
- "p90": 211.90399676561356,
- "p95": 235.87200790643692,
- "p99": 298.5279932618141
+ "p50": 303.16799879074097,
+ "p90": 343.6800092458725,
+ "p95": 384.38399136066437,
+ "p99": 426.6880005598068
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
+ "dispatchLogicalBytes": 77944832,
+ "combineLogicalBytes": 155889664,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 101.79200023412704,
- "p90": 127.96799838542938,
- "p95": 147.42399752140045,
- "p99": 195.16800343990326
+ "p50": 197.37599790096283,
+ "p90": 217.53600239753723,
+ "p95": 232.2240024805069,
+ "p99": 252.06398963928223
},
"combine": {
- "p50": 89.66399729251862,
- "p90": 103.4879982471466,
- "p95": 113.02399635314941,
- "p99": 128.1599998474121
+ "p50": 281.69599175453186,
+ "p90": 295.6799864768982,
+ "p95": 312.0959997177124,
+ "p99": 327.58399844169617
},
"roundtrip": {
- "p50": 162.88000345230103,
- "p90": 193.53599846363068,
- "p95": 214.08000588417053,
- "p99": 247.71200120449066
+ "p50": 584.2559933662415,
+ "p90": 597.5040197372437,
+ "p95": 617.2800064086914,
+ "p99": 638.4320259094238
},
"isolatedSum": {
- "p50": 191.45599752664566,
- "p90": 231.455996632576,
- "p95": 260.44799387454987,
- "p99": 323.32800328731537
+ "p50": 479.0719896554947,
+ "p90": 513.2159888744354,
+ "p95": 544.3200021982193,
+ "p99": 579.6479880809784
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 156133376,
+ "combineLogicalBytes": 312266752,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 130.49599528312683,
- "p90": 143.8719928264618,
- "p95": 152.70400047302246,
- "p99": 158.9760035276413
+ "p50": 310.5599880218506,
+ "p90": 328.8959860801697,
+ "p95": 342.9119884967804,
+ "p99": 363.45601081848145
},
"combine": {
- "p50": 114.81600254774094,
- "p90": 127.23200023174286,
- "p95": 131.071999669075,
- "p99": 139.5840048789978
+ "p50": 482.91200399398804,
+ "p90": 501.2800097465515,
+ "p95": 510.047972202301,
+ "p99": 528.6719799041748
},
"roundtrip": {
- "p50": 212.70400285720825,
- "p90": 226.33600234985352,
- "p95": 233.69599878787994,
- "p99": 247.8400021791458
+ "p50": 1010.1120471954346,
+ "p90": 1035.0079536437988,
+ "p95": 1043.67995262146,
+ "p99": 1060.3519678115845
},
"isolatedSum": {
- "p50": 245.31199783086777,
- "p90": 271.10399305820465,
- "p95": 283.7760001420975,
- "p99": 298.5600084066391
+ "p50": 793.4719920158386,
+ "p90": 830.1759958267212,
+ "p95": 852.9599606990814,
+ "p99": 892.1279907226562
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
+ "dispatchLogicalBytes": 311721984,
+ "combineLogicalBytes": 623443968,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
+ "dispatch": {
+ "p50": 545.5999970436096,
+ "p90": 557.2479963302612,
+ "p95": 560.8320236206055,
+ "p99": 570.4320073127747
+ },
+ "combine": {
+ "p50": 868.0959939956665,
+ "p90": 876.8960237503052,
+ "p95": 879.5199990272522,
+ "p99": 900.704026222229
+ },
+ "roundtrip": {
+ "p50": 1849.5999574661255,
+ "p90": 1864.7359609603882,
+ "p95": 1871.9359636306763,
+ "p99": 1903.2959938049316
+ },
+ "isolatedSum": {
+ "p50": 1413.6959910392761,
+ "p90": 1434.1440200805664,
+ "p95": 1440.3520226478577,
+ "p99": 1471.1360335350037
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 621902848,
+ "combineLogicalBytes": 1243805696,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ },
+ {
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
+ "dispatch": {
+ "p50": 1018.1759595870972,
+ "p90": 1037.343978881836,
+ "p95": 1054.0800094604492,
+ "p99": 1073.472023010254
+ },
+ "combine": {
+ "p50": 1616.3519620895386,
+ "p90": 1627.0400285720825,
+ "p95": 1634.9120140075684,
+ "p99": 1655.8079719543457
+ },
+ "roundtrip": {
+ "p50": 3515.0399208068848,
+ "p90": 3526.4639854431152,
+ "p95": 3531.424045562744,
+ "p99": 3540.8639907836914
+ },
+ "isolatedSum": {
+ "p50": 2634.5279216766357,
+ "p90": 2664.3840074539185,
+ "p95": 2688.9920234680176,
+ "p99": 2729.2799949645996
+ },
+ "roundtripMeasured": true,
+ "dispatchLogicalBytes": 1243504640,
+ "combineLogicalBytes": 2487009280,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 1,
+ "correct": true,
+ "samplesPooled": 600,
+ "trials": 3
+ }
]
},
{
- "id": "cx-7a284f4e",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_42947950",
- "comparisonKey": "2b24bee4ac6d8f67",
+ "id": "cx-8e404634",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "67d9b2df504c0ef6",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T10:09:52.345460+00:00",
+ "generatedAt": "2026-06-29T13:54:47.856231+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
+ "label": "GB300 EP8 · deepep · fp8",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -37454,15 +37754,16 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1515,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 132,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -37470,318 +37771,244 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28286083501",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286083501",
- "createdAt": "2026-06-27T10:09:52.345460+00:00",
- "sha": "76a3032d20288ee17220eb6099346f74d56ce005"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 74.36800003051758,
- "p90": 106.81600123643875,
- "p95": 108.06400328874588,
- "p99": 112.89600282907486
- },
- "combine": {
- "p50": 74.87999647855759,
- "p90": 83.80799740552902,
- "p95": 84.22400057315826,
- "p99": 88.99199962615967
- },
- "roundtrip": {
- "p50": 134.24000144004822,
- "p90": 164.0319973230362,
- "p95": 166.81599617004395,
- "p99": 169.91999745368958
- },
- "isolatedSum": {
- "p50": 149.24799650907516,
- "p90": 190.62399864196777,
- "p95": 192.28800386190414,
- "p99": 201.88800245523453
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 72.60800153017044,
- "p90": 107.84000158309937,
- "p95": 108.76800119876862,
- "p99": 112.44799941778183
- },
- "combine": {
- "p50": 74.91199672222137,
- "p90": 84.03199911117554,
- "p95": 84.48000252246857,
- "p99": 89.24800157546997
- },
- "roundtrip": {
- "p50": 134.8160058259964,
- "p90": 165.69599509239197,
- "p95": 167.42399334907532,
- "p99": 170.04799842834473
- },
- "isolatedSum": {
- "p50": 147.51999825239182,
- "p90": 191.8720006942749,
- "p95": 193.24800372123718,
- "p99": 201.6960009932518
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 72.57600128650665,
- "p90": 103.32799702882767,
- "p95": 106.1440035700798,
- "p99": 112.22399771213531
+ "p50": 364.5760118961334,
+ "p90": 393.44000816345215,
+ "p95": 400.7039964199066,
+ "p99": 503.9359927177429
},
"combine": {
- "p50": 75.55200159549713,
- "p90": 87.90399879217148,
- "p95": 88.92799913883209,
- "p99": 91.26400202512741
+ "p50": 118.04799735546112,
+ "p90": 125.72799623012543,
+ "p95": 128.28800082206726,
+ "p99": 142.33599603176117
},
"roundtrip": {
- "p50": 134.36800241470337,
- "p90": 164.8319959640503,
- "p95": 166.75199568271637,
- "p99": 172.44799435138702
+ "p50": 454.1119933128357,
+ "p90": 480.5760085582733,
+ "p95": 500.63997507095337,
+ "p99": 580.8640122413635
},
"isolatedSum": {
- "p50": 148.12800288200378,
- "p90": 191.23199582099915,
- "p95": 195.0720027089119,
- "p99": 203.48799973726273
+ "p50": 482.62400925159454,
+ "p90": 519.1680043935776,
+ "p95": 528.9919972419739,
+ "p99": 646.2719887495041
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 75.93599706888199,
- "p90": 104.54399883747101,
- "p95": 107.51999914646149,
- "p99": 112.64000087976456
+ "p50": 385.0879967212677,
+ "p90": 405.4720103740692,
+ "p95": 416.703999042511,
+ "p99": 539.5839810371399
},
"combine": {
- "p50": 75.48800110816956,
- "p90": 84.28800106048584,
- "p95": 88.86399865150452,
- "p99": 91.32800251245499
+ "p50": 160.38399934768677,
+ "p90": 167.64800250530243,
+ "p95": 171.07200622558594,
+ "p99": 191.42399728298187
},
"roundtrip": {
- "p50": 134.5279961824417,
- "p90": 165.47200083732605,
- "p95": 167.35999286174774,
- "p99": 170.71999609470367
+ "p50": 534.3999862670898,
+ "p90": 554.4959902763367,
+ "p95": 571.3919997215271,
+ "p99": 673.7279891967773
},
"isolatedSum": {
- "p50": 151.42399817705154,
- "p90": 188.83199989795685,
- "p95": 196.383997797966,
- "p99": 203.96800339221954
+ "p50": 545.4719960689545,
+ "p90": 573.1200128793716,
+ "p95": 587.7760052680969,
+ "p99": 731.0079783201218
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 77944832,
+ "combineLogicalBytes": 155889664,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 84.51200276613235,
- "p90": 102.68799960613251,
- "p95": 104.41599786281586,
- "p99": 111.13599687814713
+ "p50": 507.29602575302124,
+ "p90": 522.271990776062,
+ "p95": 525.2479910850525,
+ "p99": 538.1119847297668
},
"combine": {
- "p50": 75.96799731254578,
- "p90": 90.62399715185165,
- "p95": 91.58399701118469,
- "p99": 92.6079973578453
+ "p50": 275.7120132446289,
+ "p90": 282.20799565315247,
+ "p95": 284.960001707077,
+ "p99": 289.72798585891724
},
"roundtrip": {
- "p50": 133.44000279903412,
- "p90": 164.67200219631195,
- "p95": 167.29600727558136,
- "p99": 349.88799691200256
+ "p50": 761.0880136489868,
+ "p90": 775.1039862632751,
+ "p95": 780.9600234031677,
+ "p99": 856.0960292816162
},
"isolatedSum": {
- "p50": 160.48000007867813,
- "p90": 193.31199675798416,
- "p95": 195.99999487400055,
- "p99": 203.74399423599243
+ "p50": 783.0080389976501,
+ "p90": 804.4799864292145,
+ "p95": 810.2079927921295,
+ "p99": 827.8399705886841
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
+ "dispatchLogicalBytes": 156133376,
+ "combineLogicalBytes": 312266752,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 91.87199920415878,
- "p90": 107.58399963378906,
- "p95": 108.60799998044968,
- "p99": 113.15199732780457
+ "p50": 739.5840287208557,
+ "p90": 757.3760151863098,
+ "p95": 777.4080038070679,
+ "p99": 874.4000196456909
},
"combine": {
- "p50": 83.23200047016144,
- "p90": 92.0960009098053,
- "p95": 92.70399808883667,
- "p99": 97.59999811649323
+ "p50": 481.56800866127014,
+ "p90": 488.0320131778717,
+ "p95": 492.76798963546753,
+ "p99": 521.0239887237549
},
"roundtrip": {
- "p50": 143.71199905872345,
- "p90": 170.1440066099167,
- "p95": 173.66400361061096,
- "p99": 177.2480010986328
+ "p50": 1188.704013824463,
+ "p90": 1206.5919637680054,
+ "p95": 1228.5759449005127,
+ "p99": 1329.4399976730347
},
"isolatedSum": {
- "p50": 175.10399967432022,
- "p90": 199.68000054359436,
- "p95": 201.31199806928635,
- "p99": 210.7519954442978
+ "p50": 1221.1520373821259,
+ "p90": 1245.4080283641815,
+ "p95": 1270.1759934425354,
+ "p99": 1395.4240083694458
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 3,
+ "dispatchLogicalBytes": 311721984,
+ "combineLogicalBytes": 623443968,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 64,
- "globalTokens": 512,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 98.24000298976898,
- "p90": 121.69600278139114,
- "p95": 124.95999783277512,
- "p99": 132.06399977207184
+ "p50": 1198.3360052108765,
+ "p90": 1216.6399955749512,
+ "p95": 1229.5360565185547,
+ "p99": 1348.1279611587524
},
"combine": {
- "p50": 92.00000017881393,
- "p90": 105.34399747848511,
- "p95": 106.52799904346466,
- "p99": 107.93600231409073
+ "p50": 871.8400001525879,
+ "p90": 879.5520067214966,
+ "p95": 882.5920224189758,
+ "p99": 895.039975643158
},
"roundtrip": {
- "p50": 167.67999529838562,
- "p90": 185.2799952030182,
- "p95": 188.4479969739914,
- "p99": 196.0960030555725
+ "p50": 2025.696039199829,
+ "p90": 2043.5519218444824,
+ "p95": 2052.639961242676,
+ "p99": 2149.120092391968
},
"isolatedSum": {
- "p50": 190.24000316858292,
- "p90": 227.04000025987625,
- "p95": 231.48799687623978,
- "p99": 240.00000208616257
+ "p50": 2070.1760053634644,
+ "p90": 2096.1920022964478,
+ "p95": 2112.1280789375305,
+ "p99": 2243.1679368019104
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 621902848,
+ "combineLogicalBytes": 1243805696,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 118.65600198507309,
- "p90": 134.3040019273758,
- "p95": 136.86400651931763,
- "p99": 142.17600226402283
+ "p50": 2230.2401065826416,
+ "p90": 2242.2080039978027,
+ "p95": 2245.5999851226807,
+ "p99": 2260.4479789733887
},
"combine": {
- "p50": 108.70400071144104,
- "p90": 121.76000326871872,
- "p95": 122.8799968957901,
- "p99": 124.35200065374374
+ "p50": 1617.3759698867798,
+ "p90": 1623.968005180359,
+ "p95": 1626.6560554504395,
+ "p99": 1631.5200328826904
},
"roundtrip": {
- "p50": 202.65600085258484,
- "p90": 218.6560034751892,
- "p95": 221.3120013475418,
- "p99": 225.0880002975464
+ "p50": 3825.472116470337,
+ "p90": 3839.168071746826,
+ "p95": 3843.4879779815674,
+ "p99": 3857.9840660095215
},
"isolatedSum": {
- "p50": 227.36000269651413,
- "p90": 256.0640051960945,
- "p95": 259.7440034151077,
- "p99": 266.52800291776657
+ "p50": 3847.6160764694214,
+ "p90": 3866.1760091781616,
+ "p95": 3872.25604057312,
+ "p99": 3891.968011856079
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 1243504640,
+ "combineLogicalBytes": 2487009280,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
@@ -37789,47 +38016,48 @@
]
},
{
- "id": "cx-9a231e73",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400",
- "colorKey": "h100_42947950",
- "comparisonKey": "fb346b1019e55bb0",
+ "id": "cx-16cb50ff",
+ "identity": "gb300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8",
+ "colorKey": "gb300_b1bd5887",
+ "comparisonKey": "ff56b33f9f8f54e2",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T00:13:23.336108+00:00",
+ "generatedAt": "2026-06-29T13:56:56.953377+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_12",
- "sku": "h100",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "runtime-visible-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
+ "label": "GB300 EP8 · deepep · fp8",
+ "model": "Kimi-K2",
"shape": {
"hidden": 7168,
"topk": 8,
- "experts": 256,
+ "experts": 384,
"routing": "uniform",
"routingLabel": "uniform",
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
"activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1515,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 132,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -37837,388 +38065,243 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "b029c1a6fded400",
- "workloadId": "set:3:07d544ac2af401ec",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "fc08bf2f8d42ed8",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28272369133",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272369133",
- "createdAt": "2026-06-27T00:13:23.336108+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.88000041246414,
- "p90": 104.8320010304451,
- "p95": 107.96800255775452,
- "p99": 118.97599697113037
- },
- "combine": {
- "p50": 79.93599772453308,
- "p90": 87.5839963555336,
- "p95": 87.99999952316284,
- "p99": 92.28800237178802
- },
- "roundtrip": {
- "p50": 154.11199629306793,
- "p90": 159.2639982700348,
- "p95": 161.43999993801117,
- "p99": 167.29600727558136
- },
- "isolatedSum": {
- "p50": 178.81599813699722,
- "p90": 192.4159973859787,
- "p95": 195.96800208091736,
- "p99": 211.2639993429184
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 103.04000228643417,
- "p90": 108.22399705648422,
- "p95": 110.43199896812439,
- "p99": 116.64000153541565
- },
- "combine": {
- "p50": 87.93599903583527,
- "p90": 94.94400024414062,
- "p95": 96.03200107812881,
- "p99": 98.49599748849869
- },
- "roundtrip": {
- "p50": 162.4639928340912,
- "p90": 170.3999936580658,
- "p95": 172.31999337673187,
- "p99": 178.9119988679886
- },
- "isolatedSum": {
- "p50": 190.97600132226944,
- "p90": 203.16799730062485,
- "p95": 206.4640000462532,
- "p99": 215.13599902391434
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
{
"tokensPerRank": 128,
"globalTokens": 1024,
"dispatch": {
- "p50": 129.40800189971924,
- "p90": 137.7599984407425,
- "p95": 139.45600390434265,
- "p99": 143.48800480365753
+ "p50": 340.60800075531006,
+ "p90": 360.7040047645569,
+ "p95": 367.0080006122589,
+ "p99": 378.2399892807007
},
"combine": {
- "p50": 114.88000303506851,
- "p90": 119.87199634313583,
- "p95": 120.4800009727478,
- "p99": 123.48800152540207
+ "p50": 118.46400052309036,
+ "p90": 124.64000284671783,
+ "p95": 126.91199779510498,
+ "p99": 131.6159963607788
},
"roundtrip": {
- "p50": 213.0880057811737,
- "p90": 217.3759937286377,
- "p95": 219.10400688648224,
- "p99": 223.23200106620789
+ "p50": 433.3760142326355,
+ "p90": 448.5760033130646,
+ "p95": 453.3120095729828,
+ "p99": 469.2479968070984
},
"isolatedSum": {
- "p50": 244.28800493478775,
- "p90": 257.6319947838783,
- "p95": 259.93600487709045,
- "p99": 266.9760063290596
+ "p50": 459.0720012784004,
+ "p90": 485.3440076112747,
+ "p95": 493.9199984073639,
+ "p99": 509.8559856414795
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 38757376,
+ "combineLogicalBytes": 77514752,
+ "fanoutMean": 5.2802734375,
+ "recvTokensMax": 707,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
- }
- ]
- },
- {
- "id": "cx-535aa40c",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da",
- "colorKey": "h100_42947950",
- "comparisonKey": "f31dd87deba90285",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:53:48.998127+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_03",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "c774c8e4abb34da",
- "workloadId": "set:5:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28273506790",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273506790",
- "createdAt": "2026-06-27T00:53:48.998127+00:00",
- "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e"
- },
- "rows": [
+ },
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 94.4959968328476,
- "p90": 100.5759984254837,
- "p95": 102.81600058078766,
- "p99": 107.42399841547012
+ "p50": 372.76801466941833,
+ "p90": 385.0559890270233,
+ "p95": 388.2879912853241,
+ "p99": 482.91200399398804
},
"combine": {
- "p50": 76.92799717187881,
- "p90": 80.89599758386612,
- "p95": 81.37600123882294,
- "p99": 85.91999858617783
+ "p50": 162.81600296497345,
+ "p90": 168.44800114631653,
+ "p95": 171.80800437927246,
+ "p99": 178.52799594402313
},
"roundtrip": {
- "p50": 150.65599977970123,
- "p90": 155.35999834537506,
- "p95": 157.02399611473083,
- "p99": 163.5199934244156
+ "p50": 522.8480100631714,
+ "p90": 534.5919728279114,
+ "p95": 538.0480289459229,
+ "p99": 551.8720149993896
},
"isolatedSum": {
- "p50": 171.4239940047264,
- "p90": 181.47199600934982,
- "p95": 184.1920018196106,
- "p99": 193.34399700164795
+ "p50": 535.5840176343918,
+ "p90": 553.5039901733398,
+ "p95": 560.0959956645966,
+ "p99": 661.4399999380112
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 1,
+ "dispatchLogicalBytes": 77285376,
+ "combineLogicalBytes": 154570752,
+ "fanoutMean": 5.2646484375,
+ "recvTokensMax": 1391,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 2,
- "globalTokens": 16,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 90.97599983215332,
- "p90": 98.52799773216248,
- "p95": 101.02400183677673,
- "p99": 107.68000036478043
+ "p50": 485.4080080986023,
+ "p90": 496.6079890727997,
+ "p95": 499.35999512672424,
+ "p99": 537.0240211486816
},
"combine": {
- "p50": 77.11999863386154,
- "p90": 81.216000020504,
- "p95": 82.71999657154083,
- "p99": 87.55200356245041
+ "p50": 281.2480032444,
+ "p90": 288.60801458358765,
+ "p95": 290.5600070953369,
+ "p99": 295.80798745155334
},
"roundtrip": {
- "p50": 149.47199821472168,
- "p90": 154.91199493408203,
- "p95": 157.151997089386,
- "p99": 163.80800306797028
+ "p50": 745.0559735298157,
+ "p90": 754.8480033874512,
+ "p95": 758.8160037994385,
+ "p99": 774.0160226821899
},
"isolatedSum": {
- "p50": 168.09599846601486,
- "p90": 179.74399775266647,
- "p95": 183.74399840831757,
- "p99": 195.23200392723083
+ "p50": 766.6560113430023,
+ "p90": 785.2160036563873,
+ "p95": 789.9200022220612,
+ "p99": 832.832008600235
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
+ "dispatchLogicalBytes": 154886144,
+ "combineLogicalBytes": 309772288,
+ "fanoutMean": 5.275390625,
+ "recvTokensMax": 2754,
"stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 4,
- "globalTokens": 32,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 93.12000125646591,
- "p90": 99.64799880981445,
- "p95": 102.27199643850327,
- "p99": 109.43999886512756
+ "p50": 719.4880247116089,
+ "p90": 731.6160202026367,
+ "p95": 734.7519993782043,
+ "p99": 751.1680126190186
},
"combine": {
- "p50": 79.3600007891655,
- "p90": 83.0719992518425,
- "p95": 84.22400057315826,
- "p99": 88.54400366544724
+ "p50": 487.90401220321655,
+ "p90": 494.3679869174957,
+ "p95": 496.0959851741791,
+ "p99": 502.4639964103699
},
"roundtrip": {
- "p50": 151.96800231933594,
- "p90": 158.9439958333969,
- "p95": 160.25599837303162,
- "p99": 163.07200491428375
+ "p50": 1180.2239418029785,
+ "p90": 1194.4960355758667,
+ "p95": 1202.1119594573975,
+ "p99": 1253.1520128250122
},
"isolatedSum": {
- "p50": 172.4800020456314,
- "p90": 182.71999806165695,
- "p95": 186.49599701166153,
- "p99": 197.9840025305748
+ "p50": 1207.3920369148254,
+ "p90": 1225.9840071201324,
+ "p95": 1230.8479845523834,
+ "p99": 1253.6320090293884
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
+ "dispatchLogicalBytes": 309750784,
+ "combineLogicalBytes": 619501568,
+ "fanoutMean": 5.2750244140625,
+ "recvTokensMax": 5469,
"stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 94.27200257778168,
- "p90": 100.80000013113022,
- "p95": 102.62399911880493,
- "p99": 107.80800133943558
+ "p50": 1175.0400066375732,
+ "p90": 1188.4160041809082,
+ "p95": 1192.1919584274292,
+ "p99": 1211.583971977234
},
"combine": {
- "p50": 78.68800312280655,
- "p90": 83.13599973917007,
- "p95": 84.25600081682205,
- "p99": 86.65599673986435
+ "p50": 856.8000197410583,
+ "p90": 863.5839819908142,
+ "p95": 866.3039803504944,
+ "p99": 876.416027545929
},
"roundtrip": {
- "p50": 151.39199793338776,
- "p90": 157.79200196266174,
- "p95": 160.25599837303162,
- "p99": 164.95999693870544
+ "p50": 1998.2080459594727,
+ "p90": 2011.5840435028076,
+ "p95": 2016.767978668213,
+ "p99": 2027.26411819458
},
"isolatedSum": {
- "p50": 172.96000570058823,
- "p90": 183.9359998703003,
- "p95": 186.87999993562698,
- "p99": 194.46399807929993
+ "p50": 2031.8400263786316,
+ "p90": 2051.9999861717224,
+ "p95": 2058.4959387779236,
+ "p99": 2087.999999523163
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 619687936,
+ "combineLogicalBytes": 1239375872,
+ "fanoutMean": 5.276611328125,
+ "recvTokensMax": 10883,
+ "stragglerRank": 7,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 16,
- "globalTokens": 128,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 96.16000205278397,
- "p90": 100.92800110578537,
- "p95": 103.71199995279312,
- "p99": 108.06400328874588
+ "p50": 2221.760034561157,
+ "p90": 2233.1199645996094,
+ "p95": 2237.823963165283,
+ "p99": 2246.335983276367
},
"combine": {
- "p50": 81.85599744319916,
- "p90": 87.26400136947632,
- "p95": 88.8959988951683,
- "p99": 90.04800021648407
+ "p50": 1596.6399908065796,
+ "p90": 1603.6479473114014,
+ "p95": 1605.2160263061523,
+ "p99": 1610.4960441589355
},
"roundtrip": {
- "p50": 153.6639928817749,
- "p90": 160.35200655460358,
- "p95": 161.95200383663177,
- "p99": 165.3439998626709
+ "p50": 3788.2559299468994,
+ "p90": 3799.2959022521973,
+ "p95": 3803.936004638672,
+ "p99": 3814.0480518341064
},
"isolatedSum": {
- "p50": 178.01599949598312,
- "p90": 188.1920024752617,
- "p95": 192.60799884796143,
- "p99": 198.11200350522995
+ "p50": 3818.400025367737,
+ "p90": 3836.7679119110107,
+ "p95": 3843.0399894714355,
+ "p99": 3856.8320274353027
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
+ "dispatchLogicalBytes": 1239834624,
+ "combineLogicalBytes": 2479669248,
+ "fanoutMean": 5.278564453125,
+ "recvTokensMax": 21730,
"stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
@@ -38227,28 +38310,28 @@
]
},
{
- "id": "cx-5a3d925c",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "h100_42947950",
- "comparisonKey": "da8c4fcc63f5bf6e",
+ "id": "cx-b8ab0990",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901",
+ "colorKey": "gb300_b1b733fb",
+ "comparisonKey": "5ba58d24d34449fd",
"schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:07.028525+00:00",
+ "generatedAt": "2026-06-29T13:52:40.434876+00:00",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_18",
- "sku": "h100",
+ "publicationStatus": "comparable-experimental",
+ "runner": "gb300-8x",
+ "sku": "gb300",
"backend": "deepep",
- "phase": "decode",
+ "phase": "prefill",
"mode": "normal",
"resourceMode": "tuned",
"suite": "backend-default",
"comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
+ "measurementContract": "cached-layout-comm-only-v1",
+ "topologyClass": "gb300-nvl72-mnnvl",
+ "transport": "mnnvl",
"worldSize": 8,
"epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
+ "label": "GB300 EP8 · deepep · fp8 [cl]",
"model": "DeepSeek-V3/V4",
"shape": {
"hidden": 7168,
@@ -38259,15 +38342,16 @@
"routingStep": 0,
"unevenTokens": "none",
"eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "small-amplitude",
+ "dispatchDtype": "fp8",
+ "kernelGeneration": "v1",
+ "activationProfile": "normal",
"combineQuantMode": "none"
},
"resourceProfile": {
"requestedFraction": null,
- "achievedFraction": 0.1515,
+ "achievedFraction": 0.1316,
"configuredUnits": 20,
- "deviceUnits": 132,
+ "deviceUnits": 152,
"resourceClass": "backend-tuned",
"conformanceClass": "backend-default",
"fixedKernel": false,
@@ -38275,91990 +38359,2496 @@
},
"placement": {
"kind": "packed",
- "nodes": 1,
+ "nodes": 2,
"gpusPerNode": 8,
"scaleUpDomain": 8
},
"routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
+ "traceSignature": "b9896b0a7ca9901",
+ "workloadId": null,
+ "workloadSource": "seeded-runtime",
"eplbImbalanceBefore": null,
"eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
+ "backendVersion": "1.1.0+814e508",
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
"repository": "SemiAnalysisAI/InferenceX",
"run": {
- "id": "28272117855",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272117855",
- "createdAt": "2026-06-27T00:05:07.028525+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
},
"rows": [
{
- "tokensPerRank": 1,
- "globalTokens": 8,
+ "tokensPerRank": 128,
+ "globalTokens": 1024,
"dispatch": {
- "p50": 97.79199957847595,
- "p90": 105.02400249242783,
- "p95": 107.29599744081497,
- "p99": 115.90400338172913
+ "p50": 99.0080013871193,
+ "p90": 138.08000087738037,
+ "p95": 148.3519971370697,
+ "p99": 159.19999778270721
},
"combine": {
- "p50": 79.77599650621414,
- "p90": 82.11199939250946,
- "p95": 86.91199868917465,
- "p99": 88.79999816417694
+ "p50": 121.76000326871872,
+ "p90": 147.67999947071075,
+ "p95": 155.5519998073578,
+ "p99": 182.0800006389618
},
"roundtrip": {
- "p50": 152.44799852371216,
- "p90": 158.59200060367584,
- "p95": 160.44799983501434,
- "p99": 165.40800034999847
+ "p50": 265.6640112400055,
+ "p90": 291.77600145339966,
+ "p95": 302.5279939174652,
+ "p99": 338.49599957466125
},
"isolatedSum": {
- "p50": 177.5679960846901,
- "p90": 187.1360018849373,
- "p95": 194.20799612998962,
- "p99": 204.70400154590607
+ "p50": 220.768004655838,
+ "p90": 285.7600003480911,
+ "p95": 303.9039969444275,
+ "p99": 341.279998421669
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 38836224,
+ "combineLogicalBytes": 77672448,
+ "fanoutMean": 5.291015625,
+ "recvTokensMax": 723,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 256,
+ "globalTokens": 2048,
"dispatch": {
- "p50": 98.01600128412247,
- "p90": 103.87200117111206,
- "p95": 106.01600259542465,
- "p99": 113.11999708414078
+ "p50": 130.048006772995,
+ "p90": 154.33600544929504,
+ "p95": 175.7120043039322,
+ "p99": 193.7279999256134
},
"combine": {
- "p50": 81.02399855852127,
- "p90": 87.71199733018875,
- "p95": 87.96799927949905,
- "p99": 89.50400352478027
+ "p50": 162.9440039396286,
+ "p90": 187.74400651454926,
+ "p95": 203.8400024175644,
+ "p99": 224.73600506782532
},
"roundtrip": {
- "p50": 155.16799688339233,
- "p90": 160.38399934768677,
- "p95": 162.23999857902527,
- "p99": 166.87999665737152
+ "p50": 344.1919982433319,
+ "p90": 379.5199990272522,
+ "p95": 392.8000032901764,
+ "p99": 424.54400658607483
},
"isolatedSum": {
- "p50": 179.03999984264374,
- "p90": 191.5839985013008,
- "p95": 193.9840018749237,
- "p99": 202.62400060892105
+ "p50": 292.9920107126236,
+ "p90": 342.0800119638443,
+ "p95": 379.5520067214966,
+ "p99": 418.4640049934387
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
+ "dispatchLogicalBytes": 77944832,
+ "combineLogicalBytes": 155889664,
+ "fanoutMean": 5.3095703125,
+ "recvTokensMax": 1422,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 512,
+ "globalTokens": 4096,
"dispatch": {
- "p50": 101.98400169610977,
- "p90": 106.6880002617836,
- "p95": 109.95200276374817,
- "p99": 120.35199999809265
+ "p50": 183.45600366592407,
+ "p90": 190.08000195026398,
+ "p95": 192.57600605487823,
+ "p99": 198.71999323368073
},
"combine": {
- "p50": 88.22400122880936,
- "p90": 95.0080007314682,
- "p95": 95.93600034713745,
- "p99": 96.83199971914291
+ "p50": 282.24000334739685,
+ "p90": 289.4720137119293,
+ "p95": 291.1680042743683,
+ "p99": 295.74400186538696
},
"roundtrip": {
- "p50": 162.75200247764587,
- "p90": 169.63200271129608,
- "p95": 171.58399522304535,
- "p99": 176.28799378871918
+ "p50": 569.3759918212891,
+ "p90": 578.0799984931946,
+ "p95": 581.6640257835388,
+ "p99": 587.3600244522095
},
"isolatedSum": {
- "p50": 190.20800292491913,
- "p90": 201.6960009932518,
- "p95": 205.88800311088562,
- "p99": 217.18399971723557
+ "p50": 465.6960070133209,
+ "p90": 479.5520156621933,
+ "p95": 483.7440103292465,
+ "p99": 494.4639950990677
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 129.66400384902954,
- "p90": 137.08800077438354,
- "p95": 139.0399932861328,
- "p99": 142.752006649971
- },
- "combine": {
- "p50": 115.00799655914307,
- "p90": 120.7680031657219,
- "p95": 121.31199985742569,
- "p99": 127.83999741077423
- },
- "roundtrip": {
- "p50": 212.89600431919098,
- "p90": 218.72000396251678,
- "p95": 219.9680060148239,
- "p99": 224.06400740146637
- },
- "isolatedSum": {
- "p50": 244.6720004081726,
- "p90": 257.85600394010544,
- "p95": 260.3519931435585,
- "p99": 270.59200406074524
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-49497b06",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "h100_42947950",
- "comparisonKey": "5ec10556693a8c2b",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:08.113815+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_05",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "wide-dynamic-range",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272121618",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272121618",
- "createdAt": "2026-06-27T00:05:08.113815+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 98.24000298976898,
- "p90": 105.69600015878677,
- "p95": 108.12799632549286,
- "p99": 113.37599903345108
- },
- "combine": {
- "p50": 79.68000322580338,
- "p90": 82.07999914884567,
- "p95": 82.97599852085114,
- "p99": 87.61599659919739
- },
- "roundtrip": {
- "p50": 146.464005112648,
- "p90": 152.8320014476776,
- "p95": 154.59200739860535,
- "p99": 158.84800255298615
- },
- "isolatedSum": {
- "p50": 177.92000621557236,
- "p90": 187.77599930763245,
- "p95": 191.103994846344,
- "p99": 200.99199563264847
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 156133376,
+ "combineLogicalBytes": 312266752,
+ "fanoutMean": 5.31787109375,
+ "recvTokensMax": 2779,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 8,
- "globalTokens": 64,
+ "tokensPerRank": 1024,
+ "globalTokens": 8192,
"dispatch": {
- "p50": 100.0640019774437,
- "p90": 107.32799768447876,
- "p95": 110.27199774980545,
- "p99": 160.92799603939056
+ "p50": 297.0240116119385,
+ "p90": 310.62400341033936,
+ "p95": 319.93600726127625,
+ "p99": 348.4160006046295
},
"combine": {
- "p50": 81.34400099515915,
- "p90": 87.16800063848495,
- "p95": 87.87199854850769,
- "p99": 90.27200192213058
+ "p50": 483.71198773384094,
+ "p90": 502.1439790725708,
+ "p95": 511.1680030822754,
+ "p99": 532.4159860610962
},
"roundtrip": {
- "p50": 152.92799472808838,
- "p90": 160.51200032234192,
- "p95": 162.30399906635284,
- "p99": 166.24000668525696
+ "p50": 999.2319941520691,
+ "p90": 1013.8880014419556,
+ "p95": 1025.8560180664062,
+ "p99": 1047.5200414657593
},
"isolatedSum": {
- "p50": 181.40800297260284,
- "p90": 194.49599832296371,
- "p95": 198.14399629831314,
- "p99": 251.19999796152115
+ "p50": 780.7359993457794,
+ "p90": 812.7679824829102,
+ "p95": 831.1040103435516,
+ "p99": 880.8319866657257
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 311721984,
+ "combineLogicalBytes": 623443968,
+ "fanoutMean": 5.30859375,
+ "recvTokensMax": 5505,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 32,
- "globalTokens": 256,
+ "tokensPerRank": 2048,
+ "globalTokens": 16384,
"dispatch": {
- "p50": 103.39199751615524,
- "p90": 108.2879975438118,
- "p95": 110.6560006737709,
- "p99": 119.03999745845795
+ "p50": 531.9679975509644,
+ "p90": 557.9839944839478,
+ "p95": 576.2240290641785,
+ "p99": 596.9280004501343
},
"combine": {
- "p50": 89.75999802350998,
- "p90": 95.20000219345093,
- "p95": 95.93600034713745,
- "p99": 98.68799895048141
+ "p50": 868.3519959449768,
+ "p90": 883.8719725608826,
+ "p95": 894.208014011383,
+ "p99": 924.0959882736206
},
"roundtrip": {
- "p50": 161.6320013999939,
- "p90": 169.08800601959229,
- "p95": 170.68800330162048,
- "p99": 175.64800381660461
+ "p50": 1833.9840173721313,
+ "p90": 1859.8719835281372,
+ "p95": 1879.6800374984741,
+ "p99": 1900.704026222229
},
"isolatedSum": {
- "p50": 193.15199553966522,
- "p90": 203.48799973726273,
- "p95": 206.59200102090836,
- "p99": 217.72799640893936
+ "p50": 1400.3199934959412,
+ "p90": 1441.8559670448303,
+ "p95": 1470.4320430755615,
+ "p99": 1521.0239887237549
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 621902848,
+ "combineLogicalBytes": 1243805696,
+ "fanoutMean": 5.29547119140625,
+ "recvTokensMax": 10952,
+ "stragglerRank": 1,
"correct": true,
"samplesPooled": 600,
"trials": 3
},
{
- "tokensPerRank": 128,
- "globalTokens": 1024,
+ "tokensPerRank": 4096,
+ "globalTokens": 32768,
"dispatch": {
- "p50": 130.46400249004364,
- "p90": 136.9280070066452,
- "p95": 139.23199474811554,
- "p99": 143.5839980840683
+ "p50": 992.031991481781,
+ "p90": 1004.863977432251,
+ "p95": 1008.7360143661499,
+ "p99": 1018.0480480194092
},
"combine": {
- "p50": 114.78400230407715,
- "p90": 120.83200365304947,
- "p95": 122.11199849843979,
- "p99": 122.8799968957901
+ "p50": 1615.7439947128296,
+ "p90": 1622.4639415740967,
+ "p95": 1625.1519918441772,
+ "p99": 1631.935954093933
},
"roundtrip": {
- "p50": 211.71200275421143,
- "p90": 219.35999393463135,
- "p95": 221.91999852657318,
- "p99": 235.00800132751465
+ "p50": 3490.72003364563,
+ "p90": 3504.6401023864746,
+ "p95": 3512.00008392334,
+ "p99": 3539.936065673828
},
"isolatedSum": {
- "p50": 245.2480047941208,
- "p90": 257.7600106596947,
- "p95": 261.3439932465553,
- "p99": 266.4639949798584
+ "p50": 2607.7759861946106,
+ "p90": 2627.3279190063477,
+ "p95": 2633.888006210327,
+ "p99": 2649.9840021133423
},
"roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
+ "dispatchLogicalBytes": 1243504640,
+ "combineLogicalBytes": 2487009280,
+ "fanoutMean": 5.294189453125,
+ "recvTokensMax": 21781,
+ "stragglerRank": 0,
"correct": true,
"samplesPooled": 600,
"trials": 3
}
]
+ }
+ ],
+ "failures": [
+ {
+ "id": "cxf-152e9bea",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "generatedAt": "2026-06-29T13:56:37.073274+00:00",
+ "publicationStatus": "diagnostic",
+ "status": "valid",
+ "sku": "gb200",
+ "backend": "deepep",
+ "phase": "decode",
+ "config": "fp8/ll/layout-and-dispatch",
+ "reason": "anomaly:roundtrip_gt_isolated_sum",
+ "returnCode": null,
+ "run": {
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ }
},
{
- "id": "cx-3b04d344",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "h100_42947950",
- "comparisonKey": "8bd0272e65400ebd",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:11.747577+00:00",
+ "id": "cxf-ef4bad88",
+ "identity": "gb200|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "generatedAt": "2026-06-29T13:57:26.133416+00:00",
+ "publicationStatus": "diagnostic",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_00",
- "sku": "h100",
+ "sku": "gb200",
"backend": "deepep",
"phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "zeros",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
+ "config": "fp8/ll/runtime-visible",
+ "reason": "anomaly:roundtrip_gt_isolated_sum",
+ "returnCode": null,
"run": {
- "id": "28272113941",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272113941",
- "createdAt": "2026-06-27T00:05:11.747577+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 97.15200215578079,
- "p90": 103.67999970912933,
- "p95": 105.85600137710571,
- "p99": 108.99200290441513
- },
- "combine": {
- "p50": 79.64800298213959,
- "p90": 82.33600109815598,
- "p95": 86.84799820184708,
- "p99": 87.96799927949905
- },
- "roundtrip": {
- "p50": 151.8400013446808,
- "p90": 158.01599621772766,
- "p95": 160.76800227165222,
- "p99": 165.3120070695877
- },
- "isolatedSum": {
- "p50": 176.80000513792038,
- "p90": 186.0160008072853,
- "p95": 192.7039995789528,
- "p99": 196.96000218391418
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 97.28000313043594,
- "p90": 103.71199995279312,
- "p95": 106.4319983124733,
- "p99": 121.63200229406357
- },
- "combine": {
- "p50": 79.93599772453308,
- "p90": 87.39200234413147,
- "p95": 87.93599903583527,
- "p99": 90.04800021648407
- },
- "roundtrip": {
- "p50": 153.72799336910248,
- "p90": 159.55199301242828,
- "p95": 160.7999950647354,
- "p99": 165.6000018119812
- },
- "isolatedSum": {
- "p50": 177.21600085496902,
- "p90": 191.1040022969246,
- "p95": 194.36799734830856,
- "p99": 211.68000251054764
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 104.00000214576721,
- "p90": 108.44799876213074,
- "p95": 111.68000102043152,
- "p99": 126.75200402736664
- },
- "combine": {
- "p50": 87.99999952316284,
- "p90": 93.44000369310379,
- "p95": 95.87199985980988,
- "p99": 97.59999811649323
- },
- "roundtrip": {
- "p50": 161.8880033493042,
- "p90": 168.64000260829926,
- "p95": 170.0800061225891,
- "p99": 175.99999904632568
- },
- "isolatedSum": {
- "p50": 192.00000166893005,
- "p90": 201.88800245523453,
- "p95": 207.5520008802414,
- "p99": 224.35200214385986
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 130.0159990787506,
- "p90": 137.7280056476593,
- "p95": 138.7840062379837,
- "p99": 142.2719955444336
- },
- "combine": {
- "p50": 115.167997777462,
- "p90": 120.54400146007538,
- "p95": 120.95999717712402,
- "p99": 123.87199699878693
- },
- "roundtrip": {
- "p50": 212.47999370098114,
- "p90": 216.63999557495117,
- "p95": 218.1439995765686,
- "p99": 221.47199511528015
- },
- "isolatedSum": {
- "p50": 245.18399685621262,
- "p90": 258.2720071077347,
- "p95": 259.7440034151077,
- "p99": 266.1439925432205
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
+ "id": "28374335449",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374335449",
+ "createdAt": "2026-06-29T13:08:20Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ }
},
{
- "id": "cx-d0428a76",
- "identity": "h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "e3488cf5058170e6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:47:28.813270+00:00",
+ "id": "cxf-70ad6a68",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "generatedAt": "2026-06-29T13:53:46.301476+00:00",
+ "publicationStatus": "diagnostic",
"status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_07",
- "sku": "h100",
+ "sku": "gb300",
"backend": "deepep",
"phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
+ "config": "fp8/ll/layout-and-dispatch",
+ "reason": "anomaly:roundtrip_gt_isolated_sum",
+ "returnCode": null,
+ "run": {
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ }
+ },
+ {
+ "id": "cxf-6ecc9670",
+ "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510",
+ "generatedAt": "2026-06-29T13:55:19.539361+00:00",
+ "publicationStatus": "diagnostic",
+ "status": "valid",
+ "sku": "gb300",
+ "backend": "deepep",
+ "phase": "decode",
+ "config": "fp8/ll/runtime-visible",
+ "reason": "anomaly:roundtrip_gt_isolated_sum",
+ "returnCode": null,
+ "run": {
+ "id": "28374342409",
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28374342409",
+ "createdAt": "2026-06-29T13:08:27Z",
+ "sha": "38890f652c38d280794346b6b5b951b9b380a24a"
+ }
+ }
+ ],
+ "summaryCards": [
+ {
+ "title": "Best backend · decode EP8",
+ "value": "no data",
+ "sub": "no official headline cell at this phase/EP"
+ },
+ {
+ "title": "Best backend · prefill EP8",
+ "value": "no data",
+ "sub": "no official headline cell at this phase/EP"
+ },
+ {
+ "title": "LL -> normal crossover",
+ "value": "T~128 tok/rank",
+ "sub": "GB200 EP8 bf16 · normal RT p50 wins above this"
+ },
+ {
+ "title": "Resource-normalized winner",
+ "value": "no data",
+ "sub": "no official headline cell at this phase/EP"
+ },
+ {
+ "title": "Backend-default winner",
+ "value": "no data",
+ "sub": "no official headline cell at this phase/EP"
+ },
+ {
+ "title": "Most unstable config",
+ "value": "GB200 · deepep prefill",
+ "sub": "2.89x p99 under zipf vs uniform",
+ "warning": true
+ },
+ {
+ "title": "Invalid / diagnostic cases",
+ "value": "4",
+ "sub": "see Evidence failed table",
+ "warning": true,
+ "href": "#tab-evidence"
+ }
+ ],
+ "decision": {
+ "budgetsUs": [100, 250, 500],
+ "maxTokensUnderBudget": [],
+ "recommendations": [
+ {
+ "id": "cxr-0f8e2a04",
+ "sku": "gb200",
+ "phase": "decode",
+ "atTokensPerRank": 64,
+ "lowestP99DispatchUs": 100.3,
+ "config": "fp8/normal/cached-layout-comm-only-v1/uniform/tuned",
+ "epSize": 8
},
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
+ {
+ "id": "cxr-3109b82a",
+ "sku": "gb200",
+ "phase": "prefill",
+ "atTokensPerRank": 256,
+ "lowestP99DispatchUs": 137.2,
+ "config": "bf16/normal/layout-and-dispatch-v1/balanced+eplb/tuned",
+ "epSize": 8
},
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
+ {
+ "id": "cxr-1445ce8d",
+ "sku": "gb300",
+ "phase": "decode",
+ "atTokensPerRank": 64,
+ "lowestP99DispatchUs": 125.1,
+ "config": "bf16/normal/layout-and-dispatch-v1/zipf-moderate/tuned",
+ "epSize": 8
},
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271559607",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271559607",
- "createdAt": "2026-06-26T23:47:28.813270+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.79999947547913,
- "p90": 103.39199751615524,
- "p95": 104.80000078678131,
- "p99": 109.43999886512756
- },
- "combine": {
- "p50": 79.13599908351898,
- "p90": 81.40800148248672,
- "p95": 86.68799698352814,
- "p99": 87.90399879217148
- },
- "roundtrip": {
- "p50": 152.12799608707428,
- "p90": 159.96800363063812,
- "p95": 162.36799955368042,
- "p99": 177.69600450992584
- },
- "isolatedSum": {
- "p50": 175.9359985589981,
- "p90": 184.79999899864197,
- "p95": 191.48799777030945,
- "p99": 197.34399765729904
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 71.23199850320816,
- "p90": 101.98400169610977,
- "p95": 103.84000092744827,
- "p99": 108.35199803113937
- },
- "combine": {
- "p50": 72.54400104284286,
- "p90": 81.40800148248672,
- "p95": 82.62400329113007,
- "p99": 87.77599781751633
- },
- "roundtrip": {
- "p50": 129.08799946308136,
- "p90": 158.2079976797104,
- "p95": 159.58400070667267,
- "p99": 165.02399742603302
- },
- "isolatedSum": {
- "p50": 143.77599954605103,
- "p90": 183.3920031785965,
- "p95": 186.46400421857834,
- "p99": 196.1279958486557
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 70.52800059318542,
- "p90": 99.84000027179718,
- "p95": 105.72800040245056,
- "p99": 115.07199704647064
- },
- "combine": {
- "p50": 72.9919970035553,
- "p90": 80.99199831485748,
- "p95": 86.94399893283844,
- "p99": 103.55199873447418
- },
- "roundtrip": {
- "p50": 129.43999469280243,
- "p90": 156.19200468063354,
- "p95": 159.07199680805206,
- "p99": 162.56000101566315
- },
- "isolatedSum": {
- "p50": 143.51999759674072,
- "p90": 180.83199858665466,
- "p95": 192.671999335289,
- "p99": 218.62399578094482
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 96.54399752616882,
- "p90": 101.3759970664978,
- "p95": 103.61599922180176,
- "p99": 111.26399785280228
- },
- "combine": {
- "p50": 79.52000200748444,
- "p90": 87.13600039482117,
- "p95": 87.64799684286118,
- "p99": 88.73599767684937
- },
- "roundtrip": {
- "p50": 152.16000378131866,
- "p90": 159.39199924468994,
- "p95": 161.15200519561768,
- "p99": 170.52799463272095
- },
- "isolatedSum": {
- "p50": 176.06399953365326,
- "p90": 188.51199746131897,
- "p95": 191.26399606466293,
- "p99": 199.99999552965164
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 96.3520035147667,
- "p90": 101.75999999046326,
- "p95": 104.89600151777267,
- "p99": 110.11199653148651
- },
- "combine": {
- "p50": 84.48000252246857,
- "p90": 88.03199976682663,
- "p95": 89.21600133180618,
- "p99": 95.23200243711472
- },
- "roundtrip": {
- "p50": 153.05599570274353,
- "p90": 160.288006067276,
- "p95": 162.432000041008,
- "p99": 171.2000072002411
- },
- "isolatedSum": {
- "p50": 180.83200603723526,
- "p90": 189.7919997572899,
- "p95": 194.11200284957886,
- "p99": 205.34399896860123
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 89.9519994854927,
- "p90": 104.35199737548828,
- "p95": 106.65600001811981,
- "p99": 117.85600334405899
- },
- "combine": {
- "p50": 81.216000020504,
- "p90": 92.19200164079666,
- "p95": 95.39200365543365,
- "p99": 96.0640013217926
- },
- "roundtrip": {
- "p50": 141.05600118637085,
- "p90": 168.2880073785782,
- "p95": 169.5680022239685,
- "p99": 174.40000176429749
- },
- "isolatedSum": {
- "p50": 171.1679995059967,
- "p90": 196.54399901628494,
- "p95": 202.04800367355347,
- "p99": 213.9200046658516
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 111.51999980211258,
- "p90": 119.00799721479416,
- "p95": 121.44000083208084,
- "p99": 126.56000256538391
- },
- "combine": {
- "p50": 95.0080007314682,
- "p90": 103.04000228643417,
- "p95": 103.35999727249146,
- "p99": 104.92800176143646
- },
- "roundtrip": {
- "p50": 164.63999450206757,
- "p90": 182.3039948940277,
- "p95": 185.12000143527985,
- "p99": 188.7039989233017
- },
- "isolatedSum": {
- "p50": 206.52800053358078,
- "p90": 222.04799950122833,
- "p95": 224.7999981045723,
- "p99": 231.48800432682037
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 116.99199676513672,
- "p90": 133.2480013370514,
- "p95": 135.51999628543854,
- "p99": 140.6400054693222
- },
- "combine": {
- "p50": 106.88000172376633,
- "p90": 119.55200135707855,
- "p95": 120.2239990234375,
- "p99": 127.55200266838074
- },
- "roundtrip": {
- "p50": 199.3280053138733,
- "p90": 215.45599400997162,
- "p95": 217.56799519062042,
- "p99": 258.91199707984924
- },
- "isolatedSum": {
- "p50": 223.87199848890305,
- "p90": 252.80000269412994,
- "p95": 255.74399530887604,
- "p99": 268.19200813770294
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-79a82113",
- "identity": "h100|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "h100_42947950",
- "comparisonKey": "d4720c9e1313f28d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:34.351891+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_05",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287499275",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287499275",
- "createdAt": "2026-06-27T11:13:34.351891+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.09600156545639,
- "p90": 104.25599664449692,
- "p95": 107.744000852108,
- "p99": 122.81599640846252
- },
- "combine": {
- "p50": 78.75200361013412,
- "p90": 81.24800026416779,
- "p95": 81.85599744319916,
- "p99": 87.3280018568039
- },
- "roundtrip": {
- "p50": 149.3760049343109,
- "p90": 157.72800147533417,
- "p95": 160.863995552063,
- "p99": 184.7359985113144
- },
- "isolatedSum": {
- "p50": 174.84800517559052,
- "p90": 185.5039969086647,
- "p95": 189.59999829530716,
- "p99": 210.14399826526642
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.82400333881378,
- "p90": 103.20000350475311,
- "p95": 108.19199681282043,
- "p99": 178.3359944820404
- },
- "combine": {
- "p50": 72.83200323581696,
- "p90": 80.9599980711937,
- "p95": 82.14399963617325,
- "p99": 90.4960036277771
- },
- "roundtrip": {
- "p50": 129.02399897575378,
- "p90": 156.76799416542053,
- "p95": 159.39199924468994,
- "p99": 176.64000391960144
- },
- "isolatedSum": {
- "p50": 146.65600657463074,
- "p90": 184.1600015759468,
- "p95": 190.33599644899368,
- "p99": 268.8319981098175
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1218560,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 72.95999675989151,
- "p90": 101.95200145244598,
- "p95": 110.1439967751503,
- "p99": 251.10399723052979
- },
- "combine": {
- "p50": 72.67200201749802,
- "p90": 81.50400221347809,
- "p95": 82.43200182914734,
- "p99": 87.42400258779526
- },
- "roundtrip": {
- "p50": 129.02399897575378,
- "p90": 155.32800555229187,
- "p95": 159.61599349975586,
- "p99": 171.87200486660004
- },
- "isolatedSum": {
- "p50": 145.63199877738953,
- "p90": 183.45600366592407,
- "p95": 192.57599860429764,
- "p99": 338.52799981832504
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2408448,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 95.13600170612335,
- "p90": 103.2319962978363,
- "p95": 106.46399855613708,
- "p99": 127.93600559234619
- },
- "combine": {
- "p50": 78.65600287914276,
- "p90": 81.727996468544,
- "p95": 86.496002972126,
- "p99": 88.16000074148178
- },
- "roundtrip": {
- "p50": 150.751993060112,
- "p90": 161.50400042533875,
- "p95": 208.41600000858307,
- "p99": 230.20799458026886
- },
- "isolatedSum": {
- "p50": 173.7920045852661,
- "p90": 184.9599927663803,
- "p95": 192.9600015282631,
- "p99": 216.09600633382797
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4831232,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 96.19200229644775,
- "p90": 101.50399804115295,
- "p95": 103.96800190210342,
- "p99": 112.57600039243698
- },
- "combine": {
- "p50": 81.91999793052673,
- "p90": 88.19200098514557,
- "p95": 89.1840010881424,
- "p99": 90.40000289678574
- },
- "roundtrip": {
- "p50": 151.0400027036667,
- "p90": 159.4880074262619,
- "p95": 161.76000237464905,
- "p99": 199.77599382400513
- },
- "isolatedSum": {
- "p50": 178.1120002269745,
- "p90": 189.69599902629852,
- "p95": 193.15200299024582,
- "p99": 202.97600328922272
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9848832,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 90.01599997282028,
- "p90": 104.22399640083313,
- "p95": 106.39999806880951,
- "p99": 117.47200042009354
- },
- "combine": {
- "p50": 81.24800026416779,
- "p90": 90.36800265312195,
- "p95": 94.59199756383896,
- "p99": 96.00000083446503
- },
- "roundtrip": {
- "p50": 142.81600713729858,
- "p90": 168.60799491405487,
- "p95": 176.06399953365326,
- "p99": 256.8640112876892
- },
- "isolatedSum": {
- "p50": 171.26400023698807,
- "p90": 194.59199905395508,
- "p95": 200.99199563264847,
- "p99": 213.47200125455856
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 109.40799862146378,
- "p90": 139.26400244235992,
- "p95": 141.7279988527298,
- "p99": 146.84799313545227
- },
- "combine": {
- "p50": 95.83999961614609,
- "p90": 112.57600039243698,
- "p95": 115.35999923944473,
- "p99": 119.77600306272507
- },
- "roundtrip": {
- "p50": 173.40800166130066,
- "p90": 206.68800175189972,
- "p95": 210.4959934949875,
- "p99": 213.3760005235672
- },
- "isolatedSum": {
- "p50": 205.24799823760986,
- "p90": 251.8400028347969,
- "p95": 257.08799809217453,
- "p99": 266.62399619817734
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 115.07199704647064,
- "p90": 131.96800649166107,
- "p95": 135.19999384880066,
- "p99": 139.13600146770477
- },
- "combine": {
- "p50": 106.4319983124733,
- "p90": 119.29599940776825,
- "p95": 120.06399780511856,
- "p99": 122.11199849843979
- },
- "roundtrip": {
- "p50": 202.5279998779297,
- "p90": 216.0319983959198,
- "p95": 217.66400337219238,
- "p99": 221.50400280952454
- },
- "isolatedSum": {
- "p50": 221.50399535894394,
- "p90": 251.26400589942932,
- "p95": 255.26399165391922,
- "p99": 261.24799996614456
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e96d722b",
- "identity": "h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "c69daa1ab05193b6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:56.132475+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271667766",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271667766",
- "createdAt": "2026-06-26T23:51:56.132475+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.09600156545639,
- "p90": 102.81600058078766,
- "p95": 104.54399883747101,
- "p99": 110.59200018644333
- },
- "combine": {
- "p50": 79.03999835252762,
- "p90": 81.50400221347809,
- "p95": 82.11199939250946,
- "p99": 87.90399879217148
- },
- "roundtrip": {
- "p50": 145.56799829006195,
- "p90": 153.31199765205383,
- "p95": 155.71199357509613,
- "p99": 159.39199924468994
- },
- "isolatedSum": {
- "p50": 175.135999917984,
- "p90": 184.32000279426575,
- "p95": 186.65599822998047,
- "p99": 198.4959989786148
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 71.03999704122543,
- "p90": 101.6319990158081,
- "p95": 102.65599936246872,
- "p99": 106.62399977445602
- },
- "combine": {
- "p50": 72.28799909353256,
- "p90": 80.54400235414505,
- "p95": 81.40800148248672,
- "p99": 87.00799942016602
- },
- "roundtrip": {
- "p50": 129.18399274349213,
- "p90": 152.70400047302246,
- "p95": 156.92800283432007,
- "p99": 160.76800227165222
- },
- "isolatedSum": {
- "p50": 143.327996134758,
- "p90": 182.17600136995316,
- "p95": 184.06400084495544,
- "p99": 193.63199919462204
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1218560,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 72.57600128650665,
- "p90": 101.02400183677673,
- "p95": 103.61599922180176,
- "p99": 110.81600189208984
- },
- "combine": {
- "p50": 72.25599884986877,
- "p90": 79.96799796819687,
- "p95": 86.71999722719193,
- "p99": 87.64799684286118
- },
- "roundtrip": {
- "p50": 129.92000579833984,
- "p90": 161.3759994506836,
- "p95": 162.30399906635284,
- "p99": 166.4319932460785
- },
- "isolatedSum": {
- "p50": 144.83200013637543,
- "p90": 180.9919998049736,
- "p95": 190.33599644899368,
- "p99": 198.46399873495102
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2408448,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 96.3200032711029,
- "p90": 101.1200025677681,
- "p95": 102.52799838781357,
- "p99": 109.11999642848969
- },
- "combine": {
- "p50": 79.23199981451035,
- "p90": 82.11199939250946,
- "p95": 87.00799942016602,
- "p99": 87.71199733018875
- },
- "roundtrip": {
- "p50": 151.5199989080429,
- "p90": 159.2320054769516,
- "p95": 160.60799360275269,
- "p99": 165.21599888801575
- },
- "isolatedSum": {
- "p50": 175.55200308561325,
- "p90": 183.23200196027756,
- "p95": 189.53599780797958,
- "p99": 196.83199375867844
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4831232,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 96.22400254011154,
- "p90": 102.36799716949463,
- "p95": 105.05600273609161,
- "p99": 110.30399799346924
- },
- "combine": {
- "p50": 81.88799768686295,
- "p90": 88.28800171613693,
- "p95": 89.31200206279755,
- "p99": 94.43199634552002
- },
- "roundtrip": {
- "p50": 152.48000621795654,
- "p90": 160.09600460529327,
- "p95": 164.19200599193573,
- "p99": 172.83199727535248
- },
- "isolatedSum": {
- "p50": 178.1120002269745,
- "p90": 190.65599888563156,
- "p95": 194.36800479888916,
- "p99": 204.73599433898926
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9848832,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 90.36800265312195,
- "p90": 102.59199887514114,
- "p95": 104.3199971318245,
- "p99": 108.03200304508209
- },
- "combine": {
- "p50": 80.92799782752991,
- "p90": 90.01599997282028,
- "p95": 95.13600170612335,
- "p99": 96.41599655151367
- },
- "roundtrip": {
- "p50": 142.46399700641632,
- "p90": 169.95200514793396,
- "p95": 174.55999553203583,
- "p99": 181.7920058965683
- },
- "isolatedSum": {
- "p50": 171.29600048065186,
- "p90": 192.60799884796143,
- "p95": 199.45599883794785,
- "p99": 204.44799959659576
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 95.71199864149094,
- "p90": 116.54400080442429,
- "p95": 118.59200149774551,
- "p99": 125.63200294971466
- },
- "combine": {
- "p50": 89.72799777984619,
- "p90": 103.74400019645691,
- "p95": 104.22399640083313,
- "p99": 106.04800283908844
- },
- "roundtrip": {
- "p50": 165.66400229930878,
- "p90": 185.34399569034576,
- "p95": 186.97600066661835,
- "p99": 190.08000195026398
- },
- "isolatedSum": {
- "p50": 185.43999642133713,
- "p90": 220.2880010008812,
- "p95": 222.81599789857864,
- "p99": 231.6800057888031
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 113.11999708414078,
- "p90": 133.82400572299957,
- "p95": 137.05599308013916,
- "p99": 140.28799533843994
- },
- "combine": {
- "p50": 106.46399855613708,
- "p90": 120.12799829244614,
- "p95": 120.51200121641159,
- "p99": 120.99199742078781
- },
- "roundtrip": {
- "p50": 196.8960016965866,
- "p90": 216.99200570583344,
- "p95": 218.9120054244995,
- "p99": 220.99199891090393
- },
- "isolatedSum": {
- "p50": 219.58399564027786,
- "p90": 253.9520040154457,
- "p95": 257.56799429655075,
- "p99": 261.27999275922775
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-62470199",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9",
- "colorKey": "h100_b681a3a4",
- "comparisonKey": "03a9af950bebf5a9",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:12:00.195927+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · alternating-groups@s1",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "alternating-groups",
- "routingLabel": "alternating-groups@s1",
- "routingStep": 1,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "f8662de0b3559f9",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272331593",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272331593",
- "createdAt": "2026-06-27T00:12:00.195927+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 95.20000219345093,
- "p90": 101.24800354242325,
- "p95": 103.42399775981903,
- "p99": 115.84000289440155
- },
- "combine": {
- "p50": 79.29600030183792,
- "p90": 80.92799782752991,
- "p95": 81.79199695587158,
- "p99": 88.03199976682663
- },
- "roundtrip": {
- "p50": 148.03199470043182,
- "p90": 153.24799716472626,
- "p95": 156.41599893569946,
- "p99": 176.06399953365326
- },
- "isolatedSum": {
- "p50": 174.49600249528885,
- "p90": 182.17600136995316,
- "p95": 185.2159947156906,
- "p99": 203.87200266122818
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3297280,
- "combineLogicalBytes": 3297280,
- "fanoutMean": 3.59375,
- "recvTokensMax": 61,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 99.5199978351593,
- "p90": 107.39199817180634,
- "p95": 118.72000247240067,
- "p99": 229.95199263095856
- },
- "combine": {
- "p50": 87.52000331878662,
- "p90": 89.34400230646133,
- "p95": 92.3520028591156,
- "p99": 96.44799679517746
- },
- "roundtrip": {
- "p50": 155.5519998073578,
- "p90": 160.70400178432465,
- "p95": 164.76799547672272,
- "p99": 175.07199943065643
- },
- "isolatedSum": {
- "p50": 187.04000115394592,
- "p90": 196.73600047826767,
- "p95": 211.07200533151627,
- "p99": 326.399989426136
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13275136,
- "combineLogicalBytes": 13275136,
- "fanoutMean": 3.6171875,
- "recvTokensMax": 236,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 133.82400572299957,
- "p90": 141.08799397945404,
- "p95": 142.62400567531586,
- "p99": 146.40000462532043
- },
- "combine": {
- "p50": 120.28799951076508,
- "p90": 122.56000190973282,
- "p95": 127.10399925708771,
- "p99": 136.00000739097595
- },
- "roundtrip": {
- "p50": 221.88800573349,
- "p90": 225.79200565814972,
- "p95": 227.26400196552277,
- "p99": 233.024001121521
- },
- "isolatedSum": {
- "p50": 254.11200523376465,
- "p90": 263.64799588918686,
- "p95": 269.72800493240356,
- "p99": 282.4000120162964
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 53172224,
- "combineLogicalBytes": 53172224,
- "fanoutMean": 3.6220703125,
- "recvTokensMax": 934,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-62dda1f3",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759",
- "colorKey": "h100_b981a85d",
- "comparisonKey": "03a9af950bebf5a9",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:12:08.462042+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h100-dgxc-slurm_04",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · alternating-groups@s2",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "alternating-groups",
- "routingLabel": "alternating-groups@s2",
- "routingStep": 2,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "3cd13eac5b27759",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272335347",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272335347",
- "createdAt": "2026-06-27T00:12:08.462042+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 91.96799993515015,
- "p90": 101.85600072145462,
- "p95": 102.88000106811523,
- "p99": 111.00800335407257
- },
- "combine": {
- "p50": 76.60800218582153,
- "p90": 81.60000294446945,
- "p95": 82.17599987983704,
- "p99": 85.21600067615509
- },
- "roundtrip": {
- "p50": 146.7839926481247,
- "p90": 152.6080071926117,
- "p95": 154.27200496196747,
- "p99": 160.99199652671814
- },
- "isolatedSum": {
- "p50": 168.57600212097168,
- "p90": 183.45600366592407,
- "p95": 185.05600094795227,
- "p99": 196.22400403022766
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3297280,
- "combineLogicalBytes": 3297280,
- "fanoutMean": 3.59375,
- "recvTokensMax": 61,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 98.88000041246414,
- "p90": 104.06400263309479,
- "p95": 106.30399733781815,
- "p99": 139.42399621009827
- },
- "combine": {
- "p50": 84.60800349712372,
- "p90": 86.30400151014328,
- "p95": 86.81599795818329,
- "p99": 92.51199662685394
- },
- "roundtrip": {
- "p50": 154.65599298477173,
- "p90": 160.64000129699707,
- "p95": 162.59199380874634,
- "p99": 168.09600591659546
- },
- "isolatedSum": {
- "p50": 183.48800390958786,
- "p90": 190.36800414323807,
- "p95": 193.11999529600143,
- "p99": 231.9359928369522
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13275136,
- "combineLogicalBytes": 13275136,
- "fanoutMean": 3.6171875,
- "recvTokensMax": 236,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 141.50400459766388,
- "p90": 146.68799936771393,
- "p95": 147.67999947071075,
- "p99": 152.41600573062897
- },
- "combine": {
- "p50": 118.17599833011627,
- "p90": 122.56000190973282,
- "p95": 123.58400225639343,
- "p99": 125.82400441169739
- },
- "roundtrip": {
- "p50": 227.13600099086761,
- "p90": 231.23200237751007,
- "p95": 232.92799293994904,
- "p99": 237.05600202083588
- },
- "isolatedSum": {
- "p50": 259.68000292778015,
- "p90": 269.24800127744675,
- "p95": 271.2640017271042,
- "p99": 278.24001014232635
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 53172224,
- "combineLogicalBytes": 53172224,
- "fanoutMean": 3.6220703125,
- "recvTokensMax": 934,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f337d9a1",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9",
- "colorKey": "h100_b881a6ca",
- "comparisonKey": "03a9af950bebf5a9",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:12:29.724404+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h100-dgxc-slurm_15",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · alternating-groups@s3",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "alternating-groups",
- "routingLabel": "alternating-groups@s3",
- "routingStep": 3,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "f8662de0b3559f9",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272338723",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272338723",
- "createdAt": "2026-06-27T00:12:29.724404+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 94.84799951314926,
- "p90": 121.37600034475327,
- "p95": 148.8959938287735,
- "p99": 189.56799805164337
- },
- "combine": {
- "p50": 79.58400249481201,
- "p90": 96.6079980134964,
- "p95": 113.0559965968132,
- "p99": 123.77600371837616
- },
- "roundtrip": {
- "p50": 148.44800531864166,
- "p90": 183.20000171661377,
- "p95": 218.78400444984436,
- "p99": 249.79199469089508
- },
- "isolatedSum": {
- "p50": 174.43200200796127,
- "p90": 217.98399835824966,
- "p95": 261.9519904255867,
- "p99": 313.34400177001953
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3297280,
- "combineLogicalBytes": 3297280,
- "fanoutMean": 3.59375,
- "recvTokensMax": 61,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 100.41599720716476,
- "p90": 127.55200266838074,
- "p95": 156.5759927034378,
- "p99": 182.81599879264832
- },
- "combine": {
- "p50": 87.8399983048439,
- "p90": 103.93600165843964,
- "p95": 120.38400024175644,
- "p99": 128.89599800109863
- },
- "roundtrip": {
- "p50": 156.99200332164764,
- "p90": 193.7599927186966,
- "p95": 223.7119972705841,
- "p99": 247.23200500011444
- },
- "isolatedSum": {
- "p50": 188.25599551200867,
- "p90": 231.48800432682037,
- "p95": 276.95999294519424,
- "p99": 311.71199679374695
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13275136,
- "combineLogicalBytes": 13275136,
- "fanoutMean": 3.6171875,
- "recvTokensMax": 236,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 134.2719942331314,
- "p90": 147.39200472831726,
- "p95": 173.567995429039,
- "p99": 188.1919950246811
- },
- "combine": {
- "p50": 120.44800072908401,
- "p90": 138.62399756908417,
- "p95": 152.38399803638458,
- "p99": 160.96000373363495
- },
- "roundtrip": {
- "p50": 222.6880043745041,
- "p90": 247.80799448490143,
- "p95": 264.6079957485199,
- "p99": 279.35999631881714
- },
- "isolatedSum": {
- "p50": 254.71999496221542,
- "p90": 286.0160022974014,
- "p95": 325.9519934654236,
- "p99": 349.15199875831604
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 53172224,
- "combineLogicalBytes": 53172224,
- "fanoutMean": 3.6220703125,
- "recvTokensMax": 934,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-cf5bc26b",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971",
- "colorKey": "h100_16047c28",
- "comparisonKey": "64192d9d479bdd44",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:33.118563+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_12",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2279937619f3971",
- "workloadId": "set:4:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271788376",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271788376",
- "createdAt": "2026-06-26T23:54:33.118563+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 94.68799829483032,
- "p90": 101.1200025677681,
- "p95": 104.41599786281586,
- "p99": 111.10399663448334
- },
- "combine": {
- "p50": 80.99199831485748,
- "p90": 86.84799820184708,
- "p95": 87.8399983048439,
- "p99": 89.9519994854927
- },
- "roundtrip": {
- "p50": 150.30400454998016,
- "p90": 156.95999562740326,
- "p95": 159.67999398708344,
- "p99": 164.15999829769135
- },
- "isolatedSum": {
- "p50": 175.6799966096878,
- "p90": 187.96800076961517,
- "p95": 192.25599616765976,
- "p99": 201.05599611997604
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 95.0080007314682,
- "p90": 100.00000149011612,
- "p95": 102.68799960613251,
- "p99": 108.57599973678589
- },
- "combine": {
- "p50": 81.727996468544,
- "p90": 88.51200342178345,
- "p95": 89.37600255012512,
- "p99": 90.59199690818787
- },
- "roundtrip": {
- "p50": 150.65599977970123,
- "p90": 159.58400070667267,
- "p95": 161.50400042533875,
- "p99": 167.42399334907532
- },
- "isolatedSum": {
- "p50": 176.7359972000122,
- "p90": 188.51200491189957,
- "p95": 192.06400215625763,
- "p99": 199.16799664497375
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
- "recvTokensMax": 64,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 104.63999956846237,
- "p90": 112.28799819946289,
- "p95": 114.14399743080139,
- "p99": 119.84000355005264
- },
- "combine": {
- "p50": 92.25600212812424,
- "p90": 97.69599884748459,
- "p95": 98.39999675750732,
- "p99": 104.47999835014343
- },
- "roundtrip": {
- "p50": 164.000004529953,
- "p90": 171.64799571037292,
- "p95": 175.4560023546219,
- "p99": 228.4799963235855
- },
- "isolatedSum": {
- "p50": 196.8960016965866,
- "p90": 209.98399704694748,
- "p95": 212.54399418830872,
- "p99": 224.32000190019608
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 8,
- "recvTokensMax": 256,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 143.93599331378937,
- "p90": 148.00000190734863,
- "p95": 149.79200065135956,
- "p99": 155.68000078201294
- },
- "combine": {
- "p50": 132.06399977207184,
- "p90": 138.75199854373932,
- "p95": 139.29599523544312,
- "p99": 145.6959992647171
- },
- "roundtrip": {
- "p50": 241.2479966878891,
- "p90": 247.6480007171631,
- "p95": 249.15200471878052,
- "p99": 252.76800990104675
- },
- "isolatedSum": {
- "p50": 275.9999930858612,
- "p90": 286.75200045108795,
- "p95": 289.0879958868027,
- "p99": 301.37600004673004
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4d49fd79",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500",
- "colorKey": "h100_16047c28",
- "comparisonKey": "64192d9d479bdd44",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:59:13.030328+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_03",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ffa946582edb500",
- "workloadId": "set:8:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271931349",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271931349",
- "createdAt": "2026-06-26T23:59:13.030328+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 95.93600034713745,
- "p90": 101.56799852848053,
- "p95": 103.13600301742554,
- "p99": 107.744000852108
- },
- "combine": {
- "p50": 80.89599758386612,
- "p90": 87.07199990749359,
- "p95": 87.8399983048439,
- "p99": 89.40800279378891
- },
- "roundtrip": {
- "p50": 151.42400562763214,
- "p90": 160.12799739837646,
- "p95": 172.86400496959686,
- "p99": 232.12799429893494
- },
- "isolatedSum": {
- "p50": 176.83199793100357,
- "p90": 188.63999843597412,
- "p95": 190.97600132226944,
- "p99": 197.1520036458969
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
- "recvTokensMax": 8,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 96.47999703884125,
- "p90": 103.42399775981903,
- "p95": 107.71200060844421,
- "p99": 161.40800714492798
- },
- "combine": {
- "p50": 81.11999928951263,
- "p90": 87.61599659919739,
- "p95": 89.1840010881424,
- "p99": 185.5359971523285
- },
- "roundtrip": {
- "p50": 153.43999862670898,
- "p90": 159.4880074262619,
- "p95": 163.71199488639832,
- "p99": 313.1200075149536
- },
- "isolatedSum": {
- "p50": 177.59999632835388,
- "p90": 191.03999435901642,
- "p95": 196.8960016965866,
- "p99": 346.94400429725647
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 8,
- "recvTokensMax": 16,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 95.32800316810608,
- "p90": 100.3199964761734,
- "p95": 102.1760031580925,
- "p99": 106.84800148010254
- },
- "combine": {
- "p50": 80.32000064849854,
- "p90": 84.22400057315826,
- "p95": 88.41600269079208,
- "p99": 90.14400094747543
- },
- "roundtrip": {
- "p50": 150.94399452209473,
- "p90": 158.4639996290207,
- "p95": 159.90400314331055,
- "p99": 163.32800686359406
- },
- "isolatedSum": {
- "p50": 175.64800381660461,
- "p90": 184.54399704933167,
- "p95": 190.59200584888458,
- "p99": 196.99200242757797
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 8,
- "recvTokensMax": 32,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 94.87999975681305,
- "p90": 98.43199700117111,
- "p95": 100.3199964761734,
- "p99": 105.3759977221489
- },
- "combine": {
- "p50": 80.54400235414505,
- "p90": 87.20000088214874,
- "p95": 88.73599767684937,
- "p99": 89.82399851083755
- },
- "roundtrip": {
- "p50": 152.0960032939911,
- "p90": 158.65600109100342,
- "p95": 160.16000509262085,
- "p99": 166.97600483894348
- },
- "isolatedSum": {
- "p50": 175.4240021109581,
- "p90": 185.63199788331985,
- "p95": 189.05599415302277,
- "p99": 195.19999623298645
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
- "recvTokensMax": 64,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 95.93600034713745,
- "p90": 103.20000350475311,
- "p95": 106.20799660682678,
- "p99": 168.57600212097168
- },
- "combine": {
- "p50": 84.3840017914772,
- "p90": 89.40800279378891,
- "p95": 89.75999802350998,
- "p99": 94.84799951314926
- },
- "roundtrip": {
- "p50": 154.84799444675446,
- "p90": 161.02400422096252,
- "p95": 163.7440025806427,
- "p99": 497.50399589538574
- },
- "isolatedSum": {
- "p50": 180.32000213861465,
- "p90": 192.60800629854202,
- "p95": 195.96799463033676,
- "p99": 263.42400163412094
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 8,
- "recvTokensMax": 128,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 103.16800326108932,
- "p90": 109.37599837779999,
- "p95": 110.75200140476227,
- "p99": 113.43999952077866
- },
- "combine": {
- "p50": 88.79999816417694,
- "p90": 95.74399888515472,
- "p95": 97.120001912117,
- "p99": 97.95200079679489
- },
- "roundtrip": {
- "p50": 161.6639941930771,
- "p90": 167.1999990940094,
- "p95": 168.73599588871002,
- "p99": 172.89599776268005
- },
- "isolatedSum": {
- "p50": 191.96800142526627,
- "p90": 205.1199972629547,
- "p95": 207.87200331687927,
- "p99": 211.39200031757355
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 8,
- "recvTokensMax": 256,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 119.6800023317337,
- "p90": 128.00000607967377,
- "p95": 129.05600666999817,
- "p99": 133.91999900341034
- },
- "combine": {
- "p50": 103.16800326108932,
- "p90": 106.55999928712845,
- "p95": 107.90400207042694,
- "p99": 113.63200098276138
- },
- "roundtrip": {
- "p50": 186.71999871730804,
- "p90": 194.65599954128265,
- "p95": 196.31999731063843,
- "p99": 199.48799908161163
- },
- "isolatedSum": {
- "p50": 222.84800559282303,
- "p90": 234.56000536680222,
- "p95": 236.9600087404251,
- "p99": 247.55199998617172
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 8,
- "recvTokensMax": 512,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 137.66400516033173,
- "p90": 146.7200070619583,
- "p95": 147.8080004453659,
- "p99": 151.10400319099426
- },
- "combine": {
- "p50": 131.1360001564026,
- "p90": 137.82399892807007,
- "p95": 138.46400380134583,
- "p99": 145.28000354766846
- },
- "roundtrip": {
- "p50": 241.40800535678864,
- "p90": 248.60799312591553,
- "p95": 250.59199333190918,
- "p99": 258.5600018501282
- },
- "isolatedSum": {
- "p50": 268.8000053167343,
- "p90": 284.5440059900284,
- "p95": 286.27200424671173,
- "p99": 296.3840067386627
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-38b8b0c2",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8",
- "colorKey": "h100_0c515f8b",
- "comparisonKey": "47e8e48c891afabb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:43.774495+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_09",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · balanced-rank-local",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced-rank-local",
- "routingLabel": "balanced-rank-local",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d02a66236b524b8",
- "workloadId": "set:4:2eebbed158fe1320",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271795429",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271795429",
- "createdAt": "2026-06-26T23:54:43.774495+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.03200107812881,
- "p90": 102.49599814414978,
- "p95": 105.66399991512299,
- "p99": 117.88800358772278
- },
- "combine": {
- "p50": 71.45600020885468,
- "p90": 73.98399710655212,
- "p95": 77.18399912118912,
- "p99": 81.56800270080566
- },
- "roundtrip": {
- "p50": 142.04800128936768,
- "p90": 149.98400211334229,
- "p95": 151.45599842071533,
- "p99": 159.07199680805206
- },
- "isolatedSum": {
- "p50": 167.4880012869835,
- "p90": 176.4799952507019,
- "p95": 182.8479990363121,
- "p99": 199.45600628852844
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 114688,
- "combineLogicalBytes": 114688,
- "fanoutMean": 1,
- "recvTokensMax": 4,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.9760011434555,
- "p90": 106.62399977445602,
- "p95": 110.07999628782272,
- "p99": 123.00799787044525
- },
- "combine": {
- "p50": 71.32799923419952,
- "p90": 73.69600236415863,
- "p95": 78.52800190448761,
- "p99": 80.22399991750717
- },
- "roundtrip": {
- "p50": 143.26399564743042,
- "p90": 150.14399588108063,
- "p95": 153.1520038843155,
- "p99": 162.88000345230103
- },
- "isolatedSum": {
- "p50": 170.30400037765503,
- "p90": 180.32000213861465,
- "p95": 188.60799819231033,
- "p99": 203.23199778795242
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 1,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 100.09600222110748,
- "p90": 107.61599987745285,
- "p95": 112.31999844312668,
- "p99": 163.16799819469452
- },
- "combine": {
- "p50": 79.71200346946716,
- "p90": 87.16800063848495,
- "p95": 87.74399757385254,
- "p99": 95.8079993724823
- },
- "roundtrip": {
- "p50": 154.01600301265717,
- "p90": 161.47199273109436,
- "p95": 164.5440012216568,
- "p99": 176.83200538158417
- },
- "isolatedSum": {
- "p50": 179.80800569057465,
- "p90": 194.7840005159378,
- "p95": 200.06399601697922,
- "p99": 258.9759975671768
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 1,
- "recvTokensMax": 32,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 104.38399761915207,
- "p90": 108.44799876213074,
- "p95": 111.455999314785,
- "p99": 119.74400281906128
- },
- "combine": {
- "p50": 83.26400071382523,
- "p90": 88.03199976682663,
- "p95": 88.22400122880936,
- "p99": 92.83199906349182
- },
- "roundtrip": {
- "p50": 154.9759954214096,
- "p90": 161.18399798870087,
- "p95": 165.0879979133606,
- "p99": 170.01600563526154
- },
- "isolatedSum": {
- "p50": 187.6479983329773,
- "p90": 196.47999852895737,
- "p95": 199.68000054359436,
- "p99": 212.5760018825531
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 1,
- "recvTokensMax": 128,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-94696c7b",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b",
- "colorKey": "h100_c0c0ad86",
- "comparisonKey": "00faf19eae8c1230",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:00.906485+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_19",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · balanced+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "balanced",
- "routingLabel": "balanced+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "f0e66a15078595b",
- "workloadId": "set:8:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1,
- "eplbImbalanceAfter": 1,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271935069",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271935069",
- "createdAt": "2026-06-27T00:00:00.906485+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 97.63199836015701,
- "p90": 106.36799782514572,
- "p95": 109.63200032711029,
- "p99": 118.65600198507309
- },
- "combine": {
- "p50": 71.45600020885468,
- "p90": 78.94399762153625,
- "p95": 79.42400127649307,
- "p99": 82.24000036716461
- },
- "roundtrip": {
- "p50": 145.4080045223236,
- "p90": 154.23999726772308,
- "p95": 155.64799308776855,
- "p99": 157.98400342464447
- },
- "isolatedSum": {
- "p50": 169.0879985690117,
- "p90": 185.31199544668198,
- "p95": 189.05600160360336,
- "p99": 200.8960023522377
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 229376,
- "combineLogicalBytes": 229376,
- "fanoutMean": 2,
- "recvTokensMax": 3,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 77.08799839019775,
- "p90": 104.06400263309479,
- "p95": 105.66399991512299,
- "p99": 111.1999973654747
- },
- "combine": {
- "p50": 65.05600363016129,
- "p90": 74.5600014925003,
- "p95": 79.00799810886383,
- "p99": 82.33600109815598
- },
- "roundtrip": {
- "p50": 122.8799968957901,
- "p90": 151.64799988269806,
- "p95": 153.24799716472626,
- "p99": 161.50400042533875
- },
- "isolatedSum": {
- "p50": 142.14400202035904,
- "p90": 178.6240041255951,
- "p95": 184.67199802398682,
- "p99": 193.53599846363068
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 458752,
- "combineLogicalBytes": 458752,
- "fanoutMean": 2,
- "recvTokensMax": 6,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 97.56799787282944,
- "p90": 107.80800133943558,
- "p95": 114.04799669981003,
- "p99": 120.44800072908401
- },
- "combine": {
- "p50": 65.69600105285645,
- "p90": 78.87999713420868,
- "p95": 79.32800054550171,
- "p99": 87.13600039482117
- },
- "roundtrip": {
- "p50": 123.99999797344208,
- "p90": 158.720001578331,
- "p95": 165.3439998626709,
- "p99": 176.28799378871918
- },
- "isolatedSum": {
- "p50": 163.26399892568588,
- "p90": 186.68799847364426,
- "p95": 193.37599724531174,
- "p99": 207.58400112390518
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 2,
- "recvTokensMax": 12,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.11200201511383,
- "p90": 105.76000064611435,
- "p95": 108.64000022411346,
- "p99": 122.30399996042252
- },
- "combine": {
- "p50": 72.22399860620499,
- "p90": 79.1039988398552,
- "p95": 80.38400113582611,
- "p99": 87.0399996638298
- },
- "roundtrip": {
- "p50": 145.28000354766846,
- "p90": 152.54400670528412,
- "p95": 155.39200603961945,
- "p99": 160.38399934768677
- },
- "isolatedSum": {
- "p50": 170.33600062131882,
- "p90": 184.86399948596954,
- "p95": 189.02400135993958,
- "p99": 209.34399962425232
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 2,
- "recvTokensMax": 24,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 98.52799773216248,
- "p90": 109.69600081443787,
- "p95": 117.34399944543839,
- "p99": 131.45600259304047
- },
- "combine": {
- "p50": 78.59200239181519,
- "p90": 81.53600245714188,
- "p95": 86.91199868917465,
- "p99": 88.32000195980072
- },
- "roundtrip": {
- "p50": 146.97599411010742,
- "p90": 156.47999942302704,
- "p95": 161.56800091266632,
- "p99": 173.18400740623474
- },
- "isolatedSum": {
- "p50": 177.12000012397766,
- "p90": 191.23200327157974,
- "p95": 204.25599813461304,
- "p99": 219.7760045528412
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 2,
- "recvTokensMax": 48,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 84.54400300979614,
- "p90": 107.07200318574905,
- "p95": 113.40799927711487,
- "p99": 126.08000636100769
- },
- "combine": {
- "p50": 71.10399752855301,
- "p90": 80.57600259780884,
- "p95": 87.13600039482117,
- "p99": 95.51999717950821
- },
- "roundtrip": {
- "p50": 127.93600559234619,
- "p90": 151.7760008573532,
- "p95": 154.40000593662262,
- "p99": 161.56800091266632
- },
- "isolatedSum": {
- "p50": 155.64800053834915,
- "p90": 187.6480057835579,
- "p95": 200.54399967193604,
- "p99": 221.6000035405159
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 2,
- "recvTokensMax": 96,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 100.35199671983719,
- "p90": 113.37599903345108,
- "p95": 126.49600207805634,
- "p99": 162.1759980916977
- },
- "combine": {
- "p50": 79.58400249481201,
- "p90": 87.16800063848495,
- "p95": 87.71199733018875,
- "p99": 95.45599669218063
- },
- "roundtrip": {
- "p50": 154.62400019168854,
- "p90": 165.18400609493256,
- "p95": 170.27199268341064,
- "p99": 184.7359985113144
- },
- "isolatedSum": {
- "p50": 179.9359992146492,
- "p90": 200.54399967193604,
- "p95": 214.2079994082451,
- "p99": 257.6319947838783
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 2,
- "recvTokensMax": 192,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 98.1760025024414,
- "p90": 120.80000340938568,
- "p95": 125.56800246238708,
- "p99": 134.49600338935852
- },
- "combine": {
- "p50": 87.77599781751633,
- "p90": 96.0640013217926,
- "p95": 97.69599884748459,
- "p99": 107.35999792814255
- },
- "roundtrip": {
- "p50": 160.70400178432465,
- "p90": 178.3680021762848,
- "p95": 184.1920018196106,
- "p99": 190.62399864196777
- },
- "isolatedSum": {
- "p50": 185.95200031995773,
- "p90": 216.86400473117828,
- "p95": 223.26400130987167,
- "p99": 241.85600131750107
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 2,
- "recvTokensMax": 384,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b4d89049",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297",
- "colorKey": "h100_1c83c0b0",
- "comparisonKey": "b84a29c0643a5455",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:11:39.736162+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_09",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · hotspot-moving",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-moving",
- "routingLabel": "hotspot-moving",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "90042e0db6a8297",
- "workloadId": "set:3:8fd05d9ebee41064",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272315381",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272315381",
- "createdAt": "2026-06-27T00:11:39.736162+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.1760025024414,
- "p90": 105.12000322341919,
- "p95": 107.4879989027977,
- "p99": 114.43199962377548
- },
- "combine": {
- "p50": 81.216000020504,
- "p90": 87.8399983048439,
- "p95": 88.19200098514557,
- "p99": 89.08800035715103
- },
- "roundtrip": {
- "p50": 154.4959992170334,
- "p90": 160.99199652671814,
- "p95": 162.59199380874634,
- "p99": 167.35999286174774
- },
- "isolatedSum": {
- "p50": 179.3920025229454,
- "p90": 192.9600015282631,
- "p95": 195.67999988794327,
- "p99": 203.5199999809265
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 104.3199971318245,
- "p90": 109.98400300741196,
- "p95": 111.77600175142288,
- "p99": 118.81600320339203
- },
- "combine": {
- "p50": 89.1840010881424,
- "p90": 95.58399766683578,
- "p95": 96.09600156545639,
- "p99": 97.18400239944458
- },
- "roundtrip": {
- "p50": 164.2560064792633,
- "p90": 169.69600319862366,
- "p95": 171.64799571037292,
- "p99": 176.64000391960144
- },
- "isolatedSum": {
- "p50": 193.5039982199669,
- "p90": 205.56800067424774,
- "p95": 207.87200331687927,
- "p99": 216.0000056028366
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 137.28000223636627,
- "p90": 146.11199498176575,
- "p95": 149.6639996767044,
- "p99": 152.19199657440186
- },
- "combine": {
- "p50": 128.48000228405,
- "p90": 130.14400005340576,
- "p95": 130.65600395202637,
- "p99": 136.57599687576294
- },
- "roundtrip": {
- "p50": 231.10400140285492,
- "p90": 236.4799976348877,
- "p95": 238.11200261116028,
- "p99": 242.88000166416168
- },
- "isolatedSum": {
- "p50": 265.76000452041626,
- "p90": 276.2559950351715,
- "p95": 280.3200036287308,
- "p99": 288.7679934501648
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-595b6f36",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958",
- "colorKey": "h100_55b1ee31",
- "comparisonKey": "b84a29c0643a5455",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:11:41.163804+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h100-dgxc-slurm_02",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s2",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-moving",
- "routingLabel": "hotspot-moving@s2",
- "routingStep": 2,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "675e15b52e37958",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272321917",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272321917",
- "createdAt": "2026-06-27T00:11:41.163804+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.24000298976898,
- "p90": 103.96800190210342,
- "p95": 106.30399733781815,
- "p99": 111.07199639081955
- },
- "combine": {
- "p50": 79.52000200748444,
- "p90": 86.87999844551086,
- "p95": 87.52000331878662,
- "p99": 88.0960002541542
- },
- "roundtrip": {
- "p50": 153.28000485897064,
- "p90": 161.3759994506836,
- "p95": 163.4880006313324,
- "p99": 455.80801367759705
- },
- "isolatedSum": {
- "p50": 177.76000499725342,
- "p90": 190.8480003476143,
- "p95": 193.82400065660477,
- "p99": 199.16799664497375
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 103.84000092744827,
- "p90": 109.0560033917427,
- "p95": 110.1439967751503,
- "p99": 113.88800293207169
- },
- "combine": {
- "p50": 87.87199854850769,
- "p90": 95.32800316810608,
- "p95": 95.90400010347366,
- "p99": 96.25600278377533
- },
- "roundtrip": {
- "p50": 161.98399662971497,
- "p90": 168.99199783802032,
- "p95": 170.56000232696533,
- "p99": 175.80799758434296
- },
- "isolatedSum": {
- "p50": 191.71199947595596,
- "p90": 204.38400655984879,
- "p95": 206.04799687862396,
- "p99": 210.14400571584702
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 136.80000603199005,
- "p90": 145.4399973154068,
- "p95": 146.68799936771393,
- "p99": 149.4079977273941
- },
- "combine": {
- "p50": 123.99999797344208,
- "p90": 129.05600666999817,
- "p95": 130.36799430847168,
- "p99": 136.00000739097595
- },
- "roundtrip": {
- "p50": 228.7999987602234,
- "p90": 236.12800240516663,
- "p95": 237.98400163650513,
- "p99": 241.5039986371994
- },
- "isolatedSum": {
- "p50": 260.80000400543213,
- "p90": 274.49600398540497,
- "p95": 277.0559936761856,
- "p99": 285.40800511837006
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f5ba95c3",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419",
- "colorKey": "h100_54b1ec9e",
- "comparisonKey": "b84a29c0643a5455",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:12:09.752348+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h100-dgxc-slurm_14",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s3",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-moving",
- "routingLabel": "hotspot-moving@s3",
- "routingStep": 3,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "82b2963fc322419",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272325031",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272325031",
- "createdAt": "2026-06-27T00:12:09.752348+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 97.98400104045868,
- "p90": 104.44799810647964,
- "p95": 107.84000158309937,
- "p99": 116.06399714946747
- },
- "combine": {
- "p50": 81.02399855852127,
- "p90": 87.64799684286118,
- "p95": 88.06400001049042,
- "p99": 96.00000083446503
- },
- "roundtrip": {
- "p50": 156.41599893569946,
- "p90": 162.62400150299072,
- "p95": 165.75999557971954,
- "p99": 176.7359972000122
- },
- "isolatedSum": {
- "p50": 179.00799959897995,
- "p90": 192.09599494934082,
- "p95": 195.90400159358978,
- "p99": 212.0639979839325
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 103.20000350475311,
- "p90": 107.39199817180634,
- "p95": 111.51999980211258,
- "p99": 119.00799721479416
- },
- "combine": {
- "p50": 88.16000074148178,
- "p90": 95.8079993724823,
- "p95": 96.16000205278397,
- "p99": 98.11200201511383
- },
- "roundtrip": {
- "p50": 162.78399527072906,
- "p90": 168.73599588871002,
- "p95": 170.9440052509308,
- "p99": 176.57600343227386
- },
- "isolatedSum": {
- "p50": 191.3600042462349,
- "p90": 203.19999754428864,
- "p95": 207.68000185489655,
- "p99": 217.119999229908
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 137.28000223636627,
- "p90": 149.1200029850006,
- "p95": 151.0079950094223,
- "p99": 153.18399667739868
- },
- "combine": {
- "p50": 128.86400520801544,
- "p90": 131.1360001564026,
- "p95": 135.71199774742126,
- "p99": 138.3039951324463
- },
- "roundtrip": {
- "p50": 234.49599742889404,
- "p90": 241.4720058441162,
- "p95": 242.65600740909576,
- "p99": 247.9040026664734
- },
- "isolatedSum": {
- "p50": 266.1440074443817,
- "p90": 280.2560031414032,
- "p95": 286.71999275684357,
- "p99": 291.48799180984497
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-fb3ea9d7",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1",
- "colorKey": "h100_b654f9b2",
- "comparisonKey": "10b5062b8e23fcad",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:55:39.087780+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_00",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2ad5ef98d328fa1",
- "workloadId": "set:4:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271817166",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271817166",
- "createdAt": "2026-06-26T23:55:39.087780+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 97.82399982213974,
- "p90": 105.95200210809708,
- "p95": 109.95200276374817,
- "p99": 121.50400131940842
- },
- "combine": {
- "p50": 80.25600016117096,
- "p90": 81.88799768686295,
- "p95": 83.3280012011528,
- "p99": 89.37600255012512
- },
- "roundtrip": {
- "p50": 152.12799608707428,
- "p90": 158.78400206565857,
- "p95": 160.64000129699707,
- "p99": 166.81599617004395
- },
- "isolatedSum": {
- "p50": 178.0799999833107,
- "p90": 187.83999979496002,
- "p95": 193.28000396490097,
- "p99": 210.88000386953354
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 97.08800166845322,
- "p90": 103.39199751615524,
- "p95": 107.51999914646149,
- "p99": 115.93600362539291
- },
- "combine": {
- "p50": 80.89599758386612,
- "p90": 84.03199911117554,
- "p95": 87.42400258779526,
- "p99": 89.47200328111649
- },
- "roundtrip": {
- "p50": 153.60000729560852,
- "p90": 161.15200519561768,
- "p95": 163.83999586105347,
- "p99": 171.55200242996216
- },
- "isolatedSum": {
- "p50": 177.98399925231934,
- "p90": 187.42399662733078,
- "p95": 194.94400173425674,
- "p99": 205.4080069065094
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 103.39199751615524,
- "p90": 108.47999900579453,
- "p95": 110.55999994277954,
- "p99": 117.18399822711945
- },
- "combine": {
- "p50": 89.34400230646133,
- "p90": 95.551997423172,
- "p95": 97.34400361776352,
- "p99": 99.93600100278854
- },
- "roundtrip": {
- "p50": 162.75200247764587,
- "p90": 170.43200135231018,
- "p95": 172.83199727535248,
- "p99": 179.61600422859192
- },
- "isolatedSum": {
- "p50": 192.73599982261658,
- "p90": 204.03199642896652,
- "p95": 207.90400356054306,
- "p99": 217.119999229908
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 137.85600662231445,
- "p90": 144.41600441932678,
- "p95": 147.0080018043518,
- "p99": 151.16800367832184
- },
- "combine": {
- "p50": 128.83199751377106,
- "p90": 131.23199343681335,
- "p95": 131.99999928474426,
- "p99": 137.95199990272522
- },
- "roundtrip": {
- "p50": 233.75999927520752,
- "p90": 239.3919974565506,
- "p95": 240.92799425125122,
- "p99": 245.1840043067932
- },
- "isolatedSum": {
- "p50": 266.6880041360855,
- "p90": 275.64799785614014,
- "p95": 279.00800108909607,
- "p99": 289.12000358104706
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e0ce741a",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621",
- "colorKey": "h100_b654f9b2",
- "comparisonKey": "10b5062b8e23fcad",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:31.374180+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_19",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b6caf944f6bb621",
- "workloadId": "set:8:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272004392",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272004392",
- "createdAt": "2026-06-27T00:01:31.374180+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.67199850082397,
- "p90": 104.60799932479858,
- "p95": 106.11200332641602,
- "p99": 113.56800049543381
- },
- "combine": {
- "p50": 79.00799810886383,
- "p90": 82.0159986615181,
- "p95": 82.36800134181976,
- "p99": 87.67999708652496
- },
- "roundtrip": {
- "p50": 147.2640037536621,
- "p90": 154.59200739860535,
- "p95": 157.3439985513687,
- "p99": 161.5999937057495
- },
- "isolatedSum": {
- "p50": 175.6799966096878,
- "p90": 186.62399798631668,
- "p95": 188.48000466823578,
- "p99": 201.24799758195877
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 99.67999905347824,
- "p90": 105.0880029797554,
- "p95": 107.16799646615982,
- "p99": 112.99200356006622
- },
- "combine": {
- "p50": 81.11999928951263,
- "p90": 82.49600231647491,
- "p95": 83.03999900817871,
- "p99": 87.2960016131401
- },
- "roundtrip": {
- "p50": 147.0080018043518,
- "p90": 153.6639928817749,
- "p95": 155.71199357509613,
- "p99": 159.10400450229645
- },
- "isolatedSum": {
- "p50": 180.79999834299088,
- "p90": 187.58400529623032,
- "p95": 190.20799547433853,
- "p99": 200.28800517320633
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1247232,
- "combineLogicalBytes": 1247232,
- "fanoutMean": 5.4375,
- "recvTokensMax": 16,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 97.18400239944458,
- "p90": 103.93600165843964,
- "p95": 106.30399733781815,
- "p99": 122.04799801111221
- },
- "combine": {
- "p50": 78.94399762153625,
- "p90": 82.43200182914734,
- "p95": 86.40000224113464,
- "p99": 103.45599800348282
- },
- "roundtrip": {
- "p50": 148.15999567508698,
- "p90": 158.55999290943146,
- "p95": 160.3199988603592,
- "p99": 164.09599781036377
- },
- "isolatedSum": {
- "p50": 176.12800002098083,
- "p90": 186.36800348758698,
- "p95": 192.7039995789528,
- "p99": 225.50399601459503
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2451456,
- "combineLogicalBytes": 2451456,
- "fanoutMean": 5.34375,
- "recvTokensMax": 32,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.91200065612793,
- "p90": 104.35199737548828,
- "p95": 106.65600001811981,
- "p99": 112.47999966144562
- },
- "combine": {
- "p50": 81.24800026416779,
- "p90": 83.3280012011528,
- "p95": 87.0399996638298,
- "p99": 87.93599903583527
- },
- "roundtrip": {
- "p50": 153.4080058336258,
- "p90": 159.61599349975586,
- "p95": 161.47199273109436,
- "p99": 165.21599888801575
- },
- "isolatedSum": {
- "p50": 180.16000092029572,
- "p90": 187.67999857664108,
- "p95": 193.69599968194962,
- "p99": 200.41599869728088
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 96.92800045013428,
- "p90": 104.3199971318245,
- "p95": 110.55999994277954,
- "p99": 161.9199961423874
- },
- "combine": {
- "p50": 81.4720019698143,
- "p90": 87.2960016131401,
- "p95": 87.8399983048439,
- "p99": 90.27200192213058
- },
- "roundtrip": {
- "p50": 153.43999862670898,
- "p90": 160.19199788570404,
- "p95": 162.78399527072906,
- "p99": 169.98399794101715
- },
- "isolatedSum": {
- "p50": 178.40000241994858,
- "p90": 191.6159987449646,
- "p95": 198.39999824762344,
- "p99": 252.19199806451797
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 9748480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 128,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 103.64799946546555,
- "p90": 108.31999778747559,
- "p95": 110.62400043010712,
- "p99": 114.84800279140472
- },
- "combine": {
- "p50": 87.5839963555336,
- "p90": 91.839998960495,
- "p95": 95.39200365543365,
- "p99": 96.38399630784988
- },
- "roundtrip": {
- "p50": 155.96799552440643,
- "p90": 165.50399363040924,
- "p95": 168.41599345207214,
- "p99": 175.64800381660461
- },
- "isolatedSum": {
- "p50": 191.23199582099915,
- "p90": 200.15999674797058,
- "p95": 206.01600408554077,
- "p99": 211.2319990992546
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 114.88000303506851,
- "p90": 126.11199915409088,
- "p95": 127.6479959487915,
- "p99": 133.56800377368927
- },
- "combine": {
- "p50": 98.43199700117111,
- "p90": 103.96800190210342,
- "p95": 105.8880016207695,
- "p99": 119.71200257539749
- },
- "roundtrip": {
- "p50": 180.38399517536163,
- "p90": 191.39200448989868,
- "p95": 194.39999759197235,
- "p99": 201.9840031862259
- },
- "isolatedSum": {
- "p50": 213.31200003623962,
- "p90": 230.0800010561943,
- "p95": 233.535997569561,
- "p99": 253.28000634908676
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38621184,
- "combineLogicalBytes": 38621184,
- "fanoutMean": 5.26171875,
- "recvTokensMax": 512,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 141.79199934005737,
- "p90": 147.2959965467453,
- "p95": 149.82399344444275,
- "p99": 153.3759981393814
- },
- "combine": {
- "p50": 122.36800044775009,
- "p90": 128.4160017967224,
- "p95": 129.02399897575378,
- "p99": 136.1600011587143
- },
- "roundtrip": {
- "p50": 231.77599906921387,
- "p90": 241.85599386692047,
- "p95": 244.9280023574829,
- "p99": 248.76800179481506
- },
- "isolatedSum": {
- "p50": 264.15999978780746,
- "p90": 275.7119983434677,
- "p95": 278.84799242019653,
- "p99": 289.5359992980957
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-73951147",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac",
- "colorKey": "h100_456a963c",
- "comparisonKey": "12dbc31e8daf0a44",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:37.187210+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_01",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "e41f5099a9733ac",
- "workloadId": "set:8:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.830078125,
- "eplbImbalanceAfter": 1.0007595486111112,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272008867",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272008867",
- "createdAt": "2026-06-27T00:01:37.187210+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 68.41599941253662,
- "p90": 76.1599987745285,
- "p95": 77.69600301980972,
- "p99": 84.83199775218964
- },
- "combine": {
- "p50": 71.07199728488922,
- "p90": 73.11999797821045,
- "p95": 73.7600028514862,
- "p99": 79.74400371313095
- },
- "roundtrip": {
- "p50": 126.46399438381195,
- "p90": 130.62399625778198,
- "p95": 131.55199587345123,
- "p99": 136.4479959011078
- },
- "isolatedSum": {
- "p50": 139.48799669742584,
- "p90": 149.27999675273895,
- "p95": 151.45600587129593,
- "p99": 164.5760014653206
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 68.70400160551071,
- "p90": 76.9599974155426,
- "p95": 81.727996468544,
- "p99": 107.10400342941284
- },
- "combine": {
- "p50": 71.48800045251846,
- "p90": 73.15199822187424,
- "p95": 73.56800138950348,
- "p99": 79.55200225114822
- },
- "roundtrip": {
- "p50": 127.77599692344666,
- "p90": 131.23199343681335,
- "p95": 132.60799646377563,
- "p99": 138.7840062379837
- },
- "isolatedSum": {
- "p50": 140.19200205802917,
- "p90": 150.11199563741684,
- "p95": 155.29599785804749,
- "p99": 186.65600568056107
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1175552,
- "combineLogicalBytes": 1175552,
- "fanoutMean": 5.125,
- "recvTokensMax": 12,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 70.8480030298233,
- "p90": 77.79199630022049,
- "p95": 80.09599894285202,
- "p99": 87.0399996638298
- },
- "combine": {
- "p50": 72.4480003118515,
- "p90": 73.56800138950348,
- "p95": 74.27199929952621,
- "p99": 79.80799674987793
- },
- "roundtrip": {
- "p50": 126.94400548934937,
- "p90": 131.77600502967834,
- "p95": 133.4719955921173,
- "p99": 137.2479945421219
- },
- "isolatedSum": {
- "p50": 143.2960033416748,
- "p90": 151.35999768972397,
- "p95": 154.36799824237823,
- "p99": 166.84799641370773
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2451456,
- "combineLogicalBytes": 2451456,
- "fanoutMean": 5.34375,
- "recvTokensMax": 23,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 70.11199742555618,
- "p90": 76.9599974155426,
- "p95": 79.3600007891655,
- "p99": 86.14400029182434
- },
- "combine": {
- "p50": 72.64000177383423,
- "p90": 73.82400333881378,
- "p95": 74.94399696588516,
- "p99": 81.08799904584885
- },
- "roundtrip": {
- "p50": 125.47199428081512,
- "p90": 131.6480040550232,
- "p95": 133.66399705410004,
- "p99": 139.29599523544312
- },
- "isolatedSum": {
- "p50": 142.7519991993904,
- "p90": 150.78400075435638,
- "p95": 154.30399775505066,
- "p99": 167.2319993376732
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4730880,
- "combineLogicalBytes": 4730880,
- "fanoutMean": 5.15625,
- "recvTokensMax": 44,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 81.40800148248672,
- "p90": 83.99999886751175,
- "p95": 86.33600175380707,
- "p99": 91.36000275611877
- },
- "combine": {
- "p50": 73.37599992752075,
- "p90": 78.75200361013412,
- "p95": 79.6160027384758,
- "p99": 81.34400099515915
- },
- "roundtrip": {
- "p50": 125.95200538635254,
- "p90": 133.15199315547943,
- "p95": 134.5919966697693,
- "p99": 140.32000303268433
- },
- "isolatedSum": {
- "p50": 154.78400141000748,
- "p90": 162.75200247764587,
- "p95": 165.95200449228287,
- "p99": 172.70400375127792
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9691136,
- "combineLogicalBytes": 9691136,
- "fanoutMean": 5.28125,
- "recvTokensMax": 88,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 90.08000046014786,
- "p90": 92.54399687051773,
- "p95": 94.4959968328476,
- "p99": 98.52799773216248
- },
- "combine": {
- "p50": 80.09599894285202,
- "p90": 81.56800270080566,
- "p95": 82.07999914884567,
- "p99": 87.2960016131401
- },
- "roundtrip": {
- "p50": 141.08799397945404,
- "p90": 144.96000111103058,
- "p95": 146.30399644374847,
- "p99": 150.33599734306335
- },
- "isolatedSum": {
- "p50": 170.17599940299988,
- "p90": 174.1119995713234,
- "p95": 176.57599598169327,
- "p99": 185.82399934530258
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19568640,
- "combineLogicalBytes": 19568640,
- "fanoutMean": 5.33203125,
- "recvTokensMax": 179,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 95.23200243711472,
- "p90": 113.24799805879593,
- "p95": 114.59200084209442,
- "p99": 119.10399794578552
- },
- "combine": {
- "p50": 89.85599875450134,
- "p90": 98.2080027461052,
- "p95": 114.3679991364479,
- "p99": 130.49599528312683
- },
- "roundtrip": {
- "p50": 159.39199924468994,
- "p90": 165.53600132465363,
- "p95": 167.87199676036835,
- "p99": 179.51999604701996
- },
- "isolatedSum": {
- "p50": 185.08800119161606,
- "p90": 211.45600080490112,
- "p95": 228.95999997854233,
- "p99": 249.59999322891235
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38750208,
- "combineLogicalBytes": 38750208,
- "fanoutMean": 5.279296875,
- "recvTokensMax": 348,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 111.29599809646606,
- "p90": 117.21599847078323,
- "p95": 118.43200027942657,
- "p99": 122.72000312805176
- },
- "combine": {
- "p50": 106.39999806880951,
- "p90": 112.28799819946289,
- "p95": 113.11999708414078,
- "p99": 114.33599889278412
- },
- "roundtrip": {
- "p50": 197.63199985027313,
- "p90": 202.11200416088104,
- "p95": 203.39199900627136,
- "p99": 206.9759964942932
- },
- "isolatedSum": {
- "p50": 217.69599616527557,
- "p90": 229.50399667024612,
- "p95": 231.55199736356735,
- "p99": 237.05600202083588
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77342720,
- "combineLogicalBytes": 77342720,
- "fanoutMean": 5.2685546875,
- "recvTokensMax": 687,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-fc133662",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a",
- "colorKey": "h100_d54acd03",
- "comparisonKey": "fb346b1019e55bb0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:13:31.132134+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h100-dgxc-slurm_01",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · uniform·empty-rank",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform·empty-rank",
- "routingStep": 0,
- "unevenTokens": "empty-rank",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "5621f0d4899ad7a",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272375977",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272375977",
- "createdAt": "2026-06-27T00:13:31.132134+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 63,
- "dispatch": {
- "p50": 98.01600128412247,
- "p90": 108.03200304508209,
- "p95": 124.22399967908859,
- "p99": 164.000004529953
- },
- "combine": {
- "p50": 80.73599636554718,
- "p90": 89.63199704885483,
- "p95": 104.63999956846237,
- "p99": 112.5440001487732
- },
- "roundtrip": {
- "p50": 154.1759967803955,
- "p90": 160.35200655460358,
- "p95": 162.08000481128693,
- "p99": 175.3920018672943
- },
- "isolatedSum": {
- "p50": 178.75199764966965,
- "p90": 197.66400009393692,
- "p95": 228.86399924755096,
- "p99": 276.5440046787262
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4888576,
- "combineLogicalBytes": 4888576,
- "fanoutMean": 5.412698268890381,
- "recvTokensMax": 46,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 252,
- "dispatch": {
- "p50": 104.76800054311752,
- "p90": 134.0479999780655,
- "p95": 136.1279934644699,
- "p99": 144.41600441932678
- },
- "combine": {
- "p50": 89.02399986982346,
- "p90": 104.12800312042236,
- "p95": 104.41599786281586,
- "p99": 107.90400207042694
- },
- "roundtrip": {
- "p50": 166.59200191497803,
- "p90": 189.95200097560883,
- "p95": 191.96799397468567,
- "p99": 199.5840072631836
- },
- "isolatedSum": {
- "p50": 193.79200041294098,
- "p90": 238.17600309848785,
- "p95": 240.54399132728577,
- "p99": 252.32000648975372
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19396608,
- "combineLogicalBytes": 19396608,
- "fanoutMean": 5.36904764175415,
- "recvTokensMax": 180,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1022,
- "dispatch": {
- "p50": 130.52800297737122,
- "p90": 139.90400731563568,
- "p95": 151.61600708961487,
- "p99": 458.5599899291992
- },
- "combine": {
- "p50": 120.7680031657219,
- "p90": 127.93600559234619,
- "p95": 128.54400277137756,
- "p99": 129.50399518013
- },
- "roundtrip": {
- "p50": 216.35200083255768,
- "p90": 221.98399901390076,
- "p95": 224.7679978609085,
- "p99": 229.5359969139099
- },
- "isolatedSum": {
- "p50": 251.2960061430931,
- "p90": 267.8400129079819,
- "p95": 280.16000986099243,
- "p99": 588.0639851093292
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77529088,
- "combineLogicalBytes": 77529088,
- "fanoutMean": 5.2915849685668945,
- "recvTokensMax": 722,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e7e5caec",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400",
- "colorKey": "h100_f70758a0",
- "comparisonKey": "fb346b1019e55bb0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:13:24.801629+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h100-dgxc-slurm_00",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · uniform·linear",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform·linear",
- "routingStep": 0,
- "unevenTokens": "linear",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b029c1a6fded400",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272372388",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272372388",
- "createdAt": "2026-06-27T00:13:24.801629+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.24000298976898,
- "p90": 103.64799946546555,
- "p95": 106.4319983124733,
- "p99": 112.5119999051094
- },
- "combine": {
- "p50": 80.73599636554718,
- "p90": 87.55200356245041,
- "p95": 88.03199976682663,
- "p99": 90.08000046014786
- },
- "roundtrip": {
- "p50": 154.33600544929504,
- "p90": 159.45599973201752,
- "p95": 161.6639941930771,
- "p99": 166.75199568271637
- },
- "isolatedSum": {
- "p50": 178.97599935531616,
- "p90": 191.20000302791595,
- "p95": 194.46399807929993,
- "p99": 202.59200036525726
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 100.09600222110748,
- "p90": 105.27999699115753,
- "p95": 106.91200196743011,
- "p99": 113.37599903345108
- },
- "combine": {
- "p50": 89.53599631786346,
- "p90": 96.16000205278397,
- "p95": 96.73599898815155,
- "p99": 98.43199700117111
- },
- "roundtrip": {
- "p50": 163.39200735092163,
- "p90": 168.99199783802032,
- "p95": 170.43200135231018,
- "p99": 174.81599748134613
- },
- "isolatedSum": {
- "p50": 189.63199853897095,
- "p90": 201.4399990439415,
- "p95": 203.64800095558167,
- "p99": 211.8079960346222
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 130.75199723243713,
- "p90": 136.99199259281158,
- "p95": 138.7840062379837,
- "p99": 143.42400431632996
- },
- "combine": {
- "p50": 128.1599998474121,
- "p90": 130.40000200271606,
- "p95": 135.8720064163208,
- "p99": 278.6880135536194
- },
- "roundtrip": {
- "p50": 225.75999796390533,
- "p90": 231.74400627613068,
- "p95": 232.80000686645508,
- "p99": 235.6480062007904
- },
- "isolatedSum": {
- "p50": 258.91199707984924,
- "p90": 267.39199459552765,
- "p95": 274.6560126543045,
- "p99": 422.11201786994934
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-5fad8218",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de",
- "colorKey": "h100_fb5b86de",
- "comparisonKey": "bba2bec66db838b4",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:59:15.450287+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · uniform+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "uniform",
- "routingLabel": "uniform+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "73351bbcd4d02de",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.078125,
- "eplbImbalanceAfter": 1.00048828125,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271923814",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271923814",
- "createdAt": "2026-06-26T23:59:15.450287+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.99200093746185,
- "p90": 104.89600151777267,
- "p95": 107.04000294208527,
- "p99": 111.68000102043152
- },
- "combine": {
- "p50": 75.29599964618683,
- "p90": 81.28000050783157,
- "p95": 81.69600367546082,
- "p99": 83.20000022649765
- },
- "roundtrip": {
- "p50": 146.27200365066528,
- "p90": 154.11199629306793,
- "p95": 156.031996011734,
- "p99": 158.6879938840866
- },
- "isolatedSum": {
- "p50": 172.28800058364868,
- "p90": 186.17600202560425,
- "p95": 188.73600661754608,
- "p99": 194.88000124692917
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 516096,
- "combineLogicalBytes": 516096,
- "fanoutMean": 4.5,
- "recvTokensMax": 6,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 72.09599763154984,
- "p90": 103.87200117111206,
- "p95": 106.4319983124733,
- "p99": 113.76000195741653
- },
- "combine": {
- "p50": 72.67200201749802,
- "p90": 81.18399977684021,
- "p95": 81.82399719953537,
- "p99": 84.28800106048584
- },
- "roundtrip": {
- "p50": 127.48800218105316,
- "p90": 153.76000106334686,
- "p95": 156.3200056552887,
- "p99": 158.720001578331
- },
- "isolatedSum": {
- "p50": 144.76799964904785,
- "p90": 185.05600094795227,
- "p95": 188.25599551200867,
- "p99": 198.04800301790237
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1089536,
- "combineLogicalBytes": 1089536,
- "fanoutMean": 4.75,
- "recvTokensMax": 11,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 71.26399874687195,
- "p90": 100.89600086212158,
- "p95": 104.19200360774994,
- "p99": 112.96000331640244
- },
- "combine": {
- "p50": 72.7040022611618,
- "p90": 80.4160013794899,
- "p95": 80.6720033288002,
- "p99": 87.80799806118011
- },
- "roundtrip": {
- "p50": 130.0159990787506,
- "p90": 154.78399395942688,
- "p95": 158.81599485874176,
- "p99": 165.53600132465363
- },
- "isolatedSum": {
- "p50": 143.96800100803375,
- "p90": 181.31200224161148,
- "p95": 184.86400693655014,
- "p99": 200.76800137758255
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2207744,
- "combineLogicalBytes": 2207744,
- "fanoutMean": 4.8125,
- "recvTokensMax": 23,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 96.79999947547913,
- "p90": 103.16800326108932,
- "p95": 105.79200088977814,
- "p99": 110.46399921178818
- },
- "combine": {
- "p50": 80.73599636554718,
- "p90": 81.98399841785431,
- "p95": 82.36800134181976,
- "p99": 89.75999802350998
- },
- "roundtrip": {
- "p50": 150.2400040626526,
- "p90": 156.47999942302704,
- "p95": 158.91200304031372,
- "p99": 168.2240068912506
- },
- "isolatedSum": {
- "p50": 177.5359958410263,
- "p90": 185.15200167894363,
- "p95": 188.1600022315979,
- "p99": 200.22399723529816
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4558848,
- "combineLogicalBytes": 4558848,
- "fanoutMean": 4.96875,
- "recvTokensMax": 46,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 96.83199971914291,
- "p90": 102.24000364542007,
- "p95": 104.44799810647964,
- "p99": 107.77600109577179
- },
- "combine": {
- "p50": 81.05599880218506,
- "p90": 87.80799806118011,
- "p95": 88.70399743318558,
- "p99": 89.75999802350998
- },
- "roundtrip": {
- "p50": 152.73599326610565,
- "p90": 160.73599457740784,
- "p95": 162.75200247764587,
- "p99": 167.55199432373047
- },
- "isolatedSum": {
- "p50": 177.88799852132797,
- "p90": 190.0480017066002,
- "p95": 193.15199553966522,
- "p99": 197.53599911928177
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9347072,
- "combineLogicalBytes": 9347072,
- "fanoutMean": 5.09375,
- "recvTokensMax": 86,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 91.32800251245499,
- "p90": 101.9200012087822,
- "p95": 104.19200360774994,
- "p99": 108.57599973678589
- },
- "combine": {
- "p50": 81.216000020504,
- "p90": 90.01599997282028,
- "p95": 90.40000289678574,
- "p99": 97.88800030946732
- },
- "roundtrip": {
- "p50": 142.2400027513504,
- "p90": 161.8880033493042,
- "p95": 163.96799683570862,
- "p99": 168.67199540138245
- },
- "isolatedSum": {
- "p50": 172.54400253295898,
- "p90": 191.93600118160248,
- "p95": 194.59200650453568,
- "p99": 206.4640000462532
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 18995200,
- "combineLogicalBytes": 18995200,
- "fanoutMean": 5.17578125,
- "recvTokensMax": 178,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 108.25599730014801,
- "p90": 114.9120032787323,
- "p95": 117.08799749612808,
- "p99": 121.72800302505493
- },
- "combine": {
- "p50": 96.0640013217926,
- "p90": 97.85600006580353,
- "p95": 102.11200267076492,
- "p99": 108.96000266075134
- },
- "roundtrip": {
- "p50": 166.46400094032288,
- "p90": 181.63199722766876,
- "p95": 186.0159933567047,
- "p99": 189.91999328136444
- },
- "isolatedSum": {
- "p50": 204.3199986219406,
- "p90": 212.76800334453583,
- "p95": 219.200000166893,
- "p99": 230.68800568580627
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38291456,
- "combineLogicalBytes": 38291456,
- "fanoutMean": 5.216796875,
- "recvTokensMax": 348,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 112.12799698114395,
- "p90": 131.26400113105774,
- "p95": 135.6479972600937,
- "p99": 141.05600118637085
- },
- "combine": {
- "p50": 106.36799782514572,
- "p90": 117.37599968910217,
- "p95": 120.80000340938568,
- "p99": 121.8239963054657
- },
- "roundtrip": {
- "p50": 195.68000733852386,
- "p90": 214.59199488162994,
- "p95": 216.60800278186798,
- "p99": 221.91999852657318
- },
- "isolatedSum": {
- "p50": 218.49599480628967,
- "p90": 248.6400008201599,
- "p95": 256.44800066947937,
- "p99": 262.87999749183655
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77113344,
- "combineLogicalBytes": 77113344,
- "fanoutMean": 5.2529296875,
- "recvTokensMax": 685,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-7f743bfe",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c",
- "colorKey": "h100_aa268d13",
- "comparisonKey": "791af0af2f802328",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:59:41.322977+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_18",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "14ded8461f2636c",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271945409",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271945409",
- "createdAt": "2026-06-26T23:59:41.322977+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 93.37600320577621,
- "p90": 101.59999877214432,
- "p95": 103.16800326108932,
- "p99": 108.15999656915665
- },
- "combine": {
- "p50": 73.69600236415863,
- "p90": 78.17599922418594,
- "p95": 79.99999821186066,
- "p99": 82.59200304746628
- },
- "roundtrip": {
- "p50": 142.59199798107147,
- "p90": 150.62400698661804,
- "p95": 152.54400670528412,
- "p99": 159.5200002193451
- },
- "isolatedSum": {
- "p50": 167.07200556993484,
- "p90": 179.77599799633026,
- "p95": 183.16800147294998,
- "p99": 190.75199961662292
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 96.0640013217926,
- "p90": 100.89600086212158,
- "p95": 101.82400047779083,
- "p99": 107.07200318574905
- },
- "combine": {
- "p50": 74.43200051784515,
- "p90": 80.48000186681747,
- "p95": 81.216000020504,
- "p99": 82.11199939250946
- },
- "roundtrip": {
- "p50": 143.39199662208557,
- "p90": 147.87200093269348,
- "p95": 153.31199765205383,
- "p99": 168.60799491405487
- },
- "isolatedSum": {
- "p50": 170.49600183963776,
- "p90": 181.37600272893906,
- "p95": 183.04000049829483,
- "p99": 189.18400257825851
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 95.87199985980988,
- "p90": 100.73599964380264,
- "p95": 102.81600058078766,
- "p99": 109.95200276374817
- },
- "combine": {
- "p50": 74.30399954319,
- "p90": 80.89599758386612,
- "p95": 81.4720019698143,
- "p99": 84.19200032949448
- },
- "roundtrip": {
- "p50": 142.752006649971,
- "p90": 153.02400290966034,
- "p95": 154.9759954214096,
- "p99": 160.0639969110489
- },
- "isolatedSum": {
- "p50": 170.17599940299988,
- "p90": 181.63199722766876,
- "p95": 184.28800255060196,
- "p99": 194.14400309324265
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 97.88800030946732,
- "p90": 101.82400047779083,
- "p95": 103.96800190210342,
- "p99": 111.42399907112122
- },
- "combine": {
- "p50": 75.6160020828247,
- "p90": 81.4720019698143,
- "p95": 82.04799890518188,
- "p99": 84.03199911117554
- },
- "roundtrip": {
- "p50": 146.7519998550415,
- "p90": 153.47200632095337,
- "p95": 154.9759954214096,
- "p99": 167.9680049419403
- },
- "isolatedSum": {
- "p50": 173.50400239229202,
- "p90": 183.29600244760513,
- "p95": 186.0160008072853,
- "p99": 195.45599818229675
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 97.08800166845322,
- "p90": 100.67199915647507,
- "p95": 104.25599664449692,
- "p99": 110.6560006737709
- },
- "combine": {
- "p50": 78.94399762153625,
- "p90": 82.04799890518188,
- "p95": 82.78399705886841,
- "p99": 89.40800279378891
- },
- "roundtrip": {
- "p50": 150.7200002670288,
- "p90": 159.10400450229645,
- "p95": 161.69600188732147,
- "p99": 167.07199811935425
- },
- "isolatedSum": {
- "p50": 176.03199928998947,
- "p90": 182.71999806165695,
- "p95": 187.03999370336533,
- "p99": 200.06400346755981
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 96.47999703884125,
- "p90": 101.31199657917023,
- "p95": 104.5759990811348,
- "p99": 110.62400043010712
- },
- "combine": {
- "p50": 86.46400272846222,
- "p90": 90.11200070381165,
- "p95": 90.62399715185165,
- "p99": 93.18400174379349
- },
- "roundtrip": {
- "p50": 158.75199437141418,
- "p90": 163.55200111865997,
- "p95": 164.89599645137787,
- "p99": 169.21600699424744
- },
- "isolatedSum": {
- "p50": 182.94399976730347,
- "p90": 191.42399728298187,
- "p95": 195.19999623298645,
- "p99": 203.8080021739006
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 106.9440022110939,
- "p90": 138.36799561977386,
- "p95": 143.0400013923645,
- "p99": 250.2720057964325
- },
- "combine": {
- "p50": 95.0080007314682,
- "p90": 98.39999675750732,
- "p95": 98.91200065612793,
- "p99": 105.59999942779541
- },
- "roundtrip": {
- "p50": 176.67199671268463,
- "p90": 184.03199315071106,
- "p95": 187.3600035905838,
- "p99": 190.5599981546402
- },
- "isolatedSum": {
- "p50": 201.9520029425621,
- "p90": 236.7679923772812,
- "p95": 241.95200204849243,
- "p99": 355.8720052242279
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 25145344,
- "combineLogicalBytes": 25145344,
- "fanoutMean": 3.42578125,
- "recvTokensMax": 510,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 128.4160017967224,
- "p90": 145.9520012140274,
- "p95": 148.83199334144592,
- "p99": 151.99999511241913
- },
- "combine": {
- "p50": 119.74400281906128,
- "p90": 122.56000190973282,
- "p95": 123.80799651145935,
- "p99": 129.7920048236847
- },
- "roundtrip": {
- "p50": 228.2560020685196,
- "p90": 233.88800024986267,
- "p95": 236.12800240516663,
- "p99": 240.28800427913666
- },
- "isolatedSum": {
- "p50": 248.1600046157837,
- "p90": 268.5120031237602,
- "p95": 272.6399898529053,
- "p99": 281.7919999361038
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-456ed1f6",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3",
- "colorKey": "h100_aa268d13",
- "comparisonKey": "791af0af2f802328",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:55:00.953910+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_16",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "1fa7fe74d0e30a3",
- "workloadId": "set:4:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271802749",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271802749",
- "createdAt": "2026-06-26T23:55:00.953910+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 97.34400361776352,
- "p90": 106.33599758148193,
- "p95": 108.99200290441513,
- "p99": 118.14399808645248
- },
- "combine": {
- "p50": 78.72000336647034,
- "p90": 81.11999928951263,
- "p95": 82.14399963617325,
- "p99": 87.42400258779526
- },
- "roundtrip": {
- "p50": 148.76799285411835,
- "p90": 160.5439931154251,
- "p95": 164.73600268363953,
- "p99": 172.44799435138702
- },
- "isolatedSum": {
- "p50": 176.06400698423386,
- "p90": 187.45599687099457,
- "p95": 191.13600254058838,
- "p99": 205.56800067424774
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 97.50399738550186,
- "p90": 104.38399761915207,
- "p95": 108.99200290441513,
- "p99": 137.2479945421219
- },
- "combine": {
- "p50": 79.39200103282928,
- "p90": 86.68799698352814,
- "p95": 87.52000331878662,
- "p99": 103.90400141477585
- },
- "roundtrip": {
- "p50": 152.99199521541595,
- "p90": 162.9759967327118,
- "p95": 165.69599509239197,
- "p99": 171.55200242996216
- },
- "isolatedSum": {
- "p50": 176.89599841833115,
- "p90": 191.0719946026802,
- "p95": 196.51200622320175,
- "p99": 241.15199595689774
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 102.33599692583084,
- "p90": 111.68000102043152,
- "p95": 115.68000167608261,
- "p99": 123.74400347471237
- },
- "combine": {
- "p50": 87.45600283145905,
- "p90": 94.81599926948547,
- "p95": 95.32800316810608,
- "p99": 96.3200032711029
- },
- "roundtrip": {
- "p50": 160.7999950647354,
- "p90": 168.67199540138245,
- "p95": 171.29600048065186,
- "p99": 178.52799594402313
- },
- "isolatedSum": {
- "p50": 189.7919997572899,
- "p90": 206.496000289917,
- "p95": 211.0080048441887,
- "p99": 220.06400674581528
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 139.39200341701508,
- "p90": 145.34400403499603,
- "p95": 147.5200057029724,
- "p99": 163.71199488639832
- },
- "combine": {
- "p50": 120.15999853610992,
- "p90": 128.1599998474121,
- "p95": 128.86400520801544,
- "p99": 129.88799810409546
- },
- "roundtrip": {
- "p50": 227.87199914455414,
- "p90": 232.7360063791275,
- "p95": 235.32800376415253,
- "p99": 255.13601303100586
- },
- "isolatedSum": {
- "p50": 259.552001953125,
- "p90": 273.50400388240814,
- "p95": 276.38401091098785,
- "p99": 293.5999929904938
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-db353ddd",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c",
- "colorKey": "h100_002beb29",
- "comparisonKey": "d83561aeea03cdbc",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:11.693533+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_12",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "22da8b58646609c",
- "workloadId": "set:8:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271987393",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271987393",
- "createdAt": "2026-06-27T00:01:11.693533+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 94.14400160312653,
- "p90": 104.41599786281586,
- "p95": 109.8560020327568,
- "p99": 133.69600474834442
- },
- "combine": {
- "p50": 71.32799923419952,
- "p90": 75.03999769687653,
- "p95": 80.86399734020233,
- "p99": 237.34399676322937
- },
- "roundtrip": {
- "p50": 141.2159949541092,
- "p90": 150.39999783039093,
- "p95": 151.8079936504364,
- "p99": 244.73600089550018
- },
- "isolatedSum": {
- "p50": 165.47200083732605,
- "p90": 179.45599555969238,
- "p95": 190.71999937295914,
- "p99": 371.0400015115738
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 172032,
- "fanoutMean": 1.5,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 94.43199634552002,
- "p90": 101.50399804115295,
- "p95": 103.04000228643417,
- "p99": 105.85600137710571
- },
- "combine": {
- "p50": 72.03199714422226,
- "p90": 73.95199686288834,
- "p95": 74.5600014925003,
- "p99": 79.80799674987793
- },
- "roundtrip": {
- "p50": 141.02399349212646,
- "p90": 147.77599275112152,
- "p95": 150.176003575325,
- "p99": 175.6799966096878
- },
- "isolatedSum": {
- "p50": 166.46399348974228,
- "p90": 175.4559949040413,
- "p95": 177.60000377893448,
- "p99": 185.66399812698364
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 315392,
- "fanoutMean": 1.375,
- "recvTokensMax": 16,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 95.20000219345093,
- "p90": 101.47199779748917,
- "p95": 103.13600301742554,
- "p99": 108.12799632549286
- },
- "combine": {
- "p50": 70.8480030298233,
- "p90": 78.65600287914276,
- "p95": 79.0719985961914,
- "p99": 81.53600245714188
- },
- "roundtrip": {
- "p50": 143.93599331378937,
- "p90": 152.41600573062897,
- "p95": 155.61600029468536,
- "p99": 564.3519759178162
- },
- "isolatedSum": {
- "p50": 166.04800522327423,
- "p90": 180.12800067663193,
- "p95": 182.20800161361694,
- "p99": 189.66399878263474
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 1.34375,
- "recvTokensMax": 32,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 96.25600278377533,
- "p90": 103.58399897813797,
- "p95": 107.58399963378906,
- "p99": 168.09600591659546
- },
- "combine": {
- "p50": 75.71200281381607,
- "p90": 80.1599994301796,
- "p95": 80.83199709653854,
- "p99": 82.30400085449219
- },
- "roundtrip": {
- "p50": 144.73600685596466,
- "p90": 150.81599354743958,
- "p95": 152.79999375343323,
- "p99": 157.95199573040009
- },
- "isolatedSum": {
- "p50": 171.9680055975914,
- "p90": 183.74399840831757,
- "p95": 188.4159967303276,
- "p99": 250.40000677108765
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1376256,
- "combineLogicalBytes": 1376256,
- "fanoutMean": 1.5,
- "recvTokensMax": 64,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 96.3200032711029,
- "p90": 102.39999741315842,
- "p95": 104.51199859380722,
- "p99": 110.27199774980545
- },
- "combine": {
- "p50": 78.65600287914276,
- "p90": 81.37600123882294,
- "p95": 81.82399719953537,
- "p99": 87.0399996638298
- },
- "roundtrip": {
- "p50": 146.33600413799286,
- "p90": 152.38399803638458,
- "p95": 153.76000106334686,
- "p99": 157.82399475574493
- },
- "isolatedSum": {
- "p50": 174.97600615024567,
- "p90": 183.77599865198135,
- "p95": 186.3359957933426,
- "p99": 197.31199741363525
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2781184,
- "combineLogicalBytes": 2781184,
- "fanoutMean": 1.515625,
- "recvTokensMax": 128,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 96.67199850082397,
- "p90": 101.95200145244598,
- "p95": 103.87200117111206,
- "p99": 109.56799983978271
- },
- "combine": {
- "p50": 83.20000022649765,
- "p90": 88.639996945858,
- "p95": 89.28000181913376,
- "p99": 90.27200192213058
- },
- "roundtrip": {
- "p50": 154.27200496196747,
- "p90": 159.90400314331055,
- "p95": 161.8880033493042,
- "p99": 171.64799571037292
- },
- "isolatedSum": {
- "p50": 179.87199872732162,
- "p90": 190.59199839830399,
- "p95": 193.15200299024582,
- "p99": 199.8400017619133
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5533696,
- "combineLogicalBytes": 5533696,
- "fanoutMean": 1.5078125,
- "recvTokensMax": 256,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 104.80000078678131,
- "p90": 112.5440001487732,
- "p95": 115.35999923944473,
- "p99": 119.64800208806992
- },
- "combine": {
- "p50": 95.32800316810608,
- "p90": 97.6639986038208,
- "p95": 98.14400225877762,
- "p99": 103.45599800348282
- },
- "roundtrip": {
- "p50": 173.21600019931793,
- "p90": 177.47199535369873,
- "p95": 178.97599935531616,
- "p99": 184.09599363803864
- },
- "isolatedSum": {
- "p50": 200.1280039548874,
- "p90": 210.207998752594,
- "p95": 213.50400149822235,
- "p99": 223.10400009155273
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 11210752,
- "combineLogicalBytes": 11210752,
- "fanoutMean": 1.52734375,
- "recvTokensMax": 512,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 120.64000219106674,
- "p90": 141.9840008020401,
- "p95": 143.23200285434723,
- "p99": 148.54399859905243
- },
- "combine": {
- "p50": 119.48800086975098,
- "p90": 122.04799801111221,
- "p95": 122.56000190973282,
- "p99": 123.58400225639343
- },
- "roundtrip": {
- "p50": 219.84000504016876,
- "p90": 226.17599368095398,
- "p95": 227.29599475860596,
- "p99": 232.16000199317932
- },
- "isolatedSum": {
- "p50": 240.12800306081772,
- "p90": 264.0319988131523,
- "p95": 265.79200476408005,
- "p99": 272.12800085544586
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-acf36978",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec",
- "colorKey": "h100_002beb29",
- "comparisonKey": "d83561aeea03cdbc",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:55:11.297271+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_18",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "47fddabb3277bec",
- "workloadId": "set:4:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271810135",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271810135",
- "createdAt": "2026-06-26T23:55:11.297271+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 95.83999961614609,
- "p90": 101.27999633550644,
- "p95": 104.86400127410889,
- "p99": 111.51999980211258
- },
- "combine": {
- "p50": 71.74400240182877,
- "p90": 73.95199686288834,
- "p95": 79.03999835252762,
- "p99": 81.08799904584885
- },
- "roundtrip": {
- "p50": 142.5279974937439,
- "p90": 149.79200065135956,
- "p95": 151.71200037002563,
- "p99": 156.73600137233734
- },
- "isolatedSum": {
- "p50": 167.58400201797485,
- "p90": 175.23199319839478,
- "p95": 183.9039996266365,
- "p99": 192.60799884796143
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 172032,
- "fanoutMean": 1.5,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.1760025024414,
- "p90": 104.96000200510025,
- "p95": 106.91200196743011,
- "p99": 112.44799941778183
- },
- "combine": {
- "p50": 73.34399968385696,
- "p90": 79.99999821186066,
- "p95": 80.48000186681747,
- "p99": 85.08799970149994
- },
- "roundtrip": {
- "p50": 146.14400267601013,
- "p90": 152.6080071926117,
- "p95": 154.7520011663437,
- "p99": 160.73599457740784
- },
- "isolatedSum": {
- "p50": 171.52000218629837,
- "p90": 184.9600002169609,
- "p95": 187.3920038342476,
- "p99": 197.53599911928177
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1376256,
- "combineLogicalBytes": 1376256,
- "fanoutMean": 1.5,
- "recvTokensMax": 64,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 98.91200065612793,
- "p90": 105.92000186443329,
- "p95": 108.47999900579453,
- "p99": 115.93600362539291
- },
- "combine": {
- "p50": 82.87999778985977,
- "p90": 88.54400366544724,
- "p95": 88.92799913883209,
- "p99": 90.27200192213058
- },
- "roundtrip": {
- "p50": 156.19200468063354,
- "p90": 162.84799575805664,
- "p95": 165.56799411773682,
- "p99": 169.72799599170685
- },
- "isolatedSum": {
- "p50": 181.7919984459877,
- "p90": 194.46400552988052,
- "p95": 197.40799814462662,
- "p99": 206.2080055475235
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5533696,
- "combineLogicalBytes": 5533696,
- "fanoutMean": 1.5078125,
- "recvTokensMax": 256,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 121.88799679279327,
- "p90": 129.88799810409546,
- "p95": 131.16799294948578,
- "p99": 136.1279934644699
- },
- "combine": {
- "p50": 114.68800157308578,
- "p90": 121.18399888277054,
- "p95": 122.079998254776,
- "p99": 129.2160004377365
- },
- "roundtrip": {
- "p50": 219.90400552749634,
- "p90": 224.73600506782532,
- "p95": 226.623997092247,
- "p99": 230.30400276184082
- },
- "isolatedSum": {
- "p50": 236.57599836587906,
- "p90": 251.071996986866,
- "p95": 253.24799120426178,
- "p99": 265.3439939022064
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-18fdfbeb",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366",
- "colorKey": "h100_c44978e5",
- "comparisonKey": "26b5ab23f62d3389",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:10.918377+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_11",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "5a3054422534366",
- "workloadId": "set:8:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 7.40625,
- "eplbImbalanceAfter": 1.0004417782738093,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271992225",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271992225",
- "createdAt": "2026-06-27T00:01:10.918377+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 94.01600062847137,
- "p90": 101.59999877214432,
- "p95": 102.68799960613251,
- "p99": 107.96800255775452
- },
- "combine": {
- "p50": 71.87200337648392,
- "p90": 78.87999713420868,
- "p95": 79.48800176382065,
- "p99": 80.99199831485748
- },
- "roundtrip": {
- "p50": 138.72000575065613,
- "p90": 147.2640037536621,
- "p95": 148.76799285411835,
- "p99": 153.08800339698792
- },
- "isolatedSum": {
- "p50": 165.8880040049553,
- "p90": 180.479995906353,
- "p95": 182.17600136995316,
- "p99": 188.960000872612
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 559104,
- "combineLogicalBytes": 559104,
- "fanoutMean": 4.875,
- "recvTokensMax": 6,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 69.92000341415405,
- "p90": 99.64799880981445,
- "p95": 101.43999755382538,
- "p99": 106.84800148010254
- },
- "combine": {
- "p50": 71.6480016708374,
- "p90": 79.71200346946716,
- "p95": 80.64000308513641,
- "p99": 81.91999793052673
- },
- "roundtrip": {
- "p50": 129.34400141239166,
- "p90": 143.71199905872345,
- "p95": 146.08000218868256,
- "p99": 150.39999783039093
- },
- "isolatedSum": {
- "p50": 141.56800508499146,
- "p90": 179.36000227928162,
- "p95": 182.0800006389618,
- "p99": 188.76799941062927
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1175552,
- "combineLogicalBytes": 1175552,
- "fanoutMean": 5.125,
- "recvTokensMax": 12,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 71.74400240182877,
- "p90": 99.80800002813339,
- "p95": 101.79200023412704,
- "p99": 107.96800255775452
- },
- "combine": {
- "p50": 72.67200201749802,
- "p90": 81.56800270080566,
- "p95": 86.43200248479843,
- "p99": 88.73599767684937
- },
- "roundtrip": {
- "p50": 129.50399518013,
- "p90": 156.47999942302704,
- "p95": 159.13599729537964,
- "p99": 162.6880019903183
- },
- "isolatedSum": {
- "p50": 144.41600441932678,
- "p90": 181.37600272893906,
- "p95": 188.22400271892548,
- "p99": 196.70400023460388
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2465792,
- "combineLogicalBytes": 2465792,
- "fanoutMean": 5.375,
- "recvTokensMax": 25,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 72.12799787521362,
- "p90": 96.16000205278397,
- "p95": 98.30400347709656,
- "p99": 103.64799946546555
- },
- "combine": {
- "p50": 72.9919970035553,
- "p90": 81.08799904584885,
- "p95": 81.60000294446945,
- "p99": 87.13600039482117
- },
- "roundtrip": {
- "p50": 127.9039978981018,
- "p90": 152.16000378131866,
- "p95": 155.90399503707886,
- "p99": 157.24800527095795
- },
- "isolatedSum": {
- "p50": 145.11999487876892,
- "p90": 177.2480010986328,
- "p95": 179.904006421566,
- "p99": 190.7839998602867
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4988928,
- "combineLogicalBytes": 4988928,
- "fanoutMean": 5.4375,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 82.0159986615181,
- "p90": 98.55999797582626,
- "p95": 101.50399804115295,
- "p99": 106.33599758148193
- },
- "combine": {
- "p50": 73.56800138950348,
- "p90": 87.87199854850769,
- "p95": 88.8959988951683,
- "p99": 89.88799899816513
- },
- "roundtrip": {
- "p50": 127.71199643611908,
- "p90": 159.32799875736237,
- "p95": 160.99199652671814,
- "p99": 163.90399634838104
- },
- "isolatedSum": {
- "p50": 155.58400005102158,
- "p90": 186.43199652433395,
- "p95": 190.39999693632126,
- "p99": 196.22399657964706
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9791488,
- "combineLogicalBytes": 9791488,
- "fanoutMean": 5.3359375,
- "recvTokensMax": 94,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 89.82399851083755,
- "p90": 101.27999633550644,
- "p95": 102.65599936246872,
- "p99": 107.29599744081497
- },
- "combine": {
- "p50": 80.73599636554718,
- "p90": 89.4400030374527,
- "p95": 89.85599875450134,
- "p99": 95.42399644851685
- },
- "roundtrip": {
- "p50": 141.59999787807465,
- "p90": 158.9439958333969,
- "p95": 161.18399798870087,
- "p99": 167.32800006866455
- },
- "isolatedSum": {
- "p50": 170.55999487638474,
- "p90": 190.71999937295914,
- "p95": 192.51199811697006,
- "p99": 202.71999388933182
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19410944,
- "combineLogicalBytes": 19410944,
- "fanoutMean": 5.2890625,
- "recvTokensMax": 178,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 96.09600156545639,
- "p90": 118.1119978427887,
- "p95": 120.57600170373917,
- "p99": 127.83999741077423
- },
- "combine": {
- "p50": 89.82399851083755,
- "p90": 103.20000350475311,
- "p95": 103.80800068378448,
- "p99": 104.70400005578995
- },
- "roundtrip": {
- "p50": 160.288006067276,
- "p90": 180.95999956130981,
- "p95": 185.18400192260742,
- "p99": 188.60800564289093
- },
- "isolatedSum": {
- "p50": 185.92000007629395,
- "p90": 221.3120013475418,
- "p95": 224.38400238752365,
- "p99": 232.54399746656418
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38678528,
- "combineLogicalBytes": 38678528,
- "fanoutMean": 5.26953125,
- "recvTokensMax": 360,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 114.52800035476685,
- "p90": 135.0719928741455,
- "p95": 136.6720050573349,
- "p99": 140.00000059604645
- },
- "combine": {
- "p50": 106.01600259542465,
- "p90": 119.71200257539749,
- "p95": 120.35199999809265,
- "p99": 122.14399874210358
- },
- "roundtrip": {
- "p50": 195.96800208091736,
- "p90": 214.33599293231964,
- "p95": 216.86400473117828,
- "p99": 220.44800221920013
- },
- "isolatedSum": {
- "p50": 220.5440029501915,
- "p90": 254.783995449543,
- "p95": 257.02400505542755,
- "p99": 262.14399933815
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77285376,
- "combineLogicalBytes": 77285376,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 704,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-efff3174",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b",
- "colorKey": "h100_9aa30544",
- "comparisonKey": "c4aa2e0da9446ced",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:21.116102+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_13",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-mild",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "f3df51be7d5c32b",
- "workloadId": "set:8:289b7f9c14292e96",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271958693",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271958693",
- "createdAt": "2026-06-27T00:00:21.116102+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 97.28000313043594,
- "p90": 104.70400005578995,
- "p95": 106.11200332641602,
- "p99": 112.73600161075592
- },
- "combine": {
- "p50": 79.71200346946716,
- "p90": 82.65600353479385,
- "p95": 99.13600236177444,
- "p99": 275.4560112953186
- },
- "roundtrip": {
- "p50": 147.61599898338318,
- "p90": 155.32800555229187,
- "p95": 156.73600137233734,
- "p99": 162.91199624538422
- },
- "isolatedSum": {
- "p50": 176.9920065999031,
- "p90": 187.3600035905838,
- "p95": 205.24800568819046,
- "p99": 388.1920129060745
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 587776,
- "combineLogicalBytes": 587776,
- "fanoutMean": 5.125,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 72.80000299215317,
- "p90": 102.7199998497963,
- "p95": 104.89600151777267,
- "p99": 109.66400057077408
- },
- "combine": {
- "p50": 73.15199822187424,
- "p90": 81.44000172615051,
- "p95": 81.88799768686295,
- "p99": 82.91199803352356
- },
- "roundtrip": {
- "p50": 129.4720023870468,
- "p90": 153.3759981393814,
- "p95": 156.15999698638916,
- "p99": 164.92800414562225
- },
- "isolatedSum": {
- "p50": 145.9520012140274,
- "p90": 184.1600015759468,
- "p95": 186.78399920463562,
- "p99": 192.57599860429764
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1103872,
- "combineLogicalBytes": 1103872,
- "fanoutMean": 4.8125,
- "recvTokensMax": 16,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 76.25599950551987,
- "p90": 102.62399911880493,
- "p95": 105.24799674749374,
- "p99": 109.47199910879135
- },
- "combine": {
- "p50": 73.31199944019318,
- "p90": 81.4720019698143,
- "p95": 86.20800077915192,
- "p99": 89.34400230646133
- },
- "roundtrip": {
- "p50": 129.56799566745758,
- "p90": 157.9200029373169,
- "p95": 160.35200655460358,
- "p99": 166.04800522327423
- },
- "isolatedSum": {
- "p50": 149.56799894571304,
- "p90": 184.09600108861923,
- "p95": 191.45599752664566,
- "p99": 198.81600141525269
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2250752,
- "combineLogicalBytes": 2250752,
- "fanoutMean": 4.90625,
- "recvTokensMax": 31,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 76.83199644088745,
- "p90": 101.79200023412704,
- "p95": 105.02400249242783,
- "p99": 109.31199789047241
- },
- "combine": {
- "p50": 73.5040009021759,
- "p90": 82.04799890518188,
- "p95": 86.40000224113464,
- "p99": 88.54400366544724
- },
- "roundtrip": {
- "p50": 130.23999333381653,
- "p90": 159.39199924468994,
- "p95": 161.82400286197662,
- "p99": 165.98400473594666
- },
- "isolatedSum": {
- "p50": 150.33599734306335,
- "p90": 183.83999913930893,
- "p95": 191.42400473356247,
- "p99": 197.85600155591965
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4472832,
- "combineLogicalBytes": 4472832,
- "fanoutMean": 4.875,
- "recvTokensMax": 62,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 96.00000083446503,
- "p90": 104.73600029945374,
- "p95": 108.51199924945831,
- "p99": 115.74400216341019
- },
- "combine": {
- "p50": 80.03199845552444,
- "p90": 87.23200112581253,
- "p95": 88.51200342178345,
- "p99": 90.01599997282028
- },
- "roundtrip": {
- "p50": 135.1040005683899,
- "p90": 161.40800714492798,
- "p95": 164.5440012216568,
- "p99": 169.50400173664093
- },
- "isolatedSum": {
- "p50": 176.03199928998947,
- "p90": 191.96800142526627,
- "p95": 197.02400267124176,
- "p99": 205.76000213623047
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 8888320,
- "combineLogicalBytes": 8888320,
- "fanoutMean": 4.84375,
- "recvTokensMax": 124,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 90.65599739551544,
- "p90": 102.75200009346008,
- "p95": 105.69600015878677,
- "p99": 109.37599837779999
- },
- "combine": {
- "p50": 81.60000294446945,
- "p90": 90.59199690818787,
- "p95": 95.32800316810608,
- "p99": 97.47199714183807
- },
- "roundtrip": {
- "p50": 145.1839953660965,
- "p90": 165.56799411773682,
- "p95": 168.5439944267273,
- "p99": 174.68799650669098
- },
- "isolatedSum": {
- "p50": 172.2560003399849,
- "p90": 193.34399700164795,
- "p95": 201.02400332689285,
- "p99": 206.84799551963806
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 17733632,
- "combineLogicalBytes": 17733632,
- "fanoutMean": 4.83203125,
- "recvTokensMax": 248,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 101.43999755382538,
- "p90": 116.89600348472595,
- "p95": 119.77600306272507,
- "p99": 138.7840062379837
- },
- "combine": {
- "p50": 90.59199690818787,
- "p90": 103.35999727249146,
- "p95": 104.3199971318245,
- "p99": 105.92000186443329
- },
- "roundtrip": {
- "p50": 168.7680035829544,
- "p90": 185.88800728321075,
- "p95": 188.6720061302185,
- "p99": 193.37600469589233
- },
- "isolatedSum": {
- "p50": 192.03199446201324,
- "p90": 220.2560007572174,
- "p95": 224.09600019454956,
- "p99": 244.704008102417
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 35424256,
- "combineLogicalBytes": 35424256,
- "fanoutMean": 4.826171875,
- "recvTokensMax": 492,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.5920021533966,
- "p90": 134.91199910640717,
- "p95": 136.9280070066452,
- "p99": 143.64799857139587
- },
- "combine": {
- "p50": 115.07199704647064,
- "p90": 128.63999605178833,
- "p95": 130.40000200271606,
- "p99": 139.71200585365295
- },
- "roundtrip": {
- "p50": 215.5199944972992,
- "p90": 233.66400599479675,
- "p95": 235.35999655723572,
- "p99": 240.12799561023712
- },
- "isolatedSum": {
- "p50": 237.66399919986725,
- "p90": 263.5519951581955,
- "p95": 267.32800900936127,
- "p99": 283.3600044250488
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 70160384,
- "combineLogicalBytes": 70160384,
- "fanoutMean": 4.779296875,
- "recvTokensMax": 987,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-6d1780ec",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243",
- "colorKey": "h100_e8b903ea",
- "comparisonKey": "0d93a7b7a0fcf6d0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:17.527263+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_01",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "16babcaf4204243",
- "workloadId": "set:8:289b7f9c14292e96",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 2.61328125,
- "eplbImbalanceAfter": 1.0009114583333334,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271962037",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271962037",
- "createdAt": "2026-06-27T00:00:17.527263+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 98.55999797582626,
- "p90": 106.33599758148193,
- "p95": 108.51199924945831,
- "p99": 113.21599781513214
- },
- "combine": {
- "p50": 79.39200103282928,
- "p90": 81.85599744319916,
- "p95": 82.56000280380249,
- "p99": 87.10400015115738
- },
- "roundtrip": {
- "p50": 145.50399780273438,
- "p90": 154.7199934720993,
- "p95": 156.8640023469925,
- "p99": 160.7999950647354
- },
- "isolatedSum": {
- "p50": 177.95199900865555,
- "p90": 188.1919950246811,
- "p95": 191.0720020532608,
- "p99": 200.31999796628952
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.60000163316727,
- "p90": 108.31999778747559,
- "p95": 109.66400057077408,
- "p99": 115.13599753379822
- },
- "combine": {
- "p50": 72.51200079917908,
- "p90": 81.60000294446945,
- "p95": 82.36800134181976,
- "p99": 87.20000088214874
- },
- "roundtrip": {
- "p50": 129.05600666999817,
- "p90": 156.47999942302704,
- "p95": 160.0639969110489,
- "p99": 162.1759980916977
- },
- "isolatedSum": {
- "p50": 146.11200243234634,
- "p90": 189.92000073194504,
- "p95": 192.03200191259384,
- "p99": 202.33599841594696
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1189888,
- "combineLogicalBytes": 1189888,
- "fanoutMean": 5.1875,
- "recvTokensMax": 12,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 97.120001912117,
- "p90": 103.87200117111206,
- "p95": 105.66399991512299,
- "p99": 110.68800091743469
- },
- "combine": {
- "p50": 79.55200225114822,
- "p90": 82.20800012350082,
- "p95": 86.30400151014328,
- "p99": 88.3840024471283
- },
- "roundtrip": {
- "p50": 151.32799744606018,
- "p90": 159.61599349975586,
- "p95": 161.15200519561768,
- "p99": 167.71200299263
- },
- "isolatedSum": {
- "p50": 176.67200416326523,
- "p90": 186.08000129461288,
- "p95": 191.96800142526627,
- "p99": 199.072003364563
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2408448,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 23,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 96.83199971914291,
- "p90": 103.07200253009796,
- "p95": 104.47999835014343,
- "p99": 111.48799955844879
- },
- "combine": {
- "p50": 79.48800176382065,
- "p90": 82.49600231647491,
- "p95": 87.0399996638298,
- "p99": 88.76799792051315
- },
- "roundtrip": {
- "p50": 152.38399803638458,
- "p90": 159.96800363063812,
- "p95": 162.20800578594208,
- "p99": 166.59200191497803
- },
- "isolatedSum": {
- "p50": 176.32000148296356,
- "p90": 185.56800484657288,
- "p95": 191.51999801397324,
- "p99": 200.25599747896194
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 96.92800045013428,
- "p90": 102.01600193977356,
- "p95": 104.76800054311752,
- "p99": 113.02399635314941
- },
- "combine": {
- "p50": 80.86399734020233,
- "p90": 88.3840024471283,
- "p95": 89.63199704885483,
- "p99": 94.65599805116653
- },
- "roundtrip": {
- "p50": 153.21600437164307,
- "p90": 159.39199924468994,
- "p95": 160.8320027589798,
- "p99": 165.3759926557541
- },
- "isolatedSum": {
- "p50": 177.7919977903366,
- "p90": 190.40000438690186,
- "p95": 194.39999759197235,
- "p99": 207.67999440431595
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9605120,
- "combineLogicalBytes": 9605120,
- "fanoutMean": 5.234375,
- "recvTokensMax": 93,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 91.5519967675209,
- "p90": 105.27999699115753,
- "p95": 106.52799904346466,
- "p99": 110.55999994277954
- },
- "combine": {
- "p50": 81.216000020504,
- "p90": 90.17600119113922,
- "p95": 94.33600306510925,
- "p99": 96.79999947547913
- },
- "roundtrip": {
- "p50": 144.1279947757721,
- "p90": 167.52000153064728,
- "p95": 168.99199783802032,
- "p99": 173.567995429039
- },
- "isolatedSum": {
- "p50": 172.7679967880249,
- "p90": 195.45599818229675,
- "p95": 200.8640021085739,
- "p99": 207.35999941825867
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19367936,
- "combineLogicalBytes": 19367936,
- "fanoutMean": 5.27734375,
- "recvTokensMax": 182,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 104.80000078678131,
- "p90": 116.35199934244156,
- "p95": 118.81600320339203,
- "p99": 122.97599762678146
- },
- "combine": {
- "p50": 96.38399630784988,
- "p90": 104.00000214576721,
- "p95": 104.5759990811348,
- "p99": 106.4319983124733
- },
- "roundtrip": {
- "p50": 177.76000499725342,
- "p90": 185.44000387191772,
- "p95": 187.16800212860107,
- "p99": 190.3039962053299
- },
- "isolatedSum": {
- "p50": 201.1839970946312,
- "p90": 220.35200148820877,
- "p95": 223.39200228452682,
- "p99": 229.40799593925476
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38535168,
- "combineLogicalBytes": 38535168,
- "fanoutMean": 5.25,
- "recvTokensMax": 358,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 113.56800049543381,
- "p90": 131.58400356769562,
- "p95": 133.66399705410004,
- "p99": 139.96799290180206
- },
- "combine": {
- "p50": 106.55999928712845,
- "p90": 119.55200135707855,
- "p95": 120.09599804878235,
- "p99": 121.05599790811539
- },
- "roundtrip": {
- "p50": 198.46400618553162,
- "p90": 217.6000028848648,
- "p95": 218.75199675559998,
- "p99": 224.2880016565323
- },
- "isolatedSum": {
- "p50": 220.12799978256226,
- "p90": 251.13600492477417,
- "p95": 253.75999510288239,
- "p99": 261.02399080991745
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 76869632,
- "combineLogicalBytes": 76869632,
- "fanoutMean": 5.236328125,
- "recvTokensMax": 688,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-9d829c00",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c",
- "colorKey": "h100_552a4b73",
- "comparisonKey": "95c165fc74bc43c0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:35.674306+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-moderate",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "14ded8461f2636c",
- "workloadId": "set:8:120a8dc1dba92ca9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271971983",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271971983",
- "createdAt": "2026-06-27T00:00:35.674306+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 95.8079993724823,
- "p90": 103.10400277376175,
- "p95": 104.16000336408615,
- "p99": 110.01600325107574
- },
- "combine": {
- "p50": 74.33599978685379,
- "p90": 81.56800270080566,
- "p95": 81.98399841785431,
- "p99": 83.29600095748901
- },
- "roundtrip": {
- "p50": 142.2719955444336,
- "p90": 148.67199957370758,
- "p95": 150.4639983177185,
- "p99": 154.11199629306793
- },
- "isolatedSum": {
- "p50": 170.1439991593361,
- "p90": 184.6720054745674,
- "p95": 186.14400178194046,
- "p99": 193.31200420856476
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 70.88000327348709,
- "p90": 101.98400169610977,
- "p95": 102.94400155544281,
- "p99": 106.01600259542465
- },
- "combine": {
- "p50": 72.4480003118515,
- "p90": 81.40800148248672,
- "p95": 81.95199817419052,
- "p99": 85.7279971241951
- },
- "roundtrip": {
- "p50": 128.7039965391159,
- "p90": 147.71200716495514,
- "p95": 149.59999918937683,
- "p99": 152.79999375343323
- },
- "isolatedSum": {
- "p50": 143.3280035853386,
- "p90": 183.3920031785965,
- "p95": 184.89599972963333,
- "p99": 191.74399971961975
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.18399846553802,
- "p90": 93.82399916648865,
- "p95": 96.41599655151367,
- "p99": 104.99200224876404
- },
- "combine": {
- "p50": 70.8480030298233,
- "p90": 77.82399654388428,
- "p95": 78.59200239181519,
- "p99": 83.45600217580795
- },
- "roundtrip": {
- "p50": 125.44000148773193,
- "p90": 151.74399316310883,
- "p95": 154.1759967803955,
- "p99": 160.09600460529327
- },
- "isolatedSum": {
- "p50": 144.03200149536133,
- "p90": 171.64799571037292,
- "p95": 175.00799894332886,
- "p99": 188.448004424572
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 75.83999633789062,
- "p90": 100.22400319576263,
- "p95": 102.39999741315842,
- "p99": 107.4879989027977
- },
- "combine": {
- "p50": 73.18399846553802,
- "p90": 81.44000172615051,
- "p95": 82.24000036716461,
- "p99": 87.23200112581253
- },
- "roundtrip": {
- "p50": 126.27199292182922,
- "p90": 154.88000214099884,
- "p95": 157.47199952602386,
- "p99": 159.4880074262619
- },
- "isolatedSum": {
- "p50": 149.02399480342865,
- "p90": 181.66400492191315,
- "p95": 184.63999778032303,
- "p99": 194.72000002861023
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 79.3600007891655,
- "p90": 100.0640019774437,
- "p95": 123.80799651145935,
- "p99": 229.76000607013702
- },
- "combine": {
- "p50": 73.88799637556076,
- "p90": 82.2720006108284,
- "p95": 83.36000144481659,
- "p99": 89.28000181913376
- },
- "roundtrip": {
- "p50": 130.17599284648895,
- "p90": 154.62400019168854,
- "p95": 157.3760062456131,
- "p99": 162.7199947834015
- },
- "isolatedSum": {
- "p50": 153.24799716472626,
- "p90": 182.3360025882721,
- "p95": 207.16799795627594,
- "p99": 319.0400078892708
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 88.44800293445587,
- "p90": 103.71199995279312,
- "p95": 105.76000064611435,
- "p99": 110.1439967751503
- },
- "combine": {
- "p50": 81.60000294446945,
- "p90": 89.6959975361824,
- "p95": 90.27200192213058,
- "p99": 91.80799871683121
- },
- "roundtrip": {
- "p50": 141.34399592876434,
- "p90": 161.98399662971497,
- "p95": 163.455992937088,
- "p99": 169.24799978733063
- },
- "isolatedSum": {
- "p50": 170.04800587892532,
- "p90": 193.40799748897552,
- "p95": 196.03200256824493,
- "p99": 201.9519954919815
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 102.36799716949463,
- "p90": 119.6800023317337,
- "p95": 121.31199985742569,
- "p99": 123.77600371837616
- },
- "combine": {
- "p50": 89.9839997291565,
- "p90": 96.03200107812881,
- "p95": 99.48799759149551,
- "p99": 102.04800218343735
- },
- "roundtrip": {
- "p50": 165.69599509239197,
- "p90": 182.43199586868286,
- "p95": 184.1599941253662,
- "p99": 187.51999735832214
- },
- "isolatedSum": {
- "p50": 192.35199689865112,
- "p90": 215.71200340986252,
- "p95": 220.7999974489212,
- "p99": 225.8240059018135
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 25145344,
- "combineLogicalBytes": 25145344,
- "fanoutMean": 3.42578125,
- "recvTokensMax": 510,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 125.91999769210815,
- "p90": 144.70399916172028,
- "p95": 145.9520012140274,
- "p99": 148.00000190734863
- },
- "combine": {
- "p50": 114.56000059843063,
- "p90": 119.99999731779099,
- "p95": 122.30399996042252,
- "p99": 126.91199779510498
- },
- "roundtrip": {
- "p50": 218.9760059118271,
- "p90": 233.63199830055237,
- "p95": 235.1360023021698,
- "p99": 238.304004073143
- },
- "isolatedSum": {
- "p50": 240.4799982905388,
- "p90": 264.70399647951126,
- "p95": 268.2560011744499,
- "p99": 274.9119997024536
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-c61b6088",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836",
- "colorKey": "h100_106a51ab",
- "comparisonKey": "6643ae5a97d68820",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:43.354862+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_07",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "a8f501af7004836",
- "workloadId": "set:8:120a8dc1dba92ca9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.927734375,
- "eplbImbalanceAfter": 1.0006103515625,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271975554",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271975554",
- "createdAt": "2026-06-27T00:00:43.354862+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 69.72800195217133,
- "p90": 76.7040029168129,
- "p95": 82.24000036716461,
- "p99": 100.09600222110748
- },
- "combine": {
- "p50": 70.78400254249573,
- "p90": 73.11999797821045,
- "p95": 73.53600114583969,
- "p99": 78.3040001988411
- },
- "roundtrip": {
- "p50": 124.35200065374374,
- "p90": 129.88799810409546,
- "p95": 131.20000064373016,
- "p99": 137.40800321102142
- },
- "isolatedSum": {
- "p50": 140.51200449466705,
- "p90": 149.82400089502335,
- "p95": 155.7760015130043,
- "p99": 178.40000241994858
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 5.375,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 69.92000341415405,
- "p90": 77.79199630022049,
- "p95": 80.19199967384338,
- "p99": 96.19200229644775
- },
- "combine": {
- "p50": 71.16799801588058,
- "p90": 73.27999919652939,
- "p95": 73.85600358247757,
- "p99": 78.94399762153625
- },
- "roundtrip": {
- "p50": 126.94400548934937,
- "p90": 130.91200590133667,
- "p95": 132.1280002593994,
- "p99": 138.33600282669067
- },
- "isolatedSum": {
- "p50": 141.08800143003464,
- "p90": 151.07199549674988,
- "p95": 154.04800325632095,
- "p99": 175.135999917984
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 1204224,
- "fanoutMean": 5.25,
- "recvTokensMax": 14,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 74.07999783754349,
- "p90": 101.34399682283401,
- "p95": 103.13600301742554,
- "p99": 111.39199882745743
- },
- "combine": {
- "p50": 72.9919970035553,
- "p90": 82.0159986615181,
- "p95": 87.00799942016602,
- "p99": 89.31200206279755
- },
- "roundtrip": {
- "p50": 131.32800161838531,
- "p90": 158.59200060367584,
- "p95": 163.13600540161133,
- "p99": 169.69600319862366
- },
- "isolatedSum": {
- "p50": 147.07199484109879,
- "p90": 183.3599954843521,
- "p95": 190.14400243759155,
- "p99": 200.70400089025497
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 24,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 75.6480023264885,
- "p90": 100.76799988746643,
- "p95": 102.01600193977356,
- "p99": 105.95200210809708
- },
- "combine": {
- "p50": 72.9919970035553,
- "p90": 79.68000322580338,
- "p95": 80.6720033288002,
- "p99": 85.88799834251404
- },
- "roundtrip": {
- "p50": 129.63199615478516,
- "p90": 154.91199493408203,
- "p95": 156.47999942302704,
- "p99": 159.96800363063812
- },
- "isolatedSum": {
- "p50": 148.6399993300438,
- "p90": 180.4480031132698,
- "p95": 182.68800526857376,
- "p99": 191.84000045061111
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4630528,
- "combineLogicalBytes": 4630528,
- "fanoutMean": 5.046875,
- "recvTokensMax": 45,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 82.49600231647491,
- "p90": 100.73599964380264,
- "p95": 103.04000228643417,
- "p99": 106.81600123643875
- },
- "combine": {
- "p50": 74.36800003051758,
- "p90": 87.0399996638298,
- "p95": 87.90399879217148,
- "p99": 89.63199704885483
- },
- "roundtrip": {
- "p50": 132.38400220870972,
- "p90": 161.02400422096252,
- "p95": 162.81600296497345,
- "p99": 166.72000288963318
- },
- "isolatedSum": {
- "p50": 156.8640023469925,
- "p90": 187.77599930763245,
- "p95": 190.94400107860565,
- "p99": 196.44799828529358
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9447424,
- "combineLogicalBytes": 9447424,
- "fanoutMean": 5.1484375,
- "recvTokensMax": 91,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 90.30400216579437,
- "p90": 103.32799702882767,
- "p95": 104.35199737548828,
- "p99": 109.6000000834465
- },
- "combine": {
- "p50": 81.31200075149536,
- "p90": 89.75999802350998,
- "p95": 90.43200314044952,
- "p99": 91.61599725484848
- },
- "roundtrip": {
- "p50": 142.20799505710602,
- "p90": 158.65600109100342,
- "p95": 161.50400042533875,
- "p99": 167.39200055599213
- },
- "isolatedSum": {
- "p50": 171.61600291728973,
- "p90": 193.08799505233765,
- "p95": 194.7840005159378,
- "p99": 201.21599733829498
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19023872,
- "combineLogicalBytes": 19023872,
- "fanoutMean": 5.18359375,
- "recvTokensMax": 178,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 96.89600020647049,
- "p90": 116.60800129175186,
- "p95": 118.43200027942657,
- "p99": 124.32000041007996
- },
- "combine": {
- "p50": 90.30400216579437,
- "p90": 103.32799702882767,
- "p95": 103.74400019645691,
- "p99": 104.25599664449692
- },
- "roundtrip": {
- "p50": 162.08000481128693,
- "p90": 178.8800060749054,
- "p95": 181.85600638389587,
- "p99": 186.49600446224213
- },
- "isolatedSum": {
- "p50": 187.20000237226486,
- "p90": 219.93599832057953,
- "p95": 222.17600047588348,
- "p99": 228.57599705457687
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38148096,
- "combineLogicalBytes": 38148096,
- "fanoutMean": 5.197265625,
- "recvTokensMax": 350,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 118.07999759912491,
- "p90": 135.3279948234558,
- "p95": 138.2399946451187,
- "p99": 140.57600498199463
- },
- "combine": {
- "p50": 106.84800148010254,
- "p90": 119.45600062608719,
- "p95": 119.74400281906128,
- "p99": 120.54400146007538
- },
- "roundtrip": {
- "p50": 198.84799420833588,
- "p90": 216.2880003452301,
- "p95": 219.67999637126923,
- "p99": 221.47199511528015
- },
- "isolatedSum": {
- "p50": 224.92799907922745,
- "p90": 254.783995449543,
- "p95": 257.98399746418,
- "p99": 261.12000644207
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 76955648,
- "combineLogicalBytes": 76955648,
- "fanoutMean": 5.2421875,
- "recvTokensMax": 687,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-a38d13e8",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836",
- "colorKey": "h100_769b9c4b",
- "comparisonKey": "115d84ad1ee38d09",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:11.807854+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_02",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "a8f501af7004836",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.927734375,
- "eplbImbalanceAfter": 1.0006103515625,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271948775",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271948775",
- "createdAt": "2026-06-27T00:00:11.807854+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 70.39999961853027,
- "p90": 100.832000374794,
- "p95": 105.56799918413162,
- "p99": 192.73599982261658
- },
- "combine": {
- "p50": 73.18399846553802,
- "p90": 88.44800293445587,
- "p95": 188.38399648666382,
- "p99": 344.2560136318207
- },
- "roundtrip": {
- "p50": 123.77600371837616,
- "p90": 133.08799266815186,
- "p95": 149.4400054216385,
- "p99": 156.12800419330597
- },
- "isolatedSum": {
- "p50": 143.5839980840683,
- "p90": 189.28000330924988,
- "p95": 293.95199567079544,
- "p99": 536.9920134544373
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 5.375,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 68.38399916887283,
- "p90": 75.71200281381607,
- "p95": 77.11999863386154,
- "p99": 95.61599791049957
- },
- "combine": {
- "p50": 71.29599899053574,
- "p90": 73.44000041484833,
- "p95": 74.36800003051758,
- "p99": 82.2720006108284
- },
- "roundtrip": {
- "p50": 126.68800354003906,
- "p90": 130.87999820709229,
- "p95": 133.56800377368927,
- "p99": 142.59199798107147
- },
- "isolatedSum": {
- "p50": 139.67999815940857,
- "p90": 149.1520032286644,
- "p95": 151.48799866437912,
- "p99": 177.88799852132797
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 1204224,
- "fanoutMean": 5.25,
- "recvTokensMax": 14,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 72.54400104284286,
- "p90": 99.2640033364296,
- "p95": 102.08000242710114,
- "p99": 107.39199817180634
- },
- "combine": {
- "p50": 72.9919970035553,
- "p90": 79.71200346946716,
- "p95": 84.22400057315826,
- "p99": 87.39200234413147
- },
- "roundtrip": {
- "p50": 130.23999333381653,
- "p90": 156.41599893569946,
- "p95": 160.22400557994843,
- "p99": 165.53600132465363
- },
- "isolatedSum": {
- "p50": 145.53599804639816,
- "p90": 178.97600680589676,
- "p95": 186.3040030002594,
- "p99": 194.7840005159378
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 24,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 96.19200229644775,
- "p90": 109.56799983978271,
- "p95": 112.73600161075592,
- "p99": 155.87200224399567
- },
- "combine": {
- "p50": 75.45600086450577,
- "p90": 88.06400001049042,
- "p95": 89.4400030374527,
- "p99": 97.37599641084671
- },
- "roundtrip": {
- "p50": 130.94399869441986,
- "p90": 154.4319987297058,
- "p95": 156.44800662994385,
- "p99": 176.67199671268463
- },
- "isolatedSum": {
- "p50": 171.64800316095352,
- "p90": 197.63199985027313,
- "p95": 202.17600464820862,
- "p99": 253.24799865484238
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4630528,
- "combineLogicalBytes": 4630528,
- "fanoutMean": 5.046875,
- "recvTokensMax": 45,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 82.97599852085114,
- "p90": 100.16000270843506,
- "p95": 103.55199873447418,
- "p99": 106.72000050544739
- },
- "combine": {
- "p50": 74.14399832487106,
- "p90": 87.3280018568039,
- "p95": 88.95999938249588,
- "p99": 89.82399851083755
- },
- "roundtrip": {
- "p50": 131.6480040550232,
- "p90": 158.9760035276413,
- "p95": 161.31199896335602,
- "p99": 166.78400337696075
- },
- "isolatedSum": {
- "p50": 157.1199968457222,
- "p90": 187.48800456523895,
- "p95": 192.51199811697006,
- "p99": 196.54399901628494
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9447424,
- "combineLogicalBytes": 9447424,
- "fanoutMean": 5.1484375,
- "recvTokensMax": 91,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 90.30400216579437,
- "p90": 105.6319996714592,
- "p95": 106.6880002617836,
- "p99": 111.04000359773636
- },
- "combine": {
- "p50": 80.99199831485748,
- "p90": 89.15200084447861,
- "p95": 89.88799899816513,
- "p99": 90.91199934482574
- },
- "roundtrip": {
- "p50": 142.17600226402283,
- "p90": 157.6640009880066,
- "p95": 160.44799983501434,
- "p99": 164.8319959640503
- },
- "isolatedSum": {
- "p50": 171.29600048065186,
- "p90": 194.7840005159378,
- "p95": 196.57599925994873,
- "p99": 201.9520029425621
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19023872,
- "combineLogicalBytes": 19023872,
- "fanoutMean": 5.18359375,
- "recvTokensMax": 178,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 95.74399888515472,
- "p90": 116.2559986114502,
- "p95": 121.98399752378464,
- "p99": 398.6560106277466
- },
- "combine": {
- "p50": 90.20800143480301,
- "p90": 101.1200025677681,
- "p95": 104.25599664449692,
- "p99": 111.55200004577637
- },
- "roundtrip": {
- "p50": 160.76800227165222,
- "p90": 181.536003947258,
- "p95": 185.37600338459015,
- "p99": 188.35200369358063
- },
- "isolatedSum": {
- "p50": 185.95200031995773,
- "p90": 217.3760011792183,
- "p95": 226.23999416828156,
- "p99": 510.20801067352295
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38148096,
- "combineLogicalBytes": 38148096,
- "fanoutMean": 5.197265625,
- "recvTokensMax": 350,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 114.3679991364479,
- "p90": 133.44000279903412,
- "p95": 137.31199502944946,
- "p99": 142.7839994430542
- },
- "combine": {
- "p50": 108.15999656915665,
- "p90": 120.2239990234375,
- "p95": 121.24799937009811,
- "p99": 123.99999797344208
- },
- "roundtrip": {
- "p50": 199.35999810695648,
- "p90": 217.31199324131012,
- "p95": 220.15999257564545,
- "p99": 380.8319866657257
- },
- "isolatedSum": {
- "p50": 222.52799570560455,
- "p90": 253.66400182247162,
- "p95": 258.5599943995476,
- "p99": 266.7839974164963
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 76955648,
- "combineLogicalBytes": 76955648,
- "fanoutMean": 5.2421875,
- "recvTokensMax": 687,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4ad32f1a",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|normalized|0.18|8c8497a77d9085d",
- "colorKey": "h100_7b3247bf",
- "comparisonKey": "2a087c80bac58077",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T15:27:59.966964+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_12",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "fp8-saturation",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "unknown",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28247603308",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247603308",
- "createdAt": "2026-06-26T15:27:59.966964+00:00",
- "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.73599898815155,
- "p90": 102.49599814414978,
- "p95": 104.12800312042236,
- "p99": 112.19199746847153
- },
- "combine": {
- "p50": 79.42400127649307,
- "p90": 81.4720019698143,
- "p95": 82.14399963617325,
- "p99": 87.93599903583527
- },
- "roundtrip": {
- "p50": 146.84799313545227,
- "p90": 156.15999698638916,
- "p95": 159.13599729537964,
- "p99": 164.000004529953
- },
- "isolatedSum": {
- "p50": 176.16000026464462,
- "p90": 183.96800011396408,
- "p95": 186.2720027565956,
- "p99": 200.1279965043068
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.33600372076035,
- "p90": 103.93600165843964,
- "p95": 106.52799904346466,
- "p99": 111.58400028944016
- },
- "combine": {
- "p50": 80.03199845552444,
- "p90": 86.84799820184708,
- "p95": 87.61599659919739,
- "p99": 88.06400001049042
- },
- "roundtrip": {
- "p50": 151.64799988269806,
- "p90": 159.16800498962402,
- "p95": 160.35200655460358,
- "p99": 165.50399363040924
- },
- "isolatedSum": {
- "p50": 178.3680021762848,
- "p90": 190.7839998602867,
- "p95": 194.14399564266205,
- "p99": 199.64800029993057
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 99.90400075912476,
- "p90": 105.76000064611435,
- "p95": 108.15999656915665,
- "p99": 116.60800129175186
- },
- "combine": {
- "p50": 87.90399879217148,
- "p90": 90.55999666452408,
- "p95": 95.23200243711472,
- "p99": 96.57599776983261
- },
- "roundtrip": {
- "p50": 157.82399475574493,
- "p90": 163.7759953737259,
- "p95": 166.78400337696075,
- "p99": 169.95200514793396
- },
- "isolatedSum": {
- "p50": 187.80799955129623,
- "p90": 196.31999731063843,
- "p95": 203.39199900627136,
- "p99": 213.18399906158447
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 128.60800325870514,
- "p90": 133.53599607944489,
- "p95": 135.51999628543854,
- "p99": 138.49599659442902
- },
- "combine": {
- "p50": 112.57600039243698,
- "p90": 120.4800009727478,
- "p95": 120.7680031657219,
- "p99": 122.40000069141388
- },
- "roundtrip": {
- "p50": 208.3519995212555,
- "p90": 215.71199595928192,
- "p95": 217.56799519062042,
- "p99": 220.5439954996109
- },
- "isolatedSum": {
- "p50": 241.18400365114212,
- "p90": 254.0159970521927,
- "p95": 256.28799945116043,
- "p99": 260.8959972858429
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b5d97134",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.1|8c8497a77d9085d",
- "colorKey": "h100_7b3247bf",
- "comparisonKey": "b51e047646ec8fac",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:27:16.815311+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_07",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.1,
- "achievedFraction": 0.0985,
- "configuredUnits": 13,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254271442",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254271442",
- "createdAt": "2026-06-26T17:27:16.815311+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 97.24800288677216,
- "p90": 103.39199751615524,
- "p95": 105.8880016207695,
- "p99": 111.13599687814713
- },
- "combine": {
- "p50": 78.84799689054489,
- "p90": 81.727996468544,
- "p95": 85.11999994516373,
- "p99": 89.02399986982346
- },
- "roundtrip": {
- "p50": 151.36000514030457,
- "p90": 157.53600001335144,
- "p95": 159.67999398708344,
- "p99": 164.63999450206757
- },
- "isolatedSum": {
- "p50": 176.09599977731705,
- "p90": 185.11999398469925,
- "p95": 191.00800156593323,
- "p99": 200.15999674797058
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 99.29600358009338,
- "p90": 104.70400005578995,
- "p95": 106.72000050544739,
- "p99": 113.53600025177002
- },
- "combine": {
- "p50": 79.58400249481201,
- "p90": 86.97599917650223,
- "p95": 87.39200234413147,
- "p99": 91.5519967675209
- },
- "roundtrip": {
- "p50": 153.85599434375763,
- "p90": 161.28000617027283,
- "p95": 162.432000041008,
- "p99": 166.07999801635742
- },
- "isolatedSum": {
- "p50": 178.8800060749054,
- "p90": 191.67999923229218,
- "p95": 194.11200284957886,
- "p99": 205.08799701929092
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 103.29599678516388,
- "p90": 107.64800012111664,
- "p95": 109.98400300741196,
- "p99": 121.40800058841705
- },
- "combine": {
- "p50": 87.74399757385254,
- "p90": 95.20000219345093,
- "p95": 95.48799693584442,
- "p99": 97.18400239944458
- },
- "roundtrip": {
- "p50": 161.6639941930771,
- "p90": 169.50400173664093,
- "p95": 170.9440052509308,
- "p99": 175.52000284194946
- },
- "isolatedSum": {
- "p50": 191.03999435901642,
- "p90": 202.84800231456757,
- "p95": 205.47199994325638,
- "p99": 218.59200298786163
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 129.66400384902954,
- "p90": 137.79200613498688,
- "p95": 139.55199718475342,
- "p99": 143.93599331378937
- },
- "combine": {
- "p50": 113.72800171375275,
- "p90": 120.15999853610992,
- "p95": 120.83200365304947,
- "p99": 123.55200201272964
- },
- "roundtrip": {
- "p50": 211.776003241539,
- "p90": 217.21599996089935,
- "p95": 218.9439982175827,
- "p99": 222.75200486183167
- },
- "isolatedSum": {
- "p50": 243.3920055627823,
- "p90": 257.9520046710968,
- "p95": 260.3840008378029,
- "p99": 267.487995326519
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-2f9f6948",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h100_7b3247bf",
- "comparisonKey": "b51e047646ec8fac",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:29:02.253264+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_00",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254315809",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809",
- "createdAt": "2026-06-26T17:29:02.253264+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 95.74399888515472,
- "p90": 102.78400033712387,
- "p95": 104.99200224876404,
- "p99": 109.37599837779999
- },
- "combine": {
- "p50": 79.32800054550171,
- "p90": 82.07999914884567,
- "p95": 82.87999778985977,
- "p99": 88.03199976682663
- },
- "roundtrip": {
- "p50": 147.74399995803833,
- "p90": 154.6880006790161,
- "p95": 157.44000673294067,
- "p99": 171.9360053539276
- },
- "isolatedSum": {
- "p50": 175.07199943065643,
- "p90": 184.86399948596954,
- "p95": 187.8720000386238,
- "p99": 197.40799814462662
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 71.23199850320816,
- "p90": 101.27999633550644,
- "p95": 102.52799838781357,
- "p99": 107.87200182676315
- },
- "combine": {
- "p50": 72.22399860620499,
- "p90": 80.92799782752991,
- "p95": 81.44000172615051,
- "p99": 84.76799726486206
- },
- "roundtrip": {
- "p50": 127.45599448680878,
- "p90": 153.02400290966034,
- "p95": 155.64799308776855,
- "p99": 159.4880074262619
- },
- "isolatedSum": {
- "p50": 143.45599710941315,
- "p90": 182.20799416303635,
- "p95": 183.96800011396408,
- "p99": 192.6399990916252
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 95.23200243711472,
- "p90": 102.36799716949463,
- "p95": 107.84000158309937,
- "p99": 439.64800238609314
- },
- "combine": {
- "p50": 72.95999675989151,
- "p90": 81.66400343179703,
- "p95": 86.81599795818329,
- "p99": 88.92799913883209
- },
- "roundtrip": {
- "p50": 128.7360042333603,
- "p90": 159.19999778270721,
- "p95": 161.31199896335602,
- "p99": 167.1680063009262
- },
- "isolatedSum": {
- "p50": 168.19199919700623,
- "p90": 184.03200060129166,
- "p95": 194.65599954128265,
- "p99": 528.5760015249252
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 95.42399644851685,
- "p90": 102.52799838781357,
- "p95": 104.89600151777267,
- "p99": 113.53600025177002
- },
- "combine": {
- "p50": 79.58400249481201,
- "p90": 82.91199803352356,
- "p95": 87.07199990749359,
- "p99": 87.96799927949905
- },
- "roundtrip": {
- "p50": 151.48800611495972,
- "p90": 159.90400314331055,
- "p95": 162.20800578594208,
- "p99": 169.47199404239655
- },
- "isolatedSum": {
- "p50": 175.00799894332886,
- "p90": 185.43999642133713,
- "p95": 191.96800142526627,
- "p99": 201.50399953126907
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 95.71199864149094,
- "p90": 100.8640006184578,
- "p95": 102.68799960613251,
- "p99": 106.49599879980087
- },
- "combine": {
- "p50": 80.64000308513641,
- "p90": 87.90399879217148,
- "p95": 89.24800157546997,
- "p99": 95.23200243711472
- },
- "roundtrip": {
- "p50": 152.319997549057,
- "p90": 160.19199788570404,
- "p95": 162.23999857902527,
- "p99": 168.92799735069275
- },
- "isolatedSum": {
- "p50": 176.35200172662735,
- "p90": 188.76799941062927,
- "p95": 191.93600118160248,
- "p99": 201.7280012369156
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 80.86399734020233,
- "p90": 103.26399654150009,
- "p95": 105.47199845314026,
- "p99": 113.18399757146835
- },
- "combine": {
- "p50": 80.35200089216232,
- "p90": 89.31200206279755,
- "p95": 90.04800021648407,
- "p99": 95.74399888515472
- },
- "roundtrip": {
- "p50": 136.48000359535217,
- "p90": 164.60800170898438,
- "p95": 167.10400581359863,
- "p99": 175.10400712490082
- },
- "isolatedSum": {
- "p50": 161.21599823236465,
- "p90": 192.57599860429764,
- "p95": 195.51999866962433,
- "p99": 208.92799645662308
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 103.4879982471466,
- "p90": 112.8000020980835,
- "p95": 114.3679991364479,
- "p99": 125.72799623012543
- },
- "combine": {
- "p50": 96.83199971914291,
- "p90": 104.12800312042236,
- "p95": 104.99200224876404,
- "p99": 106.33599758148193
- },
- "roundtrip": {
- "p50": 170.71999609470367,
- "p90": 181.21600151062012,
- "p95": 182.91200697422028,
- "p99": 186.81600689888
- },
- "isolatedSum": {
- "p50": 200.31999796628952,
- "p90": 216.92800521850586,
- "p95": 219.36000138521194,
- "p99": 232.06399381160736
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 111.29599809646606,
- "p90": 130.87999820709229,
- "p95": 133.5040032863617,
- "p99": 139.93600010871887
- },
- "combine": {
- "p50": 106.27199709415436,
- "p90": 119.58400160074234,
- "p95": 119.99999731779099,
- "p99": 122.3360002040863
- },
- "roundtrip": {
- "p50": 197.56799936294556,
- "p90": 215.80800414085388,
- "p95": 217.92000532150269,
- "p99": 219.80799734592438
- },
- "isolatedSum": {
- "p50": 217.56799519062042,
- "p90": 250.46399980783463,
- "p95": 253.50400060415268,
- "p99": 262.2720003128052
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-3752524d",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.6|8c8497a77d9085d",
- "colorKey": "h100_7b3247bf",
- "comparisonKey": "b51e047646ec8fac",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:39.045176+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_13",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.6,
- "achievedFraction": 0.5985,
- "configuredUnits": 79,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254286950",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254286950",
- "createdAt": "2026-06-26T17:30:39.045176+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.28800302743912,
- "p90": 103.55199873447418,
- "p95": 105.66399991512299,
- "p99": 108.51199924945831
- },
- "combine": {
- "p50": 79.1039988398552,
- "p90": 81.37600123882294,
- "p95": 84.89599823951721,
- "p99": 89.91999924182892
- },
- "roundtrip": {
- "p50": 146.27200365066528,
- "p90": 156.38400614261627,
- "p95": 161.82400286197662,
- "p99": 219.2319929599762
- },
- "isolatedSum": {
- "p50": 175.3920018672943,
- "p90": 184.92799997329712,
- "p95": 190.5599981546402,
- "p99": 198.43199849128723
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 96.70399874448776,
- "p90": 102.30399668216705,
- "p95": 104.51199859380722,
- "p99": 112.22399771213531
- },
- "combine": {
- "p50": 79.58400249481201,
- "p90": 87.3280018568039,
- "p95": 87.80799806118011,
- "p99": 89.9519994854927
- },
- "roundtrip": {
- "p50": 153.3759981393814,
- "p90": 161.21600568294525,
- "p95": 162.56000101566315,
- "p99": 166.72000288963318
- },
- "isolatedSum": {
- "p50": 176.28800123929977,
- "p90": 189.63199853897095,
- "p95": 192.31999665498734,
- "p99": 202.17599719762802
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 102.88000106811523,
- "p90": 106.81600123643875,
- "p95": 109.0560033917427,
- "p99": 114.3679991364479
- },
- "combine": {
- "p50": 87.99999952316284,
- "p90": 95.48799693584442,
- "p95": 96.22400254011154,
- "p99": 119.1679984331131
- },
- "roundtrip": {
- "p50": 161.95200383663177,
- "p90": 170.0800061225891,
- "p95": 172.5119948387146,
- "p99": 460.7999920845032
- },
- "isolatedSum": {
- "p50": 190.88000059127808,
- "p90": 202.30399817228317,
- "p95": 205.28000593185425,
- "p99": 233.535997569561
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 129.08799946308136,
- "p90": 135.80800592899323,
- "p95": 137.56799697875977,
- "p99": 142.14399456977844
- },
- "combine": {
- "p50": 113.27999830245972,
- "p90": 120.44800072908401,
- "p95": 120.67200243473053,
- "p99": 123.74400347471237
- },
- "roundtrip": {
- "p50": 211.5200012922287,
- "p90": 218.176007270813,
- "p95": 219.64800357818604,
- "p99": 223.68000447750092
- },
- "isolatedSum": {
- "p50": 242.36799776554108,
- "p90": 256.25600665807724,
- "p95": 258.2399994134903,
- "p99": 265.8879980444908
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-7db267e7",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500",
- "colorKey": "h100_716e65b9",
- "comparisonKey": "259b0e9f1092ac0e",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:32:00.320566+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_15",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm) · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ffa946582edb500",
- "workloadId": "set:8:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254367516",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516",
- "createdAt": "2026-06-26T17:32:00.320566+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 95.93600034713745,
- "p90": 103.00800204277039,
- "p95": 104.38399761915207,
- "p99": 107.64800012111664
- },
- "combine": {
- "p50": 81.08799904584885,
- "p90": 87.93599903583527,
- "p95": 88.60799670219421,
- "p99": 90.36800265312195
- },
- "roundtrip": {
- "p50": 151.2639969587326,
- "p90": 158.9760035276413,
- "p95": 160.73599457740784,
- "p99": 164.06400501728058
- },
- "isolatedSum": {
- "p50": 177.0239993929863,
- "p90": 190.94400107860565,
- "p95": 192.99199432134628,
- "p99": 198.0160027742386
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 74.23999905586243,
- "p90": 96.79999947547913,
- "p95": 100.00000149011612,
- "p99": 103.7760004401207
- },
- "combine": {
- "p50": 73.98399710655212,
- "p90": 87.64799684286118,
- "p95": 88.54400366544724,
- "p99": 89.66399729251862
- },
- "roundtrip": {
- "p50": 127.32799351215363,
- "p90": 158.1439971923828,
- "p95": 159.32799875736237,
- "p99": 162.52799332141876
- },
- "isolatedSum": {
- "p50": 148.22399616241455,
- "p90": 184.4479963183403,
- "p95": 188.54400515556335,
- "p99": 193.4399977326393
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 8,
- "recvTokensMax": 16,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 74.87999647855759,
- "p90": 99.5199978351593,
- "p95": 103.20000350475311,
- "p99": 106.62399977445602
- },
- "combine": {
- "p50": 73.95199686288834,
- "p90": 87.74399757385254,
- "p95": 88.06400001049042,
- "p99": 88.76799792051315
- },
- "roundtrip": {
- "p50": 127.80800461769104,
- "p90": 156.3519984483719,
- "p95": 158.81599485874176,
- "p99": 162.33600676059723
- },
- "isolatedSum": {
- "p50": 148.83199334144592,
- "p90": 187.26399540901184,
- "p95": 191.26400351524353,
- "p99": 195.39199769496918
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 8,
- "recvTokensMax": 32,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 94.36800330877304,
- "p90": 100.09600222110748,
- "p95": 101.95200145244598,
- "p99": 107.4879989027977
- },
- "combine": {
- "p50": 80.92799782752991,
- "p90": 88.03199976682663,
- "p95": 88.86399865150452,
- "p99": 89.79199826717377
- },
- "roundtrip": {
- "p50": 149.85600113868713,
- "p90": 156.95999562740326,
- "p95": 158.1760048866272,
- "p99": 161.98399662971497
- },
- "isolatedSum": {
- "p50": 175.29600113630295,
- "p90": 188.1280019879341,
- "p95": 190.8160001039505,
- "p99": 197.27999716997147
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
- "recvTokensMax": 64,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 94.36800330877304,
- "p90": 104.80000078678131,
- "p95": 106.78400099277496,
- "p99": 115.00799655914307
- },
- "combine": {
- "p50": 86.59200370311737,
- "p90": 88.76799792051315,
- "p95": 89.56799656152725,
- "p99": 96.83199971914291
- },
- "roundtrip": {
- "p50": 150.11200308799744,
- "p90": 161.50400042533875,
- "p95": 166.24000668525696,
- "p99": 490.62401056289673
- },
- "isolatedSum": {
- "p50": 180.9600070118904,
- "p90": 193.56799870729446,
- "p95": 196.35199755430222,
- "p99": 211.83999627828598
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 8,
- "recvTokensMax": 128,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 87.0399996638298,
- "p90": 106.04800283908844,
- "p95": 110.1439967751503,
- "p99": 123.83999675512314
- },
- "combine": {
- "p50": 82.5280025601387,
- "p90": 96.3200032711029,
- "p95": 96.73599898815155,
- "p99": 97.56799787282944
- },
- "roundtrip": {
- "p50": 143.5839980840683,
- "p90": 166.55999422073364,
- "p95": 168.7680035829544,
- "p99": 175.55199563503265
- },
- "isolatedSum": {
- "p50": 169.5680022239685,
- "p90": 202.36800611019135,
- "p95": 206.87999576330185,
- "p99": 221.40799462795258
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 8,
- "recvTokensMax": 256,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 116.92799627780914,
- "p90": 126.3359934091568,
- "p95": 128.63999605178833,
- "p99": 132.6719969511032
- },
- "combine": {
- "p50": 104.19200360774994,
- "p90": 112.06399649381638,
- "p95": 112.99200356006622,
- "p99": 113.76000195741653
- },
- "roundtrip": {
- "p50": 190.49599766731262,
- "p90": 199.74400103092194,
- "p95": 202.36800611019135,
- "p99": 204.76800203323364
- },
- "isolatedSum": {
- "p50": 221.11999988555908,
- "p90": 238.39998990297318,
- "p95": 241.63199961185455,
- "p99": 246.43199890851974
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 8,
- "recvTokensMax": 512,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 129.85600531101227,
- "p90": 152.96000242233276,
- "p95": 154.78399395942688,
- "p99": 158.87999534606934
- },
- "combine": {
- "p50": 121.2799996137619,
- "p90": 129.43999469280243,
- "p95": 130.3360015153885,
- "p99": 145.34400403499603
- },
- "roundtrip": {
- "p50": 226.8799990415573,
- "p90": 240.31999707221985,
- "p95": 242.01600253582,
- "p99": 245.02399563789368
- },
- "isolatedSum": {
- "p50": 251.13600492477417,
- "p90": 282.3999971151352,
- "p95": 285.11999547481537,
- "p99": 304.22399938106537
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-c5b168ae",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c",
- "colorKey": "h100_f7ec28aa",
- "comparisonKey": "9896b8e4d81bc6a5",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:32:03.917674+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_11",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm) · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "14ded8461f2636c",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254376151",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151",
- "createdAt": "2026-06-26T17:32:03.917674+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 96.89600020647049,
- "p90": 104.032002389431,
- "p95": 106.04800283908844,
- "p99": 111.04000359773636
- },
- "combine": {
- "p50": 74.36800003051758,
- "p90": 80.03199845552444,
- "p95": 81.31200075149536,
- "p99": 82.68799632787704
- },
- "roundtrip": {
- "p50": 145.82400023937225,
- "p90": 153.76000106334686,
- "p95": 160.0639969110489,
- "p99": 226.30399465560913
- },
- "isolatedSum": {
- "p50": 171.26400023698807,
- "p90": 184.06400084495544,
- "p95": 187.3600035905838,
- "p99": 193.7279999256134
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 70.72000205516815,
- "p90": 103.93600165843964,
- "p95": 105.18400371074677,
- "p99": 113.63200098276138
- },
- "combine": {
- "p50": 71.35999947786331,
- "p90": 80.32000064849854,
- "p95": 81.18399977684021,
- "p99": 88.16000074148178
- },
- "roundtrip": {
- "p50": 126.68800354003906,
- "p90": 152.5759994983673,
- "p95": 155.32800555229187,
- "p99": 159.29600596427917
- },
- "isolatedSum": {
- "p50": 142.08000153303146,
- "p90": 184.25600230693817,
- "p95": 186.36800348758698,
- "p99": 201.79200172424316
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 70.14399766921997,
- "p90": 100.28800368309021,
- "p95": 102.55999863147736,
- "p99": 131.71200454235077
- },
- "combine": {
- "p50": 71.61600142717361,
- "p90": 79.55200225114822,
- "p95": 79.74400371313095,
- "p99": 84.22400057315826
- },
- "roundtrip": {
- "p50": 127.77599692344666,
- "p90": 153.50399911403656,
- "p95": 155.2640050649643,
- "p99": 160.73599457740784
- },
- "isolatedSum": {
- "p50": 141.75999909639359,
- "p90": 179.84000593423843,
- "p95": 182.3040023446083,
- "p99": 215.93600511550903
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 94.97600048780441,
- "p90": 100.832000374794,
- "p95": 102.30399668216705,
- "p99": 114.3999993801117
- },
- "combine": {
- "p50": 71.52000069618225,
- "p90": 81.18399977684021,
- "p95": 81.7599967122078,
- "p99": 86.94399893283844
- },
- "roundtrip": {
- "p50": 125.31200051307678,
- "p90": 153.05599570274353,
- "p95": 156.0640037059784,
- "p99": 159.42400693893433
- },
- "isolatedSum": {
- "p50": 166.49600118398666,
- "p90": 182.01600015163422,
- "p95": 184.06399339437485,
- "p99": 201.34399831295013
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 95.551997423172,
- "p90": 100.89600086212158,
- "p95": 103.26399654150009,
- "p99": 112.31999844312668
- },
- "combine": {
- "p50": 79.48800176382065,
- "p90": 86.87999844551086,
- "p95": 87.71199733018875,
- "p99": 88.22400122880936
- },
- "roundtrip": {
- "p50": 149.79200065135956,
- "p90": 158.24000537395477,
- "p95": 160.0320041179657,
- "p99": 165.69599509239197
- },
- "isolatedSum": {
- "p50": 175.03999918699265,
- "p90": 187.77599930763245,
- "p95": 190.97599387168884,
- "p99": 200.54399967193604
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 83.16799998283386,
- "p90": 99.96800124645233,
- "p95": 104.96000200510025,
- "p99": 109.11999642848969
- },
- "combine": {
- "p50": 79.8719972372055,
- "p90": 87.93599903583527,
- "p95": 89.28000181913376,
- "p99": 95.39200365543365
- },
- "roundtrip": {
- "p50": 135.26399433612823,
- "p90": 159.19999778270721,
- "p95": 161.72799468040466,
- "p99": 166.6560024023056
- },
- "isolatedSum": {
- "p50": 163.03999722003937,
- "p90": 187.9040002822876,
- "p95": 194.240003824234,
- "p99": 204.51200008392334
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 100.832000374794,
- "p90": 114.68800157308578,
- "p95": 116.67200177907944,
- "p99": 134.91199910640717
- },
- "combine": {
- "p50": 90.27200192213058,
- "p90": 103.32799702882767,
- "p95": 104.16000336408615,
- "p99": 152.12799608707428
- },
- "roundtrip": {
- "p50": 164.70399498939514,
- "p90": 182.8480064868927,
- "p95": 186.49600446224213,
- "p99": 189.40800428390503
- },
- "isolatedSum": {
- "p50": 191.1040022969246,
- "p90": 218.01599860191345,
- "p95": 220.8320051431656,
- "p99": 287.03999519348145
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 25145344,
- "combineLogicalBytes": 25145344,
- "fanoutMean": 3.42578125,
- "recvTokensMax": 510,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 121.31199985742569,
- "p90": 139.67999815940857,
- "p95": 144.57599818706512,
- "p99": 150.87999403476715
- },
- "combine": {
- "p50": 112.99200356006622,
- "p90": 120.64000219106674,
- "p95": 120.80000340938568,
- "p99": 128.51199507713318
- },
- "roundtrip": {
- "p50": 212.67199516296387,
- "p90": 228.4799963235855,
- "p95": 230.0799936056137,
- "p99": 235.74399948120117
- },
- "isolatedSum": {
- "p50": 234.3040034174919,
- "p90": 260.3200003504753,
- "p95": 265.3760015964508,
- "p99": 279.39198911190033
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-cf899bce",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836",
- "colorKey": "h100_93503624",
- "comparisonKey": "74d307ed048ea3b5",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:46:24.194442+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_02",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "a8f501af7004836",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.927734375,
- "eplbImbalanceAfter": 1.0006103515625,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28255296001",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001",
- "createdAt": "2026-06-26T17:46:24.194442+00:00",
- "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 69.72800195217133,
- "p90": 75.83999633789062,
- "p95": 77.85599678754807,
- "p99": 83.39200168848038
- },
- "combine": {
- "p50": 71.26399874687195,
- "p90": 73.40800017118454,
- "p95": 74.0479975938797,
- "p99": 78.87999713420868
- },
- "roundtrip": {
- "p50": 121.85599654912949,
- "p90": 128.12800705432892,
- "p95": 130.3039938211441,
- "p99": 134.71999764442444
- },
- "isolatedSum": {
- "p50": 140.99200069904327,
- "p90": 149.24799650907516,
- "p95": 151.90399438142776,
- "p99": 162.27199882268906
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 5.375,
- "recvTokensMax": 7,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 70.3359991312027,
- "p90": 76.25599950551987,
- "p95": 78.59200239181519,
- "p99": 84.6719965338707
- },
- "combine": {
- "p50": 71.16799801588058,
- "p90": 73.53600114583969,
- "p95": 74.27199929952621,
- "p99": 79.80799674987793
- },
- "roundtrip": {
- "p50": 127.20000743865967,
- "p90": 131.00799918174744,
- "p95": 133.27999413013458,
- "p99": 138.08000087738037
- },
- "isolatedSum": {
- "p50": 141.50399714708328,
- "p90": 149.79200065135956,
- "p95": 152.8640016913414,
- "p99": 164.47999328374863
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 1204224,
- "fanoutMean": 5.25,
- "recvTokensMax": 14,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.18399846553802,
- "p90": 102.14400291442871,
- "p95": 105.50399869680405,
- "p99": 108.44799876213074
- },
- "combine": {
- "p50": 73.40800017118454,
- "p90": 81.82399719953537,
- "p95": 87.10400015115738,
- "p99": 88.95999938249588
- },
- "roundtrip": {
- "p50": 131.8719983100891,
- "p90": 160.3199988603592,
- "p95": 162.88000345230103,
- "p99": 167.1680063009262
- },
- "isolatedSum": {
- "p50": 146.59199863672256,
- "p90": 183.96800011396408,
- "p95": 192.60799884796143,
- "p99": 197.40799814462662
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 24,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 70.30399888753891,
- "p90": 78.20799946784973,
- "p95": 81.02399855852127,
- "p99": 89.4400030374527
- },
- "combine": {
- "p50": 72.7040022611618,
- "p90": 73.91999661922455,
- "p95": 74.27199929952621,
- "p99": 79.58400249481201
- },
- "roundtrip": {
- "p50": 128.67200374603271,
- "p90": 132.83200562000275,
- "p95": 135.0719928741455,
- "p99": 140.22399485111237
- },
- "isolatedSum": {
- "p50": 143.0080011487007,
- "p90": 152.12799608707428,
- "p95": 155.29599785804749,
- "p99": 169.0240055322647
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4630528,
- "combineLogicalBytes": 4630528,
- "fanoutMean": 5.046875,
- "recvTokensMax": 45,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 82.97599852085114,
- "p90": 102.14400291442871,
- "p95": 104.70400005578995,
- "p99": 109.56799983978271
- },
- "combine": {
- "p50": 74.30399954319,
- "p90": 87.87199854850769,
- "p95": 89.12000060081482,
- "p99": 89.9519994854927
- },
- "roundtrip": {
- "p50": 132.4480026960373,
- "p90": 161.47199273109436,
- "p95": 163.26400637626648,
- "p99": 166.9120043516159
- },
- "isolatedSum": {
- "p50": 157.27999806404114,
- "p90": 190.0160014629364,
- "p95": 193.82400065660477,
- "p99": 199.51999932527542
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9447424,
- "combineLogicalBytes": 9447424,
- "fanoutMean": 5.1484375,
- "recvTokensMax": 91,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 81.40800148248672,
- "p90": 103.80800068378448,
- "p95": 105.3759977221489,
- "p99": 108.0000028014183
- },
- "combine": {
- "p50": 79.77599650621414,
- "p90": 90.08000046014786,
- "p95": 90.71999788284302,
- "p99": 247.67999351024628
- },
- "roundtrip": {
- "p50": 138.17599415779114,
- "p90": 156.3519984483719,
- "p95": 159.7760021686554,
- "p99": 163.83999586105347
- },
- "isolatedSum": {
- "p50": 161.18399798870087,
- "p90": 193.88800114393234,
- "p95": 196.0959956049919,
- "p99": 355.6799963116646
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19023872,
- "combineLogicalBytes": 19023872,
- "fanoutMean": 5.18359375,
- "recvTokensMax": 178,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 99.74399954080582,
- "p90": 115.35999923944473,
- "p95": 117.37599968910217,
- "p99": 125.2799928188324
- },
- "combine": {
- "p50": 90.55999666452408,
- "p90": 103.61599922180176,
- "p95": 104.19200360774994,
- "p99": 104.8320010304451
- },
- "roundtrip": {
- "p50": 163.87200355529785,
- "p90": 178.0479997396469,
- "p95": 180.2240014076233,
- "p99": 185.47199666500092
- },
- "isolatedSum": {
- "p50": 190.3039962053299,
- "p90": 218.9759984612465,
- "p95": 221.5680032968521,
- "p99": 230.1119938492775
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38148096,
- "combineLogicalBytes": 38148096,
- "fanoutMean": 5.197265625,
- "recvTokensMax": 350,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 113.66400122642517,
- "p90": 132.22399353981018,
- "p95": 133.88800621032715,
- "p99": 139.64800536632538
- },
- "combine": {
- "p50": 106.59199953079224,
- "p90": 114.75200206041336,
- "p95": 119.99999731779099,
- "p99": 121.91999703645706
- },
- "roundtrip": {
- "p50": 198.91199469566345,
- "p90": 213.69600296020508,
- "p95": 216.0319983959198,
- "p99": 220.60799598693848
- },
- "isolatedSum": {
- "p50": 220.2560007572174,
- "p90": 246.97599560022354,
- "p95": 253.88800352811813,
- "p99": 261.56800240278244
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 76955648,
- "combineLogicalBytes": 76955648,
- "fanoutMean": 5.2421875,
- "recvTokensMax": 687,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4eb12954",
- "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h100_5df912ff",
- "comparisonKey": "5074d4febd922e2d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:28:11.272284+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_10",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254332840",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840",
- "createdAt": "2026-06-26T17:28:11.272284+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 78.91199737787247,
- "p90": 85.21600067615509,
- "p95": 87.20000088214874,
- "p99": 93.34400296211243
- },
- "combine": {
- "p50": 79.68000322580338,
- "p90": 81.60000294446945,
- "p95": 86.91199868917465,
- "p99": 88.54400366544724
- },
- "roundtrip": {
- "p50": 133.69600474834442,
- "p90": 141.184002161026,
- "p95": 143.2960033416748,
- "p99": 151.48800611495972
- },
- "isolatedSum": {
- "p50": 158.59200060367584,
- "p90": 166.81600362062454,
- "p95": 174.1119995713234,
- "p99": 181.88800662755966
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 62.39999830722809,
- "p90": 84.35200154781342,
- "p95": 87.00799942016602,
- "p99": 96.57599776983261
- },
- "combine": {
- "p50": 71.99999690055847,
- "p90": 81.02399855852127,
- "p95": 81.44000172615051,
- "p99": 87.80799806118011
- },
- "roundtrip": {
- "p50": 116.7680025100708,
- "p90": 140.00000059604645,
- "p95": 141.6960060596466,
- "p99": 143.96800100803375
- },
- "isolatedSum": {
- "p50": 134.39999520778656,
- "p90": 165.3760001063347,
- "p95": 168.44800114631653,
- "p99": 184.38399583101273
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 62.01599910855293,
- "p90": 82.56000280380249,
- "p95": 84.76799726486206,
- "p99": 91.90399944782257
- },
- "combine": {
- "p50": 72.89600372314453,
- "p90": 86.94399893283844,
- "p95": 87.61599659919739,
- "p99": 88.22400122880936
- },
- "roundtrip": {
- "p50": 116.57600104808807,
- "p90": 143.13599467277527,
- "p95": 144.96000111103058,
- "p99": 189.40800428390503
- },
- "isolatedSum": {
- "p50": 134.91200283169746,
- "p90": 169.50400173664093,
- "p95": 172.38399386405945,
- "p99": 180.12800067663193
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 66.78400188684464,
- "p90": 82.46400207281113,
- "p95": 85.1840004324913,
- "p99": 90.65599739551544
- },
- "combine": {
- "p50": 73.02399724721909,
- "p90": 86.87999844551086,
- "p95": 87.55200356245041,
- "p99": 88.57599645853043
- },
- "roundtrip": {
- "p50": 116.67200177907944,
- "p90": 142.4960047006607,
- "p95": 143.64799857139587,
- "p99": 149.1200029850006
- },
- "isolatedSum": {
- "p50": 139.80799913406372,
- "p90": 169.344000518322,
- "p95": 172.7360039949417,
- "p99": 179.23199385404587
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 78.97599786520004,
- "p90": 84.83199775218964,
- "p95": 86.94399893283844,
- "p99": 90.87999910116196
- },
- "combine": {
- "p50": 80.4160013794899,
- "p90": 87.99999952316284,
- "p95": 88.25600147247314,
- "p99": 89.75999802350998
- },
- "roundtrip": {
- "p50": 116.73600226640701,
- "p90": 140.00000059604645,
- "p95": 143.23200285434723,
- "p99": 146.94400131702423
- },
- "isolatedSum": {
- "p50": 159.39199924468994,
- "p90": 172.83199727535248,
- "p95": 175.20000040531158,
- "p99": 180.63999712467194
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 75.58400183916092,
- "p90": 84.6719965338707,
- "p95": 86.20800077915192,
- "p99": 90.97599983215332
- },
- "combine": {
- "p50": 80.19199967384338,
- "p90": 88.51200342178345,
- "p95": 95.10400146245956,
- "p99": 111.77600175142288
- },
- "roundtrip": {
- "p50": 143.16800236701965,
- "p90": 153.28000485897064,
- "p95": 154.7520011663437,
- "p99": 170.6240028142929
- },
- "isolatedSum": {
- "p50": 155.7760015130043,
- "p90": 173.18399995565414,
- "p95": 181.31200224161148,
- "p99": 202.7520015835762
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 85.15200018882751,
- "p90": 96.67199850082397,
- "p95": 98.30400347709656,
- "p99": 158.65600109100342
- },
- "combine": {
- "p50": 91.20000153779984,
- "p90": 105.02400249242783,
- "p95": 106.04800283908844,
- "p99": 127.87200510501862
- },
- "roundtrip": {
- "p50": 151.8079936504364,
- "p90": 167.67999529838562,
- "p95": 172.06400632858276,
- "p99": 198.2399970293045
- },
- "isolatedSum": {
- "p50": 176.35200172662735,
- "p90": 201.6960009932518,
- "p95": 204.352006316185,
- "p99": 286.52800619602203
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 104.73600029945374,
- "p90": 119.64800208806992,
- "p95": 128.03199887275696,
- "p99": 401.43999457359314
- },
- "combine": {
- "p50": 106.49599879980087,
- "p90": 120.83200365304947,
- "p95": 121.47200107574463,
- "p99": 128.00000607967377
- },
- "roundtrip": {
- "p50": 187.45599687099457,
- "p90": 201.34399831295013,
- "p95": 202.55999267101288,
- "p99": 206.68800175189972
- },
- "isolatedSum": {
- "p50": 211.2319990992546,
- "p90": 240.48000574111938,
- "p95": 249.5039999485016,
- "p99": 529.4400006532669
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-76b84ec2",
- "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_17694d2c",
- "comparisonKey": "d31efe4aa43e0223",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:47:16.080205+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_11",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271551406",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271551406",
- "createdAt": "2026-06-26T23:47:16.080205+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 77.7600035071373,
- "p90": 84.25600081682205,
- "p95": 86.496002972126,
- "p99": 92.57599711418152
- },
- "combine": {
- "p50": 75.9039968252182,
- "p90": 81.95199817419052,
- "p95": 82.40000158548355,
- "p99": 87.2960016131401
- },
- "roundtrip": {
- "p50": 131.45600259304047,
- "p90": 136.25599443912506,
- "p95": 138.59200477600098,
- "p99": 142.68800616264343
- },
- "isolatedSum": {
- "p50": 153.6640003323555,
- "p90": 166.20799899101257,
- "p95": 168.89600455760956,
- "p99": 179.87199872732162
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 66.23999774456024,
- "p90": 80.99199831485748,
- "p95": 83.13599973917007,
- "p99": 87.52000331878662
- },
- "combine": {
- "p50": 72.06399738788605,
- "p90": 81.85599744319916,
- "p95": 82.11199939250946,
- "p99": 85.91999858617783
- },
- "roundtrip": {
- "p50": 115.55200070142746,
- "p90": 136.06399297714233,
- "p95": 137.9839926958084,
- "p99": 142.4960047006607
- },
- "isolatedSum": {
- "p50": 138.3039951324463,
- "p90": 162.84799575805664,
- "p95": 165.24799913167953,
- "p99": 173.44000190496445
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 77.60000228881836,
- "p90": 81.69600367546082,
- "p95": 83.93599838018417,
- "p99": 89.02399986982346
- },
- "combine": {
- "p50": 79.52000200748444,
- "p90": 82.20800012350082,
- "p95": 83.16799998283386,
- "p99": 87.2960016131401
- },
- "roundtrip": {
- "p50": 133.82400572299957,
- "p90": 140.86399972438812,
- "p95": 143.10400187969208,
- "p99": 149.72800016403198
- },
- "isolatedSum": {
- "p50": 157.1200042963028,
- "p90": 163.90400379896164,
- "p95": 167.10399836301804,
- "p99": 176.32000148296356
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 77.66400277614594,
- "p90": 83.13599973917007,
- "p95": 87.8399983048439,
- "p99": 131.67999684810638
- },
- "combine": {
- "p50": 81.216000020504,
- "p90": 82.71999657154083,
- "p95": 84.03199911117554,
- "p99": 90.20800143480301
- },
- "roundtrip": {
- "p50": 134.68800485134125,
- "p90": 139.55199718475342,
- "p95": 142.752006649971,
- "p99": 145.56799829006195
- },
- "isolatedSum": {
- "p50": 158.88000279664993,
- "p90": 165.8559963107109,
- "p95": 171.87199741601944,
- "p99": 221.8879982829094
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 77.79199630022049,
- "p90": 81.66400343179703,
- "p95": 84.73599702119827,
- "p99": 87.23200112581253
- },
- "combine": {
- "p50": 81.69600367546082,
- "p90": 84.79999750852585,
- "p95": 88.95999938249588,
- "p99": 90.27200192213058
- },
- "roundtrip": {
- "p50": 135.29600203037262,
- "p90": 143.5839980840683,
- "p95": 144.96000111103058,
- "p99": 150.30400454998016
- },
- "isolatedSum": {
- "p50": 159.4879999756813,
- "p90": 166.46400094032288,
- "p95": 173.69599640369415,
- "p99": 177.50400304794312
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 83.42400193214417,
- "p90": 88.3840024471283,
- "p95": 89.28000181913376,
- "p99": 95.20000219345093
- },
- "combine": {
- "p50": 81.44000172615051,
- "p90": 89.9839997291565,
- "p95": 90.27200192213058,
- "p99": 92.47999638319016
- },
- "roundtrip": {
- "p50": 129.18399274349213,
- "p90": 144.51199769973755,
- "p95": 147.0080018043518,
- "p99": 152.73599326610565
- },
- "isolatedSum": {
- "p50": 164.86400365829468,
- "p90": 178.3680021762848,
- "p95": 179.55200374126434,
- "p99": 187.67999857664108
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 93.56799721717834,
- "p90": 101.40799731016159,
- "p95": 102.36799716949463,
- "p99": 109.47199910879135
- },
- "combine": {
- "p50": 94.81599926948547,
- "p90": 99.61599856615067,
- "p95": 102.33599692583084,
- "p99": 105.82400113344193
- },
- "roundtrip": {
- "p50": 158.78400206565857,
- "p90": 165.72800278663635,
- "p95": 167.04000532627106,
- "p99": 170.01600563526154
- },
- "isolatedSum": {
- "p50": 188.38399648666382,
- "p90": 201.02399587631226,
- "p95": 204.70399409532547,
- "p99": 215.29600024223328
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 112.41599917411804,
- "p90": 120.4800009727478,
- "p95": 123.48800152540207,
- "p99": 303.6800026893616
- },
- "combine": {
- "p50": 111.90400272607803,
- "p90": 117.34399944543839,
- "p95": 120.03199756145477,
- "p99": 125.08800625801086
- },
- "roundtrip": {
- "p50": 192.80000030994415,
- "p90": 199.74400103092194,
- "p95": 201.9519954919815,
- "p99": 206.9759964942932
- },
- "isolatedSum": {
- "p50": 224.32000190019608,
- "p90": 237.8240004181862,
- "p95": 243.51999908685684,
- "p99": 428.76800894737244
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-6f4d88a5",
- "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_8abde1a9",
- "comparisonKey": "a63125ec759ccc03",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:48:24.132792+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_00",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 LL",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271587010",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271587010",
- "createdAt": "2026-06-26T23:48:24.132792+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 47.29599878191948,
- "p90": 48.70399832725525,
- "p95": 49.02400076389313,
- "p99": 54.75199967622757
- },
- "combine": {
- "p50": 36.57599911093712,
- "p90": 37.408001720905304,
- "p95": 38.59199956059456,
- "p99": 44.60800066590309
- },
- "roundtrip": {
- "p50": 58.97599831223488,
- "p90": 66.6240006685257,
- "p95": 67.1359971165657,
- "p99": 67.6800012588501
- },
- "isolatedSum": {
- "p50": 83.8719978928566,
- "p90": 86.11200004816055,
- "p95": 87.61600032448769,
- "p99": 99.36000034213066
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 40.32000154256821,
- "p90": 48.51200059056282,
- "p95": 48.73599857091904,
- "p99": 53.82400006055832
- },
- "combine": {
- "p50": 35.77600046992302,
- "p90": 37.02399879693985,
- "p95": 38.94399851560593,
- "p99": 44.47999969124794
- },
- "roundtrip": {
- "p50": 56.57599866390228,
- "p90": 65.05600363016129,
- "p95": 66.27199798822403,
- "p99": 67.07199662923813
- },
- "isolatedSum": {
- "p50": 76.09600201249123,
- "p90": 85.53599938750267,
- "p95": 87.67999708652496,
- "p99": 98.30399975180626
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 42.27200150489807,
- "p90": 48.70399832725525,
- "p95": 49.056001007556915,
- "p99": 55.39200082421303
- },
- "combine": {
- "p50": 36.70400008559227,
- "p90": 37.50399872660637,
- "p95": 43.07200014591217,
- "p99": 45.05600035190582
- },
- "roundtrip": {
- "p50": 59.167999774217606,
- "p90": 66.880002617836,
- "p95": 67.45599955320358,
- "p99": 68.57600063085556
- },
- "isolatedSum": {
- "p50": 78.97600159049034,
- "p90": 86.20799705386162,
- "p95": 92.12800115346909,
- "p99": 100.44800117611885
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 47.359999269247055,
- "p90": 48.70399832725525,
- "p95": 48.895999789237976,
- "p99": 55.26399984955788
- },
- "combine": {
- "p50": 36.57599911093712,
- "p90": 43.2640016078949,
- "p95": 43.776001781225204,
- "p99": 45.024000108242035
- },
- "roundtrip": {
- "p50": 64.67200070619583,
- "p90": 67.10399687290192,
- "p95": 67.29599833488464,
- "p99": 69.47200000286102
- },
- "isolatedSum": {
- "p50": 83.93599838018417,
- "p90": 91.96799993515015,
- "p95": 92.67200157046318,
- "p99": 100.28799995779991
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 48.448000103235245,
- "p90": 55.64799904823303,
- "p95": 56.2559999525547,
- "p99": 56.89600110054016
- },
- "combine": {
- "p50": 43.776001781225204,
- "p90": 44.73600164055824,
- "p95": 44.89599913358688,
- "p99": 48.22399839758873
- },
- "roundtrip": {
- "p50": 66.880002617836,
- "p90": 73.82400333881378,
- "p95": 74.68800246715546,
- "p99": 75.29599964618683
- },
- "isolatedSum": {
- "p50": 92.22400188446045,
- "p90": 100.38400068879128,
- "p95": 101.15199908614159,
- "p99": 105.11999949812889
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 49.60000142455101,
- "p90": 56.8000003695488,
- "p95": 57.08799883723259,
- "p99": 59.167999774217606
- },
- "combine": {
- "p50": 51.00800096988678,
- "p90": 52.86400020122528,
- "p95": 53.0879981815815,
- "p99": 53.98400127887726
- },
- "roundtrip": {
- "p50": 75.39200037717819,
- "p90": 83.26400071382523,
- "p95": 83.74399691820145,
- "p99": 84.63999629020691
- },
- "isolatedSum": {
- "p50": 100.60800239443779,
- "p90": 109.66400057077408,
- "p95": 110.17599701881409,
- "p99": 113.15200105309486
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 64.92800265550613,
- "p90": 67.45599955320358,
- "p95": 72.41600006818771,
- "p99": 74.0479975938797
- },
- "combine": {
- "p50": 61.055999249219894,
- "p90": 63.1679967045784,
- "p95": 68.54400038719177,
- "p99": 77.18399912118912
- },
- "roundtrip": {
- "p50": 105.76000064611435,
- "p90": 108.67200046777725,
- "p95": 109.18399691581726,
- "p99": 113.69600147008896
- },
- "isolatedSum": {
- "p50": 125.98400190472603,
- "p90": 130.62399625778198,
- "p95": 140.9600004553795,
- "p99": 151.23199671506882
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 86.04799956083298,
- "p90": 91.71199798583984,
- "p95": 92.83199906349182,
- "p99": 94.62399780750275
- },
- "combine": {
- "p50": 94.36800330877304,
- "p90": 96.79999947547913,
- "p95": 97.82399982213974,
- "p99": 218.78400444984436
- },
- "roundtrip": {
- "p50": 152.8960019350052,
- "p90": 158.91200304031372,
- "p95": 159.67999398708344,
- "p99": 163.2000058889389
- },
- "isolatedSum": {
- "p50": 180.41600286960602,
- "p90": 188.51199746131897,
- "p95": 190.65599888563156,
- "p99": 313.4080022573471
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-fecf5035",
- "identity": "h100|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_45e1ef29",
- "comparisonKey": "b17b52153b29fbde",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:48:28.951078+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_11",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 LL",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271590306",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271590306",
- "createdAt": "2026-06-26T23:48:28.951078+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 42.17600077390671,
- "p90": 48.928000032901764,
- "p95": 49.8879998922348,
- "p99": 51.77599936723709
- },
- "combine": {
- "p50": 36.99199855327606,
- "p90": 38.176000118255615,
- "p95": 38.40000182390213,
- "p99": 44.03200000524521
- },
- "roundtrip": {
- "p50": 59.42400172352791,
- "p90": 61.216000467538834,
- "p95": 61.63199990987778,
- "p99": 69.31199878454208
- },
- "isolatedSum": {
- "p50": 79.16799932718277,
- "p90": 87.10400015115738,
- "p95": 88.28800171613693,
- "p99": 95.8079993724823
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 42.30400174856186,
- "p90": 49.375999718904495,
- "p95": 49.95200037956238,
- "p99": 51.80799961090088
- },
- "combine": {
- "p50": 38.11199963092804,
- "p90": 39.0079990029335,
- "p95": 39.84000161290169,
- "p99": 45.9199994802475
- },
- "roundtrip": {
- "p50": 60.47999858856201,
- "p90": 61.69600039720535,
- "p95": 63.90400230884552,
- "p99": 69.21599805355072
- },
- "isolatedSum": {
- "p50": 80.4160013794899,
- "p90": 88.383998721838,
- "p95": 89.79200199246407,
- "p99": 97.72799909114838
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 42.367998510599136,
- "p90": 49.855999648571014,
- "p95": 50.20799860358238,
- "p99": 57.95200169086456
- },
- "combine": {
- "p50": 37.47199848294258,
- "p90": 38.7520007789135,
- "p95": 39.03999924659729,
- "p99": 46.30399867892265
- },
- "roundtrip": {
- "p50": 59.26400050520897,
- "p90": 61.983998864889145,
- "p95": 63.19999694824219,
- "p99": 69.50400024652481
- },
- "isolatedSum": {
- "p50": 79.83999699354172,
- "p90": 88.60800042748451,
- "p95": 89.24799785017967,
- "p99": 104.25600036978722
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 42.81599819660187,
- "p90": 49.247998744249344,
- "p95": 49.855999648571014,
- "p99": 51.42400041222572
- },
- "combine": {
- "p50": 37.9519984126091,
- "p90": 38.784001022577286,
- "p95": 40.352001786231995,
- "p99": 46.39999940991402
- },
- "roundtrip": {
- "p50": 60.63999980688095,
- "p90": 68.35199892520905,
- "p95": 68.80000233650208,
- "p99": 69.88800317049026
- },
- "isolatedSum": {
- "p50": 80.76799660921097,
- "p90": 88.03199976682663,
- "p95": 90.20800143480301,
- "p99": 97.82399982213974
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 43.74400153756142,
- "p90": 50.23999884724617,
- "p95": 50.84799975156784,
- "p99": 57.18399956822395
- },
- "combine": {
- "p50": 38.2080003619194,
- "p90": 45.791998505592346,
- "p95": 46.08000069856644,
- "p99": 49.056001007556915
- },
- "roundtrip": {
- "p50": 66.91200286149979,
- "p90": 69.15199756622314,
- "p95": 69.98399645090103,
- "p99": 76.7040029168129
- },
- "isolatedSum": {
- "p50": 81.95200189948082,
- "p90": 96.03199735283852,
- "p95": 96.92800045013428,
- "p99": 106.24000057578087
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 50.464000552892685,
- "p90": 52.352000027894974,
- "p95": 57.023998349905014,
- "p99": 59.90400165319443
- },
- "combine": {
- "p50": 46.68800160288811,
- "p90": 48.128001391887665,
- "p95": 49.056001007556915,
- "p99": 54.84800040721893
- },
- "roundtrip": {
- "p50": 76.76800340414047,
- "p90": 84.44800227880478,
- "p95": 85.21600067615509,
- "p99": 86.30400151014328
- },
- "isolatedSum": {
- "p50": 97.15200215578079,
- "p90": 100.48000141978264,
- "p95": 106.07999935746193,
- "p99": 114.75200206041336
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 61.69600039720535,
- "p90": 66.6240006685257,
- "p95": 67.55200028419495,
- "p99": 73.7600028514862
- },
- "combine": {
- "p50": 62.17600032687187,
- "p90": 63.551999628543854,
- "p95": 64.06400352716446,
- "p99": 70.49600034952164
- },
- "roundtrip": {
- "p50": 102.11200267076492,
- "p90": 109.8560020327568,
- "p95": 110.27199774980545,
- "p99": 111.39199882745743
- },
- "isolatedSum": {
- "p50": 123.87200072407722,
- "p90": 130.17600029706955,
- "p95": 131.6160038113594,
- "p99": 144.25600320100784
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 85.56800335645676,
- "p90": 89.50400352478027,
- "p95": 90.14400094747543,
- "p99": 95.45599669218063
- },
- "combine": {
- "p50": 91.45600348711014,
- "p90": 99.16800260543823,
- "p95": 99.80800002813339,
- "p99": 101.05600208044052
- },
- "roundtrip": {
- "p50": 158.52800011634827,
- "p90": 164.60800170898438,
- "p95": 166.52800142765045,
- "p99": 168.38400065898895
- },
- "isolatedSum": {
- "p50": 177.0240068435669,
- "p90": 188.6720061302185,
- "p95": 189.95200097560883,
- "p99": 196.51199877262115
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f1655975",
- "identity": "h100|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h100_81ce2214",
- "comparisonKey": "16f06985ac4d7bde",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:31:24.570568+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_02",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 LL (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254350430",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254350430",
- "createdAt": "2026-06-26T17:31:24.570568+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 42.49599948525429,
- "p90": 50.27199909090996,
- "p95": 50.87999999523163,
- "p99": 57.920001447200775
- },
- "combine": {
- "p50": 37.98399865627289,
- "p90": 39.135999977588654,
- "p95": 45.3759990632534,
- "p99": 46.911999583244324
- },
- "roundtrip": {
- "p50": 60.83200126886368,
- "p90": 62.272001057863235,
- "p95": 67.90400296449661,
- "p99": 69.88800317049026
- },
- "isolatedSum": {
- "p50": 80.47999814152718,
- "p90": 89.40799906849861,
- "p95": 96.25599905848503,
- "p99": 104.8320010304451
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 49.02400076389313,
- "p90": 50.40000006556511,
- "p95": 50.87999999523163,
- "p99": 57.11999908089638
- },
- "combine": {
- "p50": 38.2080003619194,
- "p90": 38.84800150990486,
- "p95": 39.64800015091896,
- "p99": 45.85599899291992
- },
- "roundtrip": {
- "p50": 61.216000467538834,
- "p90": 67.84000247716904,
- "p95": 68.9919963479042,
- "p99": 69.88800317049026
- },
- "isolatedSum": {
- "p50": 87.23200112581253,
- "p90": 89.24800157546997,
- "p95": 90.52800014615059,
- "p99": 102.9759980738163
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 42.75200143456459,
- "p90": 50.04800111055374,
- "p95": 50.52800104022026,
- "p99": 57.88800120353699
- },
- "combine": {
- "p50": 37.9519984126091,
- "p90": 38.84800150990486,
- "p95": 40.44799879193306,
- "p99": 46.52800038456917
- },
- "roundtrip": {
- "p50": 60.736000537872314,
- "p90": 62.431998550891876,
- "p95": 67.9360032081604,
- "p99": 70.0799971818924
- },
- "isolatedSum": {
- "p50": 80.70399984717369,
- "p90": 88.8960026204586,
- "p95": 90.97599983215332,
- "p99": 104.41600158810616
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 49.12000149488449,
- "p90": 50.36799982190132,
- "p95": 50.783999264240265,
- "p99": 56.44800141453743
- },
- "combine": {
- "p50": 38.2080003619194,
- "p90": 39.8080013692379,
- "p95": 44.89599913358688,
- "p99": 46.23999819159508
- },
- "roundtrip": {
- "p50": 61.08799949288368,
- "p90": 68.54400038719177,
- "p95": 69.023996591568,
- "p99": 70.01599669456482
- },
- "isolatedSum": {
- "p50": 87.3280018568039,
- "p90": 90.17600119113922,
- "p95": 95.67999839782715,
- "p99": 102.68799960613251
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 49.536000937223434,
- "p90": 50.783999264240265,
- "p95": 52.73599922657013,
- "p99": 58.079998940229416
- },
- "combine": {
- "p50": 45.24800181388855,
- "p90": 46.431999653577805,
- "p95": 46.68800160288811,
- "p99": 48.48000034689903
- },
- "roundtrip": {
- "p50": 68.67200136184692,
- "p90": 70.30399888753891,
- "p95": 75.42400062084198,
- "p99": 77.504001557827
- },
- "isolatedSum": {
- "p50": 94.78400275111198,
- "p90": 97.21599891781807,
- "p95": 99.42400082945824,
- "p99": 106.55999928712845
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 50.52800104022026,
- "p90": 57.5999990105629,
- "p95": 58.079998940229416,
- "p99": 58.97599831223488
- },
- "combine": {
- "p50": 46.592000871896744,
- "p90": 53.568001836538315,
- "p95": 54.207999259233475,
- "p99": 55.10399863123894
- },
- "roundtrip": {
- "p50": 77.56800204515457,
- "p90": 85.34400165081024,
- "p95": 85.79199761152267,
- "p99": 86.496002972126
- },
- "isolatedSum": {
- "p50": 97.120001912117,
- "p90": 111.16800084710121,
- "p95": 112.28799819946289,
- "p99": 114.07999694347382
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 66.01600348949432,
- "p90": 107.35999792814255,
- "p95": 108.06400328874588,
- "p99": 109.40799862146378
- },
- "combine": {
- "p50": 62.52799928188324,
- "p90": 63.93600255250931,
- "p95": 65.85600227117538,
- "p99": 79.29600030183792
- },
- "roundtrip": {
- "p50": 102.39999741315842,
- "p90": 110.1439967751503,
- "p95": 110.68800091743469,
- "p99": 112.89600282907486
- },
- "isolatedSum": {
- "p50": 128.54400277137756,
- "p90": 171.29600048065186,
- "p95": 173.92000555992126,
- "p99": 188.7039989233017
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 87.2960016131401,
- "p90": 90.91199934482574,
- "p95": 94.08000111579895,
- "p99": 95.51999717950821
- },
- "combine": {
- "p50": 88.86399865150452,
- "p90": 95.64799815416336,
- "p95": 96.3520035147667,
- "p99": 97.43999689817429
- },
- "roundtrip": {
- "p50": 153.21600437164307,
- "p90": 159.90400314331055,
- "p95": 160.67199409008026,
- "p99": 161.95200383663177
- },
- "isolatedSum": {
- "p50": 176.16000026464462,
- "p90": 186.5599974989891,
- "p95": 190.43200463056564,
- "p99": 192.9599940776825
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-075b23a8",
- "identity": "h100|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "h100_a96c99f3",
- "comparisonKey": "b300aeac7d2a6068",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:15:32.751842+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_03",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287505969",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287505969",
- "createdAt": "2026-06-27T11:15:32.751842+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 93.34400296211243,
- "p90": 97.15200215578079,
- "p95": 99.2640033364296,
- "p99": 102.20800340175629
- },
- "combine": {
- "p50": 60.15999987721443,
- "p90": 61.63199990987778,
- "p95": 63.07200342416763,
- "p99": 68.25599819421768
- },
- "roundtrip": {
- "p50": 174.97600615024567,
- "p90": 179.55200374126434,
- "p95": 182.40000307559967,
- "p99": 185.5359971523285
- },
- "isolatedSum": {
- "p50": 153.50400283932686,
- "p90": 158.78400206565857,
- "p95": 162.33600676059723,
- "p99": 170.46400159597397
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 70.3359991312027,
- "p90": 95.0080007314682,
- "p95": 98.11200201511383,
- "p99": 103.4879982471466
- },
- "combine": {
- "p50": 53.18399891257286,
- "p90": 61.11999973654747,
- "p95": 61.69600039720535,
- "p99": 64.19199705123901
- },
- "roundtrip": {
- "p50": 145.4080045223236,
- "p90": 176.70400440692902,
- "p95": 179.26399409770966,
- "p99": 185.44000387191772
- },
- "isolatedSum": {
- "p50": 123.51999804377556,
- "p90": 156.12800046801567,
- "p95": 159.80800241231918,
- "p99": 167.67999529838562
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 352256,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 74.68800246715546,
- "p90": 98.04800152778625,
- "p95": 100.0640019774437,
- "p99": 110.97600311040878
- },
- "combine": {
- "p50": 52.191998809576035,
- "p90": 62.431998550891876,
- "p95": 63.1679967045784,
- "p99": 67.52000004053116
- },
- "roundtrip": {
- "p50": 145.9520012140274,
- "p90": 179.77599799633026,
- "p95": 183.07200074195862,
- "p99": 188.06399405002594
- },
- "isolatedSum": {
- "p50": 126.88000127673149,
- "p90": 160.48000007867813,
- "p95": 163.2319986820221,
- "p99": 178.49600315093994
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 692224,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 70.23999840021133,
- "p90": 96.12800180912018,
- "p95": 98.43199700117111,
- "p99": 103.42399775981903
- },
- "combine": {
- "p50": 53.75999957323074,
- "p90": 62.752000987529755,
- "p95": 63.87200206518173,
- "p99": 71.87200337648392
- },
- "roundtrip": {
- "p50": 146.2399959564209,
- "p90": 179.83999848365784,
- "p95": 182.81599879264832,
- "p99": 186.71999871730804
- },
- "isolatedSum": {
- "p50": 123.99999797344208,
- "p90": 158.88000279664993,
- "p95": 162.30399906635284,
- "p99": 175.29600113630295
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1372160,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 93.85599941015244,
- "p90": 98.14400225877762,
- "p95": 100.832000374794,
- "p99": 104.60799932479858
- },
- "combine": {
- "p50": 62.144000083208084,
- "p90": 63.80800157785416,
- "p95": 65.08799642324448,
- "p99": 69.24799829721451
- },
- "roundtrip": {
- "p50": 147.2959965467453,
- "p90": 180.7679980993271,
- "p95": 184.86399948596954,
- "p99": 189.82400000095367
- },
- "isolatedSum": {
- "p50": 155.99999949336052,
- "p90": 161.95200383663177,
- "p95": 165.91999679803848,
- "p99": 173.8559976220131
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2732032,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 71.10399752855301,
- "p90": 95.10400146245956,
- "p95": 97.75999933481216,
- "p99": 105.92000186443329
- },
- "combine": {
- "p50": 57.95200169086456,
- "p90": 66.84800237417221,
- "p95": 67.4239993095398,
- "p99": 71.74400240182877
- },
- "roundtrip": {
- "p50": 150.9760022163391,
- "p90": 184.25600230693817,
- "p95": 188.7039989233017,
- "p99": 192.80000030994415
- },
- "isolatedSum": {
- "p50": 129.05599921941757,
- "p90": 161.95200383663177,
- "p95": 165.18399864435196,
- "p99": 177.66400426626205
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5562368,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 74.43200051784515,
- "p90": 96.54399752616882,
- "p95": 100.5759984254837,
- "p99": 110.75200140476227
- },
- "combine": {
- "p50": 66.17599725723267,
- "p90": 75.39200037717819,
- "p95": 76.22399926185608,
- "p99": 80.79999685287476
- },
- "roundtrip": {
- "p50": 158.75199437141418,
- "p90": 192.51200556755066,
- "p95": 196.19199633598328,
- "p99": 201.6959935426712
- },
- "isolatedSum": {
- "p50": 140.60799777507782,
- "p90": 171.93599790334702,
- "p95": 176.79999768733978,
- "p99": 191.55199825763702
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 11096064,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 80.44800162315369,
- "p90": 96.67199850082397,
- "p95": 99.23200309276581,
- "p99": 107.04000294208527
- },
- "combine": {
- "p50": 78.3040001988411,
- "p90": 88.79999816417694,
- "p95": 89.63199704885483,
- "p99": 92.83199906349182
- },
- "roundtrip": {
- "p50": 173.21600019931793,
- "p90": 207.519993185997,
- "p95": 211.13599836826324,
- "p99": 220.64000368118286
- },
- "isolatedSum": {
- "p50": 158.75200182199478,
- "p90": 185.47199666500092,
- "p95": 188.86400014162064,
- "p99": 199.8720020055771
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-1bb82fc0",
- "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "h100_97196257",
- "comparisonKey": "efcc4c7d487df84c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:08.338542+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_02",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271676478",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271676478",
- "createdAt": "2026-06-26T23:51:08.338542+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 184.7359985113144,
- "p90": 193.08799505233765,
- "p95": 196.86399400234222,
- "p99": 204.25599813461304
- },
- "combine": {
- "p50": 49.79199916124344,
- "p90": 51.96800082921982,
- "p95": 53.79199981689453,
- "p99": 56.86400085687637
- },
- "roundtrip": {
- "p50": 218.9760059118271,
- "p90": 226.52800381183624,
- "p95": 230.0799936056137,
- "p99": 235.6480062007904
- },
- "isolatedSum": {
- "p50": 234.52799767255783,
- "p90": 245.05599588155746,
- "p95": 250.65599381923676,
- "p99": 261.1199989914894
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 183.87199938297272,
- "p90": 192.19200313091278,
- "p95": 195.16800343990326,
- "p99": 201.56799256801605
- },
- "combine": {
- "p50": 50.87999999523163,
- "p90": 54.17599901556969,
- "p95": 55.67999929189682,
- "p99": 59.328000992536545
- },
- "roundtrip": {
- "p50": 220.12799978256226,
- "p90": 227.87199914455414,
- "p95": 230.43200373649597,
- "p99": 237.31200397014618
- },
- "isolatedSum": {
- "p50": 234.75199937820435,
- "p90": 246.36800214648247,
- "p95": 250.84800273180008,
- "p99": 260.8959935605526
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 352256,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 187.77599930763245,
- "p90": 268.0320143699646,
- "p95": 271.36000990867615,
- "p99": 282.49600529670715
- },
- "combine": {
- "p50": 52.44800075888634,
- "p90": 63.90400230884552,
- "p95": 64.86400216817856,
- "p99": 69.76000219583511
- },
- "roundtrip": {
- "p50": 225.3440022468567,
- "p90": 308.9280128479004,
- "p95": 312.48000264167786,
- "p99": 320.5440044403076
- },
- "isolatedSum": {
- "p50": 240.22400006651878,
- "p90": 331.9360166788101,
- "p95": 336.2240120768547,
- "p99": 352.25600749254227
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 692224,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 184.03199315071106,
- "p90": 193.31200420856476,
- "p95": 197.79199361801147,
- "p99": 205.9839963912964
- },
- "combine": {
- "p50": 51.7439991235733,
- "p90": 55.296000093221664,
- "p95": 57.312000542879105,
- "p99": 63.19999694824219
- },
- "roundtrip": {
- "p50": 220.8320051431656,
- "p90": 228.7680059671402,
- "p95": 231.455996632576,
- "p99": 239.55200612545013
- },
- "isolatedSum": {
- "p50": 235.77599227428436,
- "p90": 248.60800430178642,
- "p95": 255.10399416089058,
- "p99": 269.1839933395386
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1372160,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 187.96800076961517,
- "p90": 273.24798703193665,
- "p95": 286.6879999637604,
- "p99": 400.06399154663086
- },
- "combine": {
- "p50": 53.75999957323074,
- "p90": 65.15199691057205,
- "p95": 67.45599955320358,
- "p99": 75.23199915885925
- },
- "roundtrip": {
- "p50": 225.600004196167,
- "p90": 310.8479976654053,
- "p95": 322.6880133152008,
- "p99": 449.7919976711273
- },
- "isolatedSum": {
- "p50": 241.72800034284592,
- "p90": 338.3999839425087,
- "p95": 354.14399951696396,
- "p99": 475.2959907054901
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2732032,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 189.11999464035034,
- "p90": 271.36000990867615,
- "p95": 286.9440019130707,
- "p99": 324.0959942340851
- },
- "combine": {
- "p50": 56.44800141453743,
- "p90": 68.57600063085556,
- "p95": 69.11999732255936,
- "p99": 73.56800138950348
- },
- "roundtrip": {
- "p50": 226.27200186252594,
- "p90": 234.14400219917297,
- "p95": 238.68800699710846,
- "p99": 254.27201390266418
- },
- "isolatedSum": {
- "p50": 245.56799605488777,
- "p90": 339.9360105395317,
- "p95": 356.06399923563004,
- "p99": 397.66399562358856
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5562368,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 189.34400379657745,
- "p90": 270.08000016212463,
- "p95": 275.2639949321747,
- "p99": 289.98398780822754
- },
- "combine": {
- "p50": 64.60800021886826,
- "p90": 76.89599692821503,
- "p95": 78.23999971151352,
- "p99": 82.2720006108284
- },
- "roundtrip": {
- "p50": 238.3359968662262,
- "p90": 318.015992641449,
- "p95": 321.4719891548157,
- "p99": 329.72800731658936
- },
- "isolatedSum": {
- "p50": 253.9520040154457,
- "p90": 346.97599709033966,
- "p95": 353.5039946436882,
- "p99": 372.25598841905594
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 11096064,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 192.19200313091278,
- "p90": 272.15999364852905,
- "p95": 275.7120132446289,
- "p99": 291.29600524902344
- },
- "combine": {
- "p50": 78.17599922418594,
- "p90": 87.93599903583527,
- "p95": 89.15200084447861,
- "p99": 95.20000219345093
- },
- "roundtrip": {
- "p50": 255.3279995918274,
- "p90": 335.6480002403259,
- "p95": 343.9359962940216,
- "p99": 380.0320029258728
- },
- "isolatedSum": {
- "p50": 270.3680023550987,
- "p90": 360.0959926843643,
- "p95": 364.8640140891075,
- "p99": 386.49600744247437
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-c961a187",
- "identity": "h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef",
- "colorKey": "h100_97196257",
- "comparisonKey": "994b6e44326c8d14",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:36.382828+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_11",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "shape 5120/8/160",
- "shape": {
- "hidden": 5120,
- "topk": 8,
- "experts": 160,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "be1b44a963bd4ef",
- "workloadId": "set:8:34e5874082f8ea8f",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271691858",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271691858",
- "createdAt": "2026-06-26T23:51:36.382828+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 196.03200256824493,
- "p90": 203.48800718784332,
- "p95": 207.32800662517548,
- "p99": 214.9759978055954
- },
- "combine": {
- "p50": 53.727999329566956,
- "p90": 55.48800155520439,
- "p95": 57.760000228881836,
- "p99": 60.80000102519989
- },
- "roundtrip": {
- "p50": 231.26399517059326,
- "p90": 238.91200125217438,
- "p95": 242.36799776554108,
- "p99": 250.0160038471222
- },
- "isolatedSum": {
- "p50": 249.7600018978119,
- "p90": 258.9760087430477,
- "p95": 265.0880068540573,
- "p99": 275.7759988307953
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 215040,
- "combineLogicalBytes": 430080,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 195.80799341201782,
- "p90": 202.78400182724,
- "p95": 205.1199972629547,
- "p99": 212.12799847126007
- },
- "combine": {
- "p50": 55.93600124120712,
- "p90": 57.53599852323532,
- "p95": 59.93599817156792,
- "p99": 62.880001962184906
- },
- "roundtrip": {
- "p50": 233.60000550746918,
- "p90": 240.9600019454956,
- "p95": 243.13600361347198,
- "p99": 255.10400533676147
- },
- "isolatedSum": {
- "p50": 251.74399465322495,
- "p90": 260.3200003504753,
- "p95": 265.05599543452263,
- "p99": 275.008000433445
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 440320,
- "combineLogicalBytes": 880640,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 200.15999674797058,
- "p90": 287.48801350593567,
- "p95": 290.2719974517822,
- "p99": 298.17599058151245
- },
- "combine": {
- "p50": 57.11999908089638,
- "p90": 68.67200136184692,
- "p95": 69.56800073385239,
- "p99": 75.3600001335144
- },
- "roundtrip": {
- "p50": 238.01599442958832,
- "p90": 328.5120129585266,
- "p95": 332.73598551750183,
- "p99": 340.1600122451782
- },
- "isolatedSum": {
- "p50": 257.27999582886696,
- "p90": 356.1600148677826,
- "p95": 359.8399981856346,
- "p99": 373.53599071502686
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 870400,
- "combineLogicalBytes": 1740800,
- "fanoutMean": 5.3125,
- "recvTokensMax": 25,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 199.072003364563,
- "p90": 282.1120023727417,
- "p95": 285.8240008354187,
- "p99": 292.7359938621521
- },
- "combine": {
- "p50": 57.5999990105629,
- "p90": 66.14399701356888,
- "p95": 66.72000139951706,
- "p99": 71.48800045251846
- },
- "roundtrip": {
- "p50": 236.32000386714935,
- "p90": 315.3280019760132,
- "p95": 318.91199946403503,
- "p99": 326.2079954147339
- },
- "isolatedSum": {
- "p50": 256.6720023751259,
- "p90": 348.2559993863106,
- "p95": 352.54400223493576,
- "p99": 364.22399431467056
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1735680,
- "combineLogicalBytes": 3471360,
- "fanoutMean": 5.296875,
- "recvTokensMax": 50,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 199.71199333667755,
- "p90": 288.86398673057556,
- "p95": 291.23198986053467,
- "p99": 296.4160144329071
- },
- "combine": {
- "p50": 58.62399935722351,
- "p90": 70.14399766921997,
- "p95": 71.03999704122543,
- "p99": 74.11199808120728
- },
- "roundtrip": {
- "p50": 239.19999599456787,
- "p90": 329.75998520851135,
- "p95": 332.5439989566803,
- "p99": 338.3359909057617
- },
- "isolatedSum": {
- "p50": 258.33599269390106,
- "p90": 359.00798439979553,
- "p95": 362.2719869017601,
- "p99": 370.5280125141144
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3456000,
- "combineLogicalBytes": 6912000,
- "fanoutMean": 5.2734375,
- "recvTokensMax": 93,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 200.3519982099533,
- "p90": 288.2559895515442,
- "p95": 290.49599170684814,
- "p99": 295.1360046863556
- },
- "combine": {
- "p50": 63.040003180503845,
- "p90": 73.44000041484833,
- "p95": 73.95199686288834,
- "p99": 79.45600152015686
- },
- "roundtrip": {
- "p50": 244.25600469112396,
- "p90": 330.7200074195862,
- "p95": 333.24798941612244,
- "p99": 339.35999870300293
- },
- "isolatedSum": {
- "p50": 263.39200139045715,
- "p90": 361.6959899663925,
- "p95": 364.4479885697365,
- "p99": 374.59200620651245
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6988800,
- "combineLogicalBytes": 13977600,
- "fanoutMean": 5.33203125,
- "recvTokensMax": 179,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 199.5519995689392,
- "p90": 287.55199909210205,
- "p95": 291.6480004787445,
- "p99": 305.5360019207001
- },
- "combine": {
- "p50": 73.34399968385696,
- "p90": 85.02399921417236,
- "p95": 86.5280032157898,
- "p99": 89.72799777984619
- },
- "roundtrip": {
- "p50": 254.72000241279602,
- "p90": 339.83999490737915,
- "p95": 342.97600388526917,
- "p99": 349.5680093765259
- },
- "isolatedSum": {
- "p50": 272.8959992527962,
- "p90": 372.5759983062744,
- "p95": 378.1760036945343,
- "p99": 395.26399970054626
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13987840,
- "combineLogicalBytes": 27975680,
- "fanoutMean": 5.3359375,
- "recvTokensMax": 355,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 206.33600652217865,
- "p90": 288.32000494003296,
- "p95": 292.4480140209198,
- "p99": 296.671986579895
- },
- "combine": {
- "p50": 86.87999844551086,
- "p90": 100.19200295209885,
- "p95": 104.63999956846237,
- "p99": 326.24000310897827
- },
- "roundtrip": {
- "p50": 274.944007396698,
- "p90": 355.0719916820526,
- "p95": 358.8480055332184,
- "p99": 364.8959994316101
- },
- "isolatedSum": {
- "p50": 293.2160049676895,
- "p90": 388.5120078921318,
- "p95": 397.0880135893822,
- "p99": 622.9119896888733
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 27837440,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-0c56b994",
- "identity": "h100|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_a96c99f3",
- "comparisonKey": "b1bf09d425749f09",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:21.071476+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_18",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287494014",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287494014",
- "createdAt": "2026-06-27T11:13:21.071476+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 98.68799895048141,
- "p90": 103.26399654150009,
- "p95": 105.27999699115753,
- "p99": 110.11199653148651
- },
- "combine": {
- "p50": 69.24799829721451,
- "p90": 71.16799801588058,
- "p95": 72.51200079917908,
- "p99": 74.97599720954895
- },
- "roundtrip": {
- "p50": 197.40800559520721,
- "p90": 202.4639993906021,
- "p95": 204.96000349521637,
- "p99": 210.87999641895294
- },
- "isolatedSum": {
- "p50": 167.93599724769592,
- "p90": 174.43199455738068,
- "p95": 177.7919977903366,
- "p99": 185.08799374103546
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 270336,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 71.23199850320816,
- "p90": 100.73599964380264,
- "p95": 103.26399654150009,
- "p99": 108.83200168609619
- },
- "combine": {
- "p50": 58.27200040221214,
- "p90": 69.95200365781784,
- "p95": 71.68000191450119,
- "p99": 75.45600086450577
- },
- "roundtrip": {
- "p50": 151.96800231933594,
- "p90": 197.24799692630768,
- "p95": 199.71199333667755,
- "p99": 207.93600380420685
- },
- "isolatedSum": {
- "p50": 129.5039989054203,
- "p90": 170.68800330162048,
- "p95": 174.94399845600128,
- "p99": 184.28800255060196
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 528384,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 71.84000313282013,
- "p90": 94.87999975681305,
- "p95": 98.49599748849869,
- "p99": 103.93600165843964
- },
- "combine": {
- "p50": 60.447998344898224,
- "p90": 67.1359971165657,
- "p95": 68.64000111818314,
- "p99": 72.95999675989151
- },
- "roundtrip": {
- "p50": 154.40000593662262,
- "p90": 196.31999731063843,
- "p95": 197.79199361801147,
- "p99": 202.2400051355362
- },
- "isolatedSum": {
- "p50": 132.28800147771835,
- "p90": 162.01599687337875,
- "p95": 167.13599860668182,
- "p99": 176.89599841833115
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1062912,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 93.88799965381622,
- "p90": 101.02400183677673,
- "p95": 103.42399775981903,
- "p99": 116.12799763679504
- },
- "combine": {
- "p50": 66.3359984755516,
- "p90": 71.48800045251846,
- "p95": 73.02399724721909,
- "p99": 77.31200009584427
- },
- "roundtrip": {
- "p50": 193.6960071325302,
- "p90": 200.00000298023224,
- "p95": 202.5279998779297,
- "p99": 206.56000077724457
- },
- "isolatedSum": {
- "p50": 160.22399812936783,
- "p90": 172.5120022892952,
- "p95": 176.44799500703812,
- "p99": 193.4399977326393
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2131968,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 92.96000003814697,
- "p90": 100.99200159311295,
- "p95": 102.78400033712387,
- "p99": 106.78400099277496
- },
- "combine": {
- "p50": 67.52000004053116,
- "p90": 72.9919970035553,
- "p95": 74.30399954319,
- "p99": 78.59200239181519
- },
- "roundtrip": {
- "p50": 196.76800072193146,
- "p90": 203.0400037765503,
- "p95": 205.1199972629547,
- "p99": 208.8640034198761
- },
- "isolatedSum": {
- "p50": 160.48000007867813,
- "p90": 173.98399859666824,
- "p95": 177.08799988031387,
- "p99": 185.37600338459015
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4251648,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 72.92799651622772,
- "p90": 95.48799693584442,
- "p95": 99.20000284910202,
- "p99": 104.8320010304451
- },
- "combine": {
- "p50": 66.78400188684464,
- "p90": 73.37599992752075,
- "p95": 74.75200295448303,
- "p99": 78.17599922418594
- },
- "roundtrip": {
- "p50": 160.51200032234192,
- "p90": 202.07999646663666,
- "p95": 204.79999482631683,
- "p99": 209.60000157356262
- },
- "isolatedSum": {
- "p50": 139.71199840307236,
- "p90": 168.86399686336517,
- "p95": 173.95200580358505,
- "p99": 183.00800025463104
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 8454144,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 96.09600156545639,
- "p90": 101.72799974679947,
- "p95": 107.4879989027977,
- "p99": 478.08000445365906
- },
- "combine": {
- "p50": 82.07999914884567,
- "p90": 87.10400015115738,
- "p95": 87.8399983048439,
- "p99": 89.82399851083755
- },
- "roundtrip": {
- "p50": 175.58400332927704,
- "p90": 211.96800470352173,
- "p95": 215.03999829292297,
- "p99": 219.9999988079071
- },
- "isolatedSum": {
- "p50": 178.17600071430206,
- "p90": 188.83199989795685,
- "p95": 195.3279972076416,
- "p99": 567.9040029644966
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 16711680,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 85.82399785518646,
- "p90": 103.10400277376175,
- "p95": 106.11200332641602,
- "p99": 116.60800129175186
- },
- "combine": {
- "p50": 91.45600348711014,
- "p90": 99.35999661684036,
- "p95": 102.62399911880493,
- "p99": 148.3200043439865
- },
- "roundtrip": {
- "p50": 200.6720006465912,
- "p90": 229.18400168418884,
- "p95": 231.64799809455872,
- "p99": 236.86400055885315
- },
- "isolatedSum": {
- "p50": 177.2800013422966,
- "p90": 202.4639993906021,
- "p95": 208.73600244522095,
- "p99": 264.9280056357384
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-55a4c230",
- "identity": "h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_97196257",
- "comparisonKey": "8ab5124e24ec36ab",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:52:02.860609+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_19",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271706435",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271706435",
- "createdAt": "2026-06-26T23:52:02.860609+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 196.8960016965866,
- "p90": 227.77600586414337,
- "p95": 297.40801453590393,
- "p99": 503.32802534103394
- },
- "combine": {
- "p50": 57.920001447200775,
- "p90": 62.144000083208084,
- "p95": 67.10399687290192,
- "p99": 282.0799946784973
- },
- "roundtrip": {
- "p50": 237.40799725055695,
- "p90": 243.77599358558655,
- "p95": 245.31200528144836,
- "p99": 250.0160038471222
- },
- "isolatedSum": {
- "p50": 254.81600314378738,
- "p90": 289.92000594735146,
- "p95": 364.51201140880585,
- "p99": 785.4080200195312
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 270336,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 197.1839964389801,
- "p90": 204.92799580097198,
- "p95": 207.45599269866943,
- "p99": 214.6880030632019
- },
- "combine": {
- "p50": 58.49599838256836,
- "p90": 60.92799827456474,
- "p95": 63.26399743556976,
- "p99": 70.65600156784058
- },
- "roundtrip": {
- "p50": 237.56800591945648,
- "p90": 243.96799504756927,
- "p95": 247.29600548744202,
- "p99": 255.61600923538208
- },
- "isolatedSum": {
- "p50": 255.67999482154846,
- "p90": 265.8559940755367,
- "p95": 270.7199901342392,
- "p99": 285.3440046310425
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 528384,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 200.47999918460846,
- "p90": 282.71999955177307,
- "p95": 291.20001196861267,
- "p99": 401.2480080127716
- },
- "combine": {
- "p50": 59.90400165319443,
- "p90": 66.84800237417221,
- "p95": 69.5360004901886,
- "p99": 75.68000257015228
- },
- "roundtrip": {
- "p50": 243.20000410079956,
- "p90": 321.9839930534363,
- "p95": 326.7199993133545,
- "p99": 334.75199341773987
- },
- "isolatedSum": {
- "p50": 260.3840008378029,
- "p90": 349.5680019259453,
- "p95": 360.73601245880127,
- "p99": 476.9280105829239
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1062912,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 200.6399929523468,
- "p90": 261.9200050830841,
- "p95": 265.6959891319275,
- "p99": 275.1680016517639
- },
- "combine": {
- "p50": 60.99199876189232,
- "p90": 69.2799985408783,
- "p95": 69.88800317049026,
- "p99": 75.32799988985062
- },
- "roundtrip": {
- "p50": 239.9040013551712,
- "p90": 296.9599962234497,
- "p95": 299.8400032520294,
- "p99": 307.5200021266937
- },
- "isolatedSum": {
- "p50": 261.6319917142391,
- "p90": 331.2000036239624,
- "p95": 335.58399230241776,
- "p99": 350.49600154161453
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2131968,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 201.75999402999878,
- "p90": 280.3199887275696,
- "p95": 284.89598631858826,
- "p99": 351.48799419403076
- },
- "combine": {
- "p50": 61.76000088453293,
- "p90": 69.72800195217133,
- "p95": 72.92799651622772,
- "p99": 133.82400572299957
- },
- "roundtrip": {
- "p50": 245.82399427890778,
- "p90": 325.53601264953613,
- "p95": 328.8959860801697,
- "p99": 600.3199815750122
- },
- "isolatedSum": {
- "p50": 263.5199949145317,
- "p90": 350.0479906797409,
- "p95": 357.823982834816,
- "p99": 485.31199991703033
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4251648,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 200.73600113391876,
- "p90": 285.0559949874878,
- "p95": 287.9680097103119,
- "p99": 303.42400074005127
- },
- "combine": {
- "p50": 66.78400188684464,
- "p90": 78.20799946784973,
- "p95": 79.93599772453308,
- "p99": 83.8719978928566
- },
- "roundtrip": {
- "p50": 249.9839961528778,
- "p90": 319.487988948822,
- "p95": 328.8959860801697,
- "p99": 336.35199069976807
- },
- "isolatedSum": {
- "p50": 267.5200030207634,
- "p90": 363.2639944553375,
- "p95": 367.90400743484497,
- "p99": 387.29599863290787
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 8454144,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 200.73600113391876,
- "p90": 281.2480032444,
- "p95": 289.11998867988586,
- "p99": 304.9919903278351
- },
- "combine": {
- "p50": 77.11999863386154,
- "p90": 84.1279998421669,
- "p95": 86.40000224113464,
- "p99": 95.77599912881851
- },
- "roundtrip": {
- "p50": 259.5840096473694,
- "p90": 337.8559947013855,
- "p95": 341.3439989089966,
- "p99": 350.5280017852783
- },
- "isolatedSum": {
- "p50": 277.8559997677803,
- "p90": 365.3760030865669,
- "p95": 375.5199909210205,
- "p99": 400.7679894566536
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 16711680,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 212.5760018825531,
- "p90": 282.1759879589081,
- "p95": 286.5920066833496,
- "p99": 307.96799063682556
- },
- "combine": {
- "p50": 92.06400066614151,
- "p90": 98.11200201511383,
- "p95": 99.48799759149551,
- "p99": 103.74400019645691
- },
- "roundtrip": {
- "p50": 289.44000601768494,
- "p90": 355.3279936313629,
- "p95": 359.71200466156006,
- "p99": 366.91200733184814
- },
- "isolatedSum": {
- "p50": 304.6400025486946,
- "p90": 380.2879899740219,
- "p95": 386.0800042748451,
- "p99": 411.71199083328247
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-416fcf7d",
- "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_a96c99f3",
- "comparisonKey": "59d5014bb7031dbe",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T10:13:04.882575+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_19",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28286086353",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286086353",
- "createdAt": "2026-06-27T10:13:04.882575+00:00",
- "sha": "76a3032d20288ee17220eb6099346f74d56ce005"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 100.03200173377991,
- "p90": 104.44799810647964,
- "p95": 106.30399733781815,
- "p99": 110.59200018644333
- },
- "combine": {
- "p50": 74.65600222349167,
- "p90": 76.38400048017502,
- "p95": 77.69600301980972,
- "p99": 81.7599967122078
- },
- "roundtrip": {
- "p50": 195.64799964427948,
- "p90": 208.3200067281723,
- "p95": 210.65600216388702,
- "p99": 216.15999937057495
- },
- "isolatedSum": {
- "p50": 174.68800395727158,
- "p90": 180.83199858665466,
- "p95": 184.00000035762787,
- "p99": 192.35199689865112
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 71.74400240182877,
- "p90": 101.08800232410431,
- "p95": 102.62399911880493,
- "p99": 109.15199667215347
- },
- "combine": {
- "p50": 64.19199705123901,
- "p90": 74.43200051784515,
- "p95": 75.00799745321274,
- "p99": 78.62400263547897
- },
- "roundtrip": {
- "p50": 158.59200060367584,
- "p90": 206.81600272655487,
- "p95": 209.9519968032837,
- "p99": 367.71199107170105
- },
- "isolatedSum": {
- "p50": 135.93599945306778,
- "p90": 175.52000284194946,
- "p95": 177.63199657201767,
- "p99": 187.77599930763245
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 71.77600264549255,
- "p90": 102.78400033712387,
- "p95": 104.76800054311752,
- "p99": 109.63200032711029
- },
- "combine": {
- "p50": 65.8240020275116,
- "p90": 77.85599678754807,
- "p95": 78.5600021481514,
- "p99": 81.82399719953537
- },
- "roundtrip": {
- "p50": 159.71200168132782,
- "p90": 209.98400449752808,
- "p95": 212.09600567817688,
- "p99": 216.92800521850586
- },
- "isolatedSum": {
- "p50": 137.60000467300415,
- "p90": 180.63999712467194,
- "p95": 183.32800269126892,
- "p99": 191.45599752664566
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 97.79199957847595,
- "p90": 103.61599922180176,
- "p95": 106.175996363163,
- "p99": 111.90400272607803
- },
- "combine": {
- "p50": 75.71200281381607,
- "p90": 77.98399776220322,
- "p95": 79.77599650621414,
- "p99": 83.64800363779068
- },
- "roundtrip": {
- "p50": 195.71200013160706,
- "p90": 209.6640020608902,
- "p95": 211.96800470352173,
- "p99": 217.8879976272583
- },
- "isolatedSum": {
- "p50": 173.50400239229202,
- "p90": 181.59999698400497,
- "p95": 185.95199286937714,
- "p99": 195.5520063638687
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 97.9200005531311,
- "p90": 102.91200131177902,
- "p95": 105.34399747848511,
- "p99": 110.04800349473953
- },
- "combine": {
- "p50": 77.31200009584427,
- "p90": 80.79999685287476,
- "p95": 81.98399841785431,
- "p99": 87.00799942016602
- },
- "roundtrip": {
- "p50": 197.02400267124176,
- "p90": 212.3199999332428,
- "p95": 214.36800062656403,
- "p99": 219.200000166893
- },
- "isolatedSum": {
- "p50": 175.23200064897537,
- "p90": 183.71199816465378,
- "p95": 187.32799589633942,
- "p99": 197.05600291490555
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 73.98399710655212,
- "p90": 102.55999863147736,
- "p95": 105.02400249242783,
- "p99": 107.87200182676315
- },
- "combine": {
- "p50": 73.21599870920181,
- "p90": 85.56800335645676,
- "p95": 86.46400272846222,
- "p99": 90.33600240945816
- },
- "roundtrip": {
- "p50": 168.03200542926788,
- "p90": 216.73600375652313,
- "p95": 218.36799383163452,
- "p99": 223.1999933719635
- },
- "isolatedSum": {
- "p50": 147.19999581575394,
- "p90": 188.1280019879341,
- "p95": 191.48800522089005,
- "p99": 198.2080042362213
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 97.98400104045868,
- "p90": 142.752006649971,
- "p95": 145.82400023937225,
- "p99": 154.27200496196747
- },
- "combine": {
- "p50": 92.19200164079666,
- "p90": 112.96000331640244,
- "p95": 113.82400244474411,
- "p99": 118.07999759912491
- },
- "roundtrip": {
- "p50": 179.77599799633026,
- "p90": 277.3439884185791,
- "p95": 285.535991191864,
- "p99": 456.64000511169434
- },
- "isolatedSum": {
- "p50": 190.17600268125534,
- "p90": 255.71200996637344,
- "p95": 259.64800268411636,
- "p99": 272.3520025610924
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 90.43200314044952,
- "p90": 111.42399907112122,
- "p95": 113.24799805879593,
- "p99": 117.40799993276596
- },
- "combine": {
- "p50": 100.5759984254837,
- "p90": 112.47999966144562,
- "p95": 114.01599645614624,
- "p99": 117.53600090742111
- },
- "roundtrip": {
- "p50": 219.7120040655136,
- "p90": 246.87999486923218,
- "p95": 249.2160052061081,
- "p99": 254.07999753952026
- },
- "isolatedSum": {
- "p50": 191.00800156593323,
- "p90": 223.90399873256683,
- "p95": 227.26399451494217,
- "p99": 234.94400084018707
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-d4dbb29d",
- "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_97196257",
- "comparisonKey": "9687217877b9ce9c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:48:10.138934+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_03",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271579958",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271579958",
- "createdAt": "2026-06-26T23:48:10.138934+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 193.05600225925446,
- "p90": 204.3839991092682,
- "p95": 210.52800118923187,
- "p99": 277.9200077056885
- },
- "combine": {
- "p50": 60.95999851822853,
- "p90": 63.29599767923355,
- "p95": 65.31199812889099,
- "p99": 68.76800209283829
- },
- "roundtrip": {
- "p50": 237.63200640678406,
- "p90": 244.25600469112396,
- "p95": 246.14399671554565,
- "p99": 269.4079875946045
- },
- "isolatedSum": {
- "p50": 254.016000777483,
- "p90": 267.67999678850174,
- "p95": 275.83999931812286,
- "p99": 346.68800979852676
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 192.9280012845993,
- "p90": 200.6720006465912,
- "p95": 204.79999482631683,
- "p99": 264.5759880542755
- },
- "combine": {
- "p50": 62.272001057863235,
- "p90": 64.7680014371872,
- "p95": 67.391999065876,
- "p99": 73.08799773454666
- },
- "roundtrip": {
- "p50": 235.6480062007904,
- "p90": 243.0720031261444,
- "p95": 245.60000002384186,
- "p99": 259.71201062202454
- },
- "isolatedSum": {
- "p50": 255.20000234246254,
- "p90": 265.4400020837784,
- "p95": 272.19199389219284,
- "p99": 337.6639857888222
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 197.24799692630768,
- "p90": 286.080002784729,
- "p95": 290.71998596191406,
- "p99": 302.2400140762329
- },
- "combine": {
- "p50": 63.32799792289734,
- "p90": 71.32799923419952,
- "p95": 75.45600086450577,
- "p99": 82.62400329113007
- },
- "roundtrip": {
- "p50": 242.94400215148926,
- "p90": 349.40800070762634,
- "p95": 354.4960021972656,
- "p99": 367.13600158691406
- },
- "isolatedSum": {
- "p50": 260.575994849205,
- "p90": 357.4080020189285,
- "p95": 366.17598682641983,
- "p99": 384.864017367363
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 196.383997797966,
- "p90": 251.583993434906,
- "p95": 254.8159956932068,
- "p99": 268.15998554229736
- },
- "combine": {
- "p50": 63.87200206518173,
- "p90": 72.73600250482559,
- "p95": 73.5040009021759,
- "p99": 77.95199751853943
- },
- "roundtrip": {
- "p50": 242.11199581623077,
- "p90": 299.3920147418976,
- "p95": 304.1599988937378,
- "p99": 410.8160138130188
- },
- "isolatedSum": {
- "p50": 260.25599986314774,
- "p90": 324.3199959397316,
- "p95": 328.3199965953827,
- "p99": 346.1119830608368
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 197.63199985027313,
- "p90": 288.35201263427734,
- "p95": 294.048011302948,
- "p99": 322.04800844192505
- },
- "combine": {
- "p50": 66.46399945020676,
- "p90": 79.9039974808693,
- "p95": 106.33599758148193,
- "p99": 204.25599813461304
- },
- "roundtrip": {
- "p50": 246.62399291992188,
- "p90": 330.24001121520996,
- "p95": 333.5359990596771,
- "p99": 341.18399024009705
- },
- "isolatedSum": {
- "p50": 264.0959993004799,
- "p90": 368.25601011514664,
- "p95": 400.38400888442993,
- "p99": 526.3040065765381
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 198.40000569820404,
- "p90": 284.35200452804565,
- "p95": 288.06400299072266,
- "p99": 295.9040105342865
- },
- "combine": {
- "p50": 70.97599655389786,
- "p90": 79.96799796819687,
- "p95": 80.70400357246399,
- "p99": 83.52000266313553
- },
- "roundtrip": {
- "p50": 250.36799907684326,
- "p90": 306.5919876098633,
- "p95": 310.2079927921295,
- "p99": 368.8639998435974
- },
- "isolatedSum": {
- "p50": 269.3760022521019,
- "p90": 364.3200024962425,
- "p95": 368.76800656318665,
- "p99": 379.424013197422
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 198.65599274635315,
- "p90": 284.8320007324219,
- "p95": 289.69600796699524,
- "p99": 304.4480085372925
- },
- "combine": {
- "p50": 80.48000186681747,
- "p90": 88.83199840784073,
- "p95": 90.52799642086029,
- "p99": 101.31199657917023
- },
- "roundtrip": {
- "p50": 260.96001267433167,
- "p90": 351.80801153182983,
- "p95": 355.55198788642883,
- "p99": 367.0400083065033
- },
- "isolatedSum": {
- "p50": 279.1359946131706,
- "p90": 373.6639991402626,
- "p95": 380.22400438785553,
- "p99": 405.7600051164627
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 216.8319970369339,
- "p90": 312.8640055656433,
- "p95": 320.73599100112915,
- "p99": 336.41600608825684
- },
- "combine": {
- "p50": 98.94400089979172,
- "p90": 112.83200234174728,
- "p95": 113.79200220108032,
- "p99": 119.13599818944931
- },
- "roundtrip": {
- "p50": 303.2959997653961,
- "p90": 388.0000114440918,
- "p95": 392.2879993915558,
- "p99": 401.2480080127716
- },
- "isolatedSum": {
- "p50": 315.7759979367256,
- "p90": 425.6960079073906,
- "p95": 434.5279932022095,
- "p99": 455.55200427770615
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-52396484",
- "identity": "h100|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "h100_a96c99f3",
- "comparisonKey": "7d245d1c48b9f399",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:15:21.281924+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_19",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287500362",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287500362",
- "createdAt": "2026-06-27T11:15:21.281924+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 98.78399968147278,
- "p90": 104.54399883747101,
- "p95": 108.22399705648422,
- "p99": 114.88000303506851
- },
- "combine": {
- "p50": 71.45600020885468,
- "p90": 73.34399968385696,
- "p95": 74.49600100517273,
- "p99": 145.88800072669983
- },
- "roundtrip": {
- "p50": 201.12000405788422,
- "p90": 207.2640061378479,
- "p95": 210.11200547218323,
- "p99": 237.59999871253967
- },
- "isolatedSum": {
- "p50": 170.23999989032745,
- "p90": 177.88799852132797,
- "p95": 182.71999806165695,
- "p99": 260.76800376176834
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 301056,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 99.48799759149551,
- "p90": 103.64799946546555,
- "p95": 105.66399991512299,
- "p99": 111.55200004577637
- },
- "combine": {
- "p50": 72.95999675989151,
- "p90": 74.5600014925003,
- "p95": 75.99999755620956,
- "p99": 78.97599786520004
- },
- "roundtrip": {
- "p50": 203.19999754428864,
- "p90": 207.13600516319275,
- "p95": 210.1760059595108,
- "p99": 213.82400393486023
- },
- "isolatedSum": {
- "p50": 172.44799435138702,
- "p90": 178.20800095796585,
- "p95": 181.66399747133255,
- "p99": 190.5279979109764
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 609280,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 99.29600358009338,
- "p90": 104.09600287675858,
- "p95": 106.175996363163,
- "p99": 110.49599945545197
- },
- "combine": {
- "p50": 72.06399738788605,
- "p90": 74.17599856853485,
- "p95": 75.52000135183334,
- "p99": 79.74400371313095
- },
- "roundtrip": {
- "p50": 202.72000133991241,
- "p90": 207.90399610996246,
- "p95": 211.0079973936081,
- "p99": 221.24800086021423
- },
- "isolatedSum": {
- "p50": 171.36000096797943,
- "p90": 178.27200144529343,
- "p95": 181.69599771499634,
- "p99": 190.24000316858292
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 100.16000270843506,
- "p90": 104.35199737548828,
- "p95": 106.27199709415436,
- "p99": 111.93600296974182
- },
- "combine": {
- "p50": 73.11999797821045,
- "p90": 75.16799867153168,
- "p95": 76.80000364780426,
- "p99": 83.20000022649765
- },
- "roundtrip": {
- "p50": 203.42400670051575,
- "p90": 208.12800526618958,
- "p95": 210.78400313854218,
- "p99": 215.29600024223328
- },
- "isolatedSum": {
- "p50": 173.2800006866455,
- "p90": 179.51999604701996,
- "p95": 183.07200074195862,
- "p99": 195.13600319623947
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2415616,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 98.88000041246414,
- "p90": 103.58399897813797,
- "p95": 106.27199709415436,
- "p99": 112.22399771213531
- },
- "combine": {
- "p50": 75.93599706888199,
- "p90": 78.3040001988411,
- "p95": 80.60800284147263,
- "p99": 82.91199803352356
- },
- "roundtrip": {
- "p50": 205.72799444198608,
- "p90": 210.01599729061127,
- "p95": 212.6079946756363,
- "p99": 216.89599752426147
- },
- "isolatedSum": {
- "p50": 174.81599748134613,
- "p90": 181.88799917697906,
- "p95": 186.87999993562698,
- "p99": 195.13599574565887
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4924416,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 99.93600100278854,
- "p90": 142.71999895572662,
- "p95": 161.5999937057495,
- "p99": 181.11999332904816
- },
- "combine": {
- "p50": 82.07999914884567,
- "p90": 102.01600193977356,
- "p95": 109.40799862146378,
- "p99": 114.52800035476685
- },
- "roundtrip": {
- "p50": 211.64800226688385,
- "p90": 216.35200083255768,
- "p95": 218.23999285697937,
- "p99": 223.32799434661865
- },
- "isolatedSum": {
- "p50": 182.01600015163422,
- "p90": 244.73600089550018,
- "p95": 271.0079923272133,
- "p99": 295.647993683815
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 101.40799731016159,
- "p90": 206.81600272655487,
- "p95": 216.86400473117828,
- "p99": 370.88000774383545
- },
- "combine": {
- "p50": 91.16800129413605,
- "p90": 95.29600292444229,
- "p95": 99.5199978351593,
- "p99": 122.40000069141388
- },
- "roundtrip": {
- "p50": 221.37600183486938,
- "p90": 226.43199563026428,
- "p95": 228.7680059671402,
- "p99": 233.34400355815887
- },
- "isolatedSum": {
- "p50": 192.57599860429764,
- "p90": 302.11200565099716,
- "p95": 316.3840025663376,
- "p99": 493.28000843524933
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19418112,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 106.23999685049057,
- "p90": 109.8880022764206,
- "p95": 112.5440001487732,
- "p99": 117.5680011510849
- },
- "combine": {
- "p50": 107.77600109577179,
- "p90": 110.20799726247787,
- "p95": 111.48799955844879,
- "p99": 114.56000059843063
- },
- "roundtrip": {
- "p50": 240.35200476646423,
- "p90": 247.1040040254593,
- "p95": 249.82400238513947,
- "p99": 295.80798745155334
- },
- "isolatedSum": {
- "p50": 214.01599794626236,
- "p90": 220.09599953889847,
- "p95": 224.03199970722198,
- "p99": 232.12800174951553
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8e5c4d34",
- "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "h100_97196257",
- "comparisonKey": "969c3964291e1270",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:50:43.012530+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_19",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271660154",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271660154",
- "createdAt": "2026-06-26T23:50:43.012530+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 198.7520009279251,
- "p90": 206.2399983406067,
- "p95": 209.56799387931824,
- "p99": 221.69600427150726
- },
- "combine": {
- "p50": 60.83200126886368,
- "p90": 64.31999802589417,
- "p95": 65.98400324583054,
- "p99": 69.05599683523178
- },
- "roundtrip": {
- "p50": 242.71999299526215,
- "p90": 250.07998943328857,
- "p95": 254.5279860496521,
- "p99": 290.0159955024719
- },
- "isolatedSum": {
- "p50": 259.5840021967888,
- "p90": 270.55999636650085,
- "p95": 275.5519971251488,
- "p99": 290.75200110673904
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 301056,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 205.53599298000336,
- "p90": 313.6320114135742,
- "p95": 323.8399922847748,
- "p99": 375.5840063095093
- },
- "combine": {
- "p50": 62.81600147485733,
- "p90": 76.1599987745285,
- "p95": 79.19999957084656,
- "p99": 83.0719992518425
- },
- "roundtrip": {
- "p50": 242.49599874019623,
- "p90": 250.43201446533203,
- "p95": 253.08799743652344,
- "p99": 294.1119968891144
- },
- "isolatedSum": {
- "p50": 268.3519944548607,
- "p90": 389.7920101881027,
- "p95": 403.03999185562134,
- "p99": 458.6560055613518
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 609280,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 203.5519927740097,
- "p90": 291.55200719833374,
- "p95": 296.09599709510803,
- "p99": 303.6159873008728
- },
- "combine": {
- "p50": 63.26399743556976,
- "p90": 73.98399710655212,
- "p95": 75.83999633789062,
- "p99": 80.09599894285202
- },
- "roundtrip": {
- "p50": 247.42400646209717,
- "p90": 336.67200803756714,
- "p95": 339.4559919834137,
- "p99": 346.20800614356995
- },
- "isolatedSum": {
- "p50": 266.81599020957947,
- "p90": 365.53600430488586,
- "p95": 371.93599343299866,
- "p99": 383.7119862437248
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 199.45600628852844,
- "p90": 207.8080028295517,
- "p95": 213.02400529384613,
- "p99": 235.29599606990814
- },
- "combine": {
- "p50": 62.72000074386597,
- "p90": 67.16799736022949,
- "p95": 68.64000111818314,
- "p99": 73.60000163316727
- },
- "roundtrip": {
- "p50": 245.85600197315216,
- "p90": 253.1839907169342,
- "p95": 256.9279968738556,
- "p99": 269.3119943141937
- },
- "isolatedSum": {
- "p50": 262.1760070323944,
- "p90": 274.9760001897812,
- "p95": 281.66400641202927,
- "p99": 308.8959977030754
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2415616,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 204.22400534152985,
- "p90": 292.60799288749695,
- "p95": 296.3840067386627,
- "p99": 434.30399894714355
- },
- "combine": {
- "p50": 66.14399701356888,
- "p90": 75.55200159549713,
- "p95": 76.1599987745285,
- "p99": 79.8719972372055
- },
- "roundtrip": {
- "p50": 250.59199333190918,
- "p90": 335.32801270484924,
- "p95": 340.2239978313446,
- "p99": 366.5919899940491
- },
- "isolatedSum": {
- "p50": 270.3680023550987,
- "p90": 368.1599944829941,
- "p95": 372.5440055131912,
- "p99": 514.1759961843491
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4924416,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 200.99200308322906,
- "p90": 286.3039970397949,
- "p95": 293.3120131492615,
- "p99": 305.11999130249023
- },
- "combine": {
- "p50": 70.88000327348709,
- "p90": 75.83999633789062,
- "p95": 78.11199873685837,
- "p99": 86.84799820184708
- },
- "roundtrip": {
- "p50": 253.31199169158936,
- "p90": 259.71201062202454,
- "p95": 262.4959945678711,
- "p99": 270.9439992904663
- },
- "isolatedSum": {
- "p50": 271.87200635671616,
- "p90": 362.14399337768555,
- "p95": 371.42401188611984,
- "p99": 391.9679895043373
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 204.22400534152985,
- "p90": 293.8239872455597,
- "p95": 299.74400997161865,
- "p99": 323.4559893608093
- },
- "combine": {
- "p50": 81.82399719953537,
- "p90": 93.40800344944,
- "p95": 96.63999825716019,
- "p99": 99.64799880981445
- },
- "roundtrip": {
- "p50": 268.73600482940674,
- "p90": 351.6159951686859,
- "p95": 354.4960021972656,
- "p99": 361.6639971733093
- },
- "isolatedSum": {
- "p50": 286.0480025410652,
- "p90": 387.2319906949997,
- "p95": 396.38400822877884,
- "p99": 423.1039881706238
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19418112,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 224.2240011692047,
- "p90": 294.5919930934906,
- "p95": 298.4960079193115,
- "p99": 310.8159899711609
- },
- "combine": {
- "p50": 99.90400075912476,
- "p90": 110.33599823713303,
- "p95": 111.35999858379364,
- "p99": 114.68800157308578
- },
- "roundtrip": {
- "p50": 310.88000535964966,
- "p90": 375.2320110797882,
- "p95": 378.04800271987915,
- "p99": 386.46399974823
- },
- "isolatedSum": {
- "p50": 324.12800192832947,
- "p90": 404.9279913306236,
- "p95": 409.85600650310516,
- "p99": 425.5039915442467
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4e4a7f2d",
- "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h100_91aa6e56",
- "comparisonKey": "511cf861d6b2e142",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:28:00.849157+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_18",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254323956",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956",
- "createdAt": "2026-06-26T17:28:00.849157+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 97.98400104045868,
- "p90": 102.88000106811523,
- "p95": 104.38399761915207,
- "p99": 110.20799726247787
- },
- "combine": {
- "p50": 72.28799909353256,
- "p90": 74.14399832487106,
- "p95": 75.29599964618683,
- "p99": 78.65600287914276
- },
- "roundtrip": {
- "p50": 190.65600633621216,
- "p90": 195.90400159358978,
- "p95": 198.30399751663208,
- "p99": 202.72000133991241
- },
- "isolatedSum": {
- "p50": 170.27200013399124,
- "p90": 177.0239993929863,
- "p95": 179.6799972653389,
- "p99": 188.86400014162064
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 72.15999811887741,
- "p90": 99.90400075912476,
- "p95": 102.52799838781357,
- "p99": 105.0880029797554
- },
- "combine": {
- "p50": 63.35999816656113,
- "p90": 73.18399846553802,
- "p95": 73.98399710655212,
- "p99": 78.46400141716003
- },
- "roundtrip": {
- "p50": 153.82400155067444,
- "p90": 194.43200528621674,
- "p95": 196.28800451755524,
- "p99": 201.05600357055664
- },
- "isolatedSum": {
- "p50": 135.51999628543854,
- "p90": 173.08799922466278,
- "p95": 176.5119954943657,
- "p99": 183.55200439691544
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 72.31999933719635,
- "p90": 103.4879982471466,
- "p95": 107.26399719715118,
- "p99": 115.48800021409988
- },
- "combine": {
- "p50": 64.03200328350067,
- "p90": 76.28799974918365,
- "p95": 77.82399654388428,
- "p99": 81.98399841785431
- },
- "roundtrip": {
- "p50": 156.09599649906158,
- "p90": 202.36800611019135,
- "p95": 205.63200116157532,
- "p99": 212.51200139522552
- },
- "isolatedSum": {
- "p50": 136.35200262069702,
- "p90": 179.77599799633026,
- "p95": 185.08799374103546,
- "p99": 197.4719986319542
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 97.50399738550186,
- "p90": 102.30399668216705,
- "p95": 105.85600137710571,
- "p99": 113.40799927711487
- },
- "combine": {
- "p50": 63.80800157785416,
- "p90": 74.94399696588516,
- "p95": 76.28799974918365,
- "p99": 80.89599758386612
- },
- "roundtrip": {
- "p50": 154.6880006790161,
- "p90": 194.7840005159378,
- "p95": 199.0399956703186,
- "p99": 203.87199521064758
- },
- "isolatedSum": {
- "p50": 161.31199896335602,
- "p90": 177.24799364805222,
- "p95": 182.14400112628937,
- "p99": 194.303996860981
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 97.08800166845322,
- "p90": 104.3199971318245,
- "p95": 107.39199817180634,
- "p99": 113.43999952077866
- },
- "combine": {
- "p50": 75.74400305747986,
- "p90": 78.49600166082382,
- "p95": 80.06399869918823,
- "p99": 83.36000144481659
- },
- "roundtrip": {
- "p50": 195.2960044145584,
- "p90": 205.85599541664124,
- "p95": 209.85600352287292,
- "p99": 223.83999824523926
- },
- "isolatedSum": {
- "p50": 172.83200472593307,
- "p90": 182.81599879264832,
- "p95": 187.45599687099457,
- "p99": 196.80000096559525
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 73.11999797821045,
- "p90": 104.16000336408615,
- "p95": 106.84800148010254,
- "p99": 112.09599673748016
- },
- "combine": {
- "p50": 69.2799985408783,
- "p90": 81.88799768686295,
- "p95": 82.87999778985977,
- "p99": 88.28800171613693
- },
- "roundtrip": {
- "p50": 161.21600568294525,
- "p90": 206.65599405765533,
- "p95": 210.84800362586975,
- "p99": 216.22399985790253
- },
- "isolatedSum": {
- "p50": 142.39999651908875,
- "p90": 186.0480010509491,
- "p95": 189.7279992699623,
- "p99": 200.3839984536171
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 78.65600287914276,
- "p90": 106.9440022110939,
- "p95": 110.55999994277954,
- "p99": 125.44000148773193
- },
- "combine": {
- "p50": 83.64800363779068,
- "p90": 96.38399630784988,
- "p95": 97.69599884748459,
- "p99": 100.00000149011612
- },
- "roundtrip": {
- "p50": 175.7120043039322,
- "p90": 222.6880043745041,
- "p95": 225.24799406528473,
- "p99": 231.74400627613068
- },
- "isolatedSum": {
- "p50": 162.30400651693344,
- "p90": 203.3279985189438,
- "p95": 208.25599879026413,
- "p99": 225.44000297784805
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 90.33600240945816,
- "p90": 110.84800213575363,
- "p95": 113.82400244474411,
- "p99": 117.11999773979187
- },
- "combine": {
- "p50": 98.78399968147278,
- "p90": 111.00800335407257,
- "p95": 112.0000034570694,
- "p99": 117.21599847078323
- },
- "roundtrip": {
- "p50": 216.12800657749176,
- "p90": 240.60800671577454,
- "p95": 244.25600469112396,
- "p99": 250.2720057964325
- },
- "isolatedSum": {
- "p50": 189.12000209093094,
- "p90": 221.8560054898262,
- "p95": 225.8240059018135,
- "p99": 234.3359962105751
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-750e874d",
- "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h100_7f10961a",
- "comparisonKey": "f145cb161a39591f",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T15:23:35.919985+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_05",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "unknown",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28247584217",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247584217",
- "createdAt": "2026-06-26T15:23:35.919985+00:00",
- "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 251.93598866462708,
- "p90": 260.3839933872223,
- "p95": 263.10399174690247,
- "p99": 268.5759961605072
- },
- "combine": {
- "p50": 68.41599941253662,
- "p90": 69.88800317049026,
- "p95": 70.8480030298233,
- "p99": 76.03199779987335
- },
- "roundtrip": {
- "p50": 296.51200771331787,
- "p90": 304.1279911994934,
- "p95": 306.40000104904175,
- "p99": 349.15199875831604
- },
- "isolatedSum": {
- "p50": 320.3519880771637,
- "p90": 330.27199655771255,
- "p95": 333.95199477672577,
- "p99": 344.60799396038055
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 200.51200687885284,
- "p90": 256.8320035934448,
- "p95": 259.99999046325684,
- "p99": 268.0000066757202
- },
- "combine": {
- "p50": 63.00800293684006,
- "p90": 71.00799679756165,
- "p95": 71.84000313282013,
- "p99": 74.68800246715546
- },
- "roundtrip": {
- "p50": 243.1039959192276,
- "p90": 300.1919984817505,
- "p95": 303.5840094089508,
- "p99": 308.9919984340668
- },
- "isolatedSum": {
- "p50": 263.5200098156929,
- "p90": 327.84000039100647,
- "p95": 331.83999359607697,
- "p99": 342.68800914287567
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 199.13600385189056,
- "p90": 287.9680097103119,
- "p95": 291.1359965801239,
- "p99": 298.2720136642456
- },
- "combine": {
- "p50": 63.519999384880066,
- "p90": 75.1039981842041,
- "p95": 76.73600316047668,
- "p99": 81.40800148248672
- },
- "roundtrip": {
- "p50": 246.17600440979004,
- "p90": 330.84800839424133,
- "p95": 333.9200019836426,
- "p99": 343.6479866504669
- },
- "isolatedSum": {
- "p50": 262.65600323677063,
- "p90": 363.072007894516,
- "p95": 367.8719997406006,
- "p99": 379.68001514673233
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 199.16799664497375,
- "p90": 258.14399123191833,
- "p95": 261.4080011844635,
- "p99": 267.16798543930054
- },
- "combine": {
- "p50": 63.4239986538887,
- "p90": 72.57600128650665,
- "p95": 73.18399846553802,
- "p99": 76.28799974918365
- },
- "roundtrip": {
- "p50": 244.83199417591095,
- "p90": 302.3039996623993,
- "p95": 305.759996175766,
- "p99": 310.94399094581604
- },
- "isolatedSum": {
- "p50": 262.59199529886246,
- "p90": 330.719992518425,
- "p95": 334.5919996500015,
- "p99": 343.4559851884842
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 200.28799772262573,
- "p90": 286.5599989891052,
- "p95": 290.0800108909607,
- "p99": 296.57599329948425
- },
- "combine": {
- "p50": 65.5359998345375,
- "p90": 76.86399668455124,
- "p95": 77.66400277614594,
- "p99": 80.76799660921097
- },
- "roundtrip": {
- "p50": 248.57600033283234,
- "p90": 330.4640054702759,
- "p95": 333.6319923400879,
- "p99": 344.7360098361969
- },
- "isolatedSum": {
- "p50": 265.82399755716324,
- "p90": 363.42399567365646,
- "p95": 367.7440136671066,
- "p99": 377.3439899086952
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 198.88000190258026,
- "p90": 284.4800055027008,
- "p95": 288.12798857688904,
- "p99": 293.0240035057068
- },
- "combine": {
- "p50": 69.18399780988693,
- "p90": 80.54400235414505,
- "p95": 81.4720019698143,
- "p99": 84.63999629020691
- },
- "roundtrip": {
- "p50": 253.12000513076782,
- "p90": 334.01599526405334,
- "p95": 336.89600229263306,
- "p99": 340.31999111175537
- },
- "isolatedSum": {
- "p50": 268.0639997124672,
- "p90": 365.02400785684586,
- "p95": 369.59999054670334,
- "p99": 377.6639997959137
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 202.07999646663666,
- "p90": 355.00800609588623,
- "p95": 361.7280125617981,
- "p99": 423.007994890213
- },
- "combine": {
- "p50": 82.65600353479385,
- "p90": 94.11200135946274,
- "p95": 95.8079993724823,
- "p99": 99.45599734783173
- },
- "roundtrip": {
- "p50": 266.88000559806824,
- "p90": 352.03200578689575,
- "p95": 355.3600013256073,
- "p99": 361.4720106124878
- },
- "isolatedSum": {
- "p50": 284.7360000014305,
- "p90": 449.12000745534897,
- "p95": 457.5360119342804,
- "p99": 522.4639922380447
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 221.79199755191803,
- "p90": 289.72798585891724,
- "p95": 293.08798909187317,
- "p99": 300.9600043296814
- },
- "combine": {
- "p50": 98.27200323343277,
- "p90": 108.8000014424324,
- "p95": 110.1439967751503,
- "p99": 113.88800293207169
- },
- "roundtrip": {
- "p50": 303.74398827552795,
- "p90": 364.8639917373657,
- "p95": 367.45598912239075,
- "p99": 371.5519905090332
- },
- "isolatedSum": {
- "p50": 320.0640007853508,
- "p90": 398.52798730134964,
- "p95": 403.23198586702347,
- "p99": 414.8480072617531
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b83230a1",
- "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h100_eddc3af6",
- "comparisonKey": "f291497d6f9ce0d1",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:31:42.999710+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_12",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254341346",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346",
- "createdAt": "2026-06-26T17:31:42.999710+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 81.34400099515915,
- "p90": 84.927998483181,
- "p95": 86.496002972126,
- "p99": 90.14400094747543
- },
- "combine": {
- "p50": 71.3919997215271,
- "p90": 73.91999661922455,
- "p95": 74.87999647855759,
- "p99": 77.98399776220322
- },
- "roundtrip": {
- "p50": 173.15199971199036,
- "p90": 178.6240041255951,
- "p95": 180.92800676822662,
- "p99": 186.5600049495697
- },
- "isolatedSum": {
- "p50": 152.73600071668625,
- "p90": 158.84799510240555,
- "p95": 161.3759994506836,
- "p99": 168.12799870967865
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 58.49599838256836,
- "p90": 82.78399705886841,
- "p95": 84.3840017914772,
- "p99": 90.01599997282028
- },
- "combine": {
- "p50": 63.07200342416763,
- "p90": 74.0479975938797,
- "p95": 74.8480036854744,
- "p99": 77.44000107049942
- },
- "roundtrip": {
- "p50": 141.12000167369843,
- "p90": 176.54399573802948,
- "p95": 178.81600558757782,
- "p99": 181.92000687122345
- },
- "isolatedSum": {
- "p50": 121.56800180673599,
- "p90": 156.8319946527481,
- "p95": 159.2320054769516,
- "p99": 167.4560010433197
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 59.13599953055382,
- "p90": 82.68799632787704,
- "p95": 85.37600189447403,
- "p99": 91.61599725484848
- },
- "combine": {
- "p50": 63.64800035953522,
- "p90": 74.14399832487106,
- "p95": 75.19999891519547,
- "p99": 79.32800054550171
- },
- "roundtrip": {
- "p50": 140.83200693130493,
- "p90": 178.49600315093994,
- "p95": 180.92800676822662,
- "p99": 187.45599687099457
- },
- "isolatedSum": {
- "p50": 122.78399989008904,
- "p90": 156.8319946527481,
- "p95": 160.5760008096695,
- "p99": 170.9439978003502
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 61.792001128196716,
- "p90": 83.20000022649765,
- "p95": 86.07999980449677,
- "p99": 96.00000083446503
- },
- "combine": {
- "p50": 65.43999910354614,
- "p90": 75.93599706888199,
- "p95": 78.14399898052216,
- "p99": 83.74399691820145
- },
- "roundtrip": {
- "p50": 144.44799721240997,
- "p90": 181.15200102329254,
- "p95": 184.25600230693817,
- "p99": 199.8080015182495
- },
- "isolatedSum": {
- "p50": 127.23200023174286,
- "p90": 159.13599729537964,
- "p95": 164.22399878501892,
- "p99": 179.74399775266647
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 80.38400113582611,
- "p90": 83.52000266313553,
- "p95": 85.08799970149994,
- "p99": 92.38400310277939
- },
- "combine": {
- "p50": 75.80800354480743,
- "p90": 77.85599678754807,
- "p95": 79.03999835252762,
- "p99": 80.83199709653854
- },
- "roundtrip": {
- "p50": 150.59199929237366,
- "p90": 182.49599635601044,
- "p95": 184.60799753665924,
- "p99": 194.815993309021
- },
- "isolatedSum": {
- "p50": 156.19200468063354,
- "p90": 161.3759994506836,
- "p95": 164.12799805402756,
- "p99": 173.21600019931793
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 61.792001128196716,
- "p90": 81.727996468544,
- "p95": 84.28800106048584,
- "p99": 89.88799899816513
- },
- "combine": {
- "p50": 69.34399902820587,
- "p90": 79.96799796819687,
- "p95": 81.24800026416779,
- "p99": 83.99999886751175
- },
- "roundtrip": {
- "p50": 146.11199498176575,
- "p90": 184.32000279426575,
- "p95": 186.52799725532532,
- "p99": 192.44800508022308
- },
- "isolatedSum": {
- "p50": 131.1360001564026,
- "p90": 161.69599443674088,
- "p95": 165.53600132465363,
- "p99": 173.88799786567688
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 72.80000299215317,
- "p90": 86.43200248479843,
- "p95": 92.54399687051773,
- "p99": 99.7759997844696
- },
- "combine": {
- "p50": 85.08799970149994,
- "p90": 95.0080007314682,
- "p95": 96.41599655151367,
- "p99": 101.21600329875946
- },
- "roundtrip": {
- "p50": 182.8799992799759,
- "p90": 202.94399559497833,
- "p95": 208.3200067281723,
- "p99": 218.176007270813
- },
- "isolatedSum": {
- "p50": 157.8880026936531,
- "p90": 181.44000321626663,
- "p95": 188.9599934220314,
- "p99": 200.99200308322906
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 77.79199630022049,
- "p90": 92.12800115346909,
- "p95": 93.72799843549728,
- "p99": 98.24000298976898
- },
- "combine": {
- "p50": 99.55199807882309,
- "p90": 109.72800105810165,
- "p95": 110.91200262308121,
- "p99": 114.46399986743927
- },
- "roundtrip": {
- "p50": 205.1520049571991,
- "p90": 219.200000166893,
- "p95": 220.89600563049316,
- "p99": 223.4880030155182
- },
- "isolatedSum": {
- "p50": 177.34399437904358,
- "p90": 201.85600221157074,
- "p95": 204.6400010585785,
- "p99": 212.70400285720825
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-d8e58489",
- "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_ec72792b",
- "comparisonKey": "2bfd4913feb2a935",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:47:54.320638+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_02",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271573150",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271573150",
- "createdAt": "2026-06-26T23:47:54.320638+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 78.3040001988411,
- "p90": 82.07999914884567,
- "p95": 84.44800227880478,
- "p99": 88.03199976682663
- },
- "combine": {
- "p50": 71.1359977722168,
- "p90": 72.86400347948074,
- "p95": 73.82400333881378,
- "p99": 77.88799703121185
- },
- "roundtrip": {
- "p50": 136.63999736309052,
- "p90": 174.75199699401855,
- "p95": 177.15199291706085,
- "p99": 181.08800053596497
- },
- "isolatedSum": {
- "p50": 149.4399979710579,
- "p90": 154.94400262832642,
- "p95": 158.27200561761856,
- "p99": 165.91999679803848
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 56.832000613212585,
- "p90": 79.74400371313095,
- "p95": 81.11999928951263,
- "p99": 85.69599688053131
- },
- "combine": {
- "p50": 62.3680017888546,
- "p90": 71.58400118350983,
- "p95": 72.25599884986877,
- "p99": 75.9039968252182
- },
- "roundtrip": {
- "p50": 138.0160003900528,
- "p90": 172.95999825000763,
- "p95": 174.30399358272552,
- "p99": 179.61600422859192
- },
- "isolatedSum": {
- "p50": 119.20000240206718,
- "p90": 151.32800489664078,
- "p95": 153.3759981393814,
- "p99": 161.5999937057495
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 56.92800134420395,
- "p90": 82.0159986615181,
- "p95": 85.02399921417236,
- "p99": 87.77599781751633
- },
- "combine": {
- "p50": 63.07200342416763,
- "p90": 74.94399696588516,
- "p95": 76.28799974918365,
- "p99": 79.99999821186066
- },
- "roundtrip": {
- "p50": 138.7840062379837,
- "p90": 179.51999604701996,
- "p95": 182.01600015163422,
- "p99": 187.42400407791138
- },
- "isolatedSum": {
- "p50": 120.00000476837158,
- "p90": 156.95999562740326,
- "p95": 161.31199896335602,
- "p99": 167.77599602937698
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 56.832000613212585,
- "p90": 80.99199831485748,
- "p95": 82.94399827718735,
- "p99": 87.99999952316284
- },
- "combine": {
- "p50": 63.71200084686279,
- "p90": 74.43200051784515,
- "p95": 75.19999891519547,
- "p99": 79.52000200748444
- },
- "roundtrip": {
- "p50": 139.93600010871887,
- "p90": 178.5919964313507,
- "p95": 181.98400735855103,
- "p99": 185.47199666500092
- },
- "isolatedSum": {
- "p50": 120.54400146007538,
- "p90": 155.42399883270264,
- "p95": 158.1439971923828,
- "p99": 167.52000153064728
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 57.472001761198044,
- "p90": 81.79199695587158,
- "p95": 84.28800106048584,
- "p99": 87.87199854850769
- },
- "combine": {
- "p50": 65.5359998345375,
- "p90": 77.37600058317184,
- "p95": 79.3600007891655,
- "p99": 82.46400207281113
- },
- "roundtrip": {
- "p50": 141.184002161026,
- "p90": 181.7920058965683,
- "p95": 184.9599927663803,
- "p99": 191.93600118160248
- },
- "isolatedSum": {
- "p50": 123.00800159573555,
- "p90": 159.16799753904343,
- "p95": 163.64800184965134,
- "p99": 170.33600062131882
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 60.32000109553337,
- "p90": 82.0159986615181,
- "p95": 84.63999629020691,
- "p99": 91.0400003194809
- },
- "combine": {
- "p50": 70.97599655389786,
- "p90": 82.14399963617325,
- "p95": 83.20000022649765,
- "p99": 88.60799670219421
- },
- "roundtrip": {
- "p50": 147.0080018043518,
- "p90": 185.7919991016388,
- "p95": 188.06399405002594,
- "p99": 192.25600361824036
- },
- "isolatedSum": {
- "p50": 131.29599764943123,
- "p90": 164.15999829769135,
- "p95": 167.83999651670456,
- "p99": 179.6479970216751
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 69.60000097751617,
- "p90": 85.69599688053131,
- "p95": 87.99999952316284,
- "p99": 100.8640006184578
- },
- "combine": {
- "p50": 80.6720033288002,
- "p90": 92.70399808883667,
- "p95": 93.66399794816971,
- "p99": 97.4079966545105
- },
- "roundtrip": {
- "p50": 160.70400178432465,
- "p90": 200.83199441432953,
- "p95": 203.19999754428864,
- "p99": 211.5200012922287
- },
- "isolatedSum": {
- "p50": 150.27200430631638,
- "p90": 178.39999496936798,
- "p95": 181.66399747133255,
- "p99": 198.2719972729683
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 77.05599814653397,
- "p90": 91.96799993515015,
- "p95": 94.43199634552002,
- "p99": 99.32799637317657
- },
- "combine": {
- "p50": 97.53599762916565,
- "p90": 109.37599837779999,
- "p95": 110.68800091743469,
- "p99": 115.7120019197464
- },
- "roundtrip": {
- "p50": 203.80799472332,
- "p90": 219.9999988079071,
- "p95": 222.59199619293213,
- "p99": 236.4799976348877
- },
- "isolatedSum": {
- "p50": 174.59199577569962,
- "p90": 201.34399831295013,
- "p95": 205.1199972629547,
- "p99": 215.03999829292297
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f1a3625a",
- "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_7720baf2",
- "comparisonKey": "800e526f613bc59d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:49:09.827299+00:00",
- "status": "valid",
- "publicationStatus": "diagnostic",
- "runner": "h100-dgxc-slurm_04",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 LL",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271594334",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334",
- "createdAt": "2026-06-26T23:49:09.827299+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 30.81599995493889,
- "p90": 33.824000507593155,
- "p95": 36.67199984192848,
- "p99": 41.760001331567764
- },
- "combine": {
- "p50": 33.535998314619064,
- "p90": 36.06399893760681,
- "p95": 38.656000047922134,
- "p99": 94.62399780750275
- },
- "roundtrip": {
- "p50": 2063.647985458374,
- "p90": 2066.3039684295654,
- "p95": 2067.5199031829834,
- "p99": 2072.1280574798584
- },
- "isolatedSum": {
- "p50": 64.35199826955795,
- "p90": 69.88799944519997,
- "p95": 75.32799988985062,
- "p99": 136.3839991390705
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 30.688000842928886,
- "p90": 33.440001308918,
- "p95": 35.32800078392029,
- "p99": 41.85599833726883
- },
- "combine": {
- "p50": 35.10399907827377,
- "p90": 39.135999977588654,
- "p95": 60.99199876189232,
- "p99": 184.2239946126938
- },
- "roundtrip": {
- "p50": 2065.023899078369,
- "p90": 2067.647933959961,
- "p95": 2069.279909133911,
- "p99": 2082.5600624084473
- },
- "isolatedSum": {
- "p50": 65.79199992120266,
- "p90": 72.57600128650665,
- "p95": 96.3199995458126,
- "p99": 226.07999294996262
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 30.527999624609947,
- "p90": 32.70399942994118,
- "p95": 34.33600068092346,
- "p99": 38.72000053524971
- },
- "combine": {
- "p50": 34.71999987959862,
- "p90": 36.896001547575,
- "p95": 37.82400116324425,
- "p99": 40.672000497579575
- },
- "roundtrip": {
- "p50": 2065.7920837402344,
- "p90": 2069.4079399108887,
- "p95": 2074.079990386963,
- "p99": 2120.703935623169
- },
- "isolatedSum": {
- "p50": 65.24799950420856,
- "p90": 69.60000097751617,
- "p95": 72.16000184416771,
- "p99": 79.39200103282928
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 31.007999554276466,
- "p90": 33.24799984693527,
- "p95": 35.45600175857544,
- "p99": 42.11200028657913
- },
- "combine": {
- "p50": 35.74400022625923,
- "p90": 38.62399980425835,
- "p95": 39.903998374938965,
- "p99": 44.12800073623657
- },
- "roundtrip": {
- "p50": 2066.240072250366,
- "p90": 2069.6959495544434,
- "p95": 2070.784091949463,
- "p99": 2073.9200115203857
- },
- "isolatedSum": {
- "p50": 66.7519997805357,
- "p90": 71.87199965119362,
- "p95": 75.3600001335144,
- "p99": 86.2400010228157
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 32.32000023126602,
- "p90": 39.103999733924866,
- "p95": 52.799999713897705,
- "p99": 55.36000058054924
- },
- "combine": {
- "p50": 38.656000047922134,
- "p90": 41.79200157523155,
- "p95": 42.97599941492081,
- "p99": 47.520000487565994
- },
- "roundtrip": {
- "p50": 2071.9680786132812,
- "p90": 2074.592113494873,
- "p95": 2075.615882873535,
- "p99": 2079.7760486602783
- },
- "isolatedSum": {
- "p50": 70.97600027918816,
- "p90": 80.89600130915642,
- "p95": 95.77599912881851,
- "p99": 102.88000106811523
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 36.3520011305809,
- "p90": 38.11199963092804,
- "p95": 40.22400081157684,
- "p99": 45.951999723911285
- },
- "combine": {
- "p50": 47.968000173568726,
- "p90": 50.87999999523163,
- "p95": 51.83999985456467,
- "p99": 58.04799869656563
- },
- "roundtrip": {
- "p50": 2082.7200412750244,
- "p90": 2085.2479934692383,
- "p95": 2086.2081050872803,
- "p99": 2089.1199111938477
- },
- "isolatedSum": {
- "p50": 84.32000130414963,
- "p90": 88.99199962615967,
- "p95": 92.06400066614151,
- "p99": 103.99999842047691
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 41.600000113248825,
- "p90": 51.00800096988678,
- "p95": 52.12799832224846,
- "p99": 55.1999993622303
- },
- "combine": {
- "p50": 60.67200005054474,
- "p90": 68.67200136184692,
- "p95": 71.68000191450119,
- "p99": 97.08800166845322
- },
- "roundtrip": {
- "p50": 2101.8240451812744,
- "p90": 2108.736038208008,
- "p95": 2111.936092376709,
- "p99": 2120.1279163360596
- },
- "isolatedSum": {
- "p50": 102.27200016379356,
- "p90": 119.6800023317337,
- "p95": 123.80800023674965,
- "p99": 152.28800103068352
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 54.016001522541046,
- "p90": 56.223999708890915,
- "p95": 57.312000542879105,
- "p99": 60.575999319553375
- },
- "combine": {
- "p50": 88.54400366544724,
- "p90": 91.93599969148636,
- "p95": 92.70399808883667,
- "p99": 114.81600254774094
- },
- "roundtrip": {
- "p50": 2143.0718898773193,
- "p90": 2146.7199325561523,
- "p95": 2147.455930709839,
- "p99": 2153.791904449463
- },
- "isolatedSum": {
- "p50": 142.56000518798828,
- "p90": 148.15999940037727,
- "p95": 150.01599863171577,
- "p99": 175.3920018672943
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-73d1725a",
- "identity": "h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_3a77ee8e",
- "comparisonKey": "93509525aa3f27c6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:49:16.484836+00:00",
- "status": "valid",
- "publicationStatus": "diagnostic",
- "runner": "h100-dgxc-slurm_13",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 LL",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271598000",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000",
- "createdAt": "2026-06-26T23:49:16.484836+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 31.10400028526783,
- "p90": 33.376000821590424,
- "p95": 34.88000109791756,
- "p99": 39.264000952243805
- },
- "combine": {
- "p50": 32.575998455286026,
- "p90": 35.32800078392029,
- "p95": 36.928001791238785,
- "p99": 40.41599854826927
- },
- "roundtrip": {
- "p50": 2062.4639987945557,
- "p90": 2065.1841163635254,
- "p95": 2067.9678916931152,
- "p99": 2091.871976852417
- },
- "isolatedSum": {
- "p50": 63.679998740553856,
- "p90": 68.70400160551071,
- "p95": 71.80800288915634,
- "p99": 79.67999950051308
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 30.719999223947525,
- "p90": 32.99200162291527,
- "p95": 35.551998764276505,
- "p99": 40.64000025391579
- },
- "combine": {
- "p50": 32.735999673604965,
- "p90": 35.00799834728241,
- "p95": 36.3520011305809,
- "p99": 43.807998299598694
- },
- "roundtrip": {
- "p50": 2063.136100769043,
- "p90": 2065.376043319702,
- "p95": 2067.296028137207,
- "p99": 2071.039915084839
- },
- "isolatedSum": {
- "p50": 63.45599889755249,
- "p90": 67.99999997019768,
- "p95": 71.9039998948574,
- "p99": 84.44799855351448
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 32.35200047492981,
- "p90": 46.65600135922432,
- "p95": 47.42399975657463,
- "p99": 53.279999643564224
- },
- "combine": {
- "p50": 33.824000507593155,
- "p90": 36.768000572919846,
- "p95": 39.07199949026108,
- "p99": 50.783999264240265
- },
- "roundtrip": {
- "p50": 2064.095973968506,
- "p90": 2066.9119358062744,
- "p95": 2069.567918777466,
- "p99": 2080.512046813965
- },
- "isolatedSum": {
- "p50": 66.17600098252296,
- "p90": 83.42400193214417,
- "p95": 86.49599924683571,
- "p99": 104.06399890780449
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 31.90400078892708,
- "p90": 34.04799848794937,
- "p95": 35.74400022625923,
- "p99": 39.77600112557411
- },
- "combine": {
- "p50": 34.17599946260452,
- "p90": 36.22400015592575,
- "p95": 37.53599897027016,
- "p99": 42.208001017570496
- },
- "roundtrip": {
- "p50": 2065.279960632324,
- "p90": 2068.416118621826,
- "p95": 2070.6560611724854,
- "p99": 2080.8000564575195
- },
- "isolatedSum": {
- "p50": 66.0800002515316,
- "p90": 70.27199864387512,
- "p95": 73.27999919652939,
- "p99": 81.98400214314461
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 32.86400064826012,
- "p90": 34.432001411914825,
- "p95": 36.25600039958954,
- "p99": 40.73600098490715
- },
- "combine": {
- "p50": 37.88800165057182,
- "p90": 44.67200115323067,
- "p95": 46.30399867892265,
- "p99": 69.24799829721451
- },
- "roundtrip": {
- "p50": 2071.1679458618164,
- "p90": 2079.5199871063232,
- "p95": 2080.4800987243652,
- "p99": 2085.439920425415
- },
- "isolatedSum": {
- "p50": 70.75200229883194,
- "p90": 79.10400256514549,
- "p95": 82.55999907851219,
- "p99": 109.98399928212166
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 35.00799834728241,
- "p90": 36.928001791238785,
- "p95": 39.07199949026108,
- "p99": 41.98399931192398
- },
- "combine": {
- "p50": 43.68000105023384,
- "p90": 45.72800174355507,
- "p95": 46.879999339580536,
- "p99": 52.480001002550125
- },
- "roundtrip": {
- "p50": 2079.263925552368,
- "p90": 2081.279993057251,
- "p95": 2082.5281143188477,
- "p99": 2086.1759185791016
- },
- "isolatedSum": {
- "p50": 78.68799939751625,
- "p90": 82.65600353479385,
- "p95": 85.95199882984161,
- "p99": 94.4640003144741
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 42.11200028657913,
- "p90": 65.15199691057205,
- "p95": 74.36800003051758,
- "p99": 88.99199962615967
- },
- "combine": {
- "p50": 58.9120015501976,
- "p90": 63.87200206518173,
- "p95": 64.80000168085098,
- "p99": 71.45600020885468
- },
- "roundtrip": {
- "p50": 2100.9280681610107,
- "p90": 2110.1760864257812,
- "p95": 2111.2639904022217,
- "p99": 2114.367961883545
- },
- "isolatedSum": {
- "p50": 101.02400183677673,
- "p90": 129.02399897575378,
- "p95": 139.16800171136856,
- "p99": 160.44799983501434
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 53.18399891257286,
- "p90": 54.78399991989136,
- "p95": 56.60799890756607,
- "p99": 61.535999178886414
- },
- "combine": {
- "p50": 85.75999736785889,
- "p90": 88.03199976682663,
- "p95": 89.12000060081482,
- "p99": 95.29600292444229
- },
- "roundtrip": {
- "p50": 2140.671968460083,
- "p90": 2143.5201168060303,
- "p95": 2145.632028579712,
- "p99": 2288.991928100586
- },
- "isolatedSum": {
- "p50": 138.94399628043175,
- "p90": 142.815999686718,
- "p95": 145.7279995083809,
- "p99": 156.8320021033287
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-1d30dd2c",
- "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h100_ac25b0a1",
- "comparisonKey": "405d06288635d74f",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:32:59.549027+00:00",
- "status": "valid",
- "publicationStatus": "diagnostic",
- "runner": "h100-dgxc-slurm_00",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 LL (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254359089",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089",
- "createdAt": "2026-06-26T17:32:59.549027+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 30.432000756263733,
- "p90": 32.32000023126602,
- "p95": 34.143999218940735,
- "p99": 38.015998899936676
- },
- "combine": {
- "p50": 32.287999987602234,
- "p90": 34.78400036692619,
- "p95": 35.87200120091438,
- "p99": 40.383998304605484
- },
- "roundtrip": {
- "p50": 2063.9359951019287,
- "p90": 2065.632104873657,
- "p95": 2066.9760704040527,
- "p99": 2069.6001052856445
- },
- "isolatedSum": {
- "p50": 62.72000074386597,
- "p90": 67.10400059819221,
- "p95": 70.01600041985512,
- "p99": 78.39999720454216
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 30.368000268936157,
- "p90": 32.09599852561951,
- "p95": 34.01599824428558,
- "p99": 37.248000502586365
- },
- "combine": {
- "p50": 32.22399950027466,
- "p90": 34.46400165557861,
- "p95": 35.711999982595444,
- "p99": 45.88799923658371
- },
- "roundtrip": {
- "p50": 2064.768075942993,
- "p90": 2067.13604927063,
- "p95": 2069.024085998535,
- "p99": 2083.7440490722656
- },
- "isolatedSum": {
- "p50": 62.591999769210815,
- "p90": 66.56000018119812,
- "p95": 69.72799822688103,
- "p99": 83.13599973917007
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 30.527999624609947,
- "p90": 32.54399821162224,
- "p95": 35.26400029659271,
- "p99": 40.063999593257904
- },
- "combine": {
- "p50": 34.2399999499321,
- "p90": 37.53599897027016,
- "p95": 38.24000060558319,
- "p99": 40.031999349594116
- },
- "roundtrip": {
- "p50": 2065.376043319702,
- "p90": 2067.3279762268066,
- "p95": 2068.3200359344482,
- "p99": 2070.5599784851074
- },
- "isolatedSum": {
- "p50": 64.76799957454205,
- "p90": 70.0799971818924,
- "p95": 73.5040009021759,
- "p99": 80.09599894285202
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 31.231999397277832,
- "p90": 33.055998384952545,
- "p95": 35.61599925160408,
- "p99": 38.94399851560593
- },
- "combine": {
- "p50": 33.76000002026558,
- "p90": 35.999998450279236,
- "p95": 37.76000067591667,
- "p99": 53.888000547885895
- },
- "roundtrip": {
- "p50": 2066.528081893921,
- "p90": 2068.511962890625,
- "p95": 2069.6959495544434,
- "p99": 2078.07993888855
- },
- "isolatedSum": {
- "p50": 64.99199941754341,
- "p90": 69.05599683523178,
- "p95": 73.37599992752075,
- "p99": 92.83199906349182
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 32.51200169324875,
- "p90": 34.20799970626831,
- "p95": 36.86400130391121,
- "p99": 40.09599983692169
- },
- "combine": {
- "p50": 37.21600025892258,
- "p90": 39.45599868893623,
- "p95": 40.41599854826927,
- "p99": 42.399998754262924
- },
- "roundtrip": {
- "p50": 2071.392059326172,
- "p90": 2074.687957763672,
- "p95": 2078.7200927734375,
- "p99": 2156.5120220184326
- },
- "isolatedSum": {
- "p50": 69.72800195217133,
- "p90": 73.66399839520454,
- "p95": 77.27999985218048,
- "p99": 82.49599859118462
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 35.10399907827377,
- "p90": 36.38400137424469,
- "p95": 38.43199834227562,
- "p99": 42.208001017570496
- },
- "combine": {
- "p50": 42.7200011909008,
- "p90": 44.89599913358688,
- "p95": 45.66400125622749,
- "p99": 48.70399832725525
- },
- "roundtrip": {
- "p50": 2080.22403717041,
- "p90": 2081.9520950317383,
- "p95": 2083.359956741333,
- "p99": 2118.4639930725098
- },
- "isolatedSum": {
- "p50": 77.82400026917458,
- "p90": 81.28000050783157,
- "p95": 84.09599959850311,
- "p99": 90.91199934482574
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 42.047999799251556,
- "p90": 47.90399968624115,
- "p95": 48.8319993019104,
- "p99": 53.119998425245285
- },
- "combine": {
- "p50": 57.40800127387047,
- "p90": 62.68800050020218,
- "p95": 64.51199948787689,
- "p99": 67.03999638557434
- },
- "roundtrip": {
- "p50": 2100.5120277404785,
- "p90": 2108.383893966675,
- "p95": 2109.503984451294,
- "p99": 2111.9039058685303
- },
- "isolatedSum": {
- "p50": 99.45600107312202,
- "p90": 110.59200018644333,
- "p95": 113.34399878978729,
- "p99": 120.15999481081963
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 53.599998354911804,
- "p90": 55.39200082421303,
- "p95": 56.41600117087364,
- "p99": 61.08799949288368
- },
- "combine": {
- "p50": 83.5840031504631,
- "p90": 86.11200004816055,
- "p95": 87.2960016131401,
- "p99": 91.51999652385712
- },
- "roundtrip": {
- "p50": 2139.967918395996,
- "p90": 2142.303943634033,
- "p95": 2142.911911010742,
- "p99": 2144.831895828247
- },
- "isolatedSum": {
- "p50": 137.1840015053749,
- "p90": 141.50400087237358,
- "p95": 143.71200278401375,
- "p99": 152.6079960167408
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-d35502c2",
- "identity": "h100|deepep|7168|8|256|fp8-directcast|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_805b6904",
- "comparisonKey": "a3be04b3aa017ede",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T15:55:34.014711+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_06",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8-directcast",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8-directcast",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28294158591",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294158591",
- "createdAt": "2026-06-27T15:55:34.014711+00:00",
- "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 97.50399738550186,
- "p90": 102.27199643850327,
- "p95": 104.70400005578995,
- "p99": 110.75200140476227
- },
- "combine": {
- "p50": 73.60000163316727,
- "p90": 75.42400062084198,
- "p95": 76.92799717187881,
- "p99": 80.48000186681747
- },
- "roundtrip": {
- "p50": 193.79200041294098,
- "p90": 199.26400482654572,
- "p95": 201.47199928760529,
- "p99": 205.79199492931366
- },
- "isolatedSum": {
- "p50": 171.10399901866913,
- "p90": 177.69599705934525,
- "p95": 181.63199722766876,
- "p99": 191.23200327157974
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 71.10399752855301,
- "p90": 98.88000041246414,
- "p95": 101.59999877214432,
- "p99": 105.50399869680405
- },
- "combine": {
- "p50": 62.55999952554703,
- "p90": 71.07199728488922,
- "p95": 71.74400240182877,
- "p99": 74.81600344181061
- },
- "roundtrip": {
- "p50": 154.01600301265717,
- "p90": 193.12000274658203,
- "p95": 195.3279972076416,
- "p99": 198.91199469566345
- },
- "isolatedSum": {
- "p50": 133.66399705410004,
- "p90": 169.95199769735336,
- "p95": 173.34400117397308,
- "p99": 180.32000213861465
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 71.16799801588058,
- "p90": 103.2319962978363,
- "p95": 105.18400371074677,
- "p99": 110.52799969911575
- },
- "combine": {
- "p50": 63.968002796173096,
- "p90": 75.99999755620956,
- "p95": 77.98399776220322,
- "p99": 81.66400343179703
- },
- "roundtrip": {
- "p50": 154.62400019168854,
- "p90": 201.02399587631226,
- "p95": 203.99999618530273,
- "p99": 212.0320051908493
- },
- "isolatedSum": {
- "p50": 135.13600081205368,
- "p90": 179.23199385404587,
- "p95": 183.16800147294998,
- "p99": 192.19200313091278
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 71.45600020885468,
- "p90": 100.73599964380264,
- "p95": 102.78400033712387,
- "p99": 107.55199939012527
- },
- "combine": {
- "p50": 63.840001821517944,
- "p90": 74.97599720954895,
- "p95": 76.19199901819229,
- "p99": 83.29600095748901
- },
- "roundtrip": {
- "p50": 155.42399883270264,
- "p90": 199.68000054359436,
- "p95": 201.9840031862259,
- "p99": 291.6480004787445
- },
- "isolatedSum": {
- "p50": 135.29600203037262,
- "p90": 175.7119968533516,
- "p95": 178.97599935531616,
- "p99": 190.8480003476143
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 71.42399996519089,
- "p90": 100.89600086212158,
- "p95": 103.00800204277039,
- "p99": 108.22399705648422
- },
- "combine": {
- "p50": 65.5680000782013,
- "p90": 77.08799839019775,
- "p95": 78.5600021481514,
- "p99": 82.91199803352356
- },
- "roundtrip": {
- "p50": 157.18400478363037,
- "p90": 202.04800367355347,
- "p95": 204.76800203323364,
- "p99": 209.4080001115799
- },
- "isolatedSum": {
- "p50": 136.99200004339218,
- "p90": 177.98399925231934,
- "p95": 181.56800419092178,
- "p99": 191.13599509000778
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 73.18399846553802,
- "p90": 101.08800232410431,
- "p95": 102.88000106811523,
- "p99": 106.81600123643875
- },
- "combine": {
- "p50": 71.35999947786331,
- "p90": 82.84799754619598,
- "p95": 83.67999643087387,
- "p99": 86.94399893283844
- },
- "roundtrip": {
- "p50": 162.04799711704254,
- "p90": 207.23199844360352,
- "p95": 209.34399962425232,
- "p99": 212.41599321365356
- },
- "isolatedSum": {
- "p50": 144.54399794340134,
- "p90": 183.9359998703003,
- "p95": 186.5599974989891,
- "p99": 193.7600001692772
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 81.34400099515915,
- "p90": 105.43999820947647,
- "p95": 109.6000000834465,
- "p99": 460.54399013519287
- },
- "combine": {
- "p50": 80.64000308513641,
- "p90": 92.99200028181076,
- "p95": 94.24000233411789,
- "p99": 98.55999797582626
- },
- "roundtrip": {
- "p50": 174.01599884033203,
- "p90": 220.5439954996109,
- "p95": 222.91199862957,
- "p99": 228.2239943742752
- },
- "isolatedSum": {
- "p50": 161.98400408029556,
- "p90": 198.43199849128723,
- "p95": 203.8400024175644,
- "p99": 559.1039881110191
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 88.8959988951683,
- "p90": 109.63200032711029,
- "p95": 111.32799834012985,
- "p99": 116.70400202274323
- },
- "combine": {
- "p50": 98.88000041246414,
- "p90": 111.10399663448334,
- "p95": 112.64000087976456,
- "p99": 115.07199704647064
- },
- "roundtrip": {
- "p50": 215.61600267887115,
- "p90": 238.43200504779816,
- "p95": 240.76800048351288,
- "p99": 245.15199661254883
- },
- "isolatedSum": {
- "p50": 187.77599930763245,
- "p90": 220.73599696159363,
- "p95": 223.9679992198944,
- "p99": 231.77599906921387
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-779ba710",
- "identity": "h100|deepep|7168|8|256|fp8-pertoken|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_b68ae8a2",
- "comparisonKey": "6d2a2c2b7775de32",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T15:55:42.044043+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_05",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8-pertoken",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8-pertoken",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28294162181",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294162181",
- "createdAt": "2026-06-27T15:55:42.044043+00:00",
- "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 94.59199756383896,
- "p90": 99.35999661684036,
- "p95": 101.56799852848053,
- "p99": 106.1440035700798
- },
- "combine": {
- "p50": 68.4799998998642,
- "p90": 71.23199850320816,
- "p95": 72.22399860620499,
- "p99": 76.06399804353714
- },
- "roundtrip": {
- "p50": 184.79999899864197,
- "p90": 190.72000682353973,
- "p95": 192.9280012845993,
- "p99": 197.9839950799942
- },
- "isolatedSum": {
- "p50": 163.07199746370316,
- "p90": 170.59199512004852,
- "p95": 173.79199713468552,
- "p99": 182.20800161361694
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 70.78400254249573,
- "p90": 95.07200121879578,
- "p95": 97.18400239944458,
- "p99": 103.13600301742554
- },
- "combine": {
- "p50": 62.463998794555664,
- "p90": 70.97599655389786,
- "p95": 71.52000069618225,
- "p99": 75.96799731254578
- },
- "roundtrip": {
- "p50": 151.8400013446808,
- "p90": 189.28000330924988,
- "p95": 190.75199961662292,
- "p99": 195.26399672031403
- },
- "isolatedSum": {
- "p50": 133.2480013370514,
- "p90": 166.04799777269363,
- "p95": 168.70400309562683,
- "p99": 179.1040003299713
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 94.11200135946274,
- "p90": 101.34399682283401,
- "p95": 105.12000322341919,
- "p99": 111.42399907112122
- },
- "combine": {
- "p50": 71.48800045251846,
- "p90": 75.68000257015228,
- "p95": 77.08799839019775,
- "p99": 80.32000064849854
- },
- "roundtrip": {
- "p50": 152.92799472808838,
- "p90": 198.0160027742386,
- "p95": 201.1840045452118,
- "p99": 207.64799416065216
- },
- "isolatedSum": {
- "p50": 165.6000018119812,
- "p90": 177.0239993929863,
- "p95": 182.20800161361694,
- "p99": 191.74399971961975
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 94.65599805116653,
- "p90": 100.92800110578537,
- "p95": 103.10400277376175,
- "p99": 107.16799646615982
- },
- "combine": {
- "p50": 73.18399846553802,
- "p90": 74.81600344181061,
- "p95": 76.19199901819229,
- "p99": 79.29600030183792
- },
- "roundtrip": {
- "p50": 187.83999979496002,
- "p90": 195.45599818229675,
- "p95": 197.28000462055206,
- "p99": 202.84800231456757
- },
- "isolatedSum": {
- "p50": 167.83999651670456,
- "p90": 175.74400454759598,
- "p95": 179.29600179195404,
- "p99": 186.46399676799774
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 94.40000355243683,
- "p90": 100.70399940013885,
- "p95": 102.55999863147736,
- "p99": 108.51199924945831
- },
- "combine": {
- "p50": 74.75200295448303,
- "p90": 77.44000107049942,
- "p95": 79.83999699354172,
- "p99": 83.42400193214417
- },
- "roundtrip": {
- "p50": 192.1280026435852,
- "p90": 201.05600357055664,
- "p95": 204.28800582885742,
- "p99": 209.4080001115799
- },
- "isolatedSum": {
- "p50": 169.15200650691986,
- "p90": 178.14400047063828,
- "p95": 182.39999562501907,
- "p99": 191.93600118160248
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 73.21599870920181,
- "p90": 100.51199793815613,
- "p95": 102.75200009346008,
- "p99": 106.36799782514572
- },
- "combine": {
- "p50": 71.16799801588058,
- "p90": 82.0159986615181,
- "p95": 83.00799876451492,
- "p99": 86.11200004816055
- },
- "roundtrip": {
- "p50": 160.76800227165222,
- "p90": 204.3199986219406,
- "p95": 207.10399746894836,
- "p99": 212.0639979839325
- },
- "isolatedSum": {
- "p50": 144.3839967250824,
- "p90": 182.52799659967422,
- "p95": 185.759998857975,
- "p99": 192.47999787330627
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 95.551997423172,
- "p90": 104.25599664449692,
- "p95": 106.88000172376633,
- "p99": 122.65600264072418
- },
- "combine": {
- "p50": 89.88799899816513,
- "p90": 92.54399687051773,
- "p95": 94.04800087213516,
- "p99": 97.24800288677216
- },
- "roundtrip": {
- "p50": 207.8080028295517,
- "p90": 219.16800737380981,
- "p95": 221.66399657726288,
- "p99": 228.44800353050232
- },
- "isolatedSum": {
- "p50": 185.43999642133713,
- "p90": 196.79999351501465,
- "p95": 200.9280025959015,
- "p99": 219.90400552749634
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 88.86399865150452,
- "p90": 107.77600109577179,
- "p95": 110.23999750614166,
- "p99": 115.61600118875504
- },
- "combine": {
- "p50": 98.78399968147278,
- "p90": 110.49599945545197,
- "p95": 111.77600175142288,
- "p99": 115.13599753379822
- },
- "roundtrip": {
- "p50": 216.8000042438507,
- "p90": 236.38400435447693,
- "p95": 240.57599902153015,
- "p99": 246.14399671554565
- },
- "isolatedSum": {
- "p50": 187.6479983329773,
- "p90": 218.27200055122375,
- "p95": 222.01599925756454,
- "p99": 230.75199872255325
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-108bdec2",
- "identity": "h100|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "h100_42947950",
- "comparisonKey": "5aeeda2cd42e92cb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:50.229059+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_10",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287504962",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287504962",
- "createdAt": "2026-06-27T11:13:50.229059+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 94.17600184679031,
- "p90": 98.43199700117111,
- "p95": 100.25600343942642,
- "p99": 104.47999835014343
- },
- "combine": {
- "p50": 87.20000088214874,
- "p90": 89.66399729251862,
- "p95": 90.40000289678574,
- "p99": 95.42399644851685
- },
- "roundtrip": {
- "p50": 156.51200711727142,
- "p90": 162.20800578594208,
- "p95": 163.455992937088,
- "p99": 169.53599452972412
- },
- "isolatedSum": {
- "p50": 181.37600272893906,
- "p90": 188.09599429368973,
- "p95": 190.65600633621216,
- "p99": 199.90399479866028
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 114.56000059843063,
- "p90": 127.26399302482605,
- "p95": 130.78400492668152,
- "p99": 137.11999356746674
- },
- "combine": {
- "p50": 112.15999722480774,
- "p90": 115.35999923944473,
- "p95": 118.75200271606445,
- "p99": 122.5920021533966
- },
- "roundtrip": {
- "p50": 197.02400267124176,
- "p90": 202.33599841594696,
- "p95": 204.57600057125092,
- "p99": 207.68000185489655
- },
- "isolatedSum": {
- "p50": 226.71999782323837,
- "p90": 242.62399226427078,
- "p95": 249.53600764274597,
- "p99": 259.71199572086334
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 89726976,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 153.6639928817749,
- "p90": 169.855996966362,
- "p95": 171.7119961977005,
- "p99": 176.32000148296356
- },
- "combine": {
- "p50": 167.71200299263,
- "p90": 180.38399517536163,
- "p95": 182.43199586868286,
- "p99": 184.1599941253662
- },
- "roundtrip": {
- "p50": 289.6000146865845,
- "p90": 307.45598673820496,
- "p95": 310.07999181747437,
- "p99": 317.1519935131073
- },
- "isolatedSum": {
- "p50": 321.3759958744049,
- "p90": 350.23999214172363,
- "p95": 354.14399206638336,
- "p99": 360.4799956083298
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 179503104,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 216.8319970369339,
- "p90": 221.02400660514832,
- "p95": 222.46399521827698,
- "p99": 227.2000014781952
- },
- "combine": {
- "p50": 277.0240008831024,
- "p90": 282.78398513793945,
- "p95": 284.2879891395569,
- "p99": 288.4480059146881
- },
- "roundtrip": {
- "p50": 469.4080054759979,
- "p90": 475.23200511932373,
- "p95": 476.83200240135193,
- "p99": 480.3520143032074
- },
- "isolatedSum": {
- "p50": 493.8559979200363,
- "p90": 503.80799174308777,
- "p95": 506.75198435783386,
- "p99": 515.6480073928833
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 359022592,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 361.2799942493439,
- "p90": 374.208003282547,
- "p95": 379.2960047721863,
- "p99": 538.752019405365
- },
- "combine": {
- "p50": 470.5600142478943,
- "p90": 482.87999629974365,
- "p95": 485.0879907608032,
- "p99": 490.81599712371826
- },
- "roundtrip": {
- "p50": 804.4800162315369,
- "p90": 820.2239871025085,
- "p95": 825.3120183944702,
- "p99": 835.3919982910156
- },
- "isolatedSum": {
- "p50": 831.8400084972382,
- "p90": 857.0879995822906,
- "p95": 864.3839955329895,
- "p99": 1029.5680165290833
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 716111872,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 640.5439972877502,
- "p90": 648.4159827232361,
- "p95": 651.7760157585144,
- "p99": 662.015974521637
- },
- "combine": {
- "p50": 846.176028251648,
- "p90": 854.9759984016418,
- "p95": 857.5360178947449,
- "p99": 862.8479838371277
- },
- "roundtrip": {
- "p50": 1459.9039554595947,
- "p90": 1470.5599546432495,
- "p95": 1474.4000434875488,
- "p99": 1484.1920137405396
- },
- "isolatedSum": {
- "p50": 1486.7200255393982,
- "p90": 1503.391981124878,
- "p95": 1509.3120336532593,
- "p99": 1524.8639583587646
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1432395776,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8265fe0e",
- "identity": "h100|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "d0edce95a580d060",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:52:06.777183+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_13",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271688175",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271688175",
- "createdAt": "2026-06-26T23:52:06.777183+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 92.32000261545181,
- "p90": 96.41599655151367,
- "p95": 98.39999675750732,
- "p99": 104.22399640083313
- },
- "combine": {
- "p50": 86.97599917650223,
- "p90": 88.41600269079208,
- "p95": 89.50400352478027,
- "p99": 93.31200271844864
- },
- "roundtrip": {
- "p50": 156.73600137233734,
- "p90": 160.70400178432465,
- "p95": 161.6639941930771,
- "p99": 166.04800522327423
- },
- "isolatedSum": {
- "p50": 179.29600179195404,
- "p90": 184.83199924230576,
- "p95": 187.9040002822876,
- "p99": 197.53599911928177
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 111.10399663448334,
- "p90": 133.98399949073792,
- "p95": 135.96799969673157,
- "p99": 139.96799290180206
- },
- "combine": {
- "p50": 112.99200356006622,
- "p90": 121.47200107574463,
- "p95": 122.01599776744843,
- "p99": 128.35200130939484
- },
- "roundtrip": {
- "p50": 202.72000133991241,
- "p90": 217.6000028848648,
- "p95": 219.39200162887573,
- "p99": 223.7440049648285
- },
- "isolatedSum": {
- "p50": 224.09600019454956,
- "p90": 255.45600056648254,
- "p95": 257.98399746418,
- "p99": 268.3199942111969
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 89726976,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 153.08800339698792,
- "p90": 166.9439971446991,
- "p95": 168.67199540138245,
- "p99": 175.55199563503265
- },
- "combine": {
- "p50": 168.92799735069275,
- "p90": 181.15200102329254,
- "p95": 183.07200074195862,
- "p99": 186.0480010509491
- },
- "roundtrip": {
- "p50": 291.29600524902344,
- "p90": 307.45598673820496,
- "p95": 309.6959888935089,
- "p99": 313.9199912548065
- },
- "isolatedSum": {
- "p50": 322.01600074768066,
- "p90": 348.09599816799164,
- "p95": 351.74399614334106,
- "p99": 361.59999668598175
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 179503104,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 219.26400065422058,
- "p90": 230.71999847888947,
- "p95": 234.9119931459427,
- "p99": 238.62400650978088
- },
- "combine": {
- "p50": 274.04800057411194,
- "p90": 280.5440127849579,
- "p95": 281.69599175453186,
- "p99": 284.1919958591461
- },
- "roundtrip": {
- "p50": 467.4240052700043,
- "p90": 473.2159972190857,
- "p95": 475.8079946041107,
- "p99": 479.2639911174774
- },
- "isolatedSum": {
- "p50": 493.3120012283325,
- "p90": 511.26401126384735,
- "p95": 516.6079849004745,
- "p99": 522.816002368927
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 359022592,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 360.79999804496765,
- "p90": 374.36801195144653,
- "p95": 376.5760064125061,
- "p99": 380.2880048751831
- },
- "combine": {
- "p50": 465.88799357414246,
- "p90": 475.77598690986633,
- "p95": 478.4319996833801,
- "p99": 481.53600096702576
- },
- "roundtrip": {
- "p50": 799.1999983787537,
- "p90": 816.6720271110535,
- "p95": 819.8080062866211,
- "p99": 824.7680068016052
- },
- "isolatedSum": {
- "p50": 826.6879916191101,
- "p90": 850.1439988613129,
- "p95": 855.0080060958862,
- "p99": 861.8240058422089
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 716111872,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 638.975977897644,
- "p90": 648.1279730796814,
- "p95": 652.7040004730225,
- "p99": 661.1520051956177
- },
- "combine": {
- "p50": 848.4799861907959,
- "p90": 856.8000197410583,
- "p95": 859.5520257949829,
- "p99": 898.5919952392578
- },
- "roundtrip": {
- "p50": 1462.623953819275,
- "p90": 1474.079966545105,
- "p95": 1478.4959554672241,
- "p99": 1489.3120527267456
- },
- "isolatedSum": {
- "p50": 1487.45596408844,
- "p90": 1504.9279928207397,
- "p95": 1512.2560262680054,
- "p99": 1559.7440004348755
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1432395776,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-2dcc1e5c",
- "identity": "h100|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "69b861c40f88be42",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:59.492832+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_06",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "shape 5120/8/160",
- "shape": {
- "hidden": 5120,
- "topk": 8,
- "experts": 160,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "0c022a63bbcbf42",
- "workloadId": "set:6:28c0c09b13ff0acf",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271702702",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271702702",
- "createdAt": "2026-06-26T23:51:59.492832+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 99.45599734783173,
- "p90": 105.05600273609161,
- "p95": 106.04800283908844,
- "p99": 110.23999750614166
- },
- "combine": {
- "p50": 95.58399766683578,
- "p90": 97.47199714183807,
- "p95": 98.39999675750732,
- "p99": 102.9760017991066
- },
- "roundtrip": {
- "p50": 170.33599317073822,
- "p90": 175.10400712490082,
- "p95": 177.85599827766418,
- "p99": 179.58399653434753
- },
- "isolatedSum": {
- "p50": 195.0399950146675,
- "p90": 202.5279998779297,
- "p95": 204.44799959659576,
- "p99": 213.21599930524826
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 55674880,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 119.29599940776825,
- "p90": 124.22399967908859,
- "p95": 126.30400061607361,
- "p99": 130.5599957704544
- },
- "combine": {
- "p50": 122.079998254776,
- "p90": 127.80800461769104,
- "p95": 128.67200374603271,
- "p99": 132.9919993877411
- },
- "roundtrip": {
- "p50": 219.32800114154816,
- "p90": 223.1680005788803,
- "p95": 224.5440036058426,
- "p99": 228.7359982728958
- },
- "isolatedSum": {
- "p50": 241.37599766254425,
- "p90": 252.03200429677963,
- "p95": 254.97600436210632,
- "p99": 263.5519951581955
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 111104000,
- "combineLogicalBytes": 111104000,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 1387,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 165.53600132465363,
- "p90": 178.1120002269745,
- "p95": 180.12799322605133,
- "p99": 184.25600230693817
- },
- "combine": {
- "p50": 190.46400487422943,
- "p90": 198.71999323368073,
- "p95": 200.9280025959015,
- "p99": 213.79199624061584
- },
- "roundtrip": {
- "p50": 325.76000690460205,
- "p90": 331.07200264930725,
- "p95": 332.73598551750183,
- "p99": 336.1920118331909
- },
- "isolatedSum": {
- "p50": 356.00000619888306,
- "p90": 376.8319934606552,
- "p95": 381.0559958219528,
- "p99": 398.047998547554
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 223098880,
- "combineLogicalBytes": 223098880,
- "fanoutMean": 5.319091796875,
- "recvTokensMax": 2762,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 244.57600712776184,
- "p90": 249.439999461174,
- "p95": 253.56799364089966,
- "p99": 409.56801176071167
- },
- "combine": {
- "p50": 299.1040050983429,
- "p90": 303.9360046386719,
- "p95": 305.759996175766,
- "p99": 311.0719919204712
- },
- "roundtrip": {
- "p50": 515.7759785652161,
- "p90": 522.2399830818176,
- "p95": 524.1600275039673,
- "p99": 528.8959741592407
- },
- "isolatedSum": {
- "p50": 543.6800122261047,
- "p90": 553.3760040998459,
- "p95": 559.3279898166656,
- "p99": 720.6400036811829
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 446730240,
- "combineLogicalBytes": 446730240,
- "fanoutMean": 5.325439453125,
- "recvTokensMax": 5518,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 413.1520092487335,
- "p90": 423.0720102787018,
- "p95": 426.2399971485138,
- "p99": 432.5760006904602
- },
- "combine": {
- "p50": 515.7439708709717,
- "p90": 523.7119793891907,
- "p95": 526.4319777488708,
- "p99": 530.3360223770142
- },
- "roundtrip": {
- "p50": 898.2080221176147,
- "p90": 911.0400080680847,
- "p95": 915.2960181236267,
- "p99": 921.6639995574951
- },
- "isolatedSum": {
- "p50": 928.8959801197052,
- "p90": 946.7839896678925,
- "p95": 952.6719748973846,
- "p99": 962.9120230674744
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 893634560,
- "combineLogicalBytes": 893634560,
- "fanoutMean": 5.32647705078125,
- "recvTokensMax": 11032,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 745.0559735298157,
- "p90": 758.2719922065735,
- "p95": 762.112021446228,
- "p99": 772.4159955978394
- },
- "combine": {
- "p50": 933.247983455658,
- "p90": 941.9839978218079,
- "p95": 945.1839923858643,
- "p99": 951.3279795646667
- },
- "roundtrip": {
- "p50": 1646.2719440460205,
- "p90": 1661.9199514389038,
- "p95": 1667.3599481582642,
- "p99": 1685.7600212097168
- },
- "isolatedSum": {
- "p50": 1678.3039569854736,
- "p90": 1700.2559900283813,
- "p95": 1707.2960138320923,
- "p99": 1723.743975162506
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1786265600,
- "combineLogicalBytes": 1786265600,
- "fanoutMean": 5.323486328125,
- "recvTokensMax": 21895,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8f627a86",
- "identity": "h100|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_42947950",
- "comparisonKey": "68eaec6b4043581a",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:20.359016+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_06",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287492752",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287492752",
- "createdAt": "2026-06-27T11:13:20.359016+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 111.35999858379364,
- "p90": 115.58400094509125,
- "p95": 116.35199934244156,
- "p99": 121.56800180673599
- },
- "combine": {
- "p50": 97.72799909114838,
- "p90": 103.45599800348282,
- "p95": 104.3199971318245,
- "p99": 108.25599730014801
- },
- "roundtrip": {
- "p50": 183.9359998703003,
- "p90": 187.96800076961517,
- "p95": 189.31199610233307,
- "p99": 192.76799261569977
- },
- "isolatedSum": {
- "p50": 209.08799767494202,
- "p90": 219.03999894857407,
- "p95": 220.67199647426605,
- "p99": 229.823999106884
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 133.15199315547943,
- "p90": 150.27199685573578,
- "p95": 157.56799280643463,
- "p99": 168.2240068912506
- },
- "combine": {
- "p50": 137.05599308013916,
- "p90": 144.03200149536133,
- "p95": 145.50399780273438,
- "p99": 152.79999375343323
- },
- "roundtrip": {
- "p50": 239.74399268627167,
- "p90": 252.70399451255798,
- "p95": 254.17599081993103,
- "p99": 258.2400143146515
- },
- "isolatedSum": {
- "p50": 270.2079862356186,
- "p90": 294.3039983510971,
- "p95": 303.071990609169,
- "p99": 321.02400064468384
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 133619712,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 182.49599635601044,
- "p90": 195.23200392723083,
- "p95": 198.14400374889374,
- "p99": 200.95999538898468
- },
- "combine": {
- "p50": 208.44799280166626,
- "p90": 217.98400580883026,
- "p95": 219.10400688648224,
- "p99": 253.76001000404358
- },
- "roundtrip": {
- "p50": 361.6960048675537,
- "p90": 376.0319948196411,
- "p95": 379.71198558807373,
- "p99": 384.6080005168915
- },
- "isolatedSum": {
- "p50": 390.9439891576767,
- "p90": 413.2160097360611,
- "p95": 417.248010635376,
- "p99": 454.72000539302826
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 267657216,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 274.1119861602783,
- "p90": 283.3920121192932,
- "p95": 286.1120104789734,
- "p99": 290.8160090446472
- },
- "combine": {
- "p50": 330.1120102405548,
- "p90": 336.0320031642914,
- "p95": 336.89600229263306,
- "p99": 341.8560028076172
- },
- "roundtrip": {
- "p50": 577.344000339508,
- "p90": 583.9359760284424,
- "p95": 586.0480070114136,
- "p99": 589.3440246582031
- },
- "isolatedSum": {
- "p50": 604.2239964008331,
- "p90": 619.4240152835846,
- "p95": 623.0080127716064,
- "p99": 632.6720118522644
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 534380544,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 464.03199434280396,
- "p90": 478.59200835227966,
- "p95": 481.6960096359253,
- "p99": 491.5519952774048
- },
- "combine": {
- "p50": 581.4080238342285,
- "p90": 591.5840268135071,
- "p95": 594.6879982948303,
- "p99": 603.5839915275574
- },
- "roundtrip": {
- "p50": 1013.3440494537354,
- "p90": 1023.2000350952148,
- "p95": 1027.008056640625,
- "p99": 1076.6079425811768
- },
- "isolatedSum": {
- "p50": 1045.4400181770325,
- "p90": 1070.1760351657867,
- "p95": 1076.3840079307556,
- "p99": 1095.1359868049622
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1066119168,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 886.1759901046753,
- "p90": 910.2720022201538,
- "p95": 915.5840277671814,
- "p99": 928.8960099220276
- },
- "combine": {
- "p50": 1059.2319965362549,
- "p90": 1067.520022392273,
- "p95": 1070.0160264968872,
- "p99": 1076.8640041351318
- },
- "roundtrip": {
- "p50": 1908.6079597473145,
- "p90": 1929.2479753494263,
- "p95": 1936.3199472427368,
- "p99": 1965.440034866333
- },
- "isolatedSum": {
- "p50": 1945.4079866409302,
- "p90": 1977.7920246124268,
- "p95": 1985.6000542640686,
- "p99": 2005.7600140571594
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2131722240,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-29bbdbee",
- "identity": "h100|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "4401899311d5e08c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:52:30.177352+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_05",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271717621",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271717621",
- "createdAt": "2026-06-26T23:52:30.177352+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 111.42399907112122,
- "p90": 114.94400352239609,
- "p95": 116.03199690580368,
- "p99": 119.61600184440613
- },
- "combine": {
- "p50": 98.33600372076035,
- "p90": 103.71199995279312,
- "p95": 104.67199981212616,
- "p99": 106.4319983124733
- },
- "roundtrip": {
- "p50": 184.9599927663803,
- "p90": 188.63999843597412,
- "p95": 189.66400623321533,
- "p99": 194.11200284957886
- },
- "isolatedSum": {
- "p50": 209.76000279188156,
- "p90": 218.6560034751892,
- "p95": 220.70399671792984,
- "p99": 226.04800015687943
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 132.86399841308594,
- "p90": 137.7599984407425,
- "p95": 139.3280029296875,
- "p99": 142.4960047006607
- },
- "combine": {
- "p50": 137.69599795341492,
- "p90": 140.4159963130951,
- "p95": 141.37600362300873,
- "p99": 145.53600549697876
- },
- "roundtrip": {
- "p50": 237.2480034828186,
- "p90": 242.08000302314758,
- "p95": 243.1039959192276,
- "p99": 246.24000489711761
- },
- "isolatedSum": {
- "p50": 270.55999636650085,
- "p90": 278.1759947538376,
- "p95": 280.7040065526962,
- "p99": 288.03201019763947
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 133619712,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 184.00000035762787,
- "p90": 197.31199741363525,
- "p95": 200.15999674797058,
- "p99": 204.12799715995789
- },
- "combine": {
- "p50": 209.6959948539734,
- "p90": 216.86400473117828,
- "p95": 217.92000532150269,
- "p99": 221.95200622081757
- },
- "roundtrip": {
- "p50": 365.02400040626526,
- "p90": 377.21601128578186,
- "p95": 380.5760145187378,
- "p99": 388.12801241874695
- },
- "isolatedSum": {
- "p50": 393.69599521160126,
- "p90": 414.17600214481354,
- "p95": 418.08000206947327,
- "p99": 426.08000338077545
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 267657216,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 273.21600914001465,
- "p90": 277.44001150131226,
- "p95": 279.87200021743774,
- "p99": 289.3120050430298
- },
- "combine": {
- "p50": 332.41599798202515,
- "p90": 337.119996547699,
- "p95": 338.20798993110657,
- "p99": 341.66398644447327
- },
- "roundtrip": {
- "p50": 577.6320099830627,
- "p90": 582.751989364624,
- "p95": 584.7679972648621,
- "p99": 588.7680053710938
- },
- "isolatedSum": {
- "p50": 605.6320071220398,
- "p90": 614.5600080490112,
- "p95": 618.0799901485443,
- "p99": 630.975991487503
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 534380544,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 464.32000398635864,
- "p90": 473.60000014305115,
- "p95": 477.3760139942169,
- "p99": 648.8320231437683
- },
- "combine": {
- "p50": 584.384024143219,
- "p90": 590.9119844436646,
- "p95": 593.0560231208801,
- "p99": 596.8000292778015
- },
- "roundtrip": {
- "p50": 1019.2320346832275,
- "p90": 1029.6640396118164,
- "p95": 1033.7599515914917,
- "p99": 1037.984013557434
- },
- "isolatedSum": {
- "p50": 1048.7040281295776,
- "p90": 1064.5119845867157,
- "p95": 1070.432037115097,
- "p99": 1245.6320524215698
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1066119168,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 879.423975944519,
- "p90": 904.6720266342163,
- "p95": 913.2480025291443,
- "p99": 928.991973400116
- },
- "combine": {
- "p50": 1065.6960010528564,
- "p90": 1075.3920078277588,
- "p95": 1078.3040523529053,
- "p99": 1084.2560529708862
- },
- "roundtrip": {
- "p50": 1901.9520282745361,
- "p90": 1920.7359552383423,
- "p95": 1926.5919923782349,
- "p99": 1940.1600360870361
- },
- "isolatedSum": {
- "p50": 1945.1199769973755,
- "p90": 1980.064034461975,
- "p95": 1991.5520548820496,
- "p99": 2013.2480263710022
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2131722240,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-d524fd7e",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63",
- "colorKey": "h100_42947950",
- "comparisonKey": "4c920ba7523ac63b",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:28.917588+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_08",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "157ca81687ddb63",
- "workloadId": "set:3:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271785174",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271785174",
- "createdAt": "2026-06-26T23:54:28.917588+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 130.52800297737122,
- "p90": 135.55200397968292,
- "p95": 138.43199610710144,
- "p99": 176.79999768733978
- },
- "combine": {
- "p50": 113.8560026884079,
- "p90": 120.86399644613266,
- "p95": 122.11199849843979,
- "p99": 145.50399780273438
- },
- "roundtrip": {
- "p50": 209.05600488185883,
- "p90": 217.56799519062042,
- "p95": 219.200000166893,
- "p99": 275.04000067710876
- },
- "isolatedSum": {
- "p50": 244.3840056657791,
- "p90": 256.4160004258156,
- "p95": 260.54399460554123,
- "p99": 322.30399549007416
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 210.27199923992157,
- "p90": 217.056006193161,
- "p95": 220.22399306297302,
- "p99": 256.99201226234436
- },
- "combine": {
- "p50": 234.9119931459427,
- "p90": 241.40800535678864,
- "p95": 244.9920028448105,
- "p99": 262.9759907722473
- },
- "roundtrip": {
- "p50": 412.54401206970215,
- "p90": 420.9280014038086,
- "p95": 423.0720102787018,
- "p99": 427.35999822616577
- },
- "isolatedSum": {
- "p50": 445.18399238586426,
- "p90": 458.46401154994965,
- "p95": 465.2159959077835,
- "p99": 519.9680030345917
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 526.5920162200928,
- "p90": 541.4720177650452,
- "p95": 545.9200143814087,
- "p99": 552.3520112037659
- },
- "combine": {
- "p50": 637.5679969787598,
- "p90": 649.6959924697876,
- "p95": 652.6079773902893,
- "p99": 661.0879898071289
- },
- "roundtrip": {
- "p50": 1134.6240043640137,
- "p90": 1146.880030632019,
- "p95": 1151.2320041656494,
- "p99": 1158.5919857025146
- },
- "isolatedSum": {
- "p50": 1164.1600131988525,
- "p90": 1191.1680102348328,
- "p95": 1198.527991771698,
- "p99": 1213.4400010108948
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-efe3a643",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_42947950",
- "comparisonKey": "cca7a3f5d9dbba36",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T10:12:09.407437+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_12",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28286083501",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286083501",
- "createdAt": "2026-06-27T10:12:09.407437+00:00",
- "sha": "76a3032d20288ee17220eb6099346f74d56ce005"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 112.5119999051094,
- "p90": 118.01599711179733,
- "p95": 119.39200013875961,
- "p99": 123.4240010380745
- },
- "combine": {
- "p50": 107.77600109577179,
- "p90": 113.40799927711487,
- "p95": 114.1119971871376,
- "p99": 116.2559986114502
- },
- "roundtrip": {
- "p50": 200.57600736618042,
- "p90": 204.73599433898926,
- "p95": 206.36799931526184,
- "p99": 209.85600352287292
- },
- "isolatedSum": {
- "p50": 220.2880010008812,
- "p90": 231.4239963889122,
- "p95": 233.50399732589722,
- "p99": 239.6799996495247
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 143.77599954605103,
- "p90": 148.12800288200378,
- "p95": 149.6960073709488,
- "p99": 152.51199901103973
- },
- "combine": {
- "p50": 151.10400319099426,
- "p90": 155.74400126934052,
- "p95": 156.76799416542053,
- "p99": 158.11200439929962
- },
- "roundtrip": {
- "p50": 265.53601026535034,
- "p90": 269.79199051856995,
- "p95": 270.9760069847107,
- "p99": 274.01599287986755
- },
- "isolatedSum": {
- "p50": 294.8800027370453,
- "p90": 303.8720041513443,
- "p95": 306.4640015363693,
- "p99": 310.62400341033936
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 199.96799528598785,
- "p90": 204.12799715995789,
- "p95": 205.31199872493744,
- "p99": 209.72800254821777
- },
- "combine": {
- "p50": 229.0560007095337,
- "p90": 232.2559952735901,
- "p95": 235.80799996852875,
- "p99": 239.19999599456787
- },
- "roundtrip": {
- "p50": 401.5359878540039,
- "p90": 406.0159921646118,
- "p95": 407.6800048351288,
- "p99": 412.1280014514923
- },
- "isolatedSum": {
- "p50": 429.02399599552155,
- "p90": 436.383992433548,
- "p95": 441.1199986934662,
- "p99": 448.92799854278564
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 304.9600124359131,
- "p90": 310.016006231308,
- "p95": 311.3279938697815,
- "p99": 313.2160007953644
- },
- "combine": {
- "p50": 367.39200353622437,
- "p90": 373.3440041542053,
- "p95": 375.90399384498596,
- "p99": 378.81600856781006
- },
- "roundtrip": {
- "p50": 645.4079747200012,
- "p90": 652.5760293006897,
- "p95": 654.7200083732605,
- "p99": 659.0719819068909
- },
- "isolatedSum": {
- "p50": 672.3520159721375,
- "p90": 683.3600103855133,
- "p95": 687.2319877147675,
- "p99": 692.0320093631744
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 532.0320129394531,
- "p90": 541.8559908866882,
- "p95": 545.4720258712769,
- "p99": 554.0480017662048
- },
- "combine": {
- "p50": 637.9200220108032,
- "p90": 645.7599997520447,
- "p95": 647.9679942131042,
- "p99": 653.6639928817749
- },
- "roundtrip": {
- "p50": 1139.6479606628418,
- "p90": 1149.888038635254,
- "p95": 1154.3359756469727,
- "p99": 1160.032033920288
- },
- "isolatedSum": {
- "p50": 1169.9520349502563,
- "p90": 1187.615990638733,
- "p95": 1193.440020084381,
- "p99": 1207.7119946479797
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 999.4239807128906,
- "p90": 1017.2480344772339,
- "p95": 1023.8080024719238,
- "p99": 1035.040020942688
- },
- "combine": {
- "p50": 1168.544054031372,
- "p90": 1176.8640279769897,
- "p95": 1180.5119514465332,
- "p99": 1186.1759424209595
- },
- "roundtrip": {
- "p50": 2132.4799060821533,
- "p90": 2148.47993850708,
- "p95": 2154.9439430236816,
- "p99": 2171.5519428253174
- },
- "isolatedSum": {
- "p50": 2167.9680347442627,
- "p90": 2194.1120624542236,
- "p95": 2204.319953918457,
- "p99": 2221.2159633636475
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8a96205b",
- "identity": "h100|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "6a625438eb544ee8",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:48:12.079136+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_06",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271563151",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271563151",
- "createdAt": "2026-06-26T23:48:12.079136+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 111.61600053310394,
- "p90": 117.3119992017746,
- "p95": 118.81600320339203,
- "p99": 123.74400347471237
- },
- "combine": {
- "p50": 105.85600137710571,
- "p90": 107.07200318574905,
- "p95": 111.16799712181091,
- "p99": 113.8560026884079
- },
- "roundtrip": {
- "p50": 193.02399456501007,
- "p90": 199.52000677585602,
- "p95": 200.9280025959015,
- "p99": 204.96000349521637
- },
- "isolatedSum": {
- "p50": 217.47200191020966,
- "p90": 224.38400238752365,
- "p95": 229.98400032520294,
- "p99": 237.60000616312027
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 143.23200285434723,
- "p90": 147.5200057029724,
- "p95": 148.6400067806244,
- "p99": 152.28800475597382
- },
- "combine": {
- "p50": 148.76799285411835,
- "p90": 154.4640064239502,
- "p95": 155.29599785804749,
- "p99": 156.76799416542053
- },
- "roundtrip": {
- "p50": 262.33598589897156,
- "p90": 266.431987285614,
- "p95": 268.12800765037537,
- "p99": 271.1679935455322
- },
- "isolatedSum": {
- "p50": 291.9999957084656,
- "p90": 301.9840121269226,
- "p95": 303.9360046386719,
- "p99": 309.05599892139435
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 196.25599682331085,
- "p90": 201.1840045452118,
- "p95": 202.72000133991241,
- "p99": 214.84799683094025
- },
- "combine": {
- "p50": 230.49600422382355,
- "p90": 236.12800240516663,
- "p95": 237.2799962759018,
- "p99": 241.15200340747833
- },
- "roundtrip": {
- "p50": 403.0719995498657,
- "p90": 408.3839952945709,
- "p95": 410.14400124549866,
- "p99": 412.76800632476807
- },
- "isolatedSum": {
- "p50": 426.7520010471344,
- "p90": 437.3120069503784,
- "p95": 439.9999976158142,
- "p99": 456.0000002384186
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 301.6960024833679,
- "p90": 306.43200874328613,
- "p95": 307.9040050506592,
- "p99": 312.1280074119568
- },
- "combine": {
- "p50": 364.1279935836792,
- "p90": 369.4399893283844,
- "p95": 372.0319867134094,
- "p99": 374.9760091304779
- },
- "roundtrip": {
- "p50": 640.064001083374,
- "p90": 646.8160152435303,
- "p95": 648.5120058059692,
- "p99": 653.6960005760193
- },
- "isolatedSum": {
- "p50": 665.8239960670471,
- "p90": 675.8719980716705,
- "p95": 679.9359917640686,
- "p99": 687.1040165424347
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 532.1599841117859,
- "p90": 540.7040119171143,
- "p95": 544.1280007362366,
- "p99": 549.2799878120422
- },
- "combine": {
- "p50": 637.503981590271,
- "p90": 645.5039978027344,
- "p95": 647.7760076522827,
- "p99": 653.9520025253296
- },
- "roundtrip": {
- "p50": 1141.9199705123901,
- "p90": 1154.4320583343506,
- "p95": 1160.1920127868652,
- "p99": 1180.9600591659546
- },
- "isolatedSum": {
- "p50": 1169.6639657020569,
- "p90": 1186.2080097198486,
- "p95": 1191.9040083885193,
- "p99": 1203.2319903373718
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 993.9200282096863,
- "p90": 1017.2799825668335,
- "p95": 1023.4240293502808,
- "p99": 1036.8319749832153
- },
- "combine": {
- "p50": 1165.0559902191162,
- "p90": 1175.3599643707275,
- "p95": 1177.9520511627197,
- "p99": 1283.2640409469604
- },
- "roundtrip": {
- "p50": 2117.6319122314453,
- "p90": 2134.848117828369,
- "p95": 2139.6799087524414,
- "p99": 2151.5839099884033
- },
- "isolatedSum": {
- "p50": 2158.9760184288025,
- "p90": 2192.639946937561,
- "p95": 2201.3760805130005,
- "p99": 2320.096015930176
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-27ce5700",
- "identity": "h100|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "h100_42947950",
- "comparisonKey": "4106e8f613d56fa1",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:16:10.577708+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_04",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287499275",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287499275",
- "createdAt": "2026-06-27T11:16:10.577708+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 111.07199639081955,
- "p90": 116.38399958610535,
- "p95": 117.8240031003952,
- "p99": 120.70400267839432
- },
- "combine": {
- "p50": 106.36799782514572,
- "p90": 107.64800012111664,
- "p95": 111.77600175142288,
- "p99": 114.656001329422
- },
- "roundtrip": {
- "p50": 195.10400295257568,
- "p90": 200.6399929523468,
- "p95": 202.2079974412918,
- "p99": 206.68800175189972
- },
- "isolatedSum": {
- "p50": 217.43999421596527,
- "p90": 224.03199970722198,
- "p95": 229.60000485181808,
- "p99": 235.36000400781631
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 145.34400403499603,
- "p90": 151.10400319099426,
- "p95": 152.25599706172943,
- "p99": 155.29599785804749
- },
- "combine": {
- "p50": 149.63200688362122,
- "p90": 155.16799688339233,
- "p95": 155.71199357509613,
- "p99": 156.76799416542053
- },
- "roundtrip": {
- "p50": 268.12800765037537,
- "p90": 272.99201488494873,
- "p95": 274.6880054473877,
- "p99": 278.78400683403015
- },
- "isolatedSum": {
- "p50": 294.97601091861725,
- "p90": 306.2720000743866,
- "p95": 307.96799063682556,
- "p99": 312.063992023468
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154570752,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 199.42399859428406,
- "p90": 204.41600680351257,
- "p95": 207.87200331687927,
- "p99": 219.93599832057953
- },
- "combine": {
- "p50": 228.99200022220612,
- "p90": 234.6239984035492,
- "p95": 236.06400191783905,
- "p99": 237.69600689411163
- },
- "roundtrip": {
- "p50": 400.89601278305054,
- "p90": 405.11998534202576,
- "p95": 406.49598836898804,
- "p99": 409.88799929618835
- },
- "isolatedSum": {
- "p50": 428.4159988164902,
- "p90": 439.04000520706177,
- "p95": 443.9360052347183,
- "p99": 457.63200521469116
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 309772288,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 304.86398935317993,
- "p90": 336.38399839401245,
- "p95": 347.51999378204346,
- "p99": 513.0239725112915
- },
- "combine": {
- "p50": 366.33598804473877,
- "p90": 372.0960021018982,
- "p95": 374.91199374198914,
- "p99": 474.7520089149475
- },
- "roundtrip": {
- "p50": 644.0640091896057,
- "p90": 650.9439945220947,
- "p95": 653.823971748352,
- "p99": 658.5919857025146
- },
- "isolatedSum": {
- "p50": 671.1999773979187,
- "p90": 708.4800004959106,
- "p95": 722.4319875240326,
- "p99": 987.775981426239
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619501568,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 524.5440006256104,
- "p90": 540.8959984779358,
- "p95": 543.7120199203491,
- "p99": 551.6800284385681
- },
- "combine": {
- "p50": 630.8159828186035,
- "p90": 639.8720145225525,
- "p95": 642.4000263214111,
- "p99": 648.1599807739258
- },
- "roundtrip": {
- "p50": 1125.1519918441772,
- "p90": 1138.8800144195557,
- "p95": 1142.2719955444336,
- "p99": 1151.6799926757812
- },
- "isolatedSum": {
- "p50": 1155.3599834442139,
- "p90": 1180.7680130004883,
- "p95": 1186.1120462417603,
- "p99": 1199.840009212494
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239375872,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1012.2560262680054,
- "p90": 1040.9280061721802,
- "p95": 1049.504041671753,
- "p99": 1060.0320100784302
- },
- "combine": {
- "p50": 1154.3359756469727,
- "p90": 1163.2000207901,
- "p95": 1166.8479442596436,
- "p99": 1173.7279891967773
- },
- "roundtrip": {
- "p50": 2117.1839237213135,
- "p90": 2141.8559551239014,
- "p95": 2147.104024887085,
- "p99": 2157.1199893951416
- },
- "isolatedSum": {
- "p50": 2166.592001914978,
- "p90": 2204.1280269622803,
- "p95": 2216.3519859313965,
- "p99": 2233.7599992752075
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2479669248,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-32c90de8",
- "identity": "h100|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "h100_ff7906f8",
- "comparisonKey": "db866d0065c2a509",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:05.825406+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_05",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271671786",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271671786",
- "createdAt": "2026-06-26T23:51:05.825406+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 112.96000331640244,
- "p90": 118.78400295972824,
- "p95": 120.28799951076508,
- "p99": 130.40000200271606
- },
- "combine": {
- "p50": 106.1440035700798,
- "p90": 109.15199667215347,
- "p95": 110.30399799346924,
- "p99": 114.49600011110306
- },
- "roundtrip": {
- "p50": 196.99199497699738,
- "p90": 201.34399831295013,
- "p95": 202.94399559497833,
- "p99": 206.04799687862396
- },
- "isolatedSum": {
- "p50": 219.10400688648224,
- "p90": 227.9359996318817,
- "p95": 230.5919975042343,
- "p99": 244.89600211381912
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 149.21599626541138,
- "p90": 155.03999590873718,
- "p95": 157.05600380897522,
- "p99": 159.4880074262619
- },
- "combine": {
- "p50": 153.50399911403656,
- "p90": 158.62399339675903,
- "p95": 160.25599837303162,
- "p99": 165.15199840068817
- },
- "roundtrip": {
- "p50": 270.3999876976013,
- "p90": 284.0000092983246,
- "p95": 285.69599986076355,
- "p99": 288.9600098133087
- },
- "isolatedSum": {
- "p50": 302.71999537944794,
- "p90": 313.6639893054962,
- "p95": 317.31200218200684,
- "p99": 324.6400058269501
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154570752,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 201.92000269889832,
- "p90": 212.5760018825531,
- "p95": 214.59199488162994,
- "p99": 217.8560048341751
- },
- "combine": {
- "p50": 229.5999974012375,
- "p90": 237.92000114917755,
- "p95": 241.2479966878891,
- "p99": 245.2159970998764
- },
- "roundtrip": {
- "p50": 404.2240083217621,
- "p90": 417.5359904766083,
- "p95": 419.3919897079468,
- "p99": 424.1600036621094
- },
- "isolatedSum": {
- "p50": 431.5200001001358,
- "p90": 450.49600303173065,
- "p95": 455.83999156951904,
- "p99": 463.0720019340515
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 309772288,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 304.83201146125793,
- "p90": 315.39198756217957,
- "p95": 317.6319897174835,
- "p99": 320.51199674606323
- },
- "combine": {
- "p50": 367.48799681663513,
- "p90": 376.96000933647156,
- "p95": 381.9200098514557,
- "p99": 392.192006111145
- },
- "roundtrip": {
- "p50": 644.7039842605591,
- "p90": 655.456006526947,
- "p95": 677.951991558075,
- "p99": 919.8399782180786
- },
- "isolatedSum": {
- "p50": 672.3200082778931,
- "p90": 692.3519968986511,
- "p95": 699.5519995689392,
- "p99": 712.7040028572083
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619501568,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 521.2799906730652,
- "p90": 536.4800095558167,
- "p95": 540.224015712738,
- "p99": 549.3119955062866
- },
- "combine": {
- "p50": 632.4160099029541,
- "p90": 640.7679915428162,
- "p95": 643.3600187301636,
- "p99": 651.4559984207153
- },
- "roundtrip": {
- "p50": 1126.431941986084,
- "p90": 1137.8240585327148,
- "p95": 1141.5679454803467,
- "p99": 1157.6000452041626
- },
- "isolatedSum": {
- "p50": 1153.6960005760193,
- "p90": 1177.2480010986328,
- "p95": 1183.5840344429016,
- "p99": 1200.767993927002
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239375872,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1011.2960338592529,
- "p90": 1036.895990371704,
- "p95": 1044.3840026855469,
- "p99": 1057.088017463684
- },
- "combine": {
- "p50": 1154.8160314559937,
- "p90": 1163.9360189437866,
- "p95": 1166.5279865264893,
- "p99": 1172.160029411316
- },
- "roundtrip": {
- "p50": 2122.7200031280518,
- "p90": 2144.9921131134033,
- "p95": 2150.559902191162,
- "p99": 2167.6158905029297
- },
- "isolatedSum": {
- "p50": 2166.1120653152466,
- "p90": 2200.8320093154907,
- "p95": 2210.911989212036,
- "p99": 2229.248046875
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2479669248,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-3c52549e",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39",
- "colorKey": "h100_16047c28",
- "comparisonKey": "987d0ef30063bb5c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:59:36.290170+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_11",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "0a3064a2af0dd39",
- "workloadId": "set:6:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271938768",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271938768",
- "createdAt": "2026-06-26T23:59:36.290170+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 127.23200023174286,
- "p90": 131.52000308036804,
- "p95": 133.08799266815186,
- "p99": 136.3839954137802
- },
- "combine": {
- "p50": 126.11199915409088,
- "p90": 130.62399625778198,
- "p95": 131.48799538612366,
- "p99": 133.98399949073792
- },
- "roundtrip": {
- "p50": 233.43999683856964,
- "p90": 236.76800727844238,
- "p95": 237.40799725055695,
- "p99": 240.4160052537918
- },
- "isolatedSum": {
- "p50": 253.34399938583374,
- "p90": 262.14399933815,
- "p95": 264.5759880542755,
- "p99": 270.3679949045181
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 180.7039976119995,
- "p90": 191.3280040025711,
- "p95": 193.08799505233765,
- "p99": 197.28000462055206
- },
- "combine": {
- "p50": 183.26400220394135,
- "p90": 190.97599387168884,
- "p95": 192.3840045928955,
- "p99": 197.66399264335632
- },
- "roundtrip": {
- "p50": 332.15999603271484,
- "p90": 344.35200691223145,
- "p95": 346.3680148124695,
- "p99": 348.83201122283936
- },
- "isolatedSum": {
- "p50": 363.96799981594086,
- "p90": 382.30399787425995,
- "p95": 385.47199964523315,
- "p99": 394.9439972639084
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 8,
- "recvTokensMax": 2048,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 272.41599559783936,
- "p90": 284.0000092983246,
- "p95": 286.46400570869446,
- "p99": 290.1439964771271
- },
- "combine": {
- "p50": 276.2239873409271,
- "p90": 285.0880026817322,
- "p95": 286.8799865245819,
- "p99": 294.624000787735
- },
- "roundtrip": {
- "p50": 519.648015499115,
- "p90": 533.2159996032715,
- "p95": 535.1999998092651,
- "p99": 538.0480289459229
- },
- "isolatedSum": {
- "p50": 548.6399829387665,
- "p90": 569.0880119800568,
- "p95": 573.3439922332764,
- "p99": 584.7679972648621
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 8,
- "recvTokensMax": 4096,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 450.3679871559143,
- "p90": 462.14398741722107,
- "p95": 464.2559885978699,
- "p99": 469.34399008750916
- },
- "combine": {
- "p50": 469.11999583244324,
- "p90": 477.53599286079407,
- "p95": 479.0720045566559,
- "p99": 484.0959906578064
- },
- "roundtrip": {
- "p50": 892.3839926719666,
- "p90": 904.3520092964172,
- "p95": 909.0560078620911,
- "p99": 1079.967975616455
- },
- "isolatedSum": {
- "p50": 919.4879829883575,
- "p90": 939.6799802780151,
- "p95": 943.3279931545258,
- "p99": 953.4399807453156
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 939524096,
- "combineLogicalBytes": 939524096,
- "fanoutMean": 8,
- "recvTokensMax": 8192,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 810.7200264930725,
- "p90": 828.607976436615,
- "p95": 831.3599824905396,
- "p99": 837.2480273246765
- },
- "combine": {
- "p50": 854.8160195350647,
- "p90": 863.6159896850586,
- "p95": 865.9840226173401,
- "p99": 870.3359961509705
- },
- "roundtrip": {
- "p50": 1635.583996772766,
- "p90": 1645.0239419937134,
- "p95": 1648.095965385437,
- "p99": 1656.7679643630981
- },
- "isolatedSum": {
- "p50": 1665.5360460281372,
- "p90": 1692.2239661216736,
- "p95": 1697.3440051078796,
- "p99": 1707.584023475647
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1879048192,
- "combineLogicalBytes": 1879048192,
- "fanoutMean": 8,
- "recvTokensMax": 16384,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1546.623945236206,
- "p90": 1554.0159940719604,
- "p95": 1556.3839673995972,
- "p99": 1562.559962272644
- },
- "combine": {
- "p50": 1599.552035331726,
- "p90": 1609.2480421066284,
- "p95": 1612.4800443649292,
- "p99": 1621.6000318527222
- },
- "roundtrip": {
- "p50": 3122.015953063965,
- "p90": 3132.4799060821533,
- "p95": 3136.352062225342,
- "p99": 3144.4480419158936
- },
- "isolatedSum": {
- "p50": 3146.175980567932,
- "p90": 3163.264036178589,
- "p95": 3168.8640117645264,
- "p99": 3184.159994125366
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3758096384,
- "combineLogicalBytes": 3758096384,
- "fanoutMean": 8,
- "recvTokensMax": 32768,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-05271e8a",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8",
- "colorKey": "h100_16047c28",
- "comparisonKey": "987d0ef30063bb5c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:55:32.762651+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_15",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "9e6ac678a09f7f8",
- "workloadId": "set:3:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271791847",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271791847",
- "createdAt": "2026-06-26T23:55:32.762651+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 141.9840008020401,
- "p90": 148.15999567508698,
- "p95": 150.43200552463531,
- "p99": 159.71200168132782
- },
- "combine": {
- "p50": 131.77600502967834,
- "p90": 138.7840062379837,
- "p95": 139.80799913406372,
- "p99": 147.07200229167938
- },
- "roundtrip": {
- "p50": 243.1039959192276,
- "p90": 250.71999430656433,
- "p95": 252.03201174736023,
- "p99": 257.9840123653412
- },
- "isolatedSum": {
- "p50": 273.76000583171844,
- "p90": 286.9440019130707,
- "p95": 290.24000465869904,
- "p99": 306.7840039730072
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 282.20799565315247,
- "p90": 291.04000329971313,
- "p95": 293.3439910411835,
- "p99": 299.3920147418976
- },
- "combine": {
- "p50": 282.71999955177307,
- "p90": 287.4560058116913,
- "p95": 288.9600098133087,
- "p99": 297.5040078163147
- },
- "roundtrip": {
- "p50": 530.239999294281,
- "p90": 536.9600057601929,
- "p95": 540.0320291519165,
- "p99": 549.3119955062866
- },
- "isolatedSum": {
- "p50": 564.9279952049255,
- "p90": 578.4960091114044,
- "p95": 582.3040008544922,
- "p99": 596.8960225582123
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 8,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 815.7439827919006,
- "p90": 825.2800107002258,
- "p95": 828.5760283470154,
- "p99": 835.0080251693726
- },
- "combine": {
- "p50": 857.9840064048767,
- "p90": 866.27197265625,
- "p95": 869.6320056915283,
- "p99": 877.8560161590576
- },
- "roundtrip": {
- "p50": 1642.5280570983887,
- "p90": 1654.5920372009277,
- "p95": 1658.944010734558,
- "p99": 1692.7039623260498
- },
- "isolatedSum": {
- "p50": 1673.7279891967773,
- "p90": 1691.5519833564758,
- "p95": 1698.2080340385437,
- "p99": 1712.8640413284302
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1879048192,
- "combineLogicalBytes": 1879048192,
- "fanoutMean": 8,
- "recvTokensMax": 16384,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-06b4b084",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9",
- "colorKey": "h100_0c515f8b",
- "comparisonKey": "e2c5b47e428e10b6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:50.950252+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_04",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · balanced-rank-local",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced-rank-local",
- "routingLabel": "balanced-rank-local",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "7aa44c7b86748b9",
- "workloadId": "set:3:388ff74baef05c72",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271798809",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271798809",
- "createdAt": "2026-06-26T23:54:50.950252+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 102.52799838781357,
- "p90": 106.52799904346466,
- "p95": 108.31999778747559,
- "p99": 112.44799941778183
- },
- "combine": {
- "p50": 81.31200075149536,
- "p90": 88.128000497818,
- "p95": 88.48000317811966,
- "p99": 90.4960036277771
- },
- "roundtrip": {
- "p50": 155.32800555229187,
- "p90": 160.92799603939056,
- "p95": 161.79199516773224,
- "p99": 165.40800034999847
- },
- "isolatedSum": {
- "p50": 183.83999913930893,
- "p90": 194.65599954128265,
- "p95": 196.80000096559525,
- "p99": 202.94400304555893
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 1,
- "recvTokensMax": 128,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 124.95999783277512,
- "p90": 130.36799430847168,
- "p95": 131.9040060043335,
- "p99": 142.17600226402283
- },
- "combine": {
- "p50": 128.7039965391159,
- "p90": 130.43199479579926,
- "p95": 136.80000603199005,
- "p99": 147.67999947071075
- },
- "roundtrip": {
- "p50": 216.25599265098572,
- "p90": 220.57600319385529,
- "p95": 223.4880030155182,
- "p99": 267.8399980068207
- },
- "isolatedSum": {
- "p50": 253.66399437189102,
- "p90": 260.79998910427094,
- "p95": 268.70401203632355,
- "p99": 289.8560017347336
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 1,
- "recvTokensMax": 512,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 205.6639939546585,
- "p90": 211.13599836826324,
- "p95": 216.48000180721283,
- "p99": 269.1200077533722
- },
- "combine": {
- "p50": 295.80798745155334,
- "p90": 300.54399371147156,
- "p95": 305.2160143852234,
- "p99": 337.3439908027649
- },
- "roundtrip": {
- "p50": 464.4800126552582,
- "p90": 471.45599126815796,
- "p95": 474.047988653183,
- "p99": 503.35997343063354
- },
- "isolatedSum": {
- "p50": 501.47198140621185,
- "p90": 511.6799920797348,
- "p95": 521.6960161924362,
- "p99": 606.4639985561371
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 1,
- "recvTokensMax": 2048,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4058f6f5",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71",
- "colorKey": "h100_c0c0ad86",
- "comparisonKey": "252e0af9287be53d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:59:35.979250+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_07",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · balanced+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "balanced",
- "routingLabel": "balanced+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "df54a9510825f71",
- "workloadId": "set:6:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1,
- "eplbImbalanceAfter": 1,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271942138",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271942138",
- "createdAt": "2026-06-26T23:59:35.979250+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 94.14400160312653,
- "p90": 98.01600128412247,
- "p95": 99.74399954080582,
- "p99": 103.29599678516388
- },
- "combine": {
- "p50": 83.03999900817871,
- "p90": 88.22400122880936,
- "p95": 89.15200084447861,
- "p99": 90.81599861383438
- },
- "roundtrip": {
- "p50": 157.79200196266174,
- "p90": 161.9199961423874,
- "p95": 163.5199934244156,
- "p99": 167.67999529838562
- },
- "isolatedSum": {
- "p50": 177.18400061130524,
- "p90": 186.24000251293182,
- "p95": 188.89600038528442,
- "p99": 194.11199539899826
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 2,
- "recvTokensMax": 384,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 122.40000069141388,
- "p90": 129.95199859142303,
- "p95": 143.10400187969208,
- "p99": 173.95199835300446
- },
- "combine": {
- "p50": 104.41599786281586,
- "p90": 106.65600001811981,
- "p95": 120.51200121641159,
- "p99": 144.28800344467163
- },
- "roundtrip": {
- "p50": 198.43199849128723,
- "p90": 202.36800611019135,
- "p95": 205.1839977502823,
- "p99": 235.32800376415253
- },
- "isolatedSum": {
- "p50": 226.81599855422974,
- "p90": 236.60799860954285,
- "p95": 263.61600309610367,
- "p99": 318.2400017976761
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 2,
- "recvTokensMax": 768,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 150.43200552463531,
- "p90": 154.7199934720993,
- "p95": 158.4320068359375,
- "p99": 386.1120045185089
- },
- "combine": {
- "p50": 141.15199446678162,
- "p90": 145.91999351978302,
- "p95": 146.55999839305878,
- "p99": 147.5200057029724
- },
- "roundtrip": {
- "p50": 266.1440074443817,
- "p90": 274.9119997024536,
- "p95": 278.3679962158203,
- "p99": 286.9440019130707
- },
- "isolatedSum": {
- "p50": 291.58399999141693,
- "p90": 300.6399869918823,
- "p95": 304.9920052289963,
- "p99": 533.6320102214813
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 2,
- "recvTokensMax": 1536,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 207.61600136756897,
- "p90": 213.44000101089478,
- "p95": 217.98400580883026,
- "p99": 245.5040067434311
- },
- "combine": {
- "p50": 219.93599832057953,
- "p90": 225.0880002975464,
- "p95": 227.2000014781952,
- "p99": 244.86400187015533
- },
- "roundtrip": {
- "p50": 405.023992061615,
- "p90": 410.0480079650879,
- "p95": 412.31998801231384,
- "p99": 437.6640021800995
- },
- "isolatedSum": {
- "p50": 427.5519996881485,
- "p90": 438.52800130844116,
- "p95": 445.18400728702545,
- "p99": 490.3680086135864
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 2,
- "recvTokensMax": 3072,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 329.47200536727905,
- "p90": 336.60799264907837,
- "p95": 339.04001116752625,
- "p99": 460.4159891605377
- },
- "combine": {
- "p50": 368.3199882507324,
- "p90": 375.2639889717102,
- "p95": 377.6960074901581,
- "p99": 383.07198882102966
- },
- "roundtrip": {
- "p50": 670.0159907341003,
- "p90": 675.8400201797485,
- "p95": 678.3360242843628,
- "p99": 682.3359727859497
- },
- "isolatedSum": {
- "p50": 697.7919936180115,
- "p90": 711.8719816207886,
- "p95": 716.7360186576843,
- "p99": 843.4879779815674
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 2,
- "recvTokensMax": 6144,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 580.8960199356079,
- "p90": 590.1119709014893,
- "p95": 592.6079750061035,
- "p99": 597.5040197372437
- },
- "combine": {
- "p50": 647.9039788246155,
- "p90": 655.0719738006592,
- "p95": 657.2480201721191,
- "p99": 660.863995552063
- },
- "roundtrip": {
- "p50": 1207.4559926986694,
- "p90": 1217.087984085083,
- "p95": 1224.0639925003052,
- "p99": 1241.312026977539
- },
- "isolatedSum": {
- "p50": 1228.7999987602234,
- "p90": 1245.1839447021484,
- "p95": 1249.8559951782227,
- "p99": 1258.3680152893066
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 939524096,
- "combineLogicalBytes": 939524096,
- "fanoutMean": 2,
- "recvTokensMax": 12288,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b89c63a5",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32",
- "colorKey": "h100_b654f9b2",
- "comparisonKey": "37db9a5137981152",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:55:36.358305+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_13",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "38fd0bcf7109c32",
- "workloadId": "set:3:b952d4a43d688b50",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271820121",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271820121",
- "createdAt": "2026-06-26T23:55:36.358305+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 123.61600250005722,
- "p90": 127.48800218105316,
- "p95": 131.1040073633194,
- "p99": 136.19199395179749
- },
- "combine": {
- "p50": 116.95999652147293,
- "p90": 122.46400117874146,
- "p95": 124.95999783277512,
- "p99": 131.26400113105774
- },
- "roundtrip": {
- "p50": 217.72800385951996,
- "p90": 224.89599883556366,
- "p95": 229.24800217151642,
- "p99": 245.37600576877594
- },
- "isolatedSum": {
- "p50": 240.57599902153015,
- "p90": 249.95200335979462,
- "p95": 256.0640051960945,
- "p99": 267.4559950828552
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 238.91200125217438,
- "p90": 257.24801421165466,
- "p95": 259.2960000038147,
- "p99": 261.9520127773285
- },
- "combine": {
- "p50": 271.93599939346313,
- "p90": 282.1759879589081,
- "p95": 284.8320007324219,
- "p99": 288.5119915008545
- },
- "roundtrip": {
- "p50": 486.04801297187805,
- "p90": 500.8959770202637,
- "p95": 503.55201959609985,
- "p99": 509.2160105705261
- },
- "isolatedSum": {
- "p50": 510.8480006456375,
- "p90": 539.4240021705627,
- "p95": 544.1280007362366,
- "p99": 550.464004278183
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311091200,
- "combineLogicalBytes": 311091200,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 4096,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 718.3039784431458,
- "p90": 732.3840260505676,
- "p95": 736.3520264625549,
- "p99": 740.4159903526306
- },
- "combine": {
- "p50": 829.9520015716553,
- "p90": 838.047981262207,
- "p95": 840.2559757232666,
- "p99": 846.6879725456238
- },
- "roundtrip": {
- "p50": 1516.2559747695923,
- "p90": 1525.3759622573853,
- "p95": 1528.223991394043,
- "p99": 1535.2319478988647
- },
- "isolatedSum": {
- "p50": 1548.255980014801,
- "p90": 1570.4320073127747,
- "p95": 1576.6080021858215,
- "p99": 1587.1039628982544
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1241511936,
- "combineLogicalBytes": 1241511936,
- "fanoutMean": 5.28570556640625,
- "recvTokensMax": 16384,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-fa73d33e",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c",
- "colorKey": "h100_b654f9b2",
- "comparisonKey": "37db9a5137981152",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:55.460957+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_13",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "bfbb64a166e9f1c",
- "workloadId": "set:6:b952d4a43d688b50",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272012738",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272012738",
- "createdAt": "2026-06-27T00:01:55.460957+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 129.08799946308136,
- "p90": 131.42399489879608,
- "p95": 132.03200697898865,
- "p99": 135.903999209404
- },
- "combine": {
- "p50": 119.87199634313583,
- "p90": 121.98399752378464,
- "p95": 122.36800044775009,
- "p99": 125.72799623012543
- },
- "roundtrip": {
- "p50": 219.200000166893,
- "p90": 223.80800545215607,
- "p95": 224.7679978609085,
- "p99": 228.0000001192093
- },
- "isolatedSum": {
- "p50": 248.9599958062172,
- "p90": 253.40799242258072,
- "p95": 254.40000742673874,
- "p99": 261.6319954395294
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 163.96799683570862,
- "p90": 168.19199919700623,
- "p95": 170.43200135231018,
- "p99": 173.12000691890717
- },
- "combine": {
- "p50": 171.55200242996216,
- "p90": 176.83200538158417,
- "p95": 178.3680021762848,
- "p99": 180.60800433158875
- },
- "roundtrip": {
- "p50": 306.7840039730072,
- "p90": 310.94399094581604,
- "p95": 312.3199939727783,
- "p99": 314.7839903831482
- },
- "isolatedSum": {
- "p50": 335.5199992656708,
- "p90": 345.0240045785904,
- "p95": 348.80000352859497,
- "p99": 353.7280112504959
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156090368,
- "combineLogicalBytes": 156090368,
- "fanoutMean": 5.31640625,
- "recvTokensMax": 2048,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 237.34399676322937,
- "p90": 242.11199581623077,
- "p95": 244.1920042037964,
- "p99": 248.28800559043884
- },
- "combine": {
- "p50": 268.22400093078613,
- "p90": 273.53599667549133,
- "p95": 274.84801411628723,
- "p99": 277.69601345062256
- },
- "roundtrip": {
- "p50": 482.7519953250885,
- "p90": 488.44799399375916,
- "p95": 490.4319941997528,
- "p99": 495.07200717926025
- },
- "isolatedSum": {
- "p50": 505.5679976940155,
- "p90": 515.6479924917221,
- "p95": 519.0400183200836,
- "p99": 525.9840190410614
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311091200,
- "combineLogicalBytes": 311091200,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 391.61598682403564,
- "p90": 397.5679874420166,
- "p95": 399.9040126800537,
- "p99": 407.1039855480194
- },
- "combine": {
- "p50": 455.6480050086975,
- "p90": 461.5359902381897,
- "p95": 463.0720019340515,
- "p99": 466.5600061416626
- },
- "roundtrip": {
- "p50": 823.2960104942322,
- "p90": 829.5040130615234,
- "p95": 831.5839767456055,
- "p99": 835.4560136795044
- },
- "isolatedSum": {
- "p50": 847.2639918327332,
- "p90": 859.1039776802063,
- "p95": 862.9760146141052,
- "p99": 873.663991689682
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 620648448,
- "combineLogicalBytes": 620648448,
- "fanoutMean": 5.2847900390625,
- "recvTokensMax": 8192,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 720.6720113754272,
- "p90": 733.6320281028748,
- "p95": 737.5680208206177,
- "p99": 744.9280023574829
- },
- "combine": {
- "p50": 825.7279992103577,
- "p90": 834.559977054596,
- "p95": 837.3759984970093,
- "p99": 841.2479758262634
- },
- "roundtrip": {
- "p50": 1514.240026473999,
- "p90": 1523.7120389938354,
- "p95": 1526.6239643096924,
- "p99": 1534.3999862670898
- },
- "isolatedSum": {
- "p50": 1546.400010585785,
- "p90": 1568.1920051574707,
- "p95": 1574.944019317627,
- "p99": 1586.1759781837463
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1241511936,
- "combineLogicalBytes": 1241511936,
- "fanoutMean": 5.28570556640625,
- "recvTokensMax": 16384,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1379.1359663009644,
- "p90": 1390.1439905166626,
- "p95": 1393.280029296875,
- "p99": 1400.480031967163
- },
- "combine": {
- "p50": 1540.5759811401367,
- "p90": 1547.4879741668701,
- "p95": 1549.7599840164185,
- "p99": 1553.1519651412964
- },
- "roundtrip": {
- "p50": 2893.3119773864746,
- "p90": 2902.30393409729,
- "p95": 2905.695915222168,
- "p99": 2912.480115890503
- },
- "isolatedSum": {
- "p50": 2919.711947441101,
- "p90": 2937.6319646835327,
- "p95": 2943.0400133132935,
- "p99": 2953.6319971084595
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2484242432,
- "combineLogicalBytes": 2484242432,
- "fanoutMean": 5.288299560546875,
- "recvTokensMax": 32768,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e91dfe75",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8",
- "colorKey": "h100_456a963c",
- "comparisonKey": "54b53207b090a644",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:57.841646+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "29ae5ace13636f8",
- "workloadId": "set:6:b952d4a43d688b50",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.8466796875,
- "eplbImbalanceAfter": 1.0002700343276514,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272016505",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272016505",
- "createdAt": "2026-06-27T00:01:57.841646+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 109.69600081443787,
- "p90": 113.98400366306305,
- "p95": 115.77600240707397,
- "p99": 122.43200093507767
- },
- "combine": {
- "p50": 105.50399869680405,
- "p90": 111.10399663448334,
- "p95": 112.31999844312668,
- "p99": 114.27199840545654
- },
- "roundtrip": {
- "p50": 196.6720074415207,
- "p90": 203.2960057258606,
- "p95": 204.0960043668747,
- "p99": 207.64799416065216
- },
- "isolatedSum": {
- "p50": 215.1999995112419,
- "p90": 225.0880002975464,
- "p95": 228.09600085020065,
- "p99": 236.7039993405342
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77701120,
- "combineLogicalBytes": 77701120,
- "fanoutMean": 5.29296875,
- "recvTokensMax": 697,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 144.44799721240997,
- "p90": 149.63200688362122,
- "p95": 151.2320041656494,
- "p99": 155.83999454975128
- },
- "combine": {
- "p50": 152.0639955997467,
- "p90": 153.60000729560852,
- "p95": 154.4640064239502,
- "p99": 158.52800011634827
- },
- "roundtrip": {
- "p50": 265.0560140609741,
- "p90": 268.92799139022827,
- "p95": 270.687997341156,
- "p99": 273.21600914001465
- },
- "isolatedSum": {
- "p50": 296.5119928121567,
- "p90": 303.23201417922974,
- "p95": 305.6960105895996,
- "p99": 314.36799466609955
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155187200,
- "combineLogicalBytes": 155187200,
- "fanoutMean": 5.28564453125,
- "recvTokensMax": 1372,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 199.64799284934998,
- "p90": 203.45599949359894,
- "p95": 204.79999482631683,
- "p99": 208.76799523830414
- },
- "combine": {
- "p50": 228.5120040178299,
- "p90": 234.23999547958374,
- "p95": 235.167995095253,
- "p99": 236.95999383926392
- },
- "roundtrip": {
- "p50": 403.80799770355225,
- "p90": 408.35198760032654,
- "p95": 410.0799858570099,
- "p99": 413.88800740242004
- },
- "isolatedSum": {
- "p50": 428.15999686717987,
- "p90": 437.6959949731827,
- "p95": 439.9679899215698,
- "p99": 445.72798907756805
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311162880,
- "combineLogicalBytes": 311162880,
- "fanoutMean": 5.299072265625,
- "recvTokensMax": 2761,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 305.4080009460449,
- "p90": 310.016006231308,
- "p95": 311.7760121822357,
- "p99": 316.76799058914185
- },
- "combine": {
- "p50": 367.19998717308044,
- "p90": 374.0159869194031,
- "p95": 375.5199909210205,
- "p99": 379.2960047721863
- },
- "roundtrip": {
- "p50": 649.1199731826782,
- "p90": 655.6479930877686,
- "p95": 658.4640145301819,
- "p99": 661.9840264320374
- },
- "isolatedSum": {
- "p50": 672.6079881191254,
- "p90": 684.0319931507111,
- "p95": 687.2960031032562,
- "p99": 696.0639953613281
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619974656,
- "combineLogicalBytes": 619974656,
- "fanoutMean": 5.279052734375,
- "recvTokensMax": 5481,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 528.8640260696411,
- "p90": 539.3919944763184,
- "p95": 543.8079833984375,
- "p99": 805.9520125389099
- },
- "combine": {
- "p50": 633.184015750885,
- "p90": 640.9919857978821,
- "p95": 643.9039707183838,
- "p99": 648.5440135002136
- },
- "roundtrip": {
- "p50": 1132.032036781311,
- "p90": 1143.8720226287842,
- "p95": 1147.3920345306396,
- "p99": 1154.8160314559937
- },
- "isolatedSum": {
- "p50": 1162.0480418205261,
- "p90": 1180.3839802742004,
- "p95": 1187.7119541168213,
- "p99": 1454.4960260391235
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240020992,
- "combineLogicalBytes": 1240020992,
- "fanoutMean": 5.27935791015625,
- "recvTokensMax": 10883,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 985.8880043029785,
- "p90": 1005.5680274963379,
- "p95": 1010.9119415283203,
- "p99": 1020.5440521240234
- },
- "combine": {
- "p50": 1144.1919803619385,
- "p90": 1153.92005443573,
- "p95": 1157.439947128296,
- "p99": 1163.6799573898315
- },
- "roundtrip": {
- "p50": 2094.464063644409,
- "p90": 2109.8880767822266,
- "p95": 2115.295886993408,
- "p99": 2124.5760917663574
- },
- "isolatedSum": {
- "p50": 2130.079984664917,
- "p90": 2159.488081932068,
- "p95": 2168.351888656616,
- "p99": 2184.224009513855
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480414720,
- "combineLogicalBytes": 2480414720,
- "fanoutMean": 5.2801513671875,
- "recvTokensMax": 21702,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f8095d72",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d",
- "colorKey": "h100_fb5b86de",
- "comparisonKey": "cd6da73322e03923",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:59:17.404659+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_04",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · uniform+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "uniform",
- "routingLabel": "uniform+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2225dbbdab9bf2d",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.006072998046875,
- "eplbImbalanceAfter": 1.0000152587890625,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271927356",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271927356",
- "createdAt": "2026-06-26T23:59:17.404659+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 113.02399635314941,
- "p90": 118.40000003576279,
- "p95": 120.12799829244614,
- "p99": 126.0479986667633
- },
- "combine": {
- "p50": 105.66399991512299,
- "p90": 108.89600217342377,
- "p95": 112.06399649381638,
- "p99": 115.9679964184761
- },
- "roundtrip": {
- "p50": 195.8719938993454,
- "p90": 201.24800503253937,
- "p95": 202.62399315834045,
- "p99": 207.39200711250305
- },
- "isolatedSum": {
- "p50": 218.6879962682724,
- "p90": 227.29600220918655,
- "p95": 232.1919947862625,
- "p99": 242.0159950852394
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77041664,
- "combineLogicalBytes": 77041664,
- "fanoutMean": 5.248046875,
- "recvTokensMax": 686,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 146.97599411010742,
- "p90": 150.91200172901154,
- "p95": 151.7760008573532,
- "p99": 155.39200603961945
- },
- "combine": {
- "p50": 148.3519971370697,
- "p90": 153.82400155067444,
- "p95": 154.4959992170334,
- "p99": 156.67200088500977
- },
- "roundtrip": {
- "p50": 265.9200131893158,
- "p90": 270.9760069847107,
- "p95": 273.1199860572815,
- "p99": 278.4000039100647
- },
- "isolatedSum": {
- "p50": 295.3279912471771,
- "p90": 304.736003279686,
- "p95": 306.2720000743866,
- "p99": 312.0640069246292
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154542080,
- "combineLogicalBytes": 154542080,
- "fanoutMean": 5.263671875,
- "recvTokensMax": 1365,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 200.32000541687012,
- "p90": 203.3279985189438,
- "p95": 204.57600057125092,
- "p99": 208.28799903392792
- },
- "combine": {
- "p50": 229.8559993505478,
- "p90": 235.4239970445633,
- "p95": 236.4480048418045,
- "p99": 237.98400163650513
- },
- "roundtrip": {
- "p50": 402.46400237083435,
- "p90": 407.9360067844391,
- "p95": 410.0480079650879,
- "p99": 413.1839871406555
- },
- "isolatedSum": {
- "p50": 430.1760047674179,
- "p90": 438.7519955635071,
- "p95": 441.0240054130554,
- "p99": 446.27200067043304
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 310589440,
- "combineLogicalBytes": 310589440,
- "fanoutMean": 5.289306640625,
- "recvTokensMax": 2746,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 303.51999402046204,
- "p90": 308.6720108985901,
- "p95": 310.2720081806183,
- "p99": 315.8400058746338
- },
- "combine": {
- "p50": 366.3040101528168,
- "p90": 374.33600425720215,
- "p95": 375.99998712539673,
- "p99": 380.0320029258728
- },
- "roundtrip": {
- "p50": 643.9679861068726,
- "p90": 650.9119868278503,
- "p95": 653.4720063209534,
- "p99": 656.9280028343201
- },
- "isolatedSum": {
- "p50": 669.8240041732788,
- "p90": 683.0080151557922,
- "p95": 686.271995306015,
- "p99": 695.8720088005066
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619171840,
- "combineLogicalBytes": 619171840,
- "fanoutMean": 5.272216796875,
- "recvTokensMax": 5467,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 526.6559720039368,
- "p90": 533.8879823684692,
- "p95": 536.0000133514404,
- "p99": 542.4000024795532
- },
- "combine": {
- "p50": 628.607988357544,
- "p90": 636.5759968757629,
- "p95": 639.3600106239319,
- "p99": 643.455982208252
- },
- "roundtrip": {
- "p50": 1128.5760402679443,
- "p90": 1137.984037399292,
- "p95": 1141.5679454803467,
- "p99": 1146.1759805679321
- },
- "isolatedSum": {
- "p50": 1155.2639603614807,
- "p90": 1170.4639792442322,
- "p95": 1175.3600239753723,
- "p99": 1185.8559846878052
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1238945792,
- "combineLogicalBytes": 1238945792,
- "fanoutMean": 5.2747802734375,
- "recvTokensMax": 10913,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1018.4320211410522,
- "p90": 1046.496033668518,
- "p95": 1056.1920404434204,
- "p99": 1073.5039710998535
- },
- "combine": {
- "p50": 1148.5120058059692,
- "p90": 1156.3199758529663,
- "p95": 1158.784031867981,
- "p99": 1164.031982421875
- },
- "roundtrip": {
- "p50": 2113.408088684082,
- "p90": 2138.5281085968018,
- "p95": 2143.807888031006,
- "p99": 2155.679941177368
- },
- "isolatedSum": {
- "p50": 2166.9440269470215,
- "p90": 2202.8160095214844,
- "p95": 2214.9760723114014,
- "p99": 2237.5359535217285
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2481747968,
- "combineLogicalBytes": 2481747968,
- "fanoutMean": 5.282989501953125,
- "recvTokensMax": 21789,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-ff5c49bb",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786",
- "colorKey": "h100_aa268d13",
- "comparisonKey": "927a6d7282665742",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:55:17.079494+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_02",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "4caecd33bedf786",
- "workloadId": "set:3:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271806404",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271806404",
- "createdAt": "2026-06-26T23:55:17.079494+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 125.37600100040436,
- "p90": 131.9040060043335,
- "p95": 132.7359974384308,
- "p99": 137.08800077438354
- },
- "combine": {
- "p50": 113.0559965968132,
- "p90": 114.04799669981003,
- "p95": 114.56000059843063,
- "p99": 120.67200243473053
- },
- "roundtrip": {
- "p50": 216.2880003452301,
- "p90": 219.67999637126923,
- "p95": 221.15199267864227,
- "p99": 226.17599368095398
- },
- "isolatedSum": {
- "p50": 238.43199759721756,
- "p90": 245.95200270414352,
- "p95": 247.29599803686142,
- "p99": 257.7600032091141
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 239.42400515079498,
- "p90": 255.5519938468933,
- "p95": 258.14399123191833,
- "p99": 261.9200050830841
- },
- "combine": {
- "p50": 267.07199215888977,
- "p90": 276.63999795913696,
- "p95": 277.536004781723,
- "p99": 279.90400791168213
- },
- "roundtrip": {
- "p50": 476.22400522232056,
- "p90": 492.3520088195801,
- "p95": 495.03999948501587,
- "p99": 499.55201148986816
- },
- "isolatedSum": {
- "p50": 506.49599730968475,
- "p90": 532.1919918060303,
- "p95": 535.6799960136414,
- "p99": 541.8240129947662
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 677.183985710144,
- "p90": 691.3599967956543,
- "p95": 694.8800086975098,
- "p99": 701.2479901313782
- },
- "combine": {
- "p50": 816.2879943847656,
- "p90": 828.607976436615,
- "p95": 832.5759768486023,
- "p99": 837.8239870071411
- },
- "roundtrip": {
- "p50": 1460.4159593582153,
- "p90": 1474.176049232483,
- "p95": 1478.4640073776245,
- "p99": 1485.8880043029785
- },
- "isolatedSum": {
- "p50": 1493.4719800949097,
- "p90": 1519.9679732322693,
- "p95": 1527.455985546112,
- "p99": 1539.0719771385193
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f5264491",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86",
- "colorKey": "h100_aa268d13",
- "comparisonKey": "927a6d7282665742",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:04.176924+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_08",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b5217e990b95f86",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271951888",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271951888",
- "createdAt": "2026-06-27T00:00:04.176924+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 124.70400333404541,
- "p90": 128.86400520801544,
- "p95": 131.071999669075,
- "p99": 132.9600065946579
- },
- "combine": {
- "p50": 112.5119999051094,
- "p90": 114.01599645614624,
- "p95": 114.3679991364479,
- "p99": 116.5120005607605
- },
- "roundtrip": {
- "p50": 216.22399985790253,
- "p90": 219.90400552749634,
- "p95": 221.02400660514832,
- "p99": 223.90399873256683
- },
- "isolatedSum": {
- "p50": 237.21600323915482,
- "p90": 242.88000166416168,
- "p95": 245.43999880552292,
- "p99": 249.4720071554184
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 164.57599401474,
- "p90": 167.93599724769592,
- "p95": 169.5680022239685,
- "p99": 229.15199398994446
- },
- "combine": {
- "p50": 162.6559942960739,
- "p90": 168.64000260829926,
- "p95": 169.98399794101715,
- "p99": 171.29600048065186
- },
- "roundtrip": {
- "p50": 299.80799555778503,
- "p90": 305.11999130249023,
- "p95": 306.71998858451843,
- "p99": 308.9919984340668
- },
- "isolatedSum": {
- "p50": 327.2319883108139,
- "p90": 336.5759998559952,
- "p95": 339.55200016498566,
- "p99": 400.4479944705963
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 100509696,
- "combineLogicalBytes": 100509696,
- "fanoutMean": 3.42333984375,
- "recvTokensMax": 2046,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 237.92000114917755,
- "p90": 242.3039972782135,
- "p95": 244.4159984588623,
- "p99": 250.14400482177734
- },
- "combine": {
- "p50": 260.9280049800873,
- "p90": 265.6640112400055,
- "p95": 267.67998933792114,
- "p99": 272.7360129356384
- },
- "roundtrip": {
- "p50": 471.77600860595703,
- "p90": 476.8959879875183,
- "p95": 479.2639911174774,
- "p99": 495.2000081539154
- },
- "isolatedSum": {
- "p50": 498.84800612926483,
- "p90": 507.968008518219,
- "p95": 512.0959877967834,
- "p99": 522.8800177574158
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 377.85598635673523,
- "p90": 384.38400626182556,
- "p95": 385.8239948749542,
- "p99": 390.6880021095276
- },
- "combine": {
- "p50": 442.1760141849518,
- "p90": 447.80799746513367,
- "p95": 449.3120014667511,
- "p99": 452.86399126052856
- },
- "roundtrip": {
- "p50": 795.6799864768982,
- "p90": 803.167998790741,
- "p95": 806.3039779663086,
- "p99": 813.0559921264648
- },
- "isolatedSum": {
- "p50": 820.032000541687,
- "p90": 832.1920037269592,
- "p95": 835.1359963417053,
- "p99": 843.5519933700562
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 405035008,
- "combineLogicalBytes": 405035008,
- "fanoutMean": 3.4488525390625,
- "recvTokensMax": 8189,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 675.3919720649719,
- "p90": 690.4320120811462,
- "p95": 694.1120028495789,
- "p99": 700.8320093154907
- },
- "combine": {
- "p50": 806.1439990997314,
- "p90": 816.5119886398315,
- "p95": 818.5279965400696,
- "p99": 824.5440125465393
- },
- "roundtrip": {
- "p50": 1447.1999406814575,
- "p90": 1458.143949508667,
- "p95": 1462.5600576400757,
- "p99": 1468.991994857788
- },
- "isolatedSum": {
- "p50": 1481.5359711647034,
- "p90": 1506.9440007209778,
- "p95": 1512.6399993896484,
- "p99": 1525.37602186203
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1273.7280130386353,
- "p90": 1286.1759662628174,
- "p95": 1290.2400493621826,
- "p99": 1300.3519773483276
- },
- "combine": {
- "p50": 1515.6480073928833,
- "p90": 1529.1199684143066,
- "p95": 1554.6239614486694,
- "p99": 1575.2639770507812
- },
- "roundtrip": {
- "p50": 2763.0081176757812,
- "p90": 2772.9599475860596,
- "p95": 2776.3519287109375,
- "p99": 2782.464027404785
- },
- "isolatedSum": {
- "p50": 2789.3760204315186,
- "p90": 2815.295934677124,
- "p95": 2844.864010810852,
- "p99": 2875.615954399109
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1619795968,
- "combineLogicalBytes": 1619795968,
- "fanoutMean": 3.4481201171875,
- "recvTokensMax": 32761,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f680673f",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3",
- "colorKey": "h100_002beb29",
- "comparisonKey": "3715210183d38757",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:55:20.108988+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_06",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "3dd868cb33839a3",
- "workloadId": "set:3:1ca614e23cc66be1",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271813470",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271813470",
- "createdAt": "2026-06-26T23:55:20.108988+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.63200163841248,
- "p90": 122.43200093507767,
- "p95": 123.55200201272964,
- "p99": 126.5919953584671
- },
- "combine": {
- "p50": 106.62399977445602,
- "p90": 112.31999844312668,
- "p95": 113.27999830245972,
- "p99": 115.9679964184761
- },
- "roundtrip": {
- "p50": 207.58399367332458,
- "p90": 211.84000372886658,
- "p95": 213.18399906158447,
- "p99": 216.35200083255768
- },
- "isolatedSum": {
- "p50": 224.2560014128685,
- "p90": 234.75199937820435,
- "p95": 236.83200031518936,
- "p99": 242.5599917769432
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 236.35199666023254,
- "p90": 249.82400238513947,
- "p95": 253.88801097869873,
- "p99": 257.02399015426636
- },
- "combine": {
- "p50": 251.583993434906,
- "p90": 259.7759962081909,
- "p95": 260.47998666763306,
- "p99": 262.2080147266388
- },
- "roundtrip": {
- "p50": 459.29598808288574,
- "p90": 472.1919894218445,
- "p95": 474.88000988960266,
- "p99": 478.5279929637909
- },
- "isolatedSum": {
- "p50": 487.93599009513855,
- "p90": 509.5999985933304,
- "p95": 514.3679976463318,
- "p99": 519.2320048809052
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 91521024,
- "combineLogicalBytes": 91521024,
- "fanoutMean": 1.55859375,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 659.3279838562012,
- "p90": 669.0239906311035,
- "p95": 672.0960140228271,
- "p99": 678.4319877624512
- },
- "combine": {
- "p50": 783.456027507782,
- "p90": 794.6239709854126,
- "p95": 799.0720272064209,
- "p99": 807.6800107955933
- },
- "roundtrip": {
- "p50": 1412.6399755477905,
- "p90": 1421.8239784240723,
- "p95": 1426.0480403900146,
- "p99": 1434.0159893035889
- },
- "isolatedSum": {
- "p50": 1442.7840113639832,
- "p90": 1463.647961616516,
- "p95": 1471.168041229248,
- "p99": 1486.1119985580444
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 368062464,
- "combineLogicalBytes": 368062464,
- "fanoutMean": 1.5670166015625,
- "recvTokensMax": 16384,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-329395ff",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe",
- "colorKey": "h100_002beb29",
- "comparisonKey": "3715210183d38757",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:29.454209+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_15",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "bbcd1d9d8d1e4fe",
- "workloadId": "set:6:1ca614e23cc66be1",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271996602",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271996602",
- "createdAt": "2026-06-27T00:01:29.454209+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 115.87200313806534,
- "p90": 122.14399874210358,
- "p95": 123.10399860143661,
- "p99": 127.16799974441528
- },
- "combine": {
- "p50": 106.72000050544739,
- "p90": 111.7120012640953,
- "p95": 112.57600039243698,
- "p99": 114.46399986743927
- },
- "roundtrip": {
- "p50": 207.07200467586517,
- "p90": 210.91200411319733,
- "p95": 212.54399418830872,
- "p99": 243.52000653743744
- },
- "isolatedSum": {
- "p50": 222.59200364351273,
- "p90": 233.85600000619888,
- "p95": 235.6799989938736,
- "p99": 241.63199961185455
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 159.29600596427917,
- "p90": 166.62399470806122,
- "p95": 167.4560010433197,
- "p99": 169.21600699424744
- },
- "combine": {
- "p50": 154.65599298477173,
- "p90": 163.10399770736694,
- "p95": 163.7759953737259,
- "p99": 165.0560051202774
- },
- "roundtrip": {
- "p50": 289.44000601768494,
- "p90": 301.66399478912354,
- "p95": 303.5840094089508,
- "p99": 308.03200602531433
- },
- "isolatedSum": {
- "p50": 313.9519989490509,
- "p90": 329.72799241542816,
- "p95": 331.2319964170456,
- "p99": 334.27201211452484
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 45688832,
- "combineLogicalBytes": 45688832,
- "fanoutMean": 1.55615234375,
- "recvTokensMax": 2048,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 231.1680018901825,
- "p90": 236.4799976348877,
- "p95": 237.40799725055695,
- "p99": 240.7039999961853
- },
- "combine": {
- "p50": 252.73600220680237,
- "p90": 260.8639895915985,
- "p95": 261.8879973888397,
- "p99": 263.64800333976746
- },
- "roundtrip": {
- "p50": 461.34400367736816,
- "p90": 475.39201378822327,
- "p95": 476.639986038208,
- "p99": 479.45600748062134
- },
- "isolatedSum": {
- "p50": 483.90400409698486,
- "p90": 497.3439872264862,
- "p95": 499.29599463939667,
- "p99": 504.35200333595276
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 91521024,
- "combineLogicalBytes": 91521024,
- "fanoutMean": 1.55859375,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 374.2400109767914,
- "p90": 379.040002822876,
- "p95": 381.98399543762207,
- "p99": 387.4559998512268
- },
- "combine": {
- "p50": 431.2640130519867,
- "p90": 439.8399889469147,
- "p95": 443.07199120521545,
- "p99": 446.78398966789246
- },
- "roundtrip": {
- "p50": 779.2320251464844,
- "p90": 791.3600206375122,
- "p95": 794.0160036087036,
- "p99": 801.0240197181702
- },
- "isolatedSum": {
- "p50": 805.5040240287781,
- "p90": 818.8799917697906,
- "p95": 825.0559866428375,
- "p99": 834.2399895191193
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 183916544,
- "combineLogicalBytes": 183916544,
- "fanoutMean": 1.5660400390625,
- "recvTokensMax": 8192,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 661.1520051956177,
- "p90": 676.1919856071472,
- "p95": 679.6479821205139,
- "p99": 685.9520077705383
- },
- "combine": {
- "p50": 789.9519801139832,
- "p90": 800.0959753990173,
- "p95": 803.1359910964966,
- "p99": 808.7360262870789
- },
- "roundtrip": {
- "p50": 1422.271966934204,
- "p90": 1435.1680278778076,
- "p95": 1439.1039609909058,
- "p99": 1454.367995262146
- },
- "isolatedSum": {
- "p50": 1451.1039853096008,
- "p90": 1476.2879610061646,
- "p95": 1482.7839732170105,
- "p99": 1494.6880340576172
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 368062464,
- "combineLogicalBytes": 368062464,
- "fanoutMean": 1.5670166015625,
- "recvTokensMax": 16384,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1241.8559789657593,
- "p90": 1251.871943473816,
- "p95": 1256.4799785614014,
- "p99": 1264.0639543533325
- },
- "combine": {
- "p50": 1471.4560508728027,
- "p90": 1480.1599979400635,
- "p95": 1482.6240539550781,
- "p99": 1489.8879528045654
- },
- "roundtrip": {
- "p50": 2687.9680156707764,
- "p90": 2698.848009109497,
- "p95": 2703.104019165039,
- "p99": 2708.928108215332
- },
- "isolatedSum": {
- "p50": 2713.312029838562,
- "p90": 2732.0319414138794,
- "p95": 2739.1040325164795,
- "p99": 2753.951907157898
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 734720000,
- "combineLogicalBytes": 734720000,
- "fanoutMean": 1.56402587890625,
- "recvTokensMax": 32768,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-c90a67e2",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb",
- "colorKey": "h100_c44978e5",
- "comparisonKey": "6c5c69e3474ec552",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:29.771027+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_05",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "46855e7fa6754eb",
- "workloadId": "set:6:1ca614e23cc66be1",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 7.38995361328125,
- "eplbImbalanceAfter": 1.0000210716610862,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272000459",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272000459",
- "createdAt": "2026-06-27T00:01:29.771027+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 110.75200140476227,
- "p90": 114.97599631547928,
- "p95": 116.95999652147293,
- "p99": 122.01599776744843
- },
- "combine": {
- "p50": 105.92000186443329,
- "p90": 109.56799983978271,
- "p95": 111.23199760913849,
- "p99": 114.14399743080139
- },
- "roundtrip": {
- "p50": 193.1840032339096,
- "p90": 198.7520009279251,
- "p95": 200.19200444221497,
- "p99": 204.44799959659576
- },
- "isolatedSum": {
- "p50": 216.67200326919556,
- "p90": 224.543996155262,
- "p95": 228.19199413061142,
- "p99": 236.15999519824982
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 79206400,
- "combineLogicalBytes": 79206400,
- "fanoutMean": 5.3955078125,
- "recvTokensMax": 713,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 146.81600034236908,
- "p90": 151.48800611495972,
- "p95": 152.44799852371216,
- "p99": 156.80000185966492
- },
- "combine": {
- "p50": 150.62400698661804,
- "p90": 154.7520011663437,
- "p95": 155.39200603961945,
- "p99": 161.31199896335602
- },
- "roundtrip": {
- "p50": 266.59199595451355,
- "p90": 270.4640030860901,
- "p95": 271.64798974990845,
- "p99": 274.84801411628723
- },
- "isolatedSum": {
- "p50": 297.4400073289871,
- "p90": 306.2400072813034,
- "p95": 307.8400045633316,
- "p99": 318.11200082302094
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 159330304,
- "combineLogicalBytes": 159330304,
- "fanoutMean": 5.4267578125,
- "recvTokensMax": 1436,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 201.05600357055664,
- "p90": 204.70400154590607,
- "p95": 205.63200116157532,
- "p99": 209.1200053691864
- },
- "combine": {
- "p50": 227.64800488948822,
- "p90": 231.99999332427979,
- "p95": 234.17599499225616,
- "p99": 235.83999276161194
- },
- "roundtrip": {
- "p50": 403.55199575424194,
- "p90": 408.160001039505,
- "p95": 409.15200114250183,
- "p99": 411.77600622177124
- },
- "isolatedSum": {
- "p50": 428.70400846004486,
- "p90": 436.70399487018585,
- "p95": 439.8079961538315,
- "p99": 444.95999813079834
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 319535104,
- "combineLogicalBytes": 319535104,
- "fanoutMean": 5.441650390625,
- "recvTokensMax": 2897,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 309.1840147972107,
- "p90": 313.2160007953644,
- "p95": 314.62401151657104,
- "p99": 317.79199838638306
- },
- "combine": {
- "p50": 368.5440123081207,
- "p90": 374.9440014362335,
- "p95": 376.22401118278503,
- "p99": 380.7680010795593
- },
- "roundtrip": {
- "p50": 652.2560119628906,
- "p90": 658.9760184288025,
- "p95": 661.3759994506836,
- "p99": 665.2479767799377
- },
- "isolatedSum": {
- "p50": 677.7280271053314,
- "p90": 688.1600022315979,
- "p95": 690.8480226993561,
- "p99": 698.5599994659424
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 638410752,
- "combineLogicalBytes": 638410752,
- "fanoutMean": 5.43603515625,
- "recvTokensMax": 5815,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 532.6079726219177,
- "p90": 546.5599894523621,
- "p95": 550.495982170105,
- "p99": 557.7600002288818
- },
- "combine": {
- "p50": 642.5279974937439,
- "p90": 649.9519944190979,
- "p95": 652.2560119628906,
- "p99": 658.8159799575806
- },
- "roundtrip": {
- "p50": 1146.399974822998,
- "p90": 1156.9600105285645,
- "p95": 1160.9920263290405,
- "p99": 1168.511986732483
- },
- "isolatedSum": {
- "p50": 1175.1359701156616,
- "p90": 1196.51198387146,
- "p95": 1202.7519941329956,
- "p99": 1216.5759801864624
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1275144192,
- "combineLogicalBytes": 1275144192,
- "fanoutMean": 5.42889404296875,
- "recvTokensMax": 11606,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1024.351954460144,
- "p90": 1048.5440492630005,
- "p95": 1056.9599866867065,
- "p99": 1069.3119764328003
- },
- "combine": {
- "p50": 1185.9840154647827,
- "p90": 1194.1759586334229,
- "p95": 1196.5759992599487,
- "p99": 1201.5680074691772
- },
- "roundtrip": {
- "p50": 2167.520046234131,
- "p90": 2183.3600997924805,
- "p95": 2188.8959407806396,
- "p99": 2197.727918624878
- },
- "isolatedSum": {
- "p50": 2210.3359699249268,
- "p90": 2242.7200078964233,
- "p95": 2253.5359859466553,
- "p99": 2270.8799839019775
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2546374656,
- "combineLogicalBytes": 2546374656,
- "fanoutMean": 5.420562744140625,
- "recvTokensMax": 23170,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-fe520015",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428",
- "colorKey": "h100_9aa30544",
- "comparisonKey": "212a6f0661f5d2d6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:29.937355+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_09",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-mild",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cf93f8f6b52e428",
- "workloadId": "set:6:a224603e5a1640b8",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271965088",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271965088",
- "createdAt": "2026-06-27T00:00:29.937355+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 123.71200323104858,
- "p90": 127.6479959487915,
- "p95": 131.20000064373016,
- "p99": 133.7279975414276
- },
- "combine": {
- "p50": 113.76000195741653,
- "p90": 115.13599753379822,
- "p95": 119.48800086975098,
- "p99": 121.56800180673599
- },
- "roundtrip": {
- "p50": 214.65599536895752,
- "p90": 219.29599344730377,
- "p95": 220.12799978256226,
- "p99": 223.61600399017334
- },
- "isolatedSum": {
- "p50": 237.47200518846512,
- "p90": 242.78399348258972,
- "p95": 250.68800151348114,
- "p99": 255.2959993481636
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 70160384,
- "combineLogicalBytes": 70160384,
- "fanoutMean": 4.779296875,
- "recvTokensMax": 987,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 160.19199788570404,
- "p90": 166.4000004529953,
- "p95": 167.61599481105804,
- "p99": 170.43200135231018
- },
- "combine": {
- "p50": 169.37600076198578,
- "p90": 172.5119948387146,
- "p95": 173.40800166130066,
- "p99": 177.50400304794312
- },
- "roundtrip": {
- "p50": 299.5840013027191,
- "p90": 303.42400074005127,
- "p95": 305.1519989967346,
- "p99": 310.8479976654053
- },
- "isolatedSum": {
- "p50": 329.5679986476898,
- "p90": 338.9119952917099,
- "p95": 341.0239964723587,
- "p99": 347.9360044002533
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 140879872,
- "combineLogicalBytes": 140879872,
- "fanoutMean": 4.79833984375,
- "recvTokensMax": 1972,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 233.18399488925934,
- "p90": 239.26399648189545,
- "p95": 240.28800427913666,
- "p99": 242.94400215148926
- },
- "combine": {
- "p50": 263.5839879512787,
- "p90": 268.70399713516235,
- "p95": 270.27198672294617,
- "p99": 274.1760015487671
- },
- "roundtrip": {
- "p50": 471.71199321746826,
- "p90": 476.639986038208,
- "p95": 478.5600006580353,
- "p99": 481.3440144062042
- },
- "isolatedSum": {
- "p50": 496.767982840538,
- "p90": 507.9679936170578,
- "p95": 510.5599910020828,
- "p99": 517.1200037002563
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 282333184,
- "combineLogicalBytes": 282333184,
- "fanoutMean": 4.80810546875,
- "recvTokensMax": 3936,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 377.27999687194824,
- "p90": 383.35999846458435,
- "p95": 385.18399000167847,
- "p99": 387.84000277519226
- },
- "combine": {
- "p50": 446.30399346351624,
- "p90": 453.44001054763794,
- "p95": 455.52000403404236,
- "p99": 460.89598536491394
- },
- "roundtrip": {
- "p50": 797.0240116119385,
- "p90": 804.4800162315369,
- "p95": 807.1039915084839,
- "p99": 811.6480112075806
- },
- "isolatedSum": {
- "p50": 823.5839903354645,
- "p90": 836.8000090122223,
- "p95": 840.7039940357208,
- "p99": 848.7359881401062
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 566716416,
- "combineLogicalBytes": 566716416,
- "fanoutMean": 4.8255615234375,
- "recvTokensMax": 7855,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 676.4479875564575,
- "p90": 686.8799924850464,
- "p95": 690.5279755592346,
- "p99": 791.9679880142212
- },
- "combine": {
- "p50": 796.3520288467407,
- "p90": 808.4160089492798,
- "p95": 811.3920092582703,
- "p99": 820.5440044403076
- },
- "roundtrip": {
- "p50": 1445.5360174179077,
- "p90": 1457.311987876892,
- "p95": 1460.6399536132812,
- "p99": 1468.2879447937012
- },
- "isolatedSum": {
- "p50": 1472.8000164031982,
- "p90": 1495.2960014343262,
- "p95": 1501.9199848175049,
- "p99": 1612.5119924545288
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1132285952,
- "combineLogicalBytes": 1132285952,
- "fanoutMean": 4.8206787109375,
- "recvTokensMax": 15694,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1284.8639488220215,
- "p90": 1296.3199615478516,
- "p95": 1299.7759580612183,
- "p99": 1306.5279722213745
- },
- "combine": {
- "p50": 1503.5840272903442,
- "p90": 1517.2799825668335,
- "p95": 1524.2880582809448,
- "p99": 1540.0960445404053
- },
- "roundtrip": {
- "p50": 2760.960102081299,
- "p90": 2775.10404586792,
- "p95": 2783.936023712158,
- "p99": 2810.0481033325195
- },
- "isolatedSum": {
- "p50": 2788.4479761123657,
- "p90": 2813.599944114685,
- "p95": 2824.064016342163,
- "p99": 2846.62401676178
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2267840512,
- "combineLogicalBytes": 2267840512,
- "fanoutMean": 4.82763671875,
- "recvTokensMax": 31357,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-2b98c773",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9",
- "colorKey": "h100_e8b903ea",
- "comparisonKey": "5961b4bc09451ca4",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:35.470349+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_16",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "27ddc85ded0add9",
- "workloadId": "set:6:a224603e5a1640b8",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 2.545684814453125,
- "eplbImbalanceAfter": 1.0001495361328125,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271968791",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271968791",
- "createdAt": "2026-06-27T00:00:35.470349+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 112.41599917411804,
- "p90": 117.18399822711945,
- "p95": 118.9119964838028,
- "p99": 122.91199713945389
- },
- "combine": {
- "p50": 106.33599758148193,
- "p90": 112.12799698114395,
- "p95": 113.0559965968132,
- "p99": 114.43199962377548
- },
- "roundtrip": {
- "p50": 198.81600141525269,
- "p90": 204.03200387954712,
- "p95": 205.4080069065094,
- "p99": 207.58399367332458
- },
- "isolatedSum": {
- "p50": 218.75199675559998,
- "p90": 229.3119952082634,
- "p95": 231.967993080616,
- "p99": 237.34399676322937
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78159872,
- "combineLogicalBytes": 78159872,
- "fanoutMean": 5.32421875,
- "recvTokensMax": 702,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 148.44800531864166,
- "p90": 151.99999511241913,
- "p95": 153.3759981393814,
- "p99": 156.3519984483719
- },
- "combine": {
- "p50": 149.47199821472168,
- "p90": 155.39200603961945,
- "p95": 159.39199924468994,
- "p99": 164.06400501728058
- },
- "roundtrip": {
- "p50": 267.4880027770996,
- "p90": 272.2879946231842,
- "p95": 274.04800057411194,
- "p99": 279.4879972934723
- },
- "isolatedSum": {
- "p50": 297.92000353336334,
- "p90": 307.3920011520386,
- "p95": 312.76799738407135,
- "p99": 320.41600346565247
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156563456,
- "combineLogicalBytes": 156563456,
- "fanoutMean": 5.33251953125,
- "recvTokensMax": 1393,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 201.12000405788422,
- "p90": 204.48000729084015,
- "p95": 206.04799687862396,
- "p99": 212.22400665283203
- },
- "combine": {
- "p50": 229.0239930152893,
- "p90": 233.95200073719025,
- "p95": 236.4480048418045,
- "p99": 238.52799832820892
- },
- "roundtrip": {
- "p50": 404.06399965286255,
- "p90": 408.86399149894714,
- "p95": 411.0719859600067,
- "p99": 431.5840005874634
- },
- "isolatedSum": {
- "p50": 430.1439970731735,
- "p90": 438.4320080280304,
- "p95": 442.49600172042847,
- "p99": 450.75200498104095
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312410112,
- "combineLogicalBytes": 312410112,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 2773,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 304.0960133075714,
- "p90": 309.28000807762146,
- "p95": 311.64801120758057,
- "p99": 479.5520007610321
- },
- "combine": {
- "p50": 366.11199378967285,
- "p90": 372.8959858417511,
- "p95": 374.55999851226807,
- "p99": 383.4559917449951
- },
- "roundtrip": {
- "p50": 644.0640091896057,
- "p90": 650.1439809799194,
- "p95": 652.1919965744019,
- "p99": 656.5120220184326
- },
- "isolatedSum": {
- "p50": 670.2080070972443,
- "p90": 682.1759939193726,
- "p95": 686.2080097198486,
- "p99": 863.0079925060272
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 622712832,
- "combineLogicalBytes": 622712832,
- "fanoutMean": 5.3023681640625,
- "recvTokensMax": 5498,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 521.5039849281311,
- "p90": 530.1120281219482,
- "p95": 533.3759784698486,
- "p99": 540.5120253562927
- },
- "combine": {
- "p50": 632.1920156478882,
- "p90": 639.3280029296875,
- "p95": 640.9599781036377,
- "p99": 647.2960114479065
- },
- "roundtrip": {
- "p50": 1123.9999532699585,
- "p90": 1132.8959465026855,
- "p95": 1135.807991027832,
- "p99": 1143.5840129852295
- },
- "isolatedSum": {
- "p50": 1153.6960005760193,
- "p90": 1169.4400310516357,
- "p95": 1174.3359565734863,
- "p99": 1187.8080368041992
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1245038592,
- "combineLogicalBytes": 1245038592,
- "fanoutMean": 5.30072021484375,
- "recvTokensMax": 10955,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 990.2399778366089,
- "p90": 1009.4720125198364,
- "p95": 1016.1279439926147,
- "p99": 1026.8160104751587
- },
- "combine": {
- "p50": 1164.736032485962,
- "p90": 1174.015998840332,
- "p95": 1177.2799491882324,
- "p99": 1183.9359998703003
- },
- "roundtrip": {
- "p50": 2116.895914077759,
- "p90": 2137.7599239349365,
- "p95": 2143.712043762207,
- "p99": 2157.8240394592285
- },
- "isolatedSum": {
- "p50": 2154.976010322571,
- "p90": 2183.4880113601685,
- "p95": 2193.407893180847,
- "p99": 2210.752010345459
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2489460736,
- "combineLogicalBytes": 2489460736,
- "fanoutMean": 5.299407958984375,
- "recvTokensMax": 21864,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-0a66c8a3",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86",
- "colorKey": "h100_552a4b73",
- "comparisonKey": "44cbfb11e1668dc5",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:00.044863+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_03",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-moderate",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b5217e990b95f86",
- "workloadId": "set:6:6709a02c31933a9f",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271978834",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271978834",
- "createdAt": "2026-06-27T00:01:00.044863+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 124.7360035777092,
- "p90": 130.68799674510956,
- "p95": 132.03200697898865,
- "p99": 136.4479959011078
- },
- "combine": {
- "p50": 112.5119999051094,
- "p90": 114.17599767446518,
- "p95": 115.07199704647064,
- "p99": 120.67200243473053
- },
- "roundtrip": {
- "p50": 215.16799926757812,
- "p90": 219.35999393463135,
- "p95": 221.11999988555908,
- "p99": 229.18400168418884
- },
- "isolatedSum": {
- "p50": 237.2480034828186,
- "p90": 244.86399441957474,
- "p95": 247.1040040254593,
- "p99": 257.1199983358383
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 163.7440025806427,
- "p90": 167.26399958133698,
- "p95": 168.44800114631653,
- "p99": 174.6560037136078
- },
- "combine": {
- "p50": 164.51199352741241,
- "p90": 169.50400173664093,
- "p95": 170.1440066099167,
- "p99": 174.14399981498718
- },
- "roundtrip": {
- "p50": 297.91998863220215,
- "p90": 302.72001028060913,
- "p95": 304.32000756263733,
- "p99": 306.5600097179413
- },
- "isolatedSum": {
- "p50": 328.2559961080551,
- "p90": 336.7680013179779,
- "p95": 338.5920077562332,
- "p99": 348.80000352859497
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 100509696,
- "combineLogicalBytes": 100509696,
- "fanoutMean": 3.42333984375,
- "recvTokensMax": 2046,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 237.44000494480133,
- "p90": 241.82400107383728,
- "p95": 243.0720031261444,
- "p99": 247.74399399757385
- },
- "combine": {
- "p50": 264.51200246810913,
- "p90": 268.41598749160767,
- "p95": 271.5519964694977,
- "p99": 281.6320061683655
- },
- "roundtrip": {
- "p50": 475.5840003490448,
- "p90": 482.59198665618896,
- "p95": 490.30399322509766,
- "p99": 504.96000051498413
- },
- "isolatedSum": {
- "p50": 501.95200741291046,
- "p90": 510.23998856544495,
- "p95": 514.6239995956421,
- "p99": 529.3760001659393
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 379.040002822876,
- "p90": 385.72800159454346,
- "p95": 388.2240056991577,
- "p99": 414.3359959125519
- },
- "combine": {
- "p50": 447.00801372528076,
- "p90": 452.4799883365631,
- "p95": 453.5039961338043,
- "p99": 456.89600706100464
- },
- "roundtrip": {
- "p50": 800.2240061759949,
- "p90": 805.791974067688,
- "p95": 807.744026184082,
- "p99": 811.680018901825
- },
- "isolatedSum": {
- "p50": 826.0480165481567,
- "p90": 838.2079899311066,
- "p95": 841.728001832962,
- "p99": 871.2320029735565
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 405035008,
- "combineLogicalBytes": 405035008,
- "fanoutMean": 3.4488525390625,
- "recvTokensMax": 8189,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 675.3919720649719,
- "p90": 695.6800222396851,
- "p95": 707.8400254249573,
- "p99": 910.8160138130188
- },
- "combine": {
- "p50": 819.2319869995117,
- "p90": 829.6639919281006,
- "p95": 833.2160115242004,
- "p99": 841.3439989089966
- },
- "roundtrip": {
- "p50": 1459.9679708480835,
- "p90": 1476.9599437713623,
- "p95": 1481.8559885025024,
- "p99": 1501.2799501419067
- },
- "isolatedSum": {
- "p50": 1494.6239590644836,
- "p90": 1525.3440141677856,
- "p95": 1541.0560369491577,
- "p99": 1752.1600127220154
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1275.10404586792,
- "p90": 1287.5200510025024,
- "p95": 1291.8399572372437,
- "p99": 1346.0479974746704
- },
- "combine": {
- "p50": 1538.7200117111206,
- "p90": 1550.3679513931274,
- "p95": 1555.232048034668,
- "p99": 1607.9360246658325
- },
- "roundtrip": {
- "p50": 2787.168025970459,
- "p90": 2798.784017562866,
- "p95": 2802.9439449310303,
- "p99": 2818.4640407562256
- },
- "isolatedSum": {
- "p50": 2813.8240575790405,
- "p90": 2837.88800239563,
- "p95": 2847.0720052719116,
- "p99": 2953.984022140503
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1619795968,
- "combineLogicalBytes": 1619795968,
- "fanoutMean": 3.4481201171875,
- "recvTokensMax": 32761,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-7114a01f",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39",
- "colorKey": "h100_106a51ab",
- "comparisonKey": "80b7db884aaf5a8c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:17.822701+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_10",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2b57a75d27f5b39",
- "workloadId": "set:6:6709a02c31933a9f",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.895263671875,
- "eplbImbalanceAfter": 1.0000902811686199,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271982260",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271982260",
- "createdAt": "2026-06-27T00:01:17.822701+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 114.30399864912033,
- "p90": 120.31999975442886,
- "p95": 121.56800180673599,
- "p99": 125.02400577068329
- },
- "combine": {
- "p50": 106.27199709415436,
- "p90": 111.48799955844879,
- "p95": 111.77600175142288,
- "p99": 114.1119971871376
- },
- "roundtrip": {
- "p50": 198.0160027742386,
- "p90": 201.82399451732635,
- "p95": 203.36000621318817,
- "p99": 207.35999941825867
- },
- "isolatedSum": {
- "p50": 220.5759957432747,
- "p90": 231.80799931287766,
- "p95": 233.34400355815887,
- "p99": 239.1360029578209
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77385728,
- "combineLogicalBytes": 77385728,
- "fanoutMean": 5.271484375,
- "recvTokensMax": 691,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 144.9279934167862,
- "p90": 149.85600113868713,
- "p95": 151.45599842071533,
- "p99": 155.87200224399567
- },
- "combine": {
- "p50": 151.19999647140503,
- "p90": 154.84799444675446,
- "p95": 156.63999319076538,
- "p99": 160.73599457740784
- },
- "roundtrip": {
- "p50": 266.11199975013733,
- "p90": 271.5519964694977,
- "p95": 273.6000120639801,
- "p99": 277.1199941635132
- },
- "isolatedSum": {
- "p50": 296.1279898881912,
- "p90": 304.7039955854416,
- "p95": 308.0959916114807,
- "p99": 316.6079968214035
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155172864,
- "combineLogicalBytes": 155172864,
- "fanoutMean": 5.28515625,
- "recvTokensMax": 1378,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 200.32000541687012,
- "p90": 204.12799715995789,
- "p95": 205.4399996995926,
- "p99": 208.38400721549988
- },
- "combine": {
- "p50": 227.58400440216064,
- "p90": 233.75999927520752,
- "p95": 234.55999791622162,
- "p99": 238.3359968662262
- },
- "roundtrip": {
- "p50": 402.0479917526245,
- "p90": 407.1039855480194,
- "p95": 408.735990524292,
- "p99": 412.06398606300354
- },
- "isolatedSum": {
- "p50": 427.90400981903076,
- "p90": 437.8879964351654,
- "p95": 439.9999976158142,
- "p99": 446.7200040817261
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 310546432,
- "combineLogicalBytes": 310546432,
- "fanoutMean": 5.28857421875,
- "recvTokensMax": 2745,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 303.16799879074097,
- "p90": 307.3920011520386,
- "p95": 308.76800417900085,
- "p99": 313.27998638153076
- },
- "combine": {
- "p50": 362.2399866580963,
- "p90": 368.76800656318665,
- "p95": 370.3039884567261,
- "p99": 372.70399928092957
- },
- "roundtrip": {
- "p50": 641.1839723587036,
- "p90": 647.9359865188599,
- "p95": 650.7520079612732,
- "p99": 656.6399931907654
- },
- "isolatedSum": {
- "p50": 665.4079854488373,
- "p90": 676.1600077152252,
- "p95": 679.0719926357269,
- "p99": 685.9839856624603
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 620619776,
- "combineLogicalBytes": 620619776,
- "fanoutMean": 5.2845458984375,
- "recvTokensMax": 5526,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 520.9919810295105,
- "p90": 531.4239859580994,
- "p95": 534.4640016555786,
- "p99": 541.1840081214905
- },
- "combine": {
- "p50": 639.3600106239319,
- "p90": 650.592029094696,
- "p95": 654.5600295066833,
- "p99": 660.4800224304199
- },
- "roundtrip": {
- "p50": 1128.864049911499,
- "p90": 1138.2720470428467,
- "p95": 1141.2479877471924,
- "p99": 1146.3040113449097
- },
- "isolatedSum": {
- "p50": 1160.3519916534424,
- "p90": 1182.0160150527954,
- "p95": 1189.024031162262,
- "p99": 1201.6640305519104
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239175168,
- "combineLogicalBytes": 1239175168,
- "fanoutMean": 5.2757568359375,
- "recvTokensMax": 11165,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1005.5999755859375,
- "p90": 1031.7120552062988,
- "p95": 1038.3360385894775,
- "p99": 1051.103949546814
- },
- "combine": {
- "p50": 1158.9759588241577,
- "p90": 1167.8719520568848,
- "p95": 1169.9199676513672,
- "p99": 1174.6560335159302
- },
- "roundtrip": {
- "p50": 2121.5360164642334,
- "p90": 2138.2720470428467,
- "p95": 2142.6239013671875,
- "p99": 2150.0160694122314
- },
- "isolatedSum": {
- "p50": 2164.575934410095,
- "p90": 2199.5840072631836,
- "p95": 2208.2560062408447,
- "p99": 2225.759983062744
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2481604608,
- "combineLogicalBytes": 2481604608,
- "fanoutMean": 5.282684326171875,
- "recvTokensMax": 22165,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-71b6107f",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39",
- "colorKey": "h100_769b9c4b",
- "comparisonKey": "24fc2cc385891299",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:08.090138+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_05",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2b57a75d27f5b39",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.895263671875,
- "eplbImbalanceAfter": 1.0000902811686199,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271955196",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271955196",
- "createdAt": "2026-06-27T00:00:08.090138+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 111.07199639081955,
- "p90": 115.93600362539291,
- "p95": 118.14399808645248,
- "p99": 121.08799815177917
- },
- "combine": {
- "p50": 106.08000308275223,
- "p90": 111.26399785280228,
- "p95": 112.38399893045425,
- "p99": 114.14399743080139
- },
- "roundtrip": {
- "p50": 195.68000733852386,
- "p90": 201.1840045452118,
- "p95": 202.39999890327454,
- "p99": 204.96000349521637
- },
- "isolatedSum": {
- "p50": 217.15199947357178,
- "p90": 227.2000014781952,
- "p95": 230.52799701690674,
- "p99": 235.23199558258057
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77385728,
- "combineLogicalBytes": 77385728,
- "fanoutMean": 5.271484375,
- "recvTokensMax": 691,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 144.48000490665436,
- "p90": 148.0640023946762,
- "p95": 149.6960073709488,
- "p99": 153.60000729560852
- },
- "combine": {
- "p50": 148.92800152301788,
- "p90": 154.33600544929504,
- "p95": 155.008003115654,
- "p99": 157.8879952430725
- },
- "roundtrip": {
- "p50": 262.81601190567017,
- "p90": 266.975998878479,
- "p95": 268.3199942111969,
- "p99": 272.44800329208374
- },
- "isolatedSum": {
- "p50": 293.40800642967224,
- "p90": 302.40000784397125,
- "p95": 304.7040104866028,
- "p99": 311.48800253868103
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155172864,
- "combineLogicalBytes": 155172864,
- "fanoutMean": 5.28515625,
- "recvTokensMax": 1378,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 199.68000054359436,
- "p90": 203.42400670051575,
- "p95": 205.47200739383698,
- "p99": 222.52799570560455
- },
- "combine": {
- "p50": 227.80799865722656,
- "p90": 232.9919934272766,
- "p95": 234.3679964542389,
- "p99": 237.34399676322937
- },
- "roundtrip": {
- "p50": 399.83999729156494,
- "p90": 405.023992061615,
- "p95": 406.3040018081665,
- "p99": 414.43198919296265
- },
- "isolatedSum": {
- "p50": 427.4879992008209,
- "p90": 436.41600012779236,
- "p95": 439.84000384807587,
- "p99": 459.8719924688339
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 310546432,
- "combineLogicalBytes": 310546432,
- "fanoutMean": 5.28857421875,
- "recvTokensMax": 2745,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 303.5840094089508,
- "p90": 309.471994638443,
- "p95": 310.4960024356842,
- "p99": 313.82399797439575
- },
- "combine": {
- "p50": 362.8480136394501,
- "p90": 367.74399876594543,
- "p95": 369.6320056915283,
- "p99": 523.7119793891907
- },
- "roundtrip": {
- "p50": 640.8320069313049,
- "p90": 648.576021194458,
- "p95": 651.2960195541382,
- "p99": 733.4399819374084
- },
- "isolatedSum": {
- "p50": 666.4320230484009,
- "p90": 677.2159934043884,
- "p95": 680.1280081272125,
- "p99": 837.5359773635864
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 620619776,
- "combineLogicalBytes": 620619776,
- "fanoutMean": 5.2845458984375,
- "recvTokensMax": 5526,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 524.3200063705444,
- "p90": 533.5680246353149,
- "p95": 536.191999912262,
- "p99": 542.2080159187317
- },
- "combine": {
- "p50": 643.9039707183838,
- "p90": 653.1839966773987,
- "p95": 655.8719873428345,
- "p99": 661.1520051956177
- },
- "roundtrip": {
- "p50": 1135.2959871292114,
- "p90": 1144.8320150375366,
- "p95": 1148.4800577163696,
- "p99": 1153.92005443573
- },
- "isolatedSum": {
- "p50": 1168.2239770889282,
- "p90": 1186.7520213127136,
- "p95": 1192.0639872550964,
- "p99": 1203.3600211143494
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239175168,
- "combineLogicalBytes": 1239175168,
- "fanoutMean": 5.2757568359375,
- "recvTokensMax": 11165,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1020.4800367355347,
- "p90": 1048.8959550857544,
- "p95": 1056.2560558319092,
- "p99": 1071.4880228042603
- },
- "combine": {
- "p50": 1164.6720170974731,
- "p90": 1173.375964164734,
- "p95": 1177.024006843567,
- "p99": 1183.135986328125
- },
- "roundtrip": {
- "p50": 2140.575885772705,
- "p90": 2157.248020172119,
- "p95": 2164.031982421875,
- "p99": 2171.4560985565186
- },
- "isolatedSum": {
- "p50": 2185.152053833008,
- "p90": 2222.2719192504883,
- "p95": 2233.280062675476,
- "p99": 2254.6240091323853
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2481604608,
- "combineLogicalBytes": 2481604608,
- "fanoutMean": 5.282684326171875,
- "recvTokensMax": 22165,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-19a8d159",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "h100_7b3247bf",
- "comparisonKey": "0ac8f8817cb63abb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:47.651979+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254315809",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809",
- "createdAt": "2026-06-26T17:30:47.651979+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 110.46399921178818,
- "p90": 116.35199934244156,
- "p95": 117.8240031003952,
- "p99": 166.01599752902985
- },
- "combine": {
- "p50": 106.1440035700798,
- "p90": 111.51999980211258,
- "p95": 112.06399649381638,
- "p99": 114.07999694347382
- },
- "roundtrip": {
- "p50": 197.40800559520721,
- "p90": 200.9280025959015,
- "p95": 203.0400037765503,
- "p99": 206.01600408554077
- },
- "isolatedSum": {
- "p50": 216.60800278186798,
- "p90": 227.87199914455414,
- "p95": 229.88799959421158,
- "p99": 280.09599447250366
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 147.39200472831726,
- "p90": 150.68799257278442,
- "p95": 151.7760008573532,
- "p99": 154.33600544929504
- },
- "combine": {
- "p50": 145.1839953660965,
- "p90": 149.88799393177032,
- "p95": 151.67999267578125,
- "p99": 154.7199934720993
- },
- "roundtrip": {
- "p50": 262.4000012874603,
- "p90": 267.2640085220337,
- "p95": 269.27998661994934,
- "p99": 357.34400153160095
- },
- "isolatedSum": {
- "p50": 292.57600009441376,
- "p90": 300.57598650455475,
- "p95": 303.45599353313446,
- "p99": 309.05599892139435
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 204.92799580097198,
- "p90": 219.39200162887573,
- "p95": 221.76000475883484,
- "p99": 226.4000028371811
- },
- "combine": {
- "p50": 217.15199947357178,
- "p90": 221.3120013475418,
- "p95": 224.57599639892578,
- "p99": 227.743998169899
- },
- "roundtrip": {
- "p50": 392.60798692703247,
- "p90": 397.47199416160583,
- "p95": 400.09599924087524,
- "p99": 421.37598991394043
- },
- "isolatedSum": {
- "p50": 422.07999527454376,
- "p90": 440.70400297641754,
- "p95": 446.3360011577606,
- "p99": 454.1440010070801
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 319.93600726127625,
- "p90": 324.8960077762604,
- "p95": 327.1679878234863,
- "p99": 330.55999875068665
- },
- "combine": {
- "p50": 330.01598715782166,
- "p90": 335.1680040359497,
- "p95": 336.64000034332275,
- "p99": 340.2239978313446
- },
- "roundtrip": {
- "p50": 624.064028263092,
- "p90": 629.2480230331421,
- "p95": 631.6159963607788,
- "p99": 638.2399797439575
- },
- "isolatedSum": {
- "p50": 649.9519944190979,
- "p90": 660.0640118122101,
- "p95": 663.8079881668091,
- "p99": 670.7839965820312
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 570.9440112113953,
- "p90": 584.5119953155518,
- "p95": 589.1519784927368,
- "p99": 593.9199924468994
- },
- "combine": {
- "p50": 564.9920105934143,
- "p90": 574.3039846420288,
- "p95": 576.7999887466431,
- "p99": 583.5199952125549
- },
- "roundtrip": {
- "p50": 1105.5680513381958,
- "p90": 1120.1599836349487,
- "p95": 1124.7680187225342,
- "p99": 1134.719967842102
- },
- "isolatedSum": {
- "p50": 1135.9360218048096,
- "p90": 1158.8159799575806,
- "p95": 1165.9519672393799,
- "p99": 1177.4399876594543
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1075.8719444274902,
- "p90": 1088.703989982605,
- "p95": 1093.5360193252563,
- "p99": 1102.463960647583
- },
- "combine": {
- "p50": 1031.872034072876,
- "p90": 1041.3119792938232,
- "p95": 1044.4799661636353,
- "p99": 1055.359959602356
- },
- "roundtrip": {
- "p50": 2082.304000854492,
- "p90": 2096.640110015869,
- "p95": 2100.895881652832,
- "p99": 2108.031988143921
- },
- "isolatedSum": {
- "p50": 2107.743978500366,
- "p90": 2130.015969276428,
- "p95": 2138.0159854888916,
- "p99": 2157.823920249939
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-107dd39c",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39",
- "colorKey": "h100_716e65b9",
- "comparisonKey": "ea5a5b6f1b74dc9d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:31:48.643579+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_04",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm) · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "0a3064a2af0dd39",
- "workloadId": "set:6:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254367516",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254367516",
- "createdAt": "2026-06-26T17:31:48.643579+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 126.65599584579468,
- "p90": 131.74399733543396,
- "p95": 132.83200562000275,
- "p99": 139.80799913406372
- },
- "combine": {
- "p50": 120.4800009727478,
- "p90": 122.40000069141388,
- "p95": 124.28800016641617,
- "p99": 129.12000715732574
- },
- "roundtrip": {
- "p50": 221.40799462795258,
- "p90": 226.49599611759186,
- "p95": 227.77600586414337,
- "p99": 232.16000199317932
- },
- "isolatedSum": {
- "p50": 247.13599681854248,
- "p90": 254.14399802684784,
- "p95": 257.1200057864189,
- "p99": 268.92800629138947
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 174.04800653457642,
- "p90": 177.5359958410263,
- "p95": 179.29600179195404,
- "p99": 190.0160014629364
- },
- "combine": {
- "p50": 172.67200350761414,
- "p90": 174.52800273895264,
- "p95": 175.4239946603775,
- "p99": 180.28800189495087
- },
- "roundtrip": {
- "p50": 317.05600023269653,
- "p90": 321.3759958744049,
- "p95": 322.4320113658905,
- "p99": 326.04798674583435
- },
- "isolatedSum": {
- "p50": 346.72001004219055,
- "p90": 352.06399857997894,
- "p95": 354.71999645233154,
- "p99": 370.30400335788727
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 8,
- "recvTokensMax": 2048,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 260.70401072502136,
- "p90": 264.41600918769836,
- "p95": 265.76000452041626,
- "p99": 269.6639895439148
- },
- "combine": {
- "p50": 255.13601303100586,
- "p90": 258.2080066204071,
- "p95": 259.5840096473694,
- "p99": 263.5520100593567
- },
- "roundtrip": {
- "p50": 489.3760085105896,
- "p90": 493.696004152298,
- "p95": 495.0079917907715,
- "p99": 498.9120066165924
- },
- "isolatedSum": {
- "p50": 515.8400237560272,
- "p90": 522.6240158081055,
- "p95": 525.3440141677856,
- "p99": 533.2159996032715
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 8,
- "recvTokensMax": 4096,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 437.6640021800995,
- "p90": 443.7119960784912,
- "p95": 445.248007774353,
- "p99": 449.50398802757263
- },
- "combine": {
- "p50": 422.14399576187134,
- "p90": 426.07998847961426,
- "p95": 427.90400981903076,
- "p99": 431.0399889945984
- },
- "roundtrip": {
- "p50": 834.0799808502197,
- "p90": 840.3199911117554,
- "p95": 842.8159952163696,
- "p99": 852.512001991272
- },
- "isolatedSum": {
- "p50": 859.8079979419708,
- "p90": 869.7919845581055,
- "p95": 873.1520175933838,
- "p99": 880.543977022171
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 939524096,
- "combineLogicalBytes": 939524096,
- "fanoutMean": 8,
- "recvTokensMax": 8192,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 802.623987197876,
- "p90": 819.7439908981323,
- "p95": 822.3680257797241,
- "p99": 830.3359746932983
- },
- "combine": {
- "p50": 751.9360184669495,
- "p90": 759.6160173416138,
- "p95": 762.0480060577393,
- "p99": 765.5680179595947
- },
- "roundtrip": {
- "p50": 1521.9520330429077,
- "p90": 1534.208059310913,
- "p95": 1541.4400100708008,
- "p99": 1552.5120496749878
- },
- "isolatedSum": {
- "p50": 1554.5600056648254,
- "p90": 1579.360008239746,
- "p95": 1584.4160318374634,
- "p99": 1595.903992652893
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1879048192,
- "combineLogicalBytes": 1879048192,
- "fanoutMean": 8,
- "recvTokensMax": 16384,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1529.0240049362183,
- "p90": 1539.5519733428955,
- "p95": 1543.4880256652832,
- "p99": 1549.504041671753
- },
- "combine": {
- "p50": 1399.6479511260986,
- "p90": 1406.7840576171875,
- "p95": 1409.440040588379,
- "p99": 1416.767954826355
- },
- "roundtrip": {
- "p50": 2903.520107269287,
- "p90": 2916.3520336151123,
- "p95": 2920.2558994293213,
- "p99": 2930.016040802002
- },
- "isolatedSum": {
- "p50": 2928.671956062317,
- "p90": 2946.336030960083,
- "p95": 2952.928066253662,
- "p99": 2966.271996498108
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3758096384,
- "combineLogicalBytes": 3758096384,
- "fanoutMean": 8,
- "recvTokensMax": 32768,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-a1762095",
- "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86",
- "colorKey": "h100_f7ec28aa",
- "comparisonKey": "18d3cab3936a264e",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:29:07.856119+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_14",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm) · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b5217e990b95f86",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254376151",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254376151",
- "createdAt": "2026-06-26T17:29:07.856119+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 119.03999745845795,
- "p90": 125.44000148773193,
- "p95": 126.01600587368011,
- "p99": 130.68799674510956
- },
- "combine": {
- "p50": 111.32799834012985,
- "p90": 113.92000317573547,
- "p95": 114.33599889278412,
- "p99": 119.77600306272507
- },
- "roundtrip": {
- "p50": 207.42399990558624,
- "p90": 212.351992726326,
- "p95": 214.56000208854675,
- "p99": 233.3119958639145
- },
- "isolatedSum": {
- "p50": 230.3679957985878,
- "p90": 239.3600046634674,
- "p95": 240.35200476646423,
- "p99": 250.46399980783463
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 159.42400693893433,
- "p90": 165.8879965543747,
- "p95": 166.6879951953888,
- "p99": 169.69600319862366
- },
- "combine": {
- "p50": 156.19200468063354,
- "p90": 162.49600052833557,
- "p95": 163.26400637626648,
- "p99": 168.83200407028198
- },
- "roundtrip": {
- "p50": 290.336012840271,
- "p90": 296.4160144329071,
- "p95": 298.43199253082275,
- "p99": 313.4399950504303
- },
- "isolatedSum": {
- "p50": 315.61601161956787,
- "p90": 328.38399708271027,
- "p95": 329.9520015716553,
- "p99": 338.52800726890564
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 100509696,
- "combineLogicalBytes": 100509696,
- "fanoutMean": 3.42333984375,
- "recvTokensMax": 2046,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 234.78400707244873,
- "p90": 240.22400379180908,
- "p95": 242.20800399780273,
- "p99": 246.2719976902008
- },
- "combine": {
- "p50": 244.47999894618988,
- "p90": 252.16001272201538,
- "p95": 254.8159956932068,
- "p99": 262.4959945678711
- },
- "roundtrip": {
- "p50": 450.81600546836853,
- "p90": 456.83199167251587,
- "p95": 458.624005317688,
- "p99": 499.1680085659027
- },
- "isolatedSum": {
- "p50": 479.2640060186386,
- "p90": 492.38401651382446,
- "p95": 497.0239996910095,
- "p99": 508.7679922580719
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 379.8399865627289,
- "p90": 387.58400082588196,
- "p95": 389.60000872612,
- "p99": 392.9600119590759
- },
- "combine": {
- "p50": 402.72000432014465,
- "p90": 408.35198760032654,
- "p95": 410.5280041694641,
- "p99": 414.2400026321411
- },
- "roundtrip": {
- "p50": 753.600001335144,
- "p90": 759.8080039024353,
- "p95": 761.5039944648743,
- "p99": 764.959990978241
- },
- "isolatedSum": {
- "p50": 782.5599908828735,
- "p90": 795.9359884262085,
- "p95": 800.1280128955841,
- "p99": 807.200014591217
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 405035008,
- "combineLogicalBytes": 405035008,
- "fanoutMean": 3.4488525390625,
- "recvTokensMax": 8189,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 663.7120246887207,
- "p90": 672.1919775009155,
- "p95": 675.9359836578369,
- "p99": 683.0080151557922
- },
- "combine": {
- "p50": 711.5839719772339,
- "p90": 725.5359888076782,
- "p95": 729.8880219459534,
- "p99": 740.0320172309875
- },
- "roundtrip": {
- "p50": 1344.383955001831,
- "p90": 1357.5999736785889,
- "p95": 1361.0880374908447,
- "p99": 1368.6399459838867
- },
- "isolatedSum": {
- "p50": 1375.2959966659546,
- "p90": 1397.7279663085938,
- "p95": 1405.8240056037903,
- "p99": 1423.0400323867798
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1251.1359453201294,
- "p90": 1264.8320198059082,
- "p95": 1269.6640491485596,
- "p99": 1279.0080308914185
- },
- "combine": {
- "p50": 1326.9120454788208,
- "p90": 1337.3440504074097,
- "p95": 1343.008041381836,
- "p99": 1352.5439500808716
- },
- "roundtrip": {
- "p50": 2547.0080375671387,
- "p90": 2561.2800121307373,
- "p95": 2564.863920211792,
- "p99": 2581.696033477783
- },
- "isolatedSum": {
- "p50": 2578.04799079895,
- "p90": 2602.176070213318,
- "p95": 2612.6720905303955,
- "p99": 2631.55198097229
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1619795968,
- "combineLogicalBytes": 1619795968,
- "fanoutMean": 3.4481201171875,
- "recvTokensMax": 32761,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-6339c695",
- "identity": "h100|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39",
- "colorKey": "h100_93503624",
- "comparisonKey": "99696dfafd6d026a",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:46:27.794881+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_03",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm) · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2b57a75d27f5b39",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.895263671875,
- "eplbImbalanceAfter": 1.0000902811686199,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28255296001",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255296001",
- "createdAt": "2026-06-26T17:46:27.794881+00:00",
- "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 110.72000116109848,
- "p90": 114.78400230407715,
- "p95": 116.57600104808807,
- "p99": 121.0239976644516
- },
- "combine": {
- "p50": 105.8880016207695,
- "p90": 111.35999858379364,
- "p95": 112.0000034570694,
- "p99": 114.56000059843063
- },
- "roundtrip": {
- "p50": 195.99999487400055,
- "p90": 200.00000298023224,
- "p95": 201.24800503253937,
- "p99": 205.59999346733093
- },
- "isolatedSum": {
- "p50": 216.60800278186798,
- "p90": 226.1440008878708,
- "p95": 228.57600450515747,
- "p99": 235.58399826288223
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77385728,
- "combineLogicalBytes": 77385728,
- "fanoutMean": 5.271484375,
- "recvTokensMax": 691,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 144.31999623775482,
- "p90": 148.0640023946762,
- "p95": 149.24800395965576,
- "p99": 152.0960032939911
- },
- "combine": {
- "p50": 146.62399888038635,
- "p90": 151.10400319099426,
- "p95": 152.51199901103973,
- "p99": 155.32800555229187
- },
- "roundtrip": {
- "p50": 260.8959972858429,
- "p90": 265.3760015964508,
- "p95": 266.400009393692,
- "p99": 270.7520127296448
- },
- "isolatedSum": {
- "p50": 290.9439951181412,
- "p90": 299.16800558567047,
- "p95": 301.7600029706955,
- "p99": 307.42400884628296
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155172864,
- "combineLogicalBytes": 155172864,
- "fanoutMean": 5.28515625,
- "recvTokensMax": 1378,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 205.56800067424774,
- "p90": 210.36800742149353,
- "p95": 212.09600567817688,
- "p99": 214.6880030632019
- },
- "combine": {
- "p50": 214.78399634361267,
- "p90": 219.13599967956543,
- "p95": 220.70400416851044,
- "p99": 225.2800017595291
- },
- "roundtrip": {
- "p50": 394.8799967765808,
- "p90": 400.2879858016968,
- "p95": 401.88801288604736,
- "p99": 407.9680144786835
- },
- "isolatedSum": {
- "p50": 420.3519970178604,
- "p90": 429.50400710105896,
- "p95": 432.8000098466873,
- "p99": 439.968004822731
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 310546432,
- "combineLogicalBytes": 310546432,
- "fanoutMean": 5.28857421875,
- "recvTokensMax": 2745,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 326.9760012626648,
- "p90": 332.35201239585876,
- "p95": 334.46401357650757,
- "p99": 337.98399567604065
- },
- "combine": {
- "p50": 338.75200152397156,
- "p90": 346.0479974746704,
- "p95": 347.4240005016327,
- "p99": 379.5199990272522
- },
- "roundtrip": {
- "p50": 642.8160071372986,
- "p90": 650.6879925727844,
- "p95": 652.895987033844,
- "p99": 658.7520241737366
- },
- "isolatedSum": {
- "p50": 665.7280027866364,
- "p90": 678.4000098705292,
- "p95": 681.8880140781403,
- "p99": 717.5039947032928
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 620619776,
- "combineLogicalBytes": 620619776,
- "fanoutMean": 5.2845458984375,
- "recvTokensMax": 5526,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 584.0640068054199,
- "p90": 592.6719903945923,
- "p95": 595.5520272254944,
- "p99": 601.2160181999207
- },
- "combine": {
- "p50": 568.8639879226685,
- "p90": 576.9280195236206,
- "p95": 579.3920159339905,
- "p99": 584.5119953155518
- },
- "roundtrip": {
- "p50": 1122.3679780960083,
- "p90": 1133.8560581207275,
- "p95": 1138.6239528656006,
- "p99": 1146.783947944641
- },
- "isolatedSum": {
- "p50": 1152.9279947280884,
- "p90": 1169.600009918213,
- "p95": 1174.9440431594849,
- "p99": 1185.7280135154724
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239175168,
- "combineLogicalBytes": 1239175168,
- "fanoutMean": 5.2757568359375,
- "recvTokensMax": 11165,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1107.200026512146,
- "p90": 1119.0400123596191,
- "p95": 1124.384045600891,
- "p99": 1133.344054222107
- },
- "combine": {
- "p50": 1020.6719636917114,
- "p90": 1029.1839838027954,
- "p95": 1032.1919918060303,
- "p99": 1037.8559827804565
- },
- "roundtrip": {
- "p50": 2098.4959602355957,
- "p90": 2110.1760864257812,
- "p95": 2113.856077194214,
- "p99": 2120.60809135437
- },
- "isolatedSum": {
- "p50": 2127.8719902038574,
- "p90": 2148.2239961624146,
- "p95": 2156.5760374069214,
- "p99": 2171.2000370025635
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2481604608,
- "combineLogicalBytes": 2481604608,
- "fanoutMean": 5.282684326171875,
- "recvTokensMax": 22165,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-96b1ca55",
- "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "h100_5df912ff",
- "comparisonKey": "9fdbd6763ea7346a",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:28:17.076570+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_08",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254332840",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254332840",
- "createdAt": "2026-06-26T17:28:17.076570+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 101.31199657917023,
- "p90": 105.69600015878677,
- "p95": 107.55199939012527,
- "p99": 110.84800213575363
- },
- "combine": {
- "p50": 105.82400113344193,
- "p90": 107.42399841547012,
- "p95": 108.60799998044968,
- "p99": 112.64000087976456
- },
- "roundtrip": {
- "p50": 183.1360012292862,
- "p90": 188.03200125694275,
- "p95": 188.960000872612,
- "p99": 195.13599574565887
- },
- "isolatedSum": {
- "p50": 207.13599771261215,
- "p90": 213.1199985742569,
- "p95": 216.15999937057495,
- "p99": 223.4880030155182
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 132.89600610733032,
- "p90": 137.08800077438354,
- "p95": 138.2399946451187,
- "p99": 140.70400595664978
- },
- "combine": {
- "p50": 144.96000111103058,
- "p90": 147.5840061903,
- "p95": 148.28799664974213,
- "p99": 152.63999998569489
- },
- "roundtrip": {
- "p50": 249.56800043582916,
- "p90": 253.53598594665527,
- "p95": 254.59200143814087,
- "p99": 256.73601031303406
- },
- "isolatedSum": {
- "p50": 277.8560072183609,
- "p90": 284.67200696468353,
- "p95": 286.52799129486084,
- "p99": 293.34400594234467
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 193.4400051832199,
- "p90": 202.68799364566803,
- "p95": 203.87199521064758,
- "p99": 209.9519968032837
- },
- "combine": {
- "p50": 216.8319970369339,
- "p90": 220.92799842357635,
- "p95": 223.55200350284576,
- "p99": 226.04799270629883
- },
- "roundtrip": {
- "p50": 382.4959993362427,
- "p90": 387.7759873867035,
- "p95": 388.7679874897003,
- "p99": 392.767995595932
- },
- "isolatedSum": {
- "p50": 410.2720022201538,
- "p90": 423.6159920692444,
- "p95": 427.42399871349335,
- "p99": 435.9999895095825
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 315.0720000267029,
- "p90": 320.1279938220978,
- "p95": 322.04800844192505,
- "p99": 324.5759904384613
- },
- "combine": {
- "p50": 329.27998900413513,
- "p90": 333.3759903907776,
- "p95": 335.61599254608154,
- "p99": 338.9120101928711
- },
- "roundtrip": {
- "p50": 619.0720200538635,
- "p90": 625.2480149269104,
- "p95": 627.839982509613,
- "p99": 630.7839751243591
- },
- "isolatedSum": {
- "p50": 644.351989030838,
- "p90": 653.5039842128754,
- "p95": 657.6640009880066,
- "p99": 663.4880006313324
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 560.8959794044495,
- "p90": 569.8879957199097,
- "p95": 572.1920132637024,
- "p99": 577.2799849510193
- },
- "combine": {
- "p50": 563.3599758148193,
- "p90": 573.248028755188,
- "p95": 576.3840079307556,
- "p99": 580.672025680542
- },
- "roundtrip": {
- "p50": 1093.727946281433,
- "p90": 1102.6240587234497,
- "p95": 1105.5999994277954,
- "p99": 1112.0959520339966
- },
- "isolatedSum": {
- "p50": 1124.2559552192688,
- "p90": 1143.1360244750977,
- "p95": 1148.576021194458,
- "p99": 1157.9520106315613
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1059.0720176696777,
- "p90": 1071.7439651489258,
- "p95": 1074.8480558395386,
- "p99": 1091.2959575653076
- },
- "combine": {
- "p50": 1026.8800258636475,
- "p90": 1036.2880229949951,
- "p95": 1038.7840270996094,
- "p99": 1047.4879741668701
- },
- "roundtrip": {
- "p50": 2055.1679134368896,
- "p90": 2067.13604927063,
- "p95": 2069.823980331421,
- "p99": 2075.5200386047363
- },
- "isolatedSum": {
- "p50": 2085.952043533325,
- "p90": 2108.031988143921,
- "p95": 2113.632082939148,
- "p99": 2138.7839317321777
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-1ed69eb7",
- "identity": "h100|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_17694d2c",
- "comparisonKey": "379c3371e525c0fb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:48:34.870060+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_15",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · bf16 [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271555838",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271555838",
- "createdAt": "2026-06-26T23:48:34.870060+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 105.27999699115753,
- "p90": 110.36799848079681,
- "p95": 112.12799698114395,
- "p99": 115.23199826478958
- },
- "combine": {
- "p50": 106.175996363163,
- "p90": 108.0000028014183,
- "p95": 111.1999973654747,
- "p99": 113.72800171375275
- },
- "roundtrip": {
- "p50": 183.3599954843521,
- "p90": 188.48000466823578,
- "p95": 190.17599523067474,
- "p99": 193.56800615787506
- },
- "isolatedSum": {
- "p50": 211.45599335432053,
- "p90": 218.36800128221512,
- "p95": 223.32799434661865,
- "p99": 228.95999997854233
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 134.5919966697693,
- "p90": 140.06400108337402,
- "p95": 142.46399700641632,
- "p99": 146.88000082969666
- },
- "combine": {
- "p50": 152.12799608707428,
- "p90": 158.36800634860992,
- "p95": 161.0880047082901,
- "p99": 162.81600296497345
- },
- "roundtrip": {
- "p50": 254.46400046348572,
- "p90": 259.93600487709045,
- "p95": 262.4639868736267,
- "p99": 268.2560086250305
- },
- "isolatedSum": {
- "p50": 286.71999275684357,
- "p90": 298.43200743198395,
- "p95": 303.5520017147064,
- "p99": 309.6960037946701
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 188.960000872612,
- "p90": 194.97600197792053,
- "p95": 198.11199605464935,
- "p99": 202.5279998779297
- },
- "combine": {
- "p50": 228.67199778556824,
- "p90": 236.09599471092224,
- "p95": 237.05600202083588,
- "p99": 241.08800292015076
- },
- "roundtrip": {
- "p50": 391.90399646759033,
- "p90": 399.80798959732056,
- "p95": 402.3999869823456,
- "p99": 424.0959882736206
- },
- "isolatedSum": {
- "p50": 417.63199865818024,
- "p90": 431.0719966888428,
- "p95": 435.16799807548523,
- "p99": 443.61600279808044
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 294.0160036087036,
- "p90": 311.3279938697815,
- "p95": 315.20000100135803,
- "p99": 326.07999444007874
- },
- "combine": {
- "p50": 366.1760091781616,
- "p90": 382.9120099544525,
- "p95": 391.32800698280334,
- "p99": 407.039999961853
- },
- "roundtrip": {
- "p50": 632.9600214958191,
- "p90": 674.3680238723755,
- "p95": 687.3279809951782,
- "p99": 835.3919982910156
- },
- "isolatedSum": {
- "p50": 660.1920127868652,
- "p90": 694.240003824234,
- "p95": 706.5280079841614,
- "p99": 733.1199944019318
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 509.15199518203735,
- "p90": 521.8560099601746,
- "p95": 526.1120200157166,
- "p99": 533.0560207366943
- },
- "combine": {
- "p50": 635.2319717407227,
- "p90": 645.5680131912231,
- "p95": 649.4719982147217,
- "p99": 656.3839912414551
- },
- "roundtrip": {
- "p50": 1114.9760484695435,
- "p90": 1128.0319690704346,
- "p95": 1131.9680213928223,
- "p99": 1147.711992263794
- },
- "isolatedSum": {
- "p50": 1144.38396692276,
- "p90": 1167.4240231513977,
- "p95": 1175.5840182304382,
- "p99": 1189.4400119781494
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 970.848023891449,
- "p90": 995.136022567749,
- "p95": 1001.7919540405273,
- "p99": 1016.1279439926147
- },
- "combine": {
- "p50": 1156.3199758529663,
- "p90": 1167.1040058135986,
- "p95": 1172.287940979004,
- "p99": 1184.928059577942
- },
- "roundtrip": {
- "p50": 2089.279890060425,
- "p90": 2105.664014816284,
- "p95": 2110.431909561157,
- "p99": 2118.0479526519775
- },
- "isolatedSum": {
- "p50": 2127.1679997444153,
- "p90": 2162.2400283813477,
- "p95": 2174.0798950195312,
- "p99": 2201.0560035705566
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-39ba4bd5",
- "identity": "h100|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "h100_a96c99f3",
- "comparisonKey": "b9c15d0905ec0061",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:58.971427+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287505969",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287505969",
- "createdAt": "2026-06-27T11:13:58.971427+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 79.03999835252762,
- "p90": 84.63999629020691,
- "p95": 88.28800171613693,
- "p99": 108.89600217342377
- },
- "combine": {
- "p50": 77.02399790287018,
- "p90": 80.1599994301796,
- "p95": 82.0159986615181,
- "p99": 85.85599809885025
- },
- "roundtrip": {
- "p50": 171.64799571037292,
- "p90": 178.01600694656372,
- "p95": 187.74400651454926,
- "p99": 233.50399732589722
- },
- "isolatedSum": {
- "p50": 156.0639962553978,
- "p90": 164.7999957203865,
- "p95": 170.30400037765503,
- "p99": 194.75200027227402
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 95.39200365543365,
- "p90": 113.82400244474411,
- "p95": 119.61600184440613,
- "p99": 132.57600367069244
- },
- "combine": {
- "p50": 103.74400019645691,
- "p90": 114.43199962377548,
- "p95": 121.24799937009811,
- "p99": 155.2640050649643
- },
- "roundtrip": {
- "p50": 235.9679937362671,
- "p90": 250.62400102615356,
- "p95": 265.1839852333069,
- "p99": 275.2000093460083
- },
- "isolatedSum": {
- "p50": 199.13600385189056,
- "p90": 228.2560020685196,
- "p95": 240.86400121450424,
- "p99": 287.84000873565674
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44863488,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 126.8479973077774,
- "p90": 132.7040046453476,
- "p95": 135.74400544166565,
- "p99": 147.45600521564484
- },
- "combine": {
- "p50": 159.96800363063812,
- "p90": 164.95999693870544,
- "p95": 166.49599373340607,
- "p99": 175.9359985589981
- },
- "roundtrip": {
- "p50": 375.8719861507416,
- "p90": 385.4080140590668,
- "p95": 393.18400621414185,
- "p99": 407.9680144786835
- },
- "isolatedSum": {
- "p50": 286.8160009384155,
- "p90": 297.66400158405304,
- "p95": 302.2399991750717,
- "p99": 323.39200377464294
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 89751552,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 191.55199825763702,
- "p90": 209.08799767494202,
- "p95": 216.15999937057495,
- "p99": 227.55199670791626
- },
- "combine": {
- "p50": 267.2320008277893,
- "p90": 272.3200023174286,
- "p95": 273.6000120639801,
- "p99": 275.84001421928406
- },
- "roundtrip": {
- "p50": 636.2879872322083,
- "p90": 641.8560147285461,
- "p95": 644.1280245780945,
- "p99": 809.0239763259888
- },
- "isolatedSum": {
- "p50": 458.78399908542633,
- "p90": 481.4079999923706,
- "p95": 489.76001143455505,
- "p99": 503.3920109272003
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 179511296,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 307.776004076004,
- "p90": 323.35999608039856,
- "p95": 325.21599531173706,
- "p99": 330.6879997253418
- },
- "combine": {
- "p50": 460.9279930591583,
- "p90": 468.4160053730011,
- "p95": 470.2720046043396,
- "p99": 600.9600162506104
- },
- "roundtrip": {
- "p50": 1133.8239908218384,
- "p90": 1142.5600051879883,
- "p95": 1146.1759805679321,
- "p99": 1151.3279676437378
- },
- "isolatedSum": {
- "p50": 768.7039971351624,
- "p90": 791.7760014533997,
- "p95": 795.4879999160767,
- "p99": 931.6480159759521
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 358055936,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 546.3039875030518,
- "p90": 551.2959957122803,
- "p95": 553.4719824790955,
- "p99": 559.6799850463867
- },
- "combine": {
- "p50": 841.6640162467957,
- "p90": 850.816011428833,
- "p95": 854.1120290756226,
- "p99": 870.1440095901489
- },
- "roundtrip": {
- "p50": 2148.0960845947266,
- "p90": 2161.184072494507,
- "p95": 2165.440082550049,
- "p99": 2175.0400066375732
- },
- "isolatedSum": {
- "p50": 1387.9680037498474,
- "p90": 1402.1120071411133,
- "p95": 1407.584011554718,
- "p99": 1429.8239946365356
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 716197888,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8fb1cb65",
- "identity": "h100|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42",
- "colorKey": "h100_97196257",
- "comparisonKey": "d361c128552b2ee8",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:51.842450+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_10",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "shape 5120/8/160",
- "shape": {
- "hidden": 5120,
- "topk": 8,
- "experts": 160,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "0c022a63bbcbf42",
- "workloadId": "set:6:28c0c09b13ff0acf",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271695735",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271695735",
- "createdAt": "2026-06-26T23:51:51.842450+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 196.28800451755524,
- "p90": 202.33599841594696,
- "p95": 203.96800339221954,
- "p99": 210.07999777793884
- },
- "combine": {
- "p50": 85.11999994516373,
- "p90": 87.5839963555336,
- "p95": 89.72799777984619,
- "p99": 93.24800223112106
- },
- "roundtrip": {
- "p50": 266.7520046234131,
- "p90": 273.824006319046,
- "p95": 277.5680124759674,
- "p99": 291.83998703956604
- },
- "isolatedSum": {
- "p50": 281.40800446271896,
- "p90": 289.91999477148056,
- "p95": 293.69600117206573,
- "p99": 303.3280000090599
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 27837440,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 248.3839988708496,
- "p90": 296.7680096626282,
- "p95": 299.80799555778503,
- "p99": 321.28000259399414
- },
- "combine": {
- "p50": 118.81600320339203,
- "p90": 125.15200674533844,
- "p95": 126.17599964141846,
- "p99": 128.06400656700134
- },
- "roundtrip": {
- "p50": 353.85599732398987,
- "p90": 407.9360067844391,
- "p95": 410.3040099143982,
- "p99": 414.40001130104065
- },
- "isolatedSum": {
- "p50": 367.20000207424164,
- "p90": 421.9200164079666,
- "p95": 425.9839951992035,
- "p99": 449.3440091609955
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 55552000,
- "combineLogicalBytes": 111104000,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 1387,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 341.43999218940735,
- "p90": 394.0480053424835,
- "p95": 396.5440094470978,
- "p99": 400.41598677635193
- },
- "combine": {
- "p50": 185.12000143527985,
- "p90": 191.3280040025711,
- "p95": 193.05600225925446,
- "p99": 195.74399292469025
- },
- "roundtrip": {
- "p50": 510.0160241127014,
- "p90": 567.3919916152954,
- "p95": 570.8479881286621,
- "p99": 574.176013469696
- },
- "isolatedSum": {
- "p50": 526.5599936246872,
- "p90": 585.3760093450546,
- "p95": 589.6000117063522,
- "p99": 596.1599797010422
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 111549440,
- "combineLogicalBytes": 223098880,
- "fanoutMean": 5.319091796875,
- "recvTokensMax": 2762,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 517.9839730262756,
- "p90": 568.6720013618469,
- "p95": 574.0159749984741,
- "p99": 579.8079967498779
- },
- "combine": {
- "p50": 291.26399755477905,
- "p90": 295.80798745155334,
- "p95": 297.08799719810486,
- "p99": 299.96800422668457
- },
- "roundtrip": {
- "p50": 794.2079901695251,
- "p90": 801.3120293617249,
- "p95": 804.095983505249,
- "p99": 814.4959807395935
- },
- "isolatedSum": {
- "p50": 809.2479705810547,
- "p90": 864.4799888134003,
- "p95": 871.103972196579,
- "p99": 879.7760009765625
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 223365120,
- "combineLogicalBytes": 446730240,
- "fanoutMean": 5.325439453125,
- "recvTokensMax": 5518,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 962.5599980354309,
- "p90": 971.2640047073364,
- "p95": 973.0560183525085,
- "p99": 978.3999919891357
- },
- "combine": {
- "p50": 513.1199955940247,
- "p90": 523.5520005226135,
- "p95": 526.0800123214722,
- "p99": 531.9039821624756
- },
- "roundtrip": {
- "p50": 1460.576057434082,
- "p90": 1472.4160432815552,
- "p95": 1476.6080379486084,
- "p99": 1773.3759880065918
- },
- "isolatedSum": {
- "p50": 1475.6799936294556,
- "p90": 1494.81600522995,
- "p95": 1499.1360306739807,
- "p99": 1510.3039741516113
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 446817280,
- "combineLogicalBytes": 893634560,
- "fanoutMean": 5.32647705078125,
- "recvTokensMax": 11032,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1818.5919523239136,
- "p90": 1826.9439935684204,
- "p95": 1829.7280073165894,
- "p99": 1833.8559865951538
- },
- "combine": {
- "p50": 930.3359985351562,
- "p90": 939.7119879722595,
- "p95": 942.8160190582275,
- "p99": 948.0640292167664
- },
- "roundtrip": {
- "p50": 2736.9279861450195,
- "p90": 2750.3039836883545,
- "p95": 2755.199909210205,
- "p99": 2763.64803314209
- },
- "isolatedSum": {
- "p50": 2748.92795085907,
- "p90": 2766.65598154068,
- "p95": 2772.544026374817,
- "p99": 2781.92001581192
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 893132800,
- "combineLogicalBytes": 1786265600,
- "fanoutMean": 5.323486328125,
- "recvTokensMax": 21895,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-26196af1",
- "identity": "h100|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_a96c99f3",
- "comparisonKey": "fd1c952adc3abb43",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:32.456116+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_07",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287494014",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287494014",
- "createdAt": "2026-06-27T11:13:32.456116+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 86.30400151014328,
- "p90": 99.5199978351593,
- "p95": 101.08800232410431,
- "p99": 104.99200224876404
- },
- "combine": {
- "p50": 91.80799871683121,
- "p90": 100.28800368309021,
- "p95": 101.31199657917023,
- "p99": 105.95200210809708
- },
- "roundtrip": {
- "p50": 200.22399723529816,
- "p90": 218.87999773025513,
- "p95": 220.41599452495575,
- "p99": 234.52800512313843
- },
- "isolatedSum": {
- "p50": 178.1120002269745,
- "p90": 199.8080015182495,
- "p95": 202.39999890327454,
- "p99": 210.94400435686111
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 105.34399747848511,
- "p90": 117.60000139474869,
- "p95": 119.35999989509583,
- "p99": 123.36000055074692
- },
- "combine": {
- "p50": 131.9359987974167,
- "p90": 142.87999272346497,
- "p95": 148.70400726795197,
- "p99": 308.4479868412018
- },
- "roundtrip": {
- "p50": 299.9039888381958,
- "p90": 312.5759959220886,
- "p95": 314.65598940849304,
- "p99": 318.7200129032135
- },
- "isolatedSum": {
- "p50": 237.2799962759018,
- "p90": 260.47999411821365,
- "p95": 268.0640071630478,
- "p99": 431.8079873919487
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66809856,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 139.3599957227707,
- "p90": 143.0400013923645,
- "p95": 144.51199769973755,
- "p99": 150.94399452209473
- },
- "combine": {
- "p50": 200.32000541687012,
- "p90": 203.67999374866486,
- "p95": 204.73599433898926,
- "p99": 209.82399582862854
- },
- "roundtrip": {
- "p50": 476.83200240135193,
- "p90": 482.40000009536743,
- "p95": 484.47999358177185,
- "p99": 581.2479853630066
- },
- "isolatedSum": {
- "p50": 339.6800011396408,
- "p90": 346.71999514102936,
- "p95": 349.2479920387268,
- "p99": 360.76799035072327
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 133828608,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 210.1760059595108,
- "p90": 214.6880030632019,
- "p95": 216.19200706481934,
- "p99": 219.55199539661407
- },
- "combine": {
- "p50": 324.95999336242676,
- "p90": 330.1759958267212,
- "p95": 332.2240114212036,
- "p99": 353.4719944000244
- },
- "roundtrip": {
- "p50": 807.9040050506592,
- "p90": 815.6800270080566,
- "p95": 819.6160197257996,
- "p99": 854.9759984016418
- },
- "isolatedSum": {
- "p50": 535.1359993219376,
- "p90": 544.8639988899231,
- "p95": 548.416018486023,
- "p99": 573.0239897966385
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 267190272,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 352.60799527168274,
- "p90": 362.5600039958954,
- "p95": 365.85599184036255,
- "p99": 394.9440121650696
- },
- "combine": {
- "p50": 570.8479881286621,
- "p90": 576.7679810523987,
- "p95": 579.0719985961914,
- "p99": 581.4719796180725
- },
- "roundtrip": {
- "p50": 1472.5439548492432,
- "p90": 1483.680009841919,
- "p95": 1486.4319562911987,
- "p99": 1497.4080324172974
- },
- "isolatedSum": {
- "p50": 923.4559834003448,
- "p90": 939.3279850482941,
- "p95": 944.927990436554,
- "p99": 976.4159917831421
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 533059584,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 622.8160262107849,
- "p90": 630.2400231361389,
- "p95": 632.8639984130859,
- "p99": 637.503981590271
- },
- "combine": {
- "p50": 1051.8399477005005,
- "p90": 1060.4480504989624,
- "p95": 1063.5839700698853,
- "p99": 1077.728033065796
- },
- "roundtrip": {
- "p50": 2821.791887283325,
- "p90": 2846.463918685913,
- "p95": 2856.384038925171,
- "p99": 2868.5760498046875
- },
- "isolatedSum": {
- "p50": 1674.6559739112854,
- "p90": 1690.6880736351013,
- "p95": 1696.4479684829712,
- "p99": 1715.232014656067
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1065861120,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-db3c52ad",
- "identity": "h100|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_97196257",
- "comparisonKey": "d4fd66af6f4726f6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:52:17.424978+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_09",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271710412",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271710412",
- "createdAt": "2026-06-26T23:52:17.424978+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 208.8640034198761,
- "p90": 215.68000316619873,
- "p95": 218.75199675559998,
- "p99": 469.56801414489746
- },
- "combine": {
- "p50": 90.33600240945816,
- "p90": 93.21600198745728,
- "p95": 95.551997423172,
- "p99": 98.1760025024414
- },
- "roundtrip": {
- "p50": 286.72000765800476,
- "p90": 290.75199365615845,
- "p95": 293.0240035057068,
- "p99": 295.52000761032104
- },
- "isolatedSum": {
- "p50": 299.20000582933426,
- "p90": 308.896005153656,
- "p95": 314.303994178772,
- "p99": 567.7440166473389
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 267.2320008277893,
- "p90": 273.4079957008362,
- "p95": 275.64799785614014,
- "p99": 286.8480086326599
- },
- "combine": {
- "p50": 127.23200023174286,
- "p90": 130.40000200271606,
- "p95": 131.52000308036804,
- "p99": 134.0479999780655
- },
- "roundtrip": {
- "p50": 387.3920142650604,
- "p90": 392.2879993915558,
- "p95": 394.9440121650696,
- "p99": 403.328001499176
- },
- "isolatedSum": {
- "p50": 394.46400105953217,
- "p90": 403.80799770355225,
- "p95": 407.1680009365082,
- "p99": 420.8960086107254
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66809856,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 377.6960074901581,
- "p90": 423.93600940704346,
- "p95": 426.4320135116577,
- "p99": 432.6080083847046
- },
- "combine": {
- "p50": 203.42400670051575,
- "p90": 213.95200490951538,
- "p95": 215.03999829292297,
- "p99": 218.6879962682724
- },
- "roundtrip": {
- "p50": 564.4479990005493,
- "p90": 604.8960089683533,
- "p95": 608.1920266151428,
- "p99": 615.1360273361206
- },
- "isolatedSum": {
- "p50": 581.1200141906738,
- "p90": 637.8880143165588,
- "p95": 641.4720118045807,
- "p99": 651.296004652977
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 133828608,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 593.504011631012,
- "p90": 597.9200005531311,
- "p95": 599.295973777771,
- "p99": 620.9279894828796
- },
- "combine": {
- "p50": 322.59199023246765,
- "p90": 326.78401470184326,
- "p95": 328.7999927997589,
- "p99": 331.36001229286194
- },
- "roundtrip": {
- "p50": 899.1680145263672,
- "p90": 904.416024684906,
- "p95": 906.6240191459656,
- "p99": 913.0560159683228
- },
- "isolatedSum": {
- "p50": 916.0960018634796,
- "p90": 924.7040152549744,
- "p95": 928.0959665775299,
- "p99": 952.2880017757416
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 267190272,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 1106.943964958191,
- "p90": 1117.9519891738892,
- "p95": 1120.7040548324585,
- "p99": 1126.2719631195068
- },
- "combine": {
- "p50": 574.4640231132507,
- "p90": 583.0720067024231,
- "p95": 584.991991519928,
- "p99": 590.719997882843
- },
- "roundtrip": {
- "p50": 1684.0640306472778,
- "p90": 1699.5840072631836,
- "p95": 1705.1520347595215,
- "p99": 1751.9680261611938
- },
- "isolatedSum": {
- "p50": 1681.4079880714417,
- "p90": 1701.0239958763123,
- "p95": 1705.6960463523865,
- "p99": 1716.9919610023499
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 533059584,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 2105.151891708374,
- "p90": 2114.687919616699,
- "p95": 2120.1279163360596,
- "p99": 2217.0560359954834
- },
- "combine": {
- "p50": 1052.9279708862305,
- "p90": 1061.0560178756714,
- "p95": 1063.264012336731,
- "p99": 1068.0320262908936
- },
- "roundtrip": {
- "p50": 3201.6959190368652,
- "p90": 3233.1199645996094,
- "p95": 3240.8320903778076,
- "p99": 3259.615898132324
- },
- "isolatedSum": {
- "p50": 3158.0798625946045,
- "p90": 3175.7439374923706,
- "p95": 3183.3919286727905,
- "p99": 3285.088062286377
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1065861120,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-bf310e7a",
- "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_a96c99f3",
- "comparisonKey": "3fc4c710187195cb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T10:13:01.422194+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_18",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28286086353",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286086353",
- "createdAt": "2026-06-27T10:13:01.422194+00:00",
- "sha": "76a3032d20288ee17220eb6099346f74d56ce005"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 89.53599631786346,
- "p90": 94.30400282144547,
- "p95": 95.87199985980988,
- "p99": 98.7199991941452
- },
- "combine": {
- "p50": 100.51199793815613,
- "p90": 103.35999727249146,
- "p95": 104.96000200510025,
- "p99": 108.73600095510483
- },
- "roundtrip": {
- "p50": 218.07999908924103,
- "p90": 221.69600427150726,
- "p95": 222.78399765491486,
- "p99": 227.64800488948822
- },
- "isolatedSum": {
- "p50": 190.0479942560196,
- "p90": 197.66400009393692,
- "p95": 200.83200186491013,
- "p99": 207.45600014925003
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 108.73600095510483,
- "p90": 128.83199751377106,
- "p95": 132.64000415802002,
- "p99": 136.83199882507324
- },
- "combine": {
- "p50": 144.76799964904785,
- "p90": 153.08800339698792,
- "p95": 157.6319932937622,
- "p99": 160.7999950647354
- },
- "roundtrip": {
- "p50": 332.96000957489014,
- "p90": 344.60800886154175,
- "p95": 350.271999835968,
- "p99": 356.86400532722473
- },
- "isolatedSum": {
- "p50": 253.50400060415268,
- "p90": 281.920000910759,
- "p95": 290.2719974517822,
- "p99": 297.63199388980865
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 144.22400295734406,
- "p90": 159.29600596427917,
- "p95": 160.863995552063,
- "p99": 166.6879951953888
- },
- "combine": {
- "p50": 224.95999932289124,
- "p90": 231.26399517059326,
- "p95": 233.18399488925934,
- "p99": 236.12800240516663
- },
- "roundtrip": {
- "p50": 525.2799987792969,
- "p90": 531.4559936523438,
- "p95": 534.6879959106445,
- "p99": 546.2719798088074
- },
- "isolatedSum": {
- "p50": 369.1840022802353,
- "p90": 390.56000113487244,
- "p95": 394.0479904413223,
- "p99": 402.8159976005554
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 213.1199985742569,
- "p90": 218.9760059118271,
- "p95": 226.3679951429367,
- "p99": 239.42400515079498
- },
- "combine": {
- "p50": 360.8640134334564,
- "p90": 365.79200625419617,
- "p95": 367.3279881477356,
- "p99": 371.7760145664215
- },
- "roundtrip": {
- "p50": 894.208014011383,
- "p90": 899.7120261192322,
- "p95": 901.6320109367371,
- "p99": 904.8320055007935
- },
- "isolatedSum": {
- "p50": 573.9840120077133,
- "p90": 584.7680121660233,
- "p95": 593.6959832906723,
- "p99": 611.2000197172165
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 348.54400157928467,
- "p90": 367.39200353622437,
- "p95": 369.7600066661835,
- "p99": 378.6559998989105
- },
- "combine": {
- "p50": 634.4000101089478,
- "p90": 643.0079936981201,
- "p95": 645.7599997520447,
- "p99": 650.0160098075867
- },
- "roundtrip": {
- "p50": 1619.871973991394,
- "p90": 1633.5680484771729,
- "p95": 1636.3840103149414,
- "p99": 1644.09601688385
- },
- "isolatedSum": {
- "p50": 982.9440116882324,
- "p90": 1010.3999972343445,
- "p95": 1015.5200064182281,
- "p99": 1028.6720097064972
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 631.3279867172241,
- "p90": 641.6320204734802,
- "p95": 644.9919939041138,
- "p99": 654.4640064239502
- },
- "combine": {
- "p50": 1156.607985496521,
- "p90": 1167.0080423355103,
- "p95": 1169.7280406951904,
- "p99": 1179.3279647827148
- },
- "roundtrip": {
- "p50": 3077.791929244995,
- "p90": 3088.8640880584717,
- "p95": 3093.4720039367676,
- "p99": 3101.408004760742
- },
- "isolatedSum": {
- "p50": 1787.9359722137451,
- "p90": 1808.6400628089905,
- "p95": 1814.7200345993042,
- "p99": 1833.791971206665
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-9440251a",
- "identity": "h100|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_97196257",
- "comparisonKey": "2b50b361430bc4f6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:48:40.278594+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_14",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271583505",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271583505",
- "createdAt": "2026-06-26T23:48:40.278594+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 211.2639993429184,
- "p90": 217.28000044822693,
- "p95": 219.32800114154816,
- "p99": 226.78400576114655
- },
- "combine": {
- "p50": 97.15200215578079,
- "p90": 100.41599720716476,
- "p95": 102.27199643850327,
- "p99": 105.59999942779541
- },
- "roundtrip": {
- "p50": 296.640008687973,
- "p90": 303.26399207115173,
- "p95": 305.82401156425476,
- "p99": 313.9199912548065
- },
- "isolatedSum": {
- "p50": 308.4160014986992,
- "p90": 317.6959976553917,
- "p95": 321.5999975800514,
- "p99": 332.38400518894196
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 281.98400139808655,
- "p90": 324.6400058269501,
- "p95": 327.7760148048401,
- "p99": 332.99198746681213
- },
- "combine": {
- "p50": 141.53599739074707,
- "p90": 147.87200093269348,
- "p95": 149.9519944190979,
- "p99": 152.70400047302246
- },
- "roundtrip": {
- "p50": 409.7920060157776,
- "p90": 415.45599699020386,
- "p95": 417.7280068397522,
- "p99": 423.39199781417847
- },
- "isolatedSum": {
- "p50": 423.5199987888336,
- "p90": 472.51200675964355,
- "p95": 477.728009223938,
- "p99": 485.6959879398346
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 402.24000811576843,
- "p90": 444.64001059532166,
- "p95": 447.3919868469238,
- "p99": 454.1440010070801
- },
- "combine": {
- "p50": 224.16000068187714,
- "p90": 233.0559939146042,
- "p95": 235.23199558258057,
- "p99": 239.29600417613983
- },
- "roundtrip": {
- "p50": 613.9839887619019,
- "p90": 657.7600240707397,
- "p95": 661.9200110435486,
- "p99": 734.7840070724487
- },
- "isolatedSum": {
- "p50": 626.4000087976456,
- "p90": 677.6960045099258,
- "p95": 682.6239824295044,
- "p99": 693.4400051832199
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 664.7359728813171,
- "p90": 669.5680022239685,
- "p95": 671.1360216140747,
- "p99": 674.7519969940186
- },
- "combine": {
- "p50": 358.0799996852875,
- "p90": 363.23198676109314,
- "p95": 364.47998881340027,
- "p99": 369.9199855327606
- },
- "roundtrip": {
- "p50": 1005.2160024642944,
- "p90": 1010.2720260620117,
- "p95": 1012.287974357605,
- "p99": 1019.9999809265137
- },
- "isolatedSum": {
- "p50": 1022.8159725666046,
- "p90": 1032.7999889850616,
- "p95": 1035.616010427475,
- "p99": 1044.6719825267792
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 1237.8560304641724,
- "p90": 1245.919942855835,
- "p95": 1249.0559816360474,
- "p99": 1253.6319494247437
- },
- "combine": {
- "p50": 632.0639848709106,
- "p90": 639.0720009803772,
- "p95": 641.5359973907471,
- "p99": 646.9119787216187
- },
- "roundtrip": {
- "p50": 1845.0239896774292,
- "p90": 1854.3039560317993,
- "p95": 1857.983946800232,
- "p99": 1862.720012664795
- },
- "isolatedSum": {
- "p50": 1869.920015335083,
- "p90": 1884.9919438362122,
- "p95": 1890.5919790267944,
- "p99": 1900.5439281463623
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 2360.80002784729,
- "p90": 2371.2639808654785,
- "p95": 2375.3280639648438,
- "p99": 2383.3279609680176
- },
- "combine": {
- "p50": 1150.8159637451172,
- "p90": 1160.032033920288,
- "p95": 1162.9120111465454,
- "p99": 1171.6159582138062
- },
- "roundtrip": {
- "p50": 3508.7039470672607,
- "p90": 3525.631904602051,
- "p95": 3531.615972518921,
- "p99": 3547.4560260772705
- },
- "isolatedSum": {
- "p50": 3511.615991592407,
- "p90": 3531.2960147857666,
- "p95": 3538.240075111389,
- "p99": 3554.9439191818237
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4bac404d",
- "identity": "h100|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "h100_a96c99f3",
- "comparisonKey": "773edc302de99204",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:16:09.188835+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_15",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287500362",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287500362",
- "createdAt": "2026-06-27T11:16:09.188835+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 89.9839997291565,
- "p90": 93.9520001411438,
- "p95": 96.44799679517746,
- "p99": 158.87999534606934
- },
- "combine": {
- "p50": 98.04800152778625,
- "p90": 100.5759984254837,
- "p95": 102.11200267076492,
- "p99": 104.67199981212616
- },
- "roundtrip": {
- "p50": 217.75999665260315,
- "p90": 221.66399657726288,
- "p95": 223.00800681114197,
- "p99": 227.7120053768158
- },
- "isolatedSum": {
- "p50": 188.03200125694275,
- "p90": 194.5279985666275,
- "p95": 198.55999946594238,
- "p99": 263.5519951581955
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 107.4879989027977,
- "p90": 111.42399907112122,
- "p95": 114.59200084209442,
- "p99": 170.30400037765503
- },
- "combine": {
- "p50": 143.39199662208557,
- "p90": 146.4959979057312,
- "p95": 147.8399932384491,
- "p99": 150.56000649929047
- },
- "roundtrip": {
- "p50": 329.24801111221313,
- "p90": 333.5680067539215,
- "p95": 335.32801270484924,
- "p99": 338.7199938297272
- },
- "isolatedSum": {
- "p50": 250.87999552488327,
- "p90": 257.9199969768524,
- "p95": 262.4319940805435,
- "p99": 320.8640068769455
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77285376,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 146.97599411010742,
- "p90": 158.9439958333969,
- "p95": 163.39200735092163,
- "p99": 460.86400747299194
- },
- "combine": {
- "p50": 223.80800545215607,
- "p90": 229.40799593925476,
- "p95": 230.81600666046143,
- "p99": 233.40800404548645
- },
- "roundtrip": {
- "p50": 523.967981338501,
- "p90": 529.1839838027954,
- "p95": 530.6879878044128,
- "p99": 534.6559882164001
- },
- "isolatedSum": {
- "p50": 370.7839995622635,
- "p90": 388.3519917726517,
- "p95": 394.20801401138306,
- "p99": 694.2720115184784
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154886144,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 215.32799303531647,
- "p90": 219.55199539661407,
- "p95": 220.7999974489212,
- "p99": 223.51999580860138
- },
- "combine": {
- "p50": 361.11998558044434,
- "p90": 366.5280044078827,
- "p95": 367.8719997406006,
- "p99": 371.7760145664215
- },
- "roundtrip": {
- "p50": 895.6800103187561,
- "p90": 901.2479782104492,
- "p95": 903.1360149383545,
- "p99": 906.8480134010315
- },
- "isolatedSum": {
- "p50": 576.4479786157608,
- "p90": 586.0799998044968,
- "p95": 588.6719971895218,
- "p99": 595.2960103750229
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 309750784,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 354.8479974269867,
- "p90": 371.8079924583435,
- "p95": 374.2400109767914,
- "p99": 380.2880048751831
- },
- "combine": {
- "p50": 633.0239772796631,
- "p90": 642.304003238678,
- "p95": 644.927978515625,
- "p99": 650.7840156555176
- },
- "roundtrip": {
- "p50": 1625.4080533981323,
- "p90": 1642.7520513534546,
- "p95": 1652.6720523834229,
- "p99": 1691.4559602737427
- },
- "isolatedSum": {
- "p50": 987.8719747066498,
- "p90": 1014.1119956970215,
- "p95": 1019.1679894924164,
- "p99": 1031.0720205307007
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619687936,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 641.3760185241699,
- "p90": 647.5200057029724,
- "p95": 649.5040059089661,
- "p99": 655.4880142211914
- },
- "combine": {
- "p50": 1148.4800577163696,
- "p90": 1157.5679779052734,
- "p95": 1160.2239608764648,
- "p99": 1163.807988166809
- },
- "roundtrip": {
- "p50": 3082.240104675293,
- "p90": 3094.0160751342773,
- "p95": 3098.112106323242,
- "p99": 3109.055995941162
- },
- "isolatedSum": {
- "p50": 1789.8560762405396,
- "p90": 1805.0879836082458,
- "p95": 1809.727966785431,
- "p99": 1819.2960023880005
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239834624,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-0ee3ca7d",
- "identity": "h100|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "h100_97196257",
- "comparisonKey": "7f26f72cd9fff78c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:50:56.826066+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_15",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271663775",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271663775",
- "createdAt": "2026-06-26T23:50:56.826066+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 211.93599700927734,
- "p90": 218.33600103855133,
- "p95": 220.5120027065277,
- "p99": 225.055992603302
- },
- "combine": {
- "p50": 97.59999811649323,
- "p90": 100.0640019774437,
- "p95": 101.85600072145462,
- "p99": 104.5759990811348
- },
- "roundtrip": {
- "p50": 297.91998863220215,
- "p90": 303.9360046386719,
- "p95": 306.5600097179413,
- "p99": 328.000009059906
- },
- "isolatedSum": {
- "p50": 309.53599512577057,
- "p90": 318.400003015995,
- "p95": 322.36800342798233,
- "p99": 329.6319916844368
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 278.3359885215759,
- "p90": 284.5759987831116,
- "p95": 285.8560085296631,
- "p99": 292.03200340270996
- },
- "combine": {
- "p50": 141.88799262046814,
- "p90": 145.1520025730133,
- "p95": 146.88000082969666,
- "p99": 151.39199793338776
- },
- "roundtrip": {
- "p50": 404.4800102710724,
- "p90": 410.7840061187744,
- "p95": 413.9519929885864,
- "p99": 420.51199078559875
- },
- "isolatedSum": {
- "p50": 420.22398114204407,
- "p90": 429.7280013561249,
- "p95": 432.73600935935974,
- "p99": 443.4240013360977
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77285376,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 397.2800076007843,
- "p90": 402.8480052947998,
- "p95": 405.44000267982483,
- "p99": 410.71999073028564
- },
- "combine": {
- "p50": 221.02400660514832,
- "p90": 225.0880002975464,
- "p95": 226.01599991321564,
- "p99": 229.50400412082672
- },
- "roundtrip": {
- "p50": 601.4400124549866,
- "p90": 608.1600189208984,
- "p95": 610.4000210762024,
- "p99": 616.8000102043152
- },
- "isolatedSum": {
- "p50": 618.3040142059326,
- "p90": 627.9360055923462,
- "p95": 631.4560025930405,
- "p99": 640.2239948511124
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154886144,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 659.5199704170227,
- "p90": 663.2959842681885,
- "p95": 665.0239825248718,
- "p99": 667.2319769859314
- },
- "combine": {
- "p50": 360.22400856018066,
- "p90": 364.9280071258545,
- "p95": 366.3040101528168,
- "p99": 369.85599994659424
- },
- "roundtrip": {
- "p50": 1002.9439926147461,
- "p90": 1008.3839893341064,
- "p95": 1010.0159645080566,
- "p99": 1013.856053352356
- },
- "isolatedSum": {
- "p50": 1019.7439789772034,
- "p90": 1028.223991394043,
- "p95": 1031.3279926776886,
- "p99": 1037.0879769325256
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 309750784,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 1226.9760370254517,
- "p90": 1235.1679801940918,
- "p95": 1236.8320226669312,
- "p99": 1242.143988609314
- },
- "combine": {
- "p50": 624.5120167732239,
- "p90": 631.8399906158447,
- "p95": 634.1120004653931,
- "p99": 675.8400201797485
- },
- "roundtrip": {
- "p50": 1831.455945968628,
- "p90": 1840.831995010376,
- "p95": 1843.775987625122,
- "p99": 1848.2880592346191
- },
- "isolatedSum": {
- "p50": 1851.4880537986755,
- "p90": 1867.0079708099365,
- "p95": 1870.9440231323242,
- "p99": 1917.9840087890625
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619687936,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 2344.1600799560547,
- "p90": 2350.719928741455,
- "p95": 2352.9601097106934,
- "p99": 2358.0799102783203
- },
- "combine": {
- "p50": 1141.4719820022583,
- "p90": 1150.9439945220947,
- "p95": 1153.7920236587524,
- "p99": 1162.592053413391
- },
- "roundtrip": {
- "p50": 3469.856023788452,
- "p90": 3481.6958904266357,
- "p95": 3484.3521118164062,
- "p99": 3490.528106689453
- },
- "isolatedSum": {
- "p50": 3485.632061958313,
- "p90": 3501.66392326355,
- "p95": 3506.752133369446,
- "p99": 3520.6719636917114
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239834624,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-560e55e7",
- "identity": "h100|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|dc27c5e0894e569",
- "colorKey": "h100_7f10961a",
- "comparisonKey": "6a3a9660e48371b3",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:45:34.307375+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_08",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 (norm)",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28273218274",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273218274",
- "createdAt": "2026-06-27T00:45:34.307375+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 196.79999351501465,
- "p90": 203.80799472332,
- "p95": 205.79199492931366,
- "p99": 214.11199867725372
- },
- "combine": {
- "p50": 75.71200281381607,
- "p90": 78.5600021481514,
- "p95": 80.54400235414505,
- "p99": 84.6719965338707
- },
- "roundtrip": {
- "p50": 255.64798712730408,
- "p90": 264.41600918769836,
- "p95": 274.1119861602783,
- "p99": 321.9519853591919
- },
- "isolatedSum": {
- "p50": 272.5119963288307,
- "p90": 282.3679968714714,
- "p95": 286.3359972834587,
- "p99": 298.7839952111244
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 231.26399517059326,
- "p90": 269.6639895439148,
- "p95": 272.19200134277344,
- "p99": 278.01600098609924
- },
- "combine": {
- "p50": 100.99200159311295,
- "p90": 109.82400178909302,
- "p95": 110.81600189208984,
- "p99": 113.3119985461235
- },
- "roundtrip": {
- "p50": 315.8720135688782,
- "p90": 327.39201188087463,
- "p95": 355.679988861084,
- "p99": 369.53601241111755
- },
- "isolatedSum": {
- "p50": 332.2559967637062,
- "p90": 379.4879913330078,
- "p95": 383.0080032348633,
- "p99": 391.32799953222275
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44863488,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 309.59999561309814,
- "p90": 352.1279990673065,
- "p95": 355.00800609588623,
- "p99": 361.1519932746887
- },
- "combine": {
- "p50": 147.90399372577667,
- "p90": 156.99200332164764,
- "p95": 158.24000537395477,
- "p99": 162.08000481128693
- },
- "roundtrip": {
- "p50": 442.4000084400177,
- "p90": 483.3280146121979,
- "p95": 487.8399968147278,
- "p99": 518.4000134468079
- },
- "isolatedSum": {
- "p50": 457.5039893388748,
- "p90": 509.12000238895416,
- "p95": 513.248011469841,
- "p99": 523.2319980859756
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 89751552,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 453.98399233818054,
- "p90": 459.3600034713745,
- "p95": 461.88798546791077,
- "p99": 466.623991727829
- },
- "combine": {
- "p50": 235.29599606990814,
- "p90": 239.00799453258514,
- "p95": 240.51199853420258,
- "p99": 242.46400594711304
- },
- "roundtrip": {
- "p50": 673.3120083808899,
- "p90": 678.8480281829834,
- "p95": 680.6079745292664,
- "p99": 684.544026851654
- },
- "isolatedSum": {
- "p50": 689.2799884080887,
- "p90": 698.3679980039597,
- "p95": 702.3999840021133,
- "p99": 709.087997674942
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 179511296,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 784.928023815155,
- "p90": 799.8719811439514,
- "p95": 803.2000064849854,
- "p99": 809.0239763259888
- },
- "combine": {
- "p50": 405.4720103740692,
- "p90": 416.06399416923523,
- "p95": 418.3039963245392,
- "p99": 422.4959909915924
- },
- "roundtrip": {
- "p50": 1170.1120138168335,
- "p90": 1179.58402633667,
- "p95": 1183.6479902267456,
- "p99": 1192.7679777145386
- },
- "isolatedSum": {
- "p50": 1190.4000341892242,
- "p90": 1215.9359753131866,
- "p95": 1221.5040028095245,
- "p99": 1231.5199673175812
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 358055936,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1483.3279848098755,
- "p90": 1490.496039390564,
- "p95": 1493.6319589614868,
- "p99": 1501.5679597854614
- },
- "combine": {
- "p50": 732.2880029678345,
- "p90": 738.8160228729248,
- "p95": 740.8000230789185,
- "p99": 745.9840178489685
- },
- "roundtrip": {
- "p50": 2199.039936065674,
- "p90": 2209.439992904663,
- "p95": 2212.5439643859863,
- "p99": 2217.087984085083
- },
- "isolatedSum": {
- "p50": 2215.61598777771,
- "p90": 2229.3120622634888,
- "p95": 2234.4319820404053,
- "p99": 2247.55197763443
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 716197888,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-de081cfe",
- "identity": "h100|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "h100_91aa6e56",
- "comparisonKey": "e439d265ee12c9f2",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:20.983875+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_03",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254323956",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254323956",
- "createdAt": "2026-06-26T17:30:20.983875+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 89.59999680519104,
- "p90": 93.72799843549728,
- "p95": 95.36000341176987,
- "p99": 100.832000374794
- },
- "combine": {
- "p50": 98.14400225877762,
- "p90": 100.60799866914749,
- "p95": 102.11200267076492,
- "p99": 105.0880029797554
- },
- "roundtrip": {
- "p50": 215.13600647449493,
- "p90": 218.55999529361725,
- "p95": 220.12799978256226,
- "p99": 228.06400060653687
- },
- "isolatedSum": {
- "p50": 187.74399906396866,
- "p90": 194.33599710464478,
- "p95": 197.4720060825348,
- "p99": 205.9200033545494
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 106.175996363163,
- "p90": 121.47200107574463,
- "p95": 122.52800166606903,
- "p99": 125.91999769210815
- },
- "combine": {
- "p50": 139.48799669742584,
- "p90": 146.17599546909332,
- "p95": 147.61599898338318,
- "p99": 149.82399344444275
- },
- "roundtrip": {
- "p50": 320.92800736427307,
- "p90": 336.41600608825684,
- "p95": 337.92001008987427,
- "p99": 341.2800133228302
- },
- "isolatedSum": {
- "p50": 245.66399306058884,
- "p90": 267.64799654483795,
- "p95": 270.1440006494522,
- "p99": 275.7439911365509
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 138.46400380134583,
- "p90": 182.8799992799759,
- "p95": 190.97599387168884,
- "p99": 197.28000462055206
- },
- "combine": {
- "p50": 208.3200067281723,
- "p90": 223.00800681114197,
- "p95": 231.83999955654144,
- "p99": 242.01600253582
- },
- "roundtrip": {
- "p50": 509.69600677490234,
- "p90": 521.5680003166199,
- "p95": 523.4879851341248,
- "p99": 528.9599895477295
- },
- "isolatedSum": {
- "p50": 346.7840105295181,
- "p90": 405.88800609111786,
- "p95": 422.8159934282303,
- "p99": 439.29600715637207
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 197.4399983882904,
- "p90": 210.87999641895294,
- "p95": 213.31200003623962,
- "p99": 216.2880003452301
- },
- "combine": {
- "p50": 325.82399249076843,
- "p90": 330.1120102405548,
- "p95": 331.6799998283386,
- "p99": 335.80800890922546
- },
- "roundtrip": {
- "p50": 847.4879860877991,
- "p90": 858.0160140991211,
- "p95": 861.0879778862,
- "p99": 869.2799806594849
- },
- "isolatedSum": {
- "p50": 523.2639908790588,
- "p90": 540.9920066595078,
- "p95": 544.9919998645782,
- "p99": 552.0960092544556
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 318.65599751472473,
- "p90": 335.29600501060486,
- "p95": 338.0799889564514,
- "p99": 347.29599952697754
- },
- "combine": {
- "p50": 559.7760081291199,
- "p90": 566.815972328186,
- "p95": 569.5040225982666,
- "p99": 573.311984539032
- },
- "roundtrip": {
- "p50": 1524.0000486373901,
- "p90": 1544.0640449523926,
- "p95": 1550.7839918136597,
- "p99": 1576.7359733581543
- },
- "isolatedSum": {
- "p50": 878.4320056438446,
- "p90": 902.1119773387909,
- "p95": 907.584011554718,
- "p99": 920.6079840660095
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 574.7519731521606,
- "p90": 593.1839942932129,
- "p95": 598.1760025024414,
- "p99": 604.7999858856201
- },
- "combine": {
- "p50": 1025.056004524231,
- "p90": 1033.5359573364258,
- "p95": 1036.1920595169067,
- "p99": 1042.847990989685
- },
- "roundtrip": {
- "p50": 2880.863904953003,
- "p90": 2894.5279121398926,
- "p95": 2899.9040126800537,
- "p99": 2908.3518981933594
- },
- "isolatedSum": {
- "p50": 1599.8079776763916,
- "p90": 1626.7199516296387,
- "p95": 1634.3680620193481,
- "p99": 1647.6479768753052
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e8c2a4d2",
- "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "h100_eddc3af6",
- "comparisonKey": "fd73340f2af530d5",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:48.926445+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_19",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254341346",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254341346",
- "createdAt": "2026-06-26T17:30:48.926445+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 77.60000228881836,
- "p90": 81.4720019698143,
- "p95": 83.52000266313553,
- "p99": 102.7199998497963
- },
- "combine": {
- "p50": 98.08000177145004,
- "p90": 102.01600193977356,
- "p95": 115.35999923944473,
- "p99": 344.0319895744324
- },
- "roundtrip": {
- "p50": 205.1520049571991,
- "p90": 208.19200575351715,
- "p95": 209.85600352287292,
- "p99": 214.9440050125122
- },
- "isolatedSum": {
- "p50": 175.6800040602684,
- "p90": 183.48800390958786,
- "p95": 198.88000190258026,
- "p99": 446.75198942422867
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 95.74399888515472,
- "p90": 110.07999628782272,
- "p95": 111.13599687814713,
- "p99": 114.81600254774094
- },
- "combine": {
- "p50": 141.7279988527298,
- "p90": 148.8959938287735,
- "p95": 150.4960060119629,
- "p99": 153.02400290966034
- },
- "roundtrip": {
- "p50": 311.45599484443665,
- "p90": 319.5840120315552,
- "p95": 321.696013212204,
- "p99": 324.67201352119446
- },
- "isolatedSum": {
- "p50": 237.47199773788452,
- "p90": 258.9759901165962,
- "p95": 261.63200289011,
- "p99": 267.8400054574013
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 129.56799566745758,
- "p90": 144.57599818706512,
- "p95": 146.14400267601013,
- "p99": 148.8959938287735
- },
- "combine": {
- "p50": 213.4079933166504,
- "p90": 218.36799383163452,
- "p95": 219.7760045528412,
- "p99": 224.2240011692047
- },
- "roundtrip": {
- "p50": 500.70399045944214,
- "p90": 508.1599950790405,
- "p95": 510.81597805023193,
- "p99": 514.8159861564636
- },
- "isolatedSum": {
- "p50": 342.97598898410797,
- "p90": 362.94399201869965,
- "p95": 365.9200072288513,
- "p99": 373.1199949979782
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 186.49600446224213,
- "p90": 196.0960030555725,
- "p95": 197.50399887561798,
- "p99": 202.55999267101288
- },
- "combine": {
- "p50": 327.7760148048401,
- "p90": 333.18400382995605,
- "p95": 334.3679904937744,
- "p99": 337.72799372673035
- },
- "roundtrip": {
- "p50": 835.2640271186829,
- "p90": 841.69602394104,
- "p95": 844.0639972686768,
- "p99": 848.2879996299744
- },
- "isolatedSum": {
- "p50": 514.2720192670822,
- "p90": 529.2800068855286,
- "p95": 531.8719893693924,
- "p99": 540.2879863977432
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 306.62399530410767,
- "p90": 320.19200921058655,
- "p95": 322.7519989013672,
- "p99": 327.1679878234863
- },
- "combine": {
- "p50": 559.6479773521423,
- "p90": 567.296028137207,
- "p95": 570.1119899749756,
- "p99": 574.5919942855835
- },
- "roundtrip": {
- "p50": 1509.6960067749023,
- "p90": 1522.7199792861938,
- "p95": 1525.6320238113403,
- "p99": 1585.9839916229248
- },
- "isolatedSum": {
- "p50": 866.27197265625,
- "p90": 887.4880373477936,
- "p95": 892.8639888763428,
- "p99": 901.7599821090698
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 539.8719906806946,
- "p90": 550.7839918136597,
- "p95": 555.7119846343994,
- "p99": 564.7040009498596
- },
- "combine": {
- "p50": 1024.9279737472534,
- "p90": 1034.3040227890015,
- "p95": 1037.11998462677,
- "p99": 1047.0720529556274
- },
- "roundtrip": {
- "p50": 2850.719928741455,
- "p90": 2861.407995223999,
- "p95": 2864.9280071258545,
- "p99": 2870.176076889038
- },
- "isolatedSum": {
- "p50": 1564.799964427948,
- "p90": 1585.0880146026611,
- "p95": 1592.8319692611694,
- "p99": 1611.776053905487
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f6d2d196",
- "identity": "h100|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_ec72792b",
- "comparisonKey": "39b4bc74c45641cb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:48:09.793091+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_09",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep · fp8 [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271576503",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271576503",
- "createdAt": "2026-06-26T23:48:09.793091+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 76.73600316047668,
- "p90": 80.19199967384338,
- "p95": 82.17599987983704,
- "p99": 85.4720026254654
- },
- "combine": {
- "p50": 98.68799895048141,
- "p90": 100.8640006184578,
- "p95": 102.84800082445145,
- "p99": 113.27999830245972
- },
- "roundtrip": {
- "p50": 204.25599813461304,
- "p90": 206.84799551963806,
- "p95": 208.0959975719452,
- "p99": 211.32799983024597
- },
- "isolatedSum": {
- "p50": 175.4240021109581,
- "p90": 181.05600029230118,
- "p95": 185.02400070428848,
- "p99": 198.7520009279251
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 95.551997423172,
- "p90": 98.65599870681763,
- "p95": 100.44799745082855,
- "p99": 104.63999956846237
- },
- "combine": {
- "p50": 143.51999759674072,
- "p90": 146.04799449443817,
- "p95": 147.2640037536621,
- "p99": 150.07999539375305
- },
- "roundtrip": {
- "p50": 317.05600023269653,
- "p90": 320.67200541496277,
- "p95": 322.07998633384705,
- "p99": 325.56799054145813
- },
- "isolatedSum": {
- "p50": 239.07199501991272,
- "p90": 244.7039932012558,
- "p95": 247.71200120449066,
- "p99": 254.71999496221542
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 132.4159950017929,
- "p90": 136.09600067138672,
- "p95": 137.40800321102142,
- "p99": 140.19200205802917
- },
- "combine": {
- "p50": 224.16000068187714,
- "p90": 228.2239943742752,
- "p95": 229.312002658844,
- "p99": 232.03200101852417
- },
- "roundtrip": {
- "p50": 517.5039768218994,
- "p90": 522.5920081138611,
- "p95": 523.8400101661682,
- "p99": 534.1759920120239
- },
- "isolatedSum": {
- "p50": 356.57599568367004,
- "p90": 364.3199950456619,
- "p95": 366.7200058698654,
- "p99": 372.22400307655334
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 203.5199999809265,
- "p90": 207.39200711250305,
- "p95": 208.95999670028687,
- "p99": 213.1199985742569
- },
- "combine": {
- "p50": 359.0719997882843,
- "p90": 364.25599455833435,
- "p95": 365.4080033302307,
- "p99": 367.35999584198
- },
- "roundtrip": {
- "p50": 883.679986000061,
- "p90": 889.6960020065308,
- "p95": 891.5839791297913,
- "p99": 897.7599740028381
- },
- "isolatedSum": {
- "p50": 562.5919997692108,
- "p90": 571.6480016708374,
- "p95": 574.3680000305176,
- "p99": 580.4799944162369
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 341.0240113735199,
- "p90": 352.9280126094818,
- "p95": 354.7840118408203,
- "p99": 361.31200194358826
- },
- "combine": {
- "p50": 631.2000155448914,
- "p90": 639.136016368866,
- "p95": 641.5359973907471,
- "p99": 644.1599726676941
- },
- "roundtrip": {
- "p50": 1616.5440082550049,
- "p90": 1624.9920129776,
- "p95": 1627.3599863052368,
- "p99": 1631.9680213928223
- },
- "isolatedSum": {
- "p50": 972.2240269184113,
- "p90": 992.0640289783478,
- "p95": 996.3200092315674,
- "p99": 1005.4719746112823
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 611.6160154342651,
- "p90": 621.0240125656128,
- "p95": 624.0959763526917,
- "p99": 790.3040051460266
- },
- "combine": {
- "p50": 1165.503978729248,
- "p90": 1175.487995147705,
- "p95": 1177.664041519165,
- "p99": 1188.9280080795288
- },
- "roundtrip": {
- "p50": 3078.4640312194824,
- "p90": 3095.8399772644043,
- "p95": 3103.071928024292,
- "p99": 3115.9679889678955
- },
- "isolatedSum": {
- "p50": 1777.1199941635132,
- "p90": 1796.5120077133179,
- "p95": 1801.7600178718567,
- "p99": 1979.2320132255554
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-0f748c2f",
- "identity": "h100|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_ec8c28a9",
- "comparisonKey": "04d8dc12f0898400",
- "schemaVersion": 3,
- "generatedAt": "2026-06-28T02:32:47.489418+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_08",
- "sku": "h100",
- "backend": "deepep-hybrid",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep-hybrid · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "hybrid-e0a5b1d",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28308875809",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308875809",
- "createdAt": "2026-06-28T02:32:47.489418+00:00",
- "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 167.1680063009262,
- "p90": 219.7120040655136,
- "p95": 222.01600670814514,
- "p99": 227.84000635147095
- },
- "combine": {
- "p50": 36.896001547575,
- "p90": 52.2879995405674,
- "p95": 52.799999713897705,
- "p99": 57.34400078654289
- },
- "roundtrip": {
- "p50": 195.3279972076416,
- "p90": 256.76798820495605,
- "p95": 260.51199436187744,
- "p99": 266.2079930305481
- },
- "isolatedSum": {
- "p50": 204.0640078485012,
- "p90": 272.000003606081,
- "p95": 274.81600642204285,
- "p99": 285.18400713801384
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 165.27999937534332,
- "p90": 173.08799922466278,
- "p95": 185.98400056362152,
- "p99": 224.48000311851501
- },
- "combine": {
- "p50": 35.93600168824196,
- "p90": 39.264000952243805,
- "p95": 42.047999799251556,
- "p99": 47.93599992990494
- },
- "roundtrip": {
- "p50": 193.34399700164795,
- "p90": 199.64799284934998,
- "p95": 202.72000133991241,
- "p99": 207.58399367332458
- },
- "isolatedSum": {
- "p50": 201.21600106358528,
- "p90": 212.35200017690659,
- "p95": 228.03200036287308,
- "p99": 272.41600304841995
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 166.97600483894348,
- "p90": 218.55999529361725,
- "p95": 221.72799706459045,
- "p99": 226.17599368095398
- },
- "combine": {
- "p50": 39.64800015091896,
- "p90": 52.15999856591225,
- "p95": 52.76799947023392,
- "p99": 55.93600124120712
- },
- "roundtrip": {
- "p50": 195.0400024652481,
- "p90": 255.90398907661438,
- "p95": 258.432000875473,
- "p99": 266.1759853363037
- },
- "isolatedSum": {
- "p50": 206.62400498986244,
- "p90": 270.7199938595295,
- "p95": 274.49599653482437,
- "p99": 282.1119949221611
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 168.44800114631653,
- "p90": 221.02400660514832,
- "p95": 223.39199483394623,
- "p99": 229.34399545192719
- },
- "combine": {
- "p50": 39.744000881910324,
- "p90": 52.352000027894974,
- "p95": 53.18399891257286,
- "p99": 58.079998940229416
- },
- "roundtrip": {
- "p50": 195.77600061893463,
- "p90": 259.68000292778015,
- "p95": 262.14399933815,
- "p99": 267.64801144599915
- },
- "isolatedSum": {
- "p50": 208.19200202822685,
- "p90": 273.3760066330433,
- "p95": 276.5759937465191,
- "p99": 287.4239943921566
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 180.28800189495087,
- "p90": 222.1119999885559,
- "p95": 225.37599503993988,
- "p99": 237.0239943265915
- },
- "combine": {
- "p50": 42.94399917125702,
- "p90": 53.727999329566956,
- "p95": 57.0559985935688,
- "p99": 143.96800100803375
- },
- "roundtrip": {
- "p50": 211.07199788093567,
- "p90": 258.84801149368286,
- "p95": 261.85598969459534,
- "p99": 270.7520127296448
- },
- "isolatedSum": {
- "p50": 223.23200106620789,
- "p90": 275.83999931812286,
- "p95": 282.4319936335087,
- "p99": 380.99199533462524
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 205.4399996995926,
- "p90": 242.5599992275238,
- "p95": 244.89599466323853,
- "p99": 249.31199848651886
- },
- "combine": {
- "p50": 45.21600157022476,
- "p90": 54.55999821424484,
- "p95": 55.48800155520439,
- "p99": 59.13599953055382
- },
- "roundtrip": {
- "p50": 241.66400730609894,
- "p90": 279.9679934978485,
- "p95": 282.20799565315247,
- "p99": 286.3680124282837
- },
- "isolatedSum": {
- "p50": 250.65600126981735,
- "p90": 297.11999744176865,
- "p95": 300.3839962184429,
- "p99": 308.4479980170727
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 258.36798548698425,
- "p90": 366.784006357193,
- "p95": 370.36800384521484,
- "p99": 381.98399543762207
- },
- "combine": {
- "p50": 56.352000683546066,
- "p90": 67.29599833488464,
- "p95": 68.09599697589874,
- "p99": 72.9919970035553
- },
- "roundtrip": {
- "p50": 305.88799715042114,
- "p90": 344.9920117855072,
- "p95": 346.78399562835693,
- "p99": 349.8559892177582
- },
- "isolatedSum": {
- "p50": 314.7199861705303,
- "p90": 434.08000469207764,
- "p95": 438.4640008211136,
- "p99": 454.97599244117737
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 262.0159983634949,
- "p90": 300.86401104927063,
- "p95": 303.99999022483826,
- "p99": 463.1359875202179
- },
- "combine": {
- "p50": 69.85600292682648,
- "p90": 80.73599636554718,
- "p95": 81.53600245714188,
- "p99": 84.95999872684479
- },
- "roundtrip": {
- "p50": 325.0240087509155,
- "p90": 364.4160032272339,
- "p95": 366.36799573898315,
- "p99": 370.11200189590454
- },
- "isolatedSum": {
- "p50": 331.87200129032135,
- "p90": 381.6000074148178,
- "p95": 385.53599268198013,
- "p99": 548.0959862470627
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-402bdadc",
- "identity": "h100|deepep-hybrid|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_ec8c28a9",
- "comparisonKey": "2d8d821b3680de8a",
- "schemaVersion": 3,
- "generatedAt": "2026-06-28T02:32:51.441168+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
- "backend": "deepep-hybrid",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · deepep-hybrid · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "hybrid-e0a5b1d",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28308875809",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28308875809",
- "createdAt": "2026-06-28T02:32:51.441168+00:00",
- "sha": "02ef8d2d9b6fd7519504810daae202e88ee66360"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 257.79199600219727,
- "p90": 262.65600323677063,
- "p95": 264.95999097824097,
- "p99": 272.2240090370178
- },
- "combine": {
- "p50": 69.21599805355072,
- "p90": 71.19999825954437,
- "p95": 73.27999919652939,
- "p99": 79.83999699354172
- },
- "roundtrip": {
- "p50": 320.47998905181885,
- "p90": 324.3519961833954,
- "p95": 327.07199454307556,
- "p99": 332.3200047016144
- },
- "isolatedSum": {
- "p50": 327.007994055748,
- "p90": 333.856001496315,
- "p95": 338.23999017477036,
- "p99": 352.06400603055954
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 269.6320116519928,
- "p90": 274.6559977531433,
- "p95": 276.70401334762573,
- "p99": 282.6240062713623
- },
- "combine": {
- "p50": 104.51199859380722,
- "p90": 106.59199953079224,
- "p95": 107.45599865913391,
- "p99": 110.36799848079681
- },
- "roundtrip": {
- "p50": 368.3199882507324,
- "p90": 372.79999256134033,
- "p95": 375.0399947166443,
- "p99": 377.85598635673523
- },
- "isolatedSum": {
- "p50": 374.1440102458,
- "p90": 381.24799728393555,
- "p95": 384.16001200675964,
- "p99": 392.9920047521591
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 293.15200448036194,
- "p90": 298.5279858112335,
- "p95": 300.7360100746155,
- "p99": 305.9839904308319
- },
- "combine": {
- "p50": 172.31999337673187,
- "p90": 174.78400468826294,
- "p95": 175.9680062532425,
- "p99": 179.1680008172989
- },
- "roundtrip": {
- "p50": 464.4159972667694,
- "p90": 468.8960015773773,
- "p95": 470.8159863948822,
- "p99": 480.76799511909485
- },
- "isolatedSum": {
- "p50": 465.4719978570938,
- "p90": 473.31199049949646,
- "p95": 476.70401632785797,
- "p99": 485.1519912481308
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 448.0000138282776,
- "p90": 458.8800072669983,
- "p95": 461.1839950084686,
- "p99": 468.4799909591675
- },
- "combine": {
- "p50": 299.1679906845093,
- "p90": 301.7280101776123,
- "p95": 302.4959862232208,
- "p99": 305.6960105895996
- },
- "roundtrip": {
- "p50": 749.9840259552002,
- "p90": 761.568009853363,
- "p95": 765.2480006217957,
- "p99": 789.8880243301392
- },
- "isolatedSum": {
- "p50": 747.1680045127869,
- "p90": 760.6080174446106,
- "p95": 763.6799812316895,
- "p99": 774.1760015487671
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 729.3440103530884,
- "p90": 734.2399954795837,
- "p95": 737.824022769928,
- "p99": 864.9600148200989
- },
- "combine": {
- "p50": 555.8080077171326,
- "p90": 558.6240291595459,
- "p95": 559.935986995697,
- "p99": 565.7600164413452
- },
- "roundtrip": {
- "p50": 1285.599946975708,
- "p90": 1290.560007095337,
- "p95": 1292.7680015563965,
- "p99": 1297.9520559310913
- },
- "isolatedSum": {
- "p50": 1285.152018070221,
- "p90": 1292.8640246391296,
- "p95": 1297.760009765625,
- "p99": 1430.720031261444
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1308.0320358276367,
- "p90": 1330.7839632034302,
- "p95": 1333.2480192184448,
- "p99": 1338.43195438385
- },
- "combine": {
- "p50": 1069.3119764328003,
- "p90": 1073.2159614562988,
- "p95": 1074.7519731521606,
- "p99": 1078.3040523529053
- },
- "roundtrip": {
- "p50": 2376.9280910491943,
- "p90": 2398.9760875701904,
- "p95": 2401.3121128082275,
- "p99": 2405.503988265991
- },
- "isolatedSum": {
- "p50": 2377.344012260437,
- "p90": 2403.999924659729,
- "p95": 2407.9999923706055,
- "p99": 2416.7360067367554
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f1858975",
- "identity": "h100|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_6c33dc8f",
- "comparisonKey": "5205049e72237a92",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T17:24:08.744102+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_07",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · flashinfer · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28296376857",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296376857",
- "createdAt": "2026-06-27T17:24:08.744102+00:00",
- "sha": "2ebeba9134a8c84f7a80ac87742d57f7cdf1cf18"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 99.35999661684036,
- "p90": 103.29599678516388,
- "p95": 105.53599894046783,
- "p99": 110.81600189208984
- },
- "combine": {
- "p50": 99.35999661684036,
- "p90": 103.29599678516388,
- "p95": 105.53599894046783,
- "p99": 110.81600189208984
- },
- "roundtrip": {
- "p50": 99.35999661684036,
- "p90": 103.29599678516388,
- "p95": 105.53599894046783,
- "p99": 110.81600189208984
- },
- "isolatedSum": {
- "p50": 198.71999323368073,
- "p90": 206.59199357032776,
- "p95": 211.07199788093567,
- "p99": 221.6320037841797
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 99.20000284910202,
- "p90": 102.78400033712387,
- "p95": 105.53599894046783,
- "p99": 109.63200032711029
- },
- "combine": {
- "p50": 99.20000284910202,
- "p90": 102.78400033712387,
- "p95": 105.53599894046783,
- "p99": 109.63200032711029
- },
- "roundtrip": {
- "p50": 99.20000284910202,
- "p90": 102.78400033712387,
- "p95": 105.53599894046783,
- "p99": 109.63200032711029
- },
- "isolatedSum": {
- "p50": 198.40000569820404,
- "p90": 205.56800067424774,
- "p95": 211.07199788093567,
- "p99": 219.26400065422058
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 96.96000069379807,
- "p90": 101.08800232410431,
- "p95": 103.42399775981903,
- "p99": 108.86400192975998
- },
- "combine": {
- "p50": 96.96000069379807,
- "p90": 101.08800232410431,
- "p95": 103.42399775981903,
- "p99": 108.86400192975998
- },
- "roundtrip": {
- "p50": 96.96000069379807,
- "p90": 101.08800232410431,
- "p95": 103.42399775981903,
- "p99": 108.86400192975998
- },
- "isolatedSum": {
- "p50": 193.92000138759613,
- "p90": 202.17600464820862,
- "p95": 206.84799551963806,
- "p99": 217.72800385951996
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 98.68799895048141,
- "p90": 102.30399668216705,
- "p95": 104.25599664449692,
- "p99": 109.21599715948105
- },
- "combine": {
- "p50": 98.68799895048141,
- "p90": 102.30399668216705,
- "p95": 104.25599664449692,
- "p99": 109.21599715948105
- },
- "roundtrip": {
- "p50": 98.68799895048141,
- "p90": 102.30399668216705,
- "p95": 104.25599664449692,
- "p99": 109.21599715948105
- },
- "isolatedSum": {
- "p50": 197.37599790096283,
- "p90": 204.6079933643341,
- "p95": 208.51199328899384,
- "p99": 218.4319943189621
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 96.54399752616882,
- "p90": 101.72799974679947,
- "p95": 102.94400155544281,
- "p99": 107.42399841547012
- },
- "combine": {
- "p50": 96.54399752616882,
- "p90": 101.72799974679947,
- "p95": 102.94400155544281,
- "p99": 107.42399841547012
- },
- "roundtrip": {
- "p50": 96.54399752616882,
- "p90": 101.72799974679947,
- "p95": 102.94400155544281,
- "p99": 107.42399841547012
- },
- "isolatedSum": {
- "p50": 193.08799505233765,
- "p90": 203.45599949359894,
- "p95": 205.88800311088562,
- "p99": 214.84799683094025
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 97.72799909114838,
- "p90": 101.6639992594719,
- "p95": 105.0880029797554,
- "p99": 111.84000223875046
- },
- "combine": {
- "p50": 97.72799909114838,
- "p90": 101.6639992594719,
- "p95": 105.0880029797554,
- "p99": 111.84000223875046
- },
- "roundtrip": {
- "p50": 97.72799909114838,
- "p90": 101.6639992594719,
- "p95": 105.0880029797554,
- "p99": 111.84000223875046
- },
- "isolatedSum": {
- "p50": 195.45599818229675,
- "p90": 203.3279985189438,
- "p95": 210.1760059595108,
- "p99": 223.68000447750092
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 98.4639972448349,
- "p90": 104.44799810647964,
- "p95": 122.75200337171555,
- "p99": 401.5359878540039
- },
- "combine": {
- "p50": 98.4639972448349,
- "p90": 104.44799810647964,
- "p95": 122.75200337171555,
- "p99": 401.5359878540039
- },
- "roundtrip": {
- "p50": 98.4639972448349,
- "p90": 104.44799810647964,
- "p95": 122.75200337171555,
- "p99": 401.5359878540039
- },
- "isolatedSum": {
- "p50": 196.9279944896698,
- "p90": 208.8959962129593,
- "p95": 245.5040067434311,
- "p99": 803.0719757080078
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 110.62400043010712,
- "p90": 115.84000289440155,
- "p95": 118.23999881744385,
- "p99": 130.52800297737122
- },
- "combine": {
- "p50": 110.62400043010712,
- "p90": 115.84000289440155,
- "p95": 118.23999881744385,
- "p99": 130.52800297737122
- },
- "roundtrip": {
- "p50": 110.62400043010712,
- "p90": 115.84000289440155,
- "p95": 118.23999881744385,
- "p99": 130.52800297737122
- },
- "isolatedSum": {
- "p50": 221.24800086021423,
- "p90": 231.6800057888031,
- "p95": 236.4799976348877,
- "p99": 261.05600595474243
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-236b5900",
- "identity": "h100|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_865f08c6",
- "comparisonKey": "63f2ed34d1d8c7db",
- "schemaVersion": 3,
- "generatedAt": "2026-06-28T01:38:24.466545+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_15",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · flashinfer · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28307778986",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307778986",
- "createdAt": "2026-06-28T01:38:24.466545+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 109.11999642848969,
- "p90": 113.8560026884079,
- "p95": 116.19199812412262,
- "p99": 121.34400010108948
- },
- "combine": {
- "p50": 109.11999642848969,
- "p90": 113.8560026884079,
- "p95": 116.19199812412262,
- "p99": 121.34400010108948
- },
- "roundtrip": {
- "p50": 109.11999642848969,
- "p90": 113.8560026884079,
- "p95": 116.19199812412262,
- "p99": 121.34400010108948
- },
- "isolatedSum": {
- "p50": 218.23999285697937,
- "p90": 227.7120053768158,
- "p95": 232.38399624824524,
- "p99": 242.68800020217896
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 108.96000266075134,
- "p90": 116.80000275373459,
- "p95": 122.30399996042252,
- "p99": 135.3279948234558
- },
- "combine": {
- "p50": 108.96000266075134,
- "p90": 116.80000275373459,
- "p95": 122.30399996042252,
- "p99": 135.3279948234558
- },
- "roundtrip": {
- "p50": 108.96000266075134,
- "p90": 116.80000275373459,
- "p95": 122.30399996042252,
- "p99": 135.3279948234558
- },
- "isolatedSum": {
- "p50": 217.92000532150269,
- "p90": 233.60000550746918,
- "p95": 244.60799992084503,
- "p99": 270.6559896469116
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 109.18399691581726,
- "p90": 115.99999666213989,
- "p95": 119.55200135707855,
- "p99": 376.6399919986725
- },
- "combine": {
- "p50": 109.18399691581726,
- "p90": 115.99999666213989,
- "p95": 119.55200135707855,
- "p99": 376.6399919986725
- },
- "roundtrip": {
- "p50": 109.18399691581726,
- "p90": 115.99999666213989,
- "p95": 119.55200135707855,
- "p99": 376.6399919986725
- },
- "isolatedSum": {
- "p50": 218.36799383163452,
- "p90": 231.99999332427979,
- "p95": 239.1040027141571,
- "p99": 753.279983997345
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 108.70400071144104,
- "p90": 114.30399864912033,
- "p95": 116.12799763679504,
- "p99": 120.64000219106674
- },
- "combine": {
- "p50": 108.70400071144104,
- "p90": 114.30399864912033,
- "p95": 116.12799763679504,
- "p99": 120.64000219106674
- },
- "roundtrip": {
- "p50": 108.70400071144104,
- "p90": 114.30399864912033,
- "p95": 116.12799763679504,
- "p99": 120.64000219106674
- },
- "isolatedSum": {
- "p50": 217.40800142288208,
- "p90": 228.60799729824066,
- "p95": 232.2559952735901,
- "p99": 241.28000438213348
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 108.86400192975998,
- "p90": 114.656001329422,
- "p95": 119.03999745845795,
- "p99": 151.19999647140503
- },
- "combine": {
- "p50": 108.86400192975998,
- "p90": 114.656001329422,
- "p95": 119.03999745845795,
- "p99": 151.19999647140503
- },
- "roundtrip": {
- "p50": 108.86400192975998,
- "p90": 114.656001329422,
- "p95": 119.03999745845795,
- "p99": 151.19999647140503
- },
- "isolatedSum": {
- "p50": 217.72800385951996,
- "p90": 229.312002658844,
- "p95": 238.0799949169159,
- "p99": 302.39999294281006
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 109.21599715948105,
- "p90": 121.18399888277054,
- "p95": 127.3919939994812,
- "p99": 205.56800067424774
- },
- "combine": {
- "p50": 109.21599715948105,
- "p90": 121.18399888277054,
- "p95": 127.3919939994812,
- "p99": 205.56800067424774
- },
- "roundtrip": {
- "p50": 109.21599715948105,
- "p90": 121.18399888277054,
- "p95": 127.3919939994812,
- "p99": 205.56800067424774
- },
- "isolatedSum": {
- "p50": 218.4319943189621,
- "p90": 242.36799776554108,
- "p95": 254.7839879989624,
- "p99": 411.1360013484955
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 110.97600311040878,
- "p90": 115.93600362539291,
- "p95": 118.27199906110764,
- "p99": 126.88000500202179
- },
- "combine": {
- "p50": 110.97600311040878,
- "p90": 115.93600362539291,
- "p95": 118.27199906110764,
- "p99": 126.88000500202179
- },
- "roundtrip": {
- "p50": 110.97600311040878,
- "p90": 115.93600362539291,
- "p95": 118.27199906110764,
- "p99": 126.88000500202179
- },
- "isolatedSum": {
- "p50": 221.95200622081757,
- "p90": 231.87200725078583,
- "p95": 236.54399812221527,
- "p99": 253.76001000404358
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 124.41600114107132,
- "p90": 129.05600666999817,
- "p95": 130.5599957704544,
- "p99": 136.4479959011078
- },
- "combine": {
- "p50": 124.41600114107132,
- "p90": 129.05600666999817,
- "p95": 130.5599957704544,
- "p99": 136.4479959011078
- },
- "roundtrip": {
- "p50": 124.41600114107132,
- "p90": 129.05600666999817,
- "p95": 130.5599957704544,
- "p99": 136.4479959011078
- },
- "isolatedSum": {
- "p50": 248.83200228214264,
- "p90": 258.11201333999634,
- "p95": 261.1199915409088,
- "p99": 272.8959918022156
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-0d201725",
- "identity": "h100|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_1686fbdd",
- "comparisonKey": "27114da636b19722",
- "schemaVersion": 3,
- "generatedAt": "2026-06-28T01:37:57.511914+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_17",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · flashinfer · mxfp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "mxfp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28307780015",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307780015",
- "createdAt": "2026-06-28T01:37:57.511914+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 109.56799983978271,
- "p90": 113.98400366306305,
- "p95": 117.85600334405899,
- "p99": 129.72800433635712
- },
- "combine": {
- "p50": 109.56799983978271,
- "p90": 113.98400366306305,
- "p95": 117.85600334405899,
- "p99": 129.72800433635712
- },
- "roundtrip": {
- "p50": 109.56799983978271,
- "p90": 113.98400366306305,
- "p95": 117.85600334405899,
- "p99": 129.72800433635712
- },
- "isolatedSum": {
- "p50": 219.13599967956543,
- "p90": 227.9680073261261,
- "p95": 235.71200668811798,
- "p99": 259.45600867271423
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 109.11999642848969,
- "p90": 113.27999830245972,
- "p95": 115.35999923944473,
- "p99": 119.84000355005264
- },
- "combine": {
- "p50": 109.11999642848969,
- "p90": 113.27999830245972,
- "p95": 115.35999923944473,
- "p99": 119.84000355005264
- },
- "roundtrip": {
- "p50": 109.11999642848969,
- "p90": 113.27999830245972,
- "p95": 115.35999923944473,
- "p99": 119.84000355005264
- },
- "isolatedSum": {
- "p50": 218.23999285697937,
- "p90": 226.55999660491943,
- "p95": 230.71999847888947,
- "p99": 239.68000710010529
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 108.99200290441513,
- "p90": 112.99200356006622,
- "p95": 116.64000153541565,
- "p99": 122.36800044775009
- },
- "combine": {
- "p50": 108.99200290441513,
- "p90": 112.99200356006622,
- "p95": 116.64000153541565,
- "p99": 122.36800044775009
- },
- "roundtrip": {
- "p50": 108.99200290441513,
- "p90": 112.99200356006622,
- "p95": 116.64000153541565,
- "p99": 122.36800044775009
- },
- "isolatedSum": {
- "p50": 217.98400580883026,
- "p90": 225.98400712013245,
- "p95": 233.2800030708313,
- "p99": 244.73600089550018
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 108.67200046777725,
- "p90": 112.92800307273865,
- "p95": 115.1999980211258,
- "p99": 121.79200351238251
- },
- "combine": {
- "p50": 108.67200046777725,
- "p90": 112.92800307273865,
- "p95": 115.1999980211258,
- "p99": 121.79200351238251
- },
- "roundtrip": {
- "p50": 108.67200046777725,
- "p90": 112.92800307273865,
- "p95": 115.1999980211258,
- "p99": 121.79200351238251
- },
- "isolatedSum": {
- "p50": 217.3440009355545,
- "p90": 225.8560061454773,
- "p95": 230.3999960422516,
- "p99": 243.58400702476501
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 108.96000266075134,
- "p90": 113.47199976444244,
- "p95": 116.80000275373459,
- "p99": 125.40799379348755
- },
- "combine": {
- "p50": 108.96000266075134,
- "p90": 113.47199976444244,
- "p95": 116.80000275373459,
- "p99": 125.40799379348755
- },
- "roundtrip": {
- "p50": 108.96000266075134,
- "p90": 113.47199976444244,
- "p95": 116.80000275373459,
- "p99": 125.40799379348755
- },
- "isolatedSum": {
- "p50": 217.92000532150269,
- "p90": 226.9439995288849,
- "p95": 233.60000550746918,
- "p99": 250.8159875869751
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 108.86400192975998,
- "p90": 113.21599781513214,
- "p95": 116.60800129175186,
- "p99": 119.00799721479416
- },
- "combine": {
- "p50": 108.86400192975998,
- "p90": 113.21599781513214,
- "p95": 116.60800129175186,
- "p99": 119.00799721479416
- },
- "roundtrip": {
- "p50": 108.86400192975998,
- "p90": 113.21599781513214,
- "p95": 116.60800129175186,
- "p99": 119.00799721479416
- },
- "isolatedSum": {
- "p50": 217.72800385951996,
- "p90": 226.43199563026428,
- "p95": 233.21600258350372,
- "p99": 238.01599442958832
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 110.33599823713303,
- "p90": 115.42399972677231,
- "p95": 118.78400295972824,
- "p99": 129.88799810409546
- },
- "combine": {
- "p50": 110.33599823713303,
- "p90": 115.42399972677231,
- "p95": 118.78400295972824,
- "p99": 129.88799810409546
- },
- "roundtrip": {
- "p50": 110.33599823713303,
- "p90": 115.42399972677231,
- "p95": 118.78400295972824,
- "p99": 129.88799810409546
- },
- "isolatedSum": {
- "p50": 220.67199647426605,
- "p90": 230.84799945354462,
- "p95": 237.56800591945648,
- "p99": 259.7759962081909
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.84799665212631,
- "p90": 128.28800082206726,
- "p95": 130.048006772995,
- "p99": 133.08799266815186
- },
- "combine": {
- "p50": 122.84799665212631,
- "p90": 128.28800082206726,
- "p95": 130.048006772995,
- "p99": 133.08799266815186
- },
- "roundtrip": {
- "p50": 122.84799665212631,
- "p90": 128.28800082206726,
- "p95": 130.048006772995,
- "p99": 133.08799266815186
- },
- "isolatedSum": {
- "p50": 245.69599330425262,
- "p90": 256.5760016441345,
- "p95": 260.09601354599,
- "p99": 266.1759853363037
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-6fee4962",
- "identity": "h100|flashinfer|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_6c33dc8f",
- "comparisonKey": "ab2d6ab146526e25",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T17:55:14.883072+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_16",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · flashinfer · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28297139240",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28297139240",
- "createdAt": "2026-06-27T17:55:14.883072+00:00",
- "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 88.92799913883209,
- "p90": 92.38400310277939,
- "p95": 93.66399794816971,
- "p99": 97.50399738550186
- },
- "combine": {
- "p50": 88.92799913883209,
- "p90": 92.38400310277939,
- "p95": 93.66399794816971,
- "p99": 97.50399738550186
- },
- "roundtrip": {
- "p50": 88.92799913883209,
- "p90": 92.38400310277939,
- "p95": 93.66399794816971,
- "p99": 97.50399738550186
- },
- "isolatedSum": {
- "p50": 177.85599827766418,
- "p90": 184.76800620555878,
- "p95": 187.32799589633942,
- "p99": 195.00799477100372
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 149.9519944190979,
- "p90": 152.51199901103973,
- "p95": 153.50399911403656,
- "p99": 157.56799280643463
- },
- "combine": {
- "p50": 149.9519944190979,
- "p90": 152.51199901103973,
- "p95": 153.50399911403656,
- "p99": 157.56799280643463
- },
- "roundtrip": {
- "p50": 149.9519944190979,
- "p90": 152.51199901103973,
- "p95": 153.50399911403656,
- "p99": 157.56799280643463
- },
- "isolatedSum": {
- "p50": 299.9039888381958,
- "p90": 305.02399802207947,
- "p95": 307.0079982280731,
- "p99": 315.13598561286926
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 264.70398902893066,
- "p90": 268.38400959968567,
- "p95": 269.9199914932251,
- "p99": 279.07198667526245
- },
- "combine": {
- "p50": 264.70398902893066,
- "p90": 268.38400959968567,
- "p95": 269.9199914932251,
- "p99": 279.07198667526245
- },
- "roundtrip": {
- "p50": 264.70398902893066,
- "p90": 268.38400959968567,
- "p95": 269.9199914932251,
- "p99": 279.07198667526245
- },
- "isolatedSum": {
- "p50": 529.4079780578613,
- "p90": 536.7680191993713,
- "p95": 539.8399829864502,
- "p99": 558.1439733505249
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 498.6239969730377,
- "p90": 502.4319887161255,
- "p95": 504.2240023612976,
- "p99": 506.9440007209778
- },
- "combine": {
- "p50": 498.6239969730377,
- "p90": 502.4319887161255,
- "p95": 504.2240023612976,
- "p99": 506.9440007209778
- },
- "roundtrip": {
- "p50": 498.6239969730377,
- "p90": 502.4319887161255,
- "p95": 504.2240023612976,
- "p99": 506.9440007209778
- },
- "isolatedSum": {
- "p50": 997.2479939460754,
- "p90": 1004.863977432251,
- "p95": 1008.4480047225952,
- "p99": 1013.8880014419556
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 953.6640048027039,
- "p90": 962.0800018310547,
- "p95": 964.2559885978699,
- "p99": 967.9039716720581
- },
- "combine": {
- "p50": 953.6640048027039,
- "p90": 962.0800018310547,
- "p95": 964.2559885978699,
- "p99": 967.9039716720581
- },
- "roundtrip": {
- "p50": 953.6640048027039,
- "p90": 962.0800018310547,
- "p95": 964.2559885978699,
- "p99": 967.9039716720581
- },
- "isolatedSum": {
- "p50": 1907.3280096054077,
- "p90": 1924.1600036621094,
- "p95": 1928.5119771957397,
- "p99": 1935.8079433441162
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1854.8799753189087,
- "p90": 1862.4320030212402,
- "p95": 1864.2560243606567,
- "p99": 1869.7919845581055
- },
- "combine": {
- "p50": 1854.8799753189087,
- "p90": 1862.4320030212402,
- "p95": 1864.2560243606567,
- "p99": 1869.7919845581055
- },
- "roundtrip": {
- "p50": 1854.8799753189087,
- "p90": 1862.4320030212402,
- "p95": 1864.2560243606567,
- "p99": 1869.7919845581055
- },
- "isolatedSum": {
- "p50": 3709.7599506378174,
- "p90": 3724.8640060424805,
- "p95": 3728.5120487213135,
- "p99": 3739.583969116211
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-6d37a6fd",
- "identity": "h100|flashinfer|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_865f08c6",
- "comparisonKey": "7ac85b4ec0b69909",
- "schemaVersion": 3,
- "generatedAt": "2026-06-28T01:37:55.644705+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_00",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · flashinfer · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28307778986",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307778986",
- "createdAt": "2026-06-28T01:37:55.644705+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 101.15200281143188,
- "p90": 120.99199742078781,
- "p95": 122.17599898576736,
- "p99": 127.87200510501862
- },
- "combine": {
- "p50": 101.15200281143188,
- "p90": 120.99199742078781,
- "p95": 122.17599898576736,
- "p99": 127.87200510501862
- },
- "roundtrip": {
- "p50": 101.15200281143188,
- "p90": 120.99199742078781,
- "p95": 122.17599898576736,
- "p99": 127.87200510501862
- },
- "isolatedSum": {
- "p50": 202.30400562286377,
- "p90": 241.98399484157562,
- "p95": 244.35199797153473,
- "p99": 255.74401021003723
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 139.5840048789978,
- "p90": 168.60799491405487,
- "p95": 170.97599804401398,
- "p99": 179.1359931230545
- },
- "combine": {
- "p50": 139.5840048789978,
- "p90": 168.60799491405487,
- "p95": 170.97599804401398,
- "p99": 179.1359931230545
- },
- "roundtrip": {
- "p50": 139.5840048789978,
- "p90": 168.60799491405487,
- "p95": 170.97599804401398,
- "p99": 179.1359931230545
- },
- "isolatedSum": {
- "p50": 279.1680097579956,
- "p90": 337.21598982810974,
- "p95": 341.95199608802795,
- "p99": 358.271986246109
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 221.66399657726288,
- "p90": 232.7360063791275,
- "p95": 234.46400463581085,
- "p99": 239.00799453258514
- },
- "combine": {
- "p50": 221.66399657726288,
- "p90": 232.7360063791275,
- "p95": 234.46400463581085,
- "p99": 239.00799453258514
- },
- "roundtrip": {
- "p50": 221.66399657726288,
- "p90": 232.7360063791275,
- "p95": 234.46400463581085,
- "p99": 239.00799453258514
- },
- "isolatedSum": {
- "p50": 443.32799315452576,
- "p90": 465.472012758255,
- "p95": 468.9280092716217,
- "p99": 478.0159890651703
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 406.43200278282166,
- "p90": 415.19999504089355,
- "p95": 416.9920086860657,
- "p99": 419.74401473999023
- },
- "combine": {
- "p50": 406.43200278282166,
- "p90": 415.19999504089355,
- "p95": 416.9920086860657,
- "p99": 419.74401473999023
- },
- "roundtrip": {
- "p50": 406.43200278282166,
- "p90": 415.19999504089355,
- "p95": 416.9920086860657,
- "p99": 419.74401473999023
- },
- "isolatedSum": {
- "p50": 812.8640055656433,
- "p90": 830.3999900817871,
- "p95": 833.9840173721313,
- "p99": 839.4880294799805
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 764.959990978241,
- "p90": 773.6319899559021,
- "p95": 775.5839824676514,
- "p99": 795.3600287437439
- },
- "combine": {
- "p50": 764.959990978241,
- "p90": 773.6319899559021,
- "p95": 775.5839824676514,
- "p99": 795.3600287437439
- },
- "roundtrip": {
- "p50": 764.959990978241,
- "p90": 773.6319899559021,
- "p95": 775.5839824676514,
- "p99": 795.3600287437439
- },
- "isolatedSum": {
- "p50": 1529.919981956482,
- "p90": 1547.2639799118042,
- "p95": 1551.1679649353027,
- "p99": 1590.7200574874878
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1479.904055595398,
- "p90": 1490.8479452133179,
- "p95": 1496.7039823532104,
- "p99": 1506.6879987716675
- },
- "combine": {
- "p50": 1479.904055595398,
- "p90": 1490.8479452133179,
- "p95": 1496.7039823532104,
- "p99": 1506.6879987716675
- },
- "roundtrip": {
- "p50": 1479.904055595398,
- "p90": 1490.8479452133179,
- "p95": 1496.7039823532104,
- "p99": 1506.6879987716675
- },
- "isolatedSum": {
- "p50": 2959.808111190796,
- "p90": 2981.6958904266357,
- "p95": 2993.407964706421,
- "p99": 3013.375997543335
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-00728192",
- "identity": "h100|flashinfer|7168|8|256|mxfp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_1686fbdd",
- "comparisonKey": "f82129f37146e350",
- "schemaVersion": 3,
- "generatedAt": "2026-06-28T01:37:59.531491+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_18",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · flashinfer · mxfp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "mxfp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28307780015",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28307780015",
- "createdAt": "2026-06-28T01:37:59.531491+00:00",
- "sha": "510fc17001789ae4f32b99b60b9a0a0aa53ab6b5"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 100.19200295209885,
- "p90": 127.6479959487915,
- "p95": 128.7039965391159,
- "p99": 135.68000495433807
- },
- "combine": {
- "p50": 100.19200295209885,
- "p90": 127.6479959487915,
- "p95": 128.7039965391159,
- "p99": 135.68000495433807
- },
- "roundtrip": {
- "p50": 100.19200295209885,
- "p90": 127.6479959487915,
- "p95": 128.7039965391159,
- "p99": 135.68000495433807
- },
- "isolatedSum": {
- "p50": 200.3840059041977,
- "p90": 255.295991897583,
- "p95": 257.4079930782318,
- "p99": 271.36000990867615
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 139.26400244235992,
- "p90": 165.12000560760498,
- "p95": 166.62399470806122,
- "p99": 172.41600155830383
- },
- "combine": {
- "p50": 139.26400244235992,
- "p90": 165.12000560760498,
- "p95": 166.62399470806122,
- "p99": 172.41600155830383
- },
- "roundtrip": {
- "p50": 139.26400244235992,
- "p90": 165.12000560760498,
- "p95": 166.62399470806122,
- "p99": 172.41600155830383
- },
- "isolatedSum": {
- "p50": 278.52800488471985,
- "p90": 330.24001121520996,
- "p95": 333.24798941612244,
- "p99": 344.83200311660767
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 223.90399873256683,
- "p90": 230.3999960422516,
- "p95": 232.12799429893494,
- "p99": 235.29599606990814
- },
- "combine": {
- "p50": 223.90399873256683,
- "p90": 230.3999960422516,
- "p95": 232.12799429893494,
- "p99": 235.29599606990814
- },
- "roundtrip": {
- "p50": 223.90399873256683,
- "p90": 230.3999960422516,
- "p95": 232.12799429893494,
- "p99": 235.29599606990814
- },
- "isolatedSum": {
- "p50": 447.80799746513367,
- "p90": 460.7999920845032,
- "p95": 464.2559885978699,
- "p99": 470.5919921398163
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 407.26399421691895,
- "p90": 413.5040044784546,
- "p95": 415.3600037097931,
- "p99": 419.0399944782257
- },
- "combine": {
- "p50": 407.26399421691895,
- "p90": 413.5040044784546,
- "p95": 415.3600037097931,
- "p99": 419.0399944782257
- },
- "roundtrip": {
- "p50": 407.26399421691895,
- "p90": 413.5040044784546,
- "p95": 415.3600037097931,
- "p99": 419.0399944782257
- },
- "isolatedSum": {
- "p50": 814.5279884338379,
- "p90": 827.0080089569092,
- "p95": 830.7200074195862,
- "p99": 838.0799889564514
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 767.7760124206543,
- "p90": 772.8639841079712,
- "p95": 775.1359939575195,
- "p99": 777.8880000114441
- },
- "combine": {
- "p50": 767.7760124206543,
- "p90": 772.8639841079712,
- "p95": 775.1359939575195,
- "p99": 777.8880000114441
- },
- "roundtrip": {
- "p50": 767.7760124206543,
- "p90": 772.8639841079712,
- "p95": 775.1359939575195,
- "p99": 777.8880000114441
- },
- "isolatedSum": {
- "p50": 1535.5520248413086,
- "p90": 1545.7279682159424,
- "p95": 1550.271987915039,
- "p99": 1555.7760000228882
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1483.0399751663208,
- "p90": 1491.7759895324707,
- "p95": 1494.2400455474854,
- "p99": 1497.5039958953857
- },
- "combine": {
- "p50": 1483.0399751663208,
- "p90": 1491.7759895324707,
- "p95": 1494.2400455474854,
- "p99": 1497.5039958953857
- },
- "roundtrip": {
- "p50": 1483.0399751663208,
- "p90": 1491.7759895324707,
- "p95": 1494.2400455474854,
- "p99": 1497.5039958953857
- },
- "isolatedSum": {
- "p50": 2966.0799503326416,
- "p90": 2983.5519790649414,
- "p95": 2988.4800910949707,
- "p99": 2995.0079917907715
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-5657eb6e",
- "identity": "h100|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h100_7104d5f0",
- "comparisonKey": "d2fd76f5ec2f3d88",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T17:35:51.567423+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_14",
- "sku": "h100",
- "backend": "uccl",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · uccl · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28296667411",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296667411",
- "createdAt": "2026-06-27T17:35:51.567423+00:00",
- "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 127.68000364303589,
- "p90": 134.3040019273758,
- "p95": 136.60800457000732,
- "p99": 143.93599331378937
- },
- "combine": {
- "p50": 88.0960002541542,
- "p90": 90.36800265312195,
- "p95": 91.32800251245499,
- "p99": 270.30399441719055
- },
- "roundtrip": {
- "p50": 200.28799772262573,
- "p90": 205.56800067424774,
- "p95": 207.42399990558624,
- "p99": 212.79999613761902
- },
- "isolatedSum": {
- "p50": 215.7760038971901,
- "p90": 224.67200458049774,
- "p95": 227.9360070824623,
- "p99": 414.2399877309799
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 179.51999604701996,
- "p90": 186.17600202560425,
- "p95": 189.2160028219223,
- "p99": 194.91200149059296
- },
- "combine": {
- "p50": 99.20000284910202,
- "p90": 105.82400113344193,
- "p95": 107.19999670982361,
- "p99": 191.64800643920898
- },
- "roundtrip": {
- "p50": 254.84800338745117,
- "p90": 262.7840042114258,
- "p95": 265.4719948768616,
- "p99": 418.8799858093262
- },
- "isolatedSum": {
- "p50": 278.719998896122,
- "p90": 292.0000031590462,
- "p95": 296.4159995317459,
- "p99": 386.56000792980194
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 130.87999820709229,
- "p90": 193.4400051832199,
- "p95": 196.57599925994873,
- "p99": 200.95999538898468
- },
- "combine": {
- "p50": 89.72799777984619,
- "p90": 107.26399719715118,
- "p95": 108.06400328874588,
- "p99": 112.31999844312668
- },
- "roundtrip": {
- "p50": 204.67199385166168,
- "p90": 272.352010011673,
- "p95": 274.78399872779846,
- "p99": 282.30398893356323
- },
- "isolatedSum": {
- "p50": 220.60799598693848,
- "p90": 300.7040023803711,
- "p95": 304.6400025486946,
- "p99": 313.27999383211136
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 133.05599987506866,
- "p90": 187.23200261592865,
- "p95": 195.93599438667297,
- "p99": 479.0079891681671
- },
- "combine": {
- "p50": 89.75999802350998,
- "p90": 104.73600029945374,
- "p95": 105.92000186443329,
- "p99": 108.0000028014183
- },
- "roundtrip": {
- "p50": 205.63200116157532,
- "p90": 260.44800877571106,
- "p95": 262.36799359321594,
- "p99": 269.79199051856995
- },
- "isolatedSum": {
- "p50": 222.81599789857864,
- "p90": 291.9680029153824,
- "p95": 301.85599625110626,
- "p99": 587.0079919695854
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 129.60000336170197,
- "p90": 195.16800343990326,
- "p95": 197.60000705718994,
- "p99": 203.2960057258606
- },
- "combine": {
- "p50": 90.52799642086029,
- "p90": 107.29599744081497,
- "p95": 108.15999656915665,
- "p99": 114.30399864912033
- },
- "roundtrip": {
- "p50": 206.59199357032776,
- "p90": 274.6559977531433,
- "p95": 275.9360074996948,
- "p99": 280.7680070400238
- },
- "isolatedSum": {
- "p50": 220.12799978256226,
- "p90": 302.46400088071823,
- "p95": 305.7600036263466,
- "p99": 317.6000043749809
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 181.7920058965683,
- "p90": 195.68000733852386,
- "p95": 198.81600141525269,
- "p99": 324.47999715805054
- },
- "combine": {
- "p50": 108.12799632549286,
- "p90": 115.39199948310852,
- "p95": 116.19199812412262,
- "p99": 118.97599697113037
- },
- "roundtrip": {
- "p50": 263.7439966201782,
- "p90": 279.83999252319336,
- "p95": 281.43998980522156,
- "p99": 286.20800375938416
- },
- "isolatedSum": {
- "p50": 289.92000222206116,
- "p90": 311.0720068216324,
- "p95": 315.0079995393753,
- "p99": 443.4559941291809
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 147.71200716495514,
- "p90": 197.11999595165253,
- "p95": 200.3840059041977,
- "p99": 211.67999505996704
- },
- "combine": {
- "p50": 105.95200210809708,
- "p90": 124.32000041007996,
- "p95": 125.2799928188324,
- "p99": 129.98400628566742
- },
- "roundtrip": {
- "p50": 221.0880070924759,
- "p90": 289.40799832344055,
- "p95": 292.28800535202026,
- "p99": 295.77600955963135
- },
- "isolatedSum": {
- "p50": 253.66400927305222,
- "p90": 321.4399963617325,
- "p95": 325.6639987230301,
- "p99": 341.66400134563446
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 186.24000251293182,
- "p90": 199.74400103092194,
- "p95": 202.55999267101288,
- "p99": 208.03199708461761
- },
- "combine": {
- "p50": 134.20799374580383,
- "p90": 139.96799290180206,
- "p95": 141.15199446678162,
- "p99": 147.2640037536621
- },
- "roundtrip": {
- "p50": 292.32001304626465,
- "p90": 306.62399530410767,
- "p95": 309.63200330734253,
- "p99": 314.5279884338379
- },
- "isolatedSum": {
- "p50": 320.44799625873566,
- "p90": 339.711993932724,
- "p95": 343.7119871377945,
- "p99": 355.2960008382797
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8af55e63",
- "identity": "h100|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h100_7104d5f0",
- "comparisonKey": "4f16a23c02cdc2c5",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T17:35:56.194527+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h100-dgxc-slurm_07",
- "sku": "h100",
- "backend": "uccl",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H100 EP8 · uccl · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28296667411",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296667411",
- "createdAt": "2026-06-27T17:35:56.194527+00:00",
- "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 161.1199975013733,
- "p90": 165.24800658226013,
- "p95": 166.9120043516159,
- "p99": 170.84799706935883
- },
- "combine": {
- "p50": 120.99199742078781,
- "p90": 123.10399860143661,
- "p95": 128.03199887275696,
- "p99": 143.99999380111694
- },
- "roundtrip": {
- "p50": 242.01600253582,
- "p90": 246.7840015888214,
- "p95": 248.86399507522583,
- "p99": 252.70399451255798
- },
- "isolatedSum": {
- "p50": 282.1119949221611,
- "p90": 288.35200518369675,
- "p95": 294.94400322437286,
- "p99": 314.84799087047577
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 186.5919977426529,
- "p90": 191.42399728298187,
- "p95": 193.15199553966522,
- "p99": 197.31199741363525
- },
- "combine": {
- "p50": 164.67200219631195,
- "p90": 170.04799842834473,
- "p95": 171.23199999332428,
- "p99": 175.04000663757324
- },
- "roundtrip": {
- "p50": 305.08801341056824,
- "p90": 309.56798791885376,
- "p95": 310.9759986400604,
- "p99": 315.42399525642395
- },
- "isolatedSum": {
- "p50": 351.26399993896484,
- "p90": 361.4719957113266,
- "p95": 364.3839955329895,
- "p99": 372.3520040512085
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 239.55200612545013,
- "p90": 243.68000030517578,
- "p95": 245.31200528144836,
- "p99": 250.62400102615356
- },
- "combine": {
- "p50": 242.78399348258972,
- "p90": 246.848002076149,
- "p95": 248.60799312591553,
- "p99": 251.8720030784607
- },
- "roundtrip": {
- "p50": 442.4000084400177,
- "p90": 448.35200905799866,
- "p95": 450.20800828933716,
- "p99": 453.92000675201416
- },
- "isolatedSum": {
- "p50": 482.33599960803986,
- "p90": 490.52800238132477,
- "p95": 493.9199984073639,
- "p99": 502.49600410461426
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 346.8799889087677,
- "p90": 351.4240086078644,
- "p95": 353.4719944000244,
- "p99": 358.0799996852875
- },
- "combine": {
- "p50": 376.6399919986725,
- "p90": 383.4240138530731,
- "p95": 385.79198718070984,
- "p99": 474.2400050163269
- },
- "roundtrip": {
- "p50": 684.0000152587891,
- "p90": 691.3920044898987,
- "p95": 693.8560009002686,
- "p99": 700.4479765892029
- },
- "isolatedSum": {
- "p50": 723.5199809074402,
- "p90": 734.8480224609375,
- "p95": 739.2639815807343,
- "p99": 832.3200047016144
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 571.1039900779724,
- "p90": 600.8960008621216,
- "p95": 606.1760187149048,
- "p99": 621.1839914321899
- },
- "combine": {
- "p50": 647.5840210914612,
- "p90": 655.7440161705017,
- "p95": 657.9520106315613,
- "p99": 664.9919748306274
- },
- "roundtrip": {
- "p50": 1174.720048904419,
- "p90": 1189.0239715576172,
- "p95": 1194.3999528884888,
- "p99": 1201.1200189590454
- },
- "isolatedSum": {
- "p50": 1218.6880111694336,
- "p90": 1256.6400170326233,
- "p95": 1264.128029346466,
- "p99": 1286.1759662628174
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1035.5839729309082,
- "p90": 1058.0799579620361,
- "p95": 1064.9919509887695,
- "p99": 1074.463963508606
- },
- "combine": {
- "p50": 1176.1280298233032,
- "p90": 1185.5679750442505,
- "p95": 1188.6399984359741,
- "p99": 1197.376012802124
- },
- "roundtrip": {
- "p50": 2155.263900756836,
- "p90": 2171.488046646118,
- "p95": 2174.815893173218,
- "p99": 2184.2238903045654
- },
- "isolatedSum": {
- "p50": 2211.7120027542114,
- "p90": 2243.6479330062866,
- "p95": 2253.6319494247437,
- "p99": 2271.83997631073
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-7d1c49e4",
- "identity": "h200|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "h200_d982b749",
- "comparisonKey": "d546c8db19c82066",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:25.842054+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287506806",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287506806",
- "createdAt": "2026-06-27T11:14:25.842054+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 68.64000111818314,
- "p90": 105.82400113344193,
- "p95": 117.15199798345566,
- "p99": 156.89599514007568
- },
- "combine": {
- "p50": 59.87200140953064,
- "p90": 75.13599842786789,
- "p95": 80.83199709653854,
- "p99": 98.75199943780899
- },
- "roundtrip": {
- "p50": 113.79200220108032,
- "p90": 150.01599490642548,
- "p95": 160.73599457740784,
- "p99": 198.7520009279251
- },
- "isolatedSum": {
- "p50": 128.51200252771378,
- "p90": 180.95999956130981,
- "p95": 197.9839950799942,
- "p99": 255.64799457788467
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 344064,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 69.37599927186966,
- "p90": 103.67999970912933,
- "p95": 114.3679991364479,
- "p99": 147.96799421310425
- },
- "combine": {
- "p50": 59.67999994754791,
- "p90": 74.40000027418137,
- "p95": 81.66400343179703,
- "p99": 100.80000013113022
- },
- "roundtrip": {
- "p50": 113.82400244474411,
- "p90": 150.56000649929047,
- "p95": 163.16799819469452,
- "p99": 199.74400103092194
- },
- "isolatedSum": {
- "p50": 129.05599921941757,
- "p90": 178.0799999833107,
- "p95": 196.03200256824493,
- "p99": 248.76799434423447
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 704512,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 67.10399687290192,
- "p90": 91.45600348711014,
- "p95": 103.90400141477585,
- "p99": 139.615997672081
- },
- "combine": {
- "p50": 59.39200147986412,
- "p90": 71.87200337648392,
- "p95": 76.09599828720093,
- "p99": 94.52799707651138
- },
- "roundtrip": {
- "p50": 110.81600189208984,
- "p90": 141.59999787807465,
- "p95": 150.39999783039093,
- "p99": 204.12799715995789
- },
- "isolatedSum": {
- "p50": 126.49599835276604,
- "p90": 163.32800686359406,
- "p95": 179.99999970197678,
- "p99": 234.14399474859238
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1384448,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 72.12799787521362,
- "p90": 104.22399640083313,
- "p95": 114.46399986743927,
- "p99": 165.72800278663635
- },
- "combine": {
- "p50": 60.47999858856201,
- "p90": 74.20799881219864,
- "p95": 82.30400085449219,
- "p99": 100.09600222110748
- },
- "roundtrip": {
- "p50": 112.5119999051094,
- "p90": 143.71199905872345,
- "p95": 156.25600516796112,
- "p99": 205.53599298000336
- },
- "isolatedSum": {
- "p50": 132.60799646377563,
- "p90": 178.43199521303177,
- "p95": 196.76800072193146,
- "p99": 265.82400500774384
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2744320,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 73.56800138950348,
- "p90": 99.35999661684036,
- "p95": 106.33599758148193,
- "p99": 118.81600320339203
- },
- "combine": {
- "p50": 60.736000537872314,
- "p90": 74.94399696588516,
- "p95": 80.79999685287476,
- "p99": 96.63999825716019
- },
- "roundtrip": {
- "p50": 116.67200177907944,
- "p90": 153.9199948310852,
- "p95": 182.3360025882721,
- "p99": 242.97599494457245
- },
- "isolatedSum": {
- "p50": 134.3040019273758,
- "p90": 174.30399358272552,
- "p95": 187.1359944343567,
- "p99": 215.45600146055222
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5464064,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 74.72000271081924,
- "p90": 101.98400169610977,
- "p95": 109.79200154542923,
- "p99": 140.28799533843994
- },
- "combine": {
- "p50": 62.68800050020218,
- "p90": 80.38400113582611,
- "p95": 86.91199868917465,
- "p99": 119.71200257539749
- },
- "roundtrip": {
- "p50": 116.83200299739838,
- "p90": 152.19199657440186,
- "p95": 162.56000101566315,
- "p99": 194.75199282169342
- },
- "isolatedSum": {
- "p50": 137.40800321102142,
- "p90": 182.36800283193588,
- "p95": 196.70400023460388,
- "p99": 259.99999791383743
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 11124736,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 83.48800241947174,
- "p90": 127.96799838542938,
- "p95": 135.3919953107834,
- "p99": 240.1919960975647
- },
- "combine": {
- "p50": 70.3359991312027,
- "p90": 83.96799862384796,
- "p95": 89.9519994854927,
- "p99": 99.61599856615067
- },
- "roundtrip": {
- "p50": 128.4160017967224,
- "p90": 151.74399316310883,
- "p95": 159.42400693893433,
- "p99": 176.12800002098083
- },
- "isolatedSum": {
- "p50": 153.82400155067444,
- "p90": 211.93599700927734,
- "p95": 225.3439947962761,
- "p99": 339.80799466371536
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22192128,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 90.46400338411331,
- "p90": 113.47199976444244,
- "p95": 120.19199877977371,
- "p99": 153.6960005760193
- },
- "combine": {
- "p50": 84.06399935483932,
- "p90": 97.6639986038208,
- "p95": 102.30399668216705,
- "p99": 120.31999975442886
- },
- "roundtrip": {
- "p50": 152.6080071926117,
- "p90": 178.24000120162964,
- "p95": 190.72000682353973,
- "p99": 231.99999332427979
- },
- "isolatedSum": {
- "p50": 174.52800273895264,
- "p90": 211.13599836826324,
- "p95": 222.49599546194077,
- "p99": 274.01600033044815
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-ab8f0534",
- "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "40ee6d196d286895",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:53:38.574880+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271743900",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271743900",
- "createdAt": "2026-06-26T23:53:38.574880+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 68.64000111818314,
- "p90": 89.56799656152725,
- "p95": 96.41599655151367,
- "p99": 126.36800110340118
- },
- "combine": {
- "p50": 58.04799869656563,
- "p90": 69.60000097751617,
- "p95": 74.52800124883652,
- "p99": 91.80799871683121
- },
- "roundtrip": {
- "p50": 112.73600161075592,
- "p90": 135.93600690364838,
- "p95": 145.7280069589615,
- "p99": 215.26400744915009
- },
- "isolatedSum": {
- "p50": 126.68799981474876,
- "p90": 159.16799753904343,
- "p95": 170.9439978003502,
- "p99": 218.1759998202324
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 344064,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 69.34399902820587,
- "p90": 88.22400122880936,
- "p95": 94.68799829483032,
- "p99": 116.15999788045883
- },
- "combine": {
- "p50": 58.94400179386139,
- "p90": 68.70400160551071,
- "p95": 72.03199714422226,
- "p99": 83.52000266313553
- },
- "roundtrip": {
- "p50": 112.89600282907486,
- "p90": 138.3039951324463,
- "p95": 150.52799880504608,
- "p99": 196.51199877262115
- },
- "isolatedSum": {
- "p50": 128.28800082206726,
- "p90": 156.92800283432007,
- "p95": 166.71999543905258,
- "p99": 199.68000054359436
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 704512,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 70.46400010585785,
- "p90": 84.63999629020691,
- "p95": 92.0960009098053,
- "p99": 110.78400164842606
- },
- "combine": {
- "p50": 60.28800085186958,
- "p90": 70.91200351715088,
- "p95": 75.16799867153168,
- "p99": 87.5839963555336
- },
- "roundtrip": {
- "p50": 114.20799791812897,
- "p90": 135.68000495433807,
- "p95": 147.64800667762756,
- "p99": 195.5520063638687
- },
- "isolatedSum": {
- "p50": 130.75200095772743,
- "p90": 155.5519998073578,
- "p95": 167.26399958133698,
- "p99": 198.36799800395966
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1384448,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 72.38399982452393,
- "p90": 103.71199995279312,
- "p95": 118.72000247240067,
- "p99": 215.61600267887115
- },
- "combine": {
- "p50": 61.055999249219894,
- "p90": 76.03199779987335,
- "p95": 81.7599967122078,
- "p99": 112.57600039243698
- },
- "roundtrip": {
- "p50": 115.84000289440155,
- "p90": 143.51999759674072,
- "p95": 151.67999267578125,
- "p99": 190.46400487422943
- },
- "isolatedSum": {
- "p50": 133.43999907374382,
- "p90": 179.74399775266647,
- "p95": 200.47999918460846,
- "p99": 328.19200307130814
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2744320,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 75.23199915885925,
- "p90": 102.04800218343735,
- "p95": 107.87200182676315,
- "p99": 130.20800054073334
- },
- "combine": {
- "p50": 61.792001128196716,
- "p90": 71.16799801588058,
- "p95": 76.64000242948532,
- "p99": 86.84799820184708
- },
- "roundtrip": {
- "p50": 116.92799627780914,
- "p90": 138.2399946451187,
- "p95": 147.96799421310425,
- "p99": 179.967999458313
- },
- "isolatedSum": {
- "p50": 137.02400028705597,
- "p90": 173.21600019931793,
- "p95": 184.51200425624847,
- "p99": 217.0559987425804
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5464064,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 74.75200295448303,
- "p90": 90.52799642086029,
- "p95": 98.75199943780899,
- "p99": 135.48800349235535
- },
- "combine": {
- "p50": 63.74400109052658,
- "p90": 71.71200215816498,
- "p95": 78.78399640321732,
- "p99": 91.07200056314468
- },
- "roundtrip": {
- "p50": 119.9679970741272,
- "p90": 145.47200500965118,
- "p95": 149.50400590896606,
- "p99": 165.8879965543747
- },
- "isolatedSum": {
- "p50": 138.4960040450096,
- "p90": 162.23999857902527,
- "p95": 177.5359958410263,
- "p99": 226.56000405550003
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 11124736,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 84.60800349712372,
- "p90": 103.13600301742554,
- "p95": 112.22399771213531,
- "p99": 138.11199367046356
- },
- "combine": {
- "p50": 72.03199714422226,
- "p90": 82.78399705886841,
- "p95": 89.56799656152725,
- "p99": 104.92800176143646
- },
- "roundtrip": {
- "p50": 131.48799538612366,
- "p90": 145.50399780273438,
- "p95": 155.8080017566681,
- "p99": 189.66400623321533
- },
- "isolatedSum": {
- "p50": 156.64000064134598,
- "p90": 185.92000007629395,
- "p95": 201.79199427366257,
- "p99": 243.03999543190002
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22192128,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 94.2080020904541,
- "p90": 120.2239990234375,
- "p95": 133.82400572299957,
- "p99": 215.68000316619873
- },
- "combine": {
- "p50": 82.8159973025322,
- "p90": 92.70399808883667,
- "p95": 96.12800180912018,
- "p99": 107.04000294208527
- },
- "roundtrip": {
- "p50": 152.22400426864624,
- "p90": 168.32000017166138,
- "p95": 176.2239933013916,
- "p99": 196.03200256824493
- },
- "isolatedSum": {
- "p50": 177.0239993929863,
- "p90": 212.92799711227417,
- "p95": 229.95200753211975,
- "p99": 322.720006108284
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-3d690e39",
- "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "540c08b08c068f8c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:06.885074+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_4",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "shape 5120/8/160",
- "shape": {
- "hidden": 5120,
- "topk": 8,
- "experts": 160,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "be1b44a963bd4ef",
- "workloadId": "set:8:34e5874082f8ea8f",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271759919",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271759919",
- "createdAt": "2026-06-26T23:54:06.885074+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 70.49600034952164,
- "p90": 102.1760031580925,
- "p95": 111.90400272607803,
- "p99": 133.34399461746216
- },
- "combine": {
- "p50": 60.5119988322258,
- "p90": 72.9919970035553,
- "p95": 79.55200225114822,
- "p99": 90.55999666452408
- },
- "roundtrip": {
- "p50": 113.8560026884079,
- "p90": 143.5839980840683,
- "p95": 150.94399452209473,
- "p99": 190.14400243759155
- },
- "isolatedSum": {
- "p50": 131.00799918174744,
- "p90": 175.1680001616478,
- "p95": 191.45600497722626,
- "p99": 223.90399128198624
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 430080,
- "combineLogicalBytes": 430080,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.88799637556076,
- "p90": 109.43999886512756,
- "p95": 123.74400347471237,
- "p99": 176.2239933013916
- },
- "combine": {
- "p50": 62.463998794555664,
- "p90": 76.4480009675026,
- "p95": 81.37600123882294,
- "p99": 89.6959975361824
- },
- "roundtrip": {
- "p50": 118.40000003576279,
- "p90": 146.7839926481247,
- "p95": 154.88000214099884,
- "p99": 198.0160027742386
- },
- "isolatedSum": {
- "p50": 136.35199517011642,
- "p90": 185.88799983263016,
- "p95": 205.1200047135353,
- "p99": 265.919990837574
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 880640,
- "combineLogicalBytes": 880640,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 72.12799787521362,
- "p90": 100.8640006184578,
- "p95": 107.84000158309937,
- "p99": 182.5920045375824
- },
- "combine": {
- "p50": 62.24000081419945,
- "p90": 77.504001557827,
- "p95": 82.36800134181976,
- "p99": 100.22400319576263
- },
- "roundtrip": {
- "p50": 116.64000153541565,
- "p90": 148.3840048313141,
- "p95": 158.49600732326508,
- "p99": 193.34399700164795
- },
- "isolatedSum": {
- "p50": 134.36799868941307,
- "p90": 178.3680021762848,
- "p95": 190.20800292491913,
- "p99": 282.81600773334503
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1740800,
- "combineLogicalBytes": 1740800,
- "fanoutMean": 5.3125,
- "recvTokensMax": 25,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 72.60800153017044,
- "p90": 101.1200025677681,
- "p95": 114.1119971871376,
- "p99": 128.06400656700134
- },
- "combine": {
- "p50": 63.74400109052658,
- "p90": 79.26400005817413,
- "p95": 85.50400286912918,
- "p99": 120.03199756145477
- },
- "roundtrip": {
- "p50": 117.53600090742111,
- "p90": 147.74399995803833,
- "p95": 156.8319946527481,
- "p99": 184.54399704933167
- },
- "isolatedSum": {
- "p50": 136.35200262069702,
- "p90": 180.38400262594223,
- "p95": 199.61600005626678,
- "p99": 248.09600412845612
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3471360,
- "combineLogicalBytes": 3471360,
- "fanoutMean": 5.296875,
- "recvTokensMax": 50,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 75.9039968252182,
- "p90": 101.79200023412704,
- "p95": 111.77600175142288,
- "p99": 127.9039978981018
- },
- "combine": {
- "p50": 64.41599875688553,
- "p90": 79.68000322580338,
- "p95": 84.06399935483932,
- "p99": 103.61599922180176
- },
- "roundtrip": {
- "p50": 124.09599870443344,
- "p90": 154.91199493408203,
- "p95": 167.35999286174774,
- "p99": 218.6560034751892
- },
- "isolatedSum": {
- "p50": 140.31999558210373,
- "p90": 181.47200345993042,
- "p95": 195.8400011062622,
- "p99": 231.51999711990356
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6912000,
- "combineLogicalBytes": 6912000,
- "fanoutMean": 5.2734375,
- "recvTokensMax": 93,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 77.63200253248215,
- "p90": 102.08000242710114,
- "p95": 110.1439967751503,
- "p99": 138.5280042886734
- },
- "combine": {
- "p50": 68.4799998998642,
- "p90": 83.45600217580795,
- "p95": 89.50400352478027,
- "p99": 97.82399982213974
- },
- "roundtrip": {
- "p50": 122.81599640846252,
- "p90": 153.50399911403656,
- "p95": 163.13600540161133,
- "p99": 190.5599981546402
- },
- "isolatedSum": {
- "p50": 146.11200243234634,
- "p90": 185.5360046029091,
- "p95": 199.64800029993057,
- "p99": 236.35200411081314
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13977600,
- "combineLogicalBytes": 13977600,
- "fanoutMean": 5.33203125,
- "recvTokensMax": 179,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 91.90399944782257,
- "p90": 113.08799684047699,
- "p95": 123.52000176906586,
- "p99": 162.9759967327118
- },
- "combine": {
- "p50": 77.15199887752533,
- "p90": 91.13600105047226,
- "p95": 97.59999811649323,
- "p99": 112.06399649381638
- },
- "roundtrip": {
- "p50": 140.47999680042267,
- "p90": 166.75199568271637,
- "p95": 175.9359985589981,
- "p99": 250.20799040794373
- },
- "isolatedSum": {
- "p50": 169.0559983253479,
- "p90": 204.22399789094925,
- "p95": 221.11999988555908,
- "p99": 275.03999322652817
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 27975680,
- "combineLogicalBytes": 27975680,
- "fanoutMean": 5.3359375,
- "recvTokensMax": 355,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 99.07200187444687,
- "p90": 122.27199971675873,
- "p95": 127.42400169372559,
- "p99": 146.7519998550415
- },
- "combine": {
- "p50": 90.87999910116196,
- "p90": 105.3759977221489,
- "p95": 109.37599837779999,
- "p99": 125.37600100040436
- },
- "roundtrip": {
- "p50": 166.4319932460785,
- "p90": 186.5919977426529,
- "p95": 193.12000274658203,
- "p99": 222.01600670814514
- },
- "isolatedSum": {
- "p50": 189.95200097560883,
- "p90": 227.64799743890762,
- "p95": 236.80000007152557,
- "p99": 272.12800085544586
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 55674880,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-59f585e0",
- "identity": "h200|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_d982b749",
- "comparisonKey": "6df8e885c58ea75d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:46.508858+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_12",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287495061",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287495061",
- "createdAt": "2026-06-27T11:13:46.508858+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 71.96799665689468,
- "p90": 97.9200005531311,
- "p95": 106.1440035700798,
- "p99": 119.10399794578552
- },
- "combine": {
- "p50": 65.95200300216675,
- "p90": 76.12799853086472,
- "p95": 81.56800270080566,
- "p99": 110.07999628782272
- },
- "roundtrip": {
- "p50": 118.6240017414093,
- "p90": 145.50399780273438,
- "p95": 153.9520025253296,
- "p99": 180.63999712467194
- },
- "isolatedSum": {
- "p50": 137.91999965906143,
- "p90": 174.04799908399582,
- "p95": 187.71200627088547,
- "p99": 229.18399423360825
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 540672,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 72.95999675989151,
- "p90": 100.09600222110748,
- "p95": 107.84000158309937,
- "p99": 141.76000654697418
- },
- "combine": {
- "p50": 66.17599725723267,
- "p90": 77.37600058317184,
- "p95": 85.02399921417236,
- "p99": 103.13600301742554
- },
- "roundtrip": {
- "p50": 120.60800194740295,
- "p90": 148.41599762439728,
- "p95": 158.1439971923828,
- "p99": 177.5359958410263
- },
- "isolatedSum": {
- "p50": 139.13599401712418,
- "p90": 177.47200280427933,
- "p95": 192.86400079727173,
- "p99": 244.89600956439972
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1056768,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.60000163316727,
- "p90": 98.49599748849869,
- "p95": 112.76800185441971,
- "p99": 134.8479986190796
- },
- "combine": {
- "p50": 67.00800359249115,
- "p90": 76.54400169849396,
- "p95": 85.50400286912918,
- "p99": 110.17599701881409
- },
- "roundtrip": {
- "p50": 118.8800036907196,
- "p90": 144.57599818706512,
- "p95": 156.44800662994385,
- "p99": 188.83199989795685
- },
- "isolatedSum": {
- "p50": 140.60800522565842,
- "p90": 175.03999918699265,
- "p95": 198.2720047235489,
- "p99": 245.02399563789368
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2125824,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.18399846553802,
- "p90": 94.24000233411789,
- "p95": 103.00800204277039,
- "p99": 124.57600235939026
- },
- "combine": {
- "p50": 67.1359971165657,
- "p90": 76.67200267314911,
- "p95": 87.42400258779526,
- "p99": 107.26399719715118
- },
- "roundtrip": {
- "p50": 122.3360002040863,
- "p90": 178.39999496936798,
- "p95": 188.54400515556335,
- "p99": 224.31999444961548
- },
- "isolatedSum": {
- "p50": 140.31999558210373,
- "p90": 170.912005007267,
- "p95": 190.43200463056564,
- "p99": 231.83999955654144
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4263936,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 74.27199929952621,
- "p90": 96.76799923181534,
- "p95": 109.18399691581726,
- "p99": 126.97599828243256
- },
- "combine": {
- "p50": 68.1919977068901,
- "p90": 80.51200211048126,
- "p95": 88.51200342178345,
- "p99": 103.84000092744827
- },
- "roundtrip": {
- "p50": 122.75200337171555,
- "p90": 148.70400726795197,
- "p95": 161.3440066576004,
- "p99": 200.6080001592636
- },
- "isolatedSum": {
- "p50": 142.46399700641632,
- "p90": 177.2800013422966,
- "p95": 197.6960003376007,
- "p99": 230.81599920988083
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 8503296,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 85.79199761152267,
- "p90": 118.30399930477142,
- "p95": 128.28800082206726,
- "p99": 147.87200093269348
- },
- "combine": {
- "p50": 74.40000027418137,
- "p90": 87.55200356245041,
- "p95": 92.0960009098053,
- "p99": 110.07999628782272
- },
- "roundtrip": {
- "p50": 130.72000443935394,
- "p90": 155.20000457763672,
- "p95": 167.4560010433197,
- "p99": 208.48000049591064
- },
- "isolatedSum": {
- "p50": 160.19199788570404,
- "p90": 205.85600286722183,
- "p95": 220.38400173187256,
- "p99": 257.9519972205162
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 16908288,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 91.07200056314468,
- "p90": 110.81600189208984,
- "p95": 117.53600090742111,
- "p99": 137.1839940547943
- },
- "combine": {
- "p50": 83.26400071382523,
- "p90": 94.36800330877304,
- "p95": 99.71199929714203,
- "p99": 128.00000607967377
- },
- "roundtrip": {
- "p50": 149.9200016260147,
- "p90": 169.5680022239685,
- "p95": 179.29600179195404,
- "p99": 200.41599869728088
- },
- "isolatedSum": {
- "p50": 174.3360012769699,
- "p90": 205.18400520086288,
- "p95": 217.24800020456314,
- "p99": 265.1840001344681
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33423360,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 110.55999994277954,
- "p90": 124.09599870443344,
- "p95": 132.192000746727,
- "p99": 179.00800704956055
- },
- "combine": {
- "p50": 96.41599655151367,
- "p90": 108.51199924945831,
- "p95": 115.84000289440155,
- "p99": 140.79999923706055
- },
- "roundtrip": {
- "p50": 180.38399517536163,
- "p90": 201.02399587631226,
- "p95": 209.75999534130096,
- "p99": 226.6560047864914
- },
- "isolatedSum": {
- "p50": 206.9759964942932,
- "p90": 232.60799795389175,
- "p95": 248.03200364112854,
- "p99": 319.8080062866211
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e3311b84",
- "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "fc31c0a33afa32cc",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:56.726240+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271775418",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271775418",
- "createdAt": "2026-06-26T23:54:56.726240+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 73.05599749088287,
- "p90": 102.7199998497963,
- "p95": 111.35999858379364,
- "p99": 123.00799787044525
- },
- "combine": {
- "p50": 65.92000275850296,
- "p90": 79.77599650621414,
- "p95": 88.44800293445587,
- "p99": 126.30400061607361
- },
- "roundtrip": {
- "p50": 118.78400295972824,
- "p90": 148.28799664974213,
- "p95": 155.8080017566681,
- "p99": 184.64000523090363
- },
- "isolatedSum": {
- "p50": 138.97600024938583,
- "p90": 182.49599635601044,
- "p95": 199.8080015182495,
- "p99": 249.31199848651886
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 540672,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 74.5920017361641,
- "p90": 107.07200318574905,
- "p95": 120.51200121641159,
- "p99": 142.87999272346497
- },
- "combine": {
- "p50": 67.03999638557434,
- "p90": 84.73599702119827,
- "p95": 92.12800115346909,
- "p99": 114.07999694347382
- },
- "roundtrip": {
- "p50": 120.38400024175644,
- "p90": 157.18400478363037,
- "p95": 169.24799978733063,
- "p99": 195.68000733852386
- },
- "isolatedSum": {
- "p50": 141.63199812173843,
- "p90": 191.80800020694733,
- "p95": 212.64000236988068,
- "p99": 256.9599896669388
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1056768,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 72.95999675989151,
- "p90": 101.05600208044052,
- "p95": 114.68800157308578,
- "p99": 137.472003698349
- },
- "combine": {
- "p50": 66.14399701356888,
- "p90": 79.23199981451035,
- "p95": 84.06399935483932,
- "p99": 93.50399672985077
- },
- "roundtrip": {
- "p50": 120.99199742078781,
- "p90": 154.81600165367126,
- "p95": 165.95199704170227,
- "p99": 220.41599452495575
- },
- "isolatedSum": {
- "p50": 139.1039937734604,
- "p90": 180.28800189495087,
- "p95": 198.7520009279251,
- "p99": 230.97600042819977
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2125824,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.08799773454666,
- "p90": 102.78400033712387,
- "p95": 110.88000237941742,
- "p99": 142.17600226402283
- },
- "combine": {
- "p50": 67.90400296449661,
- "p90": 83.29600095748901,
- "p95": 89.31200206279755,
- "p99": 102.30399668216705
- },
- "roundtrip": {
- "p50": 120.95999717712402,
- "p90": 156.73600137233734,
- "p95": 165.56799411773682,
- "p99": 189.43999707698822
- },
- "isolatedSum": {
- "p50": 140.99200069904327,
- "p90": 186.08000129461288,
- "p95": 200.19200444221497,
- "p99": 244.47999894618988
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4263936,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 74.81600344181061,
- "p90": 102.65599936246872,
- "p95": 110.68800091743469,
- "p99": 122.49600142240524
- },
- "combine": {
- "p50": 68.2239979505539,
- "p90": 86.14400029182434,
- "p95": 90.4960036277771,
- "p99": 105.95200210809708
- },
- "roundtrip": {
- "p50": 121.44000083208084,
- "p90": 152.25599706172943,
- "p95": 161.40800714492798,
- "p99": 200.9280025959015
- },
- "isolatedSum": {
- "p50": 143.0400013923645,
- "p90": 188.79999965429306,
- "p95": 201.1840045452118,
- "p99": 228.44800353050232
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 8503296,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 82.49600231647491,
- "p90": 114.01599645614624,
- "p95": 123.74400347471237,
- "p99": 148.3519971370697
- },
- "combine": {
- "p50": 74.14399832487106,
- "p90": 88.60799670219421,
- "p95": 94.11200135946274,
- "p99": 106.81600123643875
- },
- "roundtrip": {
- "p50": 128.54400277137756,
- "p90": 162.33600676059723,
- "p95": 178.20799350738525,
- "p99": 222.30400145053864
- },
- "isolatedSum": {
- "p50": 156.64000064134598,
- "p90": 202.62399315834045,
- "p95": 217.8560048341751,
- "p99": 255.16799837350845
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 16908288,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 94.36800330877304,
- "p90": 133.02400708198547,
- "p95": 138.49599659442902,
- "p99": 182.20800161361694
- },
- "combine": {
- "p50": 81.44000172615051,
- "p90": 95.42399644851685,
- "p95": 100.5759984254837,
- "p99": 123.74400347471237
- },
- "roundtrip": {
- "p50": 151.2320041656494,
- "p90": 172.03199863433838,
- "p95": 182.17599391937256,
- "p99": 404.1599929332733
- },
- "isolatedSum": {
- "p50": 175.80800503492355,
- "p90": 228.44800353050232,
- "p95": 239.07199501991272,
- "p99": 305.9520050883293
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33423360,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 109.0880036354065,
- "p90": 134.68800485134125,
- "p95": 142.752006649971,
- "p99": 173.3119934797287
- },
- "combine": {
- "p50": 97.43999689817429,
- "p90": 114.97599631547928,
- "p95": 121.08799815177917,
- "p99": 138.75199854373932
- },
- "roundtrip": {
- "p50": 180.1919937133789,
- "p90": 205.56800067424774,
- "p95": 210.07999777793884,
- "p99": 237.7600073814392
- },
- "isolatedSum": {
- "p50": 206.52800053358078,
- "p90": 249.66400116682053,
- "p95": 263.8400048017502,
- "p99": 312.063992023468
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-a3bb3bd5",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "h200_d982b749",
- "comparisonKey": "1e550a8055ce0039",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:06:16.783949+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_12",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "fp8-saturation",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272139795",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272139795",
- "createdAt": "2026-06-27T00:06:16.783949+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 82.65600353479385,
- "p90": 133.59999656677246,
- "p95": 142.59199798107147,
- "p99": 158.4320068359375
- },
- "combine": {
- "p50": 76.38400048017502,
- "p90": 99.61599856615067,
- "p95": 103.84000092744827,
- "p99": 158.1760048866272
- },
- "roundtrip": {
- "p50": 128.35200130939484,
- "p90": 157.21599757671356,
- "p95": 169.63200271129608,
- "p99": 325.6959915161133
- },
- "isolatedSum": {
- "p50": 159.04000401496887,
- "p90": 233.21599513292313,
- "p95": 246.43199890851974,
- "p99": 316.6080117225647
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.60000163316727,
- "p90": 94.81599926948547,
- "p95": 101.82400047779083,
- "p99": 127.32799351215363
- },
- "combine": {
- "p50": 70.23999840021133,
- "p90": 99.16800260543823,
- "p95": 101.34399682283401,
- "p99": 121.34400010108948
- },
- "roundtrip": {
- "p50": 130.5599957704544,
- "p90": 186.46399676799774,
- "p95": 191.3280040025711,
- "p99": 227.48799622058868
- },
- "isolatedSum": {
- "p50": 143.8400000333786,
- "p90": 193.9840018749237,
- "p95": 203.16799730062485,
- "p99": 248.6719936132431
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 84.70399677753448,
- "p90": 100.44799745082855,
- "p95": 106.55999928712845,
- "p99": 121.18399888277054
- },
- "combine": {
- "p50": 77.47200131416321,
- "p90": 89.47200328111649,
- "p95": 95.32800316810608,
- "p99": 106.1440035700798
- },
- "roundtrip": {
- "p50": 137.37599551677704,
- "p90": 158.49600732326508,
- "p95": 167.42399334907532,
- "p99": 188.54400515556335
- },
- "isolatedSum": {
- "p50": 162.1759980916977,
- "p90": 189.92000073194504,
- "p95": 201.88800245523453,
- "p99": 227.32800245285034
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.69600212574005,
- "p90": 133.15199315547943,
- "p95": 140.25600254535675,
- "p99": 154.7199934720993
- },
- "combine": {
- "p50": 105.82400113344193,
- "p90": 123.55200201272964,
- "p95": 129.50399518013,
- "p99": 141.85599982738495
- },
- "roundtrip": {
- "p50": 196.83200120925903,
- "p90": 213.69600296020508,
- "p95": 222.04799950122833,
- "p99": 265.8880054950714
- },
- "isolatedSum": {
- "p50": 223.52000325918198,
- "p90": 256.7039951682091,
- "p95": 269.75999772548676,
- "p99": 296.57599329948425
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-9ca51f4f",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_d982b749",
- "comparisonKey": "a8d7aa1ea70e9702",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T10:26:23.408406+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28286432534",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286432534",
- "createdAt": "2026-06-27T10:26:23.408406+00:00",
- "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 72.80000299215317,
- "p90": 86.7839977145195,
- "p95": 94.08000111579895,
- "p99": 129.12000715732574
- },
- "combine": {
- "p50": 69.82400268316269,
- "p90": 75.68000257015228,
- "p95": 78.75200361013412,
- "p99": 84.927998483181
- },
- "roundtrip": {
- "p50": 124.64000284671783,
- "p90": 133.88800621032715,
- "p95": 138.65600526332855,
- "p99": 154.62400019168854
- },
- "isolatedSum": {
- "p50": 142.62400567531586,
- "p90": 162.46400028467178,
- "p95": 172.83200472593307,
- "p99": 214.04800564050674
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 72.25599884986877,
- "p90": 81.53600245714188,
- "p95": 87.64799684286118,
- "p99": 115.68000167608261
- },
- "combine": {
- "p50": 69.7920024394989,
- "p90": 73.72800260782242,
- "p95": 78.40000092983246,
- "p99": 85.40800213813782
- },
- "roundtrip": {
- "p50": 123.74400347471237,
- "p90": 142.84799993038177,
- "p95": 166.30400717258453,
- "p99": 190.2720034122467
- },
- "isolatedSum": {
- "p50": 142.04800128936768,
- "p90": 155.2640050649643,
- "p95": 166.04799777269363,
- "p99": 201.08800381422043
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.11999797821045,
- "p90": 78.87999713420868,
- "p95": 81.82399719953537,
- "p99": 103.71199995279312
- },
- "combine": {
- "p50": 70.46400010585785,
- "p90": 76.92799717187881,
- "p95": 79.48800176382065,
- "p99": 91.07200056314468
- },
- "roundtrip": {
- "p50": 126.08000636100769,
- "p90": 152.41600573062897,
- "p95": 166.55999422073364,
- "p99": 194.0159946680069
- },
- "isolatedSum": {
- "p50": 143.5839980840683,
- "p90": 155.8079943060875,
- "p95": 161.31199896335602,
- "p99": 194.7840005159378
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.49600100517273,
- "p90": 88.41600269079208,
- "p95": 94.65599805116653,
- "p99": 108.92800241708755
- },
- "combine": {
- "p50": 71.07199728488922,
- "p90": 86.68799698352814,
- "p95": 92.22400188446045,
- "p99": 99.42399710416794
- },
- "roundtrip": {
- "p50": 125.47199428081512,
- "p90": 137.9839926958084,
- "p95": 149.98400211334229,
- "p99": 160.35200655460358
- },
- "isolatedSum": {
- "p50": 145.56799829006195,
- "p90": 175.10399967432022,
- "p95": 186.87999993562698,
- "p99": 208.3519995212555
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 79.52000200748444,
- "p90": 95.10400146245956,
- "p95": 101.56799852848053,
- "p99": 126.11199915409088
- },
- "combine": {
- "p50": 74.5920017361641,
- "p90": 89.4400030374527,
- "p95": 96.79999947547913,
- "p99": 107.13600367307663
- },
- "roundtrip": {
- "p50": 130.17599284648895,
- "p90": 145.05599439144135,
- "p95": 155.2319973707199,
- "p99": 176.32000148296356
- },
- "isolatedSum": {
- "p50": 154.11200374364853,
- "p90": 184.54400449991226,
- "p95": 198.36799800395966,
- "p99": 233.2480028271675
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 81.15199953317642,
- "p90": 91.67999774217606,
- "p95": 97.24800288677216,
- "p99": 135.3919953107834
- },
- "combine": {
- "p50": 78.59200239181519,
- "p90": 85.66399663686752,
- "p95": 87.90399879217148,
- "p99": 94.78399902582169
- },
- "roundtrip": {
- "p50": 136.7039978504181,
- "p90": 147.74399995803833,
- "p95": 156.51200711727142,
- "p99": 208.48000049591064
- },
- "isolatedSum": {
- "p50": 159.7440019249916,
- "p90": 177.34399437904358,
- "p95": 185.15200167894363,
- "p99": 230.17599433660507
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 96.54399752616882,
- "p90": 109.72800105810165,
- "p95": 117.63200163841248,
- "p99": 141.59999787807465
- },
- "combine": {
- "p50": 88.57599645853043,
- "p90": 97.82399982213974,
- "p95": 102.04800218343735,
- "p99": 136.4160031080246
- },
- "roundtrip": {
- "p50": 163.26400637626648,
- "p90": 180.92800676822662,
- "p95": 191.13600254058838,
- "p99": 213.56800198554993
- },
- "isolatedSum": {
- "p50": 185.11999398469925,
- "p90": 207.5520008802414,
- "p95": 219.68000382184982,
- "p99": 278.01600098609924
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.79200285673141,
- "p90": 128.00000607967377,
- "p95": 133.63200426101685,
- "p99": 155.74400126934052
- },
- "combine": {
- "p50": 104.89600151777267,
- "p90": 113.15199732780457,
- "p95": 118.75200271606445,
- "p99": 133.40799510478973
- },
- "roundtrip": {
- "p50": 196.79999351501465,
- "p90": 208.8959962129593,
- "p95": 215.10399878025055,
- "p99": 228.35199534893036
- },
- "isolatedSum": {
- "p50": 222.6880043745041,
- "p90": 241.15200340747833,
- "p95": 252.3840069770813,
- "p99": 289.15199637413025
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b7604172",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||b029c1a6fded400",
- "colorKey": "h200_d982b749",
- "comparisonKey": "4dde4e46080a91eb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:14:07.082435+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_2",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b029c1a6fded400",
- "workloadId": "set:3:07d544ac2af401ec",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272379468",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272379468",
- "createdAt": "2026-06-27T00:14:07.082435+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 72.86400347948074,
- "p90": 82.24000036716461,
- "p95": 88.73599767684937,
- "p99": 117.66400188207626
- },
- "combine": {
- "p50": 70.01599669456482,
- "p90": 75.39200037717819,
- "p95": 80.6720033288002,
- "p99": 96.0640013217926
- },
- "roundtrip": {
- "p50": 123.90399724245071,
- "p90": 139.74399864673615,
- "p95": 148.47999811172485,
- "p99": 178.75200510025024
- },
- "isolatedSum": {
- "p50": 142.88000017404556,
- "p90": 157.6320007443428,
- "p95": 169.40800100564957,
- "p99": 213.72800320386887
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 81.66400343179703,
- "p90": 93.53599697351456,
- "p95": 100.70399940013885,
- "p99": 128.09599936008453
- },
- "combine": {
- "p50": 78.11199873685837,
- "p90": 84.51200276613235,
- "p95": 89.02399986982346,
- "p99": 123.6800029873848
- },
- "roundtrip": {
- "p50": 135.13599336147308,
- "p90": 146.7200070619583,
- "p95": 153.9199948310852,
- "p99": 176.89600586891174
- },
- "isolatedSum": {
- "p50": 159.7760021686554,
- "p90": 178.0479997396469,
- "p95": 189.7279992699623,
- "p99": 251.77600234746933
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.63200163841248,
- "p90": 127.3919939994812,
- "p95": 134.0479999780655,
- "p99": 154.94400262832642
- },
- "combine": {
- "p50": 104.67199981212616,
- "p90": 115.42399972677231,
- "p95": 121.98399752378464,
- "p99": 159.93599593639374
- },
- "roundtrip": {
- "p50": 196.25599682331085,
- "p90": 206.08000457286835,
- "p95": 214.08000588417053,
- "p99": 245.27999758720398
- },
- "isolatedSum": {
- "p50": 222.30400145053864,
- "p90": 242.8159937262535,
- "p95": 256.0319975018501,
- "p99": 314.87999856472015
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-875c4f49",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da",
- "colorKey": "h200_d982b749",
- "comparisonKey": "c8b8b28ca3d145bb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:54:14.463003+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_0",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "c774c8e4abb34da",
- "workloadId": "set:5:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28273509838",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273509838",
- "createdAt": "2026-06-27T00:54:14.463003+00:00",
- "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 71.3919997215271,
- "p90": 87.5839963555336,
- "p95": 96.3520035147667,
- "p99": 139.55199718475342
- },
- "combine": {
- "p50": 68.09599697589874,
- "p90": 79.55200225114822,
- "p95": 84.95999872684479,
- "p99": 111.32799834012985
- },
- "roundtrip": {
- "p50": 119.55200135707855,
- "p90": 147.20000326633453,
- "p95": 157.18400478363037,
- "p99": 204.6079933643341
- },
- "isolatedSum": {
- "p50": 139.48799669742584,
- "p90": 167.13599860668182,
- "p95": 181.31200224161148,
- "p99": 250.87999552488327
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 72.9919970035553,
- "p90": 94.36800330877304,
- "p95": 103.13600301742554,
- "p99": 130.68799674510956
- },
- "combine": {
- "p50": 68.12799721956253,
- "p90": 80.9599980711937,
- "p95": 88.19200098514557,
- "p99": 105.15200346708298
- },
- "roundtrip": {
- "p50": 121.5360015630722,
- "p90": 147.16799557209015,
- "p95": 157.98400342464447,
- "p99": 185.92000007629395
- },
- "isolatedSum": {
- "p50": 141.11999422311783,
- "p90": 175.32800137996674,
- "p95": 191.3280040025711,
- "p99": 235.84000021219254
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.02399724721909,
- "p90": 95.48799693584442,
- "p95": 104.86400127410889,
- "p99": 133.08799266815186
- },
- "combine": {
- "p50": 68.76800209283829,
- "p90": 80.57600259780884,
- "p95": 86.30400151014328,
- "p99": 105.92000186443329
- },
- "roundtrip": {
- "p50": 120.12799829244614,
- "p90": 145.56799829006195,
- "p95": 155.64799308776855,
- "p99": 182.68799781799316
- },
- "isolatedSum": {
- "p50": 141.79199934005737,
- "p90": 176.06399953365326,
- "p95": 191.16800278425217,
- "p99": 239.00799453258514
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.66400212049484,
- "p90": 93.75999867916107,
- "p95": 98.65599870681763,
- "p99": 113.18399757146835
- },
- "combine": {
- "p50": 68.76800209283829,
- "p90": 80.54400235414505,
- "p95": 82.49600231647491,
- "p99": 91.77599847316742
- },
- "roundtrip": {
- "p50": 121.08799815177917,
- "p90": 145.9839940071106,
- "p95": 156.99200332164764,
- "p99": 216.35200083255768
- },
- "isolatedSum": {
- "p50": 142.43200421333313,
- "p90": 174.30400103330612,
- "p95": 181.15200102329254,
- "p99": 204.95999604463577
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 73.91999661922455,
- "p90": 96.67199850082397,
- "p95": 103.2319962978363,
- "p99": 125.34399330615997
- },
- "combine": {
- "p50": 70.75200229883194,
- "p90": 84.03199911117554,
- "p95": 89.59999680519104,
- "p99": 103.87200117111206
- },
- "roundtrip": {
- "p50": 123.9359974861145,
- "p90": 155.8080017566681,
- "p95": 170.49600183963776,
- "p99": 205.6960016489029
- },
- "isolatedSum": {
- "p50": 144.6719989180565,
- "p90": 180.7039976119995,
- "p95": 192.83199310302734,
- "p99": 229.21599447727203
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-19b41153",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "h200_d982b749",
- "comparisonKey": "fb9666d12f9a34f8",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:55.021886+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_0",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "small-amplitude",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272132556",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272132556",
- "createdAt": "2026-06-27T00:05:55.021886+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 72.31999933719635,
- "p90": 95.0080007314682,
- "p95": 99.93600100278854,
- "p99": 117.69600212574005
- },
- "combine": {
- "p50": 68.00000369548798,
- "p90": 79.55200225114822,
- "p95": 85.79199761152267,
- "p99": 114.04799669981003
- },
- "roundtrip": {
- "p50": 120.70400267839432,
- "p90": 148.60799908638,
- "p95": 156.54399991035461,
- "p99": 199.0399956703186
- },
- "isolatedSum": {
- "p50": 140.32000303268433,
- "p90": 174.56000298261642,
- "p95": 185.72799861431122,
- "p99": 231.74399882555008
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.02399724721909,
- "p90": 95.87199985980988,
- "p95": 102.91200131177902,
- "p99": 124.35200065374374
- },
- "combine": {
- "p50": 68.67200136184692,
- "p90": 82.75199681520462,
- "p95": 89.53599631786346,
- "p99": 112.96000331640244
- },
- "roundtrip": {
- "p50": 123.10399860143661,
- "p90": 151.39199793338776,
- "p95": 160.19199788570404,
- "p99": 189.69599902629852
- },
- "isolatedSum": {
- "p50": 141.695998609066,
- "p90": 178.6239966750145,
- "p95": 192.4479976296425,
- "p99": 237.31200397014618
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 87.52000331878662,
- "p90": 135.23200154304504,
- "p95": 142.04800128936768,
- "p99": 161.21600568294525
- },
- "combine": {
- "p50": 77.504001557827,
- "p90": 92.38400310277939,
- "p95": 97.120001912117,
- "p99": 111.77600175142288
- },
- "roundtrip": {
- "p50": 135.77599823474884,
- "p90": 158.81599485874176,
- "p95": 168.92799735069275,
- "p99": 212.67199516296387
- },
- "isolatedSum": {
- "p50": 165.02400487661362,
- "p90": 227.61600464582443,
- "p95": 239.16800320148468,
- "p99": 272.99200743436813
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 116.67200177907944,
- "p90": 136.83199882507324,
- "p95": 145.79200744628906,
- "p99": 161.6320013999939
- },
- "combine": {
- "p50": 105.76000064611435,
- "p90": 121.63200229406357,
- "p95": 128.06400656700134,
- "p99": 140.60799777507782
- },
- "roundtrip": {
- "p50": 195.93599438667297,
- "p90": 217.3759937286377,
- "p95": 223.4240025281906,
- "p99": 252.9279887676239
- },
- "isolatedSum": {
- "p50": 222.4320024251938,
- "p90": 258.4640011191368,
- "p95": 273.8560140132904,
- "p99": 302.2399991750717
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-6b3584db",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "h200_d982b749",
- "comparisonKey": "0dade16dc8be5c94",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:06:19.346761+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_11",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "wide-dynamic-range",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272136313",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272136313",
- "createdAt": "2026-06-27T00:06:19.346761+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 75.13599842786789,
- "p90": 99.84000027179718,
- "p95": 112.38399893045425,
- "p99": 175.48799514770508
- },
- "combine": {
- "p50": 69.95200365781784,
- "p90": 81.95199817419052,
- "p95": 87.3280018568039,
- "p99": 117.95199662446976
- },
- "roundtrip": {
- "p50": 127.51999497413635,
- "p90": 157.9200029373169,
- "p95": 171.7119961977005,
- "p99": 223.26399385929108
- },
- "isolatedSum": {
- "p50": 145.08800208568573,
- "p90": 181.7919984459877,
- "p95": 199.71200078725815,
- "p99": 293.43999177217484
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 76.03199779987335,
- "p90": 112.73600161075592,
- "p95": 125.37600100040436,
- "p99": 209.4080001115799
- },
- "combine": {
- "p50": 70.91200351715088,
- "p90": 86.30400151014328,
- "p95": 95.13600170612335,
- "p99": 123.16799908876419
- },
- "roundtrip": {
- "p50": 125.11999905109406,
- "p90": 156.99200332164764,
- "p95": 177.47199535369873,
- "p99": 251.64800882339478
- },
- "isolatedSum": {
- "p50": 146.94400131702423,
- "p90": 199.0400031208992,
- "p95": 220.5120027065277,
- "p99": 332.5759992003441
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 84.28800106048584,
- "p90": 108.15999656915665,
- "p95": 115.29599875211716,
- "p99": 152.70400047302246
- },
- "combine": {
- "p50": 78.36800068616867,
- "p90": 91.87199920415878,
- "p95": 98.55999797582626,
- "p99": 110.17599701881409
- },
- "roundtrip": {
- "p50": 138.46400380134583,
- "p90": 167.23200678825378,
- "p95": 179.45599555969238,
- "p99": 238.91200125217438
- },
- "isolatedSum": {
- "p50": 162.6560017466545,
- "p90": 200.03199577331543,
- "p95": 213.85599672794342,
- "p99": 262.87999749183655
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 116.09599739313126,
- "p90": 137.56799697875977,
- "p95": 140.76800644397736,
- "p99": 157.47199952602386
- },
- "combine": {
- "p50": 104.35199737548828,
- "p90": 122.97599762678146,
- "p95": 125.50400197505951,
- "p99": 148.5760062932968
- },
- "roundtrip": {
- "p50": 198.7520009279251,
- "p90": 219.2319929599762,
- "p95": 227.58400440216064,
- "p99": 269.3440020084381
- },
- "isolatedSum": {
- "p50": 220.44799476861954,
- "p90": 260.54399460554123,
- "p95": 266.27200841903687,
- "p99": 306.0480058193207
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f4f3e72f",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||8c8497a77d9085d",
- "colorKey": "h200_d982b749",
- "comparisonKey": "c5d592397744e4a1",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:52.426268+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_2",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "zeros",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "8c8497a77d9085d",
- "workloadId": "set:4:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272129001",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272129001",
- "createdAt": "2026-06-27T00:05:52.426268+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 73.7600028514862,
- "p90": 98.59199821949005,
- "p95": 107.77600109577179,
- "p99": 133.31200182437897
- },
- "combine": {
- "p50": 70.592001080513,
- "p90": 84.54400300979614,
- "p95": 90.43200314044952,
- "p99": 139.26400244235992
- },
- "roundtrip": {
- "p50": 125.59999525547028,
- "p90": 159.87199544906616,
- "p95": 172.57599532604218,
- "p99": 367.2960102558136
- },
- "isolatedSum": {
- "p50": 144.3520039319992,
- "p90": 183.1360012292862,
- "p95": 198.2080042362213,
- "p99": 272.5760042667389
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.87999647855759,
- "p90": 107.04000294208527,
- "p95": 128.80000472068787,
- "p99": 359.391987323761
- },
- "combine": {
- "p50": 70.49600034952164,
- "p90": 84.06399935483932,
- "p95": 89.88799899816513,
- "p99": 102.9760017991066
- },
- "roundtrip": {
- "p50": 124.70400333404541,
- "p90": 155.10399639606476,
- "p95": 165.72800278663635,
- "p99": 202.7519941329956
- },
- "isolatedSum": {
- "p50": 145.37599682807922,
- "p90": 191.1040022969246,
- "p95": 218.688003718853,
- "p99": 462.3679891228676
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 83.90399813652039,
- "p90": 107.07200318574905,
- "p95": 115.9679964184761,
- "p99": 136.51199638843536
- },
- "combine": {
- "p50": 78.33600044250488,
- "p90": 91.93599969148636,
- "p95": 97.69599884748459,
- "p99": 108.83200168609619
- },
- "roundtrip": {
- "p50": 137.2160017490387,
- "p90": 170.23999989032745,
- "p95": 181.37599527835846,
- "p99": 215.36000072956085
- },
- "isolatedSum": {
- "p50": 162.23999857902527,
- "p90": 199.0080028772354,
- "p95": 213.6639952659607,
- "p99": 245.34399807453156
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 115.99999666213989,
- "p90": 135.71199774742126,
- "p95": 143.8400000333786,
- "p99": 168.67199540138245
- },
- "combine": {
- "p50": 104.73600029945374,
- "p90": 121.47200107574463,
- "p95": 125.47199428081512,
- "p99": 163.00800442695618
- },
- "roundtrip": {
- "p50": 196.6720074415207,
- "p90": 216.19200706481934,
- "p95": 220.5120027065277,
- "p99": 240.1919960975647
- },
- "isolatedSum": {
- "p50": 220.73599696159363,
- "p90": 257.1839988231659,
- "p95": 269.3119943141937,
- "p99": 331.6799998283386
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-eb6d6f9b",
- "identity": "h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "4a72e21e2f542236",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:49:45.031759+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271615137",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271615137",
- "createdAt": "2026-06-26T23:49:45.031759+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 70.97599655389786,
- "p90": 85.50400286912918,
- "p95": 95.36000341176987,
- "p99": 316.79999828338623
- },
- "combine": {
- "p50": 68.70400160551071,
- "p90": 74.72000271081924,
- "p95": 78.72000336647034,
- "p99": 94.2080020904541
- },
- "roundtrip": {
- "p50": 122.56000190973282,
- "p90": 143.26399564743042,
- "p95": 153.1199961900711,
- "p99": 172.2240000963211
- },
- "isolatedSum": {
- "p50": 139.67999815940857,
- "p90": 160.22400557994843,
- "p95": 174.0800067782402,
- "p99": 411.00800037384033
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 72.12799787521362,
- "p90": 85.24800091981888,
- "p95": 91.93599969148636,
- "p99": 119.48800086975098
- },
- "combine": {
- "p50": 68.57600063085556,
- "p90": 72.83200323581696,
- "p95": 77.15199887752533,
- "p99": 83.45600217580795
- },
- "roundtrip": {
- "p50": 120.83200365304947,
- "p90": 129.2160004377365,
- "p95": 133.215993642807,
- "p99": 145.75999975204468
- },
- "isolatedSum": {
- "p50": 140.70399850606918,
- "p90": 158.08000415563583,
- "p95": 169.0879985690117,
- "p99": 202.94400304555893
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.63200187683105,
- "p90": 90.84799885749817,
- "p95": 103.64799946546555,
- "p99": 133.02400708198547
- },
- "combine": {
- "p50": 70.52800059318542,
- "p90": 80.86399734020233,
- "p95": 87.74399757385254,
- "p99": 105.6319996714592
- },
- "roundtrip": {
- "p50": 123.64800274372101,
- "p90": 149.59999918937683,
- "p95": 158.33599865436554,
- "p99": 186.0480010509491
- },
- "isolatedSum": {
- "p50": 144.16000247001648,
- "p90": 171.7119961977005,
- "p95": 191.39199703931808,
- "p99": 238.65600675344467
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.18399846553802,
- "p90": 83.03999900817871,
- "p95": 94.91200000047684,
- "p99": 104.09600287675858
- },
- "combine": {
- "p50": 69.2799985408783,
- "p90": 77.82399654388428,
- "p95": 83.10399949550629,
- "p99": 110.04800349473953
- },
- "roundtrip": {
- "p50": 123.52000176906586,
- "p90": 143.19999516010284,
- "p95": 152.0960032939911,
- "p99": 205.08800446987152
- },
- "isolatedSum": {
- "p50": 142.46399700641632,
- "p90": 160.863995552063,
- "p95": 178.01599949598312,
- "p99": 214.1440063714981
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 71.16799801588058,
- "p90": 83.36000144481659,
- "p95": 94.11200135946274,
- "p99": 106.46399855613708
- },
- "combine": {
- "p50": 70.04799693822861,
- "p90": 78.07999849319458,
- "p95": 83.20000022649765,
- "p99": 95.71199864149094
- },
- "roundtrip": {
- "p50": 124.54400211572647,
- "p90": 144.0960019826889,
- "p95": 155.008003115654,
- "p99": 204.3839991092682
- },
- "isolatedSum": {
- "p50": 141.2159949541092,
- "p90": 161.43999993801117,
- "p95": 177.3120015859604,
- "p99": 202.17599719762802
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 80.60800284147263,
- "p90": 89.59999680519104,
- "p95": 94.81599926948547,
- "p99": 117.53600090742111
- },
- "combine": {
- "p50": 77.08799839019775,
- "p90": 81.95199817419052,
- "p95": 87.3280018568039,
- "p99": 95.0080007314682
- },
- "roundtrip": {
- "p50": 135.19999384880066,
- "p90": 148.47999811172485,
- "p95": 156.63999319076538,
- "p99": 188.57599794864655
- },
- "isolatedSum": {
- "p50": 157.69600123167038,
- "p90": 171.55199497938156,
- "p95": 182.14400112628937,
- "p99": 212.5440016388893
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 94.68799829483032,
- "p90": 114.97599631547928,
- "p95": 122.36800044775009,
- "p99": 148.03199470043182
- },
- "combine": {
- "p50": 87.39200234413147,
- "p90": 97.59999811649323,
- "p95": 102.9760017991066,
- "p99": 113.95200341939926
- },
- "roundtrip": {
- "p50": 158.87999534606934,
- "p90": 176.15999281406403,
- "p95": 185.2159947156906,
- "p99": 225.600004196167
- },
- "isolatedSum": {
- "p50": 182.0800006389618,
- "p90": 212.5759944319725,
- "p95": 225.3440022468567,
- "p99": 261.9839981198311
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 116.22399836778641,
- "p90": 125.18399953842163,
- "p95": 132.32000172138214,
- "p99": 140.83200693130493
- },
- "combine": {
- "p50": 105.34399747848511,
- "p90": 111.32799834012985,
- "p95": 116.28799885511398,
- "p99": 123.83999675512314
- },
- "roundtrip": {
- "p50": 197.60000705718994,
- "p90": 207.2640061378479,
- "p95": 214.81600403785706,
- "p99": 241.05599522590637
- },
- "isolatedSum": {
- "p50": 221.56799584627151,
- "p90": 236.51199787855148,
- "p95": 248.60800057649612,
- "p99": 264.67200368642807
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-55459bb6",
- "identity": "h200|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "h200_d982b749",
- "comparisonKey": "a923f4d59c22dd5b",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:11.699427+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_3",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287501303",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287501303",
- "createdAt": "2026-06-27T11:14:11.699427+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 72.89600372314453,
- "p90": 89.9519994854927,
- "p95": 103.16800326108932,
- "p99": 117.63200163841248
- },
- "combine": {
- "p50": 68.80000233650208,
- "p90": 81.34400099515915,
- "p95": 87.26400136947632,
- "p99": 99.84000027179718
- },
- "roundtrip": {
- "p50": 125.2799928188324,
- "p90": 158.52800011634827,
- "p95": 173.2800006866455,
- "p99": 205.02400398254395
- },
- "isolatedSum": {
- "p50": 141.6960060596466,
- "p90": 171.29600048065186,
- "p95": 190.43200463056564,
- "p99": 217.47200191020966
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 76.31999999284744,
- "p90": 108.83200168609619,
- "p95": 118.65600198507309,
- "p99": 149.9519944190979
- },
- "combine": {
- "p50": 70.23999840021133,
- "p90": 85.85599809885025,
- "p95": 93.98400038480759,
- "p99": 114.656001329422
- },
- "roundtrip": {
- "p50": 127.77599692344666,
- "p90": 164.44799304008484,
- "p95": 174.23999309539795,
- "p99": 215.96799790859222
- },
- "isolatedSum": {
- "p50": 146.55999839305878,
- "p90": 194.68799978494644,
- "p95": 212.64000236988068,
- "p99": 264.6079957485199
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1218560,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 76.89599692821503,
- "p90": 109.8560020327568,
- "p95": 119.07199770212173,
- "p99": 162.11199760437012
- },
- "combine": {
- "p50": 71.00799679756165,
- "p90": 83.90399813652039,
- "p95": 90.08000046014786,
- "p99": 109.15199667215347
- },
- "roundtrip": {
- "p50": 130.43199479579926,
- "p90": 159.39199924468994,
- "p95": 176.57600343227386,
- "p99": 223.51999580860138
- },
- "isolatedSum": {
- "p50": 147.90399372577667,
- "p90": 193.7600001692772,
- "p95": 209.1519981622696,
- "p99": 271.2639942765236
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2408448,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 77.02399790287018,
- "p90": 101.31199657917023,
- "p95": 112.44799941778183,
- "p99": 129.85600531101227
- },
- "combine": {
- "p50": 71.71200215816498,
- "p90": 84.927998483181,
- "p95": 90.97599983215332,
- "p99": 101.95200145244598
- },
- "roundtrip": {
- "p50": 128.76799702644348,
- "p90": 155.13600409030914,
- "p95": 164.32000696659088,
- "p99": 192.51200556755066
- },
- "isolatedSum": {
- "p50": 148.73600006103516,
- "p90": 186.23999506235123,
- "p95": 203.42399924993515,
- "p99": 231.80800676345825
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4831232,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 79.6160027384758,
- "p90": 105.47199845314026,
- "p95": 115.23199826478958,
- "p99": 141.53599739074707
- },
- "combine": {
- "p50": 72.41600006818771,
- "p90": 86.14400029182434,
- "p95": 92.00000017881393,
- "p99": 109.40799862146378
- },
- "roundtrip": {
- "p50": 131.20000064373016,
- "p90": 157.27999806404114,
- "p95": 168.64000260829926,
- "p99": 207.74400234222412
- },
- "isolatedSum": {
- "p50": 152.0320028066635,
- "p90": 191.6159987449646,
- "p95": 207.23199844360352,
- "p99": 250.94399601221085
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9848832,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 88.86399865150452,
- "p90": 114.72000181674957,
- "p95": 122.8799968957901,
- "p99": 132.47999548912048
- },
- "combine": {
- "p50": 79.26400005817413,
- "p90": 90.01599997282028,
- "p95": 98.01600128412247,
- "p99": 109.92000252008438
- },
- "roundtrip": {
- "p50": 139.77600634098053,
- "p90": 164.2879992723465,
- "p95": 177.63200402259827,
- "p99": 194.59199905395508
- },
- "isolatedSum": {
- "p50": 168.12799870967865,
- "p90": 204.73600178956985,
- "p95": 220.89599817991257,
- "p99": 242.39999800920486
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 98.88000041246414,
- "p90": 118.01599711179733,
- "p95": 126.81600451469421,
- "p99": 194.2719966173172
- },
- "combine": {
- "p50": 88.22400122880936,
- "p90": 102.27199643850327,
- "p95": 108.2879975438118,
- "p99": 123.07199835777283
- },
- "roundtrip": {
- "p50": 163.55200111865997,
- "p90": 186.11200153827667,
- "p95": 196.83200120925903,
- "p99": 244.3840056657791
- },
- "isolatedSum": {
- "p50": 187.1040016412735,
- "p90": 220.2879935503006,
- "p95": 235.104002058506,
- "p99": 317.34399497509
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 119.03999745845795,
- "p90": 139.42399621009827,
- "p95": 145.4080045223236,
- "p99": 166.24000668525696
- },
- "combine": {
- "p50": 106.36799782514572,
- "p90": 121.37600034475327,
- "p95": 126.3359934091568,
- "p99": 143.0400013923645
- },
- "roundtrip": {
- "p50": 199.072003364563,
- "p90": 215.26400744915009,
- "p95": 224.48000311851501,
- "p99": 243.3599978685379
- },
- "isolatedSum": {
- "p50": 225.40799528360367,
- "p90": 260.79999655485153,
- "p95": 271.7439979314804,
- "p99": 309.28000807762146
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-dea4952a",
- "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "f2cda8ef40003c42",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:53:13.205485+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271728983",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271728983",
- "createdAt": "2026-06-26T23:53:13.205485+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 73.53600114583969,
- "p90": 98.14400225877762,
- "p95": 107.45599865913391,
- "p99": 121.63200229406357
- },
- "combine": {
- "p50": 68.2239979505539,
- "p90": 82.24000036716461,
- "p95": 87.26400136947632,
- "p99": 110.07999628782272
- },
- "roundtrip": {
- "p50": 125.59999525547028,
- "p90": 155.39200603961945,
- "p95": 163.68000209331512,
- "p99": 201.6959935426712
- },
- "isolatedSum": {
- "p50": 141.75999909639359,
- "p90": 180.38400262594223,
- "p95": 194.72000002861023,
- "p99": 231.7119985818863
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 75.87199658155441,
- "p90": 103.2319962978363,
- "p95": 112.15999722480774,
- "p99": 193.05600225925446
- },
- "combine": {
- "p50": 68.60800087451935,
- "p90": 83.5840031504631,
- "p95": 90.30400216579437,
- "p99": 129.60000336170197
- },
- "roundtrip": {
- "p50": 123.23199957609177,
- "p90": 153.31199765205383,
- "p95": 164.38399255275726,
- "p99": 185.37600338459015
- },
- "isolatedSum": {
- "p50": 144.47999745607376,
- "p90": 186.8159994482994,
- "p95": 202.4639993906021,
- "p99": 322.6560056209564
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1218560,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 74.87999647855759,
- "p90": 96.92800045013428,
- "p95": 105.85600137710571,
- "p99": 121.15199863910675
- },
- "combine": {
- "p50": 69.2799985408783,
- "p90": 82.56000280380249,
- "p95": 90.30400216579437,
- "p99": 102.04800218343735
- },
- "roundtrip": {
- "p50": 125.2799928188324,
- "p90": 152.28800475597382,
- "p95": 160.8320027589798,
- "p99": 174.55999553203583
- },
- "isolatedSum": {
- "p50": 144.15999501943588,
- "p90": 179.48800325393677,
- "p95": 196.16000354290009,
- "p99": 223.2000008225441
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2408448,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 76.54400169849396,
- "p90": 104.2879968881607,
- "p95": 111.42399907112122,
- "p99": 138.5599970817566
- },
- "combine": {
- "p50": 70.52800059318542,
- "p90": 85.66399663686752,
- "p95": 91.67999774217606,
- "p99": 102.59199887514114
- },
- "roundtrip": {
- "p50": 126.39999389648438,
- "p90": 154.55999970436096,
- "p95": 166.97600483894348,
- "p99": 208.67200195789337
- },
- "isolatedSum": {
- "p50": 147.07200229167938,
- "p90": 189.95199352502823,
- "p95": 203.10399681329727,
- "p99": 241.15199595689774
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4831232,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 78.40000092983246,
- "p90": 99.07200187444687,
- "p95": 105.98400235176086,
- "p99": 126.3359934091568
- },
- "combine": {
- "p50": 71.61600142717361,
- "p90": 85.40800213813782,
- "p95": 90.27200192213058,
- "p99": 109.40799862146378
- },
- "roundtrip": {
- "p50": 129.02399897575378,
- "p90": 156.2879979610443,
- "p95": 166.143998503685,
- "p99": 196.51199877262115
- },
- "isolatedSum": {
- "p50": 150.01600235700607,
- "p90": 184.4800040125847,
- "p95": 196.25600427389145,
- "p99": 235.74399203062057
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9848832,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 85.40800213813782,
- "p90": 105.12000322341919,
- "p95": 114.04799669981003,
- "p99": 131.71200454235077
- },
- "combine": {
- "p50": 77.91999727487564,
- "p90": 90.59199690818787,
- "p95": 96.63999825716019,
- "p99": 105.18400371074677
- },
- "roundtrip": {
- "p50": 137.2160017490387,
- "p90": 163.07200491428375,
- "p95": 172.35200107097626,
- "p99": 208.064004778862
- },
- "isolatedSum": {
- "p50": 163.32799941301346,
- "p90": 195.71200013160706,
- "p95": 210.68799495697021,
- "p99": 236.89600825309753
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 99.55199807882309,
- "p90": 126.65599584579468,
- "p95": 135.00800728797913,
- "p99": 167.10400581359863
- },
- "combine": {
- "p50": 89.24800157546997,
- "p90": 106.1440035700798,
- "p95": 111.23199760913849,
- "p99": 126.65599584579468
- },
- "roundtrip": {
- "p50": 162.9759967327118,
- "p90": 185.88800728321075,
- "p95": 193.6960071325302,
- "p99": 255.87201118469238
- },
- "isolatedSum": {
- "p50": 188.79999965429306,
- "p90": 232.79999941587448,
- "p95": 246.24000489711761,
- "p99": 293.7600016593933
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 118.367999792099,
- "p90": 135.42400300502777,
- "p95": 143.5520052909851,
- "p99": 181.88799917697906
- },
- "combine": {
- "p50": 105.34399747848511,
- "p90": 119.99999731779099,
- "p95": 126.78399682044983,
- "p99": 139.0399932861328
- },
- "roundtrip": {
- "p50": 197.53600656986237,
- "p90": 215.83999693393707,
- "p95": 224.48000311851501,
- "p99": 253.1839907169342
- },
- "isolatedSum": {
- "p50": 223.7119972705841,
- "p90": 255.42400032281876,
- "p95": 270.33600211143494,
- "p99": 320.9279924631119
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-14a4cdc0",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9",
- "colorKey": "h200_b02e4015",
- "comparisonKey": "7784b2ab75c0721c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:13:31.348412+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · alternating-groups@s1",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "alternating-groups",
- "routingLabel": "alternating-groups@s1",
- "routingStep": 1,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "f8662de0b3559f9",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272358996",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272358996",
- "createdAt": "2026-06-27T00:13:31.348412+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 70.75200229883194,
- "p90": 98.39999675750732,
- "p95": 108.57599973678589,
- "p99": 136.03200018405914
- },
- "combine": {
- "p50": 67.52000004053116,
- "p90": 79.83999699354172,
- "p95": 84.09599959850311,
- "p99": 104.09600287675858
- },
- "roundtrip": {
- "p50": 122.8799968957901,
- "p90": 146.62399888038635,
- "p95": 155.32800555229187,
- "p99": 178.3359944820404
- },
- "isolatedSum": {
- "p50": 138.2720023393631,
- "p90": 178.23999375104904,
- "p95": 192.671999335289,
- "p99": 240.12800306081772
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3297280,
- "combineLogicalBytes": 3297280,
- "fanoutMean": 3.59375,
- "recvTokensMax": 61,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 84.44800227880478,
- "p90": 110.30399799346924,
- "p95": 138.20800185203552,
- "p99": 196.22400403022766
- },
- "combine": {
- "p50": 75.16799867153168,
- "p90": 85.34400165081024,
- "p95": 91.00800007581711,
- "p99": 101.02400183677673
- },
- "roundtrip": {
- "p50": 135.3919953107834,
- "p90": 156.3200056552887,
- "p95": 166.4000004529953,
- "p99": 198.36799800395966
- },
- "isolatedSum": {
- "p50": 159.61600095033646,
- "p90": 195.64799964427948,
- "p95": 229.21600192785263,
- "p99": 297.2480058670044
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13275136,
- "combineLogicalBytes": 13275136,
- "fanoutMean": 3.6171875,
- "recvTokensMax": 236,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 128.48000228405,
- "p90": 142.91200041770935,
- "p95": 151.36000514030457,
- "p99": 290.0159955024719
- },
- "combine": {
- "p50": 111.455999314785,
- "p90": 123.6800029873848,
- "p95": 127.93600559234619,
- "p99": 143.71199905872345
- },
- "roundtrip": {
- "p50": 210.81599593162537,
- "p90": 223.26399385929108,
- "p95": 229.34399545192719,
- "p99": 257.79199600219727
- },
- "isolatedSum": {
- "p50": 239.936001598835,
- "p90": 266.59200340509415,
- "p95": 279.29601073265076,
- "p99": 433.7279945611954
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 53172224,
- "combineLogicalBytes": 53172224,
- "fanoutMean": 3.6220703125,
- "recvTokensMax": 934,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4bdc0b92",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759",
- "colorKey": "h200_ad2e3b5c",
- "comparisonKey": "7784b2ab75c0721c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:13:31.907403+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h200-dgxc-slurm_4",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · alternating-groups@s2",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "alternating-groups",
- "routingLabel": "alternating-groups@s2",
- "routingStep": 2,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "3cd13eac5b27759",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272362308",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272362308",
- "createdAt": "2026-06-27T00:13:31.907403+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 71.32799923419952,
- "p90": 93.82399916648865,
- "p95": 109.72800105810165,
- "p99": 145.1520025730133
- },
- "combine": {
- "p50": 66.880002617836,
- "p90": 72.25599884986877,
- "p95": 80.32000064849854,
- "p99": 91.39200299978256
- },
- "roundtrip": {
- "p50": 123.48800152540207,
- "p90": 140.51200449466705,
- "p95": 156.8319946527481,
- "p99": 195.64799964427948
- },
- "isolatedSum": {
- "p50": 138.20800185203552,
- "p90": 166.07999801635742,
- "p95": 190.0480017066002,
- "p99": 236.54400557279587
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3297280,
- "combineLogicalBytes": 3297280,
- "fanoutMean": 3.59375,
- "recvTokensMax": 61,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 81.4720019698143,
- "p90": 136.48000359535217,
- "p95": 151.13599598407745,
- "p99": 198.04799556732178
- },
- "combine": {
- "p50": 75.80800354480743,
- "p90": 89.47200328111649,
- "p95": 102.91200131177902,
- "p99": 122.36800044775009
- },
- "roundtrip": {
- "p50": 134.5279961824417,
- "p90": 149.31200444698334,
- "p95": 162.9440039396286,
- "p99": 204.73599433898926
- },
- "isolatedSum": {
- "p50": 157.28000551462173,
- "p90": 225.95200687646866,
- "p95": 254.04799729585648,
- "p99": 320.41599601507187
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13275136,
- "combineLogicalBytes": 13275136,
- "fanoutMean": 3.6171875,
- "recvTokensMax": 236,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 124.60800260305405,
- "p90": 131.80799782276154,
- "p95": 137.2160017490387,
- "p99": 164.35199975967407
- },
- "combine": {
- "p50": 111.00800335407257,
- "p90": 119.39200013875961,
- "p95": 125.5359947681427,
- "p99": 155.03999590873718
- },
- "roundtrip": {
- "p50": 208.41600000858307,
- "p90": 218.6560034751892,
- "p95": 229.72799837589264,
- "p99": 263.3279860019684
- },
- "isolatedSum": {
- "p50": 235.61600595712662,
- "p90": 251.19999796152115,
- "p95": 262.7519965171814,
- "p99": 319.39199566841125
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 53172224,
- "combineLogicalBytes": 53172224,
- "fanoutMean": 3.6220703125,
- "recvTokensMax": 934,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-fcadbf18",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9",
- "colorKey": "h200_ae2e3cef",
- "comparisonKey": "7784b2ab75c0721c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:13:36.495887+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h200-dgxc-slurm_1",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · alternating-groups@s3",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "alternating-groups",
- "routingLabel": "alternating-groups@s3",
- "routingStep": 3,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "f8662de0b3559f9",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272365812",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272365812",
- "createdAt": "2026-06-27T00:13:36.495887+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 71.48800045251846,
- "p90": 94.46399658918381,
- "p95": 99.29600358009338,
- "p99": 110.23999750614166
- },
- "combine": {
- "p50": 67.10399687290192,
- "p90": 80.09599894285202,
- "p95": 84.54400300979614,
- "p99": 108.31999778747559
- },
- "roundtrip": {
- "p50": 119.61600184440613,
- "p90": 148.83199334144592,
- "p95": 158.01599621772766,
- "p99": 279.9359858036041
- },
- "isolatedSum": {
- "p50": 138.59199732542038,
- "p90": 174.55999553203583,
- "p95": 183.84000658988953,
- "p99": 218.55999529361725
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3297280,
- "combineLogicalBytes": 3297280,
- "fanoutMean": 3.59375,
- "recvTokensMax": 61,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 80.83199709653854,
- "p90": 99.45599734783173,
- "p95": 105.05600273609161,
- "p99": 118.07999759912491
- },
- "combine": {
- "p50": 75.23199915885925,
- "p90": 87.52000331878662,
- "p95": 92.0960009098053,
- "p99": 108.51199924945831
- },
- "roundtrip": {
- "p50": 133.91999900341034,
- "p90": 154.78399395942688,
- "p95": 162.04799711704254,
- "p99": 176.1920005083084
- },
- "isolatedSum": {
- "p50": 156.0639962553978,
- "p90": 186.97600066661835,
- "p95": 197.1520036458969,
- "p99": 226.59199684858322
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13275136,
- "combineLogicalBytes": 13275136,
- "fanoutMean": 3.6171875,
- "recvTokensMax": 236,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 125.15200674533844,
- "p90": 135.96799969673157,
- "p95": 141.79199934005737,
- "p99": 205.34400641918182
- },
- "combine": {
- "p50": 109.72800105810165,
- "p90": 120.15999853610992,
- "p95": 123.36000055074692,
- "p99": 136.7039978504181
- },
- "roundtrip": {
- "p50": 207.96799659729004,
- "p90": 225.50399601459503,
- "p95": 231.77599906921387,
- "p99": 246.20799720287323
- },
- "isolatedSum": {
- "p50": 234.8800078034401,
- "p90": 256.1279982328415,
- "p95": 265.1519998908043,
- "p99": 342.0480042695999
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 53172224,
- "combineLogicalBytes": 53172224,
- "fanoutMean": 3.6220703125,
- "recvTokensMax": 934,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f361a9a4",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2279937619f3971",
- "colorKey": "h200_b5c683eb",
- "comparisonKey": "d82096ba4baa0cd5",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:56:27.284944+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_8",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2279937619f3971",
- "workloadId": "set:4:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271830346",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271830346",
- "createdAt": "2026-06-26T23:56:27.284944+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 76.73600316047668,
- "p90": 126.24000012874603,
- "p95": 134.46399569511414,
- "p99": 156.63999319076538
- },
- "combine": {
- "p50": 72.41600006818771,
- "p90": 83.71199667453766,
- "p95": 87.07199990749359,
- "p99": 99.13600236177444
- },
- "roundtrip": {
- "p50": 128.38399410247803,
- "p90": 148.03199470043182,
- "p95": 154.62400019168854,
- "p99": 179.6479970216751
- },
- "isolatedSum": {
- "p50": 149.1520032286644,
- "p90": 209.9519968032837,
- "p95": 221.53599560260773,
- "p99": 255.77599555253983
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
- "recvTokensMax": 8,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 75.83999633789062,
- "p90": 87.99999952316284,
- "p95": 98.11200201511383,
- "p99": 113.02399635314941
- },
- "combine": {
- "p50": 71.84000313282013,
- "p90": 79.6160027384758,
- "p95": 85.56800335645676,
- "p99": 95.87199985980988
- },
- "roundtrip": {
- "p50": 126.81600451469421,
- "p90": 139.67999815940857,
- "p95": 149.63200688362122,
- "p99": 170.20800709724426
- },
- "isolatedSum": {
- "p50": 147.67999947071075,
- "p90": 167.61600226163864,
- "p95": 183.6800053715706,
- "p99": 208.8959962129593
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
- "recvTokensMax": 64,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 92.73599833250046,
- "p90": 105.50399869680405,
- "p95": 112.41599917411804,
- "p99": 132.60799646377563
- },
- "combine": {
- "p50": 81.98399841785431,
- "p90": 93.56799721717834,
- "p95": 99.58399832248688,
- "p99": 112.57600039243698
- },
- "roundtrip": {
- "p50": 148.70400726795197,
- "p90": 168.7999963760376,
- "p95": 180.7679980993271,
- "p99": 196.6720074415207
- },
- "isolatedSum": {
- "p50": 174.71999675035477,
- "p90": 199.0719959139824,
- "p95": 211.99999749660492,
- "p99": 245.18399685621262
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 8,
- "recvTokensMax": 256,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 139.00800049304962,
- "p90": 175.99999904632568,
- "p95": 181.34400248527527,
- "p99": 197.91999459266663
- },
- "combine": {
- "p50": 127.20000743865967,
- "p90": 150.68799257278442,
- "p95": 153.6639928817749,
- "p99": 160.5439931154251
- },
- "roundtrip": {
- "p50": 232.92799293994904,
- "p90": 266.04801416397095,
- "p95": 271.5199887752533,
- "p99": 294.20799016952515
- },
- "isolatedSum": {
- "p50": 266.2080079317093,
- "p90": 326.6879916191101,
- "p95": 335.00799536705017,
- "p99": 358.46398770809174
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-d65f5a76",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500",
- "colorKey": "h200_b5c683eb",
- "comparisonKey": "d82096ba4baa0cd5",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:02:47.642624+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ffa946582edb500",
- "workloadId": "set:8:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272028751",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272028751",
- "createdAt": "2026-06-27T00:02:47.642624+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 75.99999755620956,
- "p90": 106.175996363163,
- "p95": 117.60000139474869,
- "p99": 352.512001991272
- },
- "combine": {
- "p50": 70.68800181150436,
- "p90": 85.9839990735054,
- "p95": 90.52799642086029,
- "p99": 104.12800312042236
- },
- "roundtrip": {
- "p50": 124.60800260305405,
- "p90": 158.62399339675903,
- "p95": 166.46400094032288,
- "p99": 186.27199530601501
- },
- "isolatedSum": {
- "p50": 146.68799936771393,
- "p90": 192.1599954366684,
- "p95": 208.12799781560898,
- "p99": 456.64000511169434
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 74.65600222349167,
- "p90": 98.91200065612793,
- "p95": 104.09600287675858,
- "p99": 114.84800279140472
- },
- "combine": {
- "p50": 70.65600156784058,
- "p90": 87.20000088214874,
- "p95": 91.32800251245499,
- "p99": 106.46399855613708
- },
- "roundtrip": {
- "p50": 125.59999525547028,
- "p90": 161.02400422096252,
- "p95": 170.78399658203125,
- "p99": 197.05599546432495
- },
- "isolatedSum": {
- "p50": 145.31200379133224,
- "p90": 186.11200153827667,
- "p95": 195.42400538921356,
- "p99": 221.3120013475418
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 8,
- "recvTokensMax": 16,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 74.20799881219864,
- "p90": 102.24000364542007,
- "p95": 111.35999858379364,
- "p99": 129.63199615478516
- },
- "combine": {
- "p50": 71.87200337648392,
- "p90": 88.22400122880936,
- "p95": 94.52799707651138,
- "p99": 115.26399850845337
- },
- "roundtrip": {
- "p50": 125.08800625801086,
- "p90": 153.53600680828094,
- "p95": 163.87200355529785,
- "p99": 176.86399817466736
- },
- "isolatedSum": {
- "p50": 146.08000218868256,
- "p90": 190.46400487422943,
- "p95": 205.88799566030502,
- "p99": 244.89599466323853
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 8,
- "recvTokensMax": 32,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.30399954319,
- "p90": 98.49599748849869,
- "p95": 106.59199953079224,
- "p99": 120.19199877977371
- },
- "combine": {
- "p50": 71.16799801588058,
- "p90": 86.36800199747086,
- "p95": 90.52799642086029,
- "p99": 109.40799862146378
- },
- "roundtrip": {
- "p50": 124.64000284671783,
- "p90": 156.73600137233734,
- "p95": 164.48000073432922,
- "p99": 189.15200233459473
- },
- "isolatedSum": {
- "p50": 145.4719975590706,
- "p90": 184.86399948596954,
- "p95": 197.11999595165253,
- "p99": 229.5999974012375
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
- "recvTokensMax": 64,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 75.26399940252304,
- "p90": 102.59199887514114,
- "p95": 109.76000130176544,
- "p99": 125.59999525547028
- },
- "combine": {
- "p50": 76.92799717187881,
- "p90": 91.23200178146362,
- "p95": 94.94400024414062,
- "p99": 105.82400113344193
- },
- "roundtrip": {
- "p50": 128.7039965391159,
- "p90": 160.51200032234192,
- "p95": 171.07200622558594,
- "p99": 223.13599288463593
- },
- "isolatedSum": {
- "p50": 152.19199657440186,
- "p90": 193.82400065660477,
- "p95": 204.70400154590607,
- "p99": 231.4239963889122
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 8,
- "recvTokensMax": 128,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 90.59199690818787,
- "p90": 110.52799969911575,
- "p95": 119.00799721479416,
- "p99": 143.39199662208557
- },
- "combine": {
- "p50": 81.53600245714188,
- "p90": 98.11200201511383,
- "p95": 105.79200088977814,
- "p99": 123.4240010380745
- },
- "roundtrip": {
- "p50": 145.4080045223236,
- "p90": 173.0239987373352,
- "p95": 180.4479956626892,
- "p99": 203.45599949359894
- },
- "isolatedSum": {
- "p50": 172.12799936532974,
- "p90": 208.64000171422958,
- "p95": 224.7999981045723,
- "p99": 266.81599766016006
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 8,
- "recvTokensMax": 256,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 110.84800213575363,
- "p90": 130.17599284648895,
- "p95": 137.56799697875977,
- "p99": 212.12799847126007
- },
- "combine": {
- "p50": 95.13600170612335,
- "p90": 114.20799791812897,
- "p95": 124.57600235939026,
- "p99": 243.42399835586548
- },
- "roundtrip": {
- "p50": 178.14399302005768,
- "p90": 205.24799823760986,
- "p95": 233.40800404548645,
- "p99": 432.2560131549835
- },
- "isolatedSum": {
- "p50": 205.98400384187698,
- "p90": 244.38399076461792,
- "p95": 262.14399933815,
- "p99": 455.55199682712555
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 8,
- "recvTokensMax": 512,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 133.44000279903412,
- "p90": 152.48000621795654,
- "p95": 157.95199573040009,
- "p99": 172.2240000963211
- },
- "combine": {
- "p50": 125.72799623012543,
- "p90": 140.60799777507782,
- "p95": 145.31199634075165,
- "p99": 176.7359972000122
- },
- "roundtrip": {
- "p50": 237.2480034828186,
- "p90": 255.51998615264893,
- "p95": 262.65600323677063,
- "p99": 295.9040105342865
- },
- "isolatedSum": {
- "p50": 259.16799902915955,
- "p90": 293.08800399303436,
- "p95": 303.26399207115173,
- "p99": 348.9599972963333
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-26bc6c27",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8",
- "colorKey": "h200_d0dfa19a",
- "comparisonKey": "5d5c9be2dc9b5f1f",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:56:33.428125+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_10",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · balanced-rank-local",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced-rank-local",
- "routingLabel": "balanced-rank-local",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d02a66236b524b8",
- "workloadId": "set:4:2eebbed158fe1320",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271837870",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271837870",
- "createdAt": "2026-06-26T23:56:33.428125+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 65.95200300216675,
- "p90": 102.7199998497963,
- "p95": 115.55200070142746,
- "p99": 166.6560024023056
- },
- "combine": {
- "p50": 58.6559996008873,
- "p90": 72.4480003118515,
- "p95": 78.59200239181519,
- "p99": 95.64799815416336
- },
- "roundtrip": {
- "p50": 112.44799941778183,
- "p90": 152.70400047302246,
- "p95": 159.2320054769516,
- "p99": 181.2479943037033
- },
- "isolatedSum": {
- "p50": 124.60800260305405,
- "p90": 175.1680001616478,
- "p95": 194.14400309324265,
- "p99": 262.30400055646896
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 114688,
- "combineLogicalBytes": 114688,
- "fanoutMean": 1,
- "recvTokensMax": 4,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 67.9360032081604,
- "p90": 85.66399663686752,
- "p95": 95.0080007314682,
- "p99": 111.00800335407257
- },
- "combine": {
- "p50": 59.93599817156792,
- "p90": 70.88000327348709,
- "p95": 77.18399912118912,
- "p99": 92.03200042247772
- },
- "roundtrip": {
- "p50": 112.2559979557991,
- "p90": 138.11199367046356,
- "p95": 150.2400040626526,
- "p99": 209.6319943666458
- },
- "isolatedSum": {
- "p50": 127.87200137972832,
- "p90": 156.54399991035461,
- "p95": 172.19199985265732,
- "p99": 203.0400037765503
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 1,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 82.97599852085114,
- "p90": 103.71199995279312,
- "p95": 112.15999722480774,
- "p99": 135.23200154304504
- },
- "combine": {
- "p50": 69.76000219583511,
- "p90": 85.05599945783615,
- "p95": 93.88799965381622,
- "p99": 128.60800325870514
- },
- "roundtrip": {
- "p50": 125.56800246238708,
- "p90": 148.70400726795197,
- "p95": 165.92000424861908,
- "p99": 200.3519982099533
- },
- "isolatedSum": {
- "p50": 152.73600071668625,
- "p90": 188.76799941062927,
- "p95": 206.04799687862396,
- "p99": 263.8400048017502
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 1,
- "recvTokensMax": 32,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 83.39200168848038,
- "p90": 109.27999764680862,
- "p95": 120.28799951076508,
- "p99": 153.18399667739868
- },
- "combine": {
- "p50": 69.50400024652481,
- "p90": 82.87999778985977,
- "p95": 90.27200192213058,
- "p99": 100.89600086212158
- },
- "roundtrip": {
- "p50": 128.67200374603271,
- "p90": 153.53600680828094,
- "p95": 162.62400150299072,
- "p99": 190.65600633621216
- },
- "isolatedSum": {
- "p50": 152.8960019350052,
- "p90": 192.1599954366684,
- "p95": 210.56000143289566,
- "p99": 254.07999753952026
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 1,
- "recvTokensMax": 128,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b2e52442",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b",
- "colorKey": "h200_06544e53",
- "comparisonKey": "57040e121807e028",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:02:47.649756+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_10",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · balanced+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "balanced",
- "routingLabel": "balanced+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "f0e66a15078595b",
- "workloadId": "set:8:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1,
- "eplbImbalanceAfter": 1,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272031884",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272031884",
- "createdAt": "2026-06-27T00:02:47.649756+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 68.15999746322632,
- "p90": 100.60799866914749,
- "p95": 110.72000116109848,
- "p99": 138.75199854373932
- },
- "combine": {
- "p50": 60.70400029420853,
- "p90": 72.86400347948074,
- "p95": 79.3600007891655,
- "p99": 86.11200004816055
- },
- "roundtrip": {
- "p50": 116.92799627780914,
- "p90": 150.2079963684082,
- "p95": 158.6879938840866,
- "p99": 184.83200669288635
- },
- "isolatedSum": {
- "p50": 128.86399775743484,
- "p90": 173.47200214862823,
- "p95": 190.08000195026398,
- "p99": 224.86399859189987
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 229376,
- "combineLogicalBytes": 229376,
- "fanoutMean": 2,
- "recvTokensMax": 3,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 71.29599899053574,
- "p90": 104.54399883747101,
- "p95": 113.8560026884079,
- "p99": 152.99199521541595
- },
- "combine": {
- "p50": 61.983998864889145,
- "p90": 78.97599786520004,
- "p95": 83.5840031504631,
- "p99": 98.49599748849869
- },
- "roundtrip": {
- "p50": 116.5120005607605,
- "p90": 149.88799393177032,
- "p95": 163.71199488639832,
- "p99": 195.45599818229675
- },
- "isolatedSum": {
- "p50": 133.27999785542488,
- "p90": 183.51999670267105,
- "p95": 197.440005838871,
- "p99": 251.48799270391464
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 458752,
- "combineLogicalBytes": 458752,
- "fanoutMean": 2,
- "recvTokensMax": 6,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 72.4480003118515,
- "p90": 95.04000097513199,
- "p95": 104.63999956846237,
- "p99": 125.40799379348755
- },
- "combine": {
- "p50": 61.664000153541565,
- "p90": 73.02399724721909,
- "p95": 81.82399719953537,
- "p99": 99.58399832248688
- },
- "roundtrip": {
- "p50": 117.08799749612808,
- "p90": 144.41600441932678,
- "p95": 157.72800147533417,
- "p99": 314.88001346588135
- },
- "isolatedSum": {
- "p50": 134.11200046539307,
- "p90": 168.06399822235107,
- "p95": 186.46399676799774,
- "p99": 224.99199211597443
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 2,
- "recvTokensMax": 12,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.95199686288834,
- "p90": 99.2640033364296,
- "p95": 105.8880016207695,
- "p99": 122.27199971675873
- },
- "combine": {
- "p50": 62.6240000128746,
- "p90": 84.25600081682205,
- "p95": 90.11200070381165,
- "p99": 102.78400033712387
- },
- "roundtrip": {
- "p50": 116.15999788045883,
- "p90": 150.36800503730774,
- "p95": 161.69600188732147,
- "p99": 189.08800184726715
- },
- "isolatedSum": {
- "p50": 136.57599687576294,
- "p90": 183.52000415325165,
- "p95": 196.00000232458115,
- "p99": 225.0560000538826
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 2,
- "recvTokensMax": 24,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 75.87199658155441,
- "p90": 96.70399874448776,
- "p95": 106.01600259542465,
- "p99": 122.94399738311768
- },
- "combine": {
- "p50": 62.94400244951248,
- "p90": 78.65600287914276,
- "p95": 84.73599702119827,
- "p99": 96.6079980134964
- },
- "roundtrip": {
- "p50": 117.15199798345566,
- "p90": 145.11999487876892,
- "p95": 153.47200632095337,
- "p99": 190.75199961662292
- },
- "isolatedSum": {
- "p50": 138.8159990310669,
- "p90": 175.36000162363052,
- "p95": 190.75199961662292,
- "p99": 219.55199539661407
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 2,
- "recvTokensMax": 48,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 77.82399654388428,
- "p90": 99.32799637317657,
- "p95": 108.22399705648422,
- "p99": 131.52000308036804
- },
- "combine": {
- "p50": 66.3359984755516,
- "p90": 80.35200089216232,
- "p95": 87.74399757385254,
- "p99": 170.23999989032745
- },
- "roundtrip": {
- "p50": 119.90399658679962,
- "p90": 146.7519998550415,
- "p95": 154.4959992170334,
- "p99": 167.4879938364029
- },
- "isolatedSum": {
- "p50": 144.15999501943588,
- "p90": 179.6799972653389,
- "p95": 195.96799463033676,
- "p99": 301.7600029706955
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 2,
- "recvTokensMax": 96,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 83.39200168848038,
- "p90": 107.93600231409073,
- "p95": 117.47200042009354,
- "p99": 157.82399475574493
- },
- "combine": {
- "p50": 70.17599791288376,
- "p90": 82.36800134181976,
- "p95": 89.59999680519104,
- "p99": 102.7199998497963
- },
- "roundtrip": {
- "p50": 127.51999497413635,
- "p90": 154.7199934720993,
- "p95": 170.04799842834473,
- "p99": 201.27999782562256
- },
- "isolatedSum": {
- "p50": 153.56799960136414,
- "p90": 190.3040036559105,
- "p95": 207.07199722528458,
- "p99": 260.54399460554123
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 2,
- "recvTokensMax": 192,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 96.19200229644775,
- "p90": 114.04799669981003,
- "p95": 123.83999675512314,
- "p99": 167.4560010433197
- },
- "combine": {
- "p50": 84.48000252246857,
- "p90": 95.87199985980988,
- "p95": 99.93600100278854,
- "p99": 113.92000317573547
- },
- "roundtrip": {
- "p50": 156.3200056552887,
- "p90": 175.64800381660461,
- "p95": 185.56800484657288,
- "p99": 221.15199267864227
- },
- "isolatedSum": {
- "p50": 180.67200481891632,
- "p90": 209.9199965596199,
- "p95": 223.77599775791168,
- "p99": 281.3760042190552
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 2,
- "recvTokensMax": 384,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-9febd1e2",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7",
- "colorKey": "h200_9779cb2d",
- "comparisonKey": "65013819dd1ccf9e",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:12:58.540972+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h200-dgxc-slurm_5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s1",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-moving",
- "routingLabel": "hotspot-moving@s1",
- "routingStep": 1,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "6288a1aa76c20e7",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272345418",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272345418",
- "createdAt": "2026-06-27T00:12:58.540972+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.97599720954895,
- "p90": 90.91199934482574,
- "p95": 99.32799637317657,
- "p99": 128.83199751377106
- },
- "combine": {
- "p50": 70.27199864387512,
- "p90": 80.1599994301796,
- "p95": 89.21600133180618,
- "p99": 107.07200318574905
- },
- "roundtrip": {
- "p50": 125.47199428081512,
- "p90": 145.6959992647171,
- "p95": 153.31199765205383,
- "p99": 184.54399704933167
- },
- "isolatedSum": {
- "p50": 145.24799585342407,
- "p90": 171.07199877500534,
- "p95": 188.54399770498276,
- "p99": 235.9040006995201
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 83.26400071382523,
- "p90": 95.93600034713745,
- "p95": 101.05600208044052,
- "p99": 118.65600198507309
- },
- "combine": {
- "p50": 78.8159966468811,
- "p90": 86.75199747085571,
- "p95": 92.03200042247772,
- "p99": 111.84000223875046
- },
- "roundtrip": {
- "p50": 139.13600146770477,
- "p90": 150.68799257278442,
- "p95": 155.20000457763672,
- "p99": 181.05599284172058
- },
- "isolatedSum": {
- "p50": 162.07999736070633,
- "p90": 182.68799781799316,
- "p95": 193.08800250291824,
- "p99": 230.49600422382355
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 124.95999783277512,
- "p90": 135.51999628543854,
- "p95": 140.54399728775024,
- "p99": 153.3759981393814
- },
- "combine": {
- "p50": 118.30399930477142,
- "p90": 126.0479986667633,
- "p95": 131.00799918174744,
- "p99": 152.5759994983673
- },
- "roundtrip": {
- "p50": 222.27199375629425,
- "p90": 233.5679978132248,
- "p95": 239.3600046634674,
- "p99": 254.55999374389648
- },
- "isolatedSum": {
- "p50": 243.26399713754654,
- "p90": 261.56799495220184,
- "p95": 271.5519964694977,
- "p99": 305.9519976377487
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f5a9f57f",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958",
- "colorKey": "h200_9479c674",
- "comparisonKey": "65013819dd1ccf9e",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:13:12.398873+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h200-dgxc-slurm_13",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s2",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-moving",
- "routingLabel": "hotspot-moving@s2",
- "routingStep": 2,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "675e15b52e37958",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272348704",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272348704",
- "createdAt": "2026-06-27T00:13:12.398873+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 75.87199658155441,
- "p90": 93.59999746084213,
- "p95": 100.19200295209885,
- "p99": 114.56000059843063
- },
- "combine": {
- "p50": 71.35999947786331,
- "p90": 79.64800298213959,
- "p95": 85.63199639320374,
- "p99": 97.79199957847595
- },
- "roundtrip": {
- "p50": 129.2160004377365,
- "p90": 148.5760062932968,
- "p95": 158.84800255298615,
- "p99": 188.22400271892548
- },
- "isolatedSum": {
- "p50": 147.23199605941772,
- "p90": 173.24800044298172,
- "p95": 185.82399934530258,
- "p99": 212.35200017690659
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 88.0960002541542,
- "p90": 110.78400164842606,
- "p95": 121.72800302505493,
- "p99": 175.61599612236023
- },
- "combine": {
- "p50": 80.70400357246399,
- "p90": 92.3520028591156,
- "p95": 98.88000041246414,
- "p99": 121.34400010108948
- },
- "roundtrip": {
- "p50": 141.37600362300873,
- "p90": 164.19200599193573,
- "p95": 172.95999825000763,
- "p99": 193.7599927186966
- },
- "isolatedSum": {
- "p50": 168.8000038266182,
- "p90": 203.13600450754166,
- "p95": 220.60800343751907,
- "p99": 296.9599962234497
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 128.92800569534302,
- "p90": 140.3840035200119,
- "p95": 146.65600657463074,
- "p99": 171.10399901866913
- },
- "combine": {
- "p50": 120.28799951076508,
- "p90": 132.38400220870972,
- "p95": 136.76799833774567,
- "p99": 159.36000645160675
- },
- "roundtrip": {
- "p50": 224.2880016565323,
- "p90": 240.1919960975647,
- "p95": 248.1279969215393,
- "p99": 276.8320143222809
- },
- "isolatedSum": {
- "p50": 249.2160052061081,
- "p90": 272.7680057287216,
- "p95": 283.4240049123764,
- "p99": 330.4640054702759
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-13ab64c2",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419",
- "colorKey": "h200_9579c807",
- "comparisonKey": "65013819dd1ccf9e",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:13:19.903361+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s3",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-moving",
- "routingLabel": "hotspot-moving@s3",
- "routingStep": 3,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "82b2963fc322419",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272352256",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272352256",
- "createdAt": "2026-06-27T00:13:19.903361+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.40000027418137,
- "p90": 94.7519987821579,
- "p95": 101.9200012087822,
- "p99": 123.36000055074692
- },
- "combine": {
- "p50": 70.20799815654755,
- "p90": 82.17599987983704,
- "p95": 89.37600255012512,
- "p99": 105.56799918413162
- },
- "roundtrip": {
- "p50": 125.34399330615997,
- "p90": 150.04800260066986,
- "p95": 162.6559942960739,
- "p99": 177.88800597190857
- },
- "isolatedSum": {
- "p50": 144.6079984307289,
- "p90": 176.92799866199493,
- "p95": 191.29600375890732,
- "p99": 228.92799973487854
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 86.46400272846222,
- "p90": 103.00800204277039,
- "p95": 111.7440015077591,
- "p99": 129.95199859142303
- },
- "combine": {
- "p50": 79.26400005817413,
- "p90": 90.97599983215332,
- "p95": 96.47999703884125,
- "p99": 115.9679964184761
- },
- "roundtrip": {
- "p50": 139.8400068283081,
- "p90": 156.6080003976822,
- "p95": 163.96799683570862,
- "p99": 176.35199427604675
- },
- "isolatedSum": {
- "p50": 165.72800278663635,
- "p90": 193.9840018749237,
- "p95": 208.22399854660034,
- "p99": 245.91999500989914
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 124.83199685811996,
- "p90": 138.59200477600098,
- "p95": 144.44799721240997,
- "p99": 233.88800024986267
- },
- "combine": {
- "p50": 119.07199770212173,
- "p90": 130.8159977197647,
- "p95": 139.71200585365295,
- "p99": 152.5759994983673
- },
- "roundtrip": {
- "p50": 222.24000096321106,
- "p90": 239.84000086784363,
- "p95": 250.65600872039795,
- "p99": 283.4239900112152
- },
- "isolatedSum": {
- "p50": 243.9039945602417,
- "p90": 269.4080024957657,
- "p95": 284.1600030660629,
- "p99": 386.46399974823
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-7c6f809c",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||2ad5ef98d328fa1",
- "colorKey": "h200_189562cd",
- "comparisonKey": "6b812f29e2dcdef6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:57:16.217396+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_1",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2ad5ef98d328fa1",
- "workloadId": "set:4:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271859196",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271859196",
- "createdAt": "2026-06-26T23:57:16.217396+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 71.71200215816498,
- "p90": 98.30400347709656,
- "p95": 109.69600081443787,
- "p99": 295.48799991607666
- },
- "combine": {
- "p50": 67.6800012588501,
- "p90": 82.07999914884567,
- "p95": 88.16000074148178,
- "p99": 110.04800349473953
- },
- "roundtrip": {
- "p50": 121.95199728012085,
- "p90": 153.24799716472626,
- "p95": 161.53599321842194,
- "p99": 211.16800606250763
- },
- "isolatedSum": {
- "p50": 139.39200341701508,
- "p90": 180.38400262594223,
- "p95": 197.85600155591965,
- "p99": 405.5360034108162
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.17599856853485,
- "p90": 98.08000177145004,
- "p95": 108.0000028014183,
- "p99": 146.14400267601013
- },
- "combine": {
- "p50": 69.63200122117996,
- "p90": 83.13599973917007,
- "p95": 89.02399986982346,
- "p99": 103.20000350475311
- },
- "roundtrip": {
- "p50": 125.40799379348755,
- "p90": 153.50399911403656,
- "p95": 165.12000560760498,
- "p99": 192.83199310302734
- },
- "isolatedSum": {
- "p50": 143.8079997897148,
- "p90": 181.21600151062012,
- "p95": 197.02400267124176,
- "p99": 249.34400618076324
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 81.02399855852127,
- "p90": 105.76000064611435,
- "p95": 114.46399986743927,
- "p99": 129.72800433635712
- },
- "combine": {
- "p50": 77.2159993648529,
- "p90": 89.34400230646133,
- "p95": 95.8079993724823,
- "p99": 114.97599631547928
- },
- "roundtrip": {
- "p50": 137.472003698349,
- "p90": 158.91200304031372,
- "p95": 166.20799899101257,
- "p99": 185.08799374103546
- },
- "isolatedSum": {
- "p50": 158.23999792337418,
- "p90": 195.10400295257568,
- "p95": 210.27199923992157,
- "p99": 244.7040006518364
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 124.25599992275238,
- "p90": 137.02400028705597,
- "p95": 144.51199769973755,
- "p99": 166.6879951953888
- },
- "combine": {
- "p50": 118.30399930477142,
- "p90": 130.14400005340576,
- "p95": 135.71199774742126,
- "p99": 157.6319932937622
- },
- "roundtrip": {
- "p50": 220.06399929523468,
- "p90": 239.42400515079498,
- "p95": 246.17600440979004,
- "p99": 313.6639893054962
- },
- "isolatedSum": {
- "p50": 242.5599992275238,
- "p90": 267.16800034046173,
- "p95": 280.2239954471588,
- "p99": 324.319988489151
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-13c27f2d",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621",
- "colorKey": "h200_189562cd",
- "comparisonKey": "6b812f29e2dcdef6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:10.730241+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_9",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b6caf944f6bb621",
- "workloadId": "set:8:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272100552",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272100552",
- "createdAt": "2026-06-27T00:05:10.730241+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 74.14399832487106,
- "p90": 98.84800016880035,
- "p95": 106.36799782514572,
- "p99": 130.46400249004364
- },
- "combine": {
- "p50": 68.15999746322632,
- "p90": 80.19199967384338,
- "p95": 86.30400151014328,
- "p99": 99.16800260543823
- },
- "roundtrip": {
- "p50": 122.17599898576736,
- "p90": 154.4319987297058,
- "p95": 165.98400473594666,
- "p99": 216.44799411296844
- },
- "isolatedSum": {
- "p50": 142.30399578809738,
- "p90": 179.03999984264374,
- "p95": 192.671999335289,
- "p99": 229.63200509548187
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 76.19199901819229,
- "p90": 103.5199984908104,
- "p95": 114.3679991364479,
- "p99": 145.9520012140274
- },
- "combine": {
- "p50": 69.2799985408783,
- "p90": 83.96799862384796,
- "p95": 90.11200070381165,
- "p99": 99.7759997844696
- },
- "roundtrip": {
- "p50": 125.02400577068329,
- "p90": 152.3520052433014,
- "p95": 163.58399391174316,
- "p99": 191.16799533367157
- },
- "isolatedSum": {
- "p50": 145.4719975590706,
- "p90": 187.48799711465836,
- "p95": 204.47999984025955,
- "p99": 245.728000998497
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1247232,
- "combineLogicalBytes": 1247232,
- "fanoutMean": 5.4375,
- "recvTokensMax": 16,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 79.16799932718277,
- "p90": 122.56000190973282,
- "p95": 143.8719928264618,
- "p99": 228.03199291229248
- },
- "combine": {
- "p50": 70.04799693822861,
- "p90": 85.1840004324913,
- "p95": 89.9519994854927,
- "p99": 98.4639972448349
- },
- "roundtrip": {
- "p50": 130.0159990787506,
- "p90": 166.17600619792938,
- "p95": 180.80000579357147,
- "p99": 225.63199698925018
- },
- "isolatedSum": {
- "p50": 149.21599626541138,
- "p90": 207.74400234222412,
- "p95": 233.8239923119545,
- "p99": 326.4959901571274
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2451456,
- "combineLogicalBytes": 2451456,
- "fanoutMean": 5.34375,
- "recvTokensMax": 32,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 75.52000135183334,
- "p90": 99.71199929714203,
- "p95": 106.62399977445602,
- "p99": 121.24799937009811
- },
- "combine": {
- "p50": 70.592001080513,
- "p90": 88.19200098514557,
- "p95": 93.31200271844864,
- "p99": 122.49600142240524
- },
- "roundtrip": {
- "p50": 127.29600071907043,
- "p90": 156.44800662994385,
- "p95": 164.2879992723465,
- "p99": 200.76799392700195
- },
- "isolatedSum": {
- "p50": 146.11200243234634,
- "p90": 187.9040002822876,
- "p95": 199.93600249290466,
- "p99": 243.74400079250336
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 75.42400062084198,
- "p90": 99.32799637317657,
- "p95": 107.16799646615982,
- "p99": 116.44800007343292
- },
- "combine": {
- "p50": 72.7040022611618,
- "p90": 89.59999680519104,
- "p95": 95.551997423172,
- "p99": 149.1200029850006
- },
- "roundtrip": {
- "p50": 129.5360028743744,
- "p90": 163.42400014400482,
- "p95": 173.18400740623474,
- "p99": 210.36800742149353
- },
- "isolatedSum": {
- "p50": 148.12800288200378,
- "p90": 188.92799317836761,
- "p95": 202.71999388933182,
- "p99": 265.56800305843353
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 9748480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 128,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 84.89599823951721,
- "p90": 109.31199789047241,
- "p95": 117.15199798345566,
- "p99": 152.92799472808838
- },
- "combine": {
- "p50": 78.75200361013412,
- "p90": 95.36000341176987,
- "p95": 99.10400211811066,
- "p99": 120.06399780511856
- },
- "roundtrip": {
- "p50": 140.73599874973297,
- "p90": 167.29600727558136,
- "p95": 174.01599884033203,
- "p99": 211.07199788093567
- },
- "isolatedSum": {
- "p50": 163.64800184965134,
- "p90": 204.67200130224228,
- "p95": 216.25600010156631,
- "p99": 272.99199253320694
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19525632,
- "combineLogicalBytes": 19525632,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 256,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 98.91200065612793,
- "p90": 116.19199812412262,
- "p95": 121.31199985742569,
- "p99": 146.84799313545227
- },
- "combine": {
- "p50": 91.36000275611877,
- "p90": 105.50399869680405,
- "p95": 109.92000252008438,
- "p99": 130.65600395202637
- },
- "roundtrip": {
- "p50": 168.7999963760376,
- "p90": 190.8479928970337,
- "p95": 195.23200392723083,
- "p99": 233.69599878787994
- },
- "isolatedSum": {
- "p50": 190.2720034122467,
- "p90": 221.69599682092667,
- "p95": 231.23200237751007,
- "p99": 277.50399708747864
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38621184,
- "combineLogicalBytes": 38621184,
- "fanoutMean": 5.26171875,
- "recvTokensMax": 512,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 125.72799623012543,
- "p90": 143.16800236701965,
- "p95": 147.90399372577667,
- "p99": 170.71999609470367
- },
- "combine": {
- "p50": 120.06399780511856,
- "p90": 136.48000359535217,
- "p95": 141.9840008020401,
- "p99": 148.44800531864166
- },
- "roundtrip": {
- "p50": 224.09600019454956,
- "p90": 247.8400021791458,
- "p95": 254.68799471855164,
- "p99": 276.38399600982666
- },
- "isolatedSum": {
- "p50": 245.791994035244,
- "p90": 279.6480059623718,
- "p95": 289.8879945278168,
- "p99": 319.16800141334534
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-c4fd916e",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac",
- "colorKey": "h200_80a72891",
- "comparisonKey": "abe9d0af26c5a0c0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:13.797855+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_1",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "e41f5099a9733ac",
- "workloadId": "set:8:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.830078125,
- "eplbImbalanceAfter": 1.0007595486111112,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272103776",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272103776",
- "createdAt": "2026-06-27T00:05:13.797855+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 72.86400347948074,
- "p90": 99.10400211811066,
- "p95": 107.35999792814255,
- "p99": 136.48000359535217
- },
- "combine": {
- "p50": 67.87200272083282,
- "p90": 82.30400085449219,
- "p95": 87.55200356245041,
- "p99": 92.12800115346909
- },
- "roundtrip": {
- "p50": 121.31199985742569,
- "p90": 150.62400698661804,
- "p95": 160.76800227165222,
- "p99": 204.8639953136444
- },
- "isolatedSum": {
- "p50": 140.73600620031357,
- "p90": 181.40800297260284,
- "p95": 194.91200149059296,
- "p99": 228.60800474882126
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 74.43200051784515,
- "p90": 101.34399682283401,
- "p95": 109.66400057077408,
- "p99": 138.43199610710144
- },
- "combine": {
- "p50": 67.90400296449661,
- "p90": 80.76799660921097,
- "p95": 85.37600189447403,
- "p99": 95.13600170612335
- },
- "roundtrip": {
- "p50": 121.56800180673599,
- "p90": 151.67999267578125,
- "p95": 162.23999857902527,
- "p99": 191.64800643920898
- },
- "isolatedSum": {
- "p50": 142.33600348234177,
- "p90": 182.11199343204498,
- "p95": 195.0400024652481,
- "p99": 233.5679978132248
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1175552,
- "combineLogicalBytes": 1175552,
- "fanoutMean": 5.125,
- "recvTokensMax": 12,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 74.36800003051758,
- "p90": 106.30399733781815,
- "p95": 112.8000020980835,
- "p99": 133.34399461746216
- },
- "combine": {
- "p50": 69.31199878454208,
- "p90": 85.75999736785889,
- "p95": 93.05600076913834,
- "p99": 108.41599851846695
- },
- "roundtrip": {
- "p50": 123.16799908876419,
- "p90": 152.16000378131866,
- "p95": 162.33600676059723,
- "p99": 187.80800700187683
- },
- "isolatedSum": {
- "p50": 143.67999881505966,
- "p90": 192.06399470567703,
- "p95": 205.85600286722183,
- "p99": 241.7599931359291
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2451456,
- "combineLogicalBytes": 2451456,
- "fanoutMean": 5.34375,
- "recvTokensMax": 23,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.72800260782242,
- "p90": 94.94400024414062,
- "p95": 102.30399668216705,
- "p99": 121.2799996137619
- },
- "combine": {
- "p50": 68.44799965620041,
- "p90": 81.91999793052673,
- "p95": 88.03199976682663,
- "p99": 102.52799838781357
- },
- "roundtrip": {
- "p50": 124.22399967908859,
- "p90": 154.14400398731232,
- "p95": 164.60800170898438,
- "p99": 177.44000256061554
- },
- "isolatedSum": {
- "p50": 142.17600226402283,
- "p90": 176.86399817466736,
- "p95": 190.33599644899368,
- "p99": 223.80799800157547
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4730880,
- "combineLogicalBytes": 4730880,
- "fanoutMean": 5.15625,
- "recvTokensMax": 44,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 72.12799787521362,
- "p90": 98.55999797582626,
- "p95": 106.01600259542465,
- "p99": 130.62399625778198
- },
- "combine": {
- "p50": 69.92000341415405,
- "p90": 83.29600095748901,
- "p95": 89.28000181913376,
- "p99": 106.75200074911118
- },
- "roundtrip": {
- "p50": 123.77600371837616,
- "p90": 149.63200688362122,
- "p95": 158.4639996290207,
- "p99": 176.54399573802948
- },
- "isolatedSum": {
- "p50": 142.04800128936768,
- "p90": 181.85599893331528,
- "p95": 195.2960044145584,
- "p99": 237.37599700689316
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9691136,
- "combineLogicalBytes": 9691136,
- "fanoutMean": 5.28125,
- "recvTokensMax": 88,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 84.48000252246857,
- "p90": 110.75200140476227,
- "p95": 119.61600184440613,
- "p99": 152.41600573062897
- },
- "combine": {
- "p50": 77.2479996085167,
- "p90": 91.07200056314468,
- "p95": 98.36799651384354,
- "p99": 130.17599284648895
- },
- "roundtrip": {
- "p50": 134.783998131752,
- "p90": 159.04000401496887,
- "p95": 166.97600483894348,
- "p99": 194.36800479888916
- },
- "isolatedSum": {
- "p50": 161.72800213098526,
- "p90": 201.82400196790695,
- "p95": 217.98399835824966,
- "p99": 282.5919985771179
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19568640,
- "combineLogicalBytes": 19568640,
- "fanoutMean": 5.33203125,
- "recvTokensMax": 179,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 97.02400118112564,
- "p90": 121.2799996137619,
- "p95": 137.95199990272522,
- "p99": 238.87999355793
- },
- "combine": {
- "p50": 90.94399958848953,
- "p90": 106.97600245475769,
- "p95": 113.98400366306305,
- "p99": 139.3280029296875
- },
- "roundtrip": {
- "p50": 161.05599701404572,
- "p90": 182.17599391937256,
- "p95": 191.23199582099915,
- "p99": 230.27199506759644
- },
- "isolatedSum": {
- "p50": 187.96800076961517,
- "p90": 228.2560020685196,
- "p95": 251.93600356578827,
- "p99": 378.2079964876175
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38750208,
- "combineLogicalBytes": 38750208,
- "fanoutMean": 5.279296875,
- "recvTokensMax": 348,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 116.03199690580368,
- "p90": 134.39999520778656,
- "p95": 142.5279974937439,
- "p99": 206.11199736595154
- },
- "combine": {
- "p50": 103.04000228643417,
- "p90": 118.23999881744385,
- "p95": 122.079998254776,
- "p99": 137.69599795341492
- },
- "roundtrip": {
- "p50": 195.99999487400055,
- "p90": 214.33599293231964,
- "p95": 224.5440036058426,
- "p99": 265.02400636672974
- },
- "isolatedSum": {
- "p50": 219.07199919223785,
- "p90": 252.6399940252304,
- "p95": 264.6079957485199,
- "p99": 343.80799531936646
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77342720,
- "combineLogicalBytes": 77342720,
- "fanoutMean": 5.2685546875,
- "recvTokensMax": 687,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-34b2b051",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a",
- "colorKey": "h200_2a7f12a0",
- "comparisonKey": "4dde4e46080a91eb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:14:22.620116+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h200-dgxc-slurm_3",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · uniform·empty-rank",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform·empty-rank",
- "routingStep": 0,
- "unevenTokens": "empty-rank",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "5621f0d4899ad7a",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272386143",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272386143",
- "createdAt": "2026-06-27T00:14:22.620116+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 63,
- "dispatch": {
- "p50": 73.15199822187424,
- "p90": 92.76799857616425,
- "p95": 100.28800368309021,
- "p99": 131.58400356769562
- },
- "combine": {
- "p50": 68.96000355482101,
- "p90": 83.64800363779068,
- "p95": 88.92799913883209,
- "p99": 102.11200267076492
- },
- "roundtrip": {
- "p50": 121.66400253772736,
- "p90": 145.37599682807922,
- "p95": 157.18400478363037,
- "p99": 189.56799805164337
- },
- "isolatedSum": {
- "p50": 142.11200177669525,
- "p90": 176.41600221395493,
- "p95": 189.2160028219223,
- "p99": 233.69600623846054
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4888576,
- "combineLogicalBytes": 4888576,
- "fanoutMean": 5.412698268890381,
- "recvTokensMax": 46,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 252,
- "dispatch": {
- "p50": 82.43200182914734,
- "p90": 96.28800302743912,
- "p95": 103.84000092744827,
- "p99": 123.07199835777283
- },
- "combine": {
- "p50": 76.60800218582153,
- "p90": 86.65599673986435,
- "p95": 92.28800237178802,
- "p99": 107.84000158309937
- },
- "roundtrip": {
- "p50": 134.49600338935852,
- "p90": 156.031996011734,
- "p95": 167.4879938364029,
- "p99": 228.12800109386444
- },
- "isolatedSum": {
- "p50": 159.04000401496887,
- "p90": 182.94399976730347,
- "p95": 196.1280032992363,
- "p99": 230.9119999408722
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19396608,
- "combineLogicalBytes": 19396608,
- "fanoutMean": 5.36904764175415,
- "recvTokensMax": 180,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1022,
- "dispatch": {
- "p50": 115.90400338172913,
- "p90": 130.49599528312683,
- "p95": 136.86400651931763,
- "p99": 152.319997549057
- },
- "combine": {
- "p50": 108.92800241708755,
- "p90": 121.31199985742569,
- "p95": 126.8479973077774,
- "p99": 144.06399428844452
- },
- "roundtrip": {
- "p50": 201.08799636363983,
- "p90": 216.5759950876236,
- "p95": 222.33599424362183,
- "p99": 238.5919988155365
- },
- "isolatedSum": {
- "p50": 224.83200579881668,
- "p90": 251.80799514055252,
- "p95": 263.71200382709503,
- "p99": 296.3839918375015
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77529088,
- "combineLogicalBytes": 77529088,
- "fanoutMean": 5.2915849685668945,
- "recvTokensMax": 722,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-2de6a2af",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400",
- "colorKey": "h200_58b5650b",
- "comparisonKey": "4dde4e46080a91eb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:14:22.294115+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h200-dgxc-slurm_9",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · uniform·linear",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform·linear",
- "routingStep": 0,
- "unevenTokens": "linear",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b029c1a6fded400",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272382939",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272382939",
- "createdAt": "2026-06-27T00:14:22.294115+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 75.19999891519547,
- "p90": 97.18400239944458,
- "p95": 107.84000158309937,
- "p99": 136.1279934644699
- },
- "combine": {
- "p50": 68.9919963479042,
- "p90": 80.48000186681747,
- "p95": 86.62399649620056,
- "p99": 96.47999703884125
- },
- "roundtrip": {
- "p50": 122.27199971675873,
- "p90": 154.6880006790161,
- "p95": 166.97600483894348,
- "p99": 202.78400182724
- },
- "isolatedSum": {
- "p50": 144.19199526309967,
- "p90": 177.66400426626205,
- "p95": 194.46399807929993,
- "p99": 232.60799050331116
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 85.69599688053131,
- "p90": 105.8880016207695,
- "p95": 113.63200098276138,
- "p99": 147.2959965467453
- },
- "combine": {
- "p50": 78.40000092983246,
- "p90": 89.85599875450134,
- "p95": 95.93600034713745,
- "p99": 106.84800148010254
- },
- "roundtrip": {
- "p50": 134.62400436401367,
- "p90": 154.81600165367126,
- "p95": 166.1120057106018,
- "p99": 190.0160014629364
- },
- "isolatedSum": {
- "p50": 164.09599781036377,
- "p90": 195.74400037527084,
- "p95": 209.56800132989883,
- "p99": 254.14399802684784
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.76000261306763,
- "p90": 139.13600146770477,
- "p95": 149.9200016260147,
- "p99": 190.94400107860565
- },
- "combine": {
- "p50": 114.88000303506851,
- "p90": 121.88799679279327,
- "p95": 128.1599998474121,
- "p99": 155.61600029468536
- },
- "roundtrip": {
- "p50": 208.25600624084473,
- "p90": 228.57600450515747,
- "p95": 237.37600445747375,
- "p99": 271.64798974990845
- },
- "isolatedSum": {
- "p50": 232.64000564813614,
- "p90": 261.02399826049805,
- "p95": 278.0800014734268,
- "p99": 346.560001373291
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-6ff3844b",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de",
- "colorKey": "h200_580d7b05",
- "comparisonKey": "46ecc7ff5ccb7c5d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:02:26.011362+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_11",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · uniform+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "uniform",
- "routingLabel": "uniform+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "73351bbcd4d02de",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.078125,
- "eplbImbalanceAfter": 1.00048828125,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272020269",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272020269",
- "createdAt": "2026-06-27T00:02:26.011362+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 76.76800340414047,
- "p90": 99.23200309276581,
- "p95": 110.59200018644333,
- "p99": 139.71200585365295
- },
- "combine": {
- "p50": 68.1919977068901,
- "p90": 80.09599894285202,
- "p95": 84.06399935483932,
- "p99": 98.65599870681763
- },
- "roundtrip": {
- "p50": 123.16799908876419,
- "p90": 143.90400052070618,
- "p95": 155.8080017566681,
- "p99": 181.5679967403412
- },
- "isolatedSum": {
- "p50": 144.96000111103058,
- "p90": 179.32800203561783,
- "p95": 194.65599954128265,
- "p99": 238.36800456047058
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 516096,
- "combineLogicalBytes": 516096,
- "fanoutMean": 4.5,
- "recvTokensMax": 6,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 76.09599828720093,
- "p90": 102.55999863147736,
- "p95": 112.70400136709213,
- "p99": 138.5599970817566
- },
- "combine": {
- "p50": 69.95200365781784,
- "p90": 79.83999699354172,
- "p95": 83.39200168848038,
- "p99": 91.93599969148636
- },
- "roundtrip": {
- "p50": 125.791996717453,
- "p90": 143.96800100803375,
- "p95": 156.67200088500977,
- "p99": 176.5120029449463
- },
- "isolatedSum": {
- "p50": 146.04800194501877,
- "p90": 182.39999562501907,
- "p95": 196.0960030555725,
- "p99": 230.49599677324295
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1089536,
- "combineLogicalBytes": 1089536,
- "fanoutMean": 4.75,
- "recvTokensMax": 11,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 77.05599814653397,
- "p90": 99.0080013871193,
- "p95": 106.6880002617836,
- "p99": 139.77600634098053
- },
- "combine": {
- "p50": 70.04799693822861,
- "p90": 82.49600231647491,
- "p95": 85.56800335645676,
- "p99": 100.09600222110748
- },
- "roundtrip": {
- "p50": 130.17599284648895,
- "p90": 161.6320013999939,
- "p95": 169.24799978733063,
- "p99": 194.43200528621674
- },
- "isolatedSum": {
- "p50": 147.10399508476257,
- "p90": 181.5040037035942,
- "p95": 192.25600361824036,
- "p99": 239.872008562088
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2207744,
- "combineLogicalBytes": 2207744,
- "fanoutMean": 4.8125,
- "recvTokensMax": 23,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 77.34400033950806,
- "p90": 90.94399958848953,
- "p95": 97.9200005531311,
- "p99": 113.18399757146835
- },
- "combine": {
- "p50": 71.19999825954437,
- "p90": 79.9039974808693,
- "p95": 84.06399935483932,
- "p99": 113.02399635314941
- },
- "roundtrip": {
- "p50": 130.0159990787506,
- "p90": 153.08800339698792,
- "p95": 165.24800658226013,
- "p99": 195.3279972076416
- },
- "isolatedSum": {
- "p50": 148.54399859905243,
- "p90": 170.84799706935883,
- "p95": 181.98399990797043,
- "p99": 226.20799392461777
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4558848,
- "combineLogicalBytes": 4558848,
- "fanoutMean": 4.96875,
- "recvTokensMax": 46,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 83.48800241947174,
- "p90": 106.20799660682678,
- "p95": 114.78400230407715,
- "p99": 256.0960054397583
- },
- "combine": {
- "p50": 72.9919970035553,
- "p90": 86.17600053548813,
- "p95": 91.51999652385712,
- "p99": 108.83200168609619
- },
- "roundtrip": {
- "p50": 132.9919993877411,
- "p90": 166.24000668525696,
- "p95": 176.35199427604675,
- "p99": 203.5519927740097
- },
- "isolatedSum": {
- "p50": 156.47999942302704,
- "p90": 192.3839971423149,
- "p95": 206.30399882793427,
- "p99": 364.9280071258545
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9347072,
- "combineLogicalBytes": 9347072,
- "fanoutMean": 5.09375,
- "recvTokensMax": 86,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 89.1840010881424,
- "p90": 104.51199859380722,
- "p95": 112.44799941778183,
- "p99": 135.5839967727661
- },
- "combine": {
- "p50": 79.3600007891655,
- "p90": 87.26400136947632,
- "p95": 92.73599833250046,
- "p99": 111.32799834012985
- },
- "roundtrip": {
- "p50": 139.90400731563568,
- "p90": 159.2639982700348,
- "p95": 169.3439930677414,
- "p99": 189.02400135993958
- },
- "isolatedSum": {
- "p50": 168.5440018773079,
- "p90": 191.77599996328354,
- "p95": 205.1839977502823,
- "p99": 246.91199511289597
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 18995200,
- "combineLogicalBytes": 18995200,
- "fanoutMean": 5.17578125,
- "recvTokensMax": 178,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 98.2080027461052,
- "p90": 113.40799927711487,
- "p95": 119.99999731779099,
- "p99": 140.19200205802917
- },
- "combine": {
- "p50": 89.12000060081482,
- "p90": 98.7199991941452,
- "p95": 102.7199998497963,
- "p99": 111.455999314785
- },
- "roundtrip": {
- "p50": 162.7199947834015,
- "p90": 182.0800006389618,
- "p95": 189.60000574588776,
- "p99": 210.4640007019043
- },
- "isolatedSum": {
- "p50": 187.32800334692,
- "p90": 212.12799847126007,
- "p95": 222.71999716758728,
- "p99": 251.64800137281418
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38291456,
- "combineLogicalBytes": 38291456,
- "fanoutMean": 5.216796875,
- "recvTokensMax": 348,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.46400117874146,
- "p90": 136.51199638843536,
- "p95": 143.64799857139587,
- "p99": 156.41599893569946
- },
- "combine": {
- "p50": 106.33599758148193,
- "p90": 117.91999638080597,
- "p95": 122.079998254776,
- "p99": 132.09599256515503
- },
- "roundtrip": {
- "p50": 200.15999674797058,
- "p90": 217.72800385951996,
- "p95": 223.29600155353546,
- "p99": 246.87999486923218
- },
- "isolatedSum": {
- "p50": 228.7999987602234,
- "p90": 254.43199276924133,
- "p95": 265.7279968261719,
- "p99": 288.5119915008545
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77113344,
- "combineLogicalBytes": 77113344,
- "fanoutMean": 5.2529296875,
- "recvTokensMax": 685,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f68ea439",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c",
- "colorKey": "h200_b6aa6110",
- "comparisonKey": "5971fba5c9d29fa7",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:03:10.278228+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "14ded8461f2636c",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272042133",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272042133",
- "createdAt": "2026-06-27T00:03:10.278228+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 71.68000191450119,
- "p90": 93.44000369310379,
- "p95": 102.68799960613251,
- "p99": 140.1599943637848
- },
- "combine": {
- "p50": 67.4239993095398,
- "p90": 79.45600152015686,
- "p95": 86.496002972126,
- "p99": 106.01600259542465
- },
- "roundtrip": {
- "p50": 119.4240003824234,
- "p90": 146.59200608730316,
- "p95": 155.07200360298157,
- "p99": 181.34400248527527
- },
- "isolatedSum": {
- "p50": 139.10400122404099,
- "p90": 172.89600521326065,
- "p95": 189.18400257825851,
- "p99": 246.17599695920944
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.69600236415863,
- "p90": 100.92800110578537,
- "p95": 109.66400057077408,
- "p99": 146.04799449443817
- },
- "combine": {
- "p50": 68.28799843788147,
- "p90": 80.76799660921097,
- "p95": 85.69599688053131,
- "p99": 152.8320014476776
- },
- "roundtrip": {
- "p50": 121.15199863910675,
- "p90": 147.77599275112152,
- "p95": 155.71199357509613,
- "p99": 193.7599927186966
- },
- "isolatedSum": {
- "p50": 141.9840008020401,
- "p90": 181.69599771499634,
- "p95": 195.3599974513054,
- "p99": 298.8799959421158
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 72.73600250482559,
- "p90": 96.12800180912018,
- "p95": 103.90400141477585,
- "p99": 168.06399822235107
- },
- "combine": {
- "p50": 66.91200286149979,
- "p90": 78.65600287914276,
- "p95": 82.2720006108284,
- "p99": 94.71999853849411
- },
- "roundtrip": {
- "p50": 118.9119964838028,
- "p90": 143.8080072402954,
- "p95": 155.71199357509613,
- "p99": 209.6959948539734
- },
- "isolatedSum": {
- "p50": 139.64800536632538,
- "p90": 174.78400468826294,
- "p95": 186.17600202560425,
- "p99": 262.7839967608452
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.56800138950348,
- "p90": 93.82399916648865,
- "p95": 101.47199779748917,
- "p99": 132.7359974384308
- },
- "combine": {
- "p50": 67.6800012588501,
- "p90": 79.6160027384758,
- "p95": 83.23200047016144,
- "p99": 101.21600329875946
- },
- "roundtrip": {
- "p50": 119.26399916410446,
- "p90": 145.24799585342407,
- "p95": 154.4959992170334,
- "p99": 191.71200692653656
- },
- "isolatedSum": {
- "p50": 141.24800264835358,
- "p90": 173.44000190496445,
- "p95": 184.7039982676506,
- "p99": 233.95200073719025
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 77.66400277614594,
- "p90": 104.12800312042236,
- "p95": 114.30399864912033,
- "p99": 140.6400054693222
- },
- "combine": {
- "p50": 70.8480030298233,
- "p90": 84.32000130414963,
- "p95": 90.7519981265068,
- "p99": 122.27199971675873
- },
- "roundtrip": {
- "p50": 125.95200538635254,
- "p90": 157.151997089386,
- "p95": 166.81599617004395,
- "p99": 207.23199844360352
- },
- "isolatedSum": {
- "p50": 148.51200580596924,
- "p90": 188.448004424572,
- "p95": 205.05599677562714,
- "p99": 262.91200518608093
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 82.94399827718735,
- "p90": 101.56799852848053,
- "p95": 110.88000237941742,
- "p99": 162.11199760437012
- },
- "combine": {
- "p50": 76.31999999284744,
- "p90": 87.67999708652496,
- "p95": 90.68799763917923,
- "p99": 98.33600372076035
- },
- "roundtrip": {
- "p50": 135.71199774742126,
- "p90": 155.20000457763672,
- "p95": 165.6000018119812,
- "p99": 222.27199375629425
- },
- "isolatedSum": {
- "p50": 159.2639982700348,
- "p90": 189.2479956150055,
- "p95": 201.56800001859665,
- "p99": 260.44800132513046
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 96.76799923181534,
- "p90": 109.11999642848969,
- "p95": 116.5120005607605,
- "p99": 174.01599884033203
- },
- "combine": {
- "p50": 86.17600053548813,
- "p90": 97.31200337409973,
- "p95": 103.07200253009796,
- "p99": 120.64000219106674
- },
- "roundtrip": {
- "p50": 160.67199409008026,
- "p90": 175.61599612236023,
- "p95": 181.40800297260284,
- "p99": 218.9439982175827
- },
- "isolatedSum": {
- "p50": 182.94399976730347,
- "p90": 206.43199980258942,
- "p95": 219.58400309085846,
- "p99": 294.6560010313988
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 25145344,
- "combineLogicalBytes": 25145344,
- "fanoutMean": 3.42578125,
- "recvTokensMax": 510,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.3360002040863,
- "p90": 132.47999548912048,
- "p95": 135.51999628543854,
- "p99": 155.90399503707886
- },
- "combine": {
- "p50": 112.86400258541107,
- "p90": 121.8239963054657,
- "p95": 126.62400305271149,
- "p99": 136.76799833774567
- },
- "roundtrip": {
- "p50": 214.52799439430237,
- "p90": 232.92799293994904,
- "p95": 243.42399835586548,
- "p99": 306.97599053382874
- },
- "isolatedSum": {
- "p50": 235.20000278949738,
- "p90": 254.30399179458618,
- "p95": 262.14399933815,
- "p99": 292.6719933748245
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-9e42f709",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3",
- "colorKey": "h200_b6aa6110",
- "comparisonKey": "5971fba5c9d29fa7",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:56:48.444120+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "1fa7fe74d0e30a3",
- "workloadId": "set:4:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271844665",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271844665",
- "createdAt": "2026-06-26T23:56:48.444120+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 71.3919997215271,
- "p90": 101.1200025677681,
- "p95": 115.1999980211258,
- "p99": 144.44799721240997
- },
- "combine": {
- "p50": 64.4799992442131,
- "p90": 82.78399705886841,
- "p95": 91.48799628019333,
- "p99": 104.67199981212616
- },
- "roundtrip": {
- "p50": 117.98399686813354,
- "p90": 156.22399747371674,
- "p95": 165.3120070695877,
- "p99": 193.12000274658203
- },
- "isolatedSum": {
- "p50": 135.8719989657402,
- "p90": 183.9039996266365,
- "p95": 206.68799430131912,
- "p99": 249.11999702453613
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.08799773454666,
- "p90": 95.77599912881851,
- "p95": 105.66399991512299,
- "p99": 147.32800424098969
- },
- "combine": {
- "p50": 67.6800012588501,
- "p90": 82.59200304746628,
- "p95": 89.02399986982346,
- "p99": 108.64000022411346
- },
- "roundtrip": {
- "p50": 121.2799996137619,
- "p90": 152.63999998569489,
- "p95": 167.4560010433197,
- "p99": 201.7280012369156
- },
- "isolatedSum": {
- "p50": 140.76799899339676,
- "p90": 178.3680021762848,
- "p95": 194.68799978494644,
- "p99": 255.96800446510315
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 81.66400343179703,
- "p90": 98.30400347709656,
- "p95": 109.98400300741196,
- "p99": 134.14399325847626
- },
- "combine": {
- "p50": 76.31999999284744,
- "p90": 89.21600133180618,
- "p95": 95.90400010347366,
- "p99": 118.6240017414093
- },
- "roundtrip": {
- "p50": 136.00000739097595,
- "p90": 157.53600001335144,
- "p95": 172.7360039949417,
- "p99": 212.25599944591522
- },
- "isolatedSum": {
- "p50": 157.98400342464447,
- "p90": 187.52000480890274,
- "p95": 205.88800311088562,
- "p99": 252.76799499988556
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.52800166606903,
- "p90": 136.6720050573349,
- "p95": 148.15999567508698,
- "p99": 160.89600324630737
- },
- "combine": {
- "p50": 112.03200370073318,
- "p90": 125.21600723266602,
- "p95": 132.4480026960373,
- "p99": 149.02399480342865
- },
- "roundtrip": {
- "p50": 211.58400177955627,
- "p90": 233.2800030708313,
- "p95": 244.159996509552,
- "p99": 292.03200340270996
- },
- "isolatedSum": {
- "p50": 234.56000536680222,
- "p90": 261.8880122900009,
- "p95": 280.60799837112427,
- "p99": 309.919998049736
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b1823392",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c",
- "colorKey": "h200_c5b3365a",
- "comparisonKey": "73e84f1c938d90c0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:04:44.997855+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_13",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "22da8b58646609c",
- "workloadId": "set:8:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272086516",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272086516",
- "createdAt": "2026-06-27T00:04:44.997855+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 64.92800265550613,
- "p90": 98.62399846315384,
- "p95": 109.27999764680862,
- "p99": 182.23999440670013
- },
- "combine": {
- "p50": 60.92799827456474,
- "p90": 75.42400062084198,
- "p95": 80.6720033288002,
- "p99": 96.54399752616882
- },
- "roundtrip": {
- "p50": 116.57600104808807,
- "p90": 152.44799852371216,
- "p95": 162.81600296497345,
- "p99": 179.51999604701996
- },
- "isolatedSum": {
- "p50": 125.85600093007088,
- "p90": 174.04799908399582,
- "p95": 189.95200097560883,
- "p99": 278.78399193286896
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 172032,
- "fanoutMean": 1.5,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 65.50399959087372,
- "p90": 96.47999703884125,
- "p95": 104.86400127410889,
- "p99": 137.56799697875977
- },
- "combine": {
- "p50": 59.808000922203064,
- "p90": 72.83200323581696,
- "p95": 78.84799689054489,
- "p99": 92.19200164079666
- },
- "roundtrip": {
- "p50": 110.97600311040878,
- "p90": 140.00000059604645,
- "p95": 150.87999403476715,
- "p99": 177.72799730300903
- },
- "isolatedSum": {
- "p50": 125.31200051307678,
- "p90": 169.3120002746582,
- "p95": 183.71199816465378,
- "p99": 229.75999861955643
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 315392,
- "fanoutMean": 1.375,
- "recvTokensMax": 16,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.18399846553802,
- "p90": 93.63199770450592,
- "p95": 108.67200046777725,
- "p99": 126.01600587368011
- },
- "combine": {
- "p50": 62.20800057053566,
- "p90": 70.52800059318542,
- "p95": 78.07999849319458,
- "p99": 100.51199793815613
- },
- "roundtrip": {
- "p50": 116.67200177907944,
- "p90": 144.1279947757721,
- "p95": 158.91200304031372,
- "p99": 186.17600202560425
- },
- "isolatedSum": {
- "p50": 135.39199903607368,
- "p90": 164.15999829769135,
- "p95": 186.75199896097183,
- "p99": 226.52800381183624
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 1.34375,
- "recvTokensMax": 32,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 72.64000177383423,
- "p90": 98.39999675750732,
- "p95": 103.93600165843964,
- "p99": 132.28799402713776
- },
- "combine": {
- "p50": 60.99199876189232,
- "p90": 72.06399738788605,
- "p95": 79.52000200748444,
- "p99": 91.5519967675209
- },
- "roundtrip": {
- "p50": 118.94399672746658,
- "p90": 150.30400454998016,
- "p95": 160.3199988603592,
- "p99": 178.78399789333344
- },
- "isolatedSum": {
- "p50": 133.63200053572655,
- "p90": 170.46399414539337,
- "p95": 183.45600366592407,
- "p99": 223.83999079465866
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1376256,
- "combineLogicalBytes": 1376256,
- "fanoutMean": 1.5,
- "recvTokensMax": 64,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 72.4480003118515,
- "p90": 85.9839990735054,
- "p95": 96.99200093746185,
- "p99": 122.17599898576736
- },
- "combine": {
- "p50": 67.10399687290192,
- "p90": 77.11999863386154,
- "p95": 83.74399691820145,
- "p99": 104.16000336408615
- },
- "roundtrip": {
- "p50": 118.40000003576279,
- "p90": 138.11199367046356,
- "p95": 145.11999487876892,
- "p99": 157.18400478363037
- },
- "isolatedSum": {
- "p50": 139.55199718475342,
- "p90": 163.10399770736694,
- "p95": 180.7359978556633,
- "p99": 226.33600234985352
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2781184,
- "combineLogicalBytes": 2781184,
- "fanoutMean": 1.515625,
- "recvTokensMax": 128,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 76.86399668455124,
- "p90": 97.31200337409973,
- "p95": 106.36799782514572,
- "p99": 120.25599926710129
- },
- "combine": {
- "p50": 69.47200000286102,
- "p90": 82.78399705886841,
- "p95": 87.80799806118011,
- "p99": 102.9760017991066
- },
- "roundtrip": {
- "p50": 128.25599312782288,
- "p90": 152.63999998569489,
- "p95": 163.10399770736694,
- "p99": 197.37599790096283
- },
- "isolatedSum": {
- "p50": 146.33599668741226,
- "p90": 180.09600043296814,
- "p95": 194.17599588632584,
- "p99": 223.23200106620789
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5533696,
- "combineLogicalBytes": 5533696,
- "fanoutMean": 1.5078125,
- "recvTokensMax": 256,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 95.8079993724823,
- "p90": 111.68000102043152,
- "p95": 120.99199742078781,
- "p99": 207.61600136756897
- },
- "combine": {
- "p50": 81.53600245714188,
- "p90": 93.75999867916107,
- "p95": 102.24000364542007,
- "p99": 131.1360001564026
- },
- "roundtrip": {
- "p50": 155.96799552440643,
- "p90": 171.23199999332428,
- "p95": 179.9360066652298,
- "p99": 195.93599438667297
- },
- "isolatedSum": {
- "p50": 177.34400182962418,
- "p90": 205.4399996995926,
- "p95": 223.23200106620789,
- "p99": 338.75200152397156
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 11210752,
- "combineLogicalBytes": 11210752,
- "fanoutMean": 1.52734375,
- "recvTokensMax": 512,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 118.49600076675415,
- "p90": 133.40799510478973,
- "p95": 137.472003698349,
- "p99": 168.09600591659546
- },
- "combine": {
- "p50": 108.51199924945831,
- "p90": 121.37600034475327,
- "p95": 125.18399953842163,
- "p99": 135.74400544166565
- },
- "roundtrip": {
- "p50": 205.76000213623047,
- "p90": 222.78399765491486,
- "p95": 227.84000635147095,
- "p99": 288.2879972457886
- },
- "isolatedSum": {
- "p50": 227.00800001621246,
- "p90": 254.783995449543,
- "p95": 262.65600323677063,
- "p99": 303.8400113582611
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-1cebdc77",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fddabb3277bec",
- "colorKey": "h200_c5b3365a",
- "comparisonKey": "73e84f1c938d90c0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:57:04.169845+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_4",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "47fddabb3277bec",
- "workloadId": "set:4:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271852422",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271852422",
- "createdAt": "2026-06-26T23:57:04.169845+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 70.27199864387512,
- "p90": 103.64799946546555,
- "p95": 125.791996717453,
- "p99": 208.15999805927277
- },
- "combine": {
- "p50": 61.95199862122536,
- "p90": 75.45600086450577,
- "p95": 80.6720033288002,
- "p99": 99.07200187444687
- },
- "roundtrip": {
- "p50": 117.37599968910217,
- "p90": 144.83200013637543,
- "p95": 152.73599326610565,
- "p99": 179.58399653434753
- },
- "isolatedSum": {
- "p50": 132.22399726510048,
- "p90": 179.1040003299713,
- "p95": 206.4640000462532,
- "p99": 307.23199993371964
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 172032,
- "fanoutMean": 1.5,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 75.26399940252304,
- "p90": 98.11200201511383,
- "p95": 106.175996363163,
- "p99": 138.3039951324463
- },
- "combine": {
- "p50": 63.90400230884552,
- "p90": 78.43200117349625,
- "p95": 83.99999886751175,
- "p99": 94.11200135946274
- },
- "roundtrip": {
- "p50": 119.48800086975098,
- "p90": 151.16800367832184,
- "p95": 161.53599321842194,
- "p99": 214.4320011138916
- },
- "isolatedSum": {
- "p50": 139.16800171136856,
- "p90": 176.54400318861008,
- "p95": 190.17599523067474,
- "p99": 232.41599649190903
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1376256,
- "combineLogicalBytes": 1376256,
- "fanoutMean": 1.5,
- "recvTokensMax": 64,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 82.17599987983704,
- "p90": 105.98400235176086,
- "p95": 113.11999708414078,
- "p99": 133.18400084972382
- },
- "combine": {
- "p50": 72.15999811887741,
- "p90": 88.76799792051315,
- "p95": 93.28000247478485,
- "p99": 116.57600104808807
- },
- "roundtrip": {
- "p50": 134.49600338935852,
- "p90": 162.432000041008,
- "p95": 173.47200214862823,
- "p99": 268.8640058040619
- },
- "isolatedSum": {
- "p50": 154.33599799871445,
- "p90": 194.75200027227402,
- "p95": 206.39999955892563,
- "p99": 249.7600018978119
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5533696,
- "combineLogicalBytes": 5533696,
- "fanoutMean": 1.5078125,
- "recvTokensMax": 256,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 119.58400160074234,
- "p90": 131.96800649166107,
- "p95": 136.3839954137802,
- "p99": 154.59200739860535
- },
- "combine": {
- "p50": 109.31199789047241,
- "p90": 120.67200243473053,
- "p95": 125.69600343704224,
- "p99": 135.3919953107834
- },
- "roundtrip": {
- "p50": 207.58399367332458,
- "p90": 222.91199862957,
- "p95": 232.86400735378265,
- "p99": 284.89598631858826
- },
- "isolatedSum": {
- "p50": 228.89599949121475,
- "p90": 252.6400089263916,
- "p95": 262.07999885082245,
- "p99": 289.98400270938873
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-78ae7872",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366",
- "colorKey": "h200_06aa1194",
- "comparisonKey": "85dbd46cb77d1362",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:04:54.232728+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "5a3054422534366",
- "workloadId": "set:8:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 7.40625,
- "eplbImbalanceAfter": 1.0004417782738093,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272090308",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272090308",
- "createdAt": "2026-06-27T00:04:54.232728+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 69.21599805355072,
- "p90": 83.55200290679932,
- "p95": 92.83199906349182,
- "p99": 110.75200140476227
- },
- "combine": {
- "p50": 67.45599955320358,
- "p90": 76.12799853086472,
- "p95": 81.53600245714188,
- "p99": 88.54400366544724
- },
- "roundtrip": {
- "p50": 122.079998254776,
- "p90": 140.4159963130951,
- "p95": 148.25600385665894,
- "p99": 178.3680021762848
- },
- "isolatedSum": {
- "p50": 136.6719976067543,
- "p90": 159.68000143766403,
- "p95": 174.3680015206337,
- "p99": 199.2960050702095
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 559104,
- "combineLogicalBytes": 559104,
- "fanoutMean": 4.875,
- "recvTokensMax": 6,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 72.03199714422226,
- "p90": 80.57600259780884,
- "p95": 86.40000224113464,
- "p99": 97.34400361776352
- },
- "combine": {
- "p50": 67.61600077152252,
- "p90": 75.13599842786789,
- "p95": 79.0719985961914,
- "p99": 86.40000224113464
- },
- "roundtrip": {
- "p50": 120.7360029220581,
- "p90": 138.49599659442902,
- "p95": 162.01600432395935,
- "p99": 265.21599292755127
- },
- "isolatedSum": {
- "p50": 139.64799791574478,
- "p90": 155.71200102567673,
- "p95": 165.47200083732605,
- "p99": 183.74400585889816
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1175552,
- "combineLogicalBytes": 1175552,
- "fanoutMean": 5.125,
- "recvTokensMax": 12,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 74.30399954319,
- "p90": 86.91199868917465,
- "p95": 100.12800246477127,
- "p99": 123.48800152540207
- },
- "combine": {
- "p50": 68.51200014352798,
- "p90": 77.85599678754807,
- "p95": 84.70399677753448,
- "p99": 112.15999722480774
- },
- "roundtrip": {
- "p50": 121.31199985742569,
- "p90": 140.25600254535675,
- "p95": 151.64799988269806,
- "p99": 177.66399681568146
- },
- "isolatedSum": {
- "p50": 142.815999686718,
- "p90": 164.76799547672272,
- "p95": 184.83199924230576,
- "p99": 235.6479987502098
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2465792,
- "combineLogicalBytes": 2465792,
- "fanoutMean": 5.375,
- "recvTokensMax": 25,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.65600222349167,
- "p90": 88.28800171613693,
- "p95": 97.98400104045868,
- "p99": 121.2799996137619
- },
- "combine": {
- "p50": 69.56800073385239,
- "p90": 78.87999713420868,
- "p95": 83.16799998283386,
- "p99": 94.84799951314926
- },
- "roundtrip": {
- "p50": 126.36800110340118,
- "p90": 164.57599401474,
- "p95": 172.44799435138702,
- "p99": 196.22400403022766
- },
- "isolatedSum": {
- "p50": 144.22400295734406,
- "p90": 167.1679988503456,
- "p95": 181.15200102329254,
- "p99": 216.12799912691116
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4988928,
- "combineLogicalBytes": 4988928,
- "fanoutMean": 5.4375,
- "recvTokensMax": 47,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 73.53600114583969,
- "p90": 88.0960002541542,
- "p95": 96.03200107812881,
- "p99": 121.2799996137619
- },
- "combine": {
- "p50": 70.39999961853027,
- "p90": 78.91199737787247,
- "p95": 86.36800199747086,
- "p99": 98.9760011434555
- },
- "roundtrip": {
- "p50": 125.47199428081512,
- "p90": 143.96800100803375,
- "p95": 153.6960005760193,
- "p99": 172.8000044822693
- },
- "isolatedSum": {
- "p50": 143.93600076436996,
- "p90": 167.00799763202667,
- "p95": 182.40000307559967,
- "p99": 220.2560007572174
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9791488,
- "combineLogicalBytes": 9791488,
- "fanoutMean": 5.3359375,
- "recvTokensMax": 94,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 81.02399855852127,
- "p90": 94.71999853849411,
- "p95": 106.11200332641602,
- "p99": 144.6399986743927
- },
- "combine": {
- "p50": 76.7040029168129,
- "p90": 88.54400366544724,
- "p95": 96.76799923181534,
- "p99": 107.00800269842148
- },
- "roundtrip": {
- "p50": 135.29600203037262,
- "p90": 158.78400206565857,
- "p95": 170.84799706935883,
- "p99": 241.43999814987183
- },
- "isolatedSum": {
- "p50": 157.72800147533417,
- "p90": 183.26400220394135,
- "p95": 202.88000255823135,
- "p99": 251.64800137281418
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19410944,
- "combineLogicalBytes": 19410944,
- "fanoutMean": 5.2890625,
- "recvTokensMax": 178,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 92.96000003814697,
- "p90": 105.47199845314026,
- "p95": 115.80800265073776,
- "p99": 153.56799960136414
- },
- "combine": {
- "p50": 86.87999844551086,
- "p90": 96.03200107812881,
- "p95": 102.33599692583084,
- "p99": 112.67200112342834
- },
- "roundtrip": {
- "p50": 158.4320068359375,
- "p90": 171.26399278640747,
- "p95": 179.967999458313,
- "p99": 206.43199980258942
- },
- "isolatedSum": {
- "p50": 179.83999848365784,
- "p90": 201.50399953126907,
- "p95": 218.1439995765686,
- "p99": 266.2400007247925
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38678528,
- "combineLogicalBytes": 38678528,
- "fanoutMean": 5.26953125,
- "recvTokensMax": 360,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.18399822711945,
- "p90": 127.68000364303589,
- "p95": 131.3599944114685,
- "p99": 140.44800400733948
- },
- "combine": {
- "p50": 104.3199971318245,
- "p90": 113.76000195741653,
- "p95": 121.98399752378464,
- "p99": 137.28000223636627
- },
- "roundtrip": {
- "p50": 196.28800451755524,
- "p90": 208.95999670028687,
- "p95": 216.5759950876236,
- "p99": 241.56799912452698
- },
- "isolatedSum": {
- "p50": 221.50399535894394,
- "p90": 241.44000560045242,
- "p95": 253.34399193525314,
- "p99": 277.72800624370575
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77285376,
- "combineLogicalBytes": 77285376,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 704,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4fa5aaad",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b",
- "colorKey": "h200_6a794fcd",
- "comparisonKey": "50f5858697d33730",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:03:36.902996+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_1",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-mild",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "f3df51be7d5c32b",
- "workloadId": "set:8:289b7f9c14292e96",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272056705",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272056705",
- "createdAt": "2026-06-27T00:03:36.902996+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 73.98399710655212,
- "p90": 98.84800016880035,
- "p95": 105.98400235176086,
- "p99": 125.21600723266602
- },
- "combine": {
- "p50": 68.96000355482101,
- "p90": 81.66400343179703,
- "p95": 86.496002972126,
- "p99": 102.88000106811523
- },
- "roundtrip": {
- "p50": 119.93599683046341,
- "p90": 147.93600142002106,
- "p95": 157.53600001335144,
- "p99": 168.09600591659546
- },
- "isolatedSum": {
- "p50": 142.94400066137314,
- "p90": 180.51200360059738,
- "p95": 192.48000532388687,
- "p99": 228.09600830078125
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 587776,
- "combineLogicalBytes": 587776,
- "fanoutMean": 5.125,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.08799773454666,
- "p90": 88.73599767684937,
- "p95": 100.09600222110748,
- "p99": 118.20799857378006
- },
- "combine": {
- "p50": 68.35199892520905,
- "p90": 77.08799839019775,
- "p95": 82.84799754619598,
- "p99": 91.61599725484848
- },
- "roundtrip": {
- "p50": 123.3920007944107,
- "p90": 151.296004652977,
- "p95": 158.84800255298615,
- "p99": 186.27199530601501
- },
- "isolatedSum": {
- "p50": 141.4399966597557,
- "p90": 165.82399606704712,
- "p95": 182.94399976730347,
- "p99": 209.82399582862854
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1103872,
- "combineLogicalBytes": 1103872,
- "fanoutMean": 4.8125,
- "recvTokensMax": 16,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 76.51200145483017,
- "p90": 102.88000106811523,
- "p95": 109.11999642848969,
- "p99": 128.31999361515045
- },
- "combine": {
- "p50": 69.82400268316269,
- "p90": 81.44000172615051,
- "p95": 86.75199747085571,
- "p99": 98.04800152778625
- },
- "roundtrip": {
- "p50": 126.14400684833527,
- "p90": 157.6640009880066,
- "p95": 167.84000396728516,
- "p99": 190.88000059127808
- },
- "isolatedSum": {
- "p50": 146.33600413799286,
- "p90": 184.32000279426575,
- "p95": 195.8719938993454,
- "p99": 226.3679951429367
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2250752,
- "combineLogicalBytes": 2250752,
- "fanoutMean": 4.90625,
- "recvTokensMax": 31,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 75.9039968252182,
- "p90": 97.4079966545105,
- "p95": 101.88800096511841,
- "p99": 117.60000139474869
- },
- "combine": {
- "p50": 70.62400132417679,
- "p90": 84.73599702119827,
- "p95": 90.11200070381165,
- "p99": 107.42399841547012
- },
- "roundtrip": {
- "p50": 125.69600343704224,
- "p90": 150.751993060112,
- "p95": 158.30400586128235,
- "p99": 175.4239946603775
- },
- "isolatedSum": {
- "p50": 146.527998149395,
- "p90": 182.14399367570877,
- "p95": 192.00000166893005,
- "p99": 225.0239998102188
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4472832,
- "combineLogicalBytes": 4472832,
- "fanoutMean": 4.875,
- "recvTokensMax": 62,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 76.60800218582153,
- "p90": 93.47199648618698,
- "p95": 101.9200012087822,
- "p99": 109.82400178909302
- },
- "combine": {
- "p50": 71.26399874687195,
- "p90": 84.09599959850311,
- "p95": 88.32000195980072,
- "p99": 100.89600086212158
- },
- "roundtrip": {
- "p50": 128.25599312782288,
- "p90": 152.96000242233276,
- "p95": 160.76800227165222,
- "p99": 201.92000269889832
- },
- "isolatedSum": {
- "p50": 147.87200093269348,
- "p90": 177.5679960846901,
- "p95": 190.24000316858292,
- "p99": 210.7200026512146
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 8888320,
- "combineLogicalBytes": 8888320,
- "fanoutMean": 4.84375,
- "recvTokensMax": 124,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 83.8719978928566,
- "p90": 102.55999863147736,
- "p95": 108.92800241708755,
- "p99": 121.76000326871872
- },
- "combine": {
- "p50": 78.43200117349625,
- "p90": 91.839998960495,
- "p95": 96.57599776983261,
- "p99": 108.12799632549286
- },
- "roundtrip": {
- "p50": 138.46400380134583,
- "p90": 160.19199788570404,
- "p95": 168.09600591659546,
- "p99": 186.14399433135986
- },
- "isolatedSum": {
- "p50": 162.30399906635284,
- "p90": 194.39999759197235,
- "p95": 205.50400018692017,
- "p99": 229.88799959421158
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 17733632,
- "combineLogicalBytes": 17733632,
- "fanoutMean": 4.83203125,
- "recvTokensMax": 248,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 99.29600358009338,
- "p90": 117.79200285673141,
- "p95": 125.44000148773193,
- "p99": 154.01600301265717
- },
- "combine": {
- "p50": 90.14400094747543,
- "p90": 102.91200131177902,
- "p95": 110.17599701881409,
- "p99": 119.35999989509583
- },
- "roundtrip": {
- "p50": 166.75199568271637,
- "p90": 185.7600063085556,
- "p95": 193.02399456501007,
- "p99": 220.60799598693848
- },
- "isolatedSum": {
- "p50": 189.44000452756882,
- "p90": 220.70400416851044,
- "p95": 235.61599850654602,
- "p99": 273.376002907753
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 35424256,
- "combineLogicalBytes": 35424256,
- "fanoutMean": 4.826171875,
- "recvTokensMax": 492,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 124.4800016283989,
- "p90": 135.93600690364838,
- "p95": 141.184002161026,
- "p99": 167.23200678825378
- },
- "combine": {
- "p50": 115.68000167608261,
- "p90": 127.29600071907043,
- "p95": 131.99999928474426,
- "p99": 150.78400075435638
- },
- "roundtrip": {
- "p50": 216.95999801158905,
- "p90": 232.80000686645508,
- "p95": 238.27199637889862,
- "p99": 261.02399826049805
- },
- "isolatedSum": {
- "p50": 240.1600033044815,
- "p90": 263.2320076227188,
- "p95": 273.18400144577026,
- "p99": 318.01600754261017
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 70160384,
- "combineLogicalBytes": 70160384,
- "fanoutMean": 4.779296875,
- "recvTokensMax": 987,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-ffad9f17",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243",
- "colorKey": "h200_b2ffaf91",
- "comparisonKey": "b3b8e5cc27948267",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:03:43.326778+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "16babcaf4204243",
- "workloadId": "set:8:289b7f9c14292e96",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 2.61328125,
- "eplbImbalanceAfter": 1.0009114583333334,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272060649",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272060649",
- "createdAt": "2026-06-27T00:03:43.326778+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 72.9919970035553,
- "p90": 96.73599898815155,
- "p95": 102.7199998497963,
- "p99": 128.83199751377106
- },
- "combine": {
- "p50": 68.15999746322632,
- "p90": 81.05599880218506,
- "p95": 86.40000224113464,
- "p99": 94.91200000047684
- },
- "roundtrip": {
- "p50": 122.30399996042252,
- "p90": 153.85599434375763,
- "p95": 167.23200678825378,
- "p99": 196.03200256824493
- },
- "isolatedSum": {
- "p50": 141.15199446678162,
- "p90": 177.7919977903366,
- "p95": 189.12000209093094,
- "p99": 223.7439975142479
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.02399724721909,
- "p90": 95.77599912881851,
- "p95": 103.74400019645691,
- "p99": 121.72800302505493
- },
- "combine": {
- "p50": 67.80800223350525,
- "p90": 80.73599636554718,
- "p95": 87.39200234413147,
- "p99": 99.45599734783173
- },
- "roundtrip": {
- "p50": 121.34400010108948,
- "p90": 149.1840034723282,
- "p95": 156.76799416542053,
- "p99": 182.36799538135529
- },
- "isolatedSum": {
- "p50": 140.83199948072433,
- "p90": 176.5119954943657,
- "p95": 191.13600254058838,
- "p99": 221.18400037288666
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1189888,
- "combineLogicalBytes": 1189888,
- "fanoutMean": 5.1875,
- "recvTokensMax": 12,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.40800017118454,
- "p90": 92.54399687051773,
- "p95": 101.15200281143188,
- "p99": 184.28799510002136
- },
- "combine": {
- "p50": 68.28799843788147,
- "p90": 82.40000158548355,
- "p95": 88.03199976682663,
- "p99": 100.44799745082855
- },
- "roundtrip": {
- "p50": 124.38400089740753,
- "p90": 158.59200060367584,
- "p95": 172.2240000963211,
- "p99": 259.42400097846985
- },
- "isolatedSum": {
- "p50": 141.695998609066,
- "p90": 174.94399845600128,
- "p95": 189.18400257825851,
- "p99": 284.7359925508499
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2408448,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 23,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.5600014925003,
- "p90": 100.12800246477127,
- "p95": 106.78400099277496,
- "p99": 138.11199367046356
- },
- "combine": {
- "p50": 69.08799707889557,
- "p90": 81.28000050783157,
- "p95": 86.81599795818329,
- "p99": 96.67199850082397
- },
- "roundtrip": {
- "p50": 123.23199957609177,
- "p90": 151.58399939537048,
- "p95": 159.87199544906616,
- "p99": 174.6560037136078
- },
- "isolatedSum": {
- "p50": 143.64799857139587,
- "p90": 181.40800297260284,
- "p95": 193.59999895095825,
- "p99": 234.78399217128754
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 74.65600222349167,
- "p90": 99.32799637317657,
- "p95": 105.56799918413162,
- "p99": 127.20000743865967
- },
- "combine": {
- "p50": 69.88800317049026,
- "p90": 83.10399949550629,
- "p95": 88.639996945858,
- "p99": 99.35999661684036
- },
- "roundtrip": {
- "p50": 124.9919980764389,
- "p90": 151.48800611495972,
- "p95": 159.5200002193451,
- "p99": 197.88800179958344
- },
- "isolatedSum": {
- "p50": 144.54400539398193,
- "p90": 182.43199586868286,
- "p95": 194.20799612998962,
- "p99": 226.56000405550003
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9605120,
- "combineLogicalBytes": 9605120,
- "fanoutMean": 5.234375,
- "recvTokensMax": 93,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 80.35200089216232,
- "p90": 101.02400183677673,
- "p95": 105.6319996714592,
- "p99": 116.7680025100708
- },
- "combine": {
- "p50": 76.80000364780426,
- "p90": 88.86399865150452,
- "p95": 94.17600184679031,
- "p99": 101.56799852848053
- },
- "roundtrip": {
- "p50": 135.04000008106232,
- "p90": 155.29599785804749,
- "p95": 165.50399363040924,
- "p99": 190.43199717998505
- },
- "isolatedSum": {
- "p50": 157.15200453996658,
- "p90": 189.88800048828125,
- "p95": 199.8080015182495,
- "p99": 218.33600103855133
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19367936,
- "combineLogicalBytes": 19367936,
- "fanoutMean": 5.27734375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 97.120001912117,
- "p90": 111.00800335407257,
- "p95": 117.11999773979187,
- "p99": 134.39999520778656
- },
- "combine": {
- "p50": 87.39200234413147,
- "p90": 99.32799637317657,
- "p95": 105.6319996714592,
- "p99": 121.18399888277054
- },
- "roundtrip": {
- "p50": 159.2320054769516,
- "p90": 177.2480010986328,
- "p95": 184.28799510002136,
- "p99": 207.71199464797974
- },
- "isolatedSum": {
- "p50": 184.51200425624847,
- "p90": 210.33599972724915,
- "p95": 222.75199741125107,
- "p99": 255.5839940905571
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38535168,
- "combineLogicalBytes": 38535168,
- "fanoutMean": 5.25,
- "recvTokensMax": 358,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 115.23199826478958,
- "p90": 132.4159950017929,
- "p95": 140.47999680042267,
- "p99": 171.64799571037292
- },
- "combine": {
- "p50": 102.84800082445145,
- "p90": 114.07999694347382,
- "p95": 119.1679984331131,
- "p99": 129.60000336170197
- },
- "roundtrip": {
- "p50": 195.90400159358978,
- "p90": 210.11200547218323,
- "p95": 217.15199947357178,
- "p99": 243.74400079250336
- },
- "isolatedSum": {
- "p50": 218.07999908924103,
- "p90": 246.49599194526672,
- "p95": 259.64799523353577,
- "p99": 301.2479990720749
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 76869632,
- "combineLogicalBytes": 76869632,
- "fanoutMean": 5.236328125,
- "recvTokensMax": 688,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-49529f9d",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c",
- "colorKey": "h200_f2b19f62",
- "comparisonKey": "cc27e02aea0a210a",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:04:04.313162+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_11",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-moderate",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "14ded8461f2636c",
- "workloadId": "set:8:120a8dc1dba92ca9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272072315",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272072315",
- "createdAt": "2026-06-27T00:04:04.313162+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 72.95999675989151,
- "p90": 100.73599964380264,
- "p95": 110.88000237941742,
- "p99": 152.99199521541595
- },
- "combine": {
- "p50": 65.2799978852272,
- "p90": 80.9599980711937,
- "p95": 85.28000116348267,
- "p99": 102.1760031580925
- },
- "roundtrip": {
- "p50": 121.08799815177917,
- "p90": 155.20000457763672,
- "p95": 166.27199947834015,
- "p99": 225.11999309062958
- },
- "isolatedSum": {
- "p50": 138.2399946451187,
- "p90": 181.69599771499634,
- "p95": 196.16000354290009,
- "p99": 255.16799837350845
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.2479989528656,
- "p90": 105.47199845314026,
- "p95": 114.84800279140472,
- "p99": 135.74400544166565
- },
- "combine": {
- "p50": 67.61600077152252,
- "p90": 79.83999699354172,
- "p95": 83.5840031504631,
- "p99": 92.99200028181076
- },
- "roundtrip": {
- "p50": 119.64800208806992,
- "p90": 145.56799829006195,
- "p95": 150.91200172901154,
- "p99": 165.18400609493256
- },
- "isolatedSum": {
- "p50": 140.86399972438812,
- "p90": 185.31199544668198,
- "p95": 198.43200594186783,
- "p99": 228.7360057234764
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.79200309515,
- "p90": 102.88000106811523,
- "p95": 112.0000034570694,
- "p99": 131.8719983100891
- },
- "combine": {
- "p50": 67.80800223350525,
- "p90": 78.8159966468811,
- "p95": 83.29600095748901,
- "p99": 102.08000242710114
- },
- "roundtrip": {
- "p50": 120.60800194740295,
- "p90": 144.44799721240997,
- "p95": 152.67199277877808,
- "p99": 166.59200191497803
- },
- "isolatedSum": {
- "p50": 141.60000532865524,
- "p90": 181.69599771499634,
- "p95": 195.2960044145584,
- "p99": 233.95200073719025
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.79200309515,
- "p90": 97.75999933481216,
- "p95": 105.92000186443329,
- "p99": 117.69600212574005
- },
- "combine": {
- "p50": 68.06399673223495,
- "p90": 81.56800270080566,
- "p95": 87.39200234413147,
- "p99": 104.44799810647964
- },
- "roundtrip": {
- "p50": 121.31199985742569,
- "p90": 153.98399531841278,
- "p95": 162.78399527072906,
- "p99": 199.5519995689392
- },
- "isolatedSum": {
- "p50": 141.85599982738495,
- "p90": 179.32800203561783,
- "p95": 193.31200420856476,
- "p99": 222.1440002322197
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 73.44000041484833,
- "p90": 97.69599884748459,
- "p95": 103.74400019645691,
- "p99": 117.15199798345566
- },
- "combine": {
- "p50": 69.98399645090103,
- "p90": 83.16799998283386,
- "p95": 88.51200342178345,
- "p99": 98.59199821949005
- },
- "roundtrip": {
- "p50": 125.91999769210815,
- "p90": 152.0320028066635,
- "p95": 167.7439957857132,
- "p99": 200.54399967193604
- },
- "isolatedSum": {
- "p50": 143.42399686574936,
- "p90": 180.86399883031845,
- "p95": 192.25600361824036,
- "p99": 215.7439962029457
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 85.4400023818016,
- "p90": 105.05600273609161,
- "p95": 111.93600296974182,
- "p99": 135.48800349235535
- },
- "combine": {
- "p50": 76.12799853086472,
- "p90": 88.60799670219421,
- "p95": 92.41600334644318,
- "p99": 124.06399846076965
- },
- "roundtrip": {
- "p50": 136.4479959011078,
- "p90": 159.04000401496887,
- "p95": 166.81599617004395,
- "p99": 204.12799715995789
- },
- "isolatedSum": {
- "p50": 161.56800091266632,
- "p90": 193.66399943828583,
- "p95": 204.352006316185,
- "p99": 259.552001953125
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 99.55199807882309,
- "p90": 115.84000289440155,
- "p95": 124.79999661445618,
- "p99": 159.5200002193451
- },
- "combine": {
- "p50": 86.65599673986435,
- "p90": 98.68799895048141,
- "p95": 104.032002389431,
- "p99": 120.28799951076508
- },
- "roundtrip": {
- "p50": 162.23999857902527,
- "p90": 177.7919977903366,
- "p95": 186.62400543689728,
- "p99": 207.58399367332458
- },
- "isolatedSum": {
- "p50": 186.20799481868744,
- "p90": 214.52800184488297,
- "p95": 228.83199900388718,
- "p99": 279.80799973011017
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 25145344,
- "combineLogicalBytes": 25145344,
- "fanoutMean": 3.42578125,
- "recvTokensMax": 510,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 123.16799908876419,
- "p90": 138.08000087738037,
- "p95": 149.59999918937683,
- "p99": 160.35200655460358
- },
- "combine": {
- "p50": 112.47999966144562,
- "p90": 122.36800044775009,
- "p95": 127.45599448680878,
- "p99": 136.9280070066452
- },
- "roundtrip": {
- "p50": 213.4079933166504,
- "p90": 239.16800320148468,
- "p95": 253.6959946155548,
- "p99": 450.3040015697479
- },
- "isolatedSum": {
- "p50": 235.6479987502098,
- "p90": 260.44800132513046,
- "p95": 277.0559936761856,
- "p99": 297.2800135612488
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-904f847b",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836",
- "colorKey": "h200_bac4102c",
- "comparisonKey": "6234055b9069f2f2",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:04:21.213602+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_0",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "a8f501af7004836",
- "workloadId": "set:8:120a8dc1dba92ca9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.927734375,
- "eplbImbalanceAfter": 1.0006103515625,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272075655",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272075655",
- "createdAt": "2026-06-27T00:04:21.213602+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 71.99999690055847,
- "p90": 99.35999661684036,
- "p95": 108.47999900579453,
- "p99": 130.8480054140091
- },
- "combine": {
- "p50": 67.1359971165657,
- "p90": 80.64000308513641,
- "p95": 84.44800227880478,
- "p99": 108.12799632549286
- },
- "roundtrip": {
- "p50": 121.08799815177917,
- "p90": 149.4079977273941,
- "p95": 161.24799847602844,
- "p99": 199.8080015182495
- },
- "isolatedSum": {
- "p50": 139.13599401712418,
- "p90": 179.99999970197678,
- "p95": 192.9280012845993,
- "p99": 238.97600173950195
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 5.375,
- "recvTokensMax": 7,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.98399710655212,
- "p90": 101.79200023412704,
- "p95": 111.7120012640953,
- "p99": 146.33600413799286
- },
- "combine": {
- "p50": 68.7360018491745,
- "p90": 82.04799890518188,
- "p95": 88.73599767684937,
- "p99": 105.21599650382996
- },
- "roundtrip": {
- "p50": 124.41600114107132,
- "p90": 160.0320041179657,
- "p95": 172.86400496959686,
- "p99": 196.44799828529358
- },
- "isolatedSum": {
- "p50": 142.71999895572662,
- "p90": 183.83999913930893,
- "p95": 200.44799894094467,
- "p99": 251.55200064182281
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 1204224,
- "fanoutMean": 5.25,
- "recvTokensMax": 14,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 76.22399926185608,
- "p90": 108.76800119876862,
- "p95": 123.1359988451004,
- "p99": 148.8640010356903
- },
- "combine": {
- "p50": 68.7360018491745,
- "p90": 82.14399963617325,
- "p95": 88.54400366544724,
- "p99": 105.02400249242783
- },
- "roundtrip": {
- "p50": 124.25599992275238,
- "p90": 160.0320041179657,
- "p95": 170.01600563526154,
- "p99": 244.89599466323853
- },
- "isolatedSum": {
- "p50": 144.96000111103058,
- "p90": 190.91200083494186,
- "p95": 211.68000251054764,
- "p99": 253.88800352811813
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 24,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.62400197982788,
- "p90": 101.21600329875946,
- "p95": 111.10399663448334,
- "p99": 145.47200500965118
- },
- "combine": {
- "p50": 69.34399902820587,
- "p90": 84.70399677753448,
- "p95": 89.50400352478027,
- "p99": 104.44799810647964
- },
- "roundtrip": {
- "p50": 125.37600100040436,
- "p90": 159.4880074262619,
- "p95": 170.1119989156723,
- "p99": 203.23200523853302
- },
- "isolatedSum": {
- "p50": 143.96800100803375,
- "p90": 185.92000007629395,
- "p95": 200.6080001592636,
- "p99": 249.92000311613083
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4630528,
- "combineLogicalBytes": 4630528,
- "fanoutMean": 5.046875,
- "recvTokensMax": 45,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 72.73600250482559,
- "p90": 97.75999933481216,
- "p95": 108.03200304508209,
- "p99": 141.9840008020401
- },
- "combine": {
- "p50": 70.36799937486649,
- "p90": 88.28800171613693,
- "p95": 94.68799829483032,
- "p99": 104.54399883747101
- },
- "roundtrip": {
- "p50": 127.00800597667694,
- "p90": 156.12800419330597,
- "p95": 166.9439971446991,
- "p99": 198.33600521087646
- },
- "isolatedSum": {
- "p50": 143.10400187969208,
- "p90": 186.0480010509491,
- "p95": 202.72000133991241,
- "p99": 246.5279996395111
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9447424,
- "combineLogicalBytes": 9447424,
- "fanoutMean": 5.1484375,
- "recvTokensMax": 91,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 81.98399841785431,
- "p90": 106.65600001811981,
- "p95": 116.22399836778641,
- "p99": 165.69599509239197
- },
- "combine": {
- "p50": 76.9599974155426,
- "p90": 90.87999910116196,
- "p95": 97.120001912117,
- "p99": 118.23999881744385
- },
- "roundtrip": {
- "p50": 135.74400544166565,
- "p90": 164.48000073432922,
- "p95": 176.70400440692902,
- "p99": 220.22399306297302
- },
- "isolatedSum": {
- "p50": 158.9439958333969,
- "p90": 197.53599911928177,
- "p95": 213.3440002799034,
- "p99": 283.9359939098358
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19023872,
- "combineLogicalBytes": 19023872,
- "fanoutMean": 5.18359375,
- "recvTokensMax": 178,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 97.21600264310837,
- "p90": 125.50400197505951,
- "p95": 140.99200069904327,
- "p99": 185.85599958896637
- },
- "combine": {
- "p50": 87.77599781751633,
- "p90": 105.53599894046783,
- "p95": 113.0559965968132,
- "p99": 125.63200294971466
- },
- "roundtrip": {
- "p50": 159.7760021686554,
- "p90": 186.65599822998047,
- "p95": 201.53599977493286,
- "p99": 221.69600427150726
- },
- "isolatedSum": {
- "p50": 184.9920004606247,
- "p90": 231.04000091552734,
- "p95": 254.04799729585648,
- "p99": 311.48800253868103
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38148096,
- "combineLogicalBytes": 38148096,
- "fanoutMean": 5.197265625,
- "recvTokensMax": 350,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 118.40000003576279,
- "p90": 141.7279988527298,
- "p95": 151.296004652977,
- "p99": 174.84800517559052
- },
- "combine": {
- "p50": 103.74400019645691,
- "p90": 121.21599912643433,
- "p95": 128.60800325870514,
- "p99": 147.13600277900696
- },
- "roundtrip": {
- "p50": 198.08000326156616,
- "p90": 219.7760045528412,
- "p95": 227.55199670791626,
- "p99": 265.3760015964508
- },
- "isolatedSum": {
- "p50": 222.1440002322197,
- "p90": 262.9439979791641,
- "p95": 279.90400791168213,
- "p99": 321.9840079545975
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 76955648,
- "combineLogicalBytes": 76955648,
- "fanoutMean": 5.2421875,
- "recvTokensMax": 687,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-06bd64b9",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836",
- "colorKey": "h200_1eda221e",
- "comparisonKey": "00e2c45e1159b581",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:03:16.896756+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_3",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "a8f501af7004836",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.927734375,
- "eplbImbalanceAfter": 1.0006103515625,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272045914",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272045914",
- "createdAt": "2026-06-27T00:03:16.896756+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 72.15999811887741,
- "p90": 99.0080013871193,
- "p95": 105.56799918413162,
- "p99": 131.80799782276154
- },
- "combine": {
- "p50": 68.70400160551071,
- "p90": 83.23200047016144,
- "p95": 88.8959988951683,
- "p99": 117.40799993276596
- },
- "roundtrip": {
- "p50": 121.60000205039978,
- "p90": 151.8079936504364,
- "p95": 162.88000345230103,
- "p99": 197.63199985027313
- },
- "isolatedSum": {
- "p50": 140.86399972438812,
- "p90": 182.24000185728073,
- "p95": 194.46399807929993,
- "p99": 249.2159977555275
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 5.375,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.7600028514862,
- "p90": 99.96800124645233,
- "p95": 106.97600245475769,
- "p99": 125.63200294971466
- },
- "combine": {
- "p50": 67.58400052785873,
- "p90": 79.52000200748444,
- "p95": 84.35200154781342,
- "p99": 95.61599791049957
- },
- "roundtrip": {
- "p50": 121.95199728012085,
- "p90": 150.52799880504608,
- "p95": 158.9760035276413,
- "p99": 188.51199746131897
- },
- "isolatedSum": {
- "p50": 141.34400337934494,
- "p90": 179.48800325393677,
- "p95": 191.3280040025711,
- "p99": 221.24800086021423
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 1204224,
- "fanoutMean": 5.25,
- "recvTokensMax": 14,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 74.23999905586243,
- "p90": 98.36799651384354,
- "p95": 105.8880016207695,
- "p99": 117.60000139474869
- },
- "combine": {
- "p50": 68.57600063085556,
- "p90": 81.82399719953537,
- "p95": 86.496002972126,
- "p99": 94.62399780750275
- },
- "roundtrip": {
- "p50": 123.19999933242798,
- "p90": 152.92799472808838,
- "p95": 164.12800550460815,
- "p99": 221.98399901390076
- },
- "isolatedSum": {
- "p50": 142.815999686718,
- "p90": 180.1919937133789,
- "p95": 192.3840045928955,
- "p99": 212.22399920225143
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 24,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 76.06399804353714,
- "p90": 117.8240031003952,
- "p95": 132.03200697898865,
- "p99": 183.45600366592407
- },
- "combine": {
- "p50": 69.37599927186966,
- "p90": 85.02399921417236,
- "p95": 89.66399729251862,
- "p99": 100.3199964761734
- },
- "roundtrip": {
- "p50": 123.16799908876419,
- "p90": 152.8639942407608,
- "p95": 160.96000373363495,
- "p99": 184.1920018196106
- },
- "isolatedSum": {
- "p50": 145.4399973154068,
- "p90": 202.84800231456757,
- "p95": 221.69600427150726,
- "p99": 283.7760001420975
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4630528,
- "combineLogicalBytes": 4630528,
- "fanoutMean": 5.046875,
- "recvTokensMax": 45,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 76.67200267314911,
- "p90": 107.10400342941284,
- "p95": 114.20799791812897,
- "p99": 128.9599984884262
- },
- "combine": {
- "p50": 72.25599884986877,
- "p90": 88.76799792051315,
- "p95": 96.00000083446503,
- "p99": 114.75200206041336
- },
- "roundtrip": {
- "p50": 128.31999361515045,
- "p90": 158.6879938840866,
- "p95": 168.89600455760956,
- "p99": 192.89599359035492
- },
- "isolatedSum": {
- "p50": 148.92800152301788,
- "p90": 195.872001349926,
- "p95": 210.207998752594,
- "p99": 243.71200054883957
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9447424,
- "combineLogicalBytes": 9447424,
- "fanoutMean": 5.1484375,
- "recvTokensMax": 91,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 83.39200168848038,
- "p90": 103.80800068378448,
- "p95": 109.43999886512756,
- "p99": 126.71999633312225
- },
- "combine": {
- "p50": 77.18399912118912,
- "p90": 89.79199826717377,
- "p95": 95.10400146245956,
- "p99": 105.98400235176086
- },
- "roundtrip": {
- "p50": 134.783998131752,
- "p90": 157.79200196266174,
- "p95": 167.13599860668182,
- "p99": 210.94399690628052
- },
- "isolatedSum": {
- "p50": 160.5760008096695,
- "p90": 193.59999895095825,
- "p95": 204.54400032758713,
- "p99": 232.70399868488312
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19023872,
- "combineLogicalBytes": 19023872,
- "fanoutMean": 5.18359375,
- "recvTokensMax": 178,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 93.56799721717834,
- "p90": 113.63200098276138,
- "p95": 120.2239990234375,
- "p99": 133.4719955921173
- },
- "combine": {
- "p50": 86.40000224113464,
- "p90": 101.72799974679947,
- "p95": 105.6319996714592,
- "p99": 116.48000031709671
- },
- "roundtrip": {
- "p50": 157.9200029373169,
- "p90": 181.34400248527527,
- "p95": 187.42400407791138,
- "p99": 211.87199652194977
- },
- "isolatedSum": {
- "p50": 179.967999458313,
- "p90": 215.36000072956085,
- "p95": 225.8559986948967,
- "p99": 249.95199590921402
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38148096,
- "combineLogicalBytes": 38148096,
- "fanoutMean": 5.197265625,
- "recvTokensMax": 350,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.15199798345566,
- "p90": 137.1839940547943,
- "p95": 142.33599603176117,
- "p99": 165.79200327396393
- },
- "combine": {
- "p50": 106.84800148010254,
- "p90": 119.32799965143204,
- "p95": 122.81599640846252,
- "p99": 133.53599607944489
- },
- "roundtrip": {
- "p50": 197.56799936294556,
- "p90": 213.85599672794342,
- "p95": 221.3120013475418,
- "p99": 245.37600576877594
- },
- "isolatedSum": {
- "p50": 223.9999994635582,
- "p90": 256.51199370622635,
- "p95": 265.1519924402237,
- "p99": 299.3279993534088
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 76955648,
- "combineLogicalBytes": 76955648,
- "fanoutMean": 5.2421875,
- "recvTokensMax": 687,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-0d6ef23b",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h200_c851a534",
- "comparisonKey": "6b4f4d7f65293019",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:29:45.312905+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_2",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254392935",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935",
- "createdAt": "2026-06-26T17:29:45.312905+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 74.11199808120728,
- "p90": 94.11200135946274,
- "p95": 104.35199737548828,
- "p99": 138.0160003900528
- },
- "combine": {
- "p50": 68.41599941253662,
- "p90": 78.72000336647034,
- "p95": 83.48800241947174,
- "p99": 105.72800040245056
- },
- "roundtrip": {
- "p50": 124.4800016283989,
- "p90": 144.31999623775482,
- "p95": 156.3200056552887,
- "p99": 193.53599846363068
- },
- "isolatedSum": {
- "p50": 142.5279974937439,
- "p90": 172.83200472593307,
- "p95": 187.83999979496002,
- "p99": 243.74400079250336
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 74.33599978685379,
- "p90": 99.42399710416794,
- "p95": 109.66400057077408,
- "p99": 131.71200454235077
- },
- "combine": {
- "p50": 69.85600292682648,
- "p90": 83.00799876451492,
- "p95": 90.40000289678574,
- "p99": 114.33599889278412
- },
- "roundtrip": {
- "p50": 122.43200093507767,
- "p90": 144.6080058813095,
- "p95": 154.62400019168854,
- "p99": 173.69599640369415
- },
- "isolatedSum": {
- "p50": 144.19200271368027,
- "p90": 182.43199586868286,
- "p95": 200.06400346755981,
- "p99": 246.0480034351349
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 74.97599720954895,
- "p90": 95.29600292444229,
- "p95": 104.12800312042236,
- "p99": 139.74399864673615
- },
- "combine": {
- "p50": 69.40799951553345,
- "p90": 81.63200318813324,
- "p95": 88.22400122880936,
- "p99": 119.4240003824234
- },
- "roundtrip": {
- "p50": 123.74400347471237,
- "p90": 150.36800503730774,
- "p95": 160.3199988603592,
- "p99": 204.8960030078888
- },
- "isolatedSum": {
- "p50": 144.3839967250824,
- "p90": 176.92800611257553,
- "p95": 192.35200434923172,
- "p99": 259.16799902915955
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.78400319814682,
- "p90": 92.25600212812424,
- "p95": 102.91200131177902,
- "p99": 123.16799908876419
- },
- "combine": {
- "p50": 70.52800059318542,
- "p90": 81.95199817419052,
- "p95": 87.48800307512283,
- "p99": 100.51199793815613
- },
- "roundtrip": {
- "p50": 124.03199821710587,
- "p90": 147.20000326633453,
- "p95": 153.9199948310852,
- "p99": 180.00000715255737
- },
- "isolatedSum": {
- "p50": 145.31200379133224,
- "p90": 174.20800030231476,
- "p95": 190.40000438690186,
- "p99": 223.67999702692032
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 73.18399846553802,
- "p90": 92.83199906349182,
- "p95": 103.61599922180176,
- "p99": 195.93599438667297
- },
- "combine": {
- "p50": 71.32799923419952,
- "p90": 86.33600175380707,
- "p95": 92.03200042247772,
- "p99": 120.80000340938568
- },
- "roundtrip": {
- "p50": 129.72800433635712,
- "p90": 161.31199896335602,
- "p95": 172.86400496959686,
- "p99": 215.10399878025055
- },
- "isolatedSum": {
- "p50": 144.51199769973755,
- "p90": 179.1680008172989,
- "p95": 195.64799964427948,
- "p99": 316.73599779605865
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 82.2720006108284,
- "p90": 100.80000013113022,
- "p95": 108.92800241708755,
- "p99": 134.88000631332397
- },
- "combine": {
- "p50": 76.03199779987335,
- "p90": 89.40800279378891,
- "p95": 94.97600048780441,
- "p99": 117.95199662446976
- },
- "roundtrip": {
- "p50": 130.8480054140091,
- "p90": 154.33600544929504,
- "p95": 164.73600268363953,
- "p99": 204.0639966726303
- },
- "isolatedSum": {
- "p50": 158.30399841070175,
- "p90": 190.20800292491913,
- "p95": 203.90400290489197,
- "p99": 252.83200293779373
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 91.32800251245499,
- "p90": 110.04800349473953,
- "p95": 116.86400324106216,
- "p99": 146.84799313545227
- },
- "combine": {
- "p50": 87.2960016131401,
- "p90": 98.36799651384354,
- "p95": 104.70400005578995,
- "p99": 124.92799758911133
- },
- "roundtrip": {
- "p50": 156.031996011734,
- "p90": 173.24799299240112,
- "p95": 180.38399517536163,
- "p99": 215.39199352264404
- },
- "isolatedSum": {
- "p50": 178.6240041255951,
- "p90": 208.41600000858307,
- "p95": 221.5680032968521,
- "p99": 271.7759907245636
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 116.03199690580368,
- "p90": 129.7599971294403,
- "p95": 136.57599687576294,
- "p99": 149.24800395965576
- },
- "combine": {
- "p50": 103.42399775981903,
- "p90": 116.54400080442429,
- "p95": 123.3920007944107,
- "p99": 141.95199310779572
- },
- "roundtrip": {
- "p50": 192.54399836063385,
- "p90": 208.8959962129593,
- "p95": 215.64799547195435,
- "p99": 228.7359982728958
- },
- "isolatedSum": {
- "p50": 219.4559946656227,
- "p90": 246.3039979338646,
- "p95": 259.96799767017365,
- "p99": 291.1999970674515
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-0f126172",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|normalized|0.18|ffa946582edb500",
- "colorKey": "h200_a1e795ec",
- "comparisonKey": "467cf4a4daff1cff",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:47.472039+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_12",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm) · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ffa946582edb500",
- "workloadId": "set:8:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254443915",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915",
- "createdAt": "2026-06-26T17:30:47.472039+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 73.95199686288834,
- "p90": 88.0960002541542,
- "p95": 97.24800288677216,
- "p99": 108.25599730014801
- },
- "combine": {
- "p50": 70.91200351715088,
- "p90": 81.60000294446945,
- "p95": 87.26400136947632,
- "p99": 97.28000313043594
- },
- "roundtrip": {
- "p50": 125.2480000257492,
- "p90": 149.63200688362122,
- "p95": 157.85600244998932,
- "p99": 175.04000663757324
- },
- "isolatedSum": {
- "p50": 144.86400038003922,
- "p90": 169.69600319862366,
- "p95": 184.51200425624847,
- "p99": 205.53600043058395
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 74.23999905586243,
- "p90": 91.00800007581711,
- "p95": 98.88000041246414,
- "p99": 130.23999333381653
- },
- "combine": {
- "p50": 70.52800059318542,
- "p90": 79.71200346946716,
- "p95": 85.50400286912918,
- "p99": 106.46399855613708
- },
- "roundtrip": {
- "p50": 123.6800029873848,
- "p90": 142.07999408245087,
- "p95": 152.99199521541595,
- "p99": 184.35199558734894
- },
- "isolatedSum": {
- "p50": 144.76799964904785,
- "p90": 170.72000354528427,
- "p95": 184.38400328159332,
- "p99": 236.7039918899536
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 8,
- "recvTokensMax": 16,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 75.03999769687653,
- "p90": 97.9200005531311,
- "p95": 108.47999900579453,
- "p99": 140.09599387645721
- },
- "combine": {
- "p50": 70.11199742555618,
- "p90": 81.34400099515915,
- "p95": 86.496002972126,
- "p99": 99.29600358009338
- },
- "roundtrip": {
- "p50": 125.69600343704224,
- "p90": 151.36000514030457,
- "p95": 159.55199301242828,
- "p99": 178.3359944820404
- },
- "isolatedSum": {
- "p50": 145.1519951224327,
- "p90": 179.26400154829025,
- "p95": 194.97600197792053,
- "p99": 239.3919974565506
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 8,
- "recvTokensMax": 32,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 73.56800138950348,
- "p90": 94.17600184679031,
- "p95": 102.62399911880493,
- "p99": 126.14400684833527
- },
- "combine": {
- "p50": 70.72000205516815,
- "p90": 82.04799890518188,
- "p95": 86.43200248479843,
- "p99": 96.47999703884125
- },
- "roundtrip": {
- "p50": 125.69600343704224,
- "p90": 148.0640023946762,
- "p95": 156.76799416542053,
- "p99": 182.72000551223755
- },
- "isolatedSum": {
- "p50": 144.28800344467163,
- "p90": 176.2240007519722,
- "p95": 189.05600160360336,
- "p99": 222.6240038871765
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
- "recvTokensMax": 64,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 74.46400076150894,
- "p90": 90.71999788284302,
- "p95": 96.44799679517746,
- "p99": 112.19199746847153
- },
- "combine": {
- "p50": 76.03199779987335,
- "p90": 84.70399677753448,
- "p95": 91.16800129413605,
- "p99": 104.54399883747101
- },
- "roundtrip": {
- "p50": 129.60000336170197,
- "p90": 153.6960005760193,
- "p95": 161.3440066576004,
- "p99": 196.28800451755524
- },
- "isolatedSum": {
- "p50": 150.4959985613823,
- "p90": 175.4239946603775,
- "p95": 187.6159980893135,
- "p99": 216.73599630594254
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 8,
- "recvTokensMax": 128,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 83.20000022649765,
- "p90": 100.12800246477127,
- "p95": 107.45599865913391,
- "p99": 122.3360002040863
- },
- "combine": {
- "p50": 80.79999685287476,
- "p90": 89.88799899816513,
- "p95": 95.36000341176987,
- "p99": 100.54399818181992
- },
- "roundtrip": {
- "p50": 142.17600226402283,
- "p90": 155.45600652694702,
- "p95": 165.3439998626709,
- "p99": 182.0800006389618
- },
- "isolatedSum": {
- "p50": 163.9999970793724,
- "p90": 190.0160014629364,
- "p95": 202.81600207090378,
- "p99": 222.87999838590622
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 8,
- "recvTokensMax": 256,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 107.61599987745285,
- "p90": 121.0239976644516,
- "p95": 127.07200646400452,
- "p99": 148.73600006103516
- },
- "combine": {
- "p50": 95.87199985980988,
- "p90": 105.3759977221489,
- "p95": 112.60800063610077,
- "p99": 123.29600006341934
- },
- "roundtrip": {
- "p50": 176.67199671268463,
- "p90": 191.80800020694733,
- "p95": 203.5840004682541,
- "p99": 225.98400712013245
- },
- "isolatedSum": {
- "p50": 203.48799973726273,
- "p90": 226.3999953866005,
- "p95": 239.68000710010529,
- "p99": 272.0320001244545
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 8,
- "recvTokensMax": 512,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 133.66399705410004,
- "p90": 146.04799449443817,
- "p95": 152.41600573062897,
- "p99": 162.56000101566315
- },
- "combine": {
- "p50": 118.52800101041794,
- "p90": 127.68000364303589,
- "p95": 130.91200590133667,
- "p99": 144.67200636863708
- },
- "roundtrip": {
- "p50": 225.92000663280487,
- "p90": 240.48000574111938,
- "p95": 251.3279914855957,
- "p99": 700.223982334137
- },
- "isolatedSum": {
- "p50": 252.19199806451797,
- "p90": 273.72799813747406,
- "p95": 283.32801163196564,
- "p99": 307.23200738430023
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8e3ecfeb",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|normalized|0.18|14ded8461f2636c",
- "colorKey": "h200_0a93a01f",
- "comparisonKey": "c7e35a057338b2fa",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:31:04.173894+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm) · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "14ded8461f2636c",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254452252",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252",
- "createdAt": "2026-06-26T17:31:04.173894+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 74.27199929952621,
- "p90": 108.83200168609619,
- "p95": 118.49600076675415,
- "p99": 155.5200070142746
- },
- "combine": {
- "p50": 68.38399916887283,
- "p90": 84.03199911117554,
- "p95": 90.20800143480301,
- "p99": 114.88000303506851
- },
- "roundtrip": {
- "p50": 123.07199835777283,
- "p90": 153.08800339698792,
- "p95": 165.8560037612915,
- "p99": 205.9199959039688
- },
- "isolatedSum": {
- "p50": 142.65599846839905,
- "p90": 192.86400079727173,
- "p95": 208.70400220155716,
- "p99": 270.4000100493431
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.95199686288834,
- "p90": 97.82399982213974,
- "p95": 106.6880002617836,
- "p99": 132.9919993877411
- },
- "combine": {
- "p50": 68.64000111818314,
- "p90": 80.51200211048126,
- "p95": 85.37600189447403,
- "p99": 98.49599748849869
- },
- "roundtrip": {
- "p50": 123.36000055074692,
- "p90": 150.176003575325,
- "p95": 158.4639996290207,
- "p99": 181.63199722766876
- },
- "isolatedSum": {
- "p50": 142.59199798107147,
- "p90": 178.336001932621,
- "p95": 192.06400215625763,
- "p99": 231.48799687623978
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 73.5040009021759,
- "p90": 95.551997423172,
- "p95": 104.86400127410889,
- "p99": 123.4240010380745
- },
- "combine": {
- "p50": 67.80800223350525,
- "p90": 78.46400141716003,
- "p95": 84.95999872684479,
- "p99": 125.2799928188324
- },
- "roundtrip": {
- "p50": 122.78400361537933,
- "p90": 150.65599977970123,
- "p95": 159.07199680805206,
- "p99": 200.51200687885284
- },
- "isolatedSum": {
- "p50": 141.31200313568115,
- "p90": 174.01599884033203,
- "p95": 189.82400000095367,
- "p99": 248.7039938569069
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 75.23199915885925,
- "p90": 103.32799702882767,
- "p95": 111.87200248241425,
- "p99": 143.26399564743042
- },
- "combine": {
- "p50": 69.60000097751617,
- "p90": 85.79199761152267,
- "p95": 91.71199798583984,
- "p99": 124.12799894809723
- },
- "roundtrip": {
- "p50": 126.36800110340118,
- "p90": 160.12799739837646,
- "p95": 167.64800250530243,
- "p99": 193.2159960269928
- },
- "isolatedSum": {
- "p50": 144.83200013637543,
- "p90": 189.11999464035034,
- "p95": 203.5840004682541,
- "p99": 267.39199459552765
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 77.40800082683563,
- "p90": 104.63999956846237,
- "p95": 113.43999952077866,
- "p99": 144.0960019826889
- },
- "combine": {
- "p50": 70.52800059318542,
- "p90": 87.23200112581253,
- "p95": 90.94399958848953,
- "p99": 101.1200025677681
- },
- "roundtrip": {
- "p50": 127.6479959487915,
- "p90": 161.85599565505981,
- "p95": 175.7120043039322,
- "p99": 230.27199506759644
- },
- "isolatedSum": {
- "p50": 147.93600142002106,
- "p90": 191.8720006942749,
- "p95": 204.3839991092682,
- "p99": 245.216004550457
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 83.0719992518425,
- "p90": 109.50399935245514,
- "p95": 115.61600118875504,
- "p99": 128.1599998474121
- },
- "combine": {
- "p50": 77.34400033950806,
- "p90": 91.64799749851227,
- "p95": 95.61599791049957,
- "p99": 112.73600161075592
- },
- "roundtrip": {
- "p50": 132.60799646377563,
- "p90": 157.0879966020584,
- "p95": 165.0560051202774,
- "p99": 194.20799612998962
- },
- "isolatedSum": {
- "p50": 160.41599959135056,
- "p90": 201.1519968509674,
- "p95": 211.2319990992546,
- "p99": 240.89600145816803
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12859392,
- "combineLogicalBytes": 12859392,
- "fanoutMean": 3.50390625,
- "recvTokensMax": 255,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 97.15200215578079,
- "p90": 111.35999858379364,
- "p95": 121.31199985742569,
- "p99": 134.8479986190796
- },
- "combine": {
- "p50": 87.5839963555336,
- "p90": 99.80800002813339,
- "p95": 104.06400263309479,
- "p99": 116.95999652147293
- },
- "roundtrip": {
- "p50": 161.9199961423874,
- "p90": 177.72799730300903,
- "p95": 184.67199802398682,
- "p99": 235.61599850654602
- },
- "isolatedSum": {
- "p50": 184.7359985113144,
- "p90": 211.16799861192703,
- "p95": 225.37600249052048,
- "p99": 251.80799514055252
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 25145344,
- "combineLogicalBytes": 25145344,
- "fanoutMean": 3.42578125,
- "recvTokensMax": 510,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.97599762678146,
- "p90": 147.10399508476257,
- "p95": 156.25600516796112,
- "p99": 183.07200074195862
- },
- "combine": {
- "p50": 110.49599945545197,
- "p90": 123.87199699878693,
- "p95": 129.40800189971924,
- "p99": 150.751993060112
- },
- "roundtrip": {
- "p50": 208.73600244522095,
- "p90": 225.43999552726746,
- "p95": 233.024001121521,
- "p99": 256.415992975235
- },
- "isolatedSum": {
- "p50": 233.47199708223343,
- "p90": 270.9759920835495,
- "p95": 285.66400706768036,
- "p99": 333.8239938020706
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-9efea369",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|normalized|0.18|a8f501af7004836",
- "colorKey": "h200_993777bf",
- "comparisonKey": "cdec001c60a84b85",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:46:59.245966+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "a8f501af7004836",
- "workloadId": "set:8:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.927734375,
- "eplbImbalanceAfter": 1.0006103515625,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28255303840",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840",
- "createdAt": "2026-06-26T17:46:59.245966+00:00",
- "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 72.89600372314453,
- "p90": 99.45599734783173,
- "p95": 108.73600095510483,
- "p99": 128.86400520801544
- },
- "combine": {
- "p50": 67.19999760389328,
- "p90": 78.3040001988411,
- "p95": 82.46400207281113,
- "p99": 102.65599936246872
- },
- "roundtrip": {
- "p50": 119.32799965143204,
- "p90": 147.77599275112152,
- "p95": 155.07200360298157,
- "p99": 171.03999853134155
- },
- "isolatedSum": {
- "p50": 140.0960013270378,
- "p90": 177.75999754667282,
- "p95": 191.20000302791595,
- "p99": 231.52000457048416
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 5.375,
- "recvTokensMax": 7,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 73.18399846553802,
- "p90": 94.27200257778168,
- "p95": 104.5759990811348,
- "p99": 122.68800288438797
- },
- "combine": {
- "p50": 68.09599697589874,
- "p90": 81.15199953317642,
- "p95": 86.17600053548813,
- "p99": 113.3119985461235
- },
- "roundtrip": {
- "p50": 120.31999975442886,
- "p90": 147.45600521564484,
- "p95": 157.82399475574493,
- "p99": 190.08000195026398
- },
- "isolatedSum": {
- "p50": 141.27999544143677,
- "p90": 175.4240021109581,
- "p95": 190.75199961662292,
- "p99": 236.00000143051147
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 1204224,
- "fanoutMean": 5.25,
- "recvTokensMax": 14,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 78.62400263547897,
- "p90": 130.5920034646988,
- "p95": 144.54400539398193,
- "p99": 178.847998380661
- },
- "combine": {
- "p50": 69.08799707889557,
- "p90": 80.51200211048126,
- "p95": 87.87199854850769,
- "p99": 104.19200360774994
- },
- "roundtrip": {
- "p50": 124.70400333404541,
- "p90": 154.14400398731232,
- "p95": 165.15199840068817,
- "p99": 194.68800723552704
- },
- "isolatedSum": {
- "p50": 147.71199971437454,
- "p90": 211.10400557518005,
- "p95": 232.41600394248962,
- "p99": 283.04000198841095
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 24,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 74.46400076150894,
- "p90": 99.39199686050415,
- "p95": 109.76000130176544,
- "p99": 140.6400054693222
- },
- "combine": {
- "p50": 68.76800209283829,
- "p90": 83.64800363779068,
- "p95": 90.14400094747543,
- "p99": 115.35999923944473
- },
- "roundtrip": {
- "p50": 124.54400211572647,
- "p90": 155.7759940624237,
- "p95": 170.56000232696533,
- "p99": 186.91200017929077
- },
- "isolatedSum": {
- "p50": 143.23200285434723,
- "p90": 183.04000049829483,
- "p95": 199.90400224924088,
- "p99": 256.00000470876694
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4630528,
- "combineLogicalBytes": 4630528,
- "fanoutMean": 5.046875,
- "recvTokensMax": 45,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 76.25599950551987,
- "p90": 106.9440022110939,
- "p95": 120.7360029220581,
- "p99": 149.24800395965576
- },
- "combine": {
- "p50": 70.52800059318542,
- "p90": 85.24800091981888,
- "p95": 90.04800021648407,
- "p99": 104.5759990811348
- },
- "roundtrip": {
- "p50": 129.98400628566742,
- "p90": 161.05599701404572,
- "p95": 173.8560050725937,
- "p99": 205.21600544452667
- },
- "isolatedSum": {
- "p50": 146.7840000987053,
- "p90": 192.19200313091278,
- "p95": 210.78400313854218,
- "p99": 253.82400304079056
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9447424,
- "combineLogicalBytes": 9447424,
- "fanoutMean": 5.1484375,
- "recvTokensMax": 91,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 81.91999793052673,
- "p90": 99.07200187444687,
- "p95": 107.04000294208527,
- "p99": 128.57599556446075
- },
- "combine": {
- "p50": 76.03199779987335,
- "p90": 89.63199704885483,
- "p95": 96.54399752616882,
- "p99": 106.08000308275223
- },
- "roundtrip": {
- "p50": 129.08799946308136,
- "p90": 156.76799416542053,
- "p95": 167.29600727558136,
- "p99": 217.3440009355545
- },
- "isolatedSum": {
- "p50": 157.95199573040009,
- "p90": 188.7039989233017,
- "p95": 203.5840004682541,
- "p99": 234.65599864721298
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19023872,
- "combineLogicalBytes": 19023872,
- "fanoutMean": 5.18359375,
- "recvTokensMax": 178,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 101.79200023412704,
- "p90": 136.6720050573349,
- "p95": 146.36799693107605,
- "p99": 175.10400712490082
- },
- "combine": {
- "p50": 93.44000369310379,
- "p90": 112.76800185441971,
- "p95": 117.15199798345566,
- "p99": 131.71200454235077
- },
- "roundtrip": {
- "p50": 165.43999314308167,
- "p90": 204.44799959659576,
- "p95": 212.38400042057037,
- "p99": 240.03200232982635
- },
- "isolatedSum": {
- "p50": 195.23200392723083,
- "p90": 249.4400069117546,
- "p95": 263.5199949145317,
- "p99": 306.8160116672516
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38148096,
- "combineLogicalBytes": 38148096,
- "fanoutMean": 5.197265625,
- "recvTokensMax": 350,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 115.68000167608261,
- "p90": 135.29600203037262,
- "p95": 142.17600226402283,
- "p99": 160.64000129699707
- },
- "combine": {
- "p50": 104.96000200510025,
- "p90": 118.04799735546112,
- "p95": 122.68800288438797,
- "p99": 147.64800667762756
- },
- "roundtrip": {
- "p50": 194.97600197792053,
- "p90": 212.64000236988068,
- "p95": 220.19200026988983,
- "p99": 234.78400707244873
- },
- "isolatedSum": {
- "p50": 220.64000368118286,
- "p90": 253.34399938583374,
- "p95": 264.8640051484108,
- "p99": 308.28800797462463
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 76955648,
- "combineLogicalBytes": 76955648,
- "fanoutMean": 5.2421875,
- "recvTokensMax": 687,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-cee2e19b",
- "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h200_edd92e38",
- "comparisonKey": "4a9eb2a61bfd9462",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:08.901856+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254409438",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438",
- "createdAt": "2026-06-26T17:30:08.901856+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 62.97600269317627,
- "p90": 86.40000224113464,
- "p95": 94.14400160312653,
- "p99": 136.9599997997284
- },
- "combine": {
- "p50": 69.21599805355072,
- "p90": 82.04799890518188,
- "p95": 87.20000088214874,
- "p99": 98.49599748849869
- },
- "roundtrip": {
- "p50": 109.98400300741196,
- "p90": 133.08799266815186,
- "p95": 140.8960074186325,
- "p99": 178.27199399471283
- },
- "isolatedSum": {
- "p50": 132.192000746727,
- "p90": 168.44800114631653,
- "p95": 181.34400248527527,
- "p99": 235.45599728822708
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 64.38399851322174,
- "p90": 88.73599767684937,
- "p95": 94.87999975681305,
- "p99": 119.48800086975098
- },
- "combine": {
- "p50": 69.2799985408783,
- "p90": 83.52000266313553,
- "p95": 88.95999938249588,
- "p99": 107.10400342941284
- },
- "roundtrip": {
- "p50": 110.20799726247787,
- "p90": 138.2720023393631,
- "p95": 145.37599682807922,
- "p99": 175.55199563503265
- },
- "isolatedSum": {
- "p50": 133.66399705410004,
- "p90": 172.2560003399849,
- "p95": 183.83999913930893,
- "p99": 226.59200429916382
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 64.41599875688553,
- "p90": 90.52799642086029,
- "p95": 101.88800096511841,
- "p99": 132.28799402713776
- },
- "combine": {
- "p50": 70.62400132417679,
- "p90": 85.34400165081024,
- "p95": 90.71999788284302,
- "p99": 102.27199643850327
- },
- "roundtrip": {
- "p50": 113.43999952077866,
- "p90": 141.79199934005737,
- "p95": 148.22399616241455,
- "p99": 183.58400464057922
- },
- "isolatedSum": {
- "p50": 135.04000008106232,
- "p90": 175.87199807167053,
- "p95": 192.60799884796143,
- "p99": 234.55999046564102
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 63.1679967045784,
- "p90": 82.75199681520462,
- "p95": 87.96799927949905,
- "p99": 107.744000852108
- },
- "combine": {
- "p50": 69.85600292682648,
- "p90": 85.1840004324913,
- "p95": 90.46400338411331,
- "p99": 100.99200159311295
- },
- "roundtrip": {
- "p50": 112.44799941778183,
- "p90": 139.20000195503235,
- "p95": 152.38399803638458,
- "p99": 206.7520022392273
- },
- "isolatedSum": {
- "p50": 133.02399963140488,
- "p90": 167.93599724769592,
- "p95": 178.43200266361237,
- "p99": 208.73600244522095
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 60.35200133919716,
- "p90": 85.02399921417236,
- "p95": 91.67999774217606,
- "p99": 111.13599687814713
- },
- "combine": {
- "p50": 70.3359991312027,
- "p90": 86.87999844551086,
- "p95": 89.82399851083755,
- "p99": 99.35999661684036
- },
- "roundtrip": {
- "p50": 116.03199690580368,
- "p90": 141.34399592876434,
- "p95": 148.3519971370697,
- "p99": 184.9920004606247
- },
- "isolatedSum": {
- "p50": 130.68800047039986,
- "p90": 171.90399765968323,
- "p95": 181.5039962530136,
- "p99": 210.4959934949875
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 71.74400240182877,
- "p90": 90.71999788284302,
- "p95": 96.73599898815155,
- "p99": 118.23999881744385
- },
- "combine": {
- "p50": 77.66400277614594,
- "p90": 93.05600076913834,
- "p95": 97.69599884748459,
- "p99": 108.92800241708755
- },
- "roundtrip": {
- "p50": 122.36800044775009,
- "p90": 149.05600249767303,
- "p95": 159.61599349975586,
- "p99": 184.12800133228302
- },
- "isolatedSum": {
- "p50": 149.4080051779747,
- "p90": 183.77599865198135,
- "p95": 194.43199783563614,
- "p99": 227.1680012345314
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 82.65600353479385,
- "p90": 100.3199964761734,
- "p95": 109.15199667215347,
- "p99": 139.39200341701508
- },
- "combine": {
- "p50": 91.45600348711014,
- "p90": 106.52799904346466,
- "p95": 114.30399864912033,
- "p99": 132.22399353981018
- },
- "roundtrip": {
- "p50": 147.42399752140045,
- "p90": 165.3439998626709,
- "p95": 174.20800030231476,
- "p99": 198.65599274635315
- },
- "isolatedSum": {
- "p50": 174.112007021904,
- "p90": 206.84799551963806,
- "p95": 223.4559953212738,
- "p99": 271.61599695682526
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 105.12000322341919,
- "p90": 118.40000003576279,
- "p95": 122.81599640846252,
- "p99": 147.32800424098969
- },
- "combine": {
- "p50": 104.73600029945374,
- "p90": 122.11199849843979,
- "p95": 126.75200402736664,
- "p99": 138.84800672531128
- },
- "roundtrip": {
- "p50": 184.38400328159332,
- "p90": 200.41599869728088,
- "p95": 207.96799659729004,
- "p99": 272.44800329208374
- },
- "isolatedSum": {
- "p50": 209.85600352287292,
- "p90": 240.51199853420258,
- "p95": 249.56800043582916,
- "p99": 286.17601096630096
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8a74732f",
- "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_76bb7d5d",
- "comparisonKey": "b4a52819ec3c25b8",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:49:31.596673+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_11",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271608834",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271608834",
- "createdAt": "2026-06-26T23:49:31.596673+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 62.144000083208084,
- "p90": 86.62399649620056,
- "p95": 98.49599748849869,
- "p99": 125.5359947681427
- },
- "combine": {
- "p50": 68.54400038719177,
- "p90": 84.41600203514099,
- "p95": 92.83199906349182,
- "p99": 123.07199835777283
- },
- "roundtrip": {
- "p50": 109.31199789047241,
- "p90": 135.29600203037262,
- "p95": 143.77599954605103,
- "p99": 159.84000265598297
- },
- "isolatedSum": {
- "p50": 130.68800047039986,
- "p90": 171.03999853134155,
- "p95": 191.3279965519905,
- "p99": 248.60799312591553
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 63.74400109052658,
- "p90": 91.26400202512741,
- "p95": 99.87200051546097,
- "p99": 171.9679981470108
- },
- "combine": {
- "p50": 70.81600278615952,
- "p90": 194.75199282169342,
- "p95": 206.94400370121002,
- "p99": 256.9279968738556
- },
- "roundtrip": {
- "p50": 110.04800349473953,
- "p90": 140.1599943637848,
- "p95": 147.13600277900696,
- "p99": 161.50400042533875
- },
- "isolatedSum": {
- "p50": 134.5600038766861,
- "p90": 286.01599484682083,
- "p95": 306.816004216671,
- "p99": 428.8959950208664
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 62.94400244951248,
- "p90": 80.51200211048126,
- "p95": 89.02399986982346,
- "p99": 111.39199882745743
- },
- "combine": {
- "p50": 68.38399916887283,
- "p90": 79.8719972372055,
- "p95": 88.54400366544724,
- "p99": 100.54399818181992
- },
- "roundtrip": {
- "p50": 111.16799712181091,
- "p90": 139.80799913406372,
- "p95": 148.41599762439728,
- "p99": 167.07199811935425
- },
- "isolatedSum": {
- "p50": 131.32800161838531,
- "p90": 160.38399934768677,
- "p95": 177.5680035352707,
- "p99": 211.93599700927734
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 61.88800185918808,
- "p90": 83.16799998283386,
- "p95": 92.51199662685394,
- "p99": 104.06400263309479
- },
- "combine": {
- "p50": 68.67200136184692,
- "p90": 82.84799754619598,
- "p95": 88.639996945858,
- "p99": 105.05600273609161
- },
- "roundtrip": {
- "p50": 110.84800213575363,
- "p90": 140.79999923706055,
- "p95": 148.0640023946762,
- "p99": 159.2639982700348
- },
- "isolatedSum": {
- "p50": 130.560003221035,
- "p90": 166.01599752902985,
- "p95": 181.15199357271194,
- "p99": 209.1200053691864
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 59.84000116586685,
- "p90": 82.17599987983704,
- "p95": 92.32000261545181,
- "p99": 105.92000186443329
- },
- "combine": {
- "p50": 69.72800195217133,
- "p90": 84.19200032949448,
- "p95": 90.68799763917923,
- "p99": 106.91200196743011
- },
- "roundtrip": {
- "p50": 112.12799698114395,
- "p90": 134.62400436401367,
- "p95": 145.9839940071106,
- "p99": 164.09599781036377
- },
- "isolatedSum": {
- "p50": 129.56800311803818,
- "p90": 166.3680002093315,
- "p95": 183.00800025463104,
- "p99": 212.8320038318634
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 70.20799815654755,
- "p90": 94.08000111579895,
- "p95": 101.15200281143188,
- "p99": 118.17599833011627
- },
- "combine": {
- "p50": 76.64000242948532,
- "p90": 91.2960022687912,
- "p95": 97.43999689817429,
- "p99": 105.27999699115753
- },
- "roundtrip": {
- "p50": 123.77600371837616,
- "p90": 148.3519971370697,
- "p95": 155.29599785804749,
- "p99": 175.135999917984
- },
- "isolatedSum": {
- "p50": 146.84800058603287,
- "p90": 185.37600338459015,
- "p95": 198.59199970960617,
- "p99": 223.4559953212738
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 86.43200248479843,
- "p90": 99.48799759149551,
- "p95": 106.84800148010254,
- "p99": 127.42400169372559
- },
- "combine": {
- "p50": 85.82399785518646,
- "p90": 96.63999825716019,
- "p95": 104.76800054311752,
- "p99": 113.21599781513214
- },
- "roundtrip": {
- "p50": 147.8399932384491,
- "p90": 164.5440012216568,
- "p95": 169.95200514793396,
- "p99": 197.53600656986237
- },
- "isolatedSum": {
- "p50": 172.2560003399849,
- "p90": 196.1279958486557,
- "p95": 211.61600202322006,
- "p99": 240.63999950885773
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 103.7760004401207,
- "p90": 118.9119964838028,
- "p95": 127.16799974441528,
- "p99": 134.97599959373474
- },
- "combine": {
- "p50": 105.15200346708298,
- "p90": 119.00799721479416,
- "p95": 124.35200065374374,
- "p99": 139.55199718475342
- },
- "roundtrip": {
- "p50": 185.2799952030182,
- "p90": 201.7280012369156,
- "p95": 207.39200711250305,
- "p99": 224.95999932289124
- },
- "isolatedSum": {
- "p50": 208.92800390720367,
- "p90": 237.91999369859695,
- "p95": 251.52000039815903,
- "p99": 274.52799677848816
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-274a06b0",
- "identity": "h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_c9aeae24",
- "comparisonKey": "0abd2163f516521c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:50:44.931546+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_8",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 LL",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271645585",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271645585",
- "createdAt": "2026-06-26T23:50:44.931546+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 37.567999213933945,
- "p90": 48.0320006608963,
- "p95": 52.41600051522255,
- "p99": 62.33600154519081
- },
- "combine": {
- "p50": 33.663999289274216,
- "p90": 44.38399896025658,
- "p95": 46.879999339580536,
- "p99": 61.85600161552429
- },
- "roundtrip": {
- "p50": 51.231998950242996,
- "p90": 70.14399766921997,
- "p95": 77.31200009584427,
- "p99": 100.0640019774437
- },
- "isolatedSum": {
- "p50": 71.23199850320816,
- "p90": 92.41599962115288,
- "p95": 99.29599985480309,
- "p99": 124.1920031607151
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 37.88800165057182,
- "p90": 49.15200173854828,
- "p95": 55.87200075387955,
- "p99": 76.89599692821503
- },
- "combine": {
- "p50": 32.896000891923904,
- "p90": 43.83999854326248,
- "p95": 47.07200080156326,
- "p99": 67.74400174617767
- },
- "roundtrip": {
- "p50": 51.00800096988678,
- "p90": 67.9360032081604,
- "p95": 74.20799881219864,
- "p99": 96.83199971914291
- },
- "isolatedSum": {
- "p50": 70.78400254249573,
- "p90": 92.99200028181076,
- "p95": 102.94400155544281,
- "p99": 144.6399986743927
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 37.53599897027016,
- "p90": 44.95999962091446,
- "p95": 51.61599814891815,
- "p99": 66.30399823188782
- },
- "combine": {
- "p50": 29.791999608278275,
- "p90": 39.16800022125244,
- "p95": 44.064000248909,
- "p99": 53.63199859857559
- },
- "roundtrip": {
- "p50": 51.13599821925163,
- "p90": 63.519999384880066,
- "p95": 71.77600264549255,
- "p99": 81.34400099515915
- },
- "isolatedSum": {
- "p50": 67.32799857854843,
- "p90": 84.1279998421669,
- "p95": 95.67999839782715,
- "p99": 119.93599683046341
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 38.27200084924698,
- "p90": 51.32799968123436,
- "p95": 57.08799883723259,
- "p99": 66.97600334882736
- },
- "combine": {
- "p50": 34.623999148607254,
- "p90": 44.03200000524521,
- "p95": 46.62400111556053,
- "p99": 54.55999821424484
- },
- "roundtrip": {
- "p50": 55.39200082421303,
- "p90": 67.58400052785873,
- "p95": 75.42400062084198,
- "p99": 95.0080007314682
- },
- "isolatedSum": {
- "p50": 72.89599999785423,
- "p90": 95.35999968647957,
- "p95": 103.71199995279312,
- "p99": 121.5360015630722
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 38.816001266241074,
- "p90": 54.17599901556969,
- "p95": 57.72799998521805,
- "p99": 75.00799745321274
- },
- "combine": {
- "p50": 36.288000643253326,
- "p90": 46.01600021123886,
- "p95": 48.00000041723251,
- "p99": 69.47200000286102
- },
- "roundtrip": {
- "p50": 59.967998415231705,
- "p90": 73.05599749088287,
- "p95": 77.2159993648529,
- "p99": 92.12800115346909
- },
- "isolatedSum": {
- "p50": 75.1040019094944,
- "p90": 100.19199922680855,
- "p95": 105.72800040245056,
- "p99": 144.47999745607376
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 45.40799930691719,
- "p90": 55.32800033688545,
- "p95": 60.15999987721443,
- "p99": 70.88000327348709
- },
- "combine": {
- "p50": 43.87199878692627,
- "p90": 53.53600159287453,
- "p95": 55.32800033688545,
- "p99": 67.9360032081604
- },
- "roundtrip": {
- "p50": 72.35199958086014,
- "p90": 82.8159973025322,
- "p95": 86.01599931716919,
- "p99": 98.88000041246414
- },
- "isolatedSum": {
- "p50": 89.27999809384346,
- "p90": 108.86400192975998,
- "p95": 115.48800021409988,
- "p99": 138.8160064816475
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 55.296000093221664,
- "p90": 66.6240006685257,
- "p95": 70.36799937486649,
- "p99": 88.16000074148178
- },
- "combine": {
- "p50": 59.07199904322624,
- "p90": 67.71200150251389,
- "p95": 70.43199986219406,
- "p99": 79.3600007891655
- },
- "roundtrip": {
- "p50": 97.34400361776352,
- "p90": 109.3439981341362,
- "p95": 115.32799899578094,
- "p99": 128.12800705432892
- },
- "isolatedSum": {
- "p50": 114.3679991364479,
- "p90": 134.33600217103958,
- "p95": 140.79999923706055,
- "p99": 167.52000153064728
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 81.05599880218506,
- "p90": 91.26400202512741,
- "p95": 95.77599912881851,
- "p99": 104.38399761915207
- },
- "combine": {
- "p50": 86.40000224113464,
- "p90": 98.36799651384354,
- "p95": 102.84800082445145,
- "p99": 111.96800321340561
- },
- "roundtrip": {
- "p50": 148.44800531864166,
- "p90": 162.88000345230103,
- "p95": 168.16000640392303,
- "p99": 178.24000120162964
- },
- "isolatedSum": {
- "p50": 167.4560010433197,
- "p90": 189.63199853897095,
- "p95": 198.62399995326996,
- "p99": 216.35200083255768
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-81e223f4",
- "identity": "h200|deepep|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_7cfa04c4",
- "comparisonKey": "72cd529af4968fe8",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:50:48.529187+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 LL",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271650161",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271650161",
- "createdAt": "2026-06-26T23:50:48.529187+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 38.55999931693077,
- "p90": 52.25599929690361,
- "p95": 57.69599974155426,
- "p99": 68.70400160551071
- },
- "combine": {
- "p50": 33.440001308918,
- "p90": 46.23999819159508,
- "p95": 50.36799982190132,
- "p99": 62.912002205848694
- },
- "roundtrip": {
- "p50": 52.70399898290634,
- "p90": 70.43199986219406,
- "p95": 77.85599678754807,
- "p99": 90.27200192213058
- },
- "isolatedSum": {
- "p50": 72.00000062584877,
- "p90": 98.49599748849869,
- "p95": 108.06399956345558,
- "p99": 131.6160038113594
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 37.76000067591667,
- "p90": 48.25599864125252,
- "p95": 55.93600124120712,
- "p99": 79.68000322580338
- },
- "combine": {
- "p50": 32.80000016093254,
- "p90": 41.120000183582306,
- "p95": 44.863998889923096,
- "p99": 49.8879998922348
- },
- "roundtrip": {
- "p50": 52.83199995756149,
- "p90": 65.88800251483917,
- "p95": 71.80800288915634,
- "p99": 80.60800284147263
- },
- "isolatedSum": {
- "p50": 70.56000083684921,
- "p90": 89.37599882483482,
- "p95": 100.80000013113022,
- "p99": 129.56800311803818
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 38.495998829603195,
- "p90": 52.44800075888634,
- "p95": 56.543998420238495,
- "p99": 76.4480009675026
- },
- "combine": {
- "p50": 33.055998384952545,
- "p90": 44.16000097990036,
- "p95": 45.951999723911285,
- "p99": 53.568001836538315
- },
- "roundtrip": {
- "p50": 52.70399898290634,
- "p90": 64.2239972949028,
- "p95": 71.96799665689468,
- "p99": 81.53600245714188
- },
- "isolatedSum": {
- "p50": 71.55199721455574,
- "p90": 96.6080017387867,
- "p95": 102.49599814414978,
- "p99": 130.0160028040409
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 39.07199949026108,
- "p90": 52.960000932216644,
- "p95": 56.992001831531525,
- "p99": 65.43999910354614
- },
- "combine": {
- "p50": 34.04799848794937,
- "p90": 44.19200122356415,
- "p95": 46.1760014295578,
- "p99": 57.472001761198044
- },
- "roundtrip": {
- "p50": 54.11199852824211,
- "p90": 68.60800087451935,
- "p95": 74.78400319814682,
- "p99": 85.28000116348267
- },
- "isolatedSum": {
- "p50": 73.11999797821045,
- "p90": 97.15200215578079,
- "p95": 103.16800326108932,
- "p99": 122.91200086474419
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 40.12800008058548,
- "p90": 55.00800162553787,
- "p95": 59.29600074887276,
- "p99": 66.81600213050842
- },
- "combine": {
- "p50": 38.047999143600464,
- "p90": 49.82399940490723,
- "p95": 52.799999713897705,
- "p99": 63.19999694824219
- },
- "roundtrip": {
- "p50": 61.5679994225502,
- "p90": 75.48800110816956,
- "p95": 82.36800134181976,
- "p99": 96.89600020647049
- },
- "isolatedSum": {
- "p50": 78.17599922418594,
- "p90": 104.8320010304451,
- "p95": 112.09600046277046,
- "p99": 130.0159990787506
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 46.23999819159508,
- "p90": 56.57599866390228,
- "p95": 62.30400130152702,
- "p99": 70.8480030298233
- },
- "combine": {
- "p50": 43.96799951791763,
- "p90": 53.75999957323074,
- "p95": 58.33600088953972,
- "p99": 61.216000467538834
- },
- "roundtrip": {
- "p50": 71.19999825954437,
- "p90": 80.86399734020233,
- "p95": 85.28000116348267,
- "p99": 93.21600198745728
- },
- "isolatedSum": {
- "p50": 90.20799770951271,
- "p90": 110.33599823713303,
- "p95": 120.64000219106674,
- "p99": 132.06400349736214
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 56.60799890756607,
- "p90": 77.53600180149078,
- "p95": 85.31200140714645,
- "p99": 192.03199446201324
- },
- "combine": {
- "p50": 58.240000158548355,
- "p90": 67.29599833488464,
- "p95": 69.56800073385239,
- "p99": 77.82399654388428
- },
- "roundtrip": {
- "p50": 96.28800302743912,
- "p90": 107.39199817180634,
- "p95": 111.58400028944016,
- "p99": 126.52799487113953
- },
- "isolatedSum": {
- "p50": 114.84799906611443,
- "p90": 144.83200013637543,
- "p95": 154.88000214099884,
- "p99": 269.8559910058975
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 81.7599967122078,
- "p90": 92.67199784517288,
- "p95": 95.13600170612335,
- "p99": 128.38399410247803
- },
- "combine": {
- "p50": 86.27200126647949,
- "p90": 94.91200000047684,
- "p95": 97.120001912117,
- "p99": 105.27999699115753
- },
- "roundtrip": {
- "p50": 147.2959965467453,
- "p90": 157.56799280643463,
- "p95": 162.36799955368042,
- "p99": 174.9120056629181
- },
- "isolatedSum": {
- "p50": 168.0319979786873,
- "p90": 187.58399784564972,
- "p95": 192.25600361824036,
- "p99": 233.66399109363556
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-43b4144e",
- "identity": "h200|deepep|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h200_0a1a73b3",
- "comparisonKey": "14196b9d68f90910",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:32.638567+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 LL (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254426529",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254426529",
- "createdAt": "2026-06-26T17:30:32.638567+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 36.86400130391121,
- "p90": 47.13600128889084,
- "p95": 51.52000114321709,
- "p99": 63.32799792289734
- },
- "combine": {
- "p50": 33.440001308918,
- "p90": 42.527999728918076,
- "p95": 46.81599885225296,
- "p99": 52.22399905323982
- },
- "roundtrip": {
- "p50": 50.52800104022026,
- "p90": 65.15199691057205,
- "p95": 71.03999704122543,
- "p99": 78.68800312280655
- },
- "isolatedSum": {
- "p50": 70.30400261282921,
- "p90": 89.66400101780891,
- "p95": 98.33599999547005,
- "p99": 115.55199697613716
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 37.408001720905304,
- "p90": 48.608001321554184,
- "p95": 54.687999188899994,
- "p99": 65.2799978852272
- },
- "combine": {
- "p50": 32.735999673604965,
- "p90": 42.59200021624565,
- "p95": 45.05600035190582,
- "p99": 51.35999992489815
- },
- "roundtrip": {
- "p50": 51.4880008995533,
- "p90": 66.72000139951706,
- "p95": 72.54400104284286,
- "p99": 85.08799970149994
- },
- "isolatedSum": {
- "p50": 70.14400139451027,
- "p90": 91.20000153779984,
- "p95": 99.74399954080582,
- "p99": 116.63999781012535
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 37.79200091958046,
- "p90": 49.27999898791313,
- "p95": 54.91200089454651,
- "p99": 61.08799949288368
- },
- "combine": {
- "p50": 31.231999397277832,
- "p90": 43.487999588251114,
- "p95": 47.26399853825569,
- "p99": 65.31199812889099
- },
- "roundtrip": {
- "p50": 51.58400163054466,
- "p90": 68.89600306749344,
- "p95": 73.95199686288834,
- "p99": 91.61599725484848
- },
- "isolatedSum": {
- "p50": 69.02400031685829,
- "p90": 92.76799857616425,
- "p95": 102.1759994328022,
- "p99": 126.39999762177467
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 37.53599897027016,
- "p90": 48.128001391887665,
- "p95": 54.75199967622757,
- "p99": 62.111999839544296
- },
- "combine": {
- "p50": 34.46400165557861,
- "p90": 44.544000178575516,
- "p95": 47.231998294591904,
- "p99": 57.37600103020668
- },
- "roundtrip": {
- "p50": 54.687999188899994,
- "p90": 67.4239993095398,
- "p95": 73.44000041484833,
- "p99": 91.96799993515015
- },
- "isolatedSum": {
- "p50": 72.00000062584877,
- "p90": 92.67200157046318,
- "p95": 101.98399797081947,
- "p99": 119.48800086975098
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 39.29600119590759,
- "p90": 51.35999992489815,
- "p95": 55.64799904823303,
- "p99": 64.96000289916992
- },
- "combine": {
- "p50": 36.67199984192848,
- "p90": 46.62400111556053,
- "p95": 50.56000128388405,
- "p99": 60.38400158286095
- },
- "roundtrip": {
- "p50": 60.47999858856201,
- "p90": 74.5920017361641,
- "p95": 79.3600007891655,
- "p99": 87.87199854850769
- },
- "isolatedSum": {
- "p50": 75.96800103783607,
- "p90": 97.98400104045868,
- "p95": 106.20800033211708,
- "p99": 125.34400448203087
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 45.05600035190582,
- "p90": 55.00800162553787,
- "p95": 57.95200169086456,
- "p99": 66.01600348949432
- },
- "combine": {
- "p50": 44.28799822926521,
- "p90": 53.05600166320801,
- "p95": 55.904000997543335,
- "p99": 61.3120011985302
- },
- "roundtrip": {
- "p50": 72.64000177383423,
- "p90": 84.16000008583069,
- "p95": 88.03199976682663,
- "p99": 106.30399733781815
- },
- "isolatedSum": {
- "p50": 89.34399858117104,
- "p90": 108.06400328874588,
- "p95": 113.8560026884079,
- "p99": 127.32800468802452
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 55.23199960589409,
- "p90": 65.63200056552887,
- "p95": 71.48800045251846,
- "p99": 79.55200225114822
- },
- "combine": {
- "p50": 58.43200162053108,
- "p90": 69.37599927186966,
- "p95": 71.07199728488922,
- "p99": 79.42400127649307
- },
- "roundtrip": {
- "p50": 96.8639999628067,
- "p90": 108.44799876213074,
- "p95": 113.72800171375275,
- "p99": 121.72800302505493
- },
- "isolatedSum": {
- "p50": 113.66400122642517,
- "p90": 135.00799983739853,
- "p95": 142.55999773740768,
- "p99": 158.9760035276413
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 79.26400005817413,
- "p90": 88.44800293445587,
- "p95": 92.6399976015091,
- "p99": 101.69599950313568
- },
- "combine": {
- "p50": 86.01599931716919,
- "p90": 95.0080007314682,
- "p95": 97.02400118112564,
- "p99": 103.32799702882767
- },
- "roundtrip": {
- "p50": 147.32800424098969,
- "p90": 157.53600001335144,
- "p95": 161.47199273109436,
- "p99": 169.0240055322647
- },
- "isolatedSum": {
- "p50": 165.27999937534332,
- "p90": 183.45600366592407,
- "p95": 189.66399878263474,
- "p99": 205.02399653196335
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-878f6103",
- "identity": "h200|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "h200_9979edfc",
- "comparisonKey": "539cbdfe3675c8d8",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:31.220360+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_1",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287507619",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287507619",
- "createdAt": "2026-06-27T11:14:31.220360+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 66.6240006685257,
- "p90": 96.6079980134964,
- "p95": 105.40799796581268,
- "p99": 139.8400068283081
- },
- "combine": {
- "p50": 48.928000032901764,
- "p90": 62.144000083208084,
- "p95": 69.98399645090103,
- "p99": 94.71999853849411
- },
- "roundtrip": {
- "p50": 150.39999783039093,
- "p90": 202.27199792861938,
- "p95": 209.88799631595612,
- "p99": 232.35200345516205
- },
- "isolatedSum": {
- "p50": 115.55200070142746,
- "p90": 158.75199809670448,
- "p95": 175.39199441671371,
- "p99": 234.56000536680222
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 63.13599646091461,
- "p90": 90.81599861383438,
- "p95": 99.84000027179718,
- "p99": 112.86400258541107
- },
- "combine": {
- "p50": 48.86399954557419,
- "p90": 59.487998485565186,
- "p95": 66.880002617836,
- "p99": 72.67200201749802
- },
- "roundtrip": {
- "p50": 141.82400703430176,
- "p90": 184.1599941253662,
- "p95": 192.1280026435852,
- "p99": 211.64800226688385
- },
- "isolatedSum": {
- "p50": 111.9999960064888,
- "p90": 150.30399709939957,
- "p95": 166.72000288963318,
- "p99": 185.5360046029091
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 352256,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 64.60800021886826,
- "p90": 93.28000247478485,
- "p95": 99.07200187444687,
- "p99": 110.46399921178818
- },
- "combine": {
- "p50": 49.375999718904495,
- "p90": 60.447998344898224,
- "p95": 67.61600077152252,
- "p99": 73.27999919652939
- },
- "roundtrip": {
- "p50": 142.752006649971,
- "p90": 189.69599902629852,
- "p95": 199.13600385189056,
- "p99": 217.3440009355545
- },
- "isolatedSum": {
- "p50": 113.98399993777275,
- "p90": 153.72800081968307,
- "p95": 166.6880026459694,
- "p99": 183.74399840831757
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 692224,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 61.85600161552429,
- "p90": 86.496002972126,
- "p95": 95.551997423172,
- "p99": 104.00000214576721
- },
- "combine": {
- "p50": 50.08000135421753,
- "p90": 60.28800085186958,
- "p95": 66.91200286149979,
- "p99": 77.40800082683563
- },
- "roundtrip": {
- "p50": 143.51999759674072,
- "p90": 185.5040043592453,
- "p95": 194.17600333690643,
- "p99": 225.63199698925018
- },
- "isolatedSum": {
- "p50": 111.93600296974182,
- "p90": 146.7840038239956,
- "p95": 162.46400028467178,
- "p99": 181.40800297260284
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1372160,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 67.87200272083282,
- "p90": 97.72799909114838,
- "p95": 107.10400342941284,
- "p99": 163.64799439907074
- },
- "combine": {
- "p50": 51.29599943757057,
- "p90": 61.824001371860504,
- "p95": 69.88800317049026,
- "p99": 75.6480023264885
- },
- "roundtrip": {
- "p50": 146.33600413799286,
- "p90": 189.31199610233307,
- "p95": 197.4399983882904,
- "p99": 221.18400037288666
- },
- "isolatedSum": {
- "p50": 119.1680021584034,
- "p90": 159.55200046300888,
- "p95": 176.9920065999031,
- "p99": 239.29599672555923
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2732032,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 67.45599955320358,
- "p90": 91.61599725484848,
- "p95": 100.54399818181992,
- "p99": 115.48800021409988
- },
- "combine": {
- "p50": 54.17599901556969,
- "p90": 66.01600348949432,
- "p95": 71.74400240182877,
- "p99": 80.22399991750717
- },
- "roundtrip": {
- "p50": 148.80000054836273,
- "p90": 190.11199474334717,
- "p95": 201.79200172424316,
- "p99": 216.44799411296844
- },
- "isolatedSum": {
- "p50": 121.63199856877327,
- "p90": 157.6320007443428,
- "p95": 172.28800058364868,
- "p99": 195.71200013160706
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5562368,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 71.80800288915634,
- "p90": 112.67200112342834,
- "p95": 120.15999853610992,
- "p99": 136.28800213336945
- },
- "combine": {
- "p50": 62.3680017888546,
- "p90": 75.32799988985062,
- "p95": 80.25600016117096,
- "p99": 88.03199976682663
- },
- "roundtrip": {
- "p50": 162.52799332141876,
- "p90": 212.89600431919098,
- "p95": 224.41600263118744,
- "p99": 245.40799856185913
- },
- "isolatedSum": {
- "p50": 134.17600467801094,
- "p90": 188.00000101327896,
- "p95": 200.41599869728088,
- "p99": 224.32000190019608
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 11096064,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 76.48000121116638,
- "p90": 95.36000341176987,
- "p95": 103.64799946546555,
- "p99": 109.15199667215347
- },
- "combine": {
- "p50": 72.28799909353256,
- "p90": 84.57600325345993,
- "p95": 91.07200056314468,
- "p99": 94.2080020904541
- },
- "roundtrip": {
- "p50": 167.58400201797485,
- "p90": 208.8959962129593,
- "p95": 216.5759950876236,
- "p99": 233.08800160884857
- },
- "isolatedSum": {
- "p50": 148.76800030469894,
- "p90": 179.9360066652298,
- "p95": 194.72000002861023,
- "p99": 203.35999876260757
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b5299c0b",
- "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "0d3b5b81799f76d5",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:53:33.916655+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_2",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "75530960a30b452",
- "workloadId": "set:8:d1b92539bddfb570",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271736220",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271736220",
- "createdAt": "2026-06-26T23:53:33.916655+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 221.15199267864227,
- "p90": 287.26398944854736,
- "p95": 315.39198756217957,
- "p99": 401.98400616645813
- },
- "combine": {
- "p50": 47.87199944257736,
- "p90": 66.27199798822403,
- "p95": 73.91999661922455,
- "p99": 92.51199662685394
- },
- "roundtrip": {
- "p50": 246.75199389457703,
- "p90": 302.2400140762329,
- "p95": 335.61599254608154,
- "p99": 400.160014629364
- },
- "isolatedSum": {
- "p50": 269.02399212121964,
- "p90": 353.5359874367714,
- "p95": 389.3119841814041,
- "p99": 494.4960027933121
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 344064,
- "fanoutMean": 5.25,
- "recvTokensMax": 6,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 264.6079957485199,
- "p90": 342.3680067062378,
- "p95": 371.0399866104126,
- "p99": 447.00801372528076
- },
- "combine": {
- "p50": 54.46400120854378,
- "p90": 68.03199648857117,
- "p95": 74.8480036854744,
- "p99": 88.83199840784073
- },
- "roundtrip": {
- "p50": 257.2160065174103,
- "p90": 336.4480137825012,
- "p95": 375.10401010513306,
- "p99": 443.93599033355713
- },
- "isolatedSum": {
- "p50": 319.0719969570637,
- "p90": 410.40000319480896,
- "p95": 445.887990295887,
- "p99": 535.8400121331215
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 352256,
- "combineLogicalBytes": 704512,
- "fanoutMean": 5.375,
- "recvTokensMax": 12,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 210.14399826526642,
- "p90": 260.0319981575012,
- "p95": 276.99199318885803,
- "p99": 401.856005191803
- },
- "combine": {
- "p50": 49.02400076389313,
- "p90": 61.983998864889145,
- "p95": 68.57600063085556,
- "p99": 82.43200182914734
- },
- "roundtrip": {
- "p50": 252.73600220680237,
- "p90": 308.51200222969055,
- "p95": 325.76000690460205,
- "p99": 404.2240083217621
- },
- "isolatedSum": {
- "p50": 259.16799902915955,
- "p90": 322.01599702239037,
- "p95": 345.5679938197136,
- "p99": 484.2880070209503
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 692224,
- "combineLogicalBytes": 1384448,
- "fanoutMean": 5.28125,
- "recvTokensMax": 26,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 229.40799593925476,
- "p90": 285.91999411582947,
- "p95": 302.97601222991943,
- "p99": 384.799987077713
- },
- "combine": {
- "p50": 50.6879985332489,
- "p90": 65.95200300216675,
- "p95": 71.48800045251846,
- "p99": 85.56800335645676
- },
- "roundtrip": {
- "p50": 262.7840042114258,
- "p90": 331.9680094718933,
- "p95": 359.6160113811493,
- "p99": 441.0560131072998
- },
- "isolatedSum": {
- "p50": 280.09599447250366,
- "p90": 351.8719971179962,
- "p95": 374.4640126824379,
- "p99": 470.36799043416977
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1372160,
- "combineLogicalBytes": 2744320,
- "fanoutMean": 5.234375,
- "recvTokensMax": 49,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 211.67999505996704,
- "p90": 262.0159983634949,
- "p95": 281.5360128879547,
- "p99": 434.4319999217987
- },
- "combine": {
- "p50": 50.87999999523163,
- "p90": 67.74400174617767,
- "p95": 72.76800274848938,
- "p99": 100.47999769449234
- },
- "roundtrip": {
- "p50": 261.1199915409088,
- "p90": 332.5119912624359,
- "p95": 354.8800051212311,
- "p99": 414.2720103263855
- },
- "isolatedSum": {
- "p50": 262.55999505519867,
- "p90": 329.76000010967255,
- "p95": 354.3040156364441,
- "p99": 534.911997616291
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2732032,
- "combineLogicalBytes": 5464064,
- "fanoutMean": 5.2109375,
- "recvTokensMax": 94,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 210.68799495697021,
- "p90": 258.91199707984924,
- "p95": 279.87200021743774,
- "p99": 326.1440098285675
- },
- "combine": {
- "p50": 53.85600030422211,
- "p90": 68.67200136184692,
- "p95": 72.51200079917908,
- "p99": 91.90399944782257
- },
- "roundtrip": {
- "p50": 265.6959891319275,
- "p90": 326.2079954147339,
- "p95": 351.52000188827515,
- "p99": 446.3360011577606
- },
- "isolatedSum": {
- "p50": 264.5439952611923,
- "p90": 327.58399844169617,
- "p95": 352.3840010166168,
- "p99": 418.0480092763901
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5562368,
- "combineLogicalBytes": 11124736,
- "fanoutMean": 5.3046875,
- "recvTokensMax": 186,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 209.6640020608902,
- "p90": 265.21599292755127,
- "p95": 291.0720109939575,
- "p99": 366.14400148391724
- },
- "combine": {
- "p50": 61.43999844789505,
- "p90": 73.91999661922455,
- "p95": 79.42400127649307,
- "p99": 92.06400066614151
- },
- "roundtrip": {
- "p50": 262.2399926185608,
- "p90": 317.7280128002167,
- "p95": 350.7840037345886,
- "p99": 447.9680061340332
- },
- "isolatedSum": {
- "p50": 271.10400050878525,
- "p90": 339.1359895467758,
- "p95": 370.4960122704506,
- "p99": 458.20800215005875
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 11096064,
- "combineLogicalBytes": 22192128,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 358,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 217.8879976272583,
- "p90": 276.99199318885803,
- "p95": 317.05600023269653,
- "p99": 742.6559925079346
- },
- "combine": {
- "p50": 72.67200201749802,
- "p90": 88.54400366544724,
- "p95": 92.47999638319016,
- "p99": 113.02399635314941
- },
- "roundtrip": {
- "p50": 273.44000339508057,
- "p90": 323.5520124435425,
- "p95": 345.0239896774292,
- "p99": 420.3520119190216
- },
- "isolatedSum": {
- "p50": 290.5599996447563,
- "p90": 365.53599685430527,
- "p95": 409.5359966158867,
- "p99": 855.679988861084
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-a3751d3c",
- "identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "972ab14012f6276a",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:53:56.538326+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_11",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "shape 5120/8/160",
- "shape": {
- "hidden": 5120,
- "topk": 8,
- "experts": 160,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "be1b44a963bd4ef",
- "workloadId": "set:8:34e5874082f8ea8f",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271751941",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271751941",
- "createdAt": "2026-06-26T23:53:56.538326+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 217.3759937286377,
- "p90": 269.1839933395386,
- "p95": 295.1360046863556,
- "p99": 345.69600224494934
- },
- "combine": {
- "p50": 50.592001527547836,
- "p90": 66.46399945020676,
- "p95": 71.74400240182877,
- "p99": 89.34400230646133
- },
- "roundtrip": {
- "p50": 245.60000002384186,
- "p90": 292.64000058174133,
- "p95": 306.0480058193207,
- "p99": 346.8160033226013
- },
- "isolatedSum": {
- "p50": 267.96799525618553,
- "p90": 335.64799278974533,
- "p95": 366.88000708818436,
- "p99": 435.0400045514107
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 215040,
- "combineLogicalBytes": 430080,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 206.2399983406067,
- "p90": 254.94399666786194,
- "p95": 270.4960107803345,
- "p99": 337.21598982810974
- },
- "combine": {
- "p50": 51.263999193906784,
- "p90": 65.72800129652023,
- "p95": 70.52800059318542,
- "p99": 75.58400183916092
- },
- "roundtrip": {
- "p50": 245.15199661254883,
- "p90": 296.31999135017395,
- "p95": 316.1279857158661,
- "p99": 367.3279881477356
- },
- "isolatedSum": {
- "p50": 257.5039975345135,
- "p90": 320.6719979643822,
- "p95": 341.0240113735199,
- "p99": 412.79999166727066
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 440320,
- "combineLogicalBytes": 880640,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 220.38400173187256,
- "p90": 289.15199637413025,
- "p95": 331.5519988536835,
- "p99": 1036.1599922180176
- },
- "combine": {
- "p50": 52.191998809576035,
- "p90": 65.21599739789963,
- "p95": 68.96000355482101,
- "p99": 77.88799703121185
- },
- "roundtrip": {
- "p50": 248.79999458789825,
- "p90": 299.71200227737427,
- "p95": 314.5279884338379,
- "p99": 352.09599137306213
- },
- "isolatedSum": {
- "p50": 272.5760005414486,
- "p90": 354.3679937720299,
- "p95": 400.5120024085045,
- "p99": 1114.0479892492294
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 870400,
- "combineLogicalBytes": 1740800,
- "fanoutMean": 5.3125,
- "recvTokensMax": 25,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 221.91999852657318,
- "p90": 292.4480140209198,
- "p95": 316.3520097732544,
- "p99": 412.76800632476807
- },
- "combine": {
- "p50": 54.84800040721893,
- "p90": 71.61600142717361,
- "p95": 80.64000308513641,
- "p99": 102.1760031580925
- },
- "roundtrip": {
- "p50": 249.24799799919128,
- "p90": 305.5360019207001,
- "p95": 325.1520097255707,
- "p99": 406.9119989871979
- },
- "isolatedSum": {
- "p50": 276.7679989337921,
- "p90": 364.0640154480934,
- "p95": 396.9920128583908,
- "p99": 514.9440094828606
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1735680,
- "combineLogicalBytes": 3471360,
- "fanoutMean": 5.296875,
- "recvTokensMax": 50,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 209.75999534130096,
- "p90": 260.73598861694336,
- "p95": 279.7119915485382,
- "p99": 349.98399019241333
- },
- "combine": {
- "p50": 54.88000065088272,
- "p90": 69.34399902820587,
- "p95": 73.91999661922455,
- "p99": 101.08800232410431
- },
- "roundtrip": {
- "p50": 254.36800718307495,
- "p90": 305.2160143852234,
- "p95": 330.55999875068665,
- "p99": 445.72800397872925
- },
- "isolatedSum": {
- "p50": 264.6399959921837,
- "p90": 330.07998764514923,
- "p95": 353.63198816776276,
- "p99": 451.07199251651764
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3456000,
- "combineLogicalBytes": 6912000,
- "fanoutMean": 5.2734375,
- "recvTokensMax": 93,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 212.16000616550446,
- "p90": 261.34398579597473,
- "p95": 274.4959890842438,
- "p99": 355.9679985046387
- },
- "combine": {
- "p50": 59.487998485565186,
- "p90": 75.9039968252182,
- "p95": 79.29600030183792,
- "p99": 111.13599687814713
- },
- "roundtrip": {
- "p50": 262.4320089817047,
- "p90": 318.33600997924805,
- "p95": 339.4559919834137,
- "p99": 384.0320110321045
- },
- "isolatedSum": {
- "p50": 271.64800465106964,
- "p90": 337.24798262119293,
- "p95": 353.7919893860817,
- "p99": 467.1039953827858
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6988800,
- "combineLogicalBytes": 13977600,
- "fanoutMean": 5.33203125,
- "recvTokensMax": 179,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 213.44000101089478,
- "p90": 259.99999046325684,
- "p95": 280.2880108356476,
- "p99": 418.08000206947327
- },
- "combine": {
- "p50": 67.26399809122086,
- "p90": 79.1039988398552,
- "p95": 86.94399893283844,
- "p99": 97.59999811649323
- },
- "roundtrip": {
- "p50": 273.98398518562317,
- "p90": 361.2799942493439,
- "p95": 384.0959966182709,
- "p99": 485.24799942970276
- },
- "isolatedSum": {
- "p50": 280.70399910211563,
- "p90": 339.10398930311203,
- "p95": 367.232009768486,
- "p99": 515.6800001859665
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 13987840,
- "combineLogicalBytes": 27975680,
- "fanoutMean": 5.3359375,
- "recvTokensMax": 355,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 217.53600239753723,
- "p90": 271.9680070877075,
- "p95": 288.8000011444092,
- "p99": 367.71199107170105
- },
- "combine": {
- "p50": 80.73599636554718,
- "p90": 95.90400010347366,
- "p95": 99.16800260543823,
- "p99": 122.56000190973282
- },
- "roundtrip": {
- "p50": 289.6000146865845,
- "p90": 337.69598603248596,
- "p95": 350.847989320755,
- "p99": 431.4559996128082
- },
- "isolatedSum": {
- "p50": 298.2719987630844,
- "p90": 367.8720071911812,
- "p95": 387.9680037498474,
- "p99": 490.27199298143387
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 27837440,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-73819dd3",
- "identity": "h200|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_9979edfc",
- "comparisonKey": "3ee03cee0282c011",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:48.278988+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287496212",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287496212",
- "createdAt": "2026-06-27T11:13:48.278988+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 70.14399766921997,
- "p90": 101.59999877214432,
- "p95": 114.72000181674957,
- "p99": 231.32799565792084
- },
- "combine": {
- "p50": 56.8000003695488,
- "p90": 69.18399780988693,
- "p95": 76.64000242948532,
- "p99": 99.64799880981445
- },
- "roundtrip": {
- "p50": 154.30399775505066,
- "p90": 196.383997797966,
- "p95": 217.47200191020966,
- "p99": 263.90400528907776
- },
- "isolatedSum": {
- "p50": 126.94399803876877,
- "p90": 170.78399658203125,
- "p95": 191.3600042462349,
- "p99": 330.9759944677353
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 270336,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 68.60800087451935,
- "p90": 98.33600372076035,
- "p95": 105.66399991512299,
- "p99": 159.45599973201752
- },
- "combine": {
- "p50": 56.60799890756607,
- "p90": 71.55200093984604,
- "p95": 75.48800110816956,
- "p99": 96.22400254011154
- },
- "roundtrip": {
- "p50": 158.07999670505524,
- "p90": 207.39200711250305,
- "p95": 222.3680019378662,
- "p99": 268.15998554229736
- },
- "isolatedSum": {
- "p50": 125.21599978208542,
- "p90": 169.88800466060638,
- "p95": 181.15200102329254,
- "p99": 255.68000227212906
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 528384,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 66.3679987192154,
- "p90": 87.23200112581253,
- "p95": 100.76799988746643,
- "p99": 133.5040032863617
- },
- "combine": {
- "p50": 56.41600117087364,
- "p90": 65.72800129652023,
- "p95": 73.27999919652939,
- "p99": 81.98399841785431
- },
- "roundtrip": {
- "p50": 150.4960060119629,
- "p90": 190.36799669265747,
- "p95": 200.32000541687012,
- "p99": 249.37599897384644
- },
- "isolatedSum": {
- "p50": 122.78399989008904,
- "p90": 152.96000242233276,
- "p95": 174.04799908399582,
- "p99": 215.488001704216
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1062912,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 67.80800223350525,
- "p90": 100.80000013113022,
- "p95": 111.00800335407257,
- "p99": 136.99199259281158
- },
- "combine": {
- "p50": 57.08799883723259,
- "p90": 72.22399860620499,
- "p95": 76.60800218582153,
- "p99": 86.33600175380707
- },
- "roundtrip": {
- "p50": 153.56799960136414,
- "p90": 201.50400698184967,
- "p95": 210.24000644683838,
- "p99": 257.3759853839874
- },
- "isolatedSum": {
- "p50": 124.89600107073784,
- "p90": 173.0239987373352,
- "p95": 187.6160055398941,
- "p99": 223.32799434661865
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2131968,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 65.76000154018402,
- "p90": 92.6079973578453,
- "p95": 103.13600301742554,
- "p99": 142.81600713729858
- },
- "combine": {
- "p50": 58.400001376867294,
- "p90": 68.70400160551071,
- "p95": 77.40800082683563,
- "p99": 84.22400057315826
- },
- "roundtrip": {
- "p50": 162.75200247764587,
- "p90": 214.30400013923645,
- "p95": 235.167995095253,
- "p99": 264.70398902893066
- },
- "isolatedSum": {
- "p50": 124.16000291705132,
- "p90": 161.31199896335602,
- "p95": 180.54400384426117,
- "p99": 227.04000771045685
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4251648,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 69.31199878454208,
- "p90": 97.59999811649323,
- "p95": 107.77600109577179,
- "p99": 145.37599682807922
- },
- "combine": {
- "p50": 64.67200070619583,
- "p90": 80.35200089216232,
- "p95": 85.4720026254654,
- "p99": 101.88800096511841
- },
- "roundtrip": {
- "p50": 164.32000696659088,
- "p90": 208.48000049591064,
- "p95": 222.88000583648682,
- "p99": 259.71201062202454
- },
- "isolatedSum": {
- "p50": 133.98399949073792,
- "p90": 177.95199900865555,
- "p95": 193.24800372123718,
- "p99": 247.26399779319763
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 8454144,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 77.31200009584427,
- "p90": 101.69599950313568,
- "p95": 117.76000261306763,
- "p99": 361.5039885044098
- },
- "combine": {
- "p50": 72.35199958086014,
- "p90": 85.63199639320374,
- "p95": 92.51199662685394,
- "p99": 103.64799946546555
- },
- "roundtrip": {
- "p50": 168.2880073785782,
- "p90": 209.9200040102005,
- "p95": 219.67999637126923,
- "p99": 266.84799790382385
- },
- "isolatedSum": {
- "p50": 149.6639996767044,
- "p90": 187.32799589633942,
- "p95": 210.27199923992157,
- "p99": 465.15198796987534
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 16711680,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 84.70399677753448,
- "p90": 109.21599715948105,
- "p95": 114.78400230407715,
- "p99": 161.47199273109436
- },
- "combine": {
- "p50": 88.25600147247314,
- "p90": 105.0880029797554,
- "p95": 113.11999708414078,
- "p99": 147.42399752140045
- },
- "roundtrip": {
- "p50": 195.5839991569519,
- "p90": 248.28800559043884,
- "p95": 262.4959945678711,
- "p99": 325.56799054145813
- },
- "isolatedSum": {
- "p50": 172.95999825000763,
- "p90": 214.30400013923645,
- "p95": 227.90399938821793,
- "p99": 308.8959902524948
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-1bedbd87",
- "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "73242cc56a07dc73",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:22.337969+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_8",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:2e0df6a62cd0143e",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271767522",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271767522",
- "createdAt": "2026-06-26T23:54:22.337969+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 217.43999421596527,
- "p90": 302.7519881725311,
- "p95": 334.4320058822632,
- "p99": 396.06401324272156
- },
- "combine": {
- "p50": 55.1999993622303,
- "p90": 72.03199714422226,
- "p95": 78.23999971151352,
- "p99": 108.09600353240967
- },
- "roundtrip": {
- "p50": 251.71199440956116,
- "p90": 317.27999448776245,
- "p95": 335.10398864746094,
- "p99": 397.92001247406006
- },
- "isolatedSum": {
- "p50": 272.6399935781956,
- "p90": 374.7839853167534,
- "p95": 412.6720055937767,
- "p99": 504.1600167751312
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 270336,
- "combineLogicalBytes": 540672,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 206.81600272655487,
- "p90": 269.6639895439148,
- "p95": 289.6000146865845,
- "p99": 343.23200583457947
- },
- "combine": {
- "p50": 55.135998874902725,
- "p90": 71.77600264549255,
- "p95": 77.47200131416321,
- "p99": 96.09600156545639
- },
- "roundtrip": {
- "p50": 247.93599545955658,
- "p90": 305.63199520111084,
- "p95": 323.168009519577,
- "p99": 380.12799620628357
- },
- "isolatedSum": {
- "p50": 261.9520016014576,
- "p90": 341.43999218940735,
- "p95": 367.0720160007477,
- "p99": 439.32800740003586
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 528384,
- "combineLogicalBytes": 1056768,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 211.04000508785248,
- "p90": 283.32799673080444,
- "p95": 302.65599489212036,
- "p99": 377.6639997959137
- },
- "combine": {
- "p50": 56.89600110054016,
- "p90": 70.68800181150436,
- "p95": 78.3040001988411,
- "p99": 85.4400023818016
- },
- "roundtrip": {
- "p50": 251.52000784873962,
- "p90": 306.4959943294525,
- "p95": 319.64799761772156,
- "p99": 344.1599905490875
- },
- "isolatedSum": {
- "p50": 267.93600618839264,
- "p90": 354.0159985423088,
- "p95": 380.95999509096146,
- "p99": 463.1040021777153
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1062912,
- "combineLogicalBytes": 2125824,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 204.92799580097198,
- "p90": 272.09600806236267,
- "p95": 291.29600524902344,
- "p99": 364.3519878387451
- },
- "combine": {
- "p50": 56.96000158786774,
- "p90": 71.96799665689468,
- "p95": 77.79199630022049,
- "p99": 86.91199868917465
- },
- "roundtrip": {
- "p50": 245.69599330425262,
- "p90": 303.16799879074097,
- "p95": 321.9519853591919,
- "p99": 421.1199879646301
- },
- "isolatedSum": {
- "p50": 261.8879973888397,
- "p90": 344.06400471925735,
- "p95": 369.0880015492439,
- "p99": 451.26398652791977
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2131968,
- "combineLogicalBytes": 4263936,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 211.61599457263947,
- "p90": 274.3679881095886,
- "p95": 311.2959861755371,
- "p99": 390.8799886703491
- },
- "combine": {
- "p50": 58.720000088214874,
- "p90": 74.68800246715546,
- "p95": 80.09599894285202,
- "p99": 87.5839963555336
- },
- "roundtrip": {
- "p50": 250.65600872039795,
- "p90": 313.24800848960876,
- "p95": 336.1920118331909,
- "p99": 386.59200072288513
- },
- "isolatedSum": {
- "p50": 270.33599466085434,
- "p90": 349.0559905767441,
- "p95": 391.39198511838913,
- "p99": 478.4639850258827
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4251648,
- "combineLogicalBytes": 8503296,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 204.92799580097198,
- "p90": 262.62399554252625,
- "p95": 280.5440127849579,
- "p99": 327.4880051612854
- },
- "combine": {
- "p50": 64.54399973154068,
- "p90": 81.85599744319916,
- "p95": 87.8399983048439,
- "p99": 104.41599786281586
- },
- "roundtrip": {
- "p50": 262.59198784828186,
- "p90": 327.7440071105957,
- "p95": 351.6159951686859,
- "p99": 406.0800075531006
- },
- "isolatedSum": {
- "p50": 269.47199553251266,
- "p90": 344.4799929857254,
- "p95": 368.3840110898018,
- "p99": 431.90400302410126
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 8454144,
- "combineLogicalBytes": 16908288,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 247.5840002298355,
- "p90": 392.5119936466217,
- "p95": 406.14399313926697,
- "p99": 443.5200095176697
- },
- "combine": {
- "p50": 71.84000313282013,
- "p90": 89.85599875450134,
- "p95": 94.68799829483032,
- "p99": 119.32799965143204
- },
- "roundtrip": {
- "p50": 261.85598969459534,
- "p90": 329.24801111221313,
- "p95": 345.15199065208435,
- "p99": 426.1760115623474
- },
- "isolatedSum": {
- "p50": 319.42400336265564,
- "p90": 482.36799240112305,
- "p95": 500.8319914340973,
- "p99": 562.8480091691017
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 16711680,
- "combineLogicalBytes": 33423360,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 218.33600103855133,
- "p90": 282.6240062713623,
- "p95": 299.1040050983429,
- "p99": 340.831995010376
- },
- "combine": {
- "p50": 87.16800063848495,
- "p90": 104.67199981212616,
- "p95": 109.18399691581726,
- "p99": 127.32799351215363
- },
- "roundtrip": {
- "p50": 291.83998703956604,
- "p90": 343.6479866504669,
- "p95": 355.48800230026245,
- "p99": 407.1680009365082
- },
- "isolatedSum": {
- "p50": 305.5040016770363,
- "p90": 387.29600608348846,
- "p95": 408.28800201416016,
- "p99": 468.1599885225296
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-1d12a6ce",
- "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_9979edfc",
- "comparisonKey": "057f864d1542d54f",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T10:26:28.109691+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28286433802",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286433802",
- "createdAt": "2026-06-27T10:26:28.109691+00:00",
- "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 66.94400310516357,
- "p90": 102.65599936246872,
- "p95": 111.55200004577637,
- "p99": 136.06399297714233
- },
- "combine": {
- "p50": 61.503998935222626,
- "p90": 75.99999755620956,
- "p95": 80.64000308513641,
- "p99": 118.33599954843521
- },
- "roundtrip": {
- "p50": 168.7999963760376,
- "p90": 279.00800108909607,
- "p95": 304.03199791908264,
- "p99": 436.41600012779236
- },
- "isolatedSum": {
- "p50": 128.4480020403862,
- "p90": 178.65599691867828,
- "p95": 192.19200313091278,
- "p99": 254.39999252557755
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 68.51200014352798,
- "p90": 98.43199700117111,
- "p95": 108.67200046777725,
- "p99": 120.41600048542023
- },
- "combine": {
- "p50": 61.69600039720535,
- "p90": 78.14399898052216,
- "p95": 82.0159986615181,
- "p99": 97.9200005531311
- },
- "roundtrip": {
- "p50": 167.04000532627106,
- "p90": 214.88000452518463,
- "p95": 225.63199698925018,
- "p99": 264.8319900035858
- },
- "isolatedSum": {
- "p50": 130.20800054073334,
- "p90": 176.57599598169327,
- "p95": 190.68799912929535,
- "p99": 218.33600103855133
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 67.77600198984146,
- "p90": 99.5199978351593,
- "p95": 105.27999699115753,
- "p99": 120.7680031657219
- },
- "combine": {
- "p50": 60.99199876189232,
- "p90": 76.9599974155426,
- "p95": 81.37600123882294,
- "p99": 85.28000116348267
- },
- "roundtrip": {
- "p50": 158.36800634860992,
- "p90": 202.4639993906021,
- "p95": 213.34399282932281,
- "p99": 470.46399116516113
- },
- "isolatedSum": {
- "p50": 128.76800075173378,
- "p90": 176.4799952507019,
- "p95": 186.65599822998047,
- "p99": 206.04800432920456
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 68.44799965620041,
- "p90": 100.3199964761734,
- "p95": 105.21599650382996,
- "p99": 126.43200159072876
- },
- "combine": {
- "p50": 63.45599889755249,
- "p90": 79.0719985961914,
- "p95": 84.99199897050858,
- "p99": 93.02400052547455
- },
- "roundtrip": {
- "p50": 166.78400337696075,
- "p90": 212.0639979839325,
- "p95": 220.09600698947906,
- "p99": 258.8160037994385
- },
- "isolatedSum": {
- "p50": 131.9039985537529,
- "p90": 179.3919950723648,
- "p95": 190.20799547433853,
- "p99": 219.4560021162033
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 68.31999868154526,
- "p90": 94.97600048780441,
- "p95": 101.88800096511841,
- "p99": 112.73600161075592
- },
- "combine": {
- "p50": 63.80800157785416,
- "p90": 75.58400183916092,
- "p95": 82.97599852085114,
- "p99": 96.70399874448776
- },
- "roundtrip": {
- "p50": 165.47200083732605,
- "p90": 223.29600155353546,
- "p95": 241.98399484157562,
- "p99": 347.9999899864197
- },
- "isolatedSum": {
- "p50": 132.1280002593994,
- "p90": 170.56000232696533,
- "p95": 184.86399948596954,
- "p99": 209.44000035524368
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 72.60800153017044,
- "p90": 105.12000322341919,
- "p95": 111.10399663448334,
- "p99": 125.08800625801086
- },
- "combine": {
- "p50": 70.23999840021133,
- "p90": 83.52000266313553,
- "p95": 88.35200220346451,
- "p99": 93.1520015001297
- },
- "roundtrip": {
- "p50": 169.37600076198578,
- "p90": 216.25599265098572,
- "p95": 225.15200078487396,
- "p99": 254.59200143814087
- },
- "isolatedSum": {
- "p50": 142.84799993038177,
- "p90": 188.64000588655472,
- "p95": 199.45599883794785,
- "p99": 218.24000775814056
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 79.0719985961914,
- "p90": 104.54399883747101,
- "p95": 109.76000130176544,
- "p99": 320.99199295043945
- },
- "combine": {
- "p50": 80.51200211048126,
- "p90": 98.39999675750732,
- "p95": 101.85600072145462,
- "p99": 225.53600370883942
- },
- "roundtrip": {
- "p50": 180.2240014076233,
- "p90": 218.4319943189621,
- "p95": 229.312002658844,
- "p99": 268.70399713516235
- },
- "isolatedSum": {
- "p50": 159.58400070667267,
- "p90": 202.94399559497833,
- "p95": 211.61600202322006,
- "p99": 546.5279966592789
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 86.62399649620056,
- "p90": 106.52799904346466,
- "p95": 113.21599781513214,
- "p99": 120.89599668979645
- },
- "combine": {
- "p50": 98.1760025024414,
- "p90": 115.13599753379822,
- "p95": 118.6240017414093,
- "p99": 130.5920034646988
- },
- "roundtrip": {
- "p50": 210.207998752594,
- "p90": 238.3359968662262,
- "p95": 245.15199661254883,
- "p99": 258.87998938560486
- },
- "isolatedSum": {
- "p50": 184.79999899864197,
- "p90": 221.66399657726288,
- "p95": 231.83999955654144,
- "p99": 251.48800015449524
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-9a6e69f6",
- "identity": "h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "c387c5e642249761",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:50:29.289162+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271636896",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271636896",
- "createdAt": "2026-06-26T23:50:29.289162+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 228.70400547981262,
- "p90": 269.6959972381592,
- "p95": 279.5200049877167,
- "p99": 338.1119966506958
- },
- "combine": {
- "p50": 61.08799949288368,
- "p90": 73.5040009021759,
- "p95": 82.20800012350082,
- "p99": 98.33600372076035
- },
- "roundtrip": {
- "p50": 271.232008934021,
- "p90": 306.94401264190674,
- "p95": 324.2560029029846,
- "p99": 374.65599179267883
- },
- "isolatedSum": {
- "p50": 289.7920049726963,
- "p90": 343.1999981403351,
- "p95": 361.7280051112175,
- "p99": 436.44800037145615
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 216.06400609016418,
- "p90": 246.33599817752838,
- "p95": 261.3759934902191,
- "p99": 341.40801429748535
- },
- "combine": {
- "p50": 59.7120001912117,
- "p90": 68.09599697589874,
- "p95": 74.46400076150894,
- "p99": 89.53599631786346
- },
- "roundtrip": {
- "p50": 268.99200677871704,
- "p90": 305.08801341056824,
- "p95": 324.41601157188416,
- "p99": 433.0880045890808
- },
- "isolatedSum": {
- "p50": 275.7760062813759,
- "p90": 314.4319951534271,
- "p95": 335.83999425172806,
- "p99": 430.9440106153488
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 229.98400032520294,
- "p90": 283.07199478149414,
- "p95": 300.00001192092896,
- "p99": 371.2959885597229
- },
- "combine": {
- "p50": 61.055999249219894,
- "p90": 78.68800312280655,
- "p95": 83.55200290679932,
- "p99": 112.47999966144562
- },
- "roundtrip": {
- "p50": 274.1119861602783,
- "p90": 337.0879888534546,
- "p95": 358.7520122528076,
- "p99": 398.75200390815735
- },
- "isolatedSum": {
- "p50": 291.03999957442284,
- "p90": 361.7599979043007,
- "p95": 383.55201482772827,
- "p99": 483.7759882211685
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 218.87999773025513,
- "p90": 251.55198574066162,
- "p95": 265.855997800827,
- "p99": 311.39200925827026
- },
- "combine": {
- "p50": 62.111999839544296,
- "p90": 71.6480016708374,
- "p95": 77.11999863386154,
- "p99": 90.40000289678574
- },
- "roundtrip": {
- "p50": 266.9120132923126,
- "p90": 300.57600140571594,
- "p95": 317.8560137748718,
- "p99": 357.02401399612427
- },
- "isolatedSum": {
- "p50": 280.9919975697994,
- "p90": 323.199987411499,
- "p95": 342.97599643468857,
- "p99": 401.792012155056
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 219.29599344730377,
- "p90": 267.61600375175476,
- "p95": 287.00798749923706,
- "p99": 346.8160033226013
- },
- "combine": {
- "p50": 63.840001821517944,
- "p90": 79.77599650621414,
- "p95": 84.95999872684479,
- "p99": 98.49599748849869
- },
- "roundtrip": {
- "p50": 265.4719948768616,
- "p90": 309.9519908428192,
- "p95": 323.8399922847748,
- "p99": 397.8559970855713
- },
- "isolatedSum": {
- "p50": 283.1359952688217,
- "p90": 347.3920002579689,
- "p95": 371.96798622608185,
- "p99": 445.3120008111
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 219.10400688648224,
- "p90": 245.5040067434311,
- "p95": 260.3200078010559,
- "p99": 308.0959916114807
- },
- "combine": {
- "p50": 69.50400024652481,
- "p90": 78.33600044250488,
- "p95": 83.96799862384796,
- "p99": 95.8079993724823
- },
- "roundtrip": {
- "p50": 275.2319872379303,
- "p90": 308.9599907398224,
- "p95": 331.07200264930725,
- "p99": 425.6319999694824
- },
- "isolatedSum": {
- "p50": 288.60800713300705,
- "p90": 323.840007185936,
- "p95": 344.28800642490387,
- "p99": 403.903990983963
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 221.27999365329742,
- "p90": 263.90400528907776,
- "p95": 282.20799565315247,
- "p99": 368.51200461387634
- },
- "combine": {
- "p50": 79.77599650621414,
- "p90": 91.32800251245499,
- "p95": 96.6079980134964,
- "p99": 106.52799904346466
- },
- "roundtrip": {
- "p50": 288.4159982204437,
- "p90": 336.41600608825684,
- "p95": 353.7920117378235,
- "p99": 471.1360037326813
- },
- "isolatedSum": {
- "p50": 301.05599015951157,
- "p90": 355.23200780153275,
- "p95": 378.81599366664886,
- "p99": 475.040003657341
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 233.024001121521,
- "p90": 284.4479978084564,
- "p95": 301.63198709487915,
- "p99": 392.5760090351105
- },
- "combine": {
- "p50": 97.50399738550186,
- "p90": 109.76000130176544,
- "p95": 115.99999666213989,
- "p99": 127.93600559234619
- },
- "roundtrip": {
- "p50": 316.6399896144867,
- "p90": 356.06399178504944,
- "p95": 368.5759902000427,
- "p99": 464.352011680603
- },
- "isolatedSum": {
- "p50": 330.52799850702286,
- "p90": 394.20799911022186,
- "p95": 417.63198375701904,
- "p99": 520.5120146274567
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-a04f9063",
- "identity": "h200|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "h200_9979edfc",
- "comparisonKey": "7a8492db4d26e76b",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:07.695062+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_2",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287502149",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287502149",
- "createdAt": "2026-06-27T11:14:07.695062+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 73.02399724721909,
- "p90": 93.91999989748001,
- "p95": 107.42399841547012,
- "p99": 139.20000195503235
- },
- "combine": {
- "p50": 59.93599817156792,
- "p90": 70.36799937486649,
- "p95": 75.93599706888199,
- "p99": 93.44000369310379
- },
- "roundtrip": {
- "p50": 157.69599378108978,
- "p90": 192.09599494934082,
- "p95": 211.32799983024597,
- "p99": 397.7600038051605
- },
- "isolatedSum": {
- "p50": 132.959995418787,
- "p90": 164.2879992723465,
- "p95": 183.3599954843521,
- "p99": 232.64000564813614
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 301056,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 71.3919997215271,
- "p90": 89.91999924182892,
- "p95": 99.71199929714203,
- "p99": 113.79200220108032
- },
- "combine": {
- "p50": 58.720000088214874,
- "p90": 72.09599763154984,
- "p95": 77.60000228881836,
- "p99": 88.70399743318558
- },
- "roundtrip": {
- "p50": 158.62399339675903,
- "p90": 189.5039975643158,
- "p95": 197.82400131225586,
- "p99": 229.34399545192719
- },
- "isolatedSum": {
- "p50": 130.11199980974197,
- "p90": 162.01599687337875,
- "p95": 177.3120015859604,
- "p99": 202.4959996342659
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 609280,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 72.7040022611618,
- "p90": 106.04800283908844,
- "p95": 114.81600254774094,
- "p99": 146.84799313545227
- },
- "combine": {
- "p50": 60.5119988322258,
- "p90": 75.58400183916092,
- "p95": 79.42400127649307,
- "p99": 93.9520001411438
- },
- "roundtrip": {
- "p50": 158.75199437141418,
- "p90": 193.15199553966522,
- "p95": 202.04800367355347,
- "p99": 231.51999711990356
- },
- "isolatedSum": {
- "p50": 133.2160010933876,
- "p90": 181.63200467824936,
- "p95": 194.240003824234,
- "p99": 240.79999327659607
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 70.0799971818924,
- "p90": 88.22400122880936,
- "p95": 97.59999811649323,
- "p99": 165.82399606704712
- },
- "combine": {
- "p50": 60.54399907588959,
- "p90": 70.68800181150436,
- "p95": 78.07999849319458,
- "p99": 89.05600011348724
- },
- "roundtrip": {
- "p50": 159.32799875736237,
- "p90": 187.6160055398941,
- "p95": 201.24800503253937,
- "p99": 239.58399891853333
- },
- "isolatedSum": {
- "p50": 130.62399625778198,
- "p90": 158.91200304031372,
- "p95": 175.6799966096878,
- "p99": 254.87999618053436
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2415616,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 73.69600236415863,
- "p90": 100.38399696350098,
- "p95": 108.19199681282043,
- "p99": 146.14400267601013
- },
- "combine": {
- "p50": 63.840001821517944,
- "p90": 74.87999647855759,
- "p95": 80.38400113582611,
- "p99": 123.23199957609177
- },
- "roundtrip": {
- "p50": 161.43999993801117,
- "p90": 194.97600197792053,
- "p95": 208.67200195789337,
- "p99": 259.68000292778015
- },
- "isolatedSum": {
- "p50": 137.53600418567657,
- "p90": 175.26399344205856,
- "p95": 188.57599794864655,
- "p99": 269.3760022521019
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4924416,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 71.6480016708374,
- "p90": 94.14400160312653,
- "p95": 102.65599936246872,
- "p99": 167.32800006866455
- },
- "combine": {
- "p50": 69.37599927186966,
- "p90": 80.32000064849854,
- "p95": 88.19200098514557,
- "p99": 124.22399967908859
- },
- "roundtrip": {
- "p50": 167.55199432373047,
- "p90": 192.32000410556793,
- "p95": 208.54400098323822,
- "p99": 261.50399446487427
- },
- "isolatedSum": {
- "p50": 141.02400094270706,
- "p90": 174.46400225162506,
- "p95": 190.8480003476143,
- "p99": 291.55199974775314
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 79.80799674987793,
- "p90": 91.2960022687912,
- "p95": 100.51199793815613,
- "p99": 125.72799623012543
- },
- "combine": {
- "p50": 78.11199873685837,
- "p90": 84.70399677753448,
- "p95": 90.7839983701706,
- "p99": 105.56799918413162
- },
- "roundtrip": {
- "p50": 183.80799889564514,
- "p90": 212.96000480651855,
- "p95": 232.7679991722107,
- "p99": 262.688010931015
- },
- "isolatedSum": {
- "p50": 157.9199954867363,
- "p90": 175.99999904632568,
- "p95": 191.29599630832672,
- "p99": 231.29599541425705
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19418112,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 88.3840024471283,
- "p90": 100.3199964761734,
- "p95": 105.56799918413162,
- "p99": 115.77600240707397
- },
- "combine": {
- "p50": 97.24800288677216,
- "p90": 107.10400342941284,
- "p95": 112.28799819946289,
- "p99": 124.38400089740753
- },
- "roundtrip": {
- "p50": 212.16000616550446,
- "p90": 234.3360036611557,
- "p95": 243.93600225448608,
- "p99": 303.5520017147064
- },
- "isolatedSum": {
- "p50": 185.63200533390045,
- "p90": 207.42399990558624,
- "p95": 217.8559973835945,
- "p99": 240.1600033044815
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-180681db",
- "identity": "h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "3006922c66758d92",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:53:15.049258+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_9",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d6c49ae98878760",
- "workloadId": "set:8:9a27d0df4b17fa09",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271721386",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271721386",
- "createdAt": "2026-06-26T23:53:15.049258+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 212.44800090789795,
- "p90": 272.8320062160492,
- "p95": 292.32001304626465,
- "p99": 382.752001285553
- },
- "combine": {
- "p50": 58.75200033187866,
- "p90": 73.40800017118454,
- "p95": 78.5600021481514,
- "p99": 96.12800180912018
- },
- "roundtrip": {
- "p50": 247.26399779319763,
- "p90": 306.36799335479736,
- "p95": 325.1200020313263,
- "p99": 389.8560106754303
- },
- "isolatedSum": {
- "p50": 271.2000012397766,
- "p90": 346.24000638723373,
- "p95": 370.88001519441605,
- "p99": 478.88000309467316
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 301056,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 212.09600567817688,
- "p90": 273.69600534439087,
- "p95": 297.791987657547,
- "p99": 586.5920186042786
- },
- "combine": {
- "p50": 58.17599967122078,
- "p90": 74.81600344181061,
- "p95": 79.71200346946716,
- "p99": 97.120001912117
- },
- "roundtrip": {
- "p50": 265.3760015964508,
- "p90": 339.6799862384796,
- "p95": 375.5840063095093,
- "p99": 458.8159918785095
- },
- "isolatedSum": {
- "p50": 270.27200534939766,
- "p90": 348.5120087862015,
- "p95": 377.50399112701416,
- "p99": 683.7120205163956
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 609280,
- "combineLogicalBytes": 1218560,
- "fanoutMean": 5.3125,
- "recvTokensMax": 14,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 197.6960003376007,
- "p90": 252.8960108757019,
- "p95": 267.64801144599915,
- "p99": 318.59201192855835
- },
- "combine": {
- "p50": 57.920001447200775,
- "p90": 70.49600034952164,
- "p95": 76.4160007238388,
- "p99": 87.36000210046768
- },
- "roundtrip": {
- "p50": 246.91200256347656,
- "p90": 306.2080144882202,
- "p95": 339.1680121421814,
- "p99": 585.1519703865051
- },
- "isolatedSum": {
- "p50": 255.61600178480148,
- "p90": 323.39201122522354,
- "p95": 344.06401216983795,
- "p99": 405.95201402902603
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1204224,
- "combineLogicalBytes": 2408448,
- "fanoutMean": 5.25,
- "recvTokensMax": 26,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 211.93599700927734,
- "p90": 265.1520073413849,
- "p95": 276.6079902648926,
- "p99": 336.5760147571564
- },
- "combine": {
- "p50": 59.647999703884125,
- "p90": 77.02399790287018,
- "p95": 82.94399827718735,
- "p99": 96.54399752616882
- },
- "roundtrip": {
- "p50": 259.5840096473694,
- "p90": 317.6639974117279,
- "p95": 331.9680094718933,
- "p99": 400.06399154663086
- },
- "isolatedSum": {
- "p50": 271.58399671316147,
- "p90": 342.17600524425507,
- "p95": 359.5519885420799,
- "p99": 433.1200122833252
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2415616,
- "combineLogicalBytes": 4831232,
- "fanoutMean": 5.265625,
- "recvTokensMax": 48,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 214.01600539684296,
- "p90": 275.90399980545044,
- "p95": 303.9039969444275,
- "p99": 374.30399656295776
- },
- "combine": {
- "p50": 61.76000088453293,
- "p90": 80.4160013794899,
- "p95": 84.79999750852585,
- "p99": 99.16800260543823
- },
- "roundtrip": {
- "p50": 258.59200954437256,
- "p90": 322.9120075702667,
- "p95": 347.104012966156,
- "p99": 422.39999771118164
- },
- "isolatedSum": {
- "p50": 275.7760062813759,
- "p90": 356.32000118494034,
- "p95": 388.70399445295334,
- "p99": 473.471999168396
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4924416,
- "combineLogicalBytes": 9848832,
- "fanoutMean": 5.3671875,
- "recvTokensMax": 91,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 209.9200040102005,
- "p90": 263.7439966201782,
- "p95": 275.2639949321747,
- "p99": 311.13600730895996
- },
- "combine": {
- "p50": 67.58400052785873,
- "p90": 84.09599959850311,
- "p95": 87.42400258779526,
- "p99": 103.90400141477585
- },
- "roundtrip": {
- "p50": 263.5520100593567,
- "p90": 318.30400228500366,
- "p95": 334.5920145511627,
- "p99": 403.80799770355225
- },
- "isolatedSum": {
- "p50": 277.50400453805923,
- "p90": 347.83999621868134,
- "p95": 362.68799751996994,
- "p99": 415.0400087237358
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 19496960,
- "fanoutMean": 5.3125,
- "recvTokensMax": 178,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 205.82400262355804,
- "p90": 253.02401185035706,
- "p95": 266.36800169944763,
- "p99": 311.5200102329254
- },
- "combine": {
- "p50": 78.40000092983246,
- "p90": 92.76799857616425,
- "p95": 98.04800152778625,
- "p99": 111.07199639081955
- },
- "roundtrip": {
- "p50": 272.7360129356384,
- "p90": 325.50400495529175,
- "p95": 342.6879942417145,
- "p99": 378.6559998989105
- },
- "isolatedSum": {
- "p50": 284.2240035533905,
- "p90": 345.7920104265213,
- "p95": 364.4160032272339,
- "p99": 422.59200662374496
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19418112,
- "combineLogicalBytes": 38836224,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 372,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 223.23200106620789,
- "p90": 271.61601185798645,
- "p95": 281.98400139808655,
- "p99": 319.96798515319824
- },
- "combine": {
- "p50": 96.25600278377533,
- "p90": 112.44799941778183,
- "p95": 115.61600118875504,
- "p99": 127.36000120639801
- },
- "roundtrip": {
- "p50": 324.864000082016,
- "p90": 388.63998651504517,
- "p95": 415.3279960155487,
- "p99": 494.3999946117401
- },
- "isolatedSum": {
- "p50": 319.4880038499832,
- "p90": 384.0640112757683,
- "p95": 397.6000025868416,
- "p99": 447.32798635959625
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b1b077c8",
- "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h200_3a17d46b",
- "comparisonKey": "f29f35383c05d38b",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:04.228393+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_9",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254401482",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482",
- "createdAt": "2026-06-26T17:30:04.228393+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 70.23999840021133,
- "p90": 92.38400310277939,
- "p95": 101.88800096511841,
- "p99": 121.15199863910675
- },
- "combine": {
- "p50": 58.88000130653381,
- "p90": 70.3359991312027,
- "p95": 78.65600287914276,
- "p99": 101.43999755382538
- },
- "roundtrip": {
- "p50": 159.32799875736237,
- "p90": 200.3840059041977,
- "p95": 213.69600296020508,
- "p99": 243.58400702476501
- },
- "isolatedSum": {
- "p50": 129.11999970674515,
- "p90": 162.7200022339821,
- "p95": 180.54400384426117,
- "p99": 222.59199619293213
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 67.55200028419495,
- "p90": 91.45600348711014,
- "p95": 102.33599692583084,
- "p99": 144.57599818706512
- },
- "combine": {
- "p50": 59.42400172352791,
- "p90": 71.6480016708374,
- "p95": 81.24800026416779,
- "p99": 105.43999820947647
- },
- "roundtrip": {
- "p50": 156.12800419330597,
- "p90": 199.13600385189056,
- "p95": 215.32799303531647,
- "p99": 382.4000060558319
- },
- "isolatedSum": {
- "p50": 126.97600200772285,
- "p90": 163.10400515794754,
- "p95": 183.58399718999863,
- "p99": 250.0159963965416
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 69.72800195217133,
- "p90": 88.54400366544724,
- "p95": 98.24000298976898,
- "p99": 228.60799729824066
- },
- "combine": {
- "p50": 60.92799827456474,
- "p90": 72.92799651622772,
- "p95": 77.7600035071373,
- "p99": 90.91199934482574
- },
- "roundtrip": {
- "p50": 160.67199409008026,
- "p90": 186.20799481868744,
- "p95": 196.44799828529358,
- "p99": 242.14400351047516
- },
- "isolatedSum": {
- "p50": 130.65600022673607,
- "p90": 161.47200018167496,
- "p95": 176.00000649690628,
- "p99": 319.5199966430664
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 70.49600034952164,
- "p90": 97.47199714183807,
- "p95": 107.84000158309937,
- "p99": 151.90400183200836
- },
- "combine": {
- "p50": 61.47199869155884,
- "p90": 76.89599692821503,
- "p95": 85.28000116348267,
- "p99": 107.64800012111664
- },
- "roundtrip": {
- "p50": 155.8080017566681,
- "p90": 187.45599687099457,
- "p95": 205.24799823760986,
- "p99": 242.88000166416168
- },
- "isolatedSum": {
- "p50": 131.96799904108047,
- "p90": 174.3679940700531,
- "p95": 193.12000274658203,
- "p99": 259.552001953125
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 68.4799998998642,
- "p90": 86.94399893283844,
- "p95": 95.58399766683578,
- "p99": 126.08000636100769
- },
- "combine": {
- "p50": 63.391998410224915,
- "p90": 77.34400033950806,
- "p95": 86.62399649620056,
- "p99": 119.55200135707855
- },
- "roundtrip": {
- "p50": 164.2879992723465,
- "p90": 188.09600174427032,
- "p95": 203.64800095558167,
- "p99": 272.7999985218048
- },
- "isolatedSum": {
- "p50": 131.8719983100891,
- "p90": 164.2879992723465,
- "p95": 182.20799416303635,
- "p99": 245.63200771808624
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 68.25599819421768,
- "p90": 91.13600105047226,
- "p95": 98.91200065612793,
- "p99": 114.78400230407715
- },
- "combine": {
- "p50": 66.27199798822403,
- "p90": 78.84799689054489,
- "p95": 85.40800213813782,
- "p99": 92.73599833250046
- },
- "roundtrip": {
- "p50": 165.0879979133606,
- "p90": 203.45599949359894,
- "p95": 221.15199267864227,
- "p99": 462.911993265152
- },
- "isolatedSum": {
- "p50": 134.5279961824417,
- "p90": 169.98399794101715,
- "p95": 184.32000279426575,
- "p99": 207.5200006365776
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 74.46400076150894,
- "p90": 89.21600133180618,
- "p95": 99.32799637317657,
- "p99": 120.57600170373917
- },
- "combine": {
- "p50": 80.44800162315369,
- "p90": 89.75999802350998,
- "p95": 94.65599805116653,
- "p99": 122.30399996042252
- },
- "roundtrip": {
- "p50": 183.45600366592407,
- "p90": 210.78400313854218,
- "p95": 228.5439968109131,
- "p99": 287.4239981174469
- },
- "isolatedSum": {
- "p50": 154.91200238466263,
- "p90": 178.97599935531616,
- "p95": 193.9839944243431,
- "p99": 242.88000166416168
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 88.67199718952179,
- "p90": 102.7199998497963,
- "p95": 111.93600296974182,
- "p99": 128.9920061826706
- },
- "combine": {
- "p50": 96.83199971914291,
- "p90": 108.86400192975998,
- "p95": 114.43199962377548,
- "p99": 124.1919994354248
- },
- "roundtrip": {
- "p50": 208.99200439453125,
- "p90": 229.34399545192719,
- "p95": 239.9040013551712,
- "p99": 260.22401452064514
- },
- "isolatedSum": {
- "p50": 185.5039969086647,
- "p90": 211.58400177955627,
- "p95": 226.3680025935173,
- "p99": 253.1840056180954
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-a2649fd4",
- "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h200_50a9ee63",
- "comparisonKey": "aae31d5755e4ce66",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:20.768220+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_1",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254418007",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007",
- "createdAt": "2026-06-26T17:30:20.768220+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 56.09599873423576,
- "p90": 86.7839977145195,
- "p95": 94.97600048780441,
- "p99": 109.98400300741196
- },
- "combine": {
- "p50": 60.864001512527466,
- "p90": 79.64800298213959,
- "p95": 85.7279971241951,
- "p99": 109.24799740314484
- },
- "roundtrip": {
- "p50": 148.60799908638,
- "p90": 199.42399859428406,
- "p95": 207.45599269866943,
- "p99": 260.5440020561218
- },
- "isolatedSum": {
- "p50": 116.96000024676323,
- "p90": 166.4320006966591,
- "p95": 180.7039976119995,
- "p99": 219.2320004105568
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 52.06400156021118,
- "p90": 83.42400193214417,
- "p95": 88.99199962615967,
- "p99": 123.80799651145935
- },
- "combine": {
- "p50": 59.808000922203064,
- "p90": 77.91999727487564,
- "p95": 84.48000252246857,
- "p99": 130.78400492668152
- },
- "roundtrip": {
- "p50": 145.82400023937225,
- "p90": 194.91200149059296,
- "p95": 215.10399878025055,
- "p99": 273.79199862480164
- },
- "isolatedSum": {
- "p50": 111.87200248241425,
- "p90": 161.3439992070198,
- "p95": 173.47200214862823,
- "p99": 254.59200143814087
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 56.60799890756607,
- "p90": 89.08800035715103,
- "p95": 98.91200065612793,
- "p99": 111.7440015077591
- },
- "combine": {
- "p50": 60.7680007815361,
- "p90": 78.52800190448761,
- "p95": 84.22400057315826,
- "p99": 97.95200079679489
- },
- "roundtrip": {
- "p50": 143.74400675296783,
- "p90": 192.7040070295334,
- "p95": 212.0320051908493,
- "p99": 294.46399211883545
- },
- "isolatedSum": {
- "p50": 117.37599968910217,
- "p90": 167.61600226163864,
- "p95": 183.1360012292862,
- "p99": 209.69600230455399
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 52.25599929690361,
- "p90": 80.09599894285202,
- "p95": 88.35200220346451,
- "p99": 109.37599837779999
- },
- "combine": {
- "p50": 60.736000537872314,
- "p90": 79.48800176382065,
- "p95": 85.60000360012054,
- "p99": 108.64000022411346
- },
- "roundtrip": {
- "p50": 141.12000167369843,
- "p90": 183.87199938297272,
- "p95": 195.23200392723083,
- "p99": 286.24001145362854
- },
- "isolatedSum": {
- "p50": 112.99199983477592,
- "p90": 159.58400070667267,
- "p95": 173.95200580358505,
- "p99": 218.01599860191345
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 53.47200110554695,
- "p90": 77.60000228881836,
- "p95": 85.05599945783615,
- "p99": 93.9520001411438
- },
- "combine": {
- "p50": 62.49599903821945,
- "p90": 77.34400033950806,
- "p95": 82.11199939250946,
- "p99": 95.77599912881851
- },
- "roundtrip": {
- "p50": 142.17600226402283,
- "p90": 183.77600610256195,
- "p95": 197.79199361801147,
- "p99": 241.5360063314438
- },
- "isolatedSum": {
- "p50": 115.9680001437664,
- "p90": 154.94400262832642,
- "p95": 167.1679988503456,
- "p99": 189.7279992699623
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 55.39200082421303,
- "p90": 81.05599880218506,
- "p95": 89.15200084447861,
- "p99": 109.6000000834465
- },
- "combine": {
- "p50": 66.39999896287918,
- "p90": 84.927998483181,
- "p95": 88.3840024471283,
- "p99": 101.3759970664978
- },
- "roundtrip": {
- "p50": 148.15999567508698,
- "p90": 191.23199582099915,
- "p95": 200.57600736618042,
- "p99": 228.4799963235855
- },
- "isolatedSum": {
- "p50": 121.79199978709221,
- "p90": 165.98399728536606,
- "p95": 177.5360032916069,
- "p99": 210.9759971499443
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 65.08799642324448,
- "p90": 90.97599983215332,
- "p95": 100.63999891281128,
- "p99": 148.28799664974213
- },
- "combine": {
- "p50": 81.05599880218506,
- "p90": 96.54399752616882,
- "p95": 99.23200309276581,
- "p99": 106.52799904346466
- },
- "roundtrip": {
- "p50": 171.424001455307,
- "p90": 216.8000042438507,
- "p95": 232.1919947862625,
- "p99": 288.38399052619934
- },
- "isolatedSum": {
- "p50": 146.14399522542953,
- "p90": 187.51999735832214,
- "p95": 199.8720020055771,
- "p99": 254.8159956932068
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 75.00799745321274,
- "p90": 94.14400160312653,
- "p95": 99.04000163078308,
- "p99": 115.23199826478958
- },
- "combine": {
- "p50": 97.34400361776352,
- "p90": 115.84000289440155,
- "p95": 119.03999745845795,
- "p99": 133.56800377368927
- },
- "roundtrip": {
- "p50": 197.79199361801147,
- "p90": 227.80799865722656,
- "p95": 237.8239929676056,
- "p99": 276.8320143222809
- },
- "isolatedSum": {
- "p50": 172.35200107097626,
- "p90": 209.98400449752808,
- "p95": 218.07999908924103,
- "p99": 248.80000203847885
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-fdd09e42",
- "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_4f483b60",
- "comparisonKey": "95dcff383339100e",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:50:13.723754+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_10",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8 [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271629782",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271629782",
- "createdAt": "2026-06-26T23:50:13.723754+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 51.04000121355057,
- "p90": 76.64000242948532,
- "p95": 84.48000252246857,
- "p99": 115.32799899578094
- },
- "combine": {
- "p50": 59.20000001788139,
- "p90": 77.47200131416321,
- "p95": 87.13600039482117,
- "p99": 133.85599851608276
- },
- "roundtrip": {
- "p50": 140.73599874973297,
- "p90": 177.18400061130524,
- "p95": 189.60000574588776,
- "p99": 239.3919974565506
- },
- "isolatedSum": {
- "p50": 110.24000123143196,
- "p90": 154.11200374364853,
- "p95": 171.61600291728973,
- "p99": 249.1839975118637
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 54.71999943256378,
- "p90": 82.40000158548355,
- "p95": 88.16000074148178,
- "p99": 115.10399729013443
- },
- "combine": {
- "p50": 60.19200012087822,
- "p90": 74.78400319814682,
- "p95": 81.44000172615051,
- "p99": 106.84800148010254
- },
- "roundtrip": {
- "p50": 147.13600277900696,
- "p90": 190.75199961662292,
- "p95": 217.79200434684753,
- "p99": 253.79198789596558
- },
- "isolatedSum": {
- "p50": 114.911999553442,
- "p90": 157.18400478363037,
- "p95": 169.6000024676323,
- "p99": 221.95199877023697
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 54.048001766204834,
- "p90": 77.53600180149078,
- "p95": 84.99199897050858,
- "p99": 106.4319983124733
- },
- "combine": {
- "p50": 60.70400029420853,
- "p90": 75.83999633789062,
- "p95": 82.36800134181976,
- "p99": 106.84800148010254
- },
- "roundtrip": {
- "p50": 144.31999623775482,
- "p90": 184.4799965620041,
- "p95": 193.9840018749237,
- "p99": 240.83200097084045
- },
- "isolatedSum": {
- "p50": 114.75200206041336,
- "p90": 153.3759981393814,
- "p95": 167.36000031232834,
- "p99": 213.27999979257584
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 54.687999188899994,
- "p90": 88.25600147247314,
- "p95": 94.46399658918381,
- "p99": 120.19199877977371
- },
- "combine": {
- "p50": 61.824001371860504,
- "p90": 77.02399790287018,
- "p95": 83.26400071382523,
- "p99": 101.88800096511841
- },
- "roundtrip": {
- "p50": 140.35199582576752,
- "p90": 180.09600043296814,
- "p95": 193.53599846363068,
- "p99": 230.5919975042343
- },
- "isolatedSum": {
- "p50": 116.5120005607605,
- "p90": 165.27999937534332,
- "p95": 177.72799730300903,
- "p99": 222.07999974489212
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 54.17599901556969,
- "p90": 81.11999928951263,
- "p95": 88.8959988951683,
- "p99": 129.4720023870468
- },
- "combine": {
- "p50": 62.3680017888546,
- "p90": 78.36800068616867,
- "p95": 82.56000280380249,
- "p99": 101.21600329875946
- },
- "roundtrip": {
- "p50": 140.47999680042267,
- "p90": 177.66399681568146,
- "p95": 196.99199497699738,
- "p99": 237.7600073814392
- },
- "isolatedSum": {
- "p50": 116.54400080442429,
- "p90": 159.4879999756813,
- "p95": 171.4560016989708,
- "p99": 230.68800568580627
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 57.24800005555153,
- "p90": 79.64800298213959,
- "p95": 85.91999858617783,
- "p99": 104.67199981212616
- },
- "combine": {
- "p50": 68.41599941253662,
- "p90": 82.33600109815598,
- "p95": 85.7279971241951,
- "p99": 99.10400211811066
- },
- "roundtrip": {
- "p50": 145.1520025730133,
- "p90": 178.1120002269745,
- "p95": 187.6479983329773,
- "p99": 228.7359982728958
- },
- "isolatedSum": {
- "p50": 125.66399946808815,
- "p90": 161.98400408029556,
- "p95": 171.64799571037292,
- "p99": 203.77600193023682
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 66.30399823188782,
- "p90": 83.90399813652039,
- "p95": 90.17600119113922,
- "p99": 149.1840034723282
- },
- "combine": {
- "p50": 78.72000336647034,
- "p90": 93.79199892282486,
- "p95": 98.88000041246414,
- "p99": 114.01599645614624
- },
- "roundtrip": {
- "p50": 164.8319959640503,
- "p90": 199.48799908161163,
- "p95": 211.2639993429184,
- "p99": 271.93599939346313
- },
- "isolatedSum": {
- "p50": 145.02400159835815,
- "p90": 177.69599705934525,
- "p95": 189.05600160360336,
- "p99": 263.1999999284744
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 73.82400333881378,
- "p90": 90.62399715185165,
- "p95": 95.39200365543365,
- "p99": 114.52800035476685
- },
- "combine": {
- "p50": 97.24800288677216,
- "p90": 112.31999844312668,
- "p95": 115.77600240707397,
- "p99": 130.49599528312683
- },
- "roundtrip": {
- "p50": 199.77599382400513,
- "p90": 228.32000255584717,
- "p95": 247.29600548744202,
- "p99": 297.88801074028015
- },
- "isolatedSum": {
- "p50": 171.07200622558594,
- "p90": 202.94399559497833,
- "p95": 211.16800606250763,
- "p99": 245.02399563789368
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-39796825",
- "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_ff232ea5",
- "comparisonKey": "643e1b15925a53af",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:34.222899+00:00",
- "status": "valid",
- "publicationStatus": "diagnostic",
- "runner": "h200-dgxc-slurm_4",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8 LL",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271653486",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486",
- "createdAt": "2026-06-26T23:51:34.222899+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 29.08799983561039,
- "p90": 36.41600161790848,
- "p95": 44.28799822926521,
- "p99": 63.551999628543854
- },
- "combine": {
- "p50": 40.95999896526337,
- "p90": 64.70400094985962,
- "p95": 74.8480036854744,
- "p99": 125.69600343704224
- },
- "roundtrip": {
- "p50": 1856.8320274353027,
- "p90": 1879.7760009765625,
- "p95": 1894.495964050293,
- "p99": 2116.607904434204
- },
- "isolatedSum": {
- "p50": 70.04799880087376,
- "p90": 101.1200025677681,
- "p95": 119.13600191473961,
- "p99": 189.2480030655861
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 28.76799926161766,
- "p90": 36.22400015592575,
- "p95": 42.11200028657913,
- "p99": 48.767998814582825
- },
- "combine": {
- "p50": 36.06399893760681,
- "p90": 45.75999826192856,
- "p95": 52.2879995405674,
- "p99": 84.1279998421669
- },
- "roundtrip": {
- "p50": 1847.4880456924438,
- "p90": 1861.0880374908447,
- "p95": 1871.3279962539673,
- "p99": 2004.607915878296
- },
- "isolatedSum": {
- "p50": 64.83199819922447,
- "p90": 81.98399841785431,
- "p95": 94.39999982714653,
- "p99": 132.89599865674973
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 28.575999662280083,
- "p90": 38.816001266241074,
- "p95": 45.632001012563705,
- "p99": 57.95200169086456
- },
- "combine": {
- "p50": 41.69600084424019,
- "p90": 59.93599817156792,
- "p95": 68.06399673223495,
- "p99": 170.30400037765503
- },
- "roundtrip": {
- "p50": 1848.3840227127075,
- "p90": 1869.920015335083,
- "p95": 1881.9199800491333,
- "p99": 1995.0400590896606
- },
- "isolatedSum": {
- "p50": 70.27200050652027,
- "p90": 98.75199943780899,
- "p95": 113.69599774479866,
- "p99": 228.2560020685196
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 29.37600016593933,
- "p90": 37.21600025892258,
- "p95": 50.65599828958511,
- "p99": 62.65600025653839
- },
- "combine": {
- "p50": 47.520000487565994,
- "p90": 61.664000153541565,
- "p95": 68.57600063085556,
- "p99": 103.2319962978363
- },
- "roundtrip": {
- "p50": 1859.2000007629395,
- "p90": 1878.6879777908325,
- "p95": 1886.1440420150757,
- "p99": 1924.1600036621094
- },
- "isolatedSum": {
- "p50": 76.89600065350533,
- "p90": 98.88000041246414,
- "p95": 119.23199892044067,
- "p99": 165.8879965543747
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 31.039999797940254,
- "p90": 43.83999854326248,
- "p95": 53.63199859857559,
- "p99": 66.01600348949432
- },
- "combine": {
- "p50": 52.25599929690361,
- "p90": 69.43999975919724,
- "p95": 82.40000158548355,
- "p99": 131.99999928474426
- },
- "roundtrip": {
- "p50": 1864.0960454940796,
- "p90": 1884.160041809082,
- "p95": 1898.1759548187256,
- "p99": 1969.1519737243652
- },
- "isolatedSum": {
- "p50": 83.29599909484386,
- "p90": 113.27999830245972,
- "p95": 136.03200018405914,
- "p99": 198.0160027742386
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 33.79200026392937,
- "p90": 45.27999833226204,
- "p95": 49.31199923157692,
- "p99": 58.14399942755699
- },
- "combine": {
- "p50": 47.839999198913574,
- "p90": 64.25599753856659,
- "p95": 70.36799937486649,
- "p99": 101.53599828481674
- },
- "roundtrip": {
- "p50": 1865.056037902832,
- "p90": 1881.5360069274902,
- "p95": 1888.8959884643555,
- "p99": 1917.7600145339966
- },
- "isolatedSum": {
- "p50": 81.63199946284294,
- "p90": 109.53599587082863,
- "p95": 119.6799986064434,
- "p99": 159.67999771237373
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 40.44799879193306,
- "p90": 49.6320016682148,
- "p95": 52.799999713897705,
- "p99": 64.96000289916992
- },
- "combine": {
- "p50": 63.58399987220764,
- "p90": 81.31200075149536,
- "p95": 98.7199991941452,
- "p99": 231.1680018901825
- },
- "roundtrip": {
- "p50": 1885.632038116455,
- "p90": 1903.3279418945312,
- "p95": 1914.080023765564,
- "p99": 2039.776086807251
- },
- "isolatedSum": {
- "p50": 104.0319986641407,
- "p90": 130.94400241971016,
- "p95": 151.5199989080429,
- "p99": 296.1280047893524
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 51.552001386880875,
- "p90": 60.60799956321716,
- "p95": 62.65600025653839,
- "p99": 73.82400333881378
- },
- "combine": {
- "p50": 86.81599795818329,
- "p90": 96.19200229644775,
- "p95": 108.47999900579453,
- "p99": 146.7839926481247
- },
- "roundtrip": {
- "p50": 1922.6560592651367,
- "p90": 1938.4959936141968,
- "p95": 1957.0879936218262,
- "p99": 2130.3679943084717
- },
- "isolatedSum": {
- "p50": 138.36799934506416,
- "p90": 156.80000185966492,
- "p95": 171.13599926233292,
- "p99": 220.60799598693848
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-dbb437b5",
- "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_7ec76e6d",
- "comparisonKey": "9a87b27b98bf2d7a",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:51:35.330044+00:00",
- "status": "valid",
- "publicationStatus": "diagnostic",
- "runner": "h200-dgxc-slurm_13",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8 LL",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271656517",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517",
- "createdAt": "2026-06-26T23:51:35.330044+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 28.76799926161766,
- "p90": 38.88000175356865,
- "p95": 44.73600164055824,
- "p99": 61.15199998021126
- },
- "combine": {
- "p50": 36.768000572919846,
- "p90": 48.287998884916306,
- "p95": 57.53599852323532,
- "p99": 90.81599861383438
- },
- "roundtrip": {
- "p50": 1847.7439880371094,
- "p90": 1855.6159734725952,
- "p95": 1860.543966293335,
- "p99": 1893.2160139083862
- },
- "isolatedSum": {
- "p50": 65.5359998345375,
- "p90": 87.16800063848495,
- "p95": 102.27200016379356,
- "p99": 151.96799859404564
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 28.991999104619026,
- "p90": 33.376000821590424,
- "p95": 37.02399879693985,
- "p99": 41.05599969625473
- },
- "combine": {
- "p50": 37.59999945759773,
- "p90": 49.375999718904495,
- "p95": 58.62399935722351,
- "p99": 235.83999276161194
- },
- "roundtrip": {
- "p50": 1847.6799726486206,
- "p90": 1855.936050415039,
- "p95": 1861.4720106124878,
- "p99": 1959.007978439331
- },
- "isolatedSum": {
- "p50": 66.59199856221676,
- "p90": 82.75200054049492,
- "p95": 95.64799815416336,
- "p99": 276.89599245786667
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 28.16000021994114,
- "p90": 52.41600051522255,
- "p95": 58.59199911355972,
- "p99": 83.23200047016144
- },
- "combine": {
- "p50": 36.959998309612274,
- "p90": 48.06400090456009,
- "p95": 54.59199845790863,
- "p99": 94.59199756383896
- },
- "roundtrip": {
- "p50": 1848.3200073242188,
- "p90": 1858.62398147583,
- "p95": 1864.5440340042114,
- "p99": 1925.9519577026367
- },
- "isolatedSum": {
- "p50": 65.11999852955341,
- "p90": 100.48000141978264,
- "p95": 113.18399757146835,
- "p99": 177.8239980340004
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 29.7279991209507,
- "p90": 36.3520011305809,
- "p95": 41.08799993991852,
- "p99": 52.191998809576035
- },
- "combine": {
- "p50": 37.88800165057182,
- "p90": 50.52800104022026,
- "p95": 61.24800071120262,
- "p99": 175.7120043039322
- },
- "roundtrip": {
- "p50": 1849.4080305099487,
- "p90": 1862.7519607543945,
- "p95": 1875.4240274429321,
- "p99": 1930.5599927902222
- },
- "isolatedSum": {
- "p50": 67.61600077152252,
- "p90": 86.88000217080116,
- "p95": 102.33600065112114,
- "p99": 227.90400311350822
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 31.775999814271927,
- "p90": 37.856001406908035,
- "p95": 43.007999658584595,
- "p99": 52.2879995405674
- },
- "combine": {
- "p50": 41.280001401901245,
- "p90": 52.319999784231186,
- "p95": 64.41599875688553,
- "p99": 140.28799533843994
- },
- "roundtrip": {
- "p50": 1854.848027229309,
- "p90": 1876.3200044631958,
- "p95": 1915.3599739074707,
- "p99": 1982.6879501342773
- },
- "isolatedSum": {
- "p50": 73.05600121617317,
- "p90": 90.17600119113922,
- "p95": 107.42399841547012,
- "p99": 192.57599487900734
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 33.344000577926636,
- "p90": 36.159999668598175,
- "p95": 38.30400109291077,
- "p99": 46.14400118589401
- },
- "combine": {
- "p50": 46.30399867892265,
- "p90": 56.223999708890915,
- "p95": 66.49599969387054,
- "p99": 109.24799740314484
- },
- "roundtrip": {
- "p50": 1862.8159761428833,
- "p90": 1875.2959966659546,
- "p95": 1890.6559944152832,
- "p99": 1946.6559886932373
- },
- "isolatedSum": {
- "p50": 79.64799925684929,
- "p90": 92.38399937748909,
- "p95": 104.80000078678131,
- "p99": 155.39199858903885
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 39.68000039458275,
- "p90": 51.58400163054466,
- "p95": 57.72799998521805,
- "p99": 97.63199836015701
- },
- "combine": {
- "p50": 60.70400029420853,
- "p90": 75.29599964618683,
- "p95": 94.2080020904541,
- "p99": 319.7759985923767
- },
- "roundtrip": {
- "p50": 1882.3360204696655,
- "p90": 1892.0639753341675,
- "p95": 1907.5520038604736,
- "p99": 1997.3440170288086
- },
- "isolatedSum": {
- "p50": 100.38400068879128,
- "p90": 126.88000127673149,
- "p95": 151.93600207567215,
- "p99": 417.4079969525337
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 51.552001386880875,
- "p90": 55.07199838757515,
- "p95": 59.007998555898666,
- "p99": 66.11199676990509
- },
- "combine": {
- "p50": 86.43200248479843,
- "p90": 93.08800101280212,
- "p95": 100.89600086212158,
- "p99": 167.10400581359863
- },
- "roundtrip": {
- "p50": 1921.3759899139404,
- "p90": 1930.4640293121338,
- "p95": 1935.968041419983,
- "p99": 1968.6399698257446
- },
- "isolatedSum": {
- "p50": 137.9840038716793,
- "p90": 148.15999940037727,
- "p95": 159.90399941802025,
- "p99": 233.21600258350372
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-1caa7ff5",
- "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "colorKey": "h200_df102230",
- "comparisonKey": "2ce1d8f2e79d5005",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:31:08.227503+00:00",
- "status": "valid",
- "publicationStatus": "diagnostic",
- "runner": "h200-dgxc-slurm_3",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "ll",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8 LL (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "fixed-kernel",
- "conformanceClass": "not-applicable",
- "fixedKernel": true,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254435010",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010",
- "createdAt": "2026-06-26T17:31:08.227503+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 28.736000880599022,
- "p90": 42.24000126123428,
- "p95": 44.76799815893173,
- "p99": 50.97600072622299
- },
- "combine": {
- "p50": 37.087999284267426,
- "p90": 44.256001710891724,
- "p95": 49.6320016682148,
- "p99": 65.60000032186508
- },
- "roundtrip": {
- "p50": 1824.4800567626953,
- "p90": 1831.7760229110718,
- "p95": 1838.3680582046509,
- "p99": 1884.1919898986816
- },
- "isolatedSum": {
- "p50": 65.82400016486645,
- "p90": 86.496002972126,
- "p95": 94.39999982714653,
- "p99": 116.57600104808807
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 14,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 28.00000086426735,
- "p90": 33.31200033426285,
- "p95": 37.66399994492531,
- "p99": 50.36799982190132
- },
- "combine": {
- "p50": 36.86400130391121,
- "p90": 45.27999833226204,
- "p95": 51.29599943757057,
- "p99": 124.1919994354248
- },
- "roundtrip": {
- "p50": 1824.9599933624268,
- "p90": 1835.4239463806152,
- "p95": 1843.8400030136108,
- "p99": 1961.7279767990112
- },
- "isolatedSum": {
- "p50": 64.86400216817856,
- "p90": 78.59199866652489,
- "p95": 88.95999938249588,
- "p99": 174.55999925732613
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 21,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 28.48000079393387,
- "p90": 33.59999880194664,
- "p95": 36.41600161790848,
- "p99": 42.33599826693535
- },
- "combine": {
- "p50": 37.53599897027016,
- "p90": 47.839999198913574,
- "p95": 62.144000083208084,
- "p99": 136.4479959011078
- },
- "roundtrip": {
- "p50": 1825.8240222930908,
- "p90": 1833.9519500732422,
- "p95": 1842.0480489730835,
- "p99": 1925.0880479812622
- },
- "isolatedSum": {
- "p50": 66.01599976420403,
- "p90": 81.43999800086021,
- "p95": 98.56000170111656,
- "p99": 178.78399416804314
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240064,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 39,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 28.28799933195114,
- "p90": 32.00000151991844,
- "p95": 34.55999866127968,
- "p99": 39.744000881910324
- },
- "combine": {
- "p50": 37.43999823927879,
- "p90": 46.78399860858917,
- "p95": 53.69599908590317,
- "p99": 124.64000284671783
- },
- "roundtrip": {
- "p50": 1826.3360261917114,
- "p90": 1834.1439962387085,
- "p95": 1840.1600122451782,
- "p99": 1865.6320571899414
- },
- "isolatedSum": {
- "p50": 65.72799757122993,
- "p90": 78.78400012850761,
- "p95": 88.25599774718285,
- "p99": 164.38400372862816
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487296,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 74,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 31.136000528931618,
- "p90": 34.94400158524513,
- "p95": 37.856001406908035,
- "p99": 46.39999940991402
- },
- "combine": {
- "p50": 39.264000952243805,
- "p90": 44.28799822926521,
- "p95": 46.46399989724159,
- "p99": 77.85599678754807
- },
- "roundtrip": {
- "p50": 1830.4959535598755,
- "p90": 1838.304042816162,
- "p95": 1842.78404712677,
- "p99": 1957.919955253601
- },
- "isolatedSum": {
- "p50": 70.40000148117542,
- "p90": 79.23199981451035,
- "p95": 84.32000130414963,
- "p99": 124.25599619746208
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4960256,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 145,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 32.896000891923904,
- "p90": 35.96799820661545,
- "p95": 39.135999977588654,
- "p99": 45.56800052523613
- },
- "combine": {
- "p50": 45.791998505592346,
- "p90": 54.016001522541046,
- "p95": 83.0719992518425,
- "p99": 153.56799960136414
- },
- "roundtrip": {
- "p50": 1840.1600122451782,
- "p90": 1847.5840091705322,
- "p95": 1853.9199829101562,
- "p99": 1896.1600065231323
- },
- "isolatedSum": {
- "p50": 78.68799939751625,
- "p90": 89.9839997291565,
- "p95": 122.20799922943115,
- "p99": 199.13600012660027
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9863168,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 287,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 38.84800150990486,
- "p90": 42.43199899792671,
- "p95": 47.16800153255463,
- "p99": 62.144000083208084
- },
- "combine": {
- "p50": 59.67999994754791,
- "p90": 66.14399701356888,
- "p95": 83.16799998283386,
- "p99": 121.21599912643433
- },
- "roundtrip": {
- "p50": 1859.5199584960938,
- "p90": 1866.495966911316,
- "p95": 1875.264048576355,
- "p99": 1916.1280393600464
- },
- "isolatedSum": {
- "p50": 98.52800145745277,
- "p90": 108.57599601149559,
- "p95": 130.3360015153885,
- "p99": 183.3599992096424
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19496960,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 564,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 51.80799961090088,
- "p90": 55.135998874902725,
- "p95": 59.776000678539276,
- "p99": 68.83200258016586
- },
- "combine": {
- "p50": 86.40000224113464,
- "p90": 92.03200042247772,
- "p95": 95.74399888515472,
- "p99": 156.41599893569946
- },
- "roundtrip": {
- "p50": 1899.392008781433,
- "p90": 1905.2480459213257,
- "p95": 1909.440040588379,
- "p99": 1973.3760356903076
- },
- "isolatedSum": {
- "p50": 138.20800185203552,
- "p90": 147.16799929738045,
- "p95": 155.519999563694,
- "p99": 225.24800151586533
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 1104,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-858b05cb",
- "identity": "h200|deepep|7168|8|256|fp8-directcast|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_2b594dfd",
- "comparisonKey": "a4b473bf0791db70",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T15:56:11.323618+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_8",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8-directcast",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8-directcast",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28294159741",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294159741",
- "createdAt": "2026-06-27T15:56:11.323618+00:00",
- "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 66.75200164318085,
- "p90": 95.23200243711472,
- "p95": 102.75200009346008,
- "p99": 119.13599818944931
- },
- "combine": {
- "p50": 59.007998555898666,
- "p90": 76.1599987745285,
- "p95": 82.0159986615181,
- "p99": 103.00800204277039
- },
- "roundtrip": {
- "p50": 152.54400670528412,
- "p90": 193.12000274658203,
- "p95": 204.8960030078888,
- "p99": 230.68800568580627
- },
- "isolatedSum": {
- "p50": 125.76000019907951,
- "p90": 171.39200121164322,
- "p95": 184.76799875497818,
- "p99": 222.1440002322197
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 63.58399987220764,
- "p90": 91.00800007581711,
- "p95": 99.80800002813339,
- "p99": 118.52800101041794
- },
- "combine": {
- "p50": 58.94400179386139,
- "p90": 70.592001080513,
- "p95": 77.82399654388428,
- "p99": 87.77599781751633
- },
- "roundtrip": {
- "p50": 151.32799744606018,
- "p90": 191.96799397468567,
- "p95": 202.4639993906021,
- "p99": 234.17599499225616
- },
- "isolatedSum": {
- "p50": 122.52800166606903,
- "p90": 161.6000011563301,
- "p95": 177.63199657201767,
- "p99": 206.30399882793427
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 66.01600348949432,
- "p90": 93.9520001411438,
- "p95": 105.59999942779541,
- "p99": 121.8239963054657
- },
- "combine": {
- "p50": 60.35200133919716,
- "p90": 74.72000271081924,
- "p95": 78.5600021481514,
- "p99": 88.73599767684937
- },
- "roundtrip": {
- "p50": 154.84799444675446,
- "p90": 194.5600062608719,
- "p95": 203.19999754428864,
- "p99": 230.335995554924
- },
- "isolatedSum": {
- "p50": 126.36800482869148,
- "p90": 168.67200285196304,
- "p95": 184.1600015759468,
- "p99": 210.55999398231506
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 67.10399687290192,
- "p90": 94.4959968328476,
- "p95": 104.76800054311752,
- "p99": 123.00799787044525
- },
- "combine": {
- "p50": 61.08799949288368,
- "p90": 78.04799824953079,
- "p95": 82.17599987983704,
- "p99": 98.75199943780899
- },
- "roundtrip": {
- "p50": 155.93600273132324,
- "p90": 198.2399970293045,
- "p95": 208.03199708461761,
- "p99": 242.8479939699173
- },
- "isolatedSum": {
- "p50": 128.1919963657856,
- "p90": 172.5439950823784,
- "p95": 186.94400042295456,
- "p99": 221.75999730825424
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 67.16799736022949,
- "p90": 97.31200337409973,
- "p95": 106.9440022110939,
- "p99": 129.37599420547485
- },
- "combine": {
- "p50": 61.5679994225502,
- "p90": 77.44000107049942,
- "p95": 81.66400343179703,
- "p99": 91.64799749851227
- },
- "roundtrip": {
- "p50": 154.4319987297058,
- "p90": 195.3279972076416,
- "p95": 206.68800175189972,
- "p99": 227.7120053768158
- },
- "isolatedSum": {
- "p50": 128.7359967827797,
- "p90": 174.75200444459915,
- "p95": 188.60800564289093,
- "p99": 221.02399170398712
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 67.80800223350525,
- "p90": 92.19200164079666,
- "p95": 102.4319976568222,
- "p99": 133.7279975414276
- },
- "combine": {
- "p50": 67.74400174617767,
- "p90": 82.84799754619598,
- "p95": 87.61599659919739,
- "p99": 97.120001912117
- },
- "roundtrip": {
- "p50": 159.13599729537964,
- "p90": 200.06400346755981,
- "p95": 211.84000372886658,
- "p99": 244.6720004081726
- },
- "isolatedSum": {
- "p50": 135.55200397968292,
- "p90": 175.03999918699265,
- "p95": 190.0479942560196,
- "p99": 230.84799945354462
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 77.60000228881836,
- "p90": 101.59999877214432,
- "p95": 109.11999642848969,
- "p99": 125.91999769210815
- },
- "combine": {
- "p50": 78.68800312280655,
- "p90": 94.08000111579895,
- "p95": 100.47999769449234,
- "p99": 115.52000045776367
- },
- "roundtrip": {
- "p50": 180.16000092029572,
- "p90": 224.95999932289124,
- "p95": 240.79999327659607,
- "p99": 329.75998520851135
- },
- "isolatedSum": {
- "p50": 156.2880054116249,
- "p90": 195.67999988794327,
- "p95": 209.59999412298203,
- "p99": 241.43999814987183
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 86.7839977145195,
- "p90": 109.79200154542923,
- "p95": 122.78400361537933,
- "p99": 158.11200439929962
- },
- "combine": {
- "p50": 96.3520035147667,
- "p90": 111.84000223875046,
- "p95": 115.77600240707397,
- "p99": 128.22400033473969
- },
- "roundtrip": {
- "p50": 209.88799631595612,
- "p90": 239.1359955072403,
- "p95": 253.9840042591095,
- "p99": 331.84000849723816
- },
- "isolatedSum": {
- "p50": 183.1360012292862,
- "p90": 221.6320037841797,
- "p95": 238.5600060224533,
- "p99": 286.3360047340393
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-339f09b5",
- "identity": "h200|deepep|7168|8|256|fp8-pertoken|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_7351c157",
- "comparisonKey": "156f1708b9a7b98d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T15:56:14.997520+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_10",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8-pertoken",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8-pertoken",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28294163450",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28294163450",
- "createdAt": "2026-06-27T15:56:14.997520+00:00",
- "sha": "42eddb48c3eed35214c5ad50da1aa6527363ff70"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 66.81600213050842,
- "p90": 88.67199718952179,
- "p95": 102.9760017991066,
- "p99": 120.60800194740295
- },
- "combine": {
- "p50": 59.29600074887276,
- "p90": 72.86400347948074,
- "p95": 78.75200361013412,
- "p99": 86.84799820184708
- },
- "roundtrip": {
- "p50": 154.6880006790161,
- "p90": 198.2720047235489,
- "p95": 219.55199539661407,
- "p99": 281.69599175453186
- },
- "isolatedSum": {
- "p50": 126.11200287938118,
- "p90": 161.53600066900253,
- "p95": 181.72800540924072,
- "p99": 207.45600014925003
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 70.43199986219406,
- "p90": 104.73600029945374,
- "p95": 119.32799965143204,
- "p99": 193.7279999256134
- },
- "combine": {
- "p50": 59.10399928689003,
- "p90": 71.32799923419952,
- "p95": 80.28800040483475,
- "p99": 100.16000270843506
- },
- "roundtrip": {
- "p50": 155.03999590873718,
- "p90": 205.53599298000336,
- "p95": 231.58399760723114,
- "p99": 357.08799958229065
- },
- "isolatedSum": {
- "p50": 129.5359991490841,
- "p90": 176.06399953365326,
- "p95": 199.61600005626678,
- "p99": 293.88800263404846
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 68.92800331115723,
- "p90": 96.38399630784988,
- "p95": 109.66400057077408,
- "p99": 130.97600638866425
- },
- "combine": {
- "p50": 61.02399900555611,
- "p90": 75.68000257015228,
- "p95": 83.61600339412689,
- "p99": 102.78400033712387
- },
- "roundtrip": {
- "p50": 158.04800391197205,
- "p90": 202.94399559497833,
- "p95": 213.53599429130554,
- "p99": 251.19999051094055
- },
- "isolatedSum": {
- "p50": 129.95200231671333,
- "p90": 172.06399887800217,
- "p95": 193.28000396490097,
- "p99": 233.76000672578812
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 66.01600348949432,
- "p90": 96.25600278377533,
- "p95": 106.72000050544739,
- "p99": 128.86400520801544
- },
- "combine": {
- "p50": 60.19200012087822,
- "p90": 72.92799651622772,
- "p95": 79.03999835252762,
- "p99": 88.19200098514557
- },
- "roundtrip": {
- "p50": 153.85599434375763,
- "p90": 197.56799936294556,
- "p95": 215.64799547195435,
- "p99": 285.2480113506317
- },
- "isolatedSum": {
- "p50": 126.20800361037254,
- "p90": 169.18399930000305,
- "p95": 185.759998857975,
- "p99": 217.056006193161
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 66.23999774456024,
- "p90": 92.99200028181076,
- "p95": 102.81600058078766,
- "p99": 128.9599984884262
- },
- "combine": {
- "p50": 63.1679967045784,
- "p90": 78.36800068616867,
- "p95": 84.35200154781342,
- "p99": 111.00800335407257
- },
- "roundtrip": {
- "p50": 161.79199516773224,
- "p90": 204.48000729084015,
- "p95": 219.26400065422058,
- "p99": 282.4319899082184
- },
- "isolatedSum": {
- "p50": 129.40799444913864,
- "p90": 171.36000096797943,
- "p95": 187.16800212860107,
- "p99": 239.96800184249878
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 71.68000191450119,
- "p90": 102.75200009346008,
- "p95": 115.68000167608261,
- "p99": 132.89600610733032
- },
- "combine": {
- "p50": 68.7360018491745,
- "p90": 83.42400193214417,
- "p95": 88.25600147247314,
- "p99": 106.72000050544739
- },
- "roundtrip": {
- "p50": 166.04800522327423,
- "p90": 211.64800226688385,
- "p95": 225.79200565814972,
- "p99": 305.7920038700104
- },
- "isolatedSum": {
- "p50": 140.4160037636757,
- "p90": 186.17600202560425,
- "p95": 203.93600314855576,
- "p99": 239.6160066127777
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 77.95199751853943,
- "p90": 101.69599950313568,
- "p95": 111.32799834012985,
- "p99": 139.0720009803772
- },
- "combine": {
- "p50": 79.26400005817413,
- "p90": 92.57599711418152,
- "p95": 98.91200065612793,
- "p99": 126.36800110340118
- },
- "roundtrip": {
- "p50": 175.48799514770508,
- "p90": 220.32000124454498,
- "p95": 231.64799809455872,
- "p99": 279.4559895992279
- },
- "isolatedSum": {
- "p50": 157.21599757671356,
- "p90": 194.2719966173172,
- "p95": 210.23999899625778,
- "p99": 265.4400020837784
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 87.07199990749359,
- "p90": 109.56799983978271,
- "p95": 121.21599912643433,
- "p99": 166.20799899101257
- },
- "combine": {
- "p50": 96.6079980134964,
- "p90": 113.66400122642517,
- "p95": 119.64800208806992,
- "p99": 157.1200042963028
- },
- "roundtrip": {
- "p50": 212.44800090789795,
- "p90": 258.36798548698425,
- "p95": 284.41599011421204,
- "p99": 348.9600121974945
- },
- "isolatedSum": {
- "p50": 183.67999792099,
- "p90": 223.23200106620789,
- "p95": 240.86400121450424,
- "p99": 323.32800328731537
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-16f8b2e1",
- "identity": "h200|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "h200_d982b749",
- "comparisonKey": "465ef3841664f1ea",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:26.678836+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_13",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287506806",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287506806",
- "createdAt": "2026-06-27T11:14:26.678836+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 89.79199826717377,
- "p90": 110.84800213575363,
- "p95": 119.00799721479416,
- "p99": 141.92000031471252
- },
- "combine": {
- "p50": 83.3280012011528,
- "p90": 95.96800059080124,
- "p95": 100.38399696350098,
- "p99": 112.35199868679047
- },
- "roundtrip": {
- "p50": 150.81599354743958,
- "p90": 175.64800381660461,
- "p95": 183.96799266338348,
- "p99": 206.59199357032776
- },
- "isolatedSum": {
- "p50": 173.11999946832657,
- "p90": 206.81600272655487,
- "p95": 219.39199417829514,
- "p99": 254.271999001503
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 110.23999750614166,
- "p90": 128.48000228405,
- "p95": 134.88000631332397,
- "p99": 166.143998503685
- },
- "combine": {
- "p50": 104.86400127410889,
- "p90": 116.95999652147293,
- "p95": 122.52800166606903,
- "p99": 139.3280029296875
- },
- "roundtrip": {
- "p50": 193.95199418067932,
- "p90": 219.32800114154816,
- "p95": 232.16000199317932,
- "p99": 261.79200410842896
- },
- "isolatedSum": {
- "p50": 215.10399878025055,
- "p90": 245.43999880552292,
- "p95": 257.408007979393,
- "p99": 305.4720014333725
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 89726976,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 146.2080031633377,
- "p90": 167.64800250530243,
- "p95": 177.37600207328796,
- "p99": 212.38400042057037
- },
- "combine": {
- "p50": 152.63999998569489,
- "p90": 164.48000073432922,
- "p95": 170.68800330162048,
- "p99": 188.960000872612
- },
- "roundtrip": {
- "p50": 272.99201488494873,
- "p90": 291.1359965801239,
- "p95": 302.0159900188446,
- "p99": 328.575998544693
- },
- "isolatedSum": {
- "p50": 298.8480031490326,
- "p90": 332.12800323963165,
- "p95": 348.06400537490845,
- "p99": 401.3440012931824
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 179503104,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 215.488001704216,
- "p90": 237.34399676322937,
- "p95": 246.94399535655975,
- "p99": 288.03199529647827
- },
- "combine": {
- "p50": 248.35200607776642,
- "p90": 259.71201062202454,
- "p95": 266.4639949798584,
- "p99": 279.00800108909607
- },
- "roundtrip": {
- "p50": 438.4959936141968,
- "p90": 459.80799198150635,
- "p95": 470.71999311447144,
- "p99": 498.4000027179718
- },
- "isolatedSum": {
- "p50": 463.8400077819824,
- "p90": 497.0560073852539,
- "p95": 513.4079903364182,
- "p99": 567.0399963855743
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 359022592,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 349.5680093765259,
- "p90": 368.4160113334656,
- "p95": 377.82400846481323,
- "p99": 437.824010848999
- },
- "combine": {
- "p50": 416.703999042511,
- "p90": 430.9439957141876,
- "p95": 437.18400597572327,
- "p99": 455.1680088043213
- },
- "roundtrip": {
- "p50": 740.2560114860535,
- "p90": 760.7359886169434,
- "p95": 771.3599801063538,
- "p99": 818.4639811515808
- },
- "isolatedSum": {
- "p50": 766.2720084190369,
- "p90": 799.3600070476532,
- "p95": 815.0080144405365,
- "p99": 892.9920196533203
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 716111872,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 630.8159828186035,
- "p90": 655.0719738006592,
- "p95": 665.440022945404,
- "p99": 703.3920288085938
- },
- "combine": {
- "p50": 754.7199726104736,
- "p90": 771.1359858512878,
- "p95": 779.6480059623718,
- "p99": 856.9279909133911
- },
- "roundtrip": {
- "p50": 1357.0560216903687,
- "p90": 1393.8560485839844,
- "p95": 1428.4160137176514,
- "p99": 1616.320013999939
- },
- "isolatedSum": {
- "p50": 1385.5359554290771,
- "p90": 1426.207959651947,
- "p95": 1445.0880289077759,
- "p99": 1560.3200197219849
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1432395776,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-5888aff1",
- "identity": "h200|deepep|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "a14fc35e02b01662",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:53:49.842184+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_12",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271748233",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271748233",
- "createdAt": "2026-06-26T23:53:49.842184+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 91.96799993515015,
- "p90": 112.70400136709213,
- "p95": 120.70400267839432,
- "p99": 143.8400000333786
- },
- "combine": {
- "p50": 83.29600095748901,
- "p90": 93.40800344944,
- "p95": 99.29600358009338,
- "p99": 117.44000017642975
- },
- "roundtrip": {
- "p50": 151.2639969587326,
- "p90": 170.78399658203125,
- "p95": 179.32799458503723,
- "p99": 211.93599700927734
- },
- "isolatedSum": {
- "p50": 175.26400089263916,
- "p90": 206.11200481653214,
- "p95": 220.0000062584877,
- "p99": 261.28000020980835
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44564480,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 111.7120012640953,
- "p90": 129.82399761676788,
- "p95": 141.59999787807465,
- "p99": 159.58400070667267
- },
- "combine": {
- "p50": 104.35199737548828,
- "p90": 119.93599683046341,
- "p95": 123.83999675512314,
- "p99": 136.22400164604187
- },
- "roundtrip": {
- "p50": 195.42400538921356,
- "p90": 218.4000015258789,
- "p95": 231.51999711990356,
- "p99": 307.16800689697266
- },
- "isolatedSum": {
- "p50": 216.0639986395836,
- "p90": 249.7599944472313,
- "p95": 265.4399946331978,
- "p99": 295.80800235271454
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 89726976,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 147.23199605941772,
- "p90": 165.12000560760498,
- "p95": 172.992005944252,
- "p99": 204.6079933643341
- },
- "combine": {
- "p50": 153.53600680828094,
- "p90": 168.2240068912506,
- "p95": 175.90400576591492,
- "p99": 192.09599494934082
- },
- "roundtrip": {
- "p50": 270.8800137042999,
- "p90": 295.1680123806,
- "p95": 303.77599596977234,
- "p99": 446.8800127506256
- },
- "isolatedSum": {
- "p50": 300.76800286769867,
- "p90": 333.3440124988556,
- "p95": 348.89601171016693,
- "p99": 396.7039883136749
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 179503104,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 214.52799439430237,
- "p90": 237.63200640678406,
- "p95": 244.9920028448105,
- "p99": 282.5919985771179
- },
- "combine": {
- "p50": 249.08800423145294,
- "p90": 261.0880136489868,
- "p95": 267.8079903125763,
- "p99": 287.7439856529236
- },
- "roundtrip": {
- "p50": 438.27199935913086,
- "p90": 458.24000239372253,
- "p95": 469.88800168037415,
- "p99": 508.1599950790405
- },
- "isolatedSum": {
- "p50": 463.6159986257553,
- "p90": 498.7200200557709,
- "p95": 512.7999931573868,
- "p99": 570.3359842300415
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 359022592,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 352.1279990673065,
- "p90": 375.0720024108887,
- "p95": 387.1999979019165,
- "p99": 523.360013961792
- },
- "combine": {
- "p50": 419.9039936065674,
- "p90": 433.8560104370117,
- "p95": 441.536009311676,
- "p99": 501.6319751739502
- },
- "roundtrip": {
- "p50": 744.5759773254395,
- "p90": 766.4960026741028,
- "p95": 777.3119807243347,
- "p99": 837.7919793128967
- },
- "isolatedSum": {
- "p50": 772.0319926738739,
- "p90": 808.9280128479004,
- "p95": 828.7360072135925,
- "p99": 1024.9919891357422
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 716111872,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 629.6319961547852,
- "p90": 648.9279866218567,
- "p95": 656.2560200691223,
- "p99": 715.1039838790894
- },
- "combine": {
- "p50": 754.368007183075,
- "p90": 767.1359777450562,
- "p95": 774.5919823646545,
- "p99": 917.5040125846863
- },
- "roundtrip": {
- "p50": 1354.0480136871338,
- "p90": 1376.4159679412842,
- "p95": 1387.8079652786255,
- "p99": 1428.8320541381836
- },
- "isolatedSum": {
- "p50": 1384.00000333786,
- "p90": 1416.0639643669128,
- "p95": 1430.8480024337769,
- "p99": 1632.6079964637756
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1432395776,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-b183f57f",
- "identity": "h200|deepep|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "6953183723230449",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:18.715974+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_0",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "shape 5120/8/160",
- "shape": {
- "hidden": 5120,
- "topk": 8,
- "experts": 160,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "0c022a63bbcbf42",
- "workloadId": "set:6:28c0c09b13ff0acf",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271763623",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271763623",
- "createdAt": "2026-06-26T23:54:18.715974+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 97.69599884748459,
- "p90": 105.95200210809708,
- "p95": 110.68800091743469,
- "p99": 117.37599968910217
- },
- "combine": {
- "p50": 90.33600240945816,
- "p90": 95.64799815416336,
- "p95": 98.65599870681763,
- "p99": 108.03200304508209
- },
- "roundtrip": {
- "p50": 164.32000696659088,
- "p90": 174.01599884033203,
- "p95": 181.0240000486374,
- "p99": 201.56799256801605
- },
- "isolatedSum": {
- "p50": 188.03200125694275,
- "p90": 201.60000026226044,
- "p95": 209.34399962425232,
- "p99": 225.40800273418427
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 55674880,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 124.79999661445618,
- "p90": 143.96800100803375,
- "p95": 151.96800231933594,
- "p99": 176.57600343227386
- },
- "combine": {
- "p50": 119.71200257539749,
- "p90": 133.56800377368927,
- "p95": 140.09599387645721,
- "p99": 156.70399367809296
- },
- "roundtrip": {
- "p50": 216.48000180721283,
- "p90": 235.35999655723572,
- "p95": 243.00800263881683,
- "p99": 263.71198892593384
- },
- "isolatedSum": {
- "p50": 244.51199918985367,
- "p90": 277.536004781723,
- "p95": 292.06399619579315,
- "p99": 333.2799971103668
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 111104000,
- "combineLogicalBytes": 111104000,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 1387,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 161.6320013999939,
- "p90": 176.54399573802948,
- "p95": 185.47199666500092,
- "p99": 204.96000349521637
- },
- "combine": {
- "p50": 177.47199535369873,
- "p90": 187.74400651454926,
- "p95": 193.88799369335175,
- "p99": 218.27200055122375
- },
- "roundtrip": {
- "p50": 309.2159926891327,
- "p90": 327.2320032119751,
- "p95": 333.1199884414673,
- "p99": 373.1519877910614
- },
- "isolatedSum": {
- "p50": 339.1039967536926,
- "p90": 364.28800225257874,
- "p95": 379.35999035835266,
- "p99": 423.2320040464401
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 223098880,
- "combineLogicalBytes": 223098880,
- "fanoutMean": 5.319091796875,
- "recvTokensMax": 2762,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 242.97599494457245,
- "p90": 263.264000415802,
- "p95": 271.10400795936584,
- "p99": 296.54398560523987
- },
- "combine": {
- "p50": 279.6800136566162,
- "p90": 291.55200719833374,
- "p95": 296.7039942741394,
- "p99": 321.82401418685913
- },
- "roundtrip": {
- "p50": 498.30400943756104,
- "p90": 516.0959959030151,
- "p95": 529.4719934463501,
- "p99": 696.6400146484375
- },
- "isolatedSum": {
- "p50": 522.6560086011887,
- "p90": 554.8160076141357,
- "p95": 567.8080022335052,
- "p99": 618.367999792099
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 446730240,
- "combineLogicalBytes": 446730240,
- "fanoutMean": 5.325439453125,
- "recvTokensMax": 5518,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 402.52798795700073,
- "p90": 414.46399688720703,
- "p95": 420.28799653053284,
- "p99": 450.72001218795776
- },
- "combine": {
- "p50": 478.7839949131012,
- "p90": 488.22399973869324,
- "p95": 490.4960095882416,
- "p99": 499.07198548316956
- },
- "roundtrip": {
- "p50": 857.6639890670776,
- "p90": 869.3439960479736,
- "p95": 882.3680281639099,
- "p99": 1592.25594997406
- },
- "isolatedSum": {
- "p50": 881.3119828701019,
- "p90": 902.6879966259003,
- "p95": 910.7840061187744,
- "p99": 949.7919976711273
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 893634560,
- "combineLogicalBytes": 893634560,
- "fanoutMean": 5.32647705078125,
- "recvTokensMax": 11032,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 750.4640221595764,
- "p90": 770.9119915962219,
- "p95": 780.6079983711243,
- "p99": 812.3199939727783
- },
- "combine": {
- "p50": 873.1840252876282,
- "p90": 885.6319785118103,
- "p95": 893.4080004692078,
- "p99": 941.9839978218079
- },
- "roundtrip": {
- "p50": 1586.143970489502,
- "p90": 1606.112003326416,
- "p95": 1623.5840320587158,
- "p99": 1662.7839803695679
- },
- "isolatedSum": {
- "p50": 1623.6480474472046,
- "p90": 1656.5439701080322,
- "p95": 1674.015998840332,
- "p99": 1754.3039917945862
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1786265600,
- "combineLogicalBytes": 1786265600,
- "fanoutMean": 5.323486328125,
- "recvTokensMax": 21895,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-dfdf595d",
- "identity": "h200|deepep|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_d982b749",
- "comparisonKey": "089552474e5d15cf",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:13:50.694218+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287495061",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287495061",
- "createdAt": "2026-06-27T11:13:50.694218+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 107.13600367307663,
- "p90": 125.66399574279785,
- "p95": 131.23199343681335,
- "p99": 139.29599523544312
- },
- "combine": {
- "p50": 95.51999717950821,
- "p90": 110.81600189208984,
- "p95": 115.39199948310852,
- "p99": 158.07999670505524
- },
- "roundtrip": {
- "p50": 180.83199858665466,
- "p90": 198.04799556732178,
- "p95": 205.59999346733093,
- "p99": 217.1200066804886
- },
- "isolatedSum": {
- "p50": 202.65600085258484,
- "p90": 236.4799976348877,
- "p95": 246.62399291992188,
- "p99": 297.37599194049835
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 132.89600610733032,
- "p90": 149.05600249767303,
- "p95": 155.13600409030914,
- "p99": 168.64000260829926
- },
- "combine": {
- "p50": 128.03199887275696,
- "p90": 142.91200041770935,
- "p95": 147.71200716495514,
- "p99": 169.27999258041382
- },
- "roundtrip": {
- "p50": 236.89599335193634,
- "p90": 251.23199820518494,
- "p95": 261.6640031337738,
- "p99": 302.68800258636475
- },
- "isolatedSum": {
- "p50": 260.9280049800873,
- "p90": 291.9680029153824,
- "p95": 302.8480112552643,
- "p99": 337.9199951887131
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 133619712,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 182.68799781799316,
- "p90": 202.81599462032318,
- "p95": 213.1199985742569,
- "p99": 237.98400163650513
- },
- "combine": {
- "p50": 200.57600736618042,
- "p90": 216.09599888324738,
- "p95": 226.623997092247,
- "p99": 267.36000180244446
- },
- "roundtrip": {
- "p50": 357.31199383735657,
- "p90": 381.3439905643463,
- "p95": 394.8479890823364,
- "p99": 424.127995967865
- },
- "isolatedSum": {
- "p50": 383.2640051841736,
- "p90": 418.91199350357056,
- "p95": 439.7439956665039,
- "p99": 505.3440034389496
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 267657216,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 275.6800055503845,
- "p90": 293.0560111999512,
- "p95": 299.19999837875366,
- "p99": 318.04800033569336
- },
- "combine": {
- "p50": 319.8719918727875,
- "p90": 332.41599798202515,
- "p95": 340.2239978313446,
- "p99": 369.4719970226288
- },
- "roundtrip": {
- "p50": 570.2400207519531,
- "p90": 585.919976234436,
- "p95": 596.8000292778015,
- "p99": 636.7040276527405
- },
- "isolatedSum": {
- "p50": 595.551997423172,
- "p90": 625.4720091819763,
- "p95": 639.4239962100983,
- "p99": 687.5199973583221
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 534380544,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 462.2719883918762,
- "p90": 478.87998819351196,
- "p95": 489.79198932647705,
- "p99": 548.7679839134216
- },
- "combine": {
- "p50": 548.5119819641113,
- "p90": 561.5040063858032,
- "p95": 568.3199763298035,
- "p99": 726.7199754714966
- },
- "roundtrip": {
- "p50": 983.0080270767212,
- "p90": 996.6400265693665,
- "p95": 1016.3520574569702,
- "p99": 1202.5279998779297
- },
- "isolatedSum": {
- "p50": 1010.7839703559875,
- "p90": 1040.3839945793152,
- "p95": 1058.1119656562805,
- "p99": 1275.4879593849182
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1066119168,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 868.2559728622437,
- "p90": 894.5599794387817,
- "p95": 901.9839763641357,
- "p99": 927.3279905319214
- },
- "combine": {
- "p50": 1004.7680139541626,
- "p90": 1020.8319425582886,
- "p95": 1037.503957748413,
- "p99": 1106.7520380020142
- },
- "roundtrip": {
- "p50": 1834.112048149109,
- "p90": 1855.2639484405518,
- "p95": 1866.6880130767822,
- "p99": 2027.26411819458
- },
- "isolatedSum": {
- "p50": 1873.0239868164062,
- "p90": 1915.3919219970703,
- "p95": 1939.4879341125488,
- "p99": 2034.0800285339355
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2131722240,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-96267e21",
- "identity": "h200|deepep|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "27afbf0ad63e86ca",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:55:01.688428+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_9",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271778692",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271778692",
- "createdAt": "2026-06-26T23:55:01.688428+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 108.64000022411346,
- "p90": 120.70400267839432,
- "p95": 131.58400356769562,
- "p99": 146.2399959564209
- },
- "combine": {
- "p50": 95.71199864149094,
- "p90": 103.67999970912933,
- "p95": 112.73600161075592,
- "p99": 121.50400131940842
- },
- "roundtrip": {
- "p50": 181.0240000486374,
- "p90": 199.2959976196289,
- "p95": 207.16799795627594,
- "p99": 244.9280023574829
- },
- "isolatedSum": {
- "p50": 204.3519988656044,
- "p90": 224.38400238752365,
- "p95": 244.32000517845154,
- "p99": 267.7439972758293
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66576384,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 135.29600203037262,
- "p90": 148.00000190734863,
- "p95": 157.72800147533417,
- "p99": 182.20800161361694
- },
- "combine": {
- "p50": 128.31999361515045,
- "p90": 139.74399864673615,
- "p95": 145.7280069589615,
- "p99": 158.75199437141418
- },
- "roundtrip": {
- "p50": 235.6480062007904,
- "p90": 248.6400008201599,
- "p95": 259.16799902915955,
- "p99": 301.60000920295715
- },
- "isolatedSum": {
- "p50": 263.61599564552307,
- "p90": 287.7440005540848,
- "p95": 303.45600843429565,
- "p99": 340.9599959850311
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 133619712,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 184.1920018196106,
- "p90": 199.8080015182495,
- "p95": 208.48000049591064,
- "p99": 231.90400004386902
- },
- "combine": {
- "p50": 198.62399995326996,
- "p90": 212.0320051908493,
- "p95": 221.18400037288666,
- "p99": 289.7599935531616
- },
- "roundtrip": {
- "p50": 349.4719862937927,
- "p90": 366.3040101528168,
- "p95": 376.8320083618164,
- "p99": 431.2959909439087
- },
- "isolatedSum": {
- "p50": 382.81600177288055,
- "p90": 411.8400067090988,
- "p95": 429.6640008687973,
- "p99": 521.6639935970306
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 267657216,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 278.0799865722656,
- "p90": 296.671986579895,
- "p95": 305.759996175766,
- "p99": 346.8799889087677
- },
- "combine": {
- "p50": 313.1519854068756,
- "p90": 324.6079981327057,
- "p95": 331.9680094718933,
- "p99": 350.5600094795227
- },
- "roundtrip": {
- "p50": 563.1999969482422,
- "p90": 577.9839754104614,
- "p95": 589.5040035247803,
- "p99": 688.9920234680176
- },
- "isolatedSum": {
- "p50": 591.2319719791412,
- "p90": 621.2799847126007,
- "p95": 637.7280056476593,
- "p99": 697.4399983882904
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 534380544,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 465.8240079879761,
- "p90": 484.3200147151947,
- "p95": 496.2559938430786,
- "p99": 558.8799715042114
- },
- "combine": {
- "p50": 544.3519949913025,
- "p90": 560.1599812507629,
- "p95": 564.9600028991699,
- "p99": 624.0959763526917
- },
- "roundtrip": {
- "p50": 981.0879826545715,
- "p90": 996.3520169258118,
- "p95": 1007.7439546585083,
- "p99": 1077.1839618682861
- },
- "isolatedSum": {
- "p50": 1010.1760029792786,
- "p90": 1044.4799959659576,
- "p95": 1061.2159967422485,
- "p99": 1182.975947856903
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1066119168,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 858.2080006599426,
- "p90": 877.9839873313904,
- "p95": 884.0000033378601,
- "p99": 925.6640076637268
- },
- "combine": {
- "p50": 981.98401927948,
- "p90": 994.4959878921509,
- "p95": 1000.9280443191528,
- "p99": 1111.9040250778198
- },
- "roundtrip": {
- "p50": 1810.1119995117188,
- "p90": 1826.0159492492676,
- "p95": 1833.7279558181763,
- "p99": 1947.551965713501
- },
- "isolatedSum": {
- "p50": 1840.1920199394226,
- "p90": 1872.4799752235413,
- "p95": 1884.928047657013,
- "p99": 2037.5680327415466
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2131722240,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-bc48bfe5",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||157ca81687ddb63",
- "colorKey": "h200_d982b749",
- "comparisonKey": "6da1f9e2ab025dbe",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:56:28.417730+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_0",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "157ca81687ddb63",
- "workloadId": "set:3:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271827040",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271827040",
- "createdAt": "2026-06-26T23:56:28.417730+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 116.80000275373459,
- "p90": 135.80800592899323,
- "p95": 142.14399456977844,
- "p99": 172.7679967880249
- },
- "combine": {
- "p50": 104.35199737548828,
- "p90": 121.56800180673599,
- "p95": 125.72799623012543,
- "p99": 150.65599977970123
- },
- "roundtrip": {
- "p50": 195.77600061893463,
- "p90": 216.22399985790253,
- "p95": 222.9440063238144,
- "p99": 267.67998933792114
- },
- "isolatedSum": {
- "p50": 221.15200012922287,
- "p90": 257.3760077357292,
- "p95": 267.87199079990387,
- "p99": 323.42399656772614
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 202.36800611019135,
- "p90": 223.1999933719635,
- "p95": 231.58399760723114,
- "p99": 263.7439966201782
- },
- "combine": {
- "p50": 223.93600642681122,
- "p90": 236.32000386714935,
- "p95": 241.88800156116486,
- "p99": 258.7839961051941
- },
- "roundtrip": {
- "p50": 399.58399534225464,
- "p90": 417.279988527298,
- "p95": 424.4160056114197,
- "p99": 459.77601408958435
- },
- "isolatedSum": {
- "p50": 426.30401253700256,
- "p90": 459.51999723911285,
- "p95": 473.471999168396,
- "p99": 522.5279927253723
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 542.5919890403748,
- "p90": 557.5039982795715,
- "p95": 563.3280277252197,
- "p99": 587.8080129623413
- },
- "combine": {
- "p50": 619.1999912261963,
- "p90": 634.5599889755249,
- "p95": 646.3040113449097,
- "p99": 683.8080286979675
- },
- "roundtrip": {
- "p50": 1131.1999559402466,
- "p90": 1146.720051765442,
- "p95": 1155.743956565857,
- "p99": 1289.952039718628
- },
- "isolatedSum": {
- "p50": 1161.791980266571,
- "p90": 1192.0639872550964,
- "p95": 1209.6320390701294,
- "p99": 1271.6160416603088
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-5553e87c",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_d982b749",
- "comparisonKey": "6d1b97a966875452",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T10:26:28.382976+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_4",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28286432534",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286432534",
- "createdAt": "2026-06-27T10:26:28.382976+00:00",
- "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 116.89600348472595,
- "p90": 136.22400164604187,
- "p95": 142.2719955444336,
- "p99": 153.1199961900711
- },
- "combine": {
- "p50": 106.11200332641602,
- "p90": 122.17599898576736,
- "p95": 125.85599720478058,
- "p99": 131.77600502967834
- },
- "roundtrip": {
- "p50": 195.77600061893463,
- "p90": 213.95200490951538,
- "p95": 220.15999257564545,
- "p99": 227.77600586414337
- },
- "isolatedSum": {
- "p50": 223.00800681114197,
- "p90": 258.40000063180923,
- "p95": 268.1279927492142,
- "p99": 284.89600121974945
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 146.88000082969666,
- "p90": 168.06399822235107,
- "p95": 172.54400253295898,
- "p99": 178.17600071430206
- },
- "combine": {
- "p50": 145.53600549697876,
- "p90": 157.0879966020584,
- "p95": 163.90399634838104,
- "p99": 171.7119961977005
- },
- "roundtrip": {
- "p50": 264.92801308631897,
- "p90": 279.4879972934723,
- "p95": 288.4800136089325,
- "p99": 297.60000109672546
- },
- "isolatedSum": {
- "p50": 292.4160063266754,
- "p90": 325.1519948244095,
- "p95": 336.44799888134,
- "p99": 349.88799691200256
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 202.04800367355347,
- "p90": 215.5199944972992,
- "p95": 223.68000447750092,
- "p99": 234.52800512313843
- },
- "combine": {
- "p50": 224.35200214385986,
- "p90": 235.23199558258057,
- "p95": 237.15199530124664,
- "p99": 253.91998887062073
- },
- "roundtrip": {
- "p50": 403.23200821876526,
- "p90": 414.88000750541687,
- "p95": 423.2639968395233,
- "p99": 433.79199504852295
- },
- "isolatedSum": {
- "p50": 426.40000581741333,
- "p90": 450.75199007987976,
- "p95": 460.83199977874756,
- "p99": 488.44799399375916
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 314.62401151657104,
- "p90": 331.13598823547363,
- "p95": 335.80800890922546,
- "p99": 343.80799531936646
- },
- "combine": {
- "p50": 356.03201389312744,
- "p90": 364.1600012779236,
- "p95": 366.5600121021271,
- "p99": 376.22401118278503
- },
- "roundtrip": {
- "p50": 647.8400230407715,
- "p90": 659.1359972953796,
- "p95": 664.9600267410278,
- "p99": 687.2320175170898
- },
- "isolatedSum": {
- "p50": 670.6560254096985,
- "p90": 695.2959895133972,
- "p95": 702.3680210113525,
- "p99": 720.0320065021515
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 531.0080051422119,
- "p90": 542.527973651886,
- "p95": 549.4719743728638,
- "p99": 571.0399746894836
- },
- "combine": {
- "p50": 619.871973991394,
- "p90": 628.9920210838318,
- "p95": 632.9280138015747,
- "p99": 642.4639821052551
- },
- "roundtrip": {
- "p50": 1122.8159666061401,
- "p90": 1134.7839832305908,
- "p95": 1140.8319473266602,
- "p99": 1158.4320068359375
- },
- "isolatedSum": {
- "p50": 1150.879979133606,
- "p90": 1171.5199947357178,
- "p95": 1182.3999881744385,
- "p99": 1213.5039567947388
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1004.5440196990967,
- "p90": 1023.7760543823242,
- "p95": 1030.2400588989258,
- "p99": 1042.464017868042
- },
- "combine": {
- "p50": 1125.6959438323975,
- "p90": 1136.1279487609863,
- "p95": 1140.544056892395,
- "p99": 1155.4239988327026
- },
- "roundtrip": {
- "p50": 2086.143970489502,
- "p90": 2106.048107147217,
- "p95": 2112.096071243286,
- "p99": 2332.0000171661377
- },
- "isolatedSum": {
- "p50": 2130.239963531494,
- "p90": 2159.9040031433105,
- "p95": 2170.784115791321,
- "p99": 2197.8880167007446
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-71f62108",
- "identity": "h200|deepep|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "c80c3e7446de9680",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:50:05.486154+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_12",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271618490",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271618490",
- "createdAt": "2026-06-26T23:50:05.486154+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 118.68800222873688,
- "p90": 127.3919939994812,
- "p95": 133.31200182437897,
- "p99": 144.57599818706512
- },
- "combine": {
- "p50": 105.8880016207695,
- "p90": 112.76800185441971,
- "p95": 117.79200285673141,
- "p99": 129.72800433635712
- },
- "roundtrip": {
- "p50": 199.35999810695648,
- "p90": 209.4399929046631,
- "p95": 215.7440036535263,
- "p99": 257.82400369644165
- },
- "isolatedSum": {
- "p50": 224.57600384950638,
- "p90": 240.1599958539009,
- "p95": 251.10400468111038,
- "p99": 274.30400252342224
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 145.4080045223236,
- "p90": 154.91199493408203,
- "p95": 159.2639982700348,
- "p99": 170.6559956073761
- },
- "combine": {
- "p50": 144.3520039319992,
- "p90": 150.59199929237366,
- "p95": 153.05599570274353,
- "p99": 167.4879938364029
- },
- "roundtrip": {
- "p50": 263.5200023651123,
- "p90": 270.3680098056793,
- "p95": 274.7200131416321,
- "p99": 291.1039888858795
- },
- "isolatedSum": {
- "p50": 289.7600084543228,
- "p90": 305.5039942264557,
- "p95": 312.3199939727783,
- "p99": 338.143989443779
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 205.1839977502823,
- "p90": 219.04000639915466,
- "p95": 227.743998169899,
- "p99": 242.5920069217682
- },
- "combine": {
- "p50": 221.50400280952454,
- "p90": 232.96000063419342,
- "p95": 239.58399891853333,
- "p99": 263.0400061607361
- },
- "roundtrip": {
- "p50": 397.8239893913269,
- "p90": 412.03200817108154,
- "p95": 421.08801007270813,
- "p99": 463.8400077819824
- },
- "isolatedSum": {
- "p50": 426.6880005598068,
- "p90": 452.0000070333481,
- "p95": 467.3279970884323,
- "p99": 505.6320130825043
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 316.0000145435333,
- "p90": 324.0959942340851,
- "p95": 328.99200916290283,
- "p99": 351.6159951686859
- },
- "combine": {
- "p50": 350.17600655555725,
- "p90": 358.5599958896637,
- "p95": 363.2960021495819,
- "p99": 392.8639888763428
- },
- "roundtrip": {
- "p50": 639.4559741020203,
- "p90": 655.1039814949036,
- "p95": 665.3760075569153,
- "p99": 768.8000202178955
- },
- "isolatedSum": {
- "p50": 666.1760210990906,
- "p90": 682.6559901237488,
- "p95": 692.2880113124847,
- "p99": 744.4799840450287
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 527.4559855461121,
- "p90": 543.936014175415,
- "p95": 551.3280034065247,
- "p99": 568.5439705848694
- },
- "combine": {
- "p50": 612.384021282196,
- "p90": 627.3279786109924,
- "p95": 639.519989490509,
- "p99": 984.5119714736938
- },
- "roundtrip": {
- "p50": 1111.6160154342651,
- "p90": 1130.6240558624268,
- "p95": 1139.7759914398193,
- "p99": 1297.5679636001587
- },
- "isolatedSum": {
- "p50": 1139.840006828308,
- "p90": 1171.2639927864075,
- "p95": 1190.8479928970337,
- "p99": 1553.0559420585632
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 994.4959878921509,
- "p90": 1017.6960229873657,
- "p95": 1024.7360467910767,
- "p99": 1044.8640584945679
- },
- "combine": {
- "p50": 1103.9680242538452,
- "p90": 1115.7439947128296,
- "p95": 1122.3039627075195,
- "p99": 1306.1439990997314
- },
- "roundtrip": {
- "p50": 2064.448118209839,
- "p90": 2089.344024658203,
- "p95": 2106.0800552368164,
- "p99": 2285.504102706909
- },
- "isolatedSum": {
- "p50": 2098.464012145996,
- "p90": 2133.4400177001953,
- "p95": 2147.040009498596,
- "p99": 2351.0080575942993
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-2e712f4f",
- "identity": "h200|deepep|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "h200_d982b749",
- "comparisonKey": "cbe784eff055b137",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:16.208325+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_0",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287501303",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287501303",
- "createdAt": "2026-06-27T11:14:16.208325+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.079998254776,
- "p90": 145.60000598430634,
- "p95": 166.4000004529953,
- "p99": 212.38400042057037
- },
- "combine": {
- "p50": 105.98400235176086,
- "p90": 121.40800058841705,
- "p95": 127.23200023174286,
- "p99": 153.28000485897064
- },
- "roundtrip": {
- "p50": 196.44799828529358,
- "p90": 221.15199267864227,
- "p95": 232.80000686645508,
- "p99": 247.1040040254593
- },
- "isolatedSum": {
- "p50": 228.06400060653687,
- "p90": 267.0080065727234,
- "p95": 293.63200068473816,
- "p99": 365.664005279541
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 147.07200229167938,
- "p90": 169.11999881267548,
- "p95": 174.72000420093536,
- "p99": 191.5840059518814
- },
- "combine": {
- "p50": 142.5279974937439,
- "p90": 154.59200739860535,
- "p95": 160.19199788570404,
- "p99": 169.95200514793396
- },
- "roundtrip": {
- "p50": 261.75999641418457,
- "p90": 279.4879972934723,
- "p95": 287.07200288772583,
- "p99": 312.99200654029846
- },
- "isolatedSum": {
- "p50": 289.5999997854233,
- "p90": 323.7120062112808,
- "p95": 334.9120020866394,
- "p99": 361.53601109981537
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154570752,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 208.92800390720367,
- "p90": 239.71199989318848,
- "p95": 256.6080093383789,
- "p99": 289.8240089416504
- },
- "combine": {
- "p50": 226.84800624847412,
- "p90": 248.9600032567978,
- "p95": 259.8400115966797,
- "p99": 303.6159873008728
- },
- "roundtrip": {
- "p50": 399.9040126800537,
- "p90": 421.7599928379059,
- "p95": 431.2640130519867,
- "p99": 470.91200947761536
- },
- "isolatedSum": {
- "p50": 435.7760101556778,
- "p90": 488.67200314998627,
- "p95": 516.4480209350586,
- "p99": 593.4399962425232
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 309772288,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 312.54398822784424,
- "p90": 332.41599798202515,
- "p95": 337.8239870071411,
- "p99": 356.3520014286041
- },
- "combine": {
- "p50": 352.03200578689575,
- "p90": 364.03200030326843,
- "p95": 370.88000774383545,
- "p99": 386.49600744247437
- },
- "roundtrip": {
- "p50": 642.1759724617004,
- "p90": 663.8720035552979,
- "p95": 672.4159717559814,
- "p99": 710.2400064468384
- },
- "isolatedSum": {
- "p50": 664.57599401474,
- "p90": 696.4479982852936,
- "p95": 708.7039947509766,
- "p99": 742.8480088710785
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619501568,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 524.2239832878113,
- "p90": 540.3519868850708,
- "p95": 545.7919836044312,
- "p99": 580.672025680542
- },
- "combine": {
- "p50": 604.1600108146667,
- "p90": 617.6319718360901,
- "p95": 624.8639822006226,
- "p99": 663.2959842681885
- },
- "roundtrip": {
- "p50": 1101.6960144042969,
- "p90": 1123.4560012817383,
- "p95": 1130.944013595581,
- "p99": 1197.759985923767
- },
- "isolatedSum": {
- "p50": 1128.383994102478,
- "p90": 1157.983958721161,
- "p95": 1170.6559658050537,
- "p99": 1243.9680099487305
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239375872,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 980.1279902458191,
- "p90": 1001.3760328292847,
- "p95": 1012.0639801025391,
- "p99": 1055.3920269012451
- },
- "combine": {
- "p50": 1095.0080156326294,
- "p90": 1110.5279922485352,
- "p95": 1121.8559741973877,
- "p99": 1219.1040515899658
- },
- "roundtrip": {
- "p50": 2037.1840000152588,
- "p90": 2063.6160373687744,
- "p95": 2101.50408744812,
- "p99": 2307.6798915863037
- },
- "isolatedSum": {
- "p50": 2075.1360058784485,
- "p90": 2111.90402507782,
- "p95": 2133.9199542999268,
- "p99": 2274.496078491211
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2479669248,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-a8fb4d9b",
- "identity": "h200|deepep|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "h200_3a47b6c9",
- "comparisonKey": "f6581a3621ac6cd2",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:53:25.459367+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_13",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271732597",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271732597",
- "createdAt": "2026-06-26T23:53:25.459367+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 115.90400338172913,
- "p90": 134.14399325847626,
- "p95": 140.35199582576752,
- "p99": 160.38399934768677
- },
- "combine": {
- "p50": 104.09600287675858,
- "p90": 119.71200257539749,
- "p95": 124.64000284671783,
- "p99": 145.31199634075165
- },
- "roundtrip": {
- "p50": 195.64799964427948,
- "p90": 212.8639966249466,
- "p95": 219.9999988079071,
- "p99": 230.3680032491684
- },
- "isolatedSum": {
- "p50": 220.0000062584877,
- "p90": 253.85599583387375,
- "p95": 264.99199867248535,
- "p99": 305.6959956884384
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77514752,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 145.53600549697876,
- "p90": 162.30399906635284,
- "p95": 170.3999936580658,
- "p99": 184.64000523090363
- },
- "combine": {
- "p50": 143.77599954605103,
- "p90": 157.21599757671356,
- "p95": 162.27200627326965,
- "p99": 175.64800381660461
- },
- "roundtrip": {
- "p50": 265.1199996471405,
- "p90": 283.90398621559143,
- "p95": 289.0239953994751,
- "p99": 302.0159900188446
- },
- "isolatedSum": {
- "p50": 289.3120050430298,
- "p90": 319.5199966430664,
- "p95": 332.67199993133545,
- "p99": 360.28800904750824
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154570752,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 205.4399996995926,
- "p90": 224.0000069141388,
- "p95": 232.06399381160736,
- "p99": 262.719988822937
- },
- "combine": {
- "p50": 225.0880002975464,
- "p90": 243.96799504756927,
- "p95": 250.0160038471222,
- "p99": 335.55200695991516
- },
- "roundtrip": {
- "p50": 403.55199575424194,
- "p90": 432.8959882259369,
- "p95": 447.1360146999359,
- "p99": 589.6000266075134
- },
- "isolatedSum": {
- "p50": 430.527999997139,
- "p90": 467.96800196170807,
- "p95": 482.07999765872955,
- "p99": 598.2719957828522
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 309772288,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 312.79999017715454,
- "p90": 334.7199857234955,
- "p95": 340.1919901371002,
- "p99": 368.73599886894226
- },
- "combine": {
- "p50": 356.1280071735382,
- "p90": 367.45598912239075,
- "p95": 372.6719915866852,
- "p99": 395.77600359916687
- },
- "roundtrip": {
- "p50": 643.1999802589417,
- "p90": 657.3759913444519,
- "p95": 663.7439727783203,
- "p99": 708.1599831581116
- },
- "isolatedSum": {
- "p50": 668.9279973506927,
- "p90": 702.1759748458862,
- "p95": 712.8639817237854,
- "p99": 764.5120024681091
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619501568,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 523.2639908790588,
- "p90": 541.9520139694214,
- "p95": 552.2559881210327,
- "p99": 611.3280057907104
- },
- "combine": {
- "p50": 611.0079884529114,
- "p90": 623.0080127716064,
- "p95": 630.3359866142273,
- "p99": 657.2480201721191
- },
- "roundtrip": {
- "p50": 1108.7679862976074,
- "p90": 1123.9999532699585,
- "p95": 1132.3200464248657,
- "p99": 1233.63196849823
- },
- "isolatedSum": {
- "p50": 1134.2719793319702,
- "p90": 1164.9600267410278,
- "p95": 1182.59197473526,
- "p99": 1268.5760259628296
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239375872,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 984.5119714736938,
- "p90": 1019.4560289382935,
- "p95": 1036.128044128418,
- "p99": 1103.0399799346924
- },
- "combine": {
- "p50": 1114.6559715270996,
- "p90": 1129.472017288208,
- "p95": 1136.896014213562,
- "p99": 1180.3200244903564
- },
- "roundtrip": {
- "p50": 2057.408094406128,
- "p90": 2091.423988342285,
- "p95": 2103.264093399048,
- "p99": 2406.8479537963867
- },
- "isolatedSum": {
- "p50": 2099.1679430007935,
- "p90": 2148.9280462265015,
- "p95": 2173.02405834198,
- "p99": 2283.360004425049
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2479669248,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-ad612267",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39",
- "colorKey": "h200_b5c683eb",
- "comparisonKey": "b18bebc70bf6167d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:03:03.036669+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_13",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "0a3064a2af0dd39",
- "workloadId": "set:6:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272035224",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272035224",
- "createdAt": "2026-06-27T00:03:03.036669+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 132.60799646377563,
- "p90": 151.8400013446808,
- "p95": 157.3760062456131,
- "p99": 181.47200345993042
- },
- "combine": {
- "p50": 125.40799379348755,
- "p90": 146.59200608730316,
- "p95": 152.73599326610565,
- "p99": 228.5439968109131
- },
- "roundtrip": {
- "p50": 230.20799458026886,
- "p90": 244.51200664043427,
- "p95": 253.4080147743225,
- "p99": 302.2719919681549
- },
- "isolatedSum": {
- "p50": 258.0159902572632,
- "p90": 298.43200743198395,
- "p95": 310.11199951171875,
- "p99": 410.0160002708435
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 175.99999904632568,
- "p90": 196.83200120925903,
- "p95": 202.2400051355362,
- "p99": 229.5680046081543
- },
- "combine": {
- "p50": 175.58400332927704,
- "p90": 189.82400000095367,
- "p95": 193.79200041294098,
- "p99": 265.5999958515167
- },
- "roundtrip": {
- "p50": 323.0719864368439,
- "p90": 339.29601311683655,
- "p95": 345.3119993209839,
- "p99": 369.4399893283844
- },
- "isolatedSum": {
- "p50": 351.5840023756027,
- "p90": 386.6560012102127,
- "p95": 396.0320055484772,
- "p99": 495.168000459671
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 8,
- "recvTokensMax": 2048,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 275.35998821258545,
- "p90": 301.472008228302,
- "p95": 311.19999289512634,
- "p99": 359.0080142021179
- },
- "combine": {
- "p50": 268.5120105743408,
- "p90": 284.38401222229004,
- "p95": 289.3120050430298,
- "p99": 321.6319978237152
- },
- "roundtrip": {
- "p50": 519.9040174484253,
- "p90": 549.2479801177979,
- "p95": 559.6160292625427,
- "p99": 602.4960279464722
- },
- "isolatedSum": {
- "p50": 543.8719987869263,
- "p90": 585.856020450592,
- "p95": 600.5119979381561,
- "p99": 680.6400120258331
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 8,
- "recvTokensMax": 4096,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 458.75200629234314,
- "p90": 610.4320287704468,
- "p95": 643.1999802589417,
- "p99": 663.7120246887207
- },
- "combine": {
- "p50": 451.3919949531555,
- "p90": 462.911993265152,
- "p95": 471.23199701309204,
- "p99": 480.8639883995056
- },
- "roundtrip": {
- "p50": 882.0160031318665,
- "p90": 899.4879722595215,
- "p95": 906.6879749298096,
- "p99": 926.688015460968
- },
- "isolatedSum": {
- "p50": 910.1440012454987,
- "p90": 1073.3440220355988,
- "p95": 1114.4319772720337,
- "p99": 1144.5760130882263
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 939524096,
- "combineLogicalBytes": 939524096,
- "fanoutMean": 8,
- "recvTokensMax": 8192,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 819.5520043373108,
- "p90": 837.664008140564,
- "p95": 856.3200235366821,
- "p99": 920.5440282821655
- },
- "combine": {
- "p50": 816.6080117225647,
- "p90": 834.879994392395,
- "p95": 846.9439744949341,
- "p99": 919.264018535614
- },
- "roundtrip": {
- "p50": 1605.247974395752,
- "p90": 1634.1760158538818,
- "p95": 1654.9760103225708,
- "p99": 1745.8560466766357
- },
- "isolatedSum": {
- "p50": 1636.1600160598755,
- "p90": 1672.544002532959,
- "p95": 1703.2639980316162,
- "p99": 1839.8080468177795
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1879048192,
- "combineLogicalBytes": 1879048192,
- "fanoutMean": 8,
- "recvTokensMax": 16384,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1564.7679567337036,
- "p90": 1586.0799551010132,
- "p95": 1601.8879413604736,
- "p99": 1723.0720520019531
- },
- "combine": {
- "p50": 1521.9199657440186,
- "p90": 1538.7840270996094,
- "p95": 1547.104001045227,
- "p99": 1626.911997795105
- },
- "roundtrip": {
- "p50": 3057.663917541504,
- "p90": 3078.3679485321045,
- "p95": 3098.1760025024414,
- "p99": 3246.783971786499
- },
- "isolatedSum": {
- "p50": 3086.687922477722,
- "p90": 3124.8639822006226,
- "p95": 3148.9919424057007,
- "p99": 3349.984049797058
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3758096384,
- "combineLogicalBytes": 3758096384,
- "fanoutMean": 8,
- "recvTokensMax": 32768,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-30f874f3",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||9e6ac678a09f7f8",
- "colorKey": "h200_b5c683eb",
- "comparisonKey": "b18bebc70bf6167d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:56:38.753854+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_11",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "9e6ac678a09f7f8",
- "workloadId": "set:3:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271834221",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271834221",
- "createdAt": "2026-06-26T23:56:38.753854+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 131.99999928474426,
- "p90": 147.74399995803833,
- "p95": 155.68000078201294,
- "p99": 168.7680035829544
- },
- "combine": {
- "p50": 126.01600587368011,
- "p90": 139.74399864673615,
- "p95": 146.08000218868256,
- "p99": 156.73600137233734
- },
- "roundtrip": {
- "p50": 229.8559993505478,
- "p90": 251.583993434906,
- "p95": 260.0319981575012,
- "p99": 275.07200837135315
- },
- "isolatedSum": {
- "p50": 258.0160051584244,
- "p90": 287.4879986047745,
- "p95": 301.7600029706955,
- "p99": 325.50400495529175
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 270.7520127296448,
- "p90": 292.89600253105164,
- "p95": 304.9600124359131,
- "p99": 352.6400029659271
- },
- "combine": {
- "p50": 268.5759961605072,
- "p90": 281.76000714302063,
- "p95": 287.200003862381,
- "p99": 301.31199955940247
- },
- "roundtrip": {
- "p50": 514.4960284233093,
- "p90": 532.7360033988953,
- "p95": 542.1119928359985,
- "p99": 571.615993976593
- },
- "isolatedSum": {
- "p50": 539.328008890152,
- "p90": 574.6560096740723,
- "p95": 592.1600162982941,
- "p99": 653.9520025253296
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 8,
- "recvTokensMax": 4096,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 822.0800161361694,
- "p90": 845.1840281486511,
- "p95": 862.5919818878174,
- "p99": 1313.3759498596191
- },
- "combine": {
- "p50": 820.032000541687,
- "p90": 837.7919793128967,
- "p95": 846.3680148124695,
- "p99": 873.3440041542053
- },
- "roundtrip": {
- "p50": 1605.9520244598389,
- "p90": 1629.3439865112305,
- "p95": 1645.1200246810913,
- "p99": 1737.1840476989746
- },
- "isolatedSum": {
- "p50": 1642.1120166778564,
- "p90": 1682.9760074615479,
- "p95": 1708.9599967002869,
- "p99": 2186.7199540138245
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1879048192,
- "combineLogicalBytes": 1879048192,
- "fanoutMean": 8,
- "recvTokensMax": 16384,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-a2c76343",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9",
- "colorKey": "h200_d0dfa19a",
- "comparisonKey": "4ade4ca52869383d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:56:42.077253+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · balanced-rank-local",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced-rank-local",
- "routingLabel": "balanced-rank-local",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "7aa44c7b86748b9",
- "workloadId": "set:3:388ff74baef05c72",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271841288",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271841288",
- "createdAt": "2026-06-26T23:56:42.077253+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 85.08799970149994,
- "p90": 109.40799862146378,
- "p95": 117.47200042009354,
- "p99": 164.38399255275726
- },
- "combine": {
- "p50": 71.45600020885468,
- "p90": 86.496002972126,
- "p95": 91.26400202512741,
- "p99": 106.20799660682678
- },
- "roundtrip": {
- "p50": 132.51200318336487,
- "p90": 162.7199947834015,
- "p95": 173.8560050725937,
- "p99": 221.5680032968521
- },
- "isolatedSum": {
- "p50": 156.54399991035461,
- "p90": 195.90400159358978,
- "p95": 208.73600244522095,
- "p99": 270.59198915958405
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 1,
- "recvTokensMax": 128,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 111.00800335407257,
- "p90": 130.43199479579926,
- "p95": 136.80000603199005,
- "p99": 165.75999557971954
- },
- "combine": {
- "p50": 118.1119978427887,
- "p90": 134.62400436401367,
- "p95": 139.67999815940857,
- "p99": 149.6639996767044
- },
- "roundtrip": {
- "p50": 202.30400562286377,
- "p90": 223.83999824523926,
- "p95": 241.85599386692047,
- "p99": 296.25600576400757
- },
- "isolatedSum": {
- "p50": 229.12000119686127,
- "p90": 265.0559991598129,
- "p95": 276.4800041913986,
- "p99": 315.42399525642395
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 1,
- "recvTokensMax": 512,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 189.63199853897095,
- "p90": 209.72800254821777,
- "p95": 218.4319943189621,
- "p99": 254.14401292800903
- },
- "combine": {
- "p50": 284.960001707077,
- "p90": 298.7520098686218,
- "p95": 303.2959997653961,
- "p99": 331.9999873638153
- },
- "roundtrip": {
- "p50": 447.3919868469238,
- "p90": 475.42399168014526,
- "p95": 484.8639965057373,
- "p99": 529.9519896507263
- },
- "isolatedSum": {
- "p50": 474.592000246048,
- "p90": 508.4800124168396,
- "p95": 521.7279940843582,
- "p99": 586.1440002918243
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 1,
- "recvTokensMax": 2048,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-79209ba6",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71",
- "colorKey": "h200_06544e53",
- "comparisonKey": "822fd37c7222ef9b",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:03:05.638717+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_8",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · balanced+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "balanced",
- "routingLabel": "balanced+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "df54a9510825f71",
- "workloadId": "set:6:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1,
- "eplbImbalanceAfter": 1,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272038593",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272038593",
- "createdAt": "2026-06-27T00:03:05.638717+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 95.83999961614609,
- "p90": 112.73600161075592,
- "p95": 121.15199863910675,
- "p99": 153.4080058336258
- },
- "combine": {
- "p50": 83.99999886751175,
- "p90": 97.6639986038208,
- "p95": 104.22399640083313,
- "p99": 116.89600348472595
- },
- "roundtrip": {
- "p50": 154.1759967803955,
- "p90": 176.32000148296356,
- "p95": 183.45600366592407,
- "p99": 211.29600703716278
- },
- "isolatedSum": {
- "p50": 179.83999848365784,
- "p90": 210.40000021457672,
- "p95": 225.37599503993988,
- "p99": 270.30400931835175
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 29360128,
- "combineLogicalBytes": 29360128,
- "fanoutMean": 2,
- "recvTokensMax": 384,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 115.55200070142746,
- "p90": 135.68000495433807,
- "p95": 143.39199662208557,
- "p99": 163.26400637626648
- },
- "combine": {
- "p50": 103.35999727249146,
- "p90": 120.03199756145477,
- "p95": 127.32799351215363,
- "p99": 154.4319987297058
- },
- "roundtrip": {
- "p50": 197.2160041332245,
- "p90": 215.58399498462677,
- "p95": 223.26399385929108,
- "p99": 242.5599992275238
- },
- "isolatedSum": {
- "p50": 218.91199797391891,
- "p90": 255.71200251579285,
- "p95": 270.7199901342392,
- "p99": 317.6960051059723
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 58720256,
- "combineLogicalBytes": 58720256,
- "fanoutMean": 2,
- "recvTokensMax": 768,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 144.6080058813095,
- "p90": 169.0559983253479,
- "p95": 176.7680048942566,
- "p99": 208.064004778862
- },
- "combine": {
- "p50": 140.47999680042267,
- "p90": 155.74400126934052,
- "p95": 161.6639941930771,
- "p99": 184.1920018196106
- },
- "roundtrip": {
- "p50": 262.9440128803253,
- "p90": 282.24000334739685,
- "p95": 290.6560003757477,
- "p99": 320.0640082359314
- },
- "isolatedSum": {
- "p50": 285.0880026817322,
- "p90": 324.7999995946884,
- "p95": 338.4319990873337,
- "p99": 392.2560065984726
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 2,
- "recvTokensMax": 1536,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 205.4080069065094,
- "p90": 229.21599447727203,
- "p95": 238.49600553512573,
- "p99": 255.67999482154846
- },
- "combine": {
- "p50": 214.7199958562851,
- "p90": 231.90400004386902,
- "p95": 236.86400055885315,
- "p99": 255.64798712730408
- },
- "roundtrip": {
- "p50": 399.4239866733551,
- "p90": 420.48001289367676,
- "p95": 429.6000003814697,
- "p99": 593.7280058860779
- },
- "isolatedSum": {
- "p50": 420.1280027627945,
- "p90": 461.11999452114105,
- "p95": 475.3600060939789,
- "p99": 511.32798194885254
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 2,
- "recvTokensMax": 3072,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 331.743985414505,
- "p90": 350.23999214172363,
- "p95": 361.08800768852234,
- "p99": 406.0479998588562
- },
- "combine": {
- "p50": 360.54399609565735,
- "p90": 375.90399384498596,
- "p95": 382.78400897979736,
- "p99": 404.4159948825836
- },
- "roundtrip": {
- "p50": 664.0639901161194,
- "p90": 679.9039840698242,
- "p95": 693.5679912567139,
- "p99": 743.1359887123108
- },
- "isolatedSum": {
- "p50": 692.2879815101624,
- "p90": 726.1439859867096,
- "p95": 743.8720166683197,
- "p99": 810.4639947414398
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 2,
- "recvTokensMax": 6144,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 580.5439949035645,
- "p90": 593.3759808540344,
- "p95": 602.3679971694946,
- "p99": 637.6640200614929
- },
- "combine": {
- "p50": 628.3519864082336,
- "p90": 641.1839723587036,
- "p95": 648.3839750289917,
- "p99": 680.9279918670654
- },
- "roundtrip": {
- "p50": 1181.7920207977295,
- "p90": 1199.295997619629,
- "p95": 1210.3359699249268,
- "p99": 1255.4240226745605
- },
- "isolatedSum": {
- "p50": 1208.895981311798,
- "p90": 1234.559953212738,
- "p95": 1250.7519721984863,
- "p99": 1318.5920119285583
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 939524096,
- "combineLogicalBytes": 939524096,
- "fanoutMean": 2,
- "recvTokensMax": 12288,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-c14326f0",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||38fd0bcf7109c32",
- "colorKey": "h200_189562cd",
- "comparisonKey": "b9475bb176588857",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:57:32.803411+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_13",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "38fd0bcf7109c32",
- "workloadId": "set:3:b952d4a43d688b50",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271862413",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271862413",
- "createdAt": "2026-06-26T23:57:32.803411+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 125.11999905109406,
- "p90": 140.00000059604645,
- "p95": 149.88799393177032,
- "p99": 171.23199999332428
- },
- "combine": {
- "p50": 118.65600198507309,
- "p90": 132.64000415802002,
- "p95": 137.60000467300415,
- "p99": 164.95999693870544
- },
- "roundtrip": {
- "p50": 221.5680032968521,
- "p90": 238.14399540424347,
- "p95": 251.71199440956116,
- "p99": 291.6480004787445
- },
- "isolatedSum": {
- "p50": 243.77600103616714,
- "p90": 272.64000475406647,
- "p95": 287.4879986047745,
- "p99": 336.1919969320297
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 245.02399563789368,
- "p90": 257.31199979782104,
- "p95": 265.8880054950714,
- "p99": 298.72000217437744
- },
- "combine": {
- "p50": 263.68001103401184,
- "p90": 275.32801032066345,
- "p95": 281.9199860095978,
- "p99": 299.1679906845093
- },
- "roundtrip": {
- "p50": 481.9839894771576,
- "p90": 495.6800043582916,
- "p95": 506.1759948730469,
- "p99": 808.3199858665466
- },
- "isolatedSum": {
- "p50": 508.7040066719055,
- "p90": 532.6400101184845,
- "p95": 547.8079915046692,
- "p99": 597.8879928588867
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311091200,
- "combineLogicalBytes": 311091200,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 4096,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 725.1200079917908,
- "p90": 748.0959892272949,
- "p95": 763.0079984664917,
- "p99": 812.0959997177124
- },
- "combine": {
- "p50": 799.3280291557312,
- "p90": 813.9839768409729,
- "p95": 823.5200047492981,
- "p99": 875.6160140037537
- },
- "roundtrip": {
- "p50": 1494.3679571151733,
- "p90": 1516.1919593811035,
- "p95": 1528.2560586929321,
- "p99": 1709.8560333251953
- },
- "isolatedSum": {
- "p50": 1524.448037147522,
- "p90": 1562.0799660682678,
- "p95": 1586.5280032157898,
- "p99": 1687.712013721466
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1241511936,
- "combineLogicalBytes": 1241511936,
- "fanoutMean": 5.28570556640625,
- "recvTokensMax": 16384,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-17171887",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c",
- "colorKey": "h200_189562cd",
- "comparisonKey": "b9475bb176588857",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:28.346517+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_4",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "bfbb64a166e9f1c",
- "workloadId": "set:6:b952d4a43d688b50",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272106904",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272106904",
- "createdAt": "2026-06-27T00:05:28.346517+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 124.41600114107132,
- "p90": 159.9999964237213,
- "p95": 167.39200055599213,
- "p99": 211.42399311065674
- },
- "combine": {
- "p50": 118.01599711179733,
- "p90": 146.5280055999756,
- "p95": 150.27199685573578,
- "p99": 162.9759967327118
- },
- "roundtrip": {
- "p50": 220.2560007572174,
- "p90": 253.91998887062073,
- "p95": 258.432000875473,
- "p99": 271.42399549484253
- },
- "isolatedSum": {
- "p50": 242.43199825286865,
- "p90": 306.5280020236969,
- "p95": 317.6639974117279,
- "p99": 374.39998984336853
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78102528,
- "combineLogicalBytes": 78102528,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 1024,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 162.432000041008,
- "p90": 170.6240028142929,
- "p95": 175.04000663757324,
- "p99": 188.38399648666382
- },
- "combine": {
- "p50": 165.0879979133606,
- "p90": 175.7120043039322,
- "p95": 179.83999848365784,
- "p99": 191.77600741386414
- },
- "roundtrip": {
- "p50": 301.66399478912354,
- "p90": 317.3759877681732,
- "p95": 322.6880133152008,
- "p99": 333.69600772857666
- },
- "isolatedSum": {
- "p50": 327.5199979543686,
- "p90": 346.3360071182251,
- "p95": 354.8800051212311,
- "p99": 380.16000390052795
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156090368,
- "combineLogicalBytes": 156090368,
- "fanoutMean": 5.31640625,
- "recvTokensMax": 2048,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 238.97600173950195,
- "p90": 247.0400035381317,
- "p95": 250.0160038471222,
- "p99": 259.39199328422546
- },
- "combine": {
- "p50": 261.9200050830841,
- "p90": 275.2000093460083,
- "p95": 279.58399057388306,
- "p99": 300.4480004310608
- },
- "roundtrip": {
- "p50": 482.33601450920105,
- "p90": 499.1999864578247,
- "p95": 507.3919892311096,
- "p99": 570.527970790863
- },
- "isolatedSum": {
- "p50": 500.89600682258606,
- "p90": 522.24001288414,
- "p95": 529.5999944210052,
- "p99": 559.8399937152863
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311091200,
- "combineLogicalBytes": 311091200,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 4096,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 392.8639888763428,
- "p90": 402.72000432014465,
- "p95": 406.23998641967773,
- "p99": 445.3760087490082
- },
- "combine": {
- "p50": 443.1680142879486,
- "p90": 455.80801367759705,
- "p95": 461.5040123462677,
- "p99": 481.53600096702576
- },
- "roundtrip": {
- "p50": 817.5680041313171,
- "p90": 835.2320194244385,
- "p95": 845.3760147094727,
- "p99": 893.887996673584
- },
- "isolatedSum": {
- "p50": 836.0320031642914,
- "p90": 858.5280179977417,
- "p95": 867.7439987659454,
- "p99": 926.9120097160339
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 620648448,
- "combineLogicalBytes": 620648448,
- "fanoutMean": 5.2847900390625,
- "recvTokensMax": 8192,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 718.783974647522,
- "p90": 730.3680181503296,
- "p95": 737.280011177063,
- "p99": 808.1920146942139
- },
- "combine": {
- "p50": 797.4399924278259,
- "p90": 810.8800053596497,
- "p95": 820.032000541687,
- "p99": 849.3760228157043
- },
- "roundtrip": {
- "p50": 1490.3680086135864,
- "p90": 1507.5839757919312,
- "p95": 1519.2960500717163,
- "p99": 1630.944013595581
- },
- "isolatedSum": {
- "p50": 1516.223967075348,
- "p90": 1541.2480235099792,
- "p95": 1557.31201171875,
- "p99": 1657.5680375099182
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1241511936,
- "combineLogicalBytes": 1241511936,
- "fanoutMean": 5.28570556640625,
- "recvTokensMax": 16384,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1386.0160112380981,
- "p90": 1401.0560512542725,
- "p95": 1406.1440229415894,
- "p99": 1621.7279434204102
- },
- "combine": {
- "p50": 1483.199954032898,
- "p90": 1497.5999593734741,
- "p95": 1502.17604637146,
- "p99": 1538.0480289459229
- },
- "roundtrip": {
- "p50": 2845.855951309204,
- "p90": 2863.840103149414,
- "p95": 2879.647970199585,
- "p99": 3068.063974380493
- },
- "isolatedSum": {
- "p50": 2869.215965270996,
- "p90": 2898.6560106277466,
- "p95": 2908.3200693130493,
- "p99": 3159.775972366333
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2484242432,
- "combineLogicalBytes": 2484242432,
- "fanoutMean": 5.288299560546875,
- "recvTokensMax": 32768,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f354b9c6",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8",
- "colorKey": "h200_80a72891",
- "comparisonKey": "52b3ac7f405659bf",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:25.966329+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_7",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "29ae5ace13636f8",
- "workloadId": "set:6:b952d4a43d688b50",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.8466796875,
- "eplbImbalanceAfter": 1.0002700343276514,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272110404",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272110404",
- "createdAt": "2026-06-27T00:05:25.966329+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.24799871444702,
- "p90": 134.17600095272064,
- "p95": 140.25600254535675,
- "p99": 158.84800255298615
- },
- "combine": {
- "p50": 107.68000036478043,
- "p90": 119.39200013875961,
- "p95": 123.99999797344208,
- "p99": 129.82399761676788
- },
- "roundtrip": {
- "p50": 196.60800695419312,
- "p90": 215.16799926757812,
- "p95": 223.07200729846954,
- "p99": 271.232008934021
- },
- "isolatedSum": {
- "p50": 224.92799907922745,
- "p90": 253.56800109148026,
- "p95": 264.2560005187988,
- "p99": 288.672000169754
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77701120,
- "combineLogicalBytes": 77701120,
- "fanoutMean": 5.29296875,
- "recvTokensMax": 697,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 145.9520012140274,
- "p90": 165.69599509239197,
- "p95": 176.35199427604675,
- "p99": 214.49600160121918
- },
- "combine": {
- "p50": 143.61600577831268,
- "p90": 153.28000485897064,
- "p95": 157.3439985513687,
- "p99": 169.91999745368958
- },
- "roundtrip": {
- "p50": 263.7439966201782,
- "p90": 279.1680097579956,
- "p95": 287.07200288772583,
- "p99": 316.0960078239441
- },
- "isolatedSum": {
- "p50": 289.5680069923401,
- "p90": 318.9759999513626,
- "p95": 333.69599282741547,
- "p99": 384.41599905490875
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155187200,
- "combineLogicalBytes": 155187200,
- "fanoutMean": 5.28564453125,
- "recvTokensMax": 1372,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 204.96000349521637,
- "p90": 222.81600534915924,
- "p95": 232.1919947862625,
- "p99": 259.552001953125
- },
- "combine": {
- "p50": 222.4320024251938,
- "p90": 239.51999843120575,
- "p95": 245.2480047941208,
- "p99": 269.3760097026825
- },
- "roundtrip": {
- "p50": 400.83199739456177,
- "p90": 421.7279851436615,
- "p95": 431.3279986381531,
- "p99": 482.14399814605713
- },
- "isolatedSum": {
- "p50": 427.39200592041016,
- "p90": 462.336003780365,
- "p95": 477.4399995803833,
- "p99": 528.9280116558075
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311162880,
- "combineLogicalBytes": 311162880,
- "fanoutMean": 5.299072265625,
- "recvTokensMax": 2761,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 313.76001238822937,
- "p90": 342.24000573158264,
- "p95": 361.11998558044434,
- "p99": 480.3520143032074
- },
- "combine": {
- "p50": 359.20000076293945,
- "p90": 373.79199266433716,
- "p95": 381.9519877433777,
- "p99": 407.77599811553955
- },
- "roundtrip": {
- "p50": 644.2880034446716,
- "p90": 664.1600131988525,
- "p95": 676.4799952507019,
- "p99": 748.8639950752258
- },
- "isolatedSum": {
- "p50": 672.9600131511688,
- "p90": 716.0319983959198,
- "p95": 743.071973323822,
- "p99": 888.128012418747
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619974656,
- "combineLogicalBytes": 619974656,
- "fanoutMean": 5.279052734375,
- "recvTokensMax": 5481,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 538.3679866790771,
- "p90": 557.1839809417725,
- "p95": 566.0160183906555,
- "p99": 608.0639958381653
- },
- "combine": {
- "p50": 618.9759969711304,
- "p90": 630.3359866142273,
- "p95": 636.2559795379639,
- "p99": 653.5680294036865
- },
- "roundtrip": {
- "p50": 1131.2960386276245,
- "p90": 1151.263952255249,
- "p95": 1159.0080261230469,
- "p99": 1297.9520559310913
- },
- "isolatedSum": {
- "p50": 1157.3439836502075,
- "p90": 1187.5199675559998,
- "p95": 1202.2719979286194,
- "p99": 1261.6320252418518
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1240020992,
- "combineLogicalBytes": 1240020992,
- "fanoutMean": 5.27935791015625,
- "recvTokensMax": 10883,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 984.2240214347839,
- "p90": 1003.5840272903442,
- "p95": 1015.2319669723511,
- "p99": 1056.480050086975
- },
- "combine": {
- "p50": 1093.9840078353882,
- "p90": 1107.9679727554321,
- "p95": 1119.9040412902832,
- "p99": 1297.055959701538
- },
- "roundtrip": {
- "p50": 2046.5600490570068,
- "p90": 2070.3680515289307,
- "p95": 2092.5118923187256,
- "p99": 2573.024034500122
- },
- "isolatedSum": {
- "p50": 2078.208029270172,
- "p90": 2111.5520000457764,
- "p95": 2135.1360082626343,
- "p99": 2353.536009788513
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480414720,
- "combineLogicalBytes": 2480414720,
- "fanoutMean": 5.2801513671875,
- "recvTokensMax": 21702,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-db979d37",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d",
- "colorKey": "h200_580d7b05",
- "comparisonKey": "b1de1efab41abbdf",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:02:37.856020+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_0",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · uniform+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "uniform",
- "routingLabel": "uniform+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2225dbbdab9bf2d",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 1.006072998046875,
- "eplbImbalanceAfter": 1.0000152587890625,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272024348",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272024348",
- "createdAt": "2026-06-27T00:02:37.856020+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 115.87200313806534,
- "p90": 125.88800489902496,
- "p95": 131.71200454235077,
- "p99": 142.46399700641632
- },
- "combine": {
- "p50": 103.96800190210342,
- "p90": 115.48800021409988,
- "p95": 122.68800288438797,
- "p99": 204.3199986219406
- },
- "roundtrip": {
- "p50": 195.5839991569519,
- "p90": 206.65599405765533,
- "p95": 212.25599944591522,
- "p99": 236.03199422359467
- },
- "isolatedSum": {
- "p50": 219.84000504016876,
- "p90": 241.37600511312485,
- "p95": 254.40000742673874,
- "p99": 346.78399562835693
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77041664,
- "combineLogicalBytes": 77041664,
- "fanoutMean": 5.248046875,
- "recvTokensMax": 686,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 147.39200472831726,
- "p90": 165.50399363040924,
- "p95": 174.20800030231476,
- "p99": 197.11999595165253
- },
- "combine": {
- "p50": 146.7839926481247,
- "p90": 158.55999290943146,
- "p95": 162.9440039396286,
- "p99": 175.20000040531158
- },
- "roundtrip": {
- "p50": 266.7520046234131,
- "p90": 286.24001145362854,
- "p95": 293.1840121746063,
- "p99": 322.33598828315735
- },
- "isolatedSum": {
- "p50": 294.17599737644196,
- "p90": 324.0639865398407,
- "p95": 337.15200424194336,
- "p99": 372.3199963569641
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154542080,
- "combineLogicalBytes": 154542080,
- "fanoutMean": 5.263671875,
- "recvTokensMax": 1365,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 203.74399423599243,
- "p90": 221.11999988555908,
- "p95": 229.95199263095856,
- "p99": 253.08799743652344
- },
- "combine": {
- "p50": 222.52799570560455,
- "p90": 234.72000658512115,
- "p95": 238.24000358581543,
- "p99": 259.3280076980591
- },
- "roundtrip": {
- "p50": 398.17601442337036,
- "p90": 415.74400663375854,
- "p95": 422.04800248146057,
- "p99": 459.26401019096375
- },
- "isolatedSum": {
- "p50": 426.271989941597,
- "p90": 455.84000647068024,
- "p95": 468.191996216774,
- "p99": 512.4160051345825
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 310589440,
- "combineLogicalBytes": 310589440,
- "fanoutMean": 5.289306640625,
- "recvTokensMax": 2746,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 310.36800146102905,
- "p90": 327.93599367141724,
- "p95": 333.72798562049866,
- "p99": 371.8079924583435
- },
- "combine": {
- "p50": 355.9679985046387,
- "p90": 369.4719970226288,
- "p95": 383.07198882102966,
- "p99": 431.4880073070526
- },
- "roundtrip": {
- "p50": 641.9199705123901,
- "p90": 660.9920263290405,
- "p95": 668.9280271530151,
- "p99": 718.9760208129883
- },
- "isolatedSum": {
- "p50": 666.3359999656677,
- "p90": 697.407990694046,
- "p95": 716.7999744415283,
- "p99": 803.2959997653961
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619171840,
- "combineLogicalBytes": 619171840,
- "fanoutMean": 5.272216796875,
- "recvTokensMax": 5467,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 537.1519923210144,
- "p90": 553.5680055618286,
- "p95": 562.6559853553772,
- "p99": 586.9759917259216
- },
- "combine": {
- "p50": 612.1600270271301,
- "p90": 625.0240206718445,
- "p95": 633.8880062103271,
- "p99": 660.863995552063
- },
- "roundtrip": {
- "p50": 1119.968056678772,
- "p90": 1136.064052581787,
- "p95": 1145.2480554580688,
- "p99": 1263.4880542755127
- },
- "isolatedSum": {
- "p50": 1149.3120193481445,
- "p90": 1178.592026233673,
- "p95": 1196.5439915657043,
- "p99": 1247.8399872779846
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1238945792,
- "combineLogicalBytes": 1238945792,
- "fanoutMean": 5.2747802734375,
- "recvTokensMax": 10913,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1003.8080215454102,
- "p90": 1027.008056640625,
- "p95": 1034.432053565979,
- "p99": 1060.1919889450073
- },
- "combine": {
- "p50": 1111.0399961471558,
- "p90": 1125.8879899978638,
- "p95": 1135.3280544281006,
- "p99": 1165.727972984314
- },
- "roundtrip": {
- "p50": 2077.5039196014404,
- "p90": 2101.6640663146973,
- "p95": 2114.016056060791,
- "p99": 2324.8000144958496
- },
- "isolatedSum": {
- "p50": 2114.848017692566,
- "p90": 2152.8960466384888,
- "p95": 2169.7601079940796,
- "p99": 2225.9199619293213
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2481747968,
- "combineLogicalBytes": 2481747968,
- "fanoutMean": 5.282989501953125,
- "recvTokensMax": 21789,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-59b7e35e",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||4caecd33bedf786",
- "colorKey": "h200_b6aa6110",
- "comparisonKey": "b89b8b0279afe699",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:56:59.891356+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_2",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "4caecd33bedf786",
- "workloadId": "set:3:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271848591",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271848591",
- "createdAt": "2026-06-26T23:56:59.891356+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.46400117874146,
- "p90": 133.85599851608276,
- "p95": 141.15199446678162,
- "p99": 168.12799870967865
- },
- "combine": {
- "p50": 112.5440001487732,
- "p90": 125.791996717453,
- "p95": 132.1599930524826,
- "p99": 143.327996134758
- },
- "roundtrip": {
- "p50": 215.7440036535263,
- "p90": 240.03200232982635,
- "p95": 247.13599681854248,
- "p99": 281.5360128879547
- },
- "isolatedSum": {
- "p50": 235.00800132751465,
- "p90": 259.64799523353577,
- "p95": 273.3119875192642,
- "p99": 311.45599484443665
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 238.0480021238327,
- "p90": 249.439999461174,
- "p95": 253.34399938583374,
- "p99": 271.39198780059814
- },
- "combine": {
- "p50": 259.3280076980591,
- "p90": 273.6639976501465,
- "p95": 278.1440019607544,
- "p99": 748.5759854316711
- },
- "roundtrip": {
- "p50": 472.7039933204651,
- "p90": 492.76798963546753,
- "p95": 497.5360035896301,
- "p99": 524.8640179634094
- },
- "isolatedSum": {
- "p50": 497.3760098218918,
- "p90": 523.1039971113205,
- "p95": 531.4880013465881,
- "p99": 1019.9679732322693
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 671.0079908370972,
- "p90": 682.7840209007263,
- "p95": 691.6159987449646,
- "p99": 782.4000120162964
- },
- "combine": {
- "p50": 788.0319952964783,
- "p90": 803.0400276184082,
- "p95": 810.4000091552734,
- "p99": 879.2639970779419
- },
- "roundtrip": {
- "p50": 1432.5439929962158,
- "p90": 1457.2800397872925,
- "p95": 1470.2719449996948,
- "p99": 1641.3120031356812
- },
- "isolatedSum": {
- "p50": 1459.0399861335754,
- "p90": 1485.8240485191345,
- "p95": 1502.016007900238,
- "p99": 1661.6640090942383
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-520b6c38",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86",
- "colorKey": "h200_b6aa6110",
- "comparisonKey": "b89b8b0279afe699",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:03:30.997265+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_9",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b5217e990b95f86",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272049186",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272049186",
- "createdAt": "2026-06-27T00:03:30.997265+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.14399874210358,
- "p90": 134.14399325847626,
- "p95": 146.01600170135498,
- "p99": 162.62400150299072
- },
- "combine": {
- "p50": 112.92800307273865,
- "p90": 121.11999839544296,
- "p95": 126.68800354003906,
- "p99": 141.50400459766388
- },
- "roundtrip": {
- "p50": 214.30400013923645,
- "p90": 228.28799486160278,
- "p95": 232.57599771022797,
- "p99": 247.48800694942474
- },
- "isolatedSum": {
- "p50": 235.07200181484222,
- "p90": 255.26399165391922,
- "p95": 272.70400524139404,
- "p99": 304.1280061006546
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 170.43200135231018,
- "p90": 187.71199882030487,
- "p95": 196.0960030555725,
- "p99": 223.00800681114197
- },
- "combine": {
- "p50": 163.87200355529785,
- "p90": 181.60000443458557,
- "p95": 186.36800348758698,
- "p99": 197.02400267124176
- },
- "roundtrip": {
- "p50": 303.8400113582611,
- "p90": 328.000009059906,
- "p95": 333.0560028553009,
- "p99": 366.2079870700836
- },
- "isolatedSum": {
- "p50": 334.30400490760803,
- "p90": 369.31200325489044,
- "p95": 382.4640065431595,
- "p99": 420.0320094823837
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 100509696,
- "combineLogicalBytes": 100509696,
- "fanoutMean": 3.42333984375,
- "recvTokensMax": 2046,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 235.83999276161194,
- "p90": 244.6720004081726,
- "p95": 248.86399507522583,
- "p99": 265.4080092906952
- },
- "combine": {
- "p50": 259.90399718284607,
- "p90": 269.6639895439148,
- "p95": 276.06400847435,
- "p99": 299.0399897098541
- },
- "roundtrip": {
- "p50": 473.471999168396,
- "p90": 492.12801456451416,
- "p95": 498.3679950237274,
- "p99": 528.544008731842
- },
- "isolatedSum": {
- "p50": 495.743989944458,
- "p90": 514.3359899520874,
- "p95": 524.9280035495758,
- "p99": 564.4479990005493
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 381.087988615036,
- "p90": 397.47199416160583,
- "p95": 404.35200929641724,
- "p99": 493.4079945087433
- },
- "combine": {
- "p50": 437.27999925613403,
- "p90": 450.8799910545349,
- "p95": 458.3039879798889,
- "p99": 476.25601291656494
- },
- "roundtrip": {
- "p50": 790.5600070953369,
- "p90": 804.9920201301575,
- "p95": 813.9200210571289,
- "p99": 841.5359854698181
- },
- "isolatedSum": {
- "p50": 818.36798787117,
- "p90": 848.3519852161407,
- "p95": 862.6559972763062,
- "p99": 969.6640074253082
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 405035008,
- "combineLogicalBytes": 405035008,
- "fanoutMean": 3.4488525390625,
- "recvTokensMax": 8189,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 672.1280217170715,
- "p90": 685.0879788398743,
- "p95": 689.9200081825256,
- "p99": 743.4560060501099
- },
- "combine": {
- "p50": 783.1360101699829,
- "p90": 793.0560111999512,
- "p95": 796.6399788856506,
- "p99": 806.5599799156189
- },
- "roundtrip": {
- "p50": 1425.7919788360596,
- "p90": 1442.0160055160522,
- "p95": 1455.4879665374756,
- "p99": 1550.75204372406
- },
- "isolatedSum": {
- "p50": 1455.2640318870544,
- "p90": 1478.1439900398254,
- "p95": 1486.5599870681763,
- "p99": 1550.0159859657288
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1269.1839933395386,
- "p90": 1284.1919660568237,
- "p95": 1291.8720245361328,
- "p99": 1339.2640352249146
- },
- "combine": {
- "p50": 1472.8000164031982,
- "p90": 1489.8879528045654,
- "p95": 1502.17604637146,
- "p99": 1692.639946937561
- },
- "roundtrip": {
- "p50": 2711.7760181427,
- "p90": 2730.015993118286,
- "p95": 2753.5040378570557,
- "p99": 2926.464080810547
- },
- "isolatedSum": {
- "p50": 2741.984009742737,
- "p90": 2774.079918861389,
- "p95": 2794.048070907593,
- "p99": 3031.9039821624756
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1619795968,
- "combineLogicalBytes": 1619795968,
- "fanoutMean": 3.4481201171875,
- "recvTokensMax": 32761,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f5907eae",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||3dd868cb33839a3",
- "colorKey": "h200_c5b3365a",
- "comparisonKey": "d19848fb38a35ed8",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:57:20.998823+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_3",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "3dd868cb33839a3",
- "workloadId": "set:3:1ca614e23cc66be1",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271855852",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271855852",
- "createdAt": "2026-06-26T23:57:20.998823+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.72800236940384,
- "p90": 127.48800218105316,
- "p95": 133.91999900341034,
- "p99": 146.11199498176575
- },
- "combine": {
- "p50": 107.29599744081497,
- "p90": 117.3119992017746,
- "p95": 122.43200093507767,
- "p99": 134.11200046539307
- },
- "roundtrip": {
- "p50": 205.85599541664124,
- "p90": 220.09600698947906,
- "p95": 228.5120040178299,
- "p99": 244.09599602222443
- },
- "isolatedSum": {
- "p50": 225.0239998102188,
- "p90": 244.80000138282776,
- "p95": 256.351999938488,
- "p99": 280.2239954471588
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 232.89600014686584,
- "p90": 245.12000381946564,
- "p95": 253.4399926662445,
- "p99": 292.03200340270996
- },
- "combine": {
- "p50": 245.34399807453156,
- "p90": 260.25599241256714,
- "p95": 269.27998661994934,
- "p99": 297.37600684165955
- },
- "roundtrip": {
- "p50": 454.68801259994507,
- "p90": 472.6080000400543,
- "p95": 486.6560101509094,
- "p99": 522.4639773368835
- },
- "isolatedSum": {
- "p50": 478.2399982213974,
- "p90": 505.3759962320328,
- "p95": 522.7199792861938,
- "p99": 589.4080102443695
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 91521024,
- "combineLogicalBytes": 91521024,
- "fanoutMean": 1.55859375,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 662.335991859436,
- "p90": 673.632025718689,
- "p95": 681.2160015106201,
- "p99": 744.5759773254395
- },
- "combine": {
- "p50": 772.5759744644165,
- "p90": 791.8720245361328,
- "p95": 806.6239953041077,
- "p99": 855.2640080451965
- },
- "roundtrip": {
- "p50": 1405.9840440750122,
- "p90": 1435.2960586547852,
- "p95": 1455.7119607925415,
- "p99": 1716.3519859313965
- },
- "isolatedSum": {
- "p50": 1434.9119663238525,
- "p90": 1465.5040502548218,
- "p95": 1487.8399968147278,
- "p99": 1599.839985370636
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 368062464,
- "combineLogicalBytes": 368062464,
- "fanoutMean": 1.5670166015625,
- "recvTokensMax": 16384,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-75dcaec2",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe",
- "colorKey": "h200_c5b3365a",
- "comparisonKey": "d19848fb38a35ed8",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:04:55.820445+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_8",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "bbcd1d9d8d1e4fe",
- "workloadId": "set:6:1ca614e23cc66be1",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272093905",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272093905",
- "createdAt": "2026-06-27T00:04:55.820445+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 118.14399808645248,
- "p90": 130.17599284648895,
- "p95": 135.5839967727661,
- "p99": 147.07200229167938
- },
- "combine": {
- "p50": 108.83200168609619,
- "p90": 120.57600170373917,
- "p95": 127.55200266838074,
- "p99": 140.73599874973297
- },
- "roundtrip": {
- "p50": 206.65599405765533,
- "p90": 219.04000639915466,
- "p95": 224.48000311851501,
- "p99": 242.0479953289032
- },
- "isolatedSum": {
- "p50": 226.97599977254868,
- "p90": 250.75199455022812,
- "p95": 263.13599944114685,
- "p99": 287.80800104141235
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22650880,
- "combineLogicalBytes": 22650880,
- "fanoutMean": 1.54296875,
- "recvTokensMax": 1024,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 155.45600652694702,
- "p90": 165.95199704170227,
- "p95": 173.92000555992126,
- "p99": 202.39999890327454
- },
- "combine": {
- "p50": 150.94399452209473,
- "p90": 162.59199380874634,
- "p95": 170.3680008649826,
- "p99": 186.24000251293182
- },
- "roundtrip": {
- "p50": 287.6800000667572,
- "p90": 302.94400453567505,
- "p95": 309.7279965877533,
- "p99": 357.7919900417328
- },
- "isolatedSum": {
- "p50": 306.40000104904175,
- "p90": 328.5439908504486,
- "p95": 344.28800642490387,
- "p99": 388.64000141620636
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 45688832,
- "combineLogicalBytes": 45688832,
- "fanoutMean": 1.55615234375,
- "recvTokensMax": 2048,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 230.9119999408722,
- "p90": 242.11199581623077,
- "p95": 249.66399371623993,
- "p99": 269.8880136013031
- },
- "combine": {
- "p50": 247.16800451278687,
- "p90": 260.5760097503662,
- "p95": 264.6400034427643,
- "p99": 289.66400027275085
- },
- "roundtrip": {
- "p50": 456.86399936676025,
- "p90": 473.28001260757446,
- "p95": 481.1519980430603,
- "p99": 534.8799824714661
- },
- "isolatedSum": {
- "p50": 478.08000445365906,
- "p90": 502.688005566597,
- "p95": 514.3039971590042,
- "p99": 559.552013874054
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 91521024,
- "combineLogicalBytes": 91521024,
- "fanoutMean": 1.55859375,
- "recvTokensMax": 4096,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 374.2719888687134,
- "p90": 386.6559863090515,
- "p95": 397.5679874420166,
- "p99": 506.0480237007141
- },
- "combine": {
- "p50": 423.1039881706238,
- "p90": 436.0319972038269,
- "p95": 440.8319890499115,
- "p99": 470.97599506378174
- },
- "roundtrip": {
- "p50": 771.232008934021,
- "p90": 783.9679718017578,
- "p95": 795.5520153045654,
- "p99": 828.4800052642822
- },
- "isolatedSum": {
- "p50": 797.3759770393372,
- "p90": 822.6879835128784,
- "p95": 838.3999764919281,
- "p99": 977.0240187644958
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 183916544,
- "combineLogicalBytes": 183916544,
- "fanoutMean": 1.5660400390625,
- "recvTokensMax": 8192,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 661.8559956550598,
- "p90": 673.9199757575989,
- "p95": 679.6159744262695,
- "p99": 697.5039839744568
- },
- "combine": {
- "p50": 770.6559896469116,
- "p90": 781.1520099639893,
- "p95": 786.7839932441711,
- "p99": 830.560028553009
- },
- "roundtrip": {
- "p50": 1405.791997909546,
- "p90": 1421.280026435852,
- "p95": 1432.2559833526611,
- "p99": 1481.6319942474365
- },
- "isolatedSum": {
- "p50": 1432.5119853019714,
- "p90": 1455.0719857215881,
- "p95": 1466.3999676704407,
- "p99": 1528.0640125274658
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 368062464,
- "combineLogicalBytes": 368062464,
- "fanoutMean": 1.5670166015625,
- "recvTokensMax": 16384,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1246.2400197982788,
- "p90": 1261.631965637207,
- "p95": 1269.5679664611816,
- "p99": 1482.5600385665894
- },
- "combine": {
- "p50": 1440.384030342102,
- "p90": 1459.455966949463,
- "p95": 1471.519947052002,
- "p99": 1634.0479850769043
- },
- "roundtrip": {
- "p50": 2662.400007247925,
- "p90": 2688.096046447754,
- "p95": 2712.4478816986084,
- "p99": 2846.719980239868
- },
- "isolatedSum": {
- "p50": 2686.624050140381,
- "p90": 2721.08793258667,
- "p95": 2741.0879135131836,
- "p99": 3116.6080236434937
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 734720000,
- "combineLogicalBytes": 734720000,
- "fanoutMean": 1.56402587890625,
- "recvTokensMax": 32768,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-9bcc6cfd",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb",
- "colorKey": "h200_06aa1194",
- "comparisonKey": "fe01776775c5fb5e",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:05:23.968491+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_3",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "46855e7fa6754eb",
- "workloadId": "set:6:1ca614e23cc66be1",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 7.38995361328125,
- "eplbImbalanceAfter": 1.0000210716610862,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272097307",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272097307",
- "createdAt": "2026-06-27T00:05:23.968491+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.79200285673141,
- "p90": 122.94399738311768,
- "p95": 127.96799838542938,
- "p99": 138.33600282669067
- },
- "combine": {
- "p50": 104.38399761915207,
- "p90": 111.35999858379364,
- "p95": 117.79200285673141,
- "p99": 128.63999605178833
- },
- "roundtrip": {
- "p50": 197.82400131225586,
- "p90": 205.85599541664124,
- "p95": 212.351992726326,
- "p99": 252.86400318145752
- },
- "isolatedSum": {
- "p50": 222.17600047588348,
- "p90": 234.30399596691132,
- "p95": 245.7600012421608,
- "p99": 266.975998878479
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 79206400,
- "combineLogicalBytes": 79206400,
- "fanoutMean": 5.3955078125,
- "recvTokensMax": 713,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 145.60000598430634,
- "p90": 154.14400398731232,
- "p95": 158.39999914169312,
- "p99": 173.63199591636658
- },
- "combine": {
- "p50": 145.6959992647171,
- "p90": 150.56000649929047,
- "p95": 155.2640050649643,
- "p99": 165.56799411773682
- },
- "roundtrip": {
- "p50": 267.520010471344,
- "p90": 276.99199318885803,
- "p95": 283.03998708724976,
- "p99": 307.3599934577942
- },
- "isolatedSum": {
- "p50": 291.29600524902344,
- "p90": 304.7040104866028,
- "p95": 313.6640042066574,
- "p99": 339.1999900341034
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 159330304,
- "combineLogicalBytes": 159330304,
- "fanoutMean": 5.4267578125,
- "recvTokensMax": 1436,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 204.79999482631683,
- "p90": 213.85599672794342,
- "p95": 218.27200055122375,
- "p99": 238.52799832820892
- },
- "combine": {
- "p50": 219.4879949092865,
- "p90": 226.9439995288849,
- "p95": 233.66400599479675,
- "p99": 274.944007396698
- },
- "roundtrip": {
- "p50": 400.160014629364,
- "p90": 409.7279906272888,
- "p95": 419.16799545288086,
- "p99": 445.6320106983185
- },
- "isolatedSum": {
- "p50": 424.28798973560333,
- "p90": 440.7999962568283,
- "p95": 451.9360065460205,
- "p99": 513.4720057249069
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 319535104,
- "combineLogicalBytes": 319535104,
- "fanoutMean": 5.441650390625,
- "recvTokensMax": 2897,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 317.7599906921387,
- "p90": 327.87200808525085,
- "p95": 340.06398916244507,
- "p99": 393.3440148830414
- },
- "combine": {
- "p50": 356.1600148677826,
- "p90": 364.6079897880554,
- "p95": 369.82399225234985,
- "p99": 396.8319892883301
- },
- "roundtrip": {
- "p50": 649.6959924697876,
- "p90": 660.3519916534424,
- "p95": 664.7040247917175,
- "p99": 683.4239959716797
- },
- "isolatedSum": {
- "p50": 673.9200055599213,
- "p90": 692.4799978733063,
- "p95": 709.8879814147949,
- "p99": 790.1760041713715
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 638410752,
- "combineLogicalBytes": 638410752,
- "fanoutMean": 5.43603515625,
- "recvTokensMax": 5815,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 541.1199927330017,
- "p90": 549.8560070991516,
- "p95": 555.4239749908447,
- "p99": 643.6160206794739
- },
- "combine": {
- "p50": 614.8800253868103,
- "p90": 626.3039708137512,
- "p95": 632.2240233421326,
- "p99": 680.8639764785767
- },
- "roundtrip": {
- "p50": 1131.7440271377563,
- "p90": 1142.7839994430542,
- "p95": 1148.192048072815,
- "p99": 1196.768045425415
- },
- "isolatedSum": {
- "p50": 1156.000018119812,
- "p90": 1176.1599779129028,
- "p95": 1187.6479983329773,
- "p99": 1324.4799971580505
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1275144192,
- "combineLogicalBytes": 1275144192,
- "fanoutMean": 5.42889404296875,
- "recvTokensMax": 11606,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1027.9680490493774,
- "p90": 1046.720027923584,
- "p95": 1055.4239749908447,
- "p99": 1100.000023841858
- },
- "combine": {
- "p50": 1124.384045600891,
- "p90": 1135.9679698944092,
- "p95": 1140.8640146255493,
- "p99": 1170.9760427474976
- },
- "roundtrip": {
- "p50": 2114.5920753479004,
- "p90": 2138.495922088623,
- "p95": 2152.127981185913,
- "p99": 2480.2560806274414
- },
- "isolatedSum": {
- "p50": 2152.3520946502686,
- "p90": 2182.687997817993,
- "p95": 2196.287989616394,
- "p99": 2270.9760665893555
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2546374656,
- "combineLogicalBytes": 2546374656,
- "fanoutMean": 5.420562744140625,
- "recvTokensMax": 23170,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e075077e",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428",
- "colorKey": "h200_6a794fcd",
- "comparisonKey": "b6c24dab2941895d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:04:10.125267+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_2",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-mild",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cf93f8f6b52e428",
- "workloadId": "set:6:a224603e5a1640b8",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272065129",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272065129",
- "createdAt": "2026-06-27T00:04:10.125267+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 126.52799487113953,
- "p90": 140.3840035200119,
- "p95": 146.17599546909332,
- "p99": 177.08800733089447
- },
- "combine": {
- "p50": 116.73600226640701,
- "p90": 128.86400520801544,
- "p95": 133.63200426101685,
- "p99": 143.8719928264618
- },
- "roundtrip": {
- "p50": 216.35200083255768,
- "p90": 234.3360036611557,
- "p95": 240.25599658489227,
- "p99": 277.3120105266571
- },
- "isolatedSum": {
- "p50": 243.26399713754654,
- "p90": 269.24800872802734,
- "p95": 279.80799973011017,
- "p99": 320.96000015735626
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 70160384,
- "combineLogicalBytes": 70160384,
- "fanoutMean": 4.779296875,
- "recvTokensMax": 987,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 163.96799683570862,
- "p90": 176.256000995636,
- "p95": 180.4479956626892,
- "p99": 201.50400698184967
- },
- "combine": {
- "p50": 160.41600704193115,
- "p90": 173.0560064315796,
- "p95": 178.3680021762848,
- "p99": 186.75200641155243
- },
- "roundtrip": {
- "p50": 298.94399642944336,
- "p90": 319.487988948822,
- "p95": 328.0960023403168,
- "p99": 354.65601086616516
- },
- "isolatedSum": {
- "p50": 324.38400387763977,
- "p90": 349.3120074272156,
- "p95": 358.815997838974,
- "p99": 388.2560133934021
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 140879872,
- "combineLogicalBytes": 140879872,
- "fanoutMean": 4.79833984375,
- "recvTokensMax": 1972,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 237.72799968719482,
- "p90": 252.48000025749207,
- "p95": 263.8719975948334,
- "p99": 307.16800689697266
- },
- "combine": {
- "p50": 262.1760070323944,
- "p90": 279.1999876499176,
- "p95": 284.7999930381775,
- "p99": 311.8399977684021
- },
- "roundtrip": {
- "p50": 477.82400250434875,
- "p90": 500.70399045944214,
- "p95": 516.5759921073914,
- "p99": 701.632022857666
- },
- "isolatedSum": {
- "p50": 499.90400671958923,
- "p90": 531.6799879074097,
- "p95": 548.6719906330109,
- "p99": 619.0080046653748
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 282333184,
- "combineLogicalBytes": 282333184,
- "fanoutMean": 4.80810546875,
- "recvTokensMax": 3936,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 378.495991230011,
- "p90": 390.04799723625183,
- "p95": 399.58399534225464,
- "p99": 429.6320080757141
- },
- "combine": {
- "p50": 439.9360120296478,
- "p90": 452.2880017757416,
- "p95": 457.15200901031494,
- "p99": 474.047988653183
- },
- "roundtrip": {
- "p50": 797.4079847335815,
- "p90": 816.32000207901,
- "p95": 828.6399841308594,
- "p99": 955.839991569519
- },
- "isolatedSum": {
- "p50": 818.4320032596588,
- "p90": 842.3359990119934,
- "p95": 856.7360043525696,
- "p99": 903.6799967288971
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 566716416,
- "combineLogicalBytes": 566716416,
- "fanoutMean": 4.8255615234375,
- "recvTokensMax": 7855,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 679.4559955596924,
- "p90": 694.208025932312,
- "p95": 704.255998134613,
- "p99": 742.8159713745117
- },
- "combine": {
- "p50": 780.7040214538574,
- "p90": 795.1679825782776,
- "p95": 804.7360181808472,
- "p99": 879.7439932823181
- },
- "roundtrip": {
- "p50": 1432.0640563964844,
- "p90": 1453.279972076416,
- "p95": 1465.8559560775757,
- "p99": 1602.3039817810059
- },
- "isolatedSum": {
- "p50": 1460.1600170135498,
- "p90": 1489.3760085105896,
- "p95": 1508.9920163154602,
- "p99": 1622.5599646568298
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1132285952,
- "combineLogicalBytes": 1132285952,
- "fanoutMean": 4.8206787109375,
- "recvTokensMax": 15694,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1287.071943283081,
- "p90": 1304.8959970474243,
- "p95": 1310.7839822769165,
- "p99": 1432.2240352630615
- },
- "combine": {
- "p50": 1463.6160135269165,
- "p90": 1483.8080406188965,
- "p95": 1511.7119550704956,
- "p99": 1699.0400552749634
- },
- "roundtrip": {
- "p50": 2723.9038944244385,
- "p90": 2744.607925415039,
- "p95": 2758.2719326019287,
- "p99": 2967.616081237793
- },
- "isolatedSum": {
- "p50": 2750.6879568099976,
- "p90": 2788.704037666321,
- "p95": 2822.495937347412,
- "p99": 3131.264090538025
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2267840512,
- "combineLogicalBytes": 2267840512,
- "fanoutMean": 4.82763671875,
- "recvTokensMax": 31357,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f4768a96",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9",
- "colorKey": "h200_b2ffaf91",
- "comparisonKey": "d826aaa5f1321f31",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:04:16.163335+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_12",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-mild",
- "routingLabel": "zipf-mild+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "27ddc85ded0add9",
- "workloadId": "set:6:a224603e5a1640b8",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 2.545684814453125,
- "eplbImbalanceAfter": 1.0001495361328125,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272068834",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272068834",
- "createdAt": "2026-06-27T00:04:16.163335+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 118.07999759912491,
- "p90": 128.63999605178833,
- "p95": 134.62400436401367,
- "p99": 156.2879979610443
- },
- "combine": {
- "p50": 105.47199845314026,
- "p90": 114.43199962377548,
- "p95": 119.19999867677689,
- "p99": 136.09600067138672
- },
- "roundtrip": {
- "p50": 197.24799692630768,
- "p90": 206.01600408554077,
- "p95": 211.0079973936081,
- "p99": 226.01599991321564
- },
- "isolatedSum": {
- "p50": 223.55199605226517,
- "p90": 243.0719956755638,
- "p95": 253.82400304079056,
- "p99": 292.38399863243103
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 78159872,
- "combineLogicalBytes": 78159872,
- "fanoutMean": 5.32421875,
- "recvTokensMax": 702,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 149.08799529075623,
- "p90": 157.27999806404114,
- "p95": 161.56800091266632,
- "p99": 172.83199727535248
- },
- "combine": {
- "p50": 143.77599954605103,
- "p90": 148.99200201034546,
- "p95": 152.12799608707428,
- "p99": 163.68000209331512
- },
- "roundtrip": {
- "p50": 265.28000831604004,
- "p90": 273.50398898124695,
- "p95": 279.35999631881714,
- "p99": 293.37599873542786
- },
- "isolatedSum": {
- "p50": 292.86399483680725,
- "p90": 306.2720000743866,
- "p95": 313.6959969997406,
- "p99": 336.5119993686676
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156563456,
- "combineLogicalBytes": 156563456,
- "fanoutMean": 5.33251953125,
- "recvTokensMax": 1393,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 206.2080055475235,
- "p90": 216.99200570583344,
- "p95": 223.03999960422516,
- "p99": 264.44798707962036
- },
- "combine": {
- "p50": 225.40800273418427,
- "p90": 233.37599635124207,
- "p95": 238.65599930286407,
- "p99": 253.56799364089966
- },
- "roundtrip": {
- "p50": 404.4800102710724,
- "p90": 415.2959883213043,
- "p95": 423.552006483078,
- "p99": 451.9039988517761
- },
- "isolatedSum": {
- "p50": 431.61600828170776,
- "p90": 450.3680020570755,
- "p95": 461.69599890708923,
- "p99": 518.01598072052
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312410112,
- "combineLogicalBytes": 312410112,
- "fanoutMean": 5.3203125,
- "recvTokensMax": 2773,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 313.27998638153076,
- "p90": 324.8960077762604,
- "p95": 334.7199857234955,
- "p99": 349.2160141468048
- },
- "combine": {
- "p50": 357.05599188804626,
- "p90": 370.59199810028076,
- "p95": 381.4080059528351,
- "p99": 418.43199729919434
- },
- "roundtrip": {
- "p50": 643.7439918518066,
- "p90": 656.0959815979004,
- "p95": 666.2399768829346,
- "p99": 702.9759883880615
- },
- "isolatedSum": {
- "p50": 670.335978269577,
- "p90": 695.4880058765411,
- "p95": 716.1279916763306,
- "p99": 767.6480114459991
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 622712832,
- "combineLogicalBytes": 622712832,
- "fanoutMean": 5.3023681640625,
- "recvTokensMax": 5498,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 530.1439762115479,
- "p90": 539.5519733428955,
- "p95": 543.008029460907,
- "p99": 568.9600110054016
- },
- "combine": {
- "p50": 611.5840077400208,
- "p90": 622.048020362854,
- "p95": 629.2799711227417,
- "p99": 677.5040030479431
- },
- "roundtrip": {
- "p50": 1115.488052368164,
- "p90": 1129.248023033142,
- "p95": 1135.583996772766,
- "p99": 1275.6479978561401
- },
- "isolatedSum": {
- "p50": 1141.7279839515686,
- "p90": 1161.5999937057495,
- "p95": 1172.2880005836487,
- "p99": 1246.4640140533447
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1245038592,
- "combineLogicalBytes": 1245038592,
- "fanoutMean": 5.30072021484375,
- "recvTokensMax": 10955,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 986.1119985580444,
- "p90": 1002.2720098495483,
- "p95": 1011.0080242156982,
- "p99": 1069.0239667892456
- },
- "combine": {
- "p50": 1125.3440380096436,
- "p90": 1136.6080045700073,
- "p95": 1142.3360109329224,
- "p99": 1163.8400554656982
- },
- "roundtrip": {
- "p50": 2081.088066101074,
- "p90": 2097.9840755462646,
- "p95": 2111.0079288482666,
- "p99": 2311.743974685669
- },
- "isolatedSum": {
- "p50": 2111.456036567688,
- "p90": 2138.8800144195557,
- "p95": 2153.3440351486206,
- "p99": 2232.864022254944
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2489460736,
- "combineLogicalBytes": 2489460736,
- "fanoutMean": 5.299407958984375,
- "recvTokensMax": 21864,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e1ecd1d4",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86",
- "colorKey": "h200_f2b19f62",
- "comparisonKey": "a7c9c0202574b9d0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:04:45.749249+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_10",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-moderate",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b5217e990b95f86",
- "workloadId": "set:6:6709a02c31933a9f",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272079152",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272079152",
- "createdAt": "2026-06-27T00:04:45.749249+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 122.94399738311768,
- "p90": 134.20799374580383,
- "p95": 138.87999951839447,
- "p99": 150.87999403476715
- },
- "combine": {
- "p50": 111.90400272607803,
- "p90": 122.43200093507767,
- "p95": 128.38399410247803,
- "p99": 136.4479959011078
- },
- "roundtrip": {
- "p50": 213.8880044221878,
- "p90": 230.43200373649597,
- "p95": 236.735999584198,
- "p99": 261.4080011844635
- },
- "isolatedSum": {
- "p50": 234.8480001091957,
- "p90": 256.6399946808815,
- "p95": 267.2639936208725,
- "p99": 287.32798993587494
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 161.72799468040466,
- "p90": 174.20800030231476,
- "p95": 182.49599635601044,
- "p99": 194.72000002861023
- },
- "combine": {
- "p50": 158.27199816703796,
- "p90": 174.8799979686737,
- "p95": 179.58399653434753,
- "p99": 191.26400351524353
- },
- "roundtrip": {
- "p50": 296.9920039176941,
- "p90": 319.0079927444458,
- "p95": 327.2320032119751,
- "p99": 340.03201127052307
- },
- "isolatedSum": {
- "p50": 319.9999928474426,
- "p90": 349.08799827098846,
- "p95": 362.07999289035797,
- "p99": 385.98400354385376
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 100509696,
- "combineLogicalBytes": 100509696,
- "fanoutMean": 3.42333984375,
- "recvTokensMax": 2046,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 237.34399676322937,
- "p90": 252.19199061393738,
- "p95": 259.48798656463623,
- "p99": 274.0800082683563
- },
- "combine": {
- "p50": 260.44800877571106,
- "p90": 278.2079875469208,
- "p95": 284.7999930381775,
- "p99": 298.880010843277
- },
- "roundtrip": {
- "p50": 475.1040041446686,
- "p90": 495.2319860458374,
- "p95": 509.3119740486145,
- "p99": 531.8080186843872
- },
- "isolatedSum": {
- "p50": 497.79200553894043,
- "p90": 530.3999781608582,
- "p95": 544.2879796028137,
- "p99": 572.9600191116333
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 379.4879913330078,
- "p90": 389.60000872612,
- "p95": 395.6800103187561,
- "p99": 409.92000699043274
- },
- "combine": {
- "p50": 438.1760060787201,
- "p90": 452.06400752067566,
- "p95": 457.69599080085754,
- "p99": 494.59201097488403
- },
- "roundtrip": {
- "p50": 794.2079901695251,
- "p90": 809.7919821739197,
- "p95": 823.6799836158752,
- "p99": 875.6160140037537
- },
- "isolatedSum": {
- "p50": 817.6639974117279,
- "p90": 841.6640162467957,
- "p95": 853.3760011196136,
- "p99": 904.5120179653168
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 405035008,
- "combineLogicalBytes": 405035008,
- "fanoutMean": 3.4488525390625,
- "recvTokensMax": 8189,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 671.2319850921631,
- "p90": 682.6879978179932,
- "p95": 689.2480254173279,
- "p99": 929.0879964828491
- },
- "combine": {
- "p50": 786.7839932441711,
- "p90": 799.1999983787537,
- "p95": 804.2880296707153,
- "p99": 833.6960077285767
- },
- "roundtrip": {
- "p50": 1430.0800561904907,
- "p90": 1449.9200582504272,
- "p95": 1461.3120555877686,
- "p99": 1667.8080558776855
- },
- "isolatedSum": {
- "p50": 1458.0159783363342,
- "p90": 1481.8879961967468,
- "p95": 1493.5360550880432,
- "p99": 1762.7840042114258
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1269.2480087280273,
- "p90": 1284.5439910888672,
- "p95": 1292.9919958114624,
- "p99": 1424.064040184021
- },
- "combine": {
- "p50": 1480.6400537490845,
- "p90": 1504.7039985656738,
- "p95": 1519.10400390625,
- "p99": 1724.0320444107056
- },
- "roundtrip": {
- "p50": 2719.4879055023193,
- "p90": 2740.70405960083,
- "p95": 2764.8000717163086,
- "p99": 3076.0960578918457
- },
- "isolatedSum": {
- "p50": 2749.888062477112,
- "p90": 2789.247989654541,
- "p95": 2812.0959997177124,
- "p99": 3148.0960845947266
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1619795968,
- "combineLogicalBytes": 1619795968,
- "fanoutMean": 3.4481201171875,
- "recvTokensMax": 32761,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f58892d6",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39",
- "colorKey": "h200_bac4102c",
- "comparisonKey": "402825358de599a6",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:04:49.601548+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf-moderate",
- "routingLabel": "zipf-moderate+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2b57a75d27f5b39",
- "workloadId": "set:6:6709a02c31933a9f",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.895263671875,
- "eplbImbalanceAfter": 1.0000902811686199,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272082600",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272082600",
- "createdAt": "2026-06-27T00:04:49.601548+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.72800236940384,
- "p90": 136.25599443912506,
- "p95": 140.8960074186325,
- "p99": 185.34399569034576
- },
- "combine": {
- "p50": 103.61599922180176,
- "p90": 115.9679964184761,
- "p95": 122.49600142240524,
- "p99": 137.7599984407425
- },
- "roundtrip": {
- "p50": 197.02400267124176,
- "p90": 215.13600647449493,
- "p95": 222.6240038871765,
- "p99": 233.43999683856964
- },
- "isolatedSum": {
- "p50": 221.3440015912056,
- "p90": 252.22399085760117,
- "p95": 263.39200884103775,
- "p99": 323.10399413108826
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77385728,
- "combineLogicalBytes": 77385728,
- "fanoutMean": 5.271484375,
- "recvTokensMax": 691,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 145.21600306034088,
- "p90": 166.1120057106018,
- "p95": 175.1679927110672,
- "p99": 194.91200149059296
- },
- "combine": {
- "p50": 144.22400295734406,
- "p90": 156.2879979610443,
- "p95": 161.18399798870087,
- "p99": 171.90399765968323
- },
- "roundtrip": {
- "p50": 262.87999749183655,
- "p90": 277.5999903678894,
- "p95": 286.3999903202057,
- "p99": 298.97600412368774
- },
- "isolatedSum": {
- "p50": 289.44000601768494,
- "p90": 322.4000036716461,
- "p95": 336.35199069976807,
- "p99": 366.8159991502762
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155172864,
- "combineLogicalBytes": 155172864,
- "fanoutMean": 5.28515625,
- "recvTokensMax": 1378,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 203.3279985189438,
- "p90": 218.36799383163452,
- "p95": 226.1440008878708,
- "p99": 242.8479939699173
- },
- "combine": {
- "p50": 223.00800681114197,
- "p90": 237.5359982252121,
- "p95": 245.7599937915802,
- "p99": 267.2959864139557
- },
- "roundtrip": {
- "p50": 399.77601170539856,
- "p90": 420.415997505188,
- "p95": 433.1839978694916,
- "p99": 505.40798902511597
- },
- "isolatedSum": {
- "p50": 426.33600533008575,
- "p90": 455.9039920568466,
- "p95": 471.903994679451,
- "p99": 510.143980383873
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 310546432,
- "combineLogicalBytes": 310546432,
- "fanoutMean": 5.28857421875,
- "recvTokensMax": 2745,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 312.1280074119568,
- "p90": 327.7119994163513,
- "p95": 334.879994392395,
- "p99": 400.4479944705963
- },
- "combine": {
- "p50": 352.7680039405823,
- "p90": 362.527996301651,
- "p95": 367.6159977912903,
- "p99": 386.0799968242645
- },
- "roundtrip": {
- "p50": 641.1839723587036,
- "p90": 658.1119894981384,
- "p95": 666.0159826278687,
- "p99": 719.5199728012085
- },
- "isolatedSum": {
- "p50": 664.8960113525391,
- "p90": 690.2399957180023,
- "p95": 702.4959921836853,
- "p99": 786.5279912948608
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 620619776,
- "combineLogicalBytes": 620619776,
- "fanoutMean": 5.2845458984375,
- "recvTokensMax": 5526,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 527.5200009346008,
- "p90": 542.4320101737976,
- "p95": 550.4639744758606,
- "p99": 575.2959847450256
- },
- "combine": {
- "p50": 620.3839778900146,
- "p90": 633.5999965667725,
- "p95": 639.2639875411987,
- "p99": 673.8560199737549
- },
- "roundtrip": {
- "p50": 1121.1520433425903,
- "p90": 1137.0879411697388,
- "p95": 1147.3599672317505,
- "p99": 1174.7519969940186
- },
- "isolatedSum": {
- "p50": 1147.9039788246155,
- "p90": 1176.03200674057,
- "p95": 1189.7279620170593,
- "p99": 1249.1520047187805
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239175168,
- "combineLogicalBytes": 1239175168,
- "fanoutMean": 5.2757568359375,
- "recvTokensMax": 11165,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1003.3919811248779,
- "p90": 1031.5200090408325,
- "p95": 1040.4160022735596,
- "p99": 1070.2400207519531
- },
- "combine": {
- "p50": 1121.9840049743652,
- "p90": 1135.7760429382324,
- "p95": 1145.0239419937134,
- "p99": 1167.8400039672852
- },
- "roundtrip": {
- "p50": 2083.0399990081787,
- "p90": 2113.568067550659,
- "p95": 2122.431993484497,
- "p99": 2277.791976928711
- },
- "isolatedSum": {
- "p50": 2125.375986099243,
- "p90": 2167.296051979065,
- "p95": 2185.439944267273,
- "p99": 2238.0800247192383
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2481604608,
- "combineLogicalBytes": 2481604608,
- "fanoutMean": 5.282684326171875,
- "recvTokensMax": 22165,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8c2088d8",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39",
- "colorKey": "h200_1eda221e",
- "comparisonKey": "6ee0b18a3e276ae1",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:03:37.741116+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_4",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2b57a75d27f5b39",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.895263671875,
- "eplbImbalanceAfter": 1.0000902811686199,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272052634",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272052634",
- "createdAt": "2026-06-27T00:03:37.741116+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 118.65600198507309,
- "p90": 133.56800377368927,
- "p95": 139.1039937734604,
- "p99": 146.97599411010742
- },
- "combine": {
- "p50": 104.3199971318245,
- "p90": 118.01599711179733,
- "p95": 121.76000326871872,
- "p99": 131.77600502967834
- },
- "roundtrip": {
- "p50": 197.02400267124176,
- "p90": 214.75200355052948,
- "p95": 219.67999637126923,
- "p99": 230.97600042819977
- },
- "isolatedSum": {
- "p50": 222.97599911689758,
- "p90": 251.5840008854866,
- "p95": 260.8639970421791,
- "p99": 278.75199913978577
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77385728,
- "combineLogicalBytes": 77385728,
- "fanoutMean": 5.271484375,
- "recvTokensMax": 691,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 144.41600441932678,
- "p90": 161.8880033493042,
- "p95": 168.96000504493713,
- "p99": 186.43200397491455
- },
- "combine": {
- "p50": 143.19999516010284,
- "p90": 153.08800339698792,
- "p95": 157.4079990386963,
- "p99": 164.60800170898438
- },
- "roundtrip": {
- "p50": 262.87999749183655,
- "p90": 275.32801032066345,
- "p95": 282.4000120162964,
- "p99": 291.00799560546875
- },
- "isolatedSum": {
- "p50": 287.6159995794296,
- "p90": 314.9760067462921,
- "p95": 326.3680040836334,
- "p99": 351.0400056838989
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155172864,
- "combineLogicalBytes": 155172864,
- "fanoutMean": 5.28515625,
- "recvTokensMax": 1378,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 203.0079960823059,
- "p90": 220.768004655838,
- "p95": 227.55199670791626,
- "p99": 253.63200902938843
- },
- "combine": {
- "p50": 219.4879949092865,
- "p90": 227.52000391483307,
- "p95": 231.23200237751007,
- "p99": 248.79999458789825
- },
- "roundtrip": {
- "p50": 397.0560133457184,
- "p90": 409.5039963722229,
- "p95": 413.4719967842102,
- "p99": 425.82398653030396
- },
- "isolatedSum": {
- "p50": 422.4959909915924,
- "p90": 448.2880085706711,
- "p95": 458.78399908542633,
- "p99": 502.4320036172867
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 310546432,
- "combineLogicalBytes": 310546432,
- "fanoutMean": 5.28857421875,
- "recvTokensMax": 2745,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 311.5839958190918,
- "p90": 334.52799916267395,
- "p95": 339.2319977283478,
- "p99": 353.88800501823425
- },
- "combine": {
- "p50": 350.20801424980164,
- "p90": 362.0480000972748,
- "p95": 365.9839928150177,
- "p99": 423.71198534965515
- },
- "roundtrip": {
- "p50": 636.7999911308289,
- "p90": 650.1439809799194,
- "p95": 654.2080044746399,
- "p99": 711.4560008049011
- },
- "isolatedSum": {
- "p50": 661.7920100688934,
- "p90": 696.5759992599487,
- "p95": 705.2159905433655,
- "p99": 777.5999903678894
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 620619776,
- "combineLogicalBytes": 620619776,
- "fanoutMean": 5.2845458984375,
- "recvTokensMax": 5526,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 528.3839702606201,
- "p90": 545.7599759101868,
- "p95": 551.9999861717224,
- "p99": 572.2879767417908
- },
- "combine": {
- "p50": 608.959972858429,
- "p90": 620.9920048713684,
- "p95": 626.1119842529297,
- "p99": 657.0559740066528
- },
- "roundtrip": {
- "p50": 1110.2720499038696,
- "p90": 1125.0239610671997,
- "p95": 1132.032036781311,
- "p99": 1183.0079555511475
- },
- "isolatedSum": {
- "p50": 1137.343943119049,
- "p90": 1166.7519807815552,
- "p95": 1178.111970424652,
- "p99": 1229.3439507484436
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239175168,
- "combineLogicalBytes": 1239175168,
- "fanoutMean": 5.2757568359375,
- "recvTokensMax": 11165,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 994.4639801979065,
- "p90": 1016.1600112915039,
- "p95": 1023.9039659500122,
- "p99": 1042.0479774475098
- },
- "combine": {
- "p50": 1103.2960414886475,
- "p90": 1116.2559986114502,
- "p95": 1121.7600107192993,
- "p99": 1139.4879817962646
- },
- "roundtrip": {
- "p50": 2056.544065475464,
- "p90": 2077.9199600219727,
- "p95": 2088.671922683716,
- "p99": 2251.3279914855957
- },
- "isolatedSum": {
- "p50": 2097.760021686554,
- "p90": 2132.416009902954,
- "p95": 2145.6639766693115,
- "p99": 2181.5359592437744
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2481604608,
- "combineLogicalBytes": 2481604608,
- "fanoutMean": 5.282684326171875,
- "recvTokensMax": 22165,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8e568434",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "h200_c851a534",
- "comparisonKey": "1f9e00010b0d6e5b",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:29:59.726916+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_11",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254392935",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935",
- "createdAt": "2026-06-26T17:29:59.726916+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 116.44800007343292,
- "p90": 126.97599828243256,
- "p95": 137.92000710964203,
- "p99": 159.96800363063812
- },
- "combine": {
- "p50": 103.55199873447418,
- "p90": 113.11999708414078,
- "p95": 120.80000340938568,
- "p99": 147.10399508476257
- },
- "roundtrip": {
- "p50": 194.62400674819946,
- "p90": 208.19200575351715,
- "p95": 215.39199352264404,
- "p99": 238.75199258327484
- },
- "isolatedSum": {
- "p50": 219.9999988079071,
- "p90": 240.09599536657333,
- "p95": 258.7200105190277,
- "p99": 307.0719987154007
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 146.97599411010742,
- "p90": 163.07200491428375,
- "p95": 171.77599668502808,
- "p99": 191.42399728298187
- },
- "combine": {
- "p50": 142.84799993038177,
- "p90": 154.78399395942688,
- "p95": 165.12000560760498,
- "p99": 172.28800058364868
- },
- "roundtrip": {
- "p50": 267.0080065727234,
- "p90": 288.9600098133087,
- "p95": 295.77600955963135,
- "p99": 315.71200489997864
- },
- "isolatedSum": {
- "p50": 289.8239940404892,
- "p90": 317.85599887371063,
- "p95": 336.89600229263306,
- "p99": 363.71199786663055
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 207.68000185489655,
- "p90": 228.64000499248505,
- "p95": 236.92800104618073,
- "p99": 267.90401339530945
- },
- "combine": {
- "p50": 210.36800742149353,
- "p90": 225.0239998102188,
- "p95": 234.68799889087677,
- "p99": 271.58400416374207
- },
- "roundtrip": {
- "p50": 390.49598574638367,
- "p90": 413.37600350379944,
- "p95": 420.28799653053284,
- "p99": 449.8240053653717
- },
- "isolatedSum": {
- "p50": 418.0480092763901,
- "p90": 453.66400480270386,
- "p95": 471.6159999370575,
- "p99": 539.4880175590515
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 324.8960077762604,
- "p90": 341.5679931640625,
- "p95": 351.4559864997864,
- "p99": 364.73599076271057
- },
- "combine": {
- "p50": 328.0960023403168,
- "p90": 339.6480083465576,
- "p95": 345.95200419425964,
- "p99": 362.8480136394501
- },
- "roundtrip": {
- "p50": 628.9600133895874,
- "p90": 643.231987953186,
- "p95": 649.3120193481445,
- "p99": 664.3199920654297
- },
- "isolatedSum": {
- "p50": 652.9920101165771,
- "p90": 681.2160015106201,
- "p95": 697.407990694046,
- "p99": 727.5840044021606
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 566.3679838180542,
- "p90": 581.0880064964294,
- "p95": 587.2960090637207,
- "p99": 609.1520190238953
- },
- "combine": {
- "p50": 560.9920024871826,
- "p90": 573.0559825897217,
- "p95": 578.2399773597717,
- "p99": 609.7279787063599
- },
- "roundtrip": {
- "p50": 1097.3440408706665,
- "p90": 1114.400029182434,
- "p95": 1121.791958808899,
- "p99": 1286.6239547729492
- },
- "isolatedSum": {
- "p50": 1127.3599863052368,
- "p90": 1154.1439890861511,
- "p95": 1165.5359864234924,
- "p99": 1218.8799977302551
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1051.8079996109009,
- "p90": 1067.8720474243164,
- "p95": 1078.271985054016,
- "p99": 1161.4079475402832
- },
- "combine": {
- "p50": 1028.9920568466187,
- "p90": 1044.0959930419922,
- "p95": 1054.4320344924927,
- "p99": 1218.783974647522
- },
- "roundtrip": {
- "p50": 2049.3760108947754,
- "p90": 2068.4800148010254,
- "p95": 2079.200029373169,
- "p99": 2593.600034713745
- },
- "isolatedSum": {
- "p50": 2080.8000564575195,
- "p90": 2111.9680404663086,
- "p95": 2132.704019546509,
- "p99": 2380.191922187805
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-6764a75f",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|normalized|0.18|0a3064a2af0dd39",
- "colorKey": "h200_a1e795ec",
- "comparisonKey": "5a22622d9db14749",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:54.944678+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_8",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm) · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "0a3064a2af0dd39",
- "workloadId": "set:6:2dad1a73ff872905",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254443915",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254443915",
- "createdAt": "2026-06-26T17:30:54.944678+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 133.88800621032715,
- "p90": 147.16799557209015,
- "p95": 159.5200002193451,
- "p99": 177.76000499725342
- },
- "combine": {
- "p50": 119.39200013875961,
- "p90": 131.80799782276154,
- "p95": 139.74399864673615,
- "p99": 152.48000621795654
- },
- "roundtrip": {
- "p50": 227.64800488948822,
- "p90": 249.05599653720856,
- "p95": 255.74401021003723,
- "p99": 274.3679881095886
- },
- "isolatedSum": {
- "p50": 253.28000634908676,
- "p90": 278.9759933948517,
- "p95": 299.26399886608124,
- "p99": 330.24001121520996
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 117440512,
- "combineLogicalBytes": 117440512,
- "fanoutMean": 8,
- "recvTokensMax": 1024,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 176.54399573802948,
- "p90": 188.4160041809082,
- "p95": 203.07199656963348,
- "p99": 299.8400032520294
- },
- "combine": {
- "p50": 169.91999745368958,
- "p90": 175.48799514770508,
- "p95": 180.16000092029572,
- "p99": 187.51999735832214
- },
- "roundtrip": {
- "p50": 319.4560110569,
- "p90": 328.7679851055145,
- "p95": 336.32001280784607,
- "p99": 355.0400137901306
- },
- "isolatedSum": {
- "p50": 346.46399319171906,
- "p90": 363.9039993286133,
- "p95": 383.2319974899292,
- "p99": 487.36000061035156
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 234881024,
- "combineLogicalBytes": 234881024,
- "fanoutMean": 8,
- "recvTokensMax": 2048,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 269.567996263504,
- "p90": 288.12798857688904,
- "p95": 294.048011302948,
- "p99": 315.3280019760132
- },
- "combine": {
- "p50": 262.0159983634949,
- "p90": 282.1120023727417,
- "p95": 286.5920066833496,
- "p99": 306.11199140548706
- },
- "roundtrip": {
- "p50": 505.7920217514038,
- "p90": 531.9039821624756,
- "p95": 535.7760190963745,
- "p99": 544.6720123291016
- },
- "isolatedSum": {
- "p50": 531.5839946269989,
- "p90": 570.2399909496307,
- "p95": 580.6400179862976,
- "p99": 621.4399933815002
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 469762048,
- "combineLogicalBytes": 469762048,
- "fanoutMean": 8,
- "recvTokensMax": 4096,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 443.87200474739075,
- "p90": 459.55199003219604,
- "p95": 467.74399280548096,
- "p99": 487.199991941452
- },
- "combine": {
- "p50": 427.64800786972046,
- "p90": 442.81598925590515,
- "p95": 451.58401131629944,
- "p99": 483.13599824905396
- },
- "roundtrip": {
- "p50": 844.7999954223633,
- "p90": 860.0640296936035,
- "p95": 867.0719861984253,
- "p99": 924.67200756073
- },
- "isolatedSum": {
- "p50": 871.5200126171112,
- "p90": 902.3679792881012,
- "p95": 919.3280041217804,
- "p99": 970.335990190506
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 939524096,
- "combineLogicalBytes": 939524096,
- "fanoutMean": 8,
- "recvTokensMax": 8192,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 806.1119914054871,
- "p90": 823.7119913101196,
- "p95": 832.4480056762695,
- "p99": 892.3199772834778
- },
- "combine": {
- "p50": 758.9120268821716,
- "p90": 777.1199941635132,
- "p95": 790.3040051460266,
- "p99": 827.3919820785522
- },
- "roundtrip": {
- "p50": 1534.5920324325562,
- "p90": 1550.75204372406,
- "p95": 1561.3759756088257,
- "p99": 1597.9520082473755
- },
- "isolatedSum": {
- "p50": 1565.0240182876587,
- "p90": 1600.8319854736328,
- "p95": 1622.7520108222961,
- "p99": 1719.71195936203
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1879048192,
- "combineLogicalBytes": 1879048192,
- "fanoutMean": 8,
- "recvTokensMax": 16384,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1534.7520112991333,
- "p90": 1552.4159669876099,
- "p95": 1570.9120035171509,
- "p99": 1686.7519617080688
- },
- "combine": {
- "p50": 1415.2640104293823,
- "p90": 1439.2000436782837,
- "p95": 1449.120044708252,
- "p99": 1643.1679725646973
- },
- "roundtrip": {
- "p50": 2922.528028488159,
- "p90": 2943.743944168091,
- "p95": 2957.535982131958,
- "p99": 3040.5759811401367
- },
- "isolatedSum": {
- "p50": 2950.0160217285156,
- "p90": 2991.6160106658936,
- "p95": 3020.032048225403,
- "p99": 3329.919934272766
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3758096384,
- "combineLogicalBytes": 3758096384,
- "fanoutMean": 8,
- "recvTokensMax": 32768,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e63750d6",
- "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|normalized|0.18|b5217e990b95f86",
- "colorKey": "h200_0a93a01f",
- "comparisonKey": "f4911d0a95d49c62",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:31:03.582434+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_0",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm) · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "b5217e990b95f86",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254452252",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254452252",
- "createdAt": "2026-06-26T17:31:03.582434+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 120.86399644613266,
- "p90": 133.53599607944489,
- "p95": 138.5280042886734,
- "p99": 154.01600301265717
- },
- "combine": {
- "p50": 112.64000087976456,
- "p90": 124.86399710178375,
- "p95": 130.5599957704544,
- "p99": 142.7839994430542
- },
- "roundtrip": {
- "p50": 213.47199380397797,
- "p90": 229.72799837589264,
- "p95": 238.68800699710846,
- "p99": 280.8000147342682
- },
- "isolatedSum": {
- "p50": 233.50399732589722,
- "p90": 258.39999318122864,
- "p95": 269.0880000591278,
- "p99": 296.80000245571136
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 49946624,
- "combineLogicalBytes": 49946624,
- "fanoutMean": 3.40234375,
- "recvTokensMax": 1022,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 163.55200111865997,
- "p90": 173.7920045852661,
- "p95": 181.66400492191315,
- "p99": 202.87999510765076
- },
- "combine": {
- "p50": 156.54399991035461,
- "p90": 170.9119975566864,
- "p95": 178.20799350738525,
- "p99": 194.62400674819946
- },
- "roundtrip": {
- "p50": 297.1839904785156,
- "p90": 314.65598940849304,
- "p95": 321.02400064468384,
- "p99": 352.28800773620605
- },
- "isolatedSum": {
- "p50": 320.0960010290146,
- "p90": 344.7040021419525,
- "p95": 359.8719984292984,
- "p99": 397.5040018558502
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 100509696,
- "combineLogicalBytes": 100509696,
- "fanoutMean": 3.42333984375,
- "recvTokensMax": 2046,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 237.56800591945648,
- "p90": 246.72000110149384,
- "p95": 252.44799256324768,
- "p99": 262.2720003128052
- },
- "combine": {
- "p50": 242.3039972782135,
- "p90": 256.99201226234436,
- "p95": 264.5759880542755,
- "p99": 294.17601227760315
- },
- "roundtrip": {
- "p50": 457.5679898262024,
- "p90": 477.27999091148376,
- "p95": 485.6959879398346,
- "p99": 519.9679732322693
- },
- "isolatedSum": {
- "p50": 479.87200319767,
- "p90": 503.7120133638382,
- "p95": 517.0239806175232,
- "p99": 556.4480125904083
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 201678848,
- "combineLogicalBytes": 201678848,
- "fanoutMean": 3.4345703125,
- "recvTokensMax": 4094,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 384.0320110321045,
- "p90": 394.9120044708252,
- "p95": 400.2879858016968,
- "p99": 411.77600622177124
- },
- "combine": {
- "p50": 408.2239866256714,
- "p90": 420.22401094436646,
- "p95": 427.39200592041016,
- "p99": 457.5679898262024
- },
- "roundtrip": {
- "p50": 765.9199833869934,
- "p90": 785.9519720077515,
- "p95": 798.2079982757568,
- "p99": 844.543993473053
- },
- "isolatedSum": {
- "p50": 792.2559976577759,
- "p90": 815.1360154151917,
- "p95": 827.6799917221069,
- "p99": 869.3439960479736
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 405035008,
- "combineLogicalBytes": 405035008,
- "fanoutMean": 3.4488525390625,
- "recvTokensMax": 8189,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 669.6959733963013,
- "p90": 682.3359727859497,
- "p95": 689.0559792518616,
- "p99": 731.8080067634583
- },
- "combine": {
- "p50": 727.1360158920288,
- "p90": 740.4800057411194,
- "p95": 746.783971786499,
- "p99": 762.8480195999146
- },
- "roundtrip": {
- "p50": 1366.0800457000732,
- "p90": 1389.631986618042,
- "p95": 1405.6639671325684,
- "p99": 1561.8239641189575
- },
- "isolatedSum": {
- "p50": 1396.83198928833,
- "p90": 1422.815978527069,
- "p95": 1435.8399510383606,
- "p99": 1494.6560263633728
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 808822784,
- "combineLogicalBytes": 808822784,
- "fanoutMean": 3.44354248046875,
- "recvTokensMax": 16380,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1259.1999769210815,
- "p90": 1273.1839418411255,
- "p95": 1278.5600423812866,
- "p99": 1390.463948249817
- },
- "combine": {
- "p50": 1366.8160438537598,
- "p90": 1383.2319974899292,
- "p95": 1391.2960290908813,
- "p99": 1428.5119771957397
- },
- "roundtrip": {
- "p50": 2598.0799198150635,
- "p90": 2617.0880794525146,
- "p95": 2628.2238960266113,
- "p99": 2879.9679279327393
- },
- "isolatedSum": {
- "p50": 2626.0160207748413,
- "p90": 2656.4159393310547,
- "p95": 2669.856071472168,
- "p99": 2818.9759254455566
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1619795968,
- "combineLogicalBytes": 1619795968,
- "fanoutMean": 3.4481201171875,
- "recvTokensMax": 32761,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-353049ec",
- "identity": "h200|deepep|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|normalized|0.18|2b57a75d27f5b39",
- "colorKey": "h200_993777bf",
- "comparisonKey": "cb74cc9ee6130bb2",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:47:04.200207+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_1",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm) · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2b57a75d27f5b39",
- "workloadId": "set:6:830e36e88869e222",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.895263671875,
- "eplbImbalanceAfter": 1.0000902811686199,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28255303840",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28255303840",
- "createdAt": "2026-06-26T17:47:04.200207+00:00",
- "sha": "36d3eb6c3c7386d3220c873d305410219c5c0f17"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 117.40799993276596,
- "p90": 132.54399597644806,
- "p95": 140.06400108337402,
- "p99": 154.27200496196747
- },
- "combine": {
- "p50": 104.3199971318245,
- "p90": 118.04799735546112,
- "p95": 123.99999797344208,
- "p99": 158.75199437141418
- },
- "roundtrip": {
- "p50": 193.9840018749237,
- "p90": 207.68000185489655,
- "p95": 215.61600267887115,
- "p99": 244.6720004081726
- },
- "isolatedSum": {
- "p50": 221.72799706459045,
- "p90": 250.59199333190918,
- "p95": 264.0639990568161,
- "p99": 313.02399933338165
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77385728,
- "combineLogicalBytes": 77385728,
- "fanoutMean": 5.271484375,
- "recvTokensMax": 691,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 146.68799936771393,
- "p90": 160.64000129699707,
- "p95": 169.855996966362,
- "p99": 192.06400215625763
- },
- "combine": {
- "p50": 142.91200041770935,
- "p90": 152.0320028066635,
- "p95": 157.98400342464447,
- "p99": 178.0479997396469
- },
- "roundtrip": {
- "p50": 266.1440074443817,
- "p90": 278.7199914455414,
- "p95": 285.6000065803528,
- "p99": 310.43198704719543
- },
- "isolatedSum": {
- "p50": 289.5999997854233,
- "p90": 312.6720041036606,
- "p95": 327.84000039100647,
- "p99": 370.11200189590454
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155172864,
- "combineLogicalBytes": 155172864,
- "fanoutMean": 5.28515625,
- "recvTokensMax": 1378,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 208.12800526618958,
- "p90": 229.8559993505478,
- "p95": 237.34399676322937,
- "p99": 272.5760042667389
- },
- "combine": {
- "p50": 210.62399446964264,
- "p90": 222.75200486183167,
- "p95": 228.99200022220612,
- "p99": 251.45599246025085
- },
- "roundtrip": {
- "p50": 391.4879858493805,
- "p90": 413.05598616600037,
- "p95": 424.54400658607483,
- "p99": 474.047988653183
- },
- "isolatedSum": {
- "p50": 418.7519997358322,
- "p90": 452.60800421237946,
- "p95": 466.3359969854355,
- "p99": 524.0319967269897
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 310546432,
- "combineLogicalBytes": 310546432,
- "fanoutMean": 5.28857421875,
- "recvTokensMax": 2745,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 322.7840065956116,
- "p90": 342.78398752212524,
- "p95": 351.6800105571747,
- "p99": 378.2399892807007
- },
- "combine": {
- "p50": 330.1439881324768,
- "p90": 345.0239896774292,
- "p95": 349.8559892177582,
- "p99": 379.13599610328674
- },
- "roundtrip": {
- "p50": 626.2080073356628,
- "p90": 646.8480229377747,
- "p95": 661.1520051956177,
- "p99": 823.4559893608093
- },
- "isolatedSum": {
- "p50": 652.9279947280884,
- "p90": 687.8079771995544,
- "p95": 701.5359997749329,
- "p99": 757.3759853839874
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 620619776,
- "combineLogicalBytes": 620619776,
- "fanoutMean": 5.2845458984375,
- "recvTokensMax": 5526,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 569.7280168533325,
- "p90": 585.7920050621033,
- "p95": 596.2240099906921,
- "p99": 690.7520294189453
- },
- "combine": {
- "p50": 569.1199898719788,
- "p90": 583.1040143966675,
- "p95": 591.0400152206421,
- "p99": 609.503984451294
- },
- "roundtrip": {
- "p50": 1109.8560094833374,
- "p90": 1127.8719902038574,
- "p95": 1138.335943222046,
- "p99": 1191.648006439209
- },
- "isolatedSum": {
- "p50": 1138.8480067253113,
- "p90": 1168.8960194587708,
- "p95": 1187.2640252113342,
- "p99": 1300.2560138702393
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239175168,
- "combineLogicalBytes": 1239175168,
- "fanoutMean": 5.2757568359375,
- "recvTokensMax": 11165,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1082.5920104980469,
- "p90": 1103.16801071167,
- "p95": 1116.927981376648,
- "p99": 1311.8400573730469
- },
- "combine": {
- "p50": 1018.3039903640747,
- "p90": 1032.4480533599854,
- "p95": 1047.5200414657593,
- "p99": 1417.472004890442
- },
- "roundtrip": {
- "p50": 2072.60799407959,
- "p90": 2096.7679023742676,
- "p95": 2112.7359867095947,
- "p99": 2388.000011444092
- },
- "isolatedSum": {
- "p50": 2100.8960008621216,
- "p90": 2135.6160640716553,
- "p95": 2164.448022842407,
- "p99": 2729.3120622634888
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2481604608,
- "combineLogicalBytes": 2481604608,
- "fanoutMean": 5.282684326171875,
- "recvTokensMax": 22165,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-5c3f9114",
- "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "h200_edd92e38",
- "comparisonKey": "696a49bd5b0de953",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:13.181201+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_4",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254409438",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254409438",
- "createdAt": "2026-06-26T17:30:13.181201+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 104.032002389431,
- "p90": 116.12799763679504,
- "p95": 120.83200365304947,
- "p99": 131.00799918174744
- },
- "combine": {
- "p50": 103.07200253009796,
- "p90": 115.167997777462,
- "p95": 120.95999717712402,
- "p99": 125.76000392436981
- },
- "roundtrip": {
- "p50": 182.23999440670013,
- "p90": 196.48000597953796,
- "p95": 200.095996260643,
- "p99": 249.7600018978119
- },
- "isolatedSum": {
- "p50": 207.10400491952896,
- "p90": 231.29599541425705,
- "p95": 241.7920008301735,
- "p99": 256.76800310611725
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 136.03200018405914,
- "p90": 151.96800231933594,
- "p95": 158.4639996290207,
- "p99": 170.68800330162048
- },
- "combine": {
- "p50": 142.59199798107147,
- "p90": 157.53600001335144,
- "p95": 161.18399798870087,
- "p99": 179.6800047159195
- },
- "roundtrip": {
- "p50": 252.8960108757019,
- "p90": 265.28000831604004,
- "p95": 271.232008934021,
- "p99": 293.4400141239166
- },
- "isolatedSum": {
- "p50": 278.6239981651306,
- "p90": 309.5040023326874,
- "p95": 319.64799761772156,
- "p99": 350.36800801754
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 194.87999379634857,
- "p90": 210.33599972724915,
- "p95": 215.87200462818146,
- "p99": 243.9039945602417
- },
- "combine": {
- "p50": 208.064004778862,
- "p90": 222.04799950122833,
- "p95": 230.14399409294128,
- "p99": 255.42399287223816
- },
- "roundtrip": {
- "p50": 378.84798645973206,
- "p90": 394.9120044708252,
- "p95": 405.5039882659912,
- "p99": 434.27199125289917
- },
- "isolatedSum": {
- "p50": 402.94399857521057,
- "p90": 432.3839992284775,
- "p95": 446.01599872112274,
- "p99": 499.32798743247986
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 312.99200654029846,
- "p90": 334.1119885444641,
- "p95": 342.9119884967804,
- "p99": 389.15199041366577
- },
- "combine": {
- "p50": 326.1120021343231,
- "p90": 339.35999870300293,
- "p95": 347.3280072212219,
- "p99": 393.0560052394867
- },
- "roundtrip": {
- "p50": 614.0159964561462,
- "p90": 628.4800171852112,
- "p95": 635.7759833335876,
- "p99": 708.4479928016663
- },
- "isolatedSum": {
- "p50": 639.1040086746216,
- "p90": 673.471987247467,
- "p95": 690.2399957180023,
- "p99": 782.2079956531525
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 549.3760108947754,
- "p90": 563.264012336731,
- "p95": 569.2480206489563,
- "p99": 593.1519865989685
- },
- "combine": {
- "p50": 560.8000159263611,
- "p90": 573.2799768447876,
- "p95": 579.8400044441223,
- "p99": 591.871976852417
- },
- "roundtrip": {
- "p50": 1080.9600353240967,
- "p90": 1097.5359678268433,
- "p95": 1106.0800552368164,
- "p99": 1136.512041091919
- },
- "isolatedSum": {
- "p50": 1110.1760268211365,
- "p90": 1136.5439891815186,
- "p95": 1149.0880250930786,
- "p99": 1185.0239634513855
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1017.7919864654541,
- "p90": 1032.1600437164307,
- "p95": 1039.6480560302734,
- "p99": 1061.1519813537598
- },
- "combine": {
- "p50": 1013.0879878997803,
- "p90": 1025.823950767517,
- "p95": 1031.775951385498,
- "p99": 1097.7599620819092
- },
- "roundtrip": {
- "p50": 2001.5358924865723,
- "p90": 2015.7439708709717,
- "p95": 2029.7598838806152,
- "p99": 2119.1039085388184
- },
- "isolatedSum": {
- "p50": 2030.8799743652344,
- "p90": 2057.9839944839478,
- "p95": 2071.4240074157715,
- "p99": 2158.911943435669
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e1047fdc",
- "identity": "h200|deepep|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_76bb7d5d",
- "comparisonKey": "174936235ac15d2c",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:49:44.261568+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_2",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · bf16 [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271611947",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271611947",
- "createdAt": "2026-06-26T23:49:44.261568+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 104.3199971318245,
- "p90": 121.50400131940842,
- "p95": 125.50400197505951,
- "p99": 141.76000654697418
- },
- "combine": {
- "p50": 104.032002389431,
- "p90": 119.71200257539749,
- "p95": 123.96799772977829,
- "p99": 145.4080045223236
- },
- "roundtrip": {
- "p50": 184.4799965620041,
- "p90": 197.24799692630768,
- "p95": 202.11200416088104,
- "p99": 221.91999852657318
- },
- "isolatedSum": {
- "p50": 208.3519995212555,
- "p90": 241.2160038948059,
- "p95": 249.4719997048378,
- "p99": 287.1680110692978
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 134.49600338935852,
- "p90": 149.59999918937683,
- "p95": 156.63999319076538,
- "p99": 199.0080028772354
- },
- "combine": {
- "p50": 143.71199905872345,
- "p90": 156.51200711727142,
- "p95": 161.6639941930771,
- "p99": 174.14399981498718
- },
- "roundtrip": {
- "p50": 254.88001108169556,
- "p90": 277.50399708747864,
- "p95": 284.09600257873535,
- "p99": 315.20000100135803
- },
- "isolatedSum": {
- "p50": 278.20800244808197,
- "p90": 306.11200630664825,
- "p95": 318.30398738384247,
- "p99": 373.1520026922226
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 192.89599359035492,
- "p90": 207.39200711250305,
- "p95": 213.53599429130554,
- "p99": 229.8240065574646
- },
- "combine": {
- "p50": 222.88000583648682,
- "p90": 239.77600038051605,
- "p95": 244.06400322914124,
- "p99": 276.16000175476074
- },
- "roundtrip": {
- "p50": 388.51198554039,
- "p90": 405.08800745010376,
- "p95": 412.6400053501129,
- "p99": 470.43201327323914
- },
- "isolatedSum": {
- "p50": 415.77599942684174,
- "p90": 447.1680074930191,
- "p95": 457.5999975204468,
- "p99": 505.98400831222534
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 304.32000756263733,
- "p90": 328.2560110092163,
- "p95": 334.6239924430847,
- "p99": 354.8159897327423
- },
- "combine": {
- "p50": 352.35199332237244,
- "p90": 364.1279935836792,
- "p95": 372.44799733161926,
- "p99": 391.80800318717957
- },
- "roundtrip": {
- "p50": 630.1760077476501,
- "p90": 646.7840075492859,
- "p95": 655.135989189148,
- "p99": 679.5520186424255
- },
- "isolatedSum": {
- "p50": 656.6720008850098,
- "p90": 692.3840045928955,
- "p95": 707.071989774704,
- "p99": 746.6239929199219
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 515.2000188827515,
- "p90": 530.0800204277039,
- "p95": 538.9119982719421,
- "p99": 611.7119789123535
- },
- "combine": {
- "p50": 611.2319827079773,
- "p90": 623.5520243644714,
- "p95": 633.2160234451294,
- "p99": 764.1919851303101
- },
- "roundtrip": {
- "p50": 1099.4880199432373,
- "p90": 1118.4959411621094,
- "p95": 1131.1999559402466,
- "p99": 1154.2079448699951
- },
- "isolatedSum": {
- "p50": 1126.4320015907288,
- "p90": 1153.6320447921753,
- "p95": 1172.1280217170715,
- "p99": 1375.9039640426636
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 964.959979057312,
- "p90": 992.2879934310913,
- "p95": 1000.3199577331543,
- "p99": 1034.4959497451782
- },
- "combine": {
- "p50": 1105.7920455932617,
- "p90": 1125.1840591430664,
- "p95": 1137.5679969787598,
- "p99": 1247.26402759552
- },
- "roundtrip": {
- "p50": 2036.895990371704,
- "p90": 2068.3839321136475,
- "p95": 2084.383964538574,
- "p99": 2168.4799194335938
- },
- "isolatedSum": {
- "p50": 2070.7520246505737,
- "p90": 2117.4720525741577,
- "p95": 2137.887954711914,
- "p99": 2281.7599773406982
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e384c8f8",
- "identity": "h200|deepep|4096|8|128|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "h200_9979edfc",
- "comparisonKey": "ca4b77cbfe002bae",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:27.799131+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_11",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287507619",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287507619",
- "createdAt": "2026-06-27T11:14:27.799131+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 77.2159993648529,
- "p90": 103.67999970912933,
- "p95": 112.0000034570694,
- "p99": 133.63200426101685
- },
- "combine": {
- "p50": 73.72800260782242,
- "p90": 91.71199798583984,
- "p95": 96.99200093746185,
- "p99": 107.45599865913391
- },
- "roundtrip": {
- "p50": 171.1679995059967,
- "p90": 215.87200462818146,
- "p95": 231.36000335216522,
- "p99": 281.3119888305664
- },
- "isolatedSum": {
- "p50": 150.94400197267532,
- "p90": 195.39199769496918,
- "p95": 208.99200439453125,
- "p99": 241.08800292015076
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 92.32000261545181,
- "p90": 114.88000303506851,
- "p95": 124.54400211572647,
- "p99": 145.6959992647171
- },
- "combine": {
- "p50": 98.78399968147278,
- "p90": 115.99999666213989,
- "p95": 121.76000326871872,
- "p99": 152.92799472808838
- },
- "roundtrip": {
- "p50": 223.29600155353546,
- "p90": 252.16001272201538,
- "p95": 263.90400528907776,
- "p99": 281.72799944877625
- },
- "isolatedSum": {
- "p50": 191.1040022969246,
- "p90": 230.8799996972084,
- "p95": 246.3040053844452,
- "p99": 298.6239939928055
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44863488,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 120.38400024175644,
- "p90": 147.77599275112152,
- "p95": 158.78400206565857,
- "p99": 194.87999379634857
- },
- "combine": {
- "p50": 148.44800531864166,
- "p90": 163.71199488639832,
- "p95": 171.6800034046173,
- "p99": 186.8479996919632
- },
- "roundtrip": {
- "p50": 343.9359962940216,
- "p90": 367.64800548553467,
- "p95": 382.9120099544525,
- "p99": 435.84001064300537
- },
- "isolatedSum": {
- "p50": 268.8320055603981,
- "p90": 311.48798763751984,
- "p95": 330.4640054702759,
- "p99": 381.72799348831177
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 89751552,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 174.97600615024567,
- "p90": 203.07199656963348,
- "p95": 215.71199595928192,
- "p99": 236.76800727844238
- },
- "combine": {
- "p50": 243.68000030517578,
- "p90": 257.6960027217865,
- "p95": 264.16000723838806,
- "p99": 295.26400566101074
- },
- "roundtrip": {
- "p50": 581.7599892616272,
- "p90": 607.3920130729675,
- "p95": 614.687979221344,
- "p99": 658.847987651825
- },
- "isolatedSum": {
- "p50": 418.65600645542145,
- "p90": 460.76799929142,
- "p95": 479.87200319767,
- "p99": 532.0320129394531
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 179511296,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 281.1200022697449,
- "p90": 304.1599988937378,
- "p95": 312.032014131546,
- "p99": 346.3360071182251
- },
- "combine": {
- "p50": 413.12000155448914,
- "p90": 429.3760061264038,
- "p95": 438.87999653816223,
- "p99": 470.8159863948822
- },
- "roundtrip": {
- "p50": 1013.4719610214233,
- "p90": 1037.824034690857,
- "p95": 1052.0960092544556,
- "p99": 1194.1440105438232
- },
- "isolatedSum": {
- "p50": 694.240003824234,
- "p90": 733.5360050201416,
- "p95": 750.9120106697083,
- "p99": 817.1519935131073
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 358055936,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 500.7359981536865,
- "p90": 529.9519896507263,
- "p95": 540.992021560669,
- "p99": 584.3200087547302
- },
- "combine": {
- "p50": 754.8159956932068,
- "p90": 771.6479897499084,
- "p95": 786.4639759063721,
- "p99": 983.8079810142517
- },
- "roundtrip": {
- "p50": 1906.6879749298096,
- "p90": 1934.656023979187,
- "p95": 1949.887990951538,
- "p99": 2083.967924118042
- },
- "isolatedSum": {
- "p50": 1255.5519938468933,
- "p90": 1301.5999794006348,
- "p95": 1327.455997467041,
- "p99": 1568.127989768982
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 716197888,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-26de8d70",
- "identity": "h200|deepep|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "b7adcc489d58bf89",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:53:37.273038+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "Qwen3.5",
- "shape": {
- "hidden": 4096,
- "topk": 8,
- "experts": 128,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "dc27c5e0894e569",
- "workloadId": "set:6:76d8142d69406335",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271739849",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271739849",
- "createdAt": "2026-06-26T23:53:37.273038+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 233.2800030708313,
- "p90": 296.25600576400757,
- "p95": 315.45600295066833,
- "p99": 387.84000277519226
- },
- "combine": {
- "p50": 74.72000271081924,
- "p90": 92.96000003814697,
- "p95": 97.98400104045868,
- "p99": 124.86399710178375
- },
- "roundtrip": {
- "p50": 278.9759933948517,
- "p90": 337.44001388549805,
- "p95": 363.5840117931366,
- "p99": 408.9600145816803
- },
- "isolatedSum": {
- "p50": 308.00000578165054,
- "p90": 389.21600580215454,
- "p95": 413.440003991127,
- "p99": 512.703999876976
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 22282240,
- "combineLogicalBytes": 44564480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 699,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 240.28800427913666,
- "p90": 292.03200340270996,
- "p95": 306.97599053382874,
- "p99": 329.5679986476898
- },
- "combine": {
- "p50": 98.30400347709656,
- "p90": 115.07199704647064,
- "p95": 119.00799721479416,
- "p99": 131.9359987974167
- },
- "roundtrip": {
- "p50": 325.408011674881,
- "p90": 376.67199969291687,
- "p95": 392.8639888763428,
- "p99": 439.520001411438
- },
- "isolatedSum": {
- "p50": 338.5920077562332,
- "p90": 407.1040004491806,
- "p95": 425.9839877486229,
- "p99": 461.5039974451065
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 44863488,
- "combineLogicalBytes": 89726976,
- "fanoutMean": 5.34814453125,
- "recvTokensMax": 1385,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 315.45600295066833,
- "p90": 357.08799958229065,
- "p95": 369.9199855327606,
- "p99": 407.039999961853
- },
- "combine": {
- "p50": 147.45600521564484,
- "p90": 164.67200219631195,
- "p95": 168.16000640392303,
- "p99": 182.52800405025482
- },
- "roundtrip": {
- "p50": 460.4479968547821,
- "p90": 508.575975894928,
- "p95": 523.360013961792,
- "p99": 576.0959982872009
- },
- "isolatedSum": {
- "p50": 462.91200816631317,
- "p90": 521.7600017786026,
- "p95": 538.0799919366837,
- "p99": 589.5680040121078
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 89751552,
- "combineLogicalBytes": 179503104,
- "fanoutMean": 5.349609375,
- "recvTokensMax": 2772,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 458.2720100879669,
- "p90": 501.5680193901062,
- "p95": 517.632007598877,
- "p99": 562.1119737625122
- },
- "combine": {
- "p50": 241.2160038948059,
- "p90": 252.06398963928223,
- "p95": 257.34400749206543,
- "p99": 279.83999252319336
- },
- "roundtrip": {
- "p50": 681.9199919700623,
- "p90": 713.4079933166504,
- "p95": 728.8320064544678,
- "p99": 805.8239817619324
- },
- "isolatedSum": {
- "p50": 699.4880139827728,
- "p90": 753.6320090293884,
- "p95": 774.9760150909424,
- "p99": 841.9519662857056
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 179511296,
- "combineLogicalBytes": 359022592,
- "fanoutMean": 5.349853515625,
- "recvTokensMax": 5558,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 734.112024307251,
- "p90": 769.8879837989807,
- "p95": 783.7439775466919,
- "p99": 899.9680280685425
- },
- "combine": {
- "p50": 410.17600893974304,
- "p90": 422.4640130996704,
- "p95": 427.64800786972046,
- "p99": 457.72799849510193
- },
- "roundtrip": {
- "p50": 1137.4399662017822,
- "p90": 1176.416039466858,
- "p95": 1203.328013420105,
- "p99": 1318.8159465789795
- },
- "isolatedSum": {
- "p50": 1144.288033246994,
- "p90": 1192.3519968986511,
- "p95": 1211.3919854164124,
- "p99": 1357.6960265636444
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 358055936,
- "combineLogicalBytes": 716111872,
- "fanoutMean": 5.33544921875,
- "recvTokensMax": 10982,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1373.792052268982,
- "p90": 1396.7679738998413,
- "p95": 1406.9440364837646,
- "p99": 1577.5359869003296
- },
- "combine": {
- "p50": 750.3679990768433,
- "p90": 762.6879811286926,
- "p95": 770.3359723091125,
- "p99": 788.0319952964783
- },
- "roundtrip": {
- "p50": 2134.335994720459,
- "p90": 2161.439895629883,
- "p95": 2178.2400608062744,
- "p99": 2561.3439083099365
- },
- "isolatedSum": {
- "p50": 2124.160051345825,
- "p90": 2159.455955028534,
- "p95": 2177.280008792877,
- "p99": 2365.567982196808
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 716197888,
- "combineLogicalBytes": 1432395776,
- "fanoutMean": 5.336090087890625,
- "recvTokensMax": 21939,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-2e0e49b4",
- "identity": "h200|deepep|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "dcdf4b262ed1d48f",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:08.323229+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_10",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "shape 5120/8/160",
- "shape": {
- "hidden": 5120,
- "topk": 8,
- "experts": 160,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "0c022a63bbcbf42",
- "workloadId": "set:6:28c0c09b13ff0acf",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271755854",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271755854",
- "createdAt": "2026-06-26T23:54:08.323229+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 219.84000504016876,
- "p90": 274.01599287986755,
- "p95": 289.5039916038513,
- "p99": 343.77598762512207
- },
- "combine": {
- "p50": 81.08799904584885,
- "p90": 91.90399944782257,
- "p95": 99.55199807882309,
- "p99": 105.79200088977814
- },
- "roundtrip": {
- "p50": 288.57600688934326,
- "p90": 340.2239978313446,
- "p95": 353.95199060440063,
- "p99": 388.0319893360138
- },
- "isolatedSum": {
- "p50": 300.9280040860176,
- "p90": 365.9199923276901,
- "p95": 389.0559896826744,
- "p99": 449.5679885149002
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 27837440,
- "combineLogicalBytes": 55674880,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 699,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 254.62400913238525,
- "p90": 299.74400997161865,
- "p95": 313.2160007953644,
- "p99": 335.6480002403259
- },
- "combine": {
- "p50": 112.60800063610077,
- "p90": 124.57600235939026,
- "p95": 128.31999361515045,
- "p99": 137.472003698349
- },
- "roundtrip": {
- "p50": 357.88801312446594,
- "p90": 402.78398990631104,
- "p95": 418.7839925289154,
- "p99": 468.3839976787567
- },
- "isolatedSum": {
- "p50": 367.232009768486,
- "p90": 424.3200123310089,
- "p95": 441.53599441051483,
- "p99": 473.1200039386749
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 55552000,
- "combineLogicalBytes": 111104000,
- "fanoutMean": 5.2978515625,
- "recvTokensMax": 1387,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 338.17601203918457,
- "p90": 376.48001313209534,
- "p95": 391.80800318717957,
- "p99": 431.71200156211853
- },
- "combine": {
- "p50": 170.43200135231018,
- "p90": 182.8480064868927,
- "p95": 187.77599930763245,
- "p99": 198.46400618553162
- },
- "roundtrip": {
- "p50": 509.5679759979248,
- "p90": 558.2079887390137,
- "p95": 577.6960253715515,
- "p99": 617.7600026130676
- },
- "isolatedSum": {
- "p50": 508.60801339149475,
- "p90": 559.328019618988,
- "p95": 579.584002494812,
- "p99": 630.1760077476501
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 111549440,
- "combineLogicalBytes": 223098880,
- "fanoutMean": 5.319091796875,
- "recvTokensMax": 2762,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 505.0879716873169,
- "p90": 540.7040119171143,
- "p95": 552.6720285415649,
- "p99": 595.1679944992065
- },
- "combine": {
- "p50": 273.75999093055725,
- "p90": 285.66399216651917,
- "p95": 291.4240062236786,
- "p99": 313.05599212646484
- },
- "roundtrip": {
- "p50": 780.2879810333252,
- "p90": 834.7839713096619,
- "p95": 867.3920035362244,
- "p99": 1058.9760541915894
- },
- "isolatedSum": {
- "p50": 778.8479626178741,
- "p90": 826.3680040836334,
- "p95": 844.0960347652435,
- "p99": 908.2239866256714
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 223365120,
- "combineLogicalBytes": 446730240,
- "fanoutMean": 5.325439453125,
- "recvTokensMax": 5518,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 859.1039776802063,
- "p90": 874.3680119514465,
- "p95": 884.447991847992,
- "p99": 1000.8000135421753
- },
- "combine": {
- "p50": 476.0960042476654,
- "p90": 487.5839948654175,
- "p95": 495.9680140018463,
- "p99": 551.2639880180359
- },
- "roundtrip": {
- "p50": 1315.2320384979248,
- "p90": 1342.4960374832153,
- "p95": 1364.9920225143433,
- "p99": 1437.1839761734009
- },
- "isolatedSum": {
- "p50": 1335.1999819278717,
- "p90": 1361.952006816864,
- "p95": 1380.4160058498383,
- "p99": 1552.0640015602112
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 446817280,
- "combineLogicalBytes": 893634560,
- "fanoutMean": 5.32647705078125,
- "recvTokensMax": 11032,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1618.3040142059326,
- "p90": 1638.8479471206665,
- "p95": 1650.3679752349854,
- "p99": 1797.8880405426025
- },
- "combine": {
- "p50": 871.5839982032776,
- "p90": 885.4719996452332,
- "p95": 893.7280178070068,
- "p99": 936.1280202865601
- },
- "roundtrip": {
- "p50": 2472.0640182495117,
- "p90": 2496.8960285186768,
- "p95": 2517.6639556884766,
- "p99": 2775.1998901367188
- },
- "isolatedSum": {
- "p50": 2489.88801240921,
- "p90": 2524.3199467658997,
- "p95": 2544.095993041992,
- "p99": 2734.0160608291626
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 893132800,
- "combineLogicalBytes": 1786265600,
- "fanoutMean": 5.323486328125,
- "recvTokensMax": 21895,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-cd909950",
- "identity": "h200|deepep|6144|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_9979edfc",
- "comparisonKey": "eb524229a3f58a63",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:00.891802+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_9",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287496212",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287496212",
- "createdAt": "2026-06-27T11:14:00.891802+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 84.54400300979614,
- "p90": 112.44799941778183,
- "p95": 123.03999811410904,
- "p99": 166.81599617004395
- },
- "combine": {
- "p50": 87.99999952316284,
- "p90": 105.56799918413162,
- "p95": 112.35199868679047,
- "p99": 141.34399592876434
- },
- "roundtrip": {
- "p50": 196.16000354290009,
- "p90": 240.22400379180908,
- "p95": 254.91198897361755,
- "p99": 326.30398869514465
- },
- "isolatedSum": {
- "p50": 172.54400253295898,
- "p90": 218.01599860191345,
- "p95": 235.3919968008995,
- "p99": 308.1599920988083
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 101.75999999046326,
- "p90": 124.09599870443344,
- "p95": 130.11200726032257,
- "p99": 143.61600577831268
- },
- "combine": {
- "p50": 120.41600048542023,
- "p90": 135.13599336147308,
- "p95": 138.5280042886734,
- "p99": 143.8719928264618
- },
- "roundtrip": {
- "p50": 278.0480086803436,
- "p90": 296.9599962234497,
- "p95": 302.91199684143066,
- "p99": 346.3360071182251
- },
- "isolatedSum": {
- "p50": 222.17600047588348,
- "p90": 259.2319920659065,
- "p95": 268.640011548996,
- "p99": 287.4879986047745
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66809856,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 131.6159963607788,
- "p90": 153.18399667739868,
- "p95": 161.82400286197662,
- "p99": 184.79999899864197
- },
- "combine": {
- "p50": 191.00800156593323,
- "p90": 205.1199972629547,
- "p95": 210.62399446964264,
- "p99": 231.87200725078583
- },
- "roundtrip": {
- "p50": 444.19199228286743,
- "p90": 466.5600061416626,
- "p95": 479.13599014282227,
- "p99": 664.7359728813171
- },
- "isolatedSum": {
- "p50": 322.62399792671204,
- "p90": 358.3039939403534,
- "p95": 372.44799733161926,
- "p99": 416.6720062494278
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 133828608,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 195.64799964427948,
- "p90": 217.21599996089935,
- "p95": 222.78399765491486,
- "p99": 280.0320088863373
- },
- "combine": {
- "p50": 306.304007768631,
- "p90": 318.39999556541443,
- "p95": 326.6240060329437,
- "p99": 356.9279909133911
- },
- "roundtrip": {
- "p50": 739.0080094337463,
- "p90": 759.2960000038147,
- "p95": 774.0479707717896,
- "p99": 813.5039806365967
- },
- "isolatedSum": {
- "p50": 501.95200741291046,
- "p90": 535.6159955263138,
- "p95": 549.4080036878586,
- "p99": 636.9599997997284
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 267190272,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 324.319988489151,
- "p90": 343.77598762512207,
- "p95": 355.9359908103943,
- "p99": 389.2799913883209
- },
- "combine": {
- "p50": 538.1439924240112,
- "p90": 549.2799878120422,
- "p95": 556.9919943809509,
- "p99": 602.7839779853821
- },
- "roundtrip": {
- "p50": 1345.0239896774292,
- "p90": 1376.4480352401733,
- "p95": 1404.3519496917725,
- "p99": 1568.6399936676025
- },
- "isolatedSum": {
- "p50": 862.4639809131622,
- "p90": 893.0559754371643,
- "p95": 912.9279851913452,
- "p99": 992.063969373703
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 533059584,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 583.4559798240662,
- "p90": 596.5120196342468,
- "p95": 603.9360165596008,
- "p99": 710.3360295295715
- },
- "combine": {
- "p50": 978.2400131225586,
- "p90": 994.8480129241943,
- "p95": 1007.7120065689087,
- "p99": 1139.9359703063965
- },
- "roundtrip": {
- "p50": 2591.327905654907,
- "p90": 2624.3200302124023,
- "p95": 2637.3119354248047,
- "p99": 2756.351947784424
- },
- "isolatedSum": {
- "p50": 1561.6959929466248,
- "p90": 1591.3600325584412,
- "p95": 1611.6480231285095,
- "p99": 1850.271999835968
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1065861120,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-92d6dac4",
- "identity": "h200|deepep|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "5878390fb0ef3ac0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:54:33.209811+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_1",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "MiniMax-M3",
- "shape": {
- "hidden": 6144,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:9f5e1e005a35e937",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271771597",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271771597",
- "createdAt": "2026-06-26T23:54:33.209811+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 237.12000250816345,
- "p90": 447.00801372528076,
- "p95": 466.2080109119415,
- "p99": 509.2800259590149
- },
- "combine": {
- "p50": 89.59999680519104,
- "p90": 118.20799857378006,
- "p95": 120.38400024175644,
- "p99": 131.55199587345123
- },
- "roundtrip": {
- "p50": 299.51998591423035,
- "p90": 465.9839868545532,
- "p95": 490.01601338386536,
- "p99": 533.9199900627136
- },
- "isolatedSum": {
- "p50": 326.7199993133545,
- "p90": 565.2160122990608,
- "p95": 586.592011153698,
- "p99": 640.8320218324661
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 33288192,
- "combineLogicalBytes": 66576384,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 272.96000719070435,
- "p90": 312.19199299812317,
- "p95": 322.7840065956116,
- "p99": 376.6080141067505
- },
- "combine": {
- "p50": 121.91999703645706,
- "p90": 133.34399461746216,
- "p95": 139.1039937734604,
- "p99": 144.48000490665436
- },
- "roundtrip": {
- "p50": 388.5760009288788,
- "p90": 429.28001284599304,
- "p95": 448.5439956188202,
- "p99": 507.87198543548584
- },
- "isolatedSum": {
- "p50": 394.8800042271614,
- "p90": 445.5359876155853,
- "p95": 461.88800036907196,
- "p99": 521.0880190134048
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 66809856,
- "combineLogicalBytes": 133619712,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 375.61601400375366,
- "p90": 427.4879992008209,
- "p95": 443.77601146698,
- "p99": 500.4799962043762
- },
- "combine": {
- "p50": 192.9599940776825,
- "p90": 205.08800446987152,
- "p95": 213.47199380397797,
- "p99": 237.92000114917755
- },
- "roundtrip": {
- "p50": 553.5680055618286,
- "p90": 599.2000102996826,
- "p95": 623.583972454071,
- "p99": 716.1920070648193
- },
- "isolatedSum": {
- "p50": 568.5760080814362,
- "p90": 632.5760036706924,
- "p95": 657.248005270958,
- "p99": 738.3999973535538
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 133828608,
- "combineLogicalBytes": 267657216,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 557.6000213623047,
- "p90": 596.7360138893127,
- "p95": 607.3920130729675,
- "p99": 644.9599862098694
- },
- "combine": {
- "p50": 306.335985660553,
- "p90": 316.3520097732544,
- "p95": 320.51199674606323,
- "p99": 334.52799916267395
- },
- "roundtrip": {
- "p50": 853.1839847564697,
- "p90": 880.8959722518921,
- "p95": 895.3920006752014,
- "p99": 966.7840003967285
- },
- "isolatedSum": {
- "p50": 863.9360070228577,
- "p90": 913.0880236625671,
- "p95": 927.9040098190308,
- "p99": 979.4879853725433
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 267190272,
- "combineLogicalBytes": 534380544,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 987.8720045089722,
- "p90": 1001.9840002059937,
- "p95": 1013.2479667663574,
- "p99": 1395.5520391464233
- },
- "combine": {
- "p50": 540.9280061721802,
- "p90": 573.7280249595642,
- "p95": 584.6400260925293,
- "p99": 626.0480284690857
- },
- "roundtrip": {
- "p50": 1523.6799716949463,
- "p90": 1545.408010482788,
- "p95": 1558.1120252609253,
- "p99": 1704.2880058288574
- },
- "isolatedSum": {
- "p50": 1528.8000106811523,
- "p90": 1575.7120251655579,
- "p95": 1597.8879928588867,
- "p99": 2021.600067615509
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 533059584,
- "combineLogicalBytes": 1066119168,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1865.3759956359863,
- "p90": 1883.2000494003296,
- "p95": 1893.02396774292,
- "p99": 1925.7279634475708
- },
- "combine": {
- "p50": 981.823980808258,
- "p90": 994.0800070762634,
- "p95": 1002.7199983596802,
- "p99": 1096.3200330734253
- },
- "roundtrip": {
- "p50": 2907.2320461273193,
- "p90": 2933.151960372925,
- "p95": 2943.104028701782,
- "p99": 3191.3599967956543
- },
- "isolatedSum": {
- "p50": 2847.1999764442444,
- "p90": 2877.280056476593,
- "p95": 2895.7439661026,
- "p99": 3022.047996520996
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1065861120,
- "combineLogicalBytes": 2131722240,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-e6cb64c3",
- "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_9979edfc",
- "comparisonKey": "73a640c71287a1ce",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T10:26:33.521456+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "2.0.0+af9a040",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28286433802",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286433802",
- "createdAt": "2026-06-27T10:26:33.521456+00:00",
- "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 86.17600053548813,
- "p90": 102.20800340175629,
- "p95": 112.15999722480774,
- "p99": 126.68800354003906
- },
- "combine": {
- "p50": 96.44799679517746,
- "p90": 110.97600311040878,
- "p95": 116.83200299739838,
- "p99": 120.44800072908401
- },
- "roundtrip": {
- "p50": 209.98400449752808,
- "p90": 236.95999383926392,
- "p95": 250.40000677108765,
- "p99": 302.11201310157776
- },
- "isolatedSum": {
- "p50": 182.6239973306656,
- "p90": 213.18400651216507,
- "p95": 228.99200022220612,
- "p99": 247.13600426912308
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 103.39199751615524,
- "p90": 121.05599790811539,
- "p95": 127.96799838542938,
- "p99": 135.6479972600937
- },
- "combine": {
- "p50": 137.79200613498688,
- "p90": 151.07199549674988,
- "p95": 155.13600409030914,
- "p99": 164.89599645137787
- },
- "roundtrip": {
- "p50": 314.2400085926056,
- "p90": 329.50401306152344,
- "p95": 339.26400542259216,
- "p99": 374.36801195144653
- },
- "isolatedSum": {
- "p50": 241.18400365114212,
- "p90": 272.12799340486526,
- "p95": 283.1040024757385,
- "p99": 300.54399371147156
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 135.96799969673157,
- "p90": 154.91199493408203,
- "p95": 162.75200247764587,
- "p99": 174.5920032262802
- },
- "combine": {
- "p50": 218.62399578094482,
- "p90": 232.80000686645508,
- "p95": 239.99999463558197,
- "p99": 370.59199810028076
- },
- "roundtrip": {
- "p50": 495.2639937400818,
- "p90": 509.2160105705261,
- "p95": 516.9280171394348,
- "p99": 547.6800203323364
- },
- "isolatedSum": {
- "p50": 354.5919954776764,
- "p90": 387.7120018005371,
- "p95": 402.75199711322784,
- "p99": 545.184001326561
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 203.07199656963348,
- "p90": 224.95999932289124,
- "p95": 230.04800081253052,
- "p99": 242.5920069217682
- },
- "combine": {
- "p50": 351.967990398407,
- "p90": 361.5039885044098,
- "p95": 367.2640025615692,
- "p99": 383.2319974899292
- },
- "roundtrip": {
- "p50": 836.3519906997681,
- "p90": 849.6959805488586,
- "p95": 854.1439771652222,
- "p99": 861.3759875297546
- },
- "isolatedSum": {
- "p50": 555.0399869680405,
- "p90": 586.463987827301,
- "p95": 597.3120033740997,
- "p99": 625.8240044116974
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 333.2799971103668,
- "p90": 350.14399886131287,
- "p95": 359.96800661087036,
- "p99": 417.4720048904419
- },
- "combine": {
- "p50": 617.3120141029358,
- "p90": 628.0959844589233,
- "p95": 631.6159963607788,
- "p99": 644.8959708213806
- },
- "roundtrip": {
- "p50": 1508.4160566329956,
- "p90": 1521.9520330429077,
- "p95": 1531.7440032958984,
- "p99": 1626.688003540039
- },
- "isolatedSum": {
- "p50": 950.5920112133026,
- "p90": 978.2399833202362,
- "p95": 991.5840029716492,
- "p99": 1062.3679757118225
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 604.9280166625977,
- "p90": 614.6559715270996,
- "p95": 619.488000869751,
- "p99": 634.335994720459
- },
- "combine": {
- "p50": 1122.1439838409424,
- "p90": 1135.9360218048096,
- "p95": 1145.7600593566895,
- "p99": 1211.1680507659912
- },
- "roundtrip": {
- "p50": 2860.6081008911133,
- "p90": 2879.5840740203857,
- "p95": 2889.3120288848877,
- "p99": 3131.5200328826904
- },
- "isolatedSum": {
- "p50": 1727.07200050354,
- "p90": 1750.5919933319092,
- "p95": 1765.2480602264404,
- "p99": 1845.5040454864502
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4da6f6db",
- "identity": "h200|deepep|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "90a8a7fc3b314f23",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:50:44.259181+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_3",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271640687",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271640687",
- "createdAt": "2026-06-26T23:50:44.259181+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 239.3600046634674,
- "p90": 286.52799129486084,
- "p95": 313.79199028015137,
- "p99": 391.2000060081482
- },
- "combine": {
- "p50": 97.21600264310837,
- "p90": 110.59200018644333,
- "p95": 116.67200177907944,
- "p99": 134.783998131752
- },
- "roundtrip": {
- "p50": 309.9519908428192,
- "p90": 360.48001050949097,
- "p95": 381.5680146217346,
- "p99": 466.94400906562805
- },
- "isolatedSum": {
- "p50": 336.5760073065758,
- "p90": 397.11999148130417,
- "p95": 430.4639920592308,
- "p99": 525.9840041399002
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 291.0720109939575,
- "p90": 340.5759930610657,
- "p95": 355.19999265670776,
- "p99": 430.30399084091187
- },
- "combine": {
- "p50": 137.7599984407425,
- "p90": 154.30399775505066,
- "p95": 160.41600704193115,
- "p99": 182.3360025882721
- },
- "roundtrip": {
- "p50": 415.8079922199249,
- "p90": 464.0960097312927,
- "p95": 484.5759868621826,
- "p99": 556.8320155143738
- },
- "isolatedSum": {
- "p50": 428.8320094347,
- "p90": 494.87999081611633,
- "p95": 515.6159996986389,
- "p99": 612.639993429184
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 408.28800201416016,
- "p90": 486.4000082015991,
- "p95": 495.7759976387024,
- "p99": 554.3680191040039
- },
- "combine": {
- "p50": 219.10400688648224,
- "p90": 233.37599635124207,
- "p95": 239.48800563812256,
- "p99": 266.07999205589294
- },
- "roundtrip": {
- "p50": 607.4560284614563,
- "p90": 650.2400040626526,
- "p95": 670.5920100212097,
- "p99": 729.3760180473328
- },
- "isolatedSum": {
- "p50": 627.3920089006424,
- "p90": 719.7760045528412,
- "p95": 735.264003276825,
- "p99": 820.4480111598969
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 621.9840049743652,
- "p90": 667.8720116615295,
- "p95": 696.0639953613281,
- "p99": 765.0880217552185
- },
- "combine": {
- "p50": 346.8480110168457,
- "p90": 362.08000779151917,
- "p95": 368.47999691963196,
- "p99": 384.89601016044617
- },
- "roundtrip": {
- "p50": 955.2639722824097,
- "p90": 1010.1120471954346,
- "p95": 1039.4879579544067,
- "p99": 1108.6399555206299
- },
- "isolatedSum": {
- "p50": 968.8320159912109,
- "p90": 1029.9520194530487,
- "p95": 1064.54399228096,
- "p99": 1149.9840319156647
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 1107.7439785003662,
- "p90": 1126.9439458847046,
- "p95": 1137.887954711914,
- "p99": 1176.8319606781006
- },
- "combine": {
- "p50": 609.9200248718262,
- "p90": 624.4159936904907,
- "p95": 631.8399906158447,
- "p99": 652.1919965744019
- },
- "roundtrip": {
- "p50": 1692.2240257263184,
- "p90": 1713.1520509719849,
- "p95": 1732.5439453125,
- "p99": 1810.7199668884277
- },
- "isolatedSum": {
- "p50": 1717.6640033721924,
- "p90": 1751.3599395751953,
- "p95": 1769.7279453277588,
- "p99": 1829.0239572525024
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 2100.4478931427,
- "p90": 2129.312038421631,
- "p95": 2148.47993850708,
- "p99": 2358.464002609253
- },
- "combine": {
- "p50": 1102.6560068130493,
- "p90": 1120.0640201568604,
- "p95": 1132.8959465026855,
- "p99": 1158.560037612915
- },
- "roundtrip": {
- "p50": 3193.376064300537,
- "p90": 3219.615936279297,
- "p95": 3229.9840450286865,
- "p99": 3288.5758876800537
- },
- "isolatedSum": {
- "p50": 3203.1038999557495,
- "p90": 3249.376058578491,
- "p95": 3281.3758850097656,
- "p99": 3517.024040222168
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-15326a90",
- "identity": "h200|deepep|7168|8|384|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "h200_9979edfc",
- "comparisonKey": "0bd4a1be28b155b0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T11:14:15.177243+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_4",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28287502149",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287502149",
- "createdAt": "2026-06-27T11:14:15.177243+00:00",
- "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 86.40000224113464,
- "p90": 109.18399691581726,
- "p95": 114.3999993801117,
- "p99": 152.0960032939911
- },
- "combine": {
- "p50": 96.99200093746185,
- "p90": 110.55999994277954,
- "p95": 116.83200299739838,
- "p99": 123.64800274372101
- },
- "roundtrip": {
- "p50": 211.42399311065674,
- "p90": 238.11200261116028,
- "p95": 247.8400021791458,
- "p99": 270.81599831581116
- },
- "isolatedSum": {
- "p50": 183.3920031785965,
- "p90": 219.7439968585968,
- "p95": 231.23200237751007,
- "p99": 275.7440060377121
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 104.38399761915207,
- "p90": 124.89599734544754,
- "p95": 131.71200454235077,
- "p99": 141.66399836540222
- },
- "combine": {
- "p50": 137.05599308013916,
- "p90": 149.82399344444275,
- "p95": 154.14400398731232,
- "p99": 171.87200486660004
- },
- "roundtrip": {
- "p50": 308.8639974594116,
- "p90": 326.7520070075989,
- "p95": 331.2320113182068,
- "p99": 342.52798557281494
- },
- "isolatedSum": {
- "p50": 241.43999069929123,
- "p90": 274.7199907898903,
- "p95": 285.8560085296631,
- "p99": 313.53600323200226
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77285376,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 141.12000167369843,
- "p90": 168.47999393939972,
- "p95": 177.47199535369873,
- "p99": 233.43999683856964
- },
- "combine": {
- "p50": 215.83999693393707,
- "p90": 233.60000550746918,
- "p95": 237.7600073814392,
- "p99": 313.08799982070923
- },
- "roundtrip": {
- "p50": 488.5759949684143,
- "p90": 503.32802534103394,
- "p95": 508.67199897766113,
- "p99": 524.0640044212341
- },
- "isolatedSum": {
- "p50": 356.9599986076355,
- "p90": 402.0799994468689,
- "p95": 415.23200273513794,
- "p99": 546.5279966592789
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154886144,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 205.79199492931366,
- "p90": 228.89600694179535,
- "p95": 234.46400463581085,
- "p99": 248.89600276947021
- },
- "combine": {
- "p50": 347.3599851131439,
- "p90": 359.0080142021179,
- "p95": 364.73599076271057,
- "p99": 389.3119990825653
- },
- "roundtrip": {
- "p50": 830.016016960144,
- "p90": 851.2319922447205,
- "p95": 861.8239760398865,
- "p99": 894.0479755401611
- },
- "isolatedSum": {
- "p50": 553.1519800424576,
- "p90": 587.9040211439133,
- "p95": 599.1999953985214,
- "p99": 638.2080018520355
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 309750784,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 340.1600122451782,
- "p90": 360.1279854774475,
- "p95": 373.7280070781708,
- "p99": 421.4720129966736
- },
- "combine": {
- "p50": 600.1920104026794,
- "p90": 613.1839752197266,
- "p95": 621.2480068206787,
- "p99": 657.696008682251
- },
- "roundtrip": {
- "p50": 1490.880012512207,
- "p90": 1514.016032218933,
- "p95": 1529.2479991912842,
- "p99": 1652.6720523834229
- },
- "isolatedSum": {
- "p50": 940.3520226478577,
- "p90": 973.3119606971741,
- "p95": 994.9760138988495,
- "p99": 1079.1680216789246
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619687936,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 612.2879981994629,
- "p90": 627.1359920501709,
- "p95": 634.656012058258,
- "p99": 680.351972579956
- },
- "combine": {
- "p50": 1088.5440111160278,
- "p90": 1107.0400476455688,
- "p95": 1131.872057914734,
- "p99": 1238.976001739502
- },
- "roundtrip": {
- "p50": 2821.4080333709717,
- "p90": 2847.007989883423,
- "p95": 2862.6561164855957,
- "p99": 3033.9200496673584
- },
- "isolatedSum": {
- "p50": 1700.8320093154907,
- "p90": 1734.1760396957397,
- "p95": 1766.528069972992,
- "p99": 1919.327974319458
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239834624,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-d2673258",
- "identity": "h200|deepep|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf",
- "colorKey": "h200_87683f6c",
- "comparisonKey": "ae4528707b5ffd7f",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:53:16.316846+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_3",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "runtime-visible-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8",
- "model": "Kimi-K2",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 384,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "cd50548525dafdf",
- "workloadId": "set:6:b23bc0c4b6402c69",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271725115",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271725115",
- "createdAt": "2026-06-26T23:53:16.316846+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 221.27999365329742,
- "p90": 242.20800399780273,
- "p95": 255.3279995918274,
- "p99": 294.94398832321167
- },
- "combine": {
- "p50": 96.67199850082397,
- "p90": 103.20000350475311,
- "p95": 107.32799768447876,
- "p99": 117.85600334405899
- },
- "roundtrip": {
- "p50": 306.8479895591736,
- "p90": 331.07200264930725,
- "p95": 352.31998562812805,
- "p99": 409.05600786209106
- },
- "isolatedSum": {
- "p50": 317.9519921541214,
- "p90": 345.40800750255585,
- "p95": 362.65599727630615,
- "p99": 412.79999166727066
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38757376,
- "combineLogicalBytes": 77514752,
- "fanoutMean": 5.2802734375,
- "recvTokensMax": 707,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 282.04798698425293,
- "p90": 307.3279857635498,
- "p95": 327.2320032119751,
- "p99": 442.68798828125
- },
- "combine": {
- "p50": 138.87999951839447,
- "p90": 145.05599439144135,
- "p95": 152.73599326610565,
- "p99": 170.01600563526154
- },
- "roundtrip": {
- "p50": 410.46398878097534,
- "p90": 435.39199233055115,
- "p95": 465.6960070133209,
- "p99": 525.2479910850525
- },
- "isolatedSum": {
- "p50": 420.9279865026474,
- "p90": 452.38398015499115,
- "p95": 479.96799647808075,
- "p99": 612.7039939165115
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77285376,
- "combineLogicalBytes": 154570752,
- "fanoutMean": 5.2646484375,
- "recvTokensMax": 1391,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 390.9760117530823,
- "p90": 407.8719913959503,
- "p95": 414.3039882183075,
- "p99": 448.2240080833435
- },
- "combine": {
- "p50": 212.3199999332428,
- "p90": 220.2560007572174,
- "p95": 229.08799350261688,
- "p99": 299.71200227737427
- },
- "roundtrip": {
- "p50": 589.3120169639587,
- "p90": 609.9839806556702,
- "p95": 625.5040168762207,
- "p99": 686.6880059242249
- },
- "isolatedSum": {
- "p50": 603.2960116863251,
- "p90": 628.1279921531677,
- "p95": 643.3919817209244,
- "p99": 747.9360103607178
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 154886144,
- "combineLogicalBytes": 309772288,
- "fanoutMean": 5.275390625,
- "recvTokensMax": 2754,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 601.7919778823853,
- "p90": 624.064028263092,
- "p95": 640.0960087776184,
- "p99": 705.2800059318542
- },
- "combine": {
- "p50": 343.29599142074585,
- "p90": 351.39200091362,
- "p95": 357.02401399612427,
- "p99": 386.01601123809814
- },
- "roundtrip": {
- "p50": 930.400013923645,
- "p90": 953.1520009040833,
- "p95": 967.1040177345276,
- "p99": 1069.5680379867554
- },
- "isolatedSum": {
- "p50": 945.0879693031311,
- "p90": 975.456029176712,
- "p95": 997.1200227737427,
- "p99": 1091.2960171699524
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 309750784,
- "combineLogicalBytes": 619501568,
- "fanoutMean": 5.2750244140625,
- "recvTokensMax": 5469,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 1100.0959873199463,
- "p90": 1113.9520406723022,
- "p95": 1130.784034729004,
- "p99": 1221.2159633636475
- },
- "combine": {
- "p50": 596.3199734687805,
- "p90": 606.9440245628357,
- "p95": 612.6400232315063,
- "p99": 648.5120058059692
- },
- "roundtrip": {
- "p50": 1675.5199432373047,
- "p90": 1687.999963760376,
- "p95": 1695.3599452972412,
- "p99": 2014.2719745635986
- },
- "isolatedSum": {
- "p50": 1696.4159607887268,
- "p90": 1720.896065235138,
- "p95": 1743.4240579605103,
- "p99": 1869.7279691696167
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 619687936,
- "combineLogicalBytes": 1239375872,
- "fanoutMean": 5.276611328125,
- "recvTokensMax": 10883,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 2087.3920917510986,
- "p90": 2099.519968032837,
- "p95": 2110.6879711151123,
- "p99": 2213.7598991394043
- },
- "combine": {
- "p50": 1087.4559879302979,
- "p90": 1099.4240045547485,
- "p95": 1103.5200357437134,
- "p99": 1151.8080234527588
- },
- "roundtrip": {
- "p50": 3166.016101837158,
- "p90": 3187.0079040527344,
- "p95": 3196.5761184692383,
- "p99": 3422.0480918884277
- },
- "isolatedSum": {
- "p50": 3174.8480796813965,
- "p90": 3198.9439725875854,
- "p95": 3214.2080068588257,
- "p99": 3365.567922592163
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1239834624,
- "combineLogicalBytes": 2479669248,
- "fanoutMean": 5.278564453125,
- "recvTokensMax": 21730,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-5a82a4d9",
- "identity": "h200|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "h200_3a17d46b",
- "comparisonKey": "680e15fb3428bab0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:05.917629+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_10",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254401482",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254401482",
- "createdAt": "2026-06-26T17:30:05.917629+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 86.81599795818329,
- "p90": 108.2879975438118,
- "p95": 115.26399850845337,
- "p99": 141.79199934005737
- },
- "combine": {
- "p50": 96.38399630784988,
- "p90": 114.68800157308578,
- "p95": 119.55200135707855,
- "p99": 138.72000575065613
- },
- "roundtrip": {
- "p50": 210.59200167655945,
- "p90": 242.94400215148926,
- "p95": 254.17599081993103,
- "p99": 313.27998638153076
- },
- "isolatedSum": {
- "p50": 183.19999426603317,
- "p90": 222.97599911689758,
- "p95": 234.81599986553192,
- "p99": 280.5120050907135
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 103.2319962978363,
- "p90": 128.28800082206726,
- "p95": 134.8160058259964,
- "p99": 155.07200360298157
- },
- "combine": {
- "p50": 133.66399705410004,
- "p90": 149.79200065135956,
- "p95": 157.21599757671356,
- "p99": 173.37599396705627
- },
- "roundtrip": {
- "p50": 304.22401428222656,
- "p90": 332.41599798202515,
- "p95": 337.92001008987427,
- "p99": 353.2800078392029
- },
- "isolatedSum": {
- "p50": 236.89599335193634,
- "p90": 278.0800014734268,
- "p95": 292.03200340270996,
- "p99": 328.44799757003784
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 135.77599823474884,
- "p90": 162.30399906635284,
- "p95": 169.95200514793396,
- "p99": 237.98400163650513
- },
- "combine": {
- "p50": 203.2960057258606,
- "p90": 220.41599452495575,
- "p95": 226.55999660491943,
- "p99": 257.31199979782104
- },
- "roundtrip": {
- "p50": 476.9600033760071,
- "p90": 496.63999676704407,
- "p95": 511.55197620391846,
- "p99": 544.7999835014343
- },
- "isolatedSum": {
- "p50": 339.07200396060944,
- "p90": 382.7199935913086,
- "p95": 396.5120017528534,
- "p99": 495.2960014343262
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 196.57599925994873,
- "p90": 218.87999773025513,
- "p95": 225.3119945526123,
- "p99": 253.7280023097992
- },
- "combine": {
- "p50": 320.607990026474,
- "p90": 335.2319896221161,
- "p95": 344.4800078868866,
- "p99": 365.9519851207733
- },
- "roundtrip": {
- "p50": 794.7199940681458,
- "p90": 817.6959753036499,
- "p95": 837.0879888534546,
- "p99": 910.5280041694641
- },
- "isolatedSum": {
- "p50": 517.1839892864227,
- "p90": 554.1119873523712,
- "p95": 569.7920024394989,
- "p99": 619.6799874305725
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 320.16000151634216,
- "p90": 343.55199337005615,
- "p95": 363.45601081848145,
- "p99": 439.9999976158142
- },
- "combine": {
- "p50": 554.8160076141357,
- "p90": 569.7919726371765,
- "p95": 577.6000022888184,
- "p99": 639.3280029296875
- },
- "roundtrip": {
- "p50": 1425.7279634475708,
- "p90": 1448.3519792556763,
- "p95": 1468.4480428695679,
- "p99": 1752.8959512710571
- },
- "isolatedSum": {
- "p50": 874.9760091304779,
- "p90": 913.3439660072327,
- "p95": 941.0560131072998,
- "p99": 1079.3280005455017
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 572.4160075187683,
- "p90": 584.447979927063,
- "p95": 591.6479825973511,
- "p99": 629.6640038490295
- },
- "combine": {
- "p50": 1012.6080513000488,
- "p90": 1025.696039199829,
- "p95": 1030.2400588989258,
- "p99": 1060.1279735565186
- },
- "roundtrip": {
- "p50": 2698.7199783325195,
- "p90": 2725.055932998657,
- "p95": 2745.215892791748,
- "p99": 2952.064037322998
- },
- "isolatedSum": {
- "p50": 1585.0240588188171,
- "p90": 1610.144019126892,
- "p95": 1621.8880414962769,
- "p99": 1689.791977405548
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-da3555d5",
- "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31",
- "colorKey": "h200_50a9ee63",
- "comparisonKey": "ee1a607167629f55",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T17:30:23.809590+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_13",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8 (norm) [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.1818,
- "configuredUnits": 24,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28254418007",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254418007",
- "createdAt": "2026-06-26T17:30:23.809590+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 73.69600236415863,
- "p90": 84.63999629020691,
- "p95": 90.08000046014786,
- "p99": 106.6880002617836
- },
- "combine": {
- "p50": 95.20000219345093,
- "p90": 106.97600245475769,
- "p95": 112.28799819946289,
- "p99": 135.77599823474884
- },
- "roundtrip": {
- "p50": 196.70400023460388,
- "p90": 213.79199624061584,
- "p95": 224.16000068187714,
- "p99": 281.0240089893341
- },
- "isolatedSum": {
- "p50": 168.89600455760956,
- "p90": 191.6159987449646,
- "p95": 202.36799865961075,
- "p99": 242.46399849653244
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 91.71199798583984,
- "p90": 108.0000028014183,
- "p95": 111.87200248241425,
- "p99": 124.57600235939026
- },
- "combine": {
- "p50": 132.7359974384308,
- "p90": 146.2399959564209,
- "p95": 151.8400013446808,
- "p99": 165.56799411773682
- },
- "roundtrip": {
- "p50": 291.456013917923,
- "p90": 308.57598781585693,
- "p95": 313.34400177001953,
- "p99": 330.78399300575256
- },
- "isolatedSum": {
- "p50": 224.44799542427063,
- "p90": 254.2399987578392,
- "p95": 263.71200382709503,
- "p99": 290.1439964771271
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 125.50400197505951,
- "p90": 144.3520039319992,
- "p95": 149.85600113868713,
- "p99": 213.6639952659607
- },
- "combine": {
- "p50": 203.10400426387787,
- "p90": 215.64799547195435,
- "p95": 220.47999501228333,
- "p99": 236.92800104618073
- },
- "roundtrip": {
- "p50": 464.7040069103241,
- "p90": 485.5999946594238,
- "p95": 495.64799666404724,
- "p99": 524.3520140647888
- },
- "isolatedSum": {
- "p50": 328.6080062389374,
- "p90": 359.99999940395355,
- "p95": 370.33599615097046,
- "p99": 450.5919963121414
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 184.1599941253662,
- "p90": 198.94400238990784,
- "p95": 204.352006316185,
- "p99": 232.12799429893494
- },
- "combine": {
- "p50": 318.39999556541443,
- "p90": 328.96000146865845,
- "p95": 333.15199613571167,
- "p99": 352.7359962463379
- },
- "roundtrip": {
- "p50": 782.4640274047852,
- "p90": 796.064019203186,
- "p95": 802.4960160255432,
- "p99": 826.4960050582886
- },
- "isolatedSum": {
- "p50": 502.55998969078064,
- "p90": 527.9040038585663,
- "p95": 537.5040024518967,
- "p99": 584.8639905452728
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 304.3519854545593,
- "p90": 320.8320140838623,
- "p95": 336.2559974193573,
- "p99": 371.42398953437805
- },
- "combine": {
- "p50": 550.4000186920166,
- "p90": 560.2880120277405,
- "p95": 567.7760243415833,
- "p99": 656.8959951400757
- },
- "roundtrip": {
- "p50": 1410.4959964752197,
- "p90": 1427.456021308899,
- "p95": 1436.4160299301147,
- "p99": 1585.2479934692383
- },
- "isolatedSum": {
- "p50": 854.7520041465759,
- "p90": 881.1200261116028,
- "p95": 904.0320217609406,
- "p99": 1028.3199846744537
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 542.8479909896851,
- "p90": 557.5680136680603,
- "p95": 565.5360221862793,
- "p99": 587.7760052680969
- },
- "combine": {
- "p50": 1013.5680437088013,
- "p90": 1026.4320373535156,
- "p95": 1031.999945640564,
- "p99": 1048.192024230957
- },
- "roundtrip": {
- "p50": 2668.4160232543945,
- "p90": 2694.3039894104004,
- "p95": 2716.320037841797,
- "p99": 3019.615888595581
- },
- "isolatedSum": {
- "p50": 1556.4160346984863,
- "p90": 1584.000051021576,
- "p95": 1597.5359678268433,
- "p99": 1635.968029499054
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-4a1bc537",
- "identity": "h200|deepep|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_4f483b60",
- "comparisonKey": "ac62097ce902c24f",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:50:33.490755+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_1",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "cached-layout-comm-only-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · deepep · fp8 [cl]",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "fp8",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "1.2.1",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271633476",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271633476",
- "createdAt": "2026-06-26T23:50:33.490755+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 75.71200281381607,
- "p90": 95.29600292444229,
- "p95": 102.11200267076492,
- "p99": 128.83199751377106
- },
- "combine": {
- "p50": 97.31200337409973,
- "p90": 115.93600362539291,
- "p95": 120.80000340938568,
- "p99": 140.44800400733948
- },
- "roundtrip": {
- "p50": 200.8959949016571,
- "p90": 248.28800559043884,
- "p95": 261.24799251556396,
- "p99": 302.5600016117096
- },
- "isolatedSum": {
- "p50": 173.0240061879158,
- "p90": 211.2320065498352,
- "p95": 222.9120060801506,
- "p99": 269.28000152111053
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38836224,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 91.61599725484848,
- "p90": 110.33599823713303,
- "p95": 116.35199934244156,
- "p99": 134.17600095272064
- },
- "combine": {
- "p50": 136.76799833774567,
- "p90": 151.5199989080429,
- "p95": 159.04000401496887,
- "p99": 170.6240028142929
- },
- "roundtrip": {
- "p50": 299.45600032806396,
- "p90": 324.38400387763977,
- "p95": 331.07200264930725,
- "p99": 365.7279908657074
- },
- "isolatedSum": {
- "p50": 228.38399559259415,
- "p90": 261.85599714517593,
- "p95": 275.39200335741043,
- "p99": 304.80000376701355
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77944832,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 127.83999741077423,
- "p90": 142.94399321079254,
- "p95": 150.4960060119629,
- "p99": 162.7199947834015
- },
- "combine": {
- "p50": 214.62400257587433,
- "p90": 226.78400576114655,
- "p95": 231.51999711990356,
- "p99": 242.14400351047516
- },
- "roundtrip": {
- "p50": 483.5200011730194,
- "p90": 497.2800016403198,
- "p95": 504.5120120048523,
- "p99": 540.831983089447
- },
- "isolatedSum": {
- "p50": 342.46399998664856,
- "p90": 369.7279989719391,
- "p95": 382.01600313186646,
- "p99": 404.86399829387665
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 156133376,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 194.75199282169342,
- "p90": 214.88000452518463,
- "p95": 220.2879935503006,
- "p99": 243.74400079250336
- },
- "combine": {
- "p50": 346.3360071182251,
- "p90": 362.8160059452057,
- "p95": 374.4960129261017,
- "p99": 426.56001448631287
- },
- "roundtrip": {
- "p50": 824.5440125465393,
- "p90": 852.5760173797607,
- "p95": 862.2400164604187,
- "p99": 896.6720104217529
- },
- "isolatedSum": {
- "p50": 541.0879999399185,
- "p90": 577.6960104703903,
- "p95": 594.7840064764023,
- "p99": 670.3040152788162
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 311721984,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 325.0879943370819,
- "p90": 342.52798557281494,
- "p95": 348.9919900894165,
- "p99": 374.9440014362335
- },
- "combine": {
- "p50": 603.8720011711121,
- "p90": 613.6959791183472,
- "p95": 618.1120276451111,
- "p99": 640.3520107269287
- },
- "roundtrip": {
- "p50": 1486.36794090271,
- "p90": 1510.7519626617432,
- "p95": 1524.1600275039673,
- "p99": 1566.3679838180542
- },
- "isolatedSum": {
- "p50": 928.959995508194,
- "p90": 956.2239646911621,
- "p95": 967.1040177345276,
- "p99": 1015.2960121631622
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 621902848,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 586.624026298523,
- "p90": 618.9759969711304,
- "p95": 627.6800036430359,
- "p99": 654.7200083732605
- },
- "combine": {
- "p50": 1108.8639497756958,
- "p90": 1126.1119842529297,
- "p95": 1134.2079639434814,
- "p99": 1169.376015663147
- },
- "roundtrip": {
- "p50": 2817.1839714050293,
- "p90": 2849.3120670318604,
- "p95": 2871.0079193115234,
- "p99": 3254.4960975646973
- },
- "isolatedSum": {
- "p50": 1695.4879760742188,
- "p90": 1745.08798122406,
- "p95": 1761.8879675865173,
- "p99": 1824.0960240364075
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243504640,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8ae4b608",
- "identity": "h200|nccl-ep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|16|decode|normal|none|none|0|normalized|0.18|22edb632bb1b9d9",
- "colorKey": "h200_45246fb2",
- "comparisonKey": "bd3ee598fb548c4d",
- "schemaVersion": 3,
- "generatedAt": "2026-06-28T15:33:05.143900+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "h200-dgxc-slurm_6",
- "sku": "h200",
- "backend": "nccl-ep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-multinode-ib",
- "transport": "rdma",
- "worldSize": 16,
- "epSize": 16,
- "label": "H200 EP16 · nccl-ep · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 132,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 16,
- "scaleUpDomain": 16
- },
- "routingConsistent": true,
- "traceSignature": "22edb632bb1b9d9",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28327088942",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28327088942",
- "createdAt": "2026-06-28T15:33:05.143900+00:00",
- "sha": "127785d43b1ea119c05a2b798bf0be56e5c9baa7"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 16,
- "dispatch": {
- "p50": 578.4000158309937,
- "p90": 2543.3599948883057,
- "p95": 2675.1999855041504,
- "p99": 2675.1999855041504
- },
- "combine": {
- "p50": 233.43999683856964,
- "p90": 532.7680110931396,
- "p95": 914.2079949378967,
- "p99": 914.2079949378967
- },
- "roundtrip": {
- "p50": 794.975996017456,
- "p90": 861.2800240516663,
- "p95": 1168.6400175094604,
- "p99": 1168.6400175094604
- },
- "isolatedSum": {
- "p50": 811.8400126695633,
- "p90": 3076.1280059814453,
- "p95": 3589.407980442047,
- "p99": 3589.407980442047
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1505280,
- "combineLogicalBytes": 1505280,
- "fanoutMean": 6.5625,
- "recvTokensMax": 12,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 32,
- "dispatch": {
- "p50": 547.2319722175598,
- "p90": 880.2559971809387,
- "p95": 977.3759841918945,
- "p99": 977.3759841918945
- },
- "combine": {
- "p50": 212.25599944591522,
- "p90": 238.3359968662262,
- "p95": 239.32799696922302,
- "p99": 239.32799696922302
- },
- "roundtrip": {
- "p50": 960.6080055236816,
- "p90": 2553.6320209503174,
- "p95": 2696.3839530944824,
- "p99": 2696.3839530944824
- },
- "isolatedSum": {
- "p50": 759.487971663475,
- "p90": 1118.591994047165,
- "p95": 1216.7039811611176,
- "p99": 1216.7039811611176
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3067904,
- "combineLogicalBytes": 3067904,
- "fanoutMean": 6.6875,
- "recvTokensMax": 24,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 64,
- "dispatch": {
- "p50": 621.504008769989,
- "p90": 645.1839804649353,
- "p95": 711.0400199890137,
- "p99": 711.0400199890137
- },
- "combine": {
- "p50": 249.08800423145294,
- "p90": 263.64800333976746,
- "p95": 269.53598856925964,
- "p99": 269.53598856925964
- },
- "roundtrip": {
- "p50": 1369.53604221344,
- "p90": 1802.5599718093872,
- "p95": 1879.744052886963,
- "p99": 1879.744052886963
- },
- "isolatedSum": {
- "p50": 870.592013001442,
- "p90": 908.8319838047028,
- "p95": 980.5760085582733,
- "p99": 980.5760085582733
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5992448,
- "combineLogicalBytes": 5992448,
- "fanoutMean": 6.53125,
- "recvTokensMax": 43,
- "stragglerRank": 10,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 128,
- "dispatch": {
- "p50": 611.8080019950867,
- "p90": 2058.079957962036,
- "p95": 2190.5601024627686,
- "p99": 2190.5601024627686
- },
- "combine": {
- "p50": 238.46399784088135,
- "p90": 636.1280083656311,
- "p95": 679.2960166931152,
- "p99": 679.2960166931152
- },
- "roundtrip": {
- "p50": 799.5200157165527,
- "p90": 1625.3759860992432,
- "p95": 2821.2480545043945,
- "p99": 2821.2480545043945
- },
- "isolatedSum": {
- "p50": 850.271999835968,
- "p90": 2694.2079663276672,
- "p95": 2869.856119155884,
- "p99": 2869.856119155884
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12214272,
- "combineLogicalBytes": 12214272,
- "fanoutMean": 6.65625,
- "recvTokensMax": 84,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 256,
- "dispatch": {
- "p50": 631.8399906158447,
- "p90": 645.6639766693115,
- "p95": 672.3840236663818,
- "p99": 672.3840236663818
- },
- "combine": {
- "p50": 256.9279968738556,
- "p90": 264.1279995441437,
- "p95": 272.41599559783936,
- "p99": 272.41599559783936
- },
- "roundtrip": {
- "p50": 827.135980129242,
- "p90": 967.136025428772,
- "p95": 1139.7440433502197,
- "p99": 1139.7440433502197
- },
- "isolatedSum": {
- "p50": 888.7679874897003,
- "p90": 909.7919762134552,
- "p95": 944.8000192642212,
- "p99": 944.8000192642212
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 24127488,
- "combineLogicalBytes": 24127488,
- "fanoutMean": 6.57421875,
- "recvTokensMax": 154,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 512,
- "dispatch": {
- "p50": 782.8800082206726,
- "p90": 2639.967918395996,
- "p95": 2675.584077835083,
- "p99": 2675.584077835083
- },
- "combine": {
- "p50": 265.855997800827,
- "p90": 287.200003862381,
- "p95": 290.43200612068176,
- "p99": 290.43200612068176
- },
- "roundtrip": {
- "p50": 890.496015548706,
- "p90": 1573.8240480422974,
- "p95": 2191.551923751831,
- "p99": 2191.551923751831
- },
- "isolatedSum": {
- "p50": 1048.7360060214996,
- "p90": 2927.167922258377,
- "p95": 2966.0160839557648,
- "p99": 2966.0160839557648
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 48140288,
- "combineLogicalBytes": 48140288,
- "fanoutMean": 6.55859375,
- "recvTokensMax": 295,
- "stragglerRank": 15,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 679.3280243873596,
- "p90": 740.6079769134521,
- "p95": 822.9439854621887,
- "p99": 822.9439854621887
- },
- "combine": {
- "p50": 339.9040102958679,
- "p90": 763.9359831809998,
- "p95": 791.6160225868225,
- "p99": 791.6160225868225
- },
- "roundtrip": {
- "p50": 922.2720265388489,
- "p90": 1468.127965927124,
- "p95": 1530.8159589767456,
- "p99": 1530.8159589767456
- },
- "isolatedSum": {
- "p50": 1019.2320346832275,
- "p90": 1504.543960094452,
- "p95": 1614.5600080490112,
- "p99": 1614.5600080490112
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 96165888,
- "combineLogicalBytes": 96165888,
- "fanoutMean": 6.55078125,
- "recvTokensMax": 573,
- "stragglerRank": 14,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 808.1279993057251,
- "p90": 833.5999846458435,
- "p95": 1317.952036857605,
- "p99": 1317.952036857605
- },
- "combine": {
- "p50": 518.9120173454285,
- "p90": 535.0080132484436,
- "p95": 549.5679974555969,
- "p99": 549.5679974555969
- },
- "roundtrip": {
- "p50": 1294.9440479278564,
- "p90": 1688.86399269104,
- "p95": 2760.256052017212,
- "p99": 2760.256052017212
- },
- "isolatedSum": {
- "p50": 1327.0400166511536,
- "p90": 1368.607997894287,
- "p95": 1867.520034313202,
- "p99": 1867.520034313202
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 191758336,
- "combineLogicalBytes": 191758336,
- "fanoutMean": 6.53125,
- "recvTokensMax": 1126,
- "stragglerRank": 15,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- }
- ]
- },
- {
- "id": "cx-d2620b3b",
- "identity": "h200|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "colorKey": "h200_c317e88d",
- "comparisonKey": "8bbd7f30d0bdbd11",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T17:36:22.388714+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_9",
- "sku": "h200",
- "backend": "uccl",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · uccl · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "ac583971f94b176",
- "workloadId": "set:8:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28296668644",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296668644",
- "createdAt": "2026-06-27T17:36:22.388714+00:00",
- "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 123.61600250005722,
- "p90": 165.53600132465363,
- "p95": 184.38400328159332,
- "p99": 203.13599705696106
- },
- "combine": {
- "p50": 83.93599838018417,
- "p90": 102.33599692583084,
- "p95": 113.76000195741653,
- "p99": 124.89599734544754
- },
- "roundtrip": {
- "p50": 184.32000279426575,
- "p90": 227.52000391483307,
- "p95": 243.3920055627823,
- "p99": 272.38398790359497
- },
- "isolatedSum": {
- "p50": 207.5520008802414,
- "p90": 267.87199825048447,
- "p95": 298.14400523900986,
- "p99": 328.0319944024086
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 114.84800279140472,
- "p90": 152.6080071926117,
- "p95": 160.0639969110489,
- "p99": 180.9920072555542
- },
- "combine": {
- "p50": 82.40000158548355,
- "p90": 91.80799871683121,
- "p95": 102.94400155544281,
- "p99": 110.75200140476227
- },
- "roundtrip": {
- "p50": 183.74399840831757,
- "p90": 219.7120040655136,
- "p95": 225.69599747657776,
- "p99": 255.71200251579285
- },
- "isolatedSum": {
- "p50": 197.24800437688828,
- "p90": 244.4160059094429,
- "p95": 263.0079984664917,
- "p99": 291.74400866031647
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 111.68000102043152,
- "p90": 147.8080004453659,
- "p95": 156.19200468063354,
- "p99": 167.35999286174774
- },
- "combine": {
- "p50": 84.1279998421669,
- "p90": 91.96799993515015,
- "p95": 107.55199939012527,
- "p99": 117.85600334405899
- },
- "roundtrip": {
- "p50": 196.44799828529358,
- "p90": 245.2480047941208,
- "p95": 256.3199996948242,
- "p99": 278.0480086803436
- },
- "isolatedSum": {
- "p50": 195.80800086259842,
- "p90": 239.77600038051605,
- "p95": 263.7440040707588,
- "p99": 285.21599620580673
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 112.83200234174728,
- "p90": 151.19999647140503,
- "p95": 155.87200224399567,
- "p99": 166.33599996566772
- },
- "combine": {
- "p50": 84.25600081682205,
- "p90": 98.39999675750732,
- "p95": 109.56799983978271,
- "p99": 117.8240031003952
- },
- "roundtrip": {
- "p50": 184.9920004606247,
- "p90": 221.82400524616241,
- "p95": 229.98400032520294,
- "p99": 244.35199797153473
- },
- "isolatedSum": {
- "p50": 197.08800315856934,
- "p90": 249.59999322891235,
- "p95": 265.4400020837784,
- "p99": 284.1600030660629
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 120.28799951076508,
- "p90": 159.93599593639374,
- "p95": 176.64000391960144,
- "p99": 217.02399849891663
- },
- "combine": {
- "p50": 85.9839990735054,
- "p90": 95.42399644851685,
- "p95": 103.64799946546555,
- "p99": 113.63200098276138
- },
- "roundtrip": {
- "p50": 203.0400037765503,
- "p90": 253.91998887062073,
- "p95": 280.5759906768799,
- "p99": 364.51199650764465
- },
- "isolatedSum": {
- "p50": 206.27199858427048,
- "p90": 255.35999238491058,
- "p95": 280.288003385067,
- "p99": 330.655999481678
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 32,
- "globalTokens": 256,
- "dispatch": {
- "p50": 123.58400225639343,
- "p90": 147.96799421310425,
- "p95": 156.38400614261627,
- "p99": 169.5680022239685
- },
- "combine": {
- "p50": 91.67999774217606,
- "p90": 106.20799660682678,
- "p95": 115.99999666213989,
- "p99": 126.97599828243256
- },
- "roundtrip": {
- "p50": 195.96800208091736,
- "p90": 235.07200181484222,
- "p95": 244.35199797153473,
- "p99": 258.87998938560486
- },
- "isolatedSum": {
- "p50": 215.2639999985695,
- "p90": 254.17599081993103,
- "p95": 272.38400280475616,
- "p99": 296.54400050640106
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 19726336,
- "combineLogicalBytes": 19726336,
- "fanoutMean": 5.375,
- "recvTokensMax": 182,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 64,
- "globalTokens": 512,
- "dispatch": {
- "p50": 136.00000739097595,
- "p90": 157.24800527095795,
- "p95": 164.89599645137787,
- "p99": 197.37599790096283
- },
- "combine": {
- "p50": 100.54399818181992,
- "p90": 108.22399705648422,
- "p95": 118.40000003576279,
- "p99": 127.07200646400452
- },
- "roundtrip": {
- "p50": 203.96800339221954,
- "p90": 239.96800184249878,
- "p95": 250.46399235725403,
- "p99": 268.38400959968567
- },
- "isolatedSum": {
- "p50": 236.54400557279587,
- "p90": 265.47200232744217,
- "p95": 283.29599648714066,
- "p99": 324.44800436496735
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 38993920,
- "combineLogicalBytes": 38993920,
- "fanoutMean": 5.3125,
- "recvTokensMax": 367,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 156.15999698638916,
- "p90": 172.38399386405945,
- "p95": 181.66400492191315,
- "p99": 197.4720060825348
- },
- "combine": {
- "p50": 119.1679984331131,
- "p90": 133.18400084972382,
- "p95": 142.84799993038177,
- "p99": 152.96000242233276
- },
- "roundtrip": {
- "p50": 237.69600689411163,
- "p90": 256.0639977455139,
- "p95": 266.01600646972656,
- "p99": 278.2079875469208
- },
- "isolatedSum": {
- "p50": 275.32799541950226,
- "p90": 305.56799471378326,
- "p95": 324.5120048522949,
- "p99": 350.43200850486755
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-ec807828",
- "identity": "h200|uccl|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31",
- "colorKey": "h200_c317e88d",
- "comparisonKey": "4f6cbb2ad4892beb",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T17:36:28.990296+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "h200-dgxc-slurm_13",
- "sku": "h200",
- "backend": "uccl",
- "phase": "prefill",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "epSize": 8,
- "label": "H200 EP8 · uccl · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.1515,
- "configuredUnits": 20,
- "deviceUnits": 132,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 1,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "64d989e2e2a6b31",
- "workloadId": "set:6:a426d66e479dc893",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28296668644",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28296668644",
- "createdAt": "2026-06-27T17:36:28.990296+00:00",
- "sha": "cfa1ec56258b94b4a173844810a163a832bcb07e"
- },
- "rows": [
- {
- "tokensPerRank": 128,
- "globalTokens": 1024,
- "dispatch": {
- "p50": 159.71200168132782,
- "p90": 192.60799884796143,
- "p95": 207.8399956226349,
- "p99": 266.6560113430023
- },
- "combine": {
- "p50": 120.92799693346024,
- "p90": 134.20799374580383,
- "p95": 145.9839940071106,
- "p99": 155.7759940624237
- },
- "roundtrip": {
- "p50": 235.00800132751465,
- "p90": 250.94398856163025,
- "p95": 275.55200457572937,
- "p99": 301.66399478912354
- },
- "isolatedSum": {
- "p50": 280.63999861478806,
- "p90": 326.81599259376526,
- "p95": 353.8239896297455,
- "p99": 422.432005405426
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 77672448,
- "combineLogicalBytes": 77672448,
- "fanoutMean": 5.291015625,
- "recvTokensMax": 723,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 256,
- "globalTokens": 2048,
- "dispatch": {
- "p50": 185.85599958896637,
- "p90": 202.78400182724,
- "p95": 209.82399582862854,
- "p99": 239.71199989318848
- },
- "combine": {
- "p50": 160.89600324630737,
- "p90": 168.86399686336517,
- "p95": 174.27200078964233,
- "p99": 189.88800048828125
- },
- "roundtrip": {
- "p50": 307.20001459121704,
- "p90": 324.5759904384613,
- "p95": 329.3440043926239,
- "p99": 353.0240058898926
- },
- "isolatedSum": {
- "p50": 346.75200283527374,
- "p90": 371.64799869060516,
- "p95": 384.0959966182709,
- "p99": 429.6000003814697
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 155889664,
- "combineLogicalBytes": 155889664,
- "fanoutMean": 5.3095703125,
- "recvTokensMax": 1422,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 512,
- "globalTokens": 4096,
- "dispatch": {
- "p50": 239.29600417613983,
- "p90": 249.24799799919128,
- "p95": 254.97600436210632,
- "p99": 267.2320008277893
- },
- "combine": {
- "p50": 236.80000007152557,
- "p90": 243.93600225448608,
- "p95": 246.72000110149384,
- "p99": 257.1200132369995
- },
- "roundtrip": {
- "p50": 436.2879991531372,
- "p90": 448.3200013637543,
- "p95": 454.52800393104553,
- "p99": 473.2159972190857
- },
- "isolatedSum": {
- "p50": 476.0960042476654,
- "p90": 493.18400025367737,
- "p95": 501.69600546360016,
- "p99": 524.3520140647888
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 312266752,
- "combineLogicalBytes": 312266752,
- "fanoutMean": 5.31787109375,
- "recvTokensMax": 2779,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 1024,
- "globalTokens": 8192,
- "dispatch": {
- "p50": 351.74399614334106,
- "p90": 368.99200081825256,
- "p95": 383.35999846458435,
- "p99": 419.23201084136963
- },
- "combine": {
- "p50": 371.7440068721771,
- "p90": 381.72799348831177,
- "p95": 388.3199989795685,
- "p99": 399.26400780677795
- },
- "roundtrip": {
- "p50": 682.9439997673035,
- "p90": 696.7359781265259,
- "p95": 707.647979259491,
- "p99": 768.2560086250305
- },
- "isolatedSum": {
- "p50": 723.4880030155182,
- "p90": 750.7199943065643,
- "p95": 771.6799974441528,
- "p99": 818.4960186481476
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 623443968,
- "combineLogicalBytes": 623443968,
- "fanoutMean": 5.30859375,
- "recvTokensMax": 5505,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2048,
- "globalTokens": 16384,
- "dispatch": {
- "p50": 584.7359895706177,
- "p90": 603.0399799346924,
- "p95": 611.1680269241333,
- "p99": 635.0719928741455
- },
- "combine": {
- "p50": 632.9600214958191,
- "p90": 644.3520188331604,
- "p95": 648.0640172958374,
- "p99": 671.2639927864075
- },
- "roundtrip": {
- "p50": 1173.792004585266,
- "p90": 1189.3759965896606,
- "p95": 1196.7999935150146,
- "p99": 1212.448000907898
- },
- "isolatedSum": {
- "p50": 1217.6960110664368,
- "p90": 1247.3919987678528,
- "p95": 1259.2320442199707,
- "p99": 1306.335985660553
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1243805696,
- "combineLogicalBytes": 1243805696,
- "fanoutMean": 5.29547119140625,
- "recvTokensMax": 10952,
- "stragglerRank": 7,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4096,
- "globalTokens": 32768,
- "dispatch": {
- "p50": 1028.6400318145752,
- "p90": 1050.5599975585938,
- "p95": 1060.0320100784302,
- "p99": 1135.2959871292114
- },
- "combine": {
- "p50": 1139.7119760513306,
- "p90": 1153.1200408935547,
- "p95": 1158.5919857025146,
- "p99": 1179.0399551391602
- },
- "roundtrip": {
- "p50": 2122.623920440674,
- "p90": 2145.440101623535,
- "p95": 2151.3919830322266,
- "p99": 2202.49605178833
- },
- "isolatedSum": {
- "p50": 2168.3520078659058,
- "p90": 2203.6800384521484,
- "p95": 2218.623995780945,
- "p99": 2314.3359422683716
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2487009280,
- "combineLogicalBytes": 2487009280,
- "fanoutMean": 5.294189453125,
- "recvTokensMax": 21781,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-279043f8",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|tuned||c774c8e4abb34da",
- "colorKey": "mi355x_4ec24046",
- "comparisonKey": "5776ea979804ef91",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:08:32.534640+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_05",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "fp8-saturation",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "c774c8e4abb34da",
- "workloadId": "set:5:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272169530",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272169530",
- "createdAt": "2026-06-27T00:08:32.534640+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 40.19999876618385,
- "p90": 43.000999838113785,
- "p95": 44.56000030040741,
- "p99": 47.880999743938446
- },
- "combine": {
- "p50": 17.760999500751495,
- "p90": 19.360000267624855,
- "p95": 20.959999412298203,
- "p99": 23.080000653862953
- },
- "roundtrip": {
- "p50": 56.04099854826927,
- "p90": 59.00000035762787,
- "p95": 60.201000422239304,
- "p99": 62.24000081419945
- },
- "isolatedSum": {
- "p50": 57.96099826693535,
- "p90": 62.36100010573864,
- "p95": 65.51999971270561,
- "p99": 70.9610003978014
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 42.64099895954132,
- "p90": 45.52000015974045,
- "p95": 47.07999899983406,
- "p99": 49.76100102066994
- },
- "combine": {
- "p50": 16.599999740719795,
- "p90": 18.60000006854534,
- "p95": 19.79999989271164,
- "p99": 23.080000653862953
- },
- "roundtrip": {
- "p50": 58.96100029349327,
- "p90": 62.39999830722809,
- "p95": 64.32099640369415,
- "p99": 102.64100134372711
- },
- "isolatedSum": {
- "p50": 59.240998700261116,
- "p90": 64.12000022828579,
- "p95": 66.8799988925457,
- "p99": 72.84100167453289
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 42.160000652074814,
- "p90": 44.76099833846092,
- "p95": 46.20100185275078,
- "p99": 48.5600009560585
- },
- "combine": {
- "p50": 19.759999588131905,
- "p90": 21.27999998629093,
- "p95": 22.5210003554821,
- "p99": 25.200000032782555
- },
- "roundtrip": {
- "p50": 62.001001089811325,
- "p90": 65.32099843025208,
- "p95": 66.16000086069107,
- "p99": 69.15999948978424
- },
- "isolatedSum": {
- "p50": 61.92000024020672,
- "p90": 66.04099832475185,
- "p95": 68.72200220823288,
- "p99": 73.76000098884106
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 42.399998754262924,
- "p90": 45.35999894142151,
- "p95": 47.15999960899353,
- "p99": 49.52000081539154
- },
- "combine": {
- "p50": 20.880000665783882,
- "p90": 23.08100089430809,
- "p95": 24.04000051319599,
- "p99": 26.441000401973724
- },
- "roundtrip": {
- "p50": 62.52100318670273,
- "p90": 65.64100086688995,
- "p95": 66.56000018119812,
- "p99": 68.84100288152695
- },
- "isolatedSum": {
- "p50": 63.279999420046806,
- "p90": 68.4409998357296,
- "p95": 71.20000012218952,
- "p99": 75.96100121736526
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 42.52000153064728,
- "p90": 45.32000049948692,
- "p95": 46.640001237392426,
- "p99": 49.04000088572502
- },
- "combine": {
- "p50": 25.599999353289604,
- "p90": 27.799999341368675,
- "p95": 29.239999130368233,
- "p99": 31.520001590251923
- },
- "roundtrip": {
- "p50": 67.63999909162521,
- "p90": 70.60100138187408,
- "p95": 71.68100029230118,
- "p99": 74.36099648475647
- },
- "isolatedSum": {
- "p50": 68.12000088393688,
- "p90": 73.1199998408556,
- "p95": 75.88000036776066,
- "p99": 80.56000247597694
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-60c60832",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||c774c8e4abb34da",
- "colorKey": "mi355x_4ec24046",
- "comparisonKey": "3677ee6ace04ac65",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:53:59.155172+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_05",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "c774c8e4abb34da",
- "workloadId": "set:5:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28273516714",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28273516714",
- "createdAt": "2026-06-27T00:53:59.155172+00:00",
- "sha": "2c15d9415503e9ccb84cd49cf446a122796efc1e"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 40.6000018119812,
- "p90": 43.76000165939331,
- "p95": 45.239999890327454,
- "p99": 54.71999943256378
- },
- "combine": {
- "p50": 17.920000478625298,
- "p90": 19.039999693632126,
- "p95": 20.999999716877937,
- "p99": 22.87999913096428
- },
- "roundtrip": {
- "p50": 56.32000043988228,
- "p90": 59.4400018453598,
- "p95": 60.64099818468094,
- "p99": 63.19999694824219
- },
- "isolatedSum": {
- "p50": 58.5200022906065,
- "p90": 62.800001353025436,
- "p95": 66.23999960720539,
- "p99": 77.59999856352806
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 42.64000058174133,
- "p90": 45.35999894142151,
- "p95": 46.76000028848648,
- "p99": 50.23999884724617
- },
- "combine": {
- "p50": 16.759999096393585,
- "p90": 18.68000067770481,
- "p95": 19.801000133156776,
- "p99": 22.08000048995018
- },
- "roundtrip": {
- "p50": 58.9199997484684,
- "p90": 61.799999326467514,
- "p95": 62.95999884605408,
- "p99": 65.20000100135803
- },
- "isolatedSum": {
- "p50": 59.39999967813492,
- "p90": 64.03999961912632,
- "p95": 66.56100042164326,
- "p99": 72.31999933719635
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 42.44000092148781,
- "p90": 45.281000435352325,
- "p95": 46.4400015771389,
- "p99": 47.919999808073044
- },
- "combine": {
- "p50": 19.999999552965164,
- "p90": 21.99999988079071,
- "p95": 23.360000923275948,
- "p99": 25.72000026702881
- },
- "roundtrip": {
- "p50": 61.91999837756157,
- "p90": 65.20099937915802,
- "p95": 66.3599967956543,
- "p99": 67.84100085496902
- },
- "isolatedSum": {
- "p50": 62.44000047445297,
- "p90": 67.28100031614304,
- "p95": 69.80000250041485,
- "p99": 73.64000007510185
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 42.44000092148781,
- "p90": 45.00100016593933,
- "p95": 46.88100144267082,
- "p99": 49.27999898791313
- },
- "combine": {
- "p50": 20.880000665783882,
- "p90": 22.840000689029694,
- "p95": 24.240000173449516,
- "p99": 26.399999856948853
- },
- "roundtrip": {
- "p50": 62.401000410318375,
- "p90": 65.48000127077103,
- "p95": 66.28099828958511,
- "p99": 68.00000369548798
- },
- "isolatedSum": {
- "p50": 63.32000158727169,
- "p90": 67.84100085496902,
- "p95": 71.12100161612034,
- "p99": 75.67999884486198
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 42.520999908447266,
- "p90": 45.1200008392334,
- "p95": 46.59999907016754,
- "p99": 49.04000088572502
- },
- "combine": {
- "p50": 25.8799996227026,
- "p90": 27.879999950528145,
- "p95": 29.239999130368233,
- "p99": 31.800001859664917
- },
- "roundtrip": {
- "p50": 67.80099868774414,
- "p90": 71.16000354290009,
- "p95": 72.2000002861023,
- "p99": 74.47999715805054
- },
- "isolatedSum": {
- "p50": 68.40099953114986,
- "p90": 73.00000078976154,
- "p95": 75.83999820053577,
- "p99": 80.84000274538994
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-f513e0f0",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|small-amplitude|none|none|0|tuned||c774c8e4abb34da",
- "colorKey": "mi355x_4ec24046",
- "comparisonKey": "43eedfb9c3cc2b53",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:07:01.734617+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_01",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "small-amplitude",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "c774c8e4abb34da",
- "workloadId": "set:5:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272162006",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272162006",
- "createdAt": "2026-06-27T00:07:01.734617+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 40.44099897146225,
- "p90": 43.72100159525871,
- "p95": 45.1200008392334,
- "p99": 51.600001752376556
- },
- "combine": {
- "p50": 15.960000455379486,
- "p90": 18.160000443458557,
- "p95": 19.279999658465385,
- "p99": 21.159999072551727
- },
- "roundtrip": {
- "p50": 55.56099861860275,
- "p90": 58.75999853014946,
- "p95": 60.120001435279846,
- "p99": 63.63999843597412
- },
- "isolatedSum": {
- "p50": 56.400999426841736,
- "p90": 61.88100203871727,
- "p95": 64.40000049769878,
- "p99": 72.76000082492828
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 42.08099842071533,
- "p90": 45.0810007750988,
- "p95": 46.39999940991402,
- "p99": 49.76100102066994
- },
- "combine": {
- "p50": 16.00000075995922,
- "p90": 18.60000006854534,
- "p95": 19.55999992787838,
- "p99": 21.920999512076378
- },
- "roundtrip": {
- "p50": 58.32099914550781,
- "p90": 61.64000183343887,
- "p95": 63.600003719329834,
- "p99": 67.59999692440033
- },
- "isolatedSum": {
- "p50": 58.08099918067455,
- "p90": 63.68100084364414,
- "p95": 65.9599993377924,
- "p99": 71.68200053274632
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 41.839998215436935,
- "p90": 44.920001178979874,
- "p95": 46.28000035881996,
- "p99": 49.40100014209747
- },
- "combine": {
- "p50": 19.31999996304512,
- "p90": 21.75999991595745,
- "p95": 22.5600004196167,
- "p99": 24.43999983370304
- },
- "roundtrip": {
- "p50": 60.80099940299988,
- "p90": 64.03999775648117,
- "p95": 65.56099653244019,
- "p99": 69.92000341415405
- },
- "isolatedSum": {
- "p50": 61.159998178482056,
- "p90": 66.68000109493732,
- "p95": 68.84000077843666,
- "p99": 73.84099997580051
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 42.080000042915344,
- "p90": 45.20000144839287,
- "p95": 46.64099961519241,
- "p99": 48.43999817967415
- },
- "combine": {
- "p50": 20.16099914908409,
- "p90": 22.280000150203705,
- "p95": 23.04000034928322,
- "p99": 24.960000067949295
- },
- "roundtrip": {
- "p50": 62.199998646974564,
- "p90": 65.36100059747696,
- "p95": 66.72099977731705,
- "p99": 68.71999800205231
- },
- "isolatedSum": {
- "p50": 62.240999191999435,
- "p90": 67.48000159859657,
- "p95": 69.68099996447563,
- "p99": 73.39999824762344
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 42.24099963903427,
- "p90": 45.239999890327454,
- "p95": 46.36099934577942,
- "p99": 48.40100184082985
- },
- "combine": {
- "p50": 24.639999493956566,
- "p90": 26.88100002706051,
- "p95": 27.881000190973282,
- "p99": 30.079999938607216
- },
- "roundtrip": {
- "p50": 67.47999787330627,
- "p90": 70.60100138187408,
- "p95": 72.28100299835205,
- "p99": 75.20099729299545
- },
- "isolatedSum": {
- "p50": 66.88099913299084,
- "p90": 72.12099991738796,
- "p95": 74.2419995367527,
- "p99": 78.48100177943707
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 4,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-67074ab6",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|wide-dynamic-range|none|none|0|tuned||c774c8e4abb34da",
- "colorKey": "mi355x_4ec24046",
- "comparisonKey": "2ccb7553c969aafc",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:07:48.076161+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_06",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "wide-dynamic-range",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "c774c8e4abb34da",
- "workloadId": "set:5:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272165928",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272165928",
- "createdAt": "2026-06-27T00:07:48.076161+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 40.240999311208725,
- "p90": 43.43999922275543,
- "p95": 44.76099833846092,
- "p99": 48.11999946832657
- },
- "combine": {
- "p50": 16.839999705553055,
- "p90": 18.319999799132347,
- "p95": 19.600000232458115,
- "p99": 23.399999365210533
- },
- "roundtrip": {
- "p50": 56.120000779628754,
- "p90": 59.48000028729439,
- "p95": 60.76100096106529,
- "p99": 65.24000316858292
- },
- "isolatedSum": {
- "p50": 57.08099901676178,
- "p90": 61.75999902188778,
- "p95": 64.36099857091904,
- "p99": 71.5199988335371
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 42.44000092148781,
- "p90": 45.48000171780586,
- "p95": 46.51999846100807,
- "p99": 49.19999837875366
- },
- "combine": {
- "p50": 16.201000660657883,
- "p90": 18.479999154806137,
- "p95": 19.55999992787838,
- "p99": 21.800000220537186
- },
- "roundtrip": {
- "p50": 58.80099907517433,
- "p90": 61.96000054478645,
- "p95": 62.76000291109085,
- "p99": 64.19999897480011
- },
- "isolatedSum": {
- "p50": 58.64100158214569,
- "p90": 63.960000872612,
- "p95": 66.07999838888645,
- "p99": 70.99999859929085
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 42.04000160098076,
- "p90": 44.280000030994415,
- "p95": 45.921001583337784,
- "p99": 49.28100109100342
- },
- "combine": {
- "p50": 19.039999693632126,
- "p90": 21.51999995112419,
- "p95": 22.801000624895096,
- "p99": 24.560000747442245
- },
- "roundtrip": {
- "p50": 61.601001769304276,
- "p90": 64.92000073194504,
- "p95": 66.00099802017212,
- "p99": 67.72000342607498
- },
- "isolatedSum": {
- "p50": 61.080001294612885,
- "p90": 65.7999999821186,
- "p95": 68.72200220823288,
- "p99": 73.84100183844566
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 42.27999970316887,
- "p90": 45.00000178813934,
- "p95": 46.23999819159508,
- "p99": 48.16000163555145
- },
- "combine": {
- "p50": 20.320000126957893,
- "p90": 23.32100085914135,
- "p95": 25.439999997615814,
- "p99": 57.88100138306618
- },
- "roundtrip": {
- "p50": 62.3599998652935,
- "p90": 65.0399997830391,
- "p95": 66.0799965262413,
- "p99": 68.00100207328796
- },
- "isolatedSum": {
- "p50": 62.59999983012676,
- "p90": 68.3210026472807,
- "p95": 71.67999818921089,
- "p99": 106.04100301861763
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 42.399998754262924,
- "p90": 45.0810007750988,
- "p95": 46.23999819159508,
- "p99": 48.8400012254715
- },
- "combine": {
- "p50": 25.120999664068222,
- "p90": 27.2000003606081,
- "p95": 28.161000460386276,
- "p99": 30.319999903440475
- },
- "roundtrip": {
- "p50": 67.63999909162521,
- "p90": 70.79999893903732,
- "p95": 71.68000191450119,
- "p99": 73.72000068426132
- },
- "isolatedSum": {
- "p50": 67.52099841833115,
- "p90": 72.2810011357069,
- "p95": 74.40099865198135,
- "p99": 79.16000112891197
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-23f1ecd4",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|zeros|none|none|0|tuned||c774c8e4abb34da",
- "colorKey": "mi355x_4ec24046",
- "comparisonKey": "1ab1f06166250146",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:06:16.763261+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_02",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "zeros",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "c774c8e4abb34da",
- "workloadId": "set:5:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28272158268",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28272158268",
- "createdAt": "2026-06-27T00:06:16.763261+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 40.240999311208725,
- "p90": 43.5199998319149,
- "p95": 44.920001178979874,
- "p99": 54.32000011205673
- },
- "combine": {
- "p50": 17.680000513792038,
- "p90": 19.401000812649727,
- "p95": 20.759999752044678,
- "p99": 23.80100078880787
- },
- "roundtrip": {
- "p50": 56.040000170469284,
- "p90": 59.12100151181221,
- "p95": 60.47999858856201,
- "p99": 63.040003180503845
- },
- "isolatedSum": {
- "p50": 57.92099982500076,
- "p90": 62.92100064456463,
- "p95": 65.68000093102455,
- "p99": 78.1210009008646
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 42.319998145103455,
- "p90": 44.87999901175499,
- "p95": 46.480998396873474,
- "p99": 49.320999532938
- },
- "combine": {
- "p50": 16.720000654459,
- "p90": 18.240999430418015,
- "p95": 19.401000812649727,
- "p99": 23.240000009536743
- },
- "roundtrip": {
- "p50": 58.479998260736465,
- "p90": 61.879999935626984,
- "p95": 62.880001962184906,
- "p99": 65.99999964237213
- },
- "isolatedSum": {
- "p50": 59.039998799562454,
- "p90": 63.120998442173004,
- "p95": 65.8819992095232,
- "p99": 72.56099954247475
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 41.919998824596405,
- "p90": 45.120999217033386,
- "p95": 46.59999907016754,
- "p99": 50.84000155329704
- },
- "combine": {
- "p50": 19.79999989271164,
- "p90": 21.27999998629093,
- "p95": 23.16099964082241,
- "p99": 25.400999933481216
- },
- "roundtrip": {
- "p50": 61.51999905705452,
- "p90": 64.40100073814392,
- "p95": 65.80100208520889,
- "p99": 68.24000179767609
- },
- "isolatedSum": {
- "p50": 61.719998717308044,
- "p90": 66.40099920332432,
- "p95": 69.76099871098995,
- "p99": 76.24100148677826
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 42.121000587940216,
- "p90": 45.04000023007393,
- "p95": 46.31999880075455,
- "p99": 50.641000270843506
- },
- "combine": {
- "p50": 21.04100026190281,
- "p90": 22.95999974012375,
- "p95": 24.6799997985363,
- "p99": 26.920000091195107
- },
- "roundtrip": {
- "p50": 62.20100075006485,
- "p90": 66.39999896287918,
- "p95": 68.59999895095825,
- "p99": 95.88100016117096
- },
- "isolatedSum": {
- "p50": 63.162000849843025,
- "p90": 67.99999997019768,
- "p95": 70.99999859929085,
- "p99": 77.56100036203861
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 42.281001806259155,
- "p90": 45.27999833226204,
- "p95": 46.51999846100807,
- "p99": 49.320001155138016
- },
- "combine": {
- "p50": 25.919999927282333,
- "p90": 28.080999851226807,
- "p95": 29.559999704360962,
- "p99": 32.35999867320061
- },
- "roundtrip": {
- "p50": 67.31999665498734,
- "p90": 70.2809989452362,
- "p95": 71.40100002288818,
- "p99": 74.16000217199326
- },
- "isolatedSum": {
- "p50": 68.20100173354149,
- "p90": 73.36099818348885,
- "p95": 76.07999816536903,
- "p99": 81.67999982833862
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-83a44089",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||2c22646e864c27e",
- "colorKey": "mi355x_eb5b377e",
- "comparisonKey": "5bbe7a250a72d8b4",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:58:24.839410+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_01",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16 · balanced",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced",
- "routingLabel": "balanced",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "2c22646e864c27e",
- "workloadId": "set:5:7af12818400d6348",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271906612",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271906612",
- "createdAt": "2026-06-26T23:58:24.839410+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 40.36099836230278,
- "p90": 43.44100132584572,
- "p95": 44.60100084543228,
- "p99": 48.920001834630966
- },
- "combine": {
- "p50": 16.3199994713068,
- "p90": 18.880000337958336,
- "p95": 19.88000050187111,
- "p99": 21.880999207496643
- },
- "roundtrip": {
- "p50": 57.20100179314613,
- "p90": 60.63999980688095,
- "p95": 61.72100082039833,
- "p99": 64.56000357866287
- },
- "isolatedSum": {
- "p50": 56.68099783360958,
- "p90": 62.321001663804054,
- "p95": 64.48100134730339,
- "p99": 70.80100104212761
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 8,
- "recvTokensMax": 8,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 42.64099895954132,
- "p90": 45.680999755859375,
- "p95": 47.2010001540184,
- "p99": 49.47999864816666
- },
- "combine": {
- "p50": 16.519999131560326,
- "p90": 18.92000064253807,
- "p95": 20.080000162124634,
- "p99": 21.801000460982323
- },
- "roundtrip": {
- "p50": 59.52100083231926,
- "p90": 62.67999857664108,
- "p95": 63.84100019931793,
- "p99": 66.96099787950516
- },
- "isolatedSum": {
- "p50": 59.160998091101646,
- "p90": 64.60100039839745,
- "p95": 67.28100031614304,
- "p99": 71.28099910914898
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 8,
- "recvTokensMax": 16,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 42.64000058174133,
- "p90": 45.8809994161129,
- "p95": 47.00100049376488,
- "p99": 49.959998577833176
- },
- "combine": {
- "p50": 20.759999752044678,
- "p90": 23.600000888109207,
- "p95": 24.480000138282776,
- "p99": 26.760000735521317
- },
- "roundtrip": {
- "p50": 64.12000209093094,
- "p90": 67.08099693059921,
- "p95": 67.88100302219391,
- "p99": 70.36100327968597
- },
- "isolatedSum": {
- "p50": 63.40000033378601,
- "p90": 69.4810003042221,
- "p95": 71.48100063204765,
- "p99": 76.71999931335449
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3670016,
- "combineLogicalBytes": 3670016,
- "fanoutMean": 8,
- "recvTokensMax": 32,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 42.7200011909008,
- "p90": 45.88000103831291,
- "p95": 47.36100137233734,
- "p99": 49.60000142455101
- },
- "combine": {
- "p50": 22.679999470710754,
- "p90": 25.280000641942024,
- "p95": 26.159999892115593,
- "p99": 27.240000665187836
- },
- "roundtrip": {
- "p50": 65.72099775075912,
- "p90": 68.64099949598312,
- "p95": 69.64000314474106,
- "p99": 72.2000002861023
- },
- "isolatedSum": {
- "p50": 65.40000066161156,
- "p90": 71.16000168025494,
- "p95": 73.52100126445293,
- "p99": 76.84000208973885
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 7340032,
- "combineLogicalBytes": 7340032,
- "fanoutMean": 8,
- "recvTokensMax": 64,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 42.87999868392944,
- "p90": 45.88000103831291,
- "p95": 46.959999948740005,
- "p99": 48.79999905824661
- },
- "combine": {
- "p50": 28.119999915361404,
- "p90": 30.44000081717968,
- "p95": 31.401000916957855,
- "p99": 33.640000969171524
- },
- "roundtrip": {
- "p50": 71.80000096559525,
- "p90": 75.15999674797058,
- "p95": 76.39999687671661,
- "p99": 78.31999659538269
- },
- "isolatedSum": {
- "p50": 70.99999859929085,
- "p90": 76.32000185549259,
- "p95": 78.36100086569786,
- "p99": 82.44000002741814
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 14680064,
- "combineLogicalBytes": 14680064,
- "fanoutMean": 8,
- "recvTokensMax": 128,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-c1291ad7",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||15d7289bb70ed17",
- "colorKey": "mi355x_ae729691",
- "comparisonKey": "730c294e090417f2",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:59:10.167624+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_06",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16 · balanced-rank-local",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "balanced-rank-local",
- "routingLabel": "balanced-rank-local",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "15d7289bb70ed17",
- "workloadId": "set:5:2eebbed158fe1320",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271910050",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271910050",
- "createdAt": "2026-06-26T23:59:10.167624+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 36.80099919438362,
- "p90": 39.80100154876709,
- "p95": 40.76100140810013,
- "p99": 43.63999888300896
- },
- "combine": {
- "p50": 15.320000238716602,
- "p90": 17.480000853538513,
- "p95": 18.68000067770481,
- "p99": 20.999999716877937
- },
- "roundtrip": {
- "p50": 49.07999932765961,
- "p90": 51.80000141263008,
- "p95": 52.76099964976311,
- "p99": 53.76100167632103
- },
- "isolatedSum": {
- "p50": 52.12099943310022,
- "p90": 57.2810024023056,
- "p95": 59.44100208580494,
- "p99": 64.6399985998869
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 114688,
- "combineLogicalBytes": 114688,
- "fanoutMean": 1,
- "recvTokensMax": 4,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 38.07999938726425,
- "p90": 40.39999842643738,
- "p95": 41.20099917054176,
- "p99": 42.80000180006027
- },
- "combine": {
- "p50": 15.799999237060547,
- "p90": 17.999999225139618,
- "p95": 19.279999658465385,
- "p99": 21.040000021457672
- },
- "roundtrip": {
- "p50": 51.600001752376556,
- "p90": 53.92000079154968,
- "p95": 55.24099990725517,
- "p99": 57.32100084424019
- },
- "isolatedSum": {
- "p50": 53.8799986243248,
- "p90": 58.399997651576996,
- "p95": 60.48099882900715,
- "p99": 63.840001821517944
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 229376,
- "combineLogicalBytes": 229376,
- "fanoutMean": 1,
- "recvTokensMax": 4,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 32.71999955177307,
- "p90": 35.5600006878376,
- "p95": 36.559998989105225,
- "p99": 39.000000804662704
- },
- "combine": {
- "p50": 13.72000016272068,
- "p90": 15.799999237060547,
- "p95": 16.599999740719795,
- "p99": 18.120000138878822
- },
- "roundtrip": {
- "p50": 45.71999981999397,
- "p90": 49.04000088572502,
- "p95": 49.96100068092346,
- "p99": 51.44000053405762
- },
- "isolatedSum": {
- "p50": 46.43999971449375,
- "p90": 51.35999992489815,
- "p95": 53.15999872982502,
- "p99": 57.12000094354153
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 458752,
- "combineLogicalBytes": 458752,
- "fanoutMean": 1,
- "recvTokensMax": 4,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 37.84099966287613,
- "p90": 40.92000052332878,
- "p95": 41.999999433755875,
- "p99": 43.880000710487366
- },
- "combine": {
- "p50": 14.919999986886978,
- "p90": 17.27999933063984,
- "p95": 18.039999529719353,
- "p99": 19.55999992787838
- },
- "roundtrip": {
- "p50": 52.241001278162,
- "p90": 55.75999990105629,
- "p95": 56.68000131845474,
- "p99": 58.35999920964241
- },
- "isolatedSum": {
- "p50": 52.76099964976311,
- "p90": 58.19999985396862,
- "p95": 60.03999896347523,
- "p99": 63.440000638365746
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 917504,
- "combineLogicalBytes": 917504,
- "fanoutMean": 1,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 38.24099898338318,
- "p90": 40.92000052332878,
- "p95": 41.839998215436935,
- "p99": 44.16000097990036
- },
- "combine": {
- "p50": 16.24000072479248,
- "p90": 18.841000273823738,
- "p95": 19.88000050187111,
- "p99": 22.280000150203705
- },
- "roundtrip": {
- "p50": 54.28000167012215,
- "p90": 57.840000838041306,
- "p95": 58.800000697374344,
- "p99": 60.96100062131882
- },
- "isolatedSum": {
- "p50": 54.48099970817566,
- "p90": 59.76100079715252,
- "p95": 61.719998717308044,
- "p99": 66.44000113010406
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1835008,
- "combineLogicalBytes": 1835008,
- "fanoutMean": 1,
- "recvTokensMax": 16,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-ace78f17",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||c8b7839b4895c1a",
- "colorKey": "mi355x_62dc5cd4",
- "comparisonKey": "316ae2638347880f",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:01:29.418642+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_00",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16 · hotspot-single",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "hotspot-single",
- "routingLabel": "hotspot-single",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "c8b7839b4895c1a",
- "workloadId": "set:5:286be993cd819ed9",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271920340",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271920340",
- "createdAt": "2026-06-27T00:01:29.418642+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 39.84000161290169,
- "p90": 42.55999997258186,
- "p95": 44.08000037074089,
- "p99": 48.601001501083374
- },
- "combine": {
- "p50": 16.200000420212746,
- "p90": 17.960000783205032,
- "p95": 19.07999999821186,
- "p99": 21.640000864863396
- },
- "roundtrip": {
- "p50": 55.44000118970871,
- "p90": 58.27999860048294,
- "p95": 59.20099839568138,
- "p99": 60.920000076293945
- },
- "isolatedSum": {
- "p50": 56.04000203311443,
- "p90": 60.520000755786896,
- "p95": 63.16000036895275,
- "p99": 70.24100236594677
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 602112,
- "combineLogicalBytes": 602112,
- "fanoutMean": 5.25,
- "recvTokensMax": 8,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 42.1609990298748,
- "p90": 44.920001178979874,
- "p95": 45.80099880695343,
- "p99": 47.800999134778976
- },
- "combine": {
- "p50": 16.07999950647354,
- "p90": 18.401000648736954,
- "p95": 19.279999658465385,
- "p99": 20.880000665783882
- },
- "roundtrip": {
- "p50": 58.35999920964241,
- "p90": 61.56099960207939,
- "p95": 62.60000169277191,
- "p99": 64.7599995136261
- },
- "isolatedSum": {
- "p50": 58.24099853634834,
- "p90": 63.32100182771683,
- "p95": 65.08099846541882,
- "p99": 68.68099980056286
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1247232,
- "combineLogicalBytes": 1247232,
- "fanoutMean": 5.4375,
- "recvTokensMax": 16,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 41.96000099182129,
- "p90": 44.599998742341995,
- "p95": 45.96000164747238,
- "p99": 48.16100001335144
- },
- "combine": {
- "p50": 19.401000812649727,
- "p90": 21.880000829696655,
- "p95": 23.080000653862953,
- "p99": 24.12099950015545
- },
- "roundtrip": {
- "p50": 61.68099865317345,
- "p90": 65.20099937915802,
- "p95": 65.99999964237213,
- "p99": 67.4000009894371
- },
- "isolatedSum": {
- "p50": 61.361001804471016,
- "p90": 66.47999957203865,
- "p95": 69.04000230133533,
- "p99": 72.28199951350689
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2451456,
- "combineLogicalBytes": 2451456,
- "fanoutMean": 5.34375,
- "recvTokensMax": 32,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 41.80099815130234,
- "p90": 44.2809984087944,
- "p95": 45.559998601675034,
- "p99": 48.39999973773956
- },
- "combine": {
- "p50": 21.239999681711197,
- "p90": 23.19999970495701,
- "p95": 24.080000817775726,
- "p99": 26.040000841021538
- },
- "roundtrip": {
- "p50": 62.960997223854065,
- "p90": 66.041000187397,
- "p95": 66.91999733448029,
- "p99": 68.71999800205231
- },
- "isolatedSum": {
- "p50": 63.040997833013535,
- "p90": 67.48099811375141,
- "p95": 69.63999941945076,
- "p99": 74.4400005787611
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4859904,
- "combineLogicalBytes": 4859904,
- "fanoutMean": 5.296875,
- "recvTokensMax": 64,
- "stragglerRank": 6,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 42.1609990298748,
- "p90": 45.00000178813934,
- "p95": 45.96000164747238,
- "p99": 50.40000006556511
- },
- "combine": {
- "p50": 26.599999517202377,
- "p90": 28.68100069463253,
- "p95": 29.96000088751316,
- "p99": 31.720001250505447
- },
- "roundtrip": {
- "p50": 69.20100003480911,
- "p90": 71.76099717617035,
- "p95": 72.7199986577034,
- "p99": 74.16000217199326
- },
- "isolatedSum": {
- "p50": 68.76099854707718,
- "p90": 73.68100248277187,
- "p95": 75.92000253498554,
- "p99": 82.12000131607056
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9748480,
- "combineLogicalBytes": 9748480,
- "fanoutMean": 5.3125,
- "recvTokensMax": 128,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-2129d47b",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||4d5546b3fb85130",
- "colorKey": "mi355x_570d6605",
- "comparisonKey": "1ea3da47c00f36f8",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:59:55.992554+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_07",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16 · zipf",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf",
- "routingLabel": "zipf",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "4d5546b3fb85130",
- "workloadId": "set:5:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271913592",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271913592",
- "createdAt": "2026-06-26T23:59:55.992554+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 39.03999924659729,
- "p90": 41.76099970936775,
- "p95": 43.40000078082085,
- "p99": 47.15999960899353
- },
- "combine": {
- "p50": 16.359999775886536,
- "p90": 18.519999459385872,
- "p95": 20.12000046670437,
- "p99": 23.40099960565567
- },
- "roundtrip": {
- "p50": 53.95999923348427,
- "p90": 57.20100179314613,
- "p95": 58.75999853014946,
- "p99": 61.20099872350693
- },
- "isolatedSum": {
- "p50": 55.399999022483826,
- "p90": 60.280999168753624,
- "p95": 63.520001247525215,
- "p99": 70.5609992146492
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 444416,
- "combineLogicalBytes": 444416,
- "fanoutMean": 3.875,
- "recvTokensMax": 8,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 40.640998631715775,
- "p90": 43.99999976158142,
- "p95": 44.840000569820404,
- "p99": 48.0009987950325
- },
- "combine": {
- "p50": 16.519999131560326,
- "p90": 18.561000004410744,
- "p95": 20.24099975824356,
- "p99": 23.520000278949738
- },
- "roundtrip": {
- "p50": 55.52000179886818,
- "p90": 59.321001172065735,
- "p95": 60.72099879384041,
- "p99": 68.88099759817123
- },
- "isolatedSum": {
- "p50": 57.1609977632761,
- "p90": 62.560999765992165,
- "p95": 65.08100032806396,
- "p99": 71.52099907398224
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 845824,
- "combineLogicalBytes": 845824,
- "fanoutMean": 3.6875,
- "recvTokensMax": 16,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 41.28099977970123,
- "p90": 44.16000097990036,
- "p95": 45.00000178813934,
- "p99": 47.68000170588493
- },
- "combine": {
- "p50": 17.640000209212303,
- "p90": 20.160000771284103,
- "p95": 21.479999646544456,
- "p99": 24.6799997985363
- },
- "roundtrip": {
- "p50": 59.04100090265274,
- "p90": 63.07999789714813,
- "p95": 64.87999856472015,
- "p99": 68.83999705314636
- },
- "isolatedSum": {
- "p50": 58.920999988913536,
- "p90": 64.32000175118446,
- "p95": 66.4800014346838,
- "p99": 72.36000150442123
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1691648,
- "combineLogicalBytes": 1691648,
- "fanoutMean": 3.6875,
- "recvTokensMax": 32,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 41.919998824596405,
- "p90": 44.801000505685806,
- "p95": 46.84000089764595,
- "p99": 50.880998373031616
- },
- "combine": {
- "p50": 19.600000232458115,
- "p90": 22.120000794529915,
- "p95": 23.520000278949738,
- "p99": 26.799999177455902
- },
- "roundtrip": {
- "p50": 61.000000685453415,
- "p90": 64.56000357866287,
- "p95": 65.88099896907806,
- "p99": 69.52100247144699
- },
- "isolatedSum": {
- "p50": 61.51999905705452,
- "p90": 66.92100130021572,
- "p95": 70.36000117659569,
- "p99": 77.68099755048752
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3354624,
- "combineLogicalBytes": 3354624,
- "fanoutMean": 3.65625,
- "recvTokensMax": 64,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 41.839998215436935,
- "p90": 44.920001178979874,
- "p95": 46.92000150680542,
- "p99": 50.1599982380867
- },
- "combine": {
- "p50": 24.481000378727913,
- "p90": 27.720000594854355,
- "p95": 30.561000108718872,
- "p99": 59.321001172065735
- },
- "roundtrip": {
- "p50": 66.23999774456024,
- "p90": 69.36100125312805,
- "p95": 70.47999650239944,
- "p99": 73.36000353097916
- },
- "isolatedSum": {
- "p50": 66.32099859416485,
- "p90": 72.64000177383423,
- "p95": 77.48100161552429,
- "p99": 109.48099941015244
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 6537216,
- "combineLogicalBytes": 6537216,
- "fanoutMean": 3.5625,
- "recvTokensMax": 127,
- "stragglerRank": 3,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-47886ba2",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||5c00b1a0c13aa3e",
- "colorKey": "mi355x_6fd30e97",
- "comparisonKey": "41d88b5d4da0110a",
- "schemaVersion": 3,
- "generatedAt": "2026-06-27T00:00:43.491121+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_03",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16 · zipf-heavy",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "zipf-heavy",
- "routingLabel": "zipf-heavy",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "5c00b1a0c13aa3e",
- "workloadId": "set:5:6b84350720aa8233",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271916622",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271916622",
- "createdAt": "2026-06-27T00:00:43.491121+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 38.32000121474266,
- "p90": 40.28100147843361,
- "p95": 41.600000113248825,
- "p99": 46.31999880075455
- },
- "combine": {
- "p50": 15.720000490546227,
- "p90": 17.03999936580658,
- "p95": 18.640000373125076,
- "p99": 20.800000056624413
- },
- "roundtrip": {
- "p50": 51.16099864244461,
- "p90": 53.55999991297722,
- "p95": 54.96000126004219,
- "p99": 57.760998606681824
- },
- "isolatedSum": {
- "p50": 54.04000170528889,
- "p90": 57.32100084424019,
- "p95": 60.2400004863739,
- "p99": 67.11999885737896
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 172032,
- "combineLogicalBytes": 172032,
- "fanoutMean": 1.5,
- "recvTokensMax": 8,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 40.47999903559685,
- "p90": 42.64099895954132,
- "p95": 44.47999969124794,
- "p99": 48.760998994112015
- },
- "combine": {
- "p50": 16.00099913775921,
- "p90": 17.160000279545784,
- "p95": 18.039999529719353,
- "p99": 20.800000056624413
- },
- "roundtrip": {
- "p50": 53.16000059247017,
- "p90": 56.07999861240387,
- "p95": 57.64099955558777,
- "p99": 60.08100137114525
- },
- "isolatedSum": {
- "p50": 56.480998173356056,
- "p90": 59.800999239087105,
- "p95": 62.51999922096729,
- "p99": 69.56099905073643
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 315392,
- "combineLogicalBytes": 315392,
- "fanoutMean": 1.375,
- "recvTokensMax": 16,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 41.200000792741776,
- "p90": 43.241001665592194,
- "p95": 44.52100023627281,
- "p99": 48.280999064445496
- },
- "combine": {
- "p50": 17.240000888705254,
- "p90": 18.519999459385872,
- "p95": 20.19999921321869,
- "p99": 22.5210003554821
- },
- "roundtrip": {
- "p50": 56.561000645160675,
- "p90": 59.241000562906265,
- "p95": 60.440998524427414,
- "p99": 64.4410029053688
- },
- "isolatedSum": {
- "p50": 58.44000168144703,
- "p90": 61.761001124978065,
- "p95": 64.7209994494915,
- "p99": 70.8019994199276
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 616448,
- "combineLogicalBytes": 616448,
- "fanoutMean": 1.34375,
- "recvTokensMax": 32,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 41.31999984383583,
- "p90": 43.28100010752678,
- "p95": 44.679999351501465,
- "p99": 46.480000019073486
- },
- "combine": {
- "p50": 18.8400000333786,
- "p90": 20.041000097990036,
- "p95": 21.240999922156334,
- "p99": 24.441000074148178
- },
- "roundtrip": {
- "p50": 58.761000633239746,
- "p90": 61.43999844789505,
- "p95": 63.1600022315979,
- "p99": 65.52000343799591
- },
- "isolatedSum": {
- "p50": 60.15999987721443,
- "p90": 63.322000205516815,
- "p95": 65.9209992736578,
- "p99": 70.92100009322166
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1376256,
- "combineLogicalBytes": 1376256,
- "fanoutMean": 1.5,
- "recvTokensMax": 64,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 41.40099883079529,
- "p90": 43.480001389980316,
- "p95": 44.440001249313354,
- "p99": 46.00000008940697
- },
- "combine": {
- "p50": 22.87999913096428,
- "p90": 24.6799997985363,
- "p95": 26.559999212622643,
- "p99": 29.40100058913231
- },
- "roundtrip": {
- "p50": 63.19999694824219,
- "p90": 65.76000154018402,
- "p95": 67.28000193834305,
- "p99": 69.64100152254105
- },
- "isolatedSum": {
- "p50": 64.28099796175957,
- "p90": 68.16000118851662,
- "p95": 71.000000461936,
- "p99": 75.40100067853928
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2781184,
- "combineLogicalBytes": 2781184,
- "fanoutMean": 1.515625,
- "recvTokensMax": 128,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-8d163d45",
- "identity": "mi355x|mori|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||d42040086b5de07",
- "colorKey": "mi355x_65e339f9",
- "comparisonKey": "2ba4cba3af48c2b3",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T23:39:01.384245+00:00",
- "status": "valid",
- "publicationStatus": "official",
- "runner": "mi355x-amds_07",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "tuned",
- "suite": "backend-default",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16 · zipf+eplb",
- "model": "DeepSeek-V3 (EPLB physical)",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 288,
- "routing": "zipf",
- "routingLabel": "zipf+eplb",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": true,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": null,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "backend-tuned",
- "conformanceClass": "backend-default",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "d42040086b5de07",
- "workloadId": "set:5:f5576e2b712d38c3",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": 4.875,
- "eplbImbalanceAfter": 1.0033482142857144,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28271245352",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271245352",
- "createdAt": "2026-06-26T23:39:01.384245+00:00",
- "sha": "ee4ffe77871d0200cb4a78c96d3ae9f692e9af02"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 39.319999516010284,
- "p90": 42.11999848484993,
- "p95": 43.15999895334244,
- "p99": 46.52100056409836
- },
- "combine": {
- "p50": 15.399999916553497,
- "p90": 17.601000145077705,
- "p95": 18.75999942421913,
- "p99": 21.320000290870667
- },
- "roundtrip": {
- "p50": 54.23999950289726,
- "p90": 57.440001517534256,
- "p95": 58.921001851558685,
- "p99": 60.95999851822853
- },
- "isolatedSum": {
- "p50": 54.71999943256378,
- "p90": 59.720998629927635,
- "p95": 61.91999837756157,
- "p99": 67.84100085496902
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 587776,
- "combineLogicalBytes": 587776,
- "fanoutMean": 5.125,
- "recvTokensMax": 7,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 41.71999916434288,
- "p90": 44.84099894762039,
- "p95": 46.4400015771389,
- "p99": 49.15999993681908
- },
- "combine": {
- "p50": 15.599999576807022,
- "p90": 17.839999869465828,
- "p95": 19.88000050187111,
- "p99": 22.5600004196167
- },
- "roundtrip": {
- "p50": 57.08099901676178,
- "p90": 60.67999824881554,
- "p95": 61.59999966621399,
- "p99": 63.48100304603577
- },
- "isolatedSum": {
- "p50": 57.3199987411499,
- "p90": 62.68099881708622,
- "p95": 66.32000207901001,
- "p99": 71.72000035643578
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1161216,
- "combineLogicalBytes": 1161216,
- "fanoutMean": 5.0625,
- "recvTokensMax": 13,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 41.88000038266182,
- "p90": 44.08099874854088,
- "p95": 45.120999217033386,
- "p99": 48.239998519420624
- },
- "combine": {
- "p50": 18.719999119639397,
- "p90": 21.04100026190281,
- "p95": 22.760000079870224,
- "p99": 26.760000735521317
- },
- "roundtrip": {
- "p50": 61.43999844789505,
- "p90": 64.43999707698822,
- "p95": 65.68100303411484,
- "p99": 67.87999719381332
- },
- "isolatedSum": {
- "p50": 60.599999502301216,
- "p90": 65.12199901044369,
- "p95": 67.88099929690361,
- "p99": 74.99999925494194
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2394112,
- "combineLogicalBytes": 2394112,
- "fanoutMean": 5.21875,
- "recvTokensMax": 23,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 41.839998215436935,
- "p90": 44.79999840259552,
- "p95": 46.23999819159508,
- "p99": 48.36000129580498
- },
- "combine": {
- "p50": 21.199999377131462,
- "p90": 22.95999974012375,
- "p95": 24.19999986886978,
- "p99": 26.040000841021538
- },
- "roundtrip": {
- "p50": 61.51999905705452,
- "p90": 64.92000073194504,
- "p95": 65.92000275850296,
- "p99": 68.08000057935715
- },
- "isolatedSum": {
- "p50": 63.0399975925684,
- "p90": 67.75999814271927,
- "p95": 70.43999806046486,
- "p99": 74.40000213682652
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4845568,
- "combineLogicalBytes": 4845568,
- "fanoutMean": 5.28125,
- "recvTokensMax": 45,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 42.319998145103455,
- "p90": 44.759999960660934,
- "p95": 46.28000035881996,
- "p99": 49.240998923778534
- },
- "combine": {
- "p50": 24.879999458789825,
- "p90": 27.079999446868896,
- "p95": 28.440000489354134,
- "p99": 56.88000097870827
- },
- "roundtrip": {
- "p50": 66.3599967956543,
- "p90": 69.95999813079834,
- "p95": 70.91999799013138,
- "p99": 73.00099730491638
- },
- "isolatedSum": {
- "p50": 67.19999760389328,
- "p90": 71.83999940752983,
- "p95": 74.7200008481741,
- "p99": 106.1209999024868
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9676800,
- "combineLogicalBytes": 9676800,
- "fanoutMean": 5.2734375,
- "recvTokensMax": 88,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-2d0599c0",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da",
- "colorKey": "mi355x_2fa43515",
- "comparisonKey": "2796ed88af4b14b0",
- "schemaVersion": 3,
- "generatedAt": "2026-06-26T15:40:45.756534+00:00",
- "status": "valid",
- "publicationStatus": "diagnostic",
- "runner": "mi355x-amds_04",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "epSize": 8,
- "label": "MI355X EP8 · mori · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": 0.3125,
- "configuredUnits": 80,
- "deviceUnits": 256,
- "resourceClass": "unknown",
- "conformanceClass": "minimum-functional",
- "fixedKernel": false,
- "paretoEligible": false
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 8,
- "scaleUpDomain": 8
- },
- "routingConsistent": true,
- "traceSignature": "c774c8e4abb34da",
- "workloadId": "set:5:d8d49658059863f2",
- "workloadSource": "canonical-serialized",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2",
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28247575150",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150",
- "createdAt": "2026-06-26T15:40:45.756534+00:00",
- "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 8,
- "dispatch": {
- "p50": 40.55999964475632,
- "p90": 43.15999895334244,
- "p95": 44.881001114845276,
- "p99": 47.55999892950058
- },
- "combine": {
- "p50": 16.119999811053276,
- "p90": 18.719999119639397,
- "p95": 19.840000197291374,
- "p99": 22.520000115036964
- },
- "roundtrip": {
- "p50": 56.040000170469284,
- "p90": 59.20000001788139,
- "p95": 60.80099940299988,
- "p99": 63.120998442173004
- },
- "isolatedSum": {
- "p50": 56.67999945580959,
- "p90": 61.879998072981834,
- "p95": 64.72100131213665,
- "p99": 70.07999904453754
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 630784,
- "combineLogicalBytes": 630784,
- "fanoutMean": 5.5,
- "recvTokensMax": 7,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 16,
- "dispatch": {
- "p50": 42.55999997258186,
- "p90": 45.441001653671265,
- "p95": 47.040000557899475,
- "p99": 49.959998577833176
- },
- "combine": {
- "p50": 16.16000011563301,
- "p90": 18.360000103712082,
- "p95": 19.600000232458115,
- "p99": 22.63999916613102
- },
- "roundtrip": {
- "p50": 58.83999913930893,
- "p90": 61.88099831342697,
- "p95": 63.48100304603577,
- "p99": 65.40100276470184
- },
- "isolatedSum": {
- "p50": 58.720000088214874,
- "p90": 63.80100175738335,
- "p95": 66.64000079035759,
- "p99": 72.5999977439642
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1232896,
- "combineLogicalBytes": 1232896,
- "fanoutMean": 5.375,
- "recvTokensMax": 13,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 32,
- "dispatch": {
- "p50": 42.160000652074814,
- "p90": 44.840000569820404,
- "p95": 46.28000035881996,
- "p99": 49.84100162982941
- },
- "combine": {
- "p50": 19.039999693632126,
- "p90": 22.1599992364645,
- "p95": 23.48100021481514,
- "p99": 54.63999882340431
- },
- "roundtrip": {
- "p50": 61.59999966621399,
- "p90": 64.71999734640121,
- "p95": 65.76000154018402,
- "p99": 68.36000084877014
- },
- "isolatedSum": {
- "p50": 61.20000034570694,
- "p90": 66.9999998062849,
- "p95": 69.7610005736351,
- "p99": 104.48100045323372
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 2480128,
- "combineLogicalBytes": 2480128,
- "fanoutMean": 5.40625,
- "recvTokensMax": 29,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 64,
- "dispatch": {
- "p50": 42.52000153064728,
- "p90": 45.1200008392334,
- "p95": 46.080999076366425,
- "p99": 48.8400012254715
- },
- "combine": {
- "p50": 20.479999482631683,
- "p90": 22.520000115036964,
- "p95": 23.479999974370003,
- "p99": 25.800000876188278
- },
- "roundtrip": {
- "p50": 62.67999857664108,
- "p90": 65.5599981546402,
- "p95": 66.880002617836,
- "p99": 68.56100261211395
- },
- "isolatedSum": {
- "p50": 63.00000101327896,
- "p90": 67.64000095427036,
- "p95": 69.56099905073643,
- "p99": 74.64000210165977
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 4974592,
- "combineLogicalBytes": 4974592,
- "fanoutMean": 5.421875,
- "recvTokensMax": 47,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- },
- {
- "tokensPerRank": 16,
- "globalTokens": 128,
- "dispatch": {
- "p50": 42.67999902367592,
- "p90": 45.27999833226204,
- "p95": 46.799998730421066,
- "p99": 49.720000475645065
- },
- "combine": {
- "p50": 24.921000003814697,
- "p90": 27.240000665187836,
- "p95": 28.07999961078167,
- "p99": 30.27999959886074
- },
- "roundtrip": {
- "p50": 67.9209977388382,
- "p90": 71.04100286960602,
- "p95": 72.12000340223312,
- "p99": 74.08100366592407
- },
- "isolatedSum": {
- "p50": 67.60099902749062,
- "p90": 72.51999899744987,
- "p95": 74.87999834120274,
- "p99": 80.0000000745058
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 9920512,
- "combineLogicalBytes": 9920512,
- "fanoutMean": 5.40625,
- "recvTokensMax": 92,
- "stragglerRank": 0,
- "correct": true,
- "samplesPooled": 600,
- "trials": 3
- }
- ]
- },
- {
- "id": "cx-cd519ebd",
- "identity": "mi355x|nccl-ep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|16|decode|normal|none|none|0|normalized|0.18|1a4734625a379e3",
- "colorKey": "mi355x_1180f01d",
- "comparisonKey": "919b62d5ead26bb1",
- "schemaVersion": 3,
- "generatedAt": "2026-06-28T17:32:29.450290+00:00",
- "status": "valid",
- "publicationStatus": "comparable-experimental",
- "runner": "mi355x-amds_04",
- "sku": "mi355x",
- "backend": "nccl-ep",
- "phase": "decode",
- "mode": "normal",
- "resourceMode": "normalized",
- "suite": "resource-constrained",
- "comparisonClass": "standardized",
- "measurementContract": "layout-and-dispatch-v1",
- "topologyClass": "mi355x-multinode-rdma",
- "transport": "rdma",
- "worldSize": 16,
- "epSize": 16,
- "label": "MI355X EP16 · nccl-ep · bf16 (norm)",
- "model": "DeepSeek-V3/V4",
- "shape": {
- "hidden": 7168,
- "topk": 8,
- "experts": 256,
- "routing": "uniform",
- "routingLabel": "uniform",
- "routingStep": 0,
- "unevenTokens": "none",
- "eplbEnabled": false,
- "dispatchDtype": "bf16",
- "activationProfile": "normal",
- "combineQuantMode": "none"
- },
- "resourceProfile": {
- "requestedFraction": 0.18,
- "achievedFraction": null,
- "configuredUnits": null,
- "deviceUnits": 256,
- "resourceClass": "resource-constrained",
- "conformanceClass": "resource-conforming",
- "fixedKernel": false,
- "paretoEligible": true
- },
- "placement": {
- "kind": "packed",
- "nodes": 2,
- "gpusPerNode": 16,
- "scaleUpDomain": 16
- },
- "routingConsistent": true,
- "traceSignature": "1a4734625a379e3",
- "workloadId": null,
- "workloadSource": "seeded-runtime",
- "eplbImbalanceBefore": null,
- "eplbImbalanceAfter": null,
- "backendVersion": null,
- "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975",
- "repository": "SemiAnalysisAI/InferenceX",
- "run": {
- "id": "28328718973",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28328718973",
- "createdAt": "2026-06-28T17:32:29.450290+00:00",
- "sha": "41135333c6788fca7a4051185dfbb3a850649ed5"
- },
- "rows": [
- {
- "tokensPerRank": 1,
- "globalTokens": 16,
- "dispatch": {
- "p50": 360.44201254844666,
- "p90": 381.12300634384155,
- "p95": 1227.2510528564453,
- "p99": 1227.2510528564453
- },
- "combine": {
- "p50": 120.64100056886673,
- "p90": 125.08100271224976,
- "p95": 169.5210039615631,
- "p99": 169.5210039615631
- },
- "roundtrip": {
- "p50": 445.8029866218567,
- "p90": 475.7640063762665,
- "p95": 482.00398683547974,
- "p99": 482.00398683547974
- },
- "isolatedSum": {
- "p50": 481.0830131173134,
- "p90": 506.2040090560913,
- "p95": 1396.7720568180084,
- "p99": 1396.7720568180084
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 1505280,
- "combineLogicalBytes": 1505280,
- "fanoutMean": 6.5625,
- "recvTokensMax": 12,
- "stragglerRank": 15,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 2,
- "globalTokens": 32,
- "dispatch": {
- "p50": 345.1229929924011,
- "p90": 452.7229964733124,
- "p95": 493.32401156425476,
- "p99": 493.32401156425476
- },
- "combine": {
- "p50": 124.20099973678589,
- "p90": 149.48099851608276,
- "p95": 168.08100044727325,
- "p99": 168.08100044727325
- },
- "roundtrip": {
- "p50": 448.28298687934875,
- "p90": 470.24399042129517,
- "p95": 487.1650040149689,
- "p99": 487.1650040149689
- },
- "isolatedSum": {
- "p50": 469.323992729187,
- "p90": 602.2039949893951,
- "p95": 661.405012011528,
- "p99": 661.405012011528
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 3067904,
- "combineLogicalBytes": 3067904,
- "fanoutMean": 6.6875,
- "recvTokensMax": 24,
- "stragglerRank": 5,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 4,
- "globalTokens": 64,
- "dispatch": {
- "p50": 352.80299186706543,
- "p90": 358.8019907474518,
- "p95": 375.04300475120544,
- "p99": 375.04300475120544
- },
- "combine": {
- "p50": 128.24100255966187,
- "p90": 134.04099643230438,
- "p95": 137.12100684642792,
- "p99": 137.12100684642792
- },
- "roundtrip": {
- "p50": 448.76399636268616,
- "p90": 456.76299929618835,
- "p95": 464.20300006866455,
- "p99": 464.20300006866455
- },
- "isolatedSum": {
- "p50": 481.0439944267273,
- "p90": 492.84298717975616,
- "p95": 512.1640115976334,
- "p99": 512.1640115976334
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 5992448,
- "combineLogicalBytes": 5992448,
- "fanoutMean": 6.53125,
- "recvTokensMax": 43,
- "stragglerRank": 1,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- },
- {
- "tokensPerRank": 8,
- "globalTokens": 128,
- "dispatch": {
- "p50": 430.6829869747162,
- "p90": 1308.8120222091675,
- "p95": 1478.0919551849365,
- "p99": 1478.0919551849365
- },
- "combine": {
- "p50": 140.1209980249405,
- "p90": 159.64199602603912,
- "p95": 194.28199529647827,
- "p99": 194.28199529647827
- },
- "roundtrip": {
- "p50": 471.68299555778503,
- "p90": 499.44400787353516,
- "p95": 1358.8520288467407,
- "p99": 1358.8520288467407
- },
- "isolatedSum": {
- "p50": 570.8039849996567,
- "p90": 1468.4540182352066,
- "p95": 1672.3739504814148,
- "p99": 1672.3739504814148
- },
- "roundtripMeasured": true,
- "dispatchLogicalBytes": 12214272,
- "combineLogicalBytes": 12214272,
- "fanoutMean": 6.65625,
- "recvTokensMax": 84,
- "stragglerRank": 2,
- "correct": true,
- "samplesPooled": 8,
- "trials": 1
- }
- ]
- }
- ],
- "failures": [
- {
- "id": "cxf-6e691abd",
- "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "generatedAt": "2026-06-26T17:32:59.549027+00:00",
- "publicationStatus": "diagnostic",
- "status": "valid",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "config": "fp8/ll/layout-and-dispatch",
- "reason": "anomaly:roundtrip_gt_isolated_sum",
- "returnCode": null,
- "run": {
- "id": "28254359089",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089",
- "createdAt": "2026-06-26T17:32:59.549027+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- }
- },
- {
- "id": "cxf-25e7e895",
- "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "generatedAt": "2026-06-26T23:49:09.827299+00:00",
- "publicationStatus": "diagnostic",
- "status": "valid",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "config": "fp8/ll/layout-and-dispatch",
- "reason": "anomaly:roundtrip_gt_isolated_sum",
- "returnCode": null,
- "run": {
- "id": "28271594334",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271594334",
- "createdAt": "2026-06-26T23:49:09.827299+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- }
- },
- {
- "id": "cxf-433580a5",
- "identity": "h100|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "generatedAt": "2026-06-26T23:49:16.484836+00:00",
- "publicationStatus": "diagnostic",
- "status": "valid",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "config": "fp8/ll/runtime-visible",
- "reason": "anomaly:roundtrip_gt_isolated_sum",
- "returnCode": null,
- "run": {
- "id": "28271598000",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271598000",
- "createdAt": "2026-06-26T23:49:16.484836+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- }
- },
- {
- "id": "cxf-bf8e2b86",
- "identity": "h100|uccl||||unknown|normal|unknown|unknown||decode|normal|none|none|0|tuned||",
- "generatedAt": "1970-01-01T00:00:00.000Z",
- "publicationStatus": "failed",
- "status": "failed",
- "sku": "h100",
- "backend": "uccl",
- "phase": "decode",
- "config": "unknown/normal/unknown",
- "reason": "unknown",
- "returnCode": 1,
- "run": {
- "id": null,
- "url": null,
- "createdAt": "1970-01-01T00:00:00.000Z",
- "sha": null
- }
- },
- {
- "id": "cxf-70961aef",
- "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176",
- "generatedAt": "2026-06-26T17:31:08.227503+00:00",
- "publicationStatus": "diagnostic",
- "status": "valid",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "config": "fp8/ll/layout-and-dispatch",
- "reason": "anomaly:roundtrip_gt_isolated_sum",
- "returnCode": null,
- "run": {
- "id": "28254435010",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254435010",
- "createdAt": "2026-06-26T17:31:08.227503+00:00",
- "sha": "60dec7d70f554e252fec87709e2be52752947db1"
- }
- },
- {
- "id": "cxf-e15f2b54",
- "identity": "h200|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "generatedAt": "2026-06-26T23:51:34.222899+00:00",
- "publicationStatus": "diagnostic",
- "status": "valid",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "config": "fp8/ll/layout-and-dispatch",
- "reason": "anomaly:roundtrip_gt_isolated_sum",
- "returnCode": null,
- "run": {
- "id": "28271653486",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271653486",
- "createdAt": "2026-06-26T23:51:34.222899+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- }
- },
- {
- "id": "cxf-33a53f33",
- "identity": "h200|deepep|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176",
- "generatedAt": "2026-06-26T23:51:35.330044+00:00",
- "publicationStatus": "diagnostic",
- "status": "valid",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "config": "fp8/ll/runtime-visible",
- "reason": "anomaly:roundtrip_gt_isolated_sum",
- "returnCode": null,
- "run": {
- "id": "28271656517",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271656517",
- "createdAt": "2026-06-26T23:51:35.330044+00:00",
- "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13"
- }
- },
- {
- "id": "cxf-26d1baf4",
- "identity": "mi355x|mori|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da",
- "generatedAt": "2026-06-26T15:40:45.756534+00:00",
- "publicationStatus": "diagnostic",
- "status": "valid",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "config": "bf16/normal/layout-and-dispatch",
- "reason": "resource-nonconforming",
- "returnCode": null,
- "run": {
- "id": "28247575150",
- "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247575150",
- "createdAt": "2026-06-26T15:40:45.756534+00:00",
- "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1"
- }
- }
- ],
- "summaryCards": [
- {
- "title": "Best backend · decode EP8",
- "value": "flashinfer · B300",
- "sub": "71 us RT p99 · mxfp8 · T=64"
- },
- {
- "title": "Best backend · prefill EP8",
- "value": "flashinfer · B300",
- "sub": "85 us RT p99 · nvfp4 · T=256"
- },
- {
- "title": "LL -> normal crossover",
- "value": "T~128 tok/rank",
- "sub": "H100 EP8 fp8 · normal RT p50 wins above this"
- },
- {
- "title": "Resource-normalized winner",
- "value": "deepep · H100",
- "sub": "113 us RT p99 · bf16 · T=64"
- },
- {
- "title": "Backend-default winner",
- "value": "flashinfer · B300",
- "sub": "71 us RT p99 · mxfp8 · T=64"
- },
- {
- "title": "Most unstable config",
- "value": "H100 · deepep decode",
- "sub": "3.27x p99 under zipf-heavy vs uniform",
- "warning": true
- },
- {
- "title": "Invalid / diagnostic cases",
- "value": "8",
- "sub": "see Evidence failed table",
- "warning": true,
- "href": "#tab-evidence"
- }
- ],
- "decision": {
- "budgetsUs": [100, 250, 500],
- "maxTokensUnderBudget": [
- {
- "id": "cxb-3f6620d0",
- "sku": "b300",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-c27e2cad",
- "sku": "b300",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-567c4192",
- "sku": "b300",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "fp8-directcast",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-10314900",
- "sku": "b300",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "fp8-pertoken",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-238797ce",
- "sku": "b300",
- "backend": "deepep",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 512
- }
- },
- {
- "id": "cxb-67e5feea",
- "sku": "b300",
- "backend": "deepep",
- "phase": "prefill",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 256
- }
- },
- {
- "id": "cxb-7cddf11f",
- "sku": "b300",
- "backend": "deepep-hybrid",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 32,
- "500": 128
- }
- },
- {
- "id": "cxb-4a0e300c",
- "sku": "b300",
- "backend": "deepep-hybrid",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": null,
- "500": 512
- }
- },
- {
- "id": "cxb-6136a9d3",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": 128,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-30070070",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "decode",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": 128,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-9a73b5f5",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "decode",
- "dispatchDtype": "mxfp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": 128,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-207d8ef2",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": 128,
- "250": 512,
- "500": 1024
- }
- },
- {
- "id": "cxb-ae942e6d",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "prefill",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": 256,
- "250": 1024,
- "500": 2048
- }
- },
- {
- "id": "cxb-dede56e2",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "prefill",
- "dispatchDtype": "mxfp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": 256,
- "250": 1024,
- "500": 2048
- }
- },
- {
- "id": "cxb-85dec801",
- "sku": "b300",
- "backend": "flashinfer",
- "phase": "prefill",
- "dispatchDtype": "nvfp4",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": 256,
- "250": 1024,
- "500": 2048
- }
- },
- {
- "id": "cxb-2fdde1de",
- "sku": "b300",
- "backend": "uccl",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-8d828593",
- "sku": "b300",
- "backend": "uccl",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 512
- }
- },
- {
- "id": "cxb-7171c240",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-6f4d88a5",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "ll",
- "budgets": {
- "100": 32,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-416fcf7d",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-d35502c2",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "fp8-directcast",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-779ba710",
- "sku": "h100",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "fp8-pertoken",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-d524fd7e",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 512
- }
- },
- {
- "id": "cxb-bf310e7a",
- "sku": "h100",
- "backend": "deepep",
- "phase": "prefill",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 256
- }
- },
- {
- "id": "cxb-0f748c2f",
- "sku": "h100",
- "backend": "deepep-hybrid",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 2,
- "500": 128
- }
- },
- {
- "id": "cxb-402bdadc",
- "sku": "h100",
- "backend": "deepep-hybrid",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": null,
- "500": 512
- }
- },
- {
- "id": "cxb-f1858975",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-236b5900",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "decode",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-0d201725",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "decode",
- "dispatchDtype": "mxfp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-6fee4962",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": 128,
- "250": 256,
- "500": 512
- }
- },
- {
- "id": "cxb-6d37a6fd",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "prefill",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 512,
- "500": 1024
- }
- },
- {
- "id": "cxb-00728192",
- "sku": "h100",
- "backend": "flashinfer",
- "phase": "prefill",
- "dispatchDtype": "mxfp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 512,
- "500": 1024
- }
- },
- {
- "id": "cxb-5657eb6e",
- "sku": "h100",
- "backend": "uccl",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 1,
- "500": 128
- }
- },
- {
- "id": "cxb-8af55e63",
- "sku": "h100",
- "backend": "uccl",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": null,
- "500": 512
- }
- },
- {
- "id": "cxb-a3bb3bd5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-274a06b0",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "ll",
- "budgets": {
- "100": 32,
- "250": 128,
- "500": 128
- }
- },
- {
- "id": "cxb-1d12a6ce",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 32,
- "500": 128
- }
- },
- {
- "id": "cxb-858b05cb",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "fp8-directcast",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 32,
- "500": 128
- }
- },
- {
- "id": "cxb-339f09b5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "decode",
- "dispatchDtype": "fp8-pertoken",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": null,
- "500": 128
- }
- },
- {
- "id": "cxb-bc48bfe5",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 128,
- "500": 512
- }
- },
- {
- "id": "cxb-e6cb64c3",
- "sku": "h200",
- "backend": "deepep",
- "phase": "prefill",
- "dispatchDtype": "fp8",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": null,
- "500": 256
- }
- },
- {
- "id": "cxb-d2620b3b",
- "sku": "h200",
- "backend": "uccl",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": 8,
- "500": 128
- }
- },
- {
- "id": "cxb-ec807828",
- "sku": "h200",
- "backend": "uccl",
- "phase": "prefill",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": null,
- "250": null,
- "500": 512
- }
- },
- {
- "id": "cxb-279043f8",
- "sku": "mi355x",
- "backend": "mori",
- "phase": "decode",
- "dispatchDtype": "bf16",
- "epSize": 8,
- "mode": "normal",
- "budgets": {
- "100": 16,
- "250": 16,
- "500": 16
- }
- }
- ],
- "recommendations": [
- {
- "id": "cxr-d2992d7c",
- "sku": "b300",
- "phase": "decode",
- "atTokensPerRank": 64,
- "lowestP99DispatchUs": 71.4,
- "config": "mxfp8/normal/layout-and-dispatch-v1/uniform/tuned",
- "epSize": 8
- },
- {
- "id": "cxr-1c3060b2",
- "sku": "b300",
- "phase": "prefill",
- "atTokensPerRank": 256,
- "lowestP99DispatchUs": 85,
- "config": "nvfp4/normal/layout-and-dispatch-v1/uniform/tuned",
- "epSize": 8
- },
- {
- "id": "cxr-8fcf986c",
- "sku": "h100",
- "phase": "decode",
- "atTokensPerRank": 64,
- "lowestP99DispatchUs": 53.1,
- "config": "fp8/ll/layout-and-dispatch-v1/uniform/normalized",
- "epSize": 8
- },
- {
- "id": "cxr-466c0bc2",
- "sku": "h100",
- "phase": "prefill",
- "atTokensPerRank": 256,
- "lowestP99DispatchUs": 104.6,
- "config": "fp8/normal/cached-layout-comm-only-v1/uniform/tuned",
- "epSize": 8
- },
- {
- "id": "cxr-c2fe14a3",
- "sku": "h200",
- "phase": "decode",
- "atTokensPerRank": 64,
- "lowestP99DispatchUs": 62.1,
- "config": "fp8/ll/layout-and-dispatch-v1/uniform/normalized",
- "epSize": 8
- },
- {
- "id": "cxr-7e4f951f",
- "sku": "h200",
- "phase": "prefill",
- "atTokensPerRank": 256,
- "lowestP99DispatchUs": 124.6,
- "config": "fp8/normal/cached-layout-comm-only-v1/uniform/normalized",
- "epSize": 8
- }
- ],
- "llCrossover": [
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h100",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "bf16",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": "never-in-range"
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p50",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- },
- {
- "sku": "h200",
- "ep": 8,
- "dtype": "fp8",
- "stat": "p99",
- "basis": "measured-roundtrip",
- "normal_faster_at_T": 128
- }
- ],
- "resourcePareto": [
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "bf16",
- "T": 1,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 81.2,
- "dispatch_p99": 93,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 57,
- "dispatch_p99": 73.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "bf16",
- "T": 2,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 80.9,
- "dispatch_p99": 89.8,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 57,
- "dispatch_p99": 73.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "bf16",
- "T": 4,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 81.4,
- "dispatch_p99": 107,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 57.8,
- "dispatch_p99": 68.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "bf16",
- "T": 8,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 81.4,
- "dispatch_p99": 93.3,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 59.3,
- "dispatch_p99": 68.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "bf16",
- "T": 16,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 82.8,
- "dispatch_p99": 97.4,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 62.8,
- "dispatch_p99": 76.7,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "bf16",
- "T": 32,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 99.8,
- "dispatch_p99": 106.8,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 69.2,
- "dispatch_p99": 81.7,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "bf16",
- "T": 64,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 111.1,
- "dispatch_p99": 119,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 82.5,
- "dispatch_p99": 99.5,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "bf16",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 124.5,
- "dispatch_p99": 138.8,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 93.9,
- "dispatch_p99": 105,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "fp8",
- "T": 1,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 56.6,
- "dispatch_p99": 67.4,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 56,
- "dispatch_p99": 69.6,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "fp8",
- "T": 2,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 57.2,
- "dispatch_p99": 67.6,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 56.9,
- "dispatch_p99": 68.5,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "fp8",
- "T": 4,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 59.2,
- "dispatch_p99": 68,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 57.6,
- "dispatch_p99": 67.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "fp8",
- "T": 8,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 59.8,
- "dispatch_p99": 69.4,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 59.5,
- "dispatch_p99": 73.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "fp8",
- "T": 16,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 61.2,
- "dispatch_p99": 85.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 60.7,
- "dispatch_p99": 69.8,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "fp8",
- "T": 32,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 64,
- "dispatch_p99": 75.7,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 63.6,
- "dispatch_p99": 72.5,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "fp8",
- "T": 64,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 72.4,
- "dispatch_p99": 84,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 72.6,
- "dispatch_p99": 82.7,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "decode",
- "dtype": "fp8",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 86.4,
- "dispatch_p99": 98.1,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 83.2,
- "dispatch_p99": 90.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 125.3,
- "dispatch_p99": 135.5,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 94.1,
- "dispatch_p99": 116.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 256,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 157.3,
- "dispatch_p99": 174.4,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 135.4,
- "dispatch_p99": 151,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 512,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 222.4,
- "dispatch_p99": 234.7,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 193.2,
- "dispatch_p99": 206.7,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 1024,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 346,
- "dispatch_p99": 360.8,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 326.2,
- "dispatch_p99": 341.6,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 2048,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 592,
- "dispatch_p99": 609.6,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 577.1,
- "dispatch_p99": 591.3,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 4096,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 1092.6,
- "dispatch_p99": 1123.6,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 1069.5,
- "dispatch_p99": 1090.9,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 86.3,
- "dispatch_p99": 98.3,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 83.5,
- "dispatch_p99": 102.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 256,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 118,
- "dispatch_p99": 129.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 116.7,
- "dispatch_p99": 135.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 512,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 176.9,
- "dispatch_p99": 189.6,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 177.8,
- "dispatch_p99": 191.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 1024,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 299.3,
- "dispatch_p99": 312.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 298,
- "dispatch_p99": 319.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 2048,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 535.1,
- "dispatch_p99": 553.6,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 541.5,
- "dispatch_p99": 557.8,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "b300",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 4096,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1351,
- "dispatch_p50": 1012,
- "dispatch_p99": 1036.7,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1824,
- "dispatch_p50": 1019.6,
- "dispatch_p99": 1045.3,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "bf16",
- "T": 1,
- "n_points": 4,
- "curve": [
- {
- "achieved_fraction": 0.0985,
- "dispatch_p50": 97.2,
- "dispatch_p99": 111.1,
- "resource_class": "resource-constrained"
- },
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 127.7,
- "dispatch_p99": 143.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 95.7,
- "dispatch_p99": 109.4,
- "resource_class": "resource-constrained"
- },
- {
- "achieved_fraction": 0.5985,
- "dispatch_p50": 96.3,
- "dispatch_p99": 108.5,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "bf16",
- "T": 2,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 179.5,
- "dispatch_p99": 194.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 71.2,
- "dispatch_p99": 107.9,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "bf16",
- "T": 4,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 130.9,
- "dispatch_p99": 201,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 95.2,
- "dispatch_p99": 439.6,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "bf16",
- "T": 8,
- "n_points": 4,
- "curve": [
- {
- "achieved_fraction": 0.0985,
- "dispatch_p50": 99.3,
- "dispatch_p99": 113.5,
- "resource_class": "resource-constrained"
- },
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 133.1,
- "dispatch_p99": 479,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 95.4,
- "dispatch_p99": 113.5,
- "resource_class": "resource-constrained"
- },
- {
- "achieved_fraction": 0.5985,
- "dispatch_p50": 96.7,
- "dispatch_p99": 112.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "bf16",
- "T": 16,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 129.6,
- "dispatch_p99": 203.3,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 95.7,
- "dispatch_p99": 106.5,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "bf16",
- "T": 32,
- "n_points": 4,
- "curve": [
- {
- "achieved_fraction": 0.0985,
- "dispatch_p50": 103.3,
- "dispatch_p99": 121.4,
- "resource_class": "resource-constrained"
- },
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 181.8,
- "dispatch_p99": 324.5,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 80.9,
- "dispatch_p99": 113.2,
- "resource_class": "resource-constrained"
- },
- {
- "achieved_fraction": 0.5985,
- "dispatch_p50": 102.9,
- "dispatch_p99": 114.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "bf16",
- "T": 64,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 147.7,
- "dispatch_p99": 211.7,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 103.5,
- "dispatch_p99": 125.7,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "bf16",
- "T": 128,
- "n_points": 4,
- "curve": [
- {
- "achieved_fraction": 0.0985,
- "dispatch_p50": 129.7,
- "dispatch_p99": 143.9,
- "resource_class": "resource-constrained"
- },
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 186.2,
- "dispatch_p99": 208,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 111.3,
- "dispatch_p99": 139.9,
- "resource_class": "resource-constrained"
- },
- {
- "achieved_fraction": 0.5985,
- "dispatch_p50": 129.1,
- "dispatch_p99": 142.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "fp8",
- "T": 1,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 98.8,
- "dispatch_p99": 114.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 98,
- "dispatch_p99": 110.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "fp8",
- "T": 2,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 99.5,
- "dispatch_p99": 111.6,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 72.2,
- "dispatch_p99": 105.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "fp8",
- "T": 4,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 99.3,
- "dispatch_p99": 110.5,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 72.3,
- "dispatch_p99": 115.5,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "fp8",
- "T": 8,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 100.2,
- "dispatch_p99": 111.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 97.5,
- "dispatch_p99": 113.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "fp8",
- "T": 16,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 98.9,
- "dispatch_p99": 112.2,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 97.1,
- "dispatch_p99": 113.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "fp8",
- "T": 32,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 99.9,
- "dispatch_p99": 181.1,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 73.1,
- "dispatch_p99": 112.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "fp8",
- "T": 64,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 101.4,
- "dispatch_p99": 370.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 78.7,
- "dispatch_p99": 125.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "decode",
- "dtype": "fp8",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 106.2,
- "dispatch_p99": 117.6,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 90.3,
- "dispatch_p99": 117.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 161.1,
- "dispatch_p99": 170.8,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 110.5,
- "dispatch_p99": 166,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 256,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 186.6,
- "dispatch_p99": 197.3,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 147.4,
- "dispatch_p99": 154.3,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 512,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 239.6,
- "dispatch_p99": 250.6,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 204.9,
- "dispatch_p99": 226.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 1024,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 346.9,
- "dispatch_p99": 358.1,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 319.9,
- "dispatch_p99": 330.6,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 2048,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 571.1,
- "dispatch_p99": 621.2,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 570.9,
- "dispatch_p99": 593.9,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 4096,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 1035.6,
- "dispatch_p99": 1074.5,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 1075.9,
- "dispatch_p99": 1102.5,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 90,
- "dispatch_p99": 158.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 89.6,
- "dispatch_p99": 100.8,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 256,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 107.5,
- "dispatch_p99": 170.3,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 106.2,
- "dispatch_p99": 125.9,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 512,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 147,
- "dispatch_p99": 460.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 138.5,
- "dispatch_p99": 197.3,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 1024,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 215.3,
- "dispatch_p99": 223.5,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 197.4,
- "dispatch_p99": 216.3,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 2048,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 354.8,
- "dispatch_p99": 380.3,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 318.7,
- "dispatch_p99": 347.3,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h100",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 4096,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 641.4,
- "dispatch_p99": 655.5,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 574.8,
- "dispatch_p99": 604.8,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 1,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 123.6,
- "dispatch_p99": 203.1,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 74.1,
- "dispatch_p99": 138,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 2,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 114.8,
- "dispatch_p99": 181,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 74.3,
- "dispatch_p99": 131.7,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 4,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 111.7,
- "dispatch_p99": 167.4,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 75,
- "dispatch_p99": 139.7,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 8,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 112.8,
- "dispatch_p99": 166.3,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 74.8,
- "dispatch_p99": 123.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 16,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 120.3,
- "dispatch_p99": 217,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 73.2,
- "dispatch_p99": 195.9,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 32,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 123.6,
- "dispatch_p99": 169.6,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 82.3,
- "dispatch_p99": 134.9,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 64,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 136,
- "dispatch_p99": 197.4,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 91.3,
- "dispatch_p99": 146.8,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 156.2,
- "dispatch_p99": 197.5,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 116,
- "dispatch_p99": 149.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "fp8",
- "T": 1,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 73,
- "dispatch_p99": 139.2,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 70.2,
- "dispatch_p99": 121.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "fp8",
- "T": 2,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 71.4,
- "dispatch_p99": 113.8,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 67.6,
- "dispatch_p99": 144.6,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "fp8",
- "T": 4,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 72.7,
- "dispatch_p99": 146.8,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 69.7,
- "dispatch_p99": 228.6,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "fp8",
- "T": 8,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 70.1,
- "dispatch_p99": 165.8,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 70.5,
- "dispatch_p99": 151.9,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "fp8",
- "T": 16,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 73.7,
- "dispatch_p99": 146.1,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 68.5,
- "dispatch_p99": 126.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "fp8",
- "T": 32,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 71.6,
- "dispatch_p99": 167.3,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 68.3,
- "dispatch_p99": 114.8,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "fp8",
- "T": 64,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 79.8,
- "dispatch_p99": 125.7,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 74.5,
- "dispatch_p99": 120.6,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "fp8",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 88.4,
- "dispatch_p99": 115.8,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 88.7,
- "dispatch_p99": 129,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 159.7,
- "dispatch_p99": 266.7,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 116.4,
- "dispatch_p99": 160,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 256,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 185.9,
- "dispatch_p99": 239.7,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 147,
- "dispatch_p99": 191.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 512,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 239.3,
- "dispatch_p99": 267.2,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 207.7,
- "dispatch_p99": 267.9,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 1024,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 351.7,
- "dispatch_p99": 419.2,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 324.9,
- "dispatch_p99": 364.7,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 2048,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 584.7,
- "dispatch_p99": 635.1,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 566.4,
- "dispatch_p99": 609.2,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "bf16",
- "T": 4096,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 1028.6,
- "dispatch_p99": 1135.3,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 1051.8,
- "dispatch_p99": 1161.4,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 128,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 86.4,
- "dispatch_p99": 152.1,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 86.8,
- "dispatch_p99": 141.8,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 256,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 104.4,
- "dispatch_p99": 141.7,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 103.2,
- "dispatch_p99": 155.1,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 512,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 141.1,
- "dispatch_p99": 233.4,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 135.8,
- "dispatch_p99": 238,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 1024,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 205.8,
- "dispatch_p99": 248.9,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 196.6,
- "dispatch_p99": 253.7,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 2048,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 340.2,
- "dispatch_p99": 421.5,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 320.2,
- "dispatch_p99": 440,
- "resource_class": "resource-constrained"
- }
- ]
- },
- {
- "sku": "h200",
- "phase": "prefill",
- "dtype": "fp8",
- "T": 4096,
- "n_points": 2,
- "curve": [
- {
- "achieved_fraction": 0.1515,
- "dispatch_p50": 612.3,
- "dispatch_p99": 680.4,
- "resource_class": "backend-tuned"
- },
- {
- "achieved_fraction": 0.1818,
- "dispatch_p50": 572.4,
- "dispatch_p99": 629.7,
- "resource_class": "resource-constrained"
- }
- ]
- }
- ],
- "topologyPenalty": [
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 1,
- "ep8_p50": 123.6,
- "ep16_p50": 578.4,
- "penalty_pct": 367.9
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 2,
- "ep8_p50": 114.8,
- "ep16_p50": 547.2,
- "penalty_pct": 376.5
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 4,
- "ep8_p50": 111.7,
- "ep16_p50": 621.5,
- "penalty_pct": 456.5
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 8,
- "ep8_p50": 112.8,
- "ep16_p50": 611.8,
- "penalty_pct": 442.2
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 16,
- "ep8_p50": 120.3,
- "ep16_p50": 631.8,
- "penalty_pct": 425.3
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 32,
- "ep8_p50": 123.6,
- "ep16_p50": 782.9,
- "penalty_pct": 533.5
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 64,
- "ep8_p50": 136,
- "ep16_p50": 679.3,
- "penalty_pct": 399.5
- },
- {
- "sku": "h200",
- "phase": "decode",
- "dtype": "bf16",
- "T": 128,
- "ep8_p50": 156.2,
- "ep16_p50": 808.1,
- "penalty_pct": 417.5
- },
- {
- "sku": "mi355x",
- "phase": "decode",
- "dtype": "bf16",
- "T": 1,
- "ep8_p50": 40.6,
- "ep16_p50": 360.4,
- "penalty_pct": 788.7
- },
- {
- "sku": "mi355x",
- "phase": "decode",
- "dtype": "bf16",
- "T": 2,
- "ep8_p50": 42.6,
- "ep16_p50": 345.1,
- "penalty_pct": 710.9
- },
- {
- "sku": "mi355x",
- "phase": "decode",
- "dtype": "bf16",
- "T": 4,
- "ep8_p50": 42.2,
- "ep16_p50": 352.8,
- "penalty_pct": 736.8
- },
- {
- "sku": "mi355x",
- "phase": "decode",
- "dtype": "bf16",
- "T": 8,
- "ep8_p50": 42.5,
- "ep16_p50": 430.7,
- "penalty_pct": 912.9
- }
- ],
- "skewPenalty": [
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 1,
- "p50_amplification": 0.694,
- "p99_amplification": 0.867
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 2,
- "p50_amplification": 0.695,
- "p99_amplification": 0.811
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 4,
- "p50_amplification": 0.697,
- "p99_amplification": 0.683
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 8,
- "p50_amplification": 0.716,
- "p99_amplification": 0.76
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 16,
- "p50_amplification": 0.716,
- "p99_amplification": 0.881
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 32,
- "p50_amplification": 0.743,
- "p99_amplification": 0.837
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 64,
- "p50_amplification": 0.718,
- "p99_amplification": 0.756
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.813,
- "p99_amplification": 0.898
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 1,
- "p50_amplification": 0.698,
- "p99_amplification": 0.753
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 8,
- "p50_amplification": 0.723,
- "p99_amplification": 0.798
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 32,
- "p50_amplification": 0.76,
- "p99_amplification": 0.82
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.828,
- "p99_amplification": 0.816
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 1,
- "p50_amplification": 0.722,
- "p99_amplification": 0.819
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 2,
- "p50_amplification": 0.707,
- "p99_amplification": 0.777
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 4,
- "p50_amplification": 0.705,
- "p99_amplification": 0.634
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 8,
- "p50_amplification": 0.713,
- "p99_amplification": 0.806
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 16,
- "p50_amplification": 0.709,
- "p99_amplification": 0.817
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 32,
- "p50_amplification": 0.705,
- "p99_amplification": 0.887
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 64,
- "p50_amplification": 0.707,
- "p99_amplification": 0.736
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 128,
- "p50_amplification": 0.736,
- "p99_amplification": 0.729
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 1,
- "p50_amplification": 0.699,
- "p99_amplification": 0.752
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 8,
- "p50_amplification": 0.699,
- "p99_amplification": 0.708
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 32,
- "p50_amplification": 0.68,
- "p99_amplification": 0.77
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 128,
- "p50_amplification": 0.737,
- "p99_amplification": 0.823
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 1,
- "p50_amplification": 21.723,
- "p99_amplification": 36.695
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 2,
- "p50_amplification": 21.728,
- "p99_amplification": 38.053
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 4,
- "p50_amplification": 21.606,
- "p99_amplification": 31.919
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 8,
- "p50_amplification": 21.676,
- "p99_amplification": 57.264
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 16,
- "p50_amplification": 21.27,
- "p99_amplification": 35.187
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 32,
- "p50_amplification": 17.906,
- "p99_amplification": 53.04
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 64,
- "p50_amplification": 16.017,
- "p99_amplification": 28.424
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 128,
- "p50_amplification": 14.456,
- "p99_amplification": 24.57
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 1,
- "p50_amplification": 0.707,
- "p99_amplification": 0.797
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 2,
- "p50_amplification": 0.712,
- "p99_amplification": 0.752
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 4,
- "p50_amplification": 0.722,
- "p99_amplification": 0.662
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 8,
- "p50_amplification": 0.739,
- "p99_amplification": 0.785
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 16,
- "p50_amplification": 0.767,
- "p99_amplification": 0.905
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 32,
- "p50_amplification": 0.708,
- "p99_amplification": 0.772
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 64,
- "p50_amplification": 0.788,
- "p99_amplification": 0.832
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 128,
- "p50_amplification": 0.833,
- "p99_amplification": 0.85
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 1,
- "p50_amplification": 0.685,
- "p99_amplification": 0.747
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 2,
- "p50_amplification": 0.69,
- "p99_amplification": 0.712
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 4,
- "p50_amplification": 0.694,
- "p99_amplification": 0.609
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 8,
- "p50_amplification": 0.715,
- "p99_amplification": 0.804
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 16,
- "p50_amplification": 0.722,
- "p99_amplification": 0.739
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 32,
- "p50_amplification": 0.681,
- "p99_amplification": 0.713
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 64,
- "p50_amplification": 0.777,
- "p99_amplification": 0.867
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 128,
- "p50_amplification": 0.744,
- "p99_amplification": 0.791
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 1,
- "p50_amplification": 0.697,
- "p99_amplification": 0.741
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 2,
- "p50_amplification": 0.703,
- "p99_amplification": 0.718
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 4,
- "p50_amplification": 0.717,
- "p99_amplification": 0.623
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 8,
- "p50_amplification": 0.721,
- "p99_amplification": 0.745
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 16,
- "p50_amplification": 0.723,
- "p99_amplification": 0.868
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 32,
- "p50_amplification": 0.746,
- "p99_amplification": 0.763
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 64,
- "p50_amplification": 0.716,
- "p99_amplification": 0.866
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 128,
- "p50_amplification": 0.823,
- "p99_amplification": 0.912
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 1,
- "p50_amplification": 0.712,
- "p99_amplification": 0.809
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 2,
- "p50_amplification": 0.716,
- "p99_amplification": 0.706
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 4,
- "p50_amplification": 0.734,
- "p99_amplification": 0.686
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 8,
- "p50_amplification": 0.74,
- "p99_amplification": 0.87
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 16,
- "p50_amplification": 0.796,
- "p99_amplification": 0.781
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 32,
- "p50_amplification": 0.702,
- "p99_amplification": 0.751
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 64,
- "p50_amplification": 0.796,
- "p99_amplification": 0.801
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 128,
- "p50_amplification": 0.762,
- "p99_amplification": 0.77
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 1,
- "p50_amplification": 0.714,
- "p99_amplification": 0.778
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 2,
- "p50_amplification": 0.72,
- "p99_amplification": 0.825
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 4,
- "p50_amplification": 0.757,
- "p99_amplification": 0.868
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 8,
- "p50_amplification": 0.741,
- "p99_amplification": 0.849
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 16,
- "p50_amplification": 0.806,
- "p99_amplification": 0.813
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 32,
- "p50_amplification": 0.703,
- "p99_amplification": 0.776
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 64,
- "p50_amplification": 0.807,
- "p99_amplification": 0.86
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.763,
- "p99_amplification": 0.785
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 1,
- "p50_amplification": 0.688,
- "p99_amplification": 0.915
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 2,
- "p50_amplification": 0.69,
- "p99_amplification": 0.807
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 4,
- "p50_amplification": 0.705,
- "p99_amplification": 0.895
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 8,
- "p50_amplification": 0.721,
- "p99_amplification": 0.745
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 16,
- "p50_amplification": 0.718,
- "p99_amplification": 0.756
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 32,
- "p50_amplification": 0.746,
- "p99_amplification": 0.765
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 64,
- "p50_amplification": 0.722,
- "p99_amplification": 0.759
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.817,
- "p99_amplification": 0.83
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 1,
- "p50_amplification": 0.7,
- "p99_amplification": 0.781
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 2,
- "p50_amplification": 0.712,
- "p99_amplification": 0.796
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 4,
- "p50_amplification": 0.707,
- "p99_amplification": 0.634
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 8,
- "p50_amplification": 0.723,
- "p99_amplification": 0.838
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 16,
- "p50_amplification": 0.731,
- "p99_amplification": 0.855
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 32,
- "p50_amplification": 0.694,
- "p99_amplification": 0.779
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 64,
- "p50_amplification": 0.791,
- "p99_amplification": 0.841
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.755,
- "p99_amplification": 0.76
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.829,
- "p99_amplification": 0.934
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 512,
- "p50_amplification": 0.921,
- "p99_amplification": 0.942
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 2048,
- "p50_amplification": 1.095,
- "p99_amplification": 1.103
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.8,
- "p99_amplification": 0.839
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 256,
- "p50_amplification": 0.829,
- "p99_amplification": 0.848
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 512,
- "p50_amplification": 0.931,
- "p99_amplification": 0.942
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 1024,
- "p50_amplification": 1.005,
- "p99_amplification": 1.01
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 2048,
- "p50_amplification": 1.083,
- "p99_amplification": 1.227
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 4096,
- "p50_amplification": 1.146,
- "p99_amplification": 1.14
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 128,
- "p50_amplification": 0.737,
- "p99_amplification": 0.836
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 512,
- "p50_amplification": 0.81,
- "p99_amplification": 0.873
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 2048,
- "p50_amplification": 0.963,
- "p99_amplification": 1.019
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 128,
- "p50_amplification": 0.746,
- "p99_amplification": 0.975
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 256,
- "p50_amplification": 0.811,
- "p99_amplification": 0.829
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 512,
- "p50_amplification": 0.839,
- "p99_amplification": 0.905
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 1024,
- "p50_amplification": 0.9,
- "p99_amplification": 0.935
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 2048,
- "p50_amplification": 0.96,
- "p99_amplification": 1.02
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 4096,
- "p50_amplification": 1.018,
- "p99_amplification": 1.098
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 128,
- "p50_amplification": 0.753,
- "p99_amplification": 0.786
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 256,
- "p50_amplification": 0.844,
- "p99_amplification": 0.875
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 512,
- "p50_amplification": 0.866,
- "p99_amplification": 0.913
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 1024,
- "p50_amplification": 0.945,
- "p99_amplification": 1.093
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 2048,
- "p50_amplification": 0.983,
- "p99_amplification": 1.102
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 4096,
- "p50_amplification": 0.993,
- "p99_amplification": 0.991
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 128,
- "p50_amplification": 0.82,
- "p99_amplification": 0.813
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 256,
- "p50_amplification": 0.889,
- "p99_amplification": 0.876
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 512,
- "p50_amplification": 0.961,
- "p99_amplification": 0.957
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 1024,
- "p50_amplification": 1.028,
- "p99_amplification": 1.021
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 2048,
- "p50_amplification": 1.113,
- "p99_amplification": 1.115
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 4096,
- "p50_amplification": 1.176,
- "p99_amplification": 1.271
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 128,
- "p50_amplification": 0.758,
- "p99_amplification": 0.804
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 256,
- "p50_amplification": 0.841,
- "p99_amplification": 0.85
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 512,
- "p50_amplification": 0.871,
- "p99_amplification": 1.015
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 1024,
- "p50_amplification": 0.943,
- "p99_amplification": 0.968
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 2048,
- "p50_amplification": 0.956,
- "p99_amplification": 1.001
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 4096,
- "p50_amplification": 0.979,
- "p99_amplification": 0.987
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 128,
- "p50_amplification": 0.804,
- "p99_amplification": 0.808
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 256,
- "p50_amplification": 0.864,
- "p99_amplification": 0.87
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 512,
- "p50_amplification": 0.91,
- "p99_amplification": 0.924
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 1024,
- "p50_amplification": 1.004,
- "p99_amplification": 1.015
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 2048,
- "p50_amplification": 1.082,
- "p99_amplification": 1.117
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 4096,
- "p50_amplification": 1.146,
- "p99_amplification": 1.182
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 128,
- "p50_amplification": 0.758,
- "p99_amplification": 0.82
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 256,
- "p50_amplification": 0.874,
- "p99_amplification": 0.915
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 512,
- "p50_amplification": 0.87,
- "p99_amplification": 0.884
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 1024,
- "p50_amplification": 0.939,
- "p99_amplification": 0.971
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 2048,
- "p50_amplification": 0.971,
- "p99_amplification": 1.051
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 4096,
- "p50_amplification": 0.975,
- "p99_amplification": 0.981
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.754,
- "p99_amplification": 0.83
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 256,
- "p50_amplification": 0.873,
- "p99_amplification": 0.861
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 512,
- "p50_amplification": 0.87,
- "p99_amplification": 0.915
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 1024,
- "p50_amplification": 0.941,
- "p99_amplification": 0.95
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 2048,
- "p50_amplification": 0.97,
- "p99_amplification": 0.978
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 4096,
- "p50_amplification": 0.974,
- "p99_amplification": 0.962
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.809,
- "p99_amplification": 0.826
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 256,
- "p50_amplification": 0.866,
- "p99_amplification": 0.862
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 512,
- "p50_amplification": 0.917,
- "p99_amplification": 0.952
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 1024,
- "p50_amplification": 1.008,
- "p99_amplification": 1.01
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 2048,
- "p50_amplification": 1.084,
- "p99_amplification": 1.083
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 4096,
- "p50_amplification": 1.146,
- "p99_amplification": 1.136
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.745,
- "p99_amplification": 0.954
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 256,
- "p50_amplification": 0.866,
- "p99_amplification": 0.889
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 512,
- "p50_amplification": 0.866,
- "p99_amplification": 0.903
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 1024,
- "p50_amplification": 0.925,
- "p99_amplification": 0.924
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 2048,
- "p50_amplification": 0.968,
- "p99_amplification": 1.102
- },
- {
- "sku": "b300",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 4096,
- "p50_amplification": 0.972,
- "p99_amplification": 0.974
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 1,
- "p50_amplification": 0.731,
- "p99_amplification": 0.751
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 2,
- "p50_amplification": 0.535,
- "p99_amplification": 0.549
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 4,
- "p50_amplification": 0.733,
- "p99_amplification": 0.547
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 8,
- "p50_amplification": 0.736,
- "p99_amplification": 0.233
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 16,
- "p50_amplification": 0.749,
- "p99_amplification": 0.544
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 32,
- "p50_amplification": 0.531,
- "p99_amplification": 0.341
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 64,
- "p50_amplification": 0.724,
- "p99_amplification": 1.182
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.69,
- "p99_amplification": 0.731
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 1,
- "p50_amplification": 0.762,
- "p99_amplification": 0.821
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 8,
- "p50_amplification": 0.733,
- "p99_amplification": 0.287
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 32,
- "p50_amplification": 0.563,
- "p99_amplification": 0.381
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.748,
- "p99_amplification": 0.787
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 1,
- "p50_amplification": 0.737,
- "p99_amplification": 0.929
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 2,
- "p50_amplification": 0.526,
- "p99_amplification": 0.543
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 4,
- "p50_amplification": 0.727,
- "p99_amplification": 0.538
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 8,
- "p50_amplification": 0.723,
- "p99_amplification": 0.351
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 16,
- "p50_amplification": 0.743,
- "p99_amplification": 0.542
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 32,
- "p50_amplification": 0.532,
- "p99_amplification": 0.338
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 64,
- "p50_amplification": 0.709,
- "p99_amplification": 0.565
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 128,
- "p50_amplification": 0.648,
- "p99_amplification": 0.714
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 1,
- "p50_amplification": 0.751,
- "p99_amplification": 0.775
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 8,
- "p50_amplification": 0.738,
- "p99_amplification": 0.235
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 32,
- "p50_amplification": 0.544,
- "p99_amplification": 0.357
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 128,
- "p50_amplification": 0.654,
- "p99_amplification": 0.654
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 1,
- "p50_amplification": 0.736,
- "p99_amplification": 0.75
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 2,
- "p50_amplification": 0.389,
- "p99_amplification": 0.548
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 4,
- "p50_amplification": 0.548,
- "p99_amplification": 0.537
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 8,
- "p50_amplification": 0.542,
- "p99_amplification": 0.216
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 16,
- "p50_amplification": 0.633,
- "p99_amplification": 0.523
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 32,
- "p50_amplification": 0.494,
- "p99_amplification": 0.331
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 64,
- "p50_amplification": 0.651,
- "p99_amplification": 0.604
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy+eplb",
- "T": 128,
- "p50_amplification": 0.615,
- "p99_amplification": 0.673
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 1,
- "p50_amplification": 0.762,
- "p99_amplification": 0.783
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 2,
- "p50_amplification": 0.406,
- "p99_amplification": 0.563
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 4,
- "p50_amplification": 0.583,
- "p99_amplification": 0.545
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 8,
- "p50_amplification": 0.577,
- "p99_amplification": 0.228
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 16,
- "p50_amplification": 0.741,
- "p99_amplification": 0.569
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 32,
- "p50_amplification": 0.499,
- "p99_amplification": 0.337
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 64,
- "p50_amplification": 0.687,
- "p99_amplification": 0.656
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild",
- "T": 128,
- "p50_amplification": 0.658,
- "p99_amplification": 0.691
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 1,
- "p50_amplification": 0.772,
- "p99_amplification": 0.787
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 2,
- "p50_amplification": 0.41,
- "p99_amplification": 0.591
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 4,
- "p50_amplification": 0.742,
- "p99_amplification": 0.551
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 8,
- "p50_amplification": 0.728,
- "p99_amplification": 0.233
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 16,
- "p50_amplification": 0.748,
- "p99_amplification": 0.556
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 32,
- "p50_amplification": 0.504,
- "p99_amplification": 0.341
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 64,
- "p50_amplification": 0.709,
- "p99_amplification": 0.581
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-mild+eplb",
- "T": 128,
- "p50_amplification": 0.61,
- "p99_amplification": 0.673
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 1,
- "p50_amplification": 0.75,
- "p99_amplification": 0.764
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 2,
- "p50_amplification": 0.395,
- "p99_amplification": 0.544
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 4,
- "p50_amplification": 0.559,
- "p99_amplification": 0.522
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 8,
- "p50_amplification": 0.57,
- "p99_amplification": 0.224
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 16,
- "p50_amplification": 0.612,
- "p99_amplification": 1.13
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 32,
- "p50_amplification": 0.487,
- "p99_amplification": 0.339
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 64,
- "p50_amplification": 0.693,
- "p99_amplification": 0.585
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate",
- "T": 128,
- "p50_amplification": 0.676,
- "p99_amplification": 0.711
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 1,
- "p50_amplification": 0.546,
- "p99_amplification": 0.695
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 2,
- "p50_amplification": 0.389,
- "p99_amplification": 0.494
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 4,
- "p50_amplification": 0.566,
- "p99_amplification": 0.554
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 8,
- "p50_amplification": 0.569,
- "p99_amplification": 0.221
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 16,
- "p50_amplification": 0.637,
- "p99_amplification": 0.525
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 32,
- "p50_amplification": 0.497,
- "p99_amplification": 0.338
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 64,
- "p50_amplification": 0.656,
- "p99_amplification": 0.587
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-moderate+eplb",
- "T": 128,
- "p50_amplification": 0.634,
- "p99_amplification": 0.676
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 1,
- "p50_amplification": 0.551,
- "p99_amplification": 1.339
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 2,
- "p50_amplification": 0.381,
- "p99_amplification": 0.491
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 4,
- "p50_amplification": 0.554,
- "p99_amplification": 0.534
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 8,
- "p50_amplification": 0.723,
- "p99_amplification": 0.325
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 16,
- "p50_amplification": 0.64,
- "p99_amplification": 0.525
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 32,
- "p50_amplification": 0.497,
- "p99_amplification": 0.342
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 64,
- "p50_amplification": 0.648,
- "p99_amplification": 1.883
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.614,
- "p99_amplification": 0.686
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 1,
- "p50_amplification": 0.759,
- "p99_amplification": 0.771
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 2,
- "p50_amplification": 0.394,
- "p99_amplification": 0.583
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 4,
- "p50_amplification": 0.536,
- "p99_amplification": 0.655
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 8,
- "p50_amplification": 0.714,
- "p99_amplification": 0.239
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 16,
- "p50_amplification": 0.737,
- "p99_amplification": 0.552
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 32,
- "p50_amplification": 0.457,
- "p99_amplification": 0.336
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 64,
- "p50_amplification": 0.683,
- "p99_amplification": 0.637
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.651,
- "p99_amplification": 0.725
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 1,
- "p50_amplification": 0.546,
- "p99_amplification": 0.579
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 2,
- "p50_amplification": 0.392,
- "p99_amplification": 0.434
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 4,
- "p50_amplification": 0.559,
- "p99_amplification": 0.54
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 8,
- "p50_amplification": 0.528,
- "p99_amplification": 0.187
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 16,
- "p50_amplification": 0.64,
- "p99_amplification": 0.539
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 32,
- "p50_amplification": 0.448,
- "p99_amplification": 0.333
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 64,
- "p50_amplification": 0.675,
- "p99_amplification": 0.592
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.61,
- "p99_amplification": 0.671
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.778,
- "p99_amplification": 0.802
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 512,
- "p50_amplification": 0.999,
- "p99_amplification": 1.045
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 2048,
- "p50_amplification": 1.186,
- "p99_amplification": 1.129
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.774,
- "p99_amplification": 0.778
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 256,
- "p50_amplification": 0.882,
- "p99_amplification": 1.161
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 512,
- "p50_amplification": 0.993,
- "p99_amplification": 0.998
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 1024,
- "p50_amplification": 1.089,
- "p99_amplification": 1.091
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 2048,
- "p50_amplification": 1.183,
- "p99_amplification": 1.128
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 4096,
- "p50_amplification": 1.23,
- "p99_amplification": 1.21
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 128,
- "p50_amplification": 0.73,
- "p99_amplification": 0.741
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 512,
- "p50_amplification": 0.987,
- "p99_amplification": 1.026
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 2048,
- "p50_amplification": 1.154,
- "p99_amplification": 1.092
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 128,
- "p50_amplification": 0.719,
- "p99_amplification": 0.744
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 256,
- "p50_amplification": 0.854,
- "p99_amplification": 0.858
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 512,
- "p50_amplification": 0.965,
- "p99_amplification": 0.96
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 1024,
- "p50_amplification": 1.079,
- "p99_amplification": 1.082
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 2048,
- "p50_amplification": 1.158,
- "p99_amplification": 1.104
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 4096,
- "p50_amplification": 1.199,
- "p99_amplification": 1.176
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 128,
- "p50_amplification": 0.687,
- "p99_amplification": 0.714
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 256,
- "p50_amplification": 0.787,
- "p99_amplification": 0.795
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 512,
- "p50_amplification": 0.839,
- "p99_amplification": 0.834
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 1024,
- "p50_amplification": 0.891,
- "p99_amplification": 0.887
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 2048,
- "p50_amplification": 0.933,
- "p99_amplification": 0.898
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy+eplb",
- "T": 4096,
- "p50_amplification": 0.989,
- "p99_amplification": 0.995
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 128,
- "p50_amplification": 0.768,
- "p99_amplification": 0.783
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 256,
- "p50_amplification": 0.859,
- "p99_amplification": 0.864
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 512,
- "p50_amplification": 0.973,
- "p99_amplification": 0.969
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 1024,
- "p50_amplification": 1.088,
- "p99_amplification": 1.083
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 2048,
- "p50_amplification": 1.184,
- "p99_amplification": 1.275
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild",
- "T": 4096,
- "p50_amplification": 1.241,
- "p99_amplification": 1.216
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 128,
- "p50_amplification": 0.698,
- "p99_amplification": 0.719
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 256,
- "p50_amplification": 0.796,
- "p99_amplification": 0.792
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 512,
- "p50_amplification": 0.84,
- "p99_amplification": 0.847
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 1024,
- "p50_amplification": 0.877,
- "p99_amplification": 1.339
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 2048,
- "p50_amplification": 0.913,
- "p99_amplification": 0.87
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-mild+eplb",
- "T": 4096,
- "p50_amplification": 0.956,
- "p99_amplification": 0.956
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 128,
- "p50_amplification": 0.774,
- "p99_amplification": 0.799
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 256,
- "p50_amplification": 0.878,
- "p99_amplification": 0.885
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 512,
- "p50_amplification": 0.991,
- "p99_amplification": 0.989
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 1024,
- "p50_amplification": 1.093,
- "p99_amplification": 1.157
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 2048,
- "p50_amplification": 1.183,
- "p99_amplification": 1.466
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate",
- "T": 4096,
- "p50_amplification": 1.231,
- "p99_amplification": 1.253
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 128,
- "p50_amplification": 0.709,
- "p99_amplification": 0.732
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 256,
- "p50_amplification": 0.777,
- "p99_amplification": 0.79
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 512,
- "p50_amplification": 0.836,
- "p99_amplification": 0.831
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 1024,
- "p50_amplification": 0.874,
- "p99_amplification": 0.875
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 2048,
- "p50_amplification": 0.912,
- "p99_amplification": 0.871
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-moderate+eplb",
- "T": 4096,
- "p50_amplification": 0.971,
- "p99_amplification": 0.978
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.689,
- "p99_amplification": 0.709
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 256,
- "p50_amplification": 0.774,
- "p99_amplification": 0.778
- },
- {
- "sku": "h100",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 512,
- "p50_amplification": 0.834,
- "p99_amplification": 0.888
+ {
+ "id": "cxr-f7274fdd",
+ "sku": "gb300",
+ "phase": "prefill",
+ "atTokensPerRank": 256,
+ "lowestP99DispatchUs": 141.9,
+ "config": "bf16/normal/layout-and-dispatch-v1/balanced+eplb/tuned",
+ "epSize": 8
+ }
+ ],
+ "llCrossover": [
+ {
+ "sku": "gb200",
+ "ep": 8,
+ "dtype": "bf16",
+ "stat": "p50",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb200",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 1024,
- "p50_amplification": 0.875,
- "p99_amplification": 0.876
+ "dtype": "bf16",
+ "stat": "p99",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb200",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 2048,
- "p50_amplification": 0.918,
- "p99_amplification": 0.873
+ "dtype": "bf16",
+ "stat": "p50",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb200",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 4096,
- "p50_amplification": 0.985,
- "p99_amplification": 0.997
+ "dtype": "bf16",
+ "stat": "p99",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb200",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.739,
- "p99_amplification": 0.765
+ "dtype": "fp8",
+ "stat": "p50",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb200",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 256,
- "p50_amplification": 0.854,
- "p99_amplification": 0.86
+ "dtype": "fp8",
+ "stat": "p99",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb200",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 512,
- "p50_amplification": 0.98,
- "p99_amplification": 0.983
+ "dtype": "fp8",
+ "stat": "p50",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb200",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 1024,
- "p50_amplification": 1.095,
- "p99_amplification": 1.097
+ "dtype": "fp8",
+ "stat": "p99",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 2048,
- "p50_amplification": 1.162,
- "p99_amplification": 1.1
+ "dtype": "bf16",
+ "stat": "p50",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 4096,
- "p50_amplification": 1.208,
- "p99_amplification": 1.19
+ "dtype": "bf16",
+ "stat": "p99",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.687,
- "p99_amplification": 0.708
+ "dtype": "bf16",
+ "stat": "p50",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 256,
- "p50_amplification": 0.773,
- "p99_amplification": 0.771
+ "dtype": "bf16",
+ "stat": "p99",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 512,
- "p50_amplification": 0.858,
- "p99_amplification": 0.857
+ "dtype": "fp8",
+ "stat": "p50",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 1024,
- "p50_amplification": 0.943,
- "p99_amplification": 0.944
+ "dtype": "fp8",
+ "stat": "p99",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 2048,
- "p50_amplification": 1.023,
- "p99_amplification": 0.968
+ "dtype": "fp8",
+ "stat": "p50",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
},
{
- "sku": "h100",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 4096,
- "p50_amplification": 1.069,
- "p99_amplification": 1.055
- },
+ "dtype": "fp8",
+ "stat": "p99",
+ "basis": "measured-roundtrip",
+ "normal_faster_at_T": 128
+ }
+ ],
+ "resourcePareto": [],
+ "topologyPenalty": [],
+ "skewPenalty": [
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf",
"T": 1,
- "p50_amplification": 0.58,
- "p99_amplification": 0.69
+ "p50_amplification": 1.018,
+ "p99_amplification": 1.026
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf",
"T": 2,
- "p50_amplification": 0.642,
- "p99_amplification": 0.807
+ "p50_amplification": 1.015,
+ "p99_amplification": 0.927
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf",
"T": 4,
- "p50_amplification": 0.651,
- "p99_amplification": 1.004
+ "p50_amplification": 1.041,
+ "p99_amplification": 1.125
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf",
"T": 8,
- "p50_amplification": 0.652,
- "p99_amplification": 0.798
+ "p50_amplification": 1.017,
+ "p99_amplification": 1.1
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf",
"T": 16,
- "p50_amplification": 0.646,
- "p99_amplification": 0.648
+ "p50_amplification": 0.991,
+ "p99_amplification": 0.985
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf",
"T": 32,
- "p50_amplification": 0.671,
- "p99_amplification": 0.956
+ "p50_amplification": 0.999,
+ "p99_amplification": 0.995
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf",
"T": 64,
- "p50_amplification": 0.712,
- "p99_amplification": 0.882
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.783,
- "p99_amplification": 0.789
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 1,
- "p50_amplification": 0.578,
- "p99_amplification": 0.711
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 8,
- "p50_amplification": 0.648,
- "p99_amplification": 0.886
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 32,
- "p50_amplification": 0.661,
- "p99_amplification": 0.791
+ "p50_amplification": 0.968,
+ "p99_amplification": 1.062
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf",
"T": 128,
- "p50_amplification": 0.785,
- "p99_amplification": 0.815
+ "p50_amplification": 1.01,
+ "p99_amplification": 0.96
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy",
"T": 1,
- "p50_amplification": 0.525,
- "p99_amplification": 0.897
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 2,
- "p50_amplification": 0.57,
- "p99_amplification": 0.76
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 4,
- "p50_amplification": 0.655,
- "p99_amplification": 0.753
+ "p50_amplification": 0.998,
+ "p99_amplification": 0.975
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy",
"T": 8,
- "p50_amplification": 0.644,
- "p99_amplification": 0.795
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 16,
- "p50_amplification": 0.602,
- "p99_amplification": 0.563
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 32,
- "p50_amplification": 0.622,
- "p99_amplification": 0.709
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 64,
- "p50_amplification": 0.704,
- "p99_amplification": 1.052
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 128,
- "p50_amplification": 0.759,
- "p99_amplification": 0.851
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 1,
- "p50_amplification": 0.568,
+ "p50_amplification": 0.963,
"p99_amplification": 1.025
},
{
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 8,
- "p50_amplification": 0.667,
- "p99_amplification": 0.831
- },
- {
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy",
"T": 32,
- "p50_amplification": 0.665,
- "p99_amplification": 0.785
+ "p50_amplification": 0.99,
+ "p99_amplification": 1.02
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy",
"T": 128,
- "p50_amplification": 0.766,
- "p99_amplification": 0.783
+ "p50_amplification": 0.976,
+ "p99_amplification": 0.977
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy+eplb",
"T": 1,
- "p50_amplification": 0.56,
- "p99_amplification": 0.545
+ "p50_amplification": 1.058,
+ "p99_amplification": 3.623
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy+eplb",
"T": 2,
- "p50_amplification": 0.627,
- "p99_amplification": 0.538
+ "p50_amplification": 1.041,
+ "p99_amplification": 3.305
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy+eplb",
"T": 4,
- "p50_amplification": 0.665,
- "p99_amplification": 0.738
+ "p50_amplification": 1.067,
+ "p99_amplification": 3.523
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy+eplb",
"T": 8,
- "p50_amplification": 0.662,
- "p99_amplification": 0.729
+ "p50_amplification": 1.046,
+ "p99_amplification": 3.605
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy+eplb",
"T": 16,
- "p50_amplification": 0.611,
- "p99_amplification": 0.559
+ "p50_amplification": 1.11,
+ "p99_amplification": 3.612
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy+eplb",
"T": 32,
- "p50_amplification": 0.656,
- "p99_amplification": 0.853
+ "p50_amplification": 1.033,
+ "p99_amplification": 3.524
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy+eplb",
"T": 64,
- "p50_amplification": 0.684,
- "p99_amplification": 0.778
+ "p50_amplification": 1.037,
+ "p99_amplification": 3.106
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-heavy+eplb",
"T": 128,
- "p50_amplification": 0.75,
- "p99_amplification": 0.711
+ "p50_amplification": 1.025,
+ "p99_amplification": 2.904
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild",
"T": 1,
- "p50_amplification": 0.598,
- "p99_amplification": 0.616
+ "p50_amplification": 0.985,
+ "p99_amplification": 1.001
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild",
"T": 2,
- "p50_amplification": 0.636,
- "p99_amplification": 0.653
+ "p50_amplification": 0.971,
+ "p99_amplification": 0.93
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild",
"T": 4,
- "p50_amplification": 0.685,
- "p99_amplification": 0.767
+ "p50_amplification": 1.003,
+ "p99_amplification": 1.048
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild",
"T": 8,
- "p50_amplification": 0.673,
- "p99_amplification": 0.707
+ "p50_amplification": 0.994,
+ "p99_amplification": 1.056
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild",
"T": 16,
- "p50_amplification": 0.637,
- "p99_amplification": 0.506
+ "p50_amplification": 0.983,
+ "p99_amplification": 0.942
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild",
"T": 32,
- "p50_amplification": 0.679,
- "p99_amplification": 0.718
+ "p50_amplification": 0.994,
+ "p99_amplification": 0.991
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild",
"T": 64,
- "p50_amplification": 0.73,
- "p99_amplification": 0.78
+ "p50_amplification": 1.008,
+ "p99_amplification": 0.974
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild",
"T": 128,
- "p50_amplification": 0.797,
- "p99_amplification": 0.847
+ "p50_amplification": 1.031,
+ "p99_amplification": 1.001
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild+eplb",
"T": 1,
- "p50_amplification": 0.59,
- "p99_amplification": 0.634
+ "p50_amplification": 1.064,
+ "p99_amplification": 1.117
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild+eplb",
"T": 2,
- "p50_amplification": 0.636,
- "p99_amplification": 0.673
+ "p50_amplification": 1.039,
+ "p99_amplification": 1
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild+eplb",
"T": 4,
- "p50_amplification": 0.657,
- "p99_amplification": 1.101
+ "p50_amplification": 1.061,
+ "p99_amplification": 1.165
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild+eplb",
"T": 8,
- "p50_amplification": 0.661,
- "p99_amplification": 0.83
+ "p50_amplification": 1.052,
+ "p99_amplification": 1.163
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild+eplb",
"T": 16,
- "p50_amplification": 0.621,
- "p99_amplification": 0.586
+ "p50_amplification": 1.033,
+ "p99_amplification": 1.056
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild+eplb",
"T": 32,
- "p50_amplification": 0.65,
- "p99_amplification": 0.689
+ "p50_amplification": 1.038,
+ "p99_amplification": 1.012
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild+eplb",
"T": 64,
- "p50_amplification": 0.714,
- "p99_amplification": 0.681
+ "p50_amplification": 1.02,
+ "p99_amplification": 0.969
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-mild+eplb",
"T": 128,
- "p50_amplification": 0.738,
- "p99_amplification": 0.869
+ "p50_amplification": 1.023,
+ "p99_amplification": 0.976
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate",
"T": 1,
- "p50_amplification": 0.59,
- "p99_amplification": 0.753
+ "p50_amplification": 0.997,
+ "p99_amplification": 1.009
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate",
"T": 2,
- "p50_amplification": 0.638,
- "p99_amplification": 0.75
+ "p50_amplification": 0.992,
+ "p99_amplification": 0.91
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate",
"T": 4,
- "p50_amplification": 0.661,
- "p99_amplification": 0.788
+ "p50_amplification": 1.012,
+ "p99_amplification": 1.043
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate",
"T": 8,
- "p50_amplification": 0.654,
- "p99_amplification": 0.708
+ "p50_amplification": 0.994,
+ "p99_amplification": 1.049
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate",
"T": 16,
- "p50_amplification": 0.611,
- "p99_amplification": 0.54
+ "p50_amplification": 0.972,
+ "p99_amplification": 0.955
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate",
"T": 32,
- "p50_amplification": 0.691,
- "p99_amplification": 0.799
+ "p50_amplification": 0.996,
+ "p99_amplification": 1.008
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate",
"T": 64,
- "p50_amplification": 0.732,
- "p99_amplification": 0.808
+ "p50_amplification": 0.969,
+ "p99_amplification": 0.942
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate",
"T": 128,
- "p50_amplification": 0.789,
- "p99_amplification": 0.812
+ "p50_amplification": 1.025,
+ "p99_amplification": 0.98
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate+eplb",
"T": 1,
- "p50_amplification": 0.582,
- "p99_amplification": 0.644
+ "p50_amplification": 1.012,
+ "p99_amplification": 1.032
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate+eplb",
"T": 2,
- "p50_amplification": 0.644,
- "p99_amplification": 0.809
+ "p50_amplification": 0.994,
+ "p99_amplification": 0.963
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate+eplb",
"T": 4,
- "p50_amplification": 0.683,
- "p99_amplification": 0.889
+ "p50_amplification": 1.011,
+ "p99_amplification": 1.135
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate+eplb",
"T": 8,
- "p50_amplification": 0.661,
- "p99_amplification": 0.875
+ "p50_amplification": 1.009,
+ "p99_amplification": 1.016
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate+eplb",
"T": 16,
- "p50_amplification": 0.605,
- "p99_amplification": 0.654
+ "p50_amplification": 0.993,
+ "p99_amplification": 0.926
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate+eplb",
"T": 32,
- "p50_amplification": 0.663,
- "p99_amplification": 0.977
+ "p50_amplification": 0.998,
+ "p99_amplification": 0.992
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate+eplb",
"T": 64,
- "p50_amplification": 0.715,
- "p99_amplification": 0.942
+ "p50_amplification": 0.999,
+ "p99_amplification": 0.943
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf-moderate+eplb",
"T": 128,
- "p50_amplification": 0.758,
- "p99_amplification": 0.885
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 1,
- "p50_amplification": 0.584,
- "p99_amplification": 0.649
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 2,
- "p50_amplification": 0.642,
- "p99_amplification": 0.694
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 4,
- "p50_amplification": 0.665,
- "p99_amplification": 0.703
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 8,
- "p50_amplification": 0.674,
- "p99_amplification": 1.103
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 16,
- "p50_amplification": 0.637,
- "p99_amplification": 0.594
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 32,
- "p50_amplification": 0.675,
- "p99_amplification": 0.747
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 64,
- "p50_amplification": 0.688,
- "p99_amplification": 0.676
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.75,
- "p99_amplification": 0.84
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 1,
- "p50_amplification": 0.601,
- "p99_amplification": 0.766
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 2,
- "p50_amplification": 0.644,
- "p99_amplification": 0.735
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 4,
- "p50_amplification": 0.658,
- "p99_amplification": 0.737
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 8,
- "p50_amplification": 0.667,
- "p99_amplification": 0.861
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 16,
- "p50_amplification": 0.644,
- "p99_amplification": 0.664
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 32,
- "p50_amplification": 0.672,
- "p99_amplification": 0.756
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 64,
- "p50_amplification": 0.714,
- "p99_amplification": 0.683
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.788,
- "p99_amplification": 0.927
+ "p50_amplification": 1,
+ "p99_amplification": 0.959
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf+eplb",
"T": 1,
- "p50_amplification": 0.59,
- "p99_amplification": 0.634
+ "p50_amplification": 0.983,
+ "p99_amplification": 0.998
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf+eplb",
"T": 2,
- "p50_amplification": 0.637,
- "p99_amplification": 0.678
+ "p50_amplification": 0.965,
+ "p99_amplification": 0.905
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf+eplb",
"T": 4,
- "p50_amplification": 0.704,
- "p99_amplification": 1.069
+ "p50_amplification": 1.004,
+ "p99_amplification": 1.031
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf+eplb",
"T": 8,
- "p50_amplification": 0.66,
- "p99_amplification": 0.846
+ "p50_amplification": 0.987,
+ "p99_amplification": 0.982
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf+eplb",
"T": 16,
- "p50_amplification": 0.634,
- "p99_amplification": 0.688
+ "p50_amplification": 0.988,
+ "p99_amplification": 0.93
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf+eplb",
"T": 32,
- "p50_amplification": 0.663,
- "p99_amplification": 0.758
+ "p50_amplification": 0.99,
+ "p99_amplification": 0.982
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf+eplb",
"T": 64,
- "p50_amplification": 0.748,
- "p99_amplification": 0.887
+ "p50_amplification": 0.994,
+ "p99_amplification": 0.939
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "decode",
"routing": "zipf+eplb",
"T": 128,
- "p50_amplification": 0.741,
- "p99_amplification": 0.813
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 128,
- "p50_amplification": 0.767,
- "p99_amplification": 0.631
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 512,
- "p50_amplification": 0.995,
- "p99_amplification": 1.016
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf",
- "T": 2048,
- "p50_amplification": 1.148,
- "p99_amplification": 1.232
+ "p50_amplification": 0.994,
+ "p99_amplification": 0.95
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf",
"T": 128,
- "p50_amplification": 0.765,
- "p99_amplification": 0.61
+ "p50_amplification": 1.008,
+ "p99_amplification": 2.891
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf",
"T": 256,
- "p50_amplification": 0.917,
- "p99_amplification": 0.93
+ "p50_amplification": 0.996,
+ "p99_amplification": 2.73
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf",
"T": 512,
- "p50_amplification": 0.986,
- "p99_amplification": 0.993
+ "p50_amplification": 0.981,
+ "p99_amplification": 2.16
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf",
"T": 1024,
- "p50_amplification": 1.083,
- "p99_amplification": 1.177
+ "p50_amplification": 1.054,
+ "p99_amplification": 1.848
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf",
"T": 2048,
- "p50_amplification": 1.149,
- "p99_amplification": 1.171
+ "p50_amplification": 1.138,
+ "p99_amplification": 1.581
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf",
"T": 4096,
- "p50_amplification": 1.234,
- "p99_amplification": 1.18
+ "p50_amplification": 1.195,
+ "p99_amplification": 1.393
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
- "routing": "zipf-heavy",
+ "routing": "zipf",
"T": 128,
- "p50_amplification": 0.737,
- "p99_amplification": 0.548
+ "p50_amplification": 0.997,
+ "p99_amplification": 0.989
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
- "routing": "zipf-heavy",
+ "routing": "zipf",
"T": 512,
- "p50_amplification": 0.973,
- "p99_amplification": 1.093
+ "p50_amplification": 1,
+ "p99_amplification": 0.985
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
- "routing": "zipf-heavy",
+ "routing": "zipf",
"T": 2048,
- "p50_amplification": 1.133,
- "p99_amplification": 1.172
+ "p50_amplification": 1.148,
+ "p99_amplification": 1.151
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-heavy",
"T": 128,
- "p50_amplification": 0.74,
- "p99_amplification": 0.552
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 256,
- "p50_amplification": 0.836,
- "p99_amplification": 0.844
+ "p50_amplification": 0.938,
+ "p99_amplification": 0.93
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-heavy",
"T": 512,
- "p50_amplification": 0.965,
- "p99_amplification": 1.01
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 1024,
- "p50_amplification": 1.064,
- "p99_amplification": 1.207
+ "p50_amplification": 0.874,
+ "p99_amplification": 0.87
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-heavy",
"T": 2048,
- "p50_amplification": 1.132,
- "p99_amplification": 1.098
- },
- {
- "sku": "h200",
- "ep": 8,
- "phase": "prefill",
- "routing": "zipf-heavy",
- "T": 4096,
- "p50_amplification": 1.212,
- "p99_amplification": 1.306
+ "p50_amplification": 1.006,
+ "p99_amplification": 1.016
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-heavy+eplb",
"T": 128,
- "p50_amplification": 0.738,
- "p99_amplification": 0.519
+ "p50_amplification": 1,
+ "p99_amplification": 2.961
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-heavy+eplb",
"T": 256,
- "p50_amplification": 0.783,
- "p99_amplification": 0.724
+ "p50_amplification": 1.004,
+ "p99_amplification": 2.712
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-heavy+eplb",
"T": 512,
- "p50_amplification": 0.856,
- "p99_amplification": 0.893
+ "p50_amplification": 0.983,
+ "p99_amplification": 2.311
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-heavy+eplb",
"T": 1024,
- "p50_amplification": 0.903,
- "p99_amplification": 0.938
+ "p50_amplification": 1.007,
+ "p99_amplification": 1.885
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-heavy+eplb",
"T": 2048,
- "p50_amplification": 0.925,
- "p99_amplification": 1.013
+ "p50_amplification": 1.013,
+ "p99_amplification": 1.555
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-heavy+eplb",
"T": 4096,
- "p50_amplification": 0.999,
- "p99_amplification": 0.969
+ "p50_amplification": 1.02,
+ "p99_amplification": 1.311
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild",
"T": 128,
- "p50_amplification": 0.792,
- "p99_amplification": 0.664
+ "p50_amplification": 1.038,
+ "p99_amplification": 0.987
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild",
"T": 256,
- "p50_amplification": 0.882,
- "p99_amplification": 0.841
+ "p50_amplification": 1.039,
+ "p99_amplification": 1.031
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild",
"T": 512,
- "p50_amplification": 0.993,
- "p99_amplification": 1.149
+ "p50_amplification": 1.048,
+ "p99_amplification": 1.043
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild",
"T": 1024,
- "p50_amplification": 1.076,
- "p99_amplification": 1.025
+ "p50_amplification": 1.101,
+ "p99_amplification": 1.084
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild",
"T": 2048,
- "p50_amplification": 1.162,
- "p99_amplification": 1.17
+ "p50_amplification": 1.145,
+ "p99_amplification": 1.152
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild",
"T": 4096,
- "p50_amplification": 1.251,
- "p99_amplification": 1.262
+ "p50_amplification": 1.207,
+ "p99_amplification": 1.213
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild+eplb",
"T": 128,
- "p50_amplification": 0.739,
- "p99_amplification": 0.586
+ "p50_amplification": 1.007,
+ "p99_amplification": 0.973
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild+eplb",
"T": 256,
- "p50_amplification": 0.802,
- "p99_amplification": 0.721
+ "p50_amplification": 1.011,
+ "p99_amplification": 0.996
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild+eplb",
"T": 512,
- "p50_amplification": 0.862,
- "p99_amplification": 0.99
+ "p50_amplification": 1.001,
+ "p99_amplification": 0.993
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild+eplb",
"T": 1024,
- "p50_amplification": 0.891,
- "p99_amplification": 0.833
+ "p50_amplification": 1.004,
+ "p99_amplification": 1.008
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild+eplb",
"T": 2048,
- "p50_amplification": 0.907,
- "p99_amplification": 0.896
+ "p50_amplification": 1.012,
+ "p99_amplification": 1.012
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-mild+eplb",
"T": 4096,
- "p50_amplification": 0.959,
- "p99_amplification": 0.942
+ "p50_amplification": 1.006,
+ "p99_amplification": 1.005
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate",
"T": 128,
- "p50_amplification": 0.77,
- "p99_amplification": 0.566
+ "p50_amplification": 1.002,
+ "p99_amplification": 0.971
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate",
"T": 256,
- "p50_amplification": 0.87,
- "p99_amplification": 0.812
+ "p50_amplification": 0.982,
+ "p99_amplification": 0.973
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate",
"T": 512,
- "p50_amplification": 0.992,
- "p99_amplification": 1.026
+ "p50_amplification": 0.973,
+ "p99_amplification": 0.974
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate",
"T": 1024,
- "p50_amplification": 1.079,
- "p99_amplification": 0.978
+ "p50_amplification": 1.041,
+ "p99_amplification": 1.045
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate",
"T": 2048,
- "p50_amplification": 1.148,
- "p99_amplification": 1.463
+ "p50_amplification": 1.13,
+ "p99_amplification": 1.134
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate",
"T": 4096,
- "p50_amplification": 1.234,
- "p99_amplification": 1.254
+ "p50_amplification": 1.188,
+ "p99_amplification": 1.197
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate+eplb",
"T": 128,
- "p50_amplification": 0.737,
- "p99_amplification": 0.695
+ "p50_amplification": 1.003,
+ "p99_amplification": 0.958
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate+eplb",
"T": 256,
- "p50_amplification": 0.781,
- "p99_amplification": 0.813
+ "p50_amplification": 1.002,
+ "p99_amplification": 0.995
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate+eplb",
"T": 512,
- "p50_amplification": 0.85,
- "p99_amplification": 0.909
+ "p50_amplification": 0.987,
+ "p99_amplification": 0.998
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate+eplb",
"T": 1024,
- "p50_amplification": 0.887,
- "p99_amplification": 0.955
+ "p50_amplification": 1.002,
+ "p99_amplification": 1.012
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate+eplb",
"T": 2048,
- "p50_amplification": 0.902,
- "p99_amplification": 0.906
+ "p50_amplification": 1.008,
+ "p99_amplification": 1.011
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf-moderate+eplb",
"T": 4096,
- "p50_amplification": 0.975,
- "p99_amplification": 0.943
+ "p50_amplification": 1.004,
+ "p99_amplification": 1.002
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf+eplb",
"T": 128,
- "p50_amplification": 0.743,
- "p99_amplification": 0.551
+ "p50_amplification": 0.993,
+ "p99_amplification": 0.988
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf+eplb",
"T": 256,
- "p50_amplification": 0.777,
- "p99_amplification": 0.778
+ "p50_amplification": 0.997,
+ "p99_amplification": 1.015
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf+eplb",
"T": 512,
- "p50_amplification": 0.848,
- "p99_amplification": 0.949
+ "p50_amplification": 0.989,
+ "p99_amplification": 0.984
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf+eplb",
"T": 1024,
- "p50_amplification": 0.886,
- "p99_amplification": 0.844
+ "p50_amplification": 0.997,
+ "p99_amplification": 1.002
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf+eplb",
"T": 2048,
- "p50_amplification": 0.904,
- "p99_amplification": 0.901
+ "p50_amplification": 1.012,
+ "p99_amplification": 1.01
},
{
- "sku": "h200",
+ "sku": "gb200",
"ep": 8,
"phase": "prefill",
"routing": "zipf+eplb",
"T": 4096,
- "p50_amplification": 0.967,
- "p99_amplification": 0.918
+ "p50_amplification": 1.004,
+ "p99_amplification": 1.004
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
+ "phase": "decode",
"routing": "zipf",
- "T": 128,
- "p50_amplification": 0.757,
- "p99_amplification": 0.578
+ "T": 1,
+ "p50_amplification": 0.993,
+ "p99_amplification": 0.848
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
+ "phase": "decode",
"routing": "zipf",
- "T": 256,
- "p50_amplification": 0.88,
- "p99_amplification": 0.846
+ "T": 2,
+ "p50_amplification": 0.979,
+ "p99_amplification": 0.897
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
+ "phase": "decode",
"routing": "zipf",
- "T": 512,
- "p50_amplification": 0.993,
- "p99_amplification": 0.981
+ "T": 4,
+ "p50_amplification": 1,
+ "p99_amplification": 1.016
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
+ "phase": "decode",
"routing": "zipf",
- "T": 1024,
- "p50_amplification": 1.092,
- "p99_amplification": 0.982
+ "T": 8,
+ "p50_amplification": 1.015,
+ "p99_amplification": 1.004
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
+ "phase": "decode",
"routing": "zipf",
- "T": 2048,
- "p50_amplification": 1.145,
- "p99_amplification": 1.152
+ "T": 16,
+ "p50_amplification": 1.013,
+ "p99_amplification": 0.923
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
+ "phase": "decode",
"routing": "zipf",
- "T": 4096,
- "p50_amplification": 1.224,
- "p99_amplification": 1.225
+ "T": 32,
+ "p50_amplification": 1.005,
+ "p99_amplification": 1.004
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 128,
- "p50_amplification": 0.735,
- "p99_amplification": 0.579
+ "phase": "decode",
+ "routing": "zipf",
+ "T": 64,
+ "p50_amplification": 0.995,
+ "p99_amplification": 1.031
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 256,
- "p50_amplification": 0.789,
- "p99_amplification": 0.801
+ "phase": "decode",
+ "routing": "zipf",
+ "T": 128,
+ "p50_amplification": 1.013,
+ "p99_amplification": 1.011
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 512,
- "p50_amplification": 0.87,
- "p99_amplification": 1.02
+ "phase": "decode",
+ "routing": "zipf-heavy",
+ "T": 1,
+ "p50_amplification": 0.962,
+ "p99_amplification": 1.296
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 1024,
- "p50_amplification": 0.918,
- "p99_amplification": 0.902
+ "phase": "decode",
+ "routing": "zipf-heavy",
+ "T": 8,
+ "p50_amplification": 0.999,
+ "p99_amplification": 1.418
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 2048,
- "p50_amplification": 0.974,
- "p99_amplification": 1.088
+ "phase": "decode",
+ "routing": "zipf-heavy",
+ "T": 32,
+ "p50_amplification": 1.01,
+ "p99_amplification": 1.571
},
{
- "sku": "h200",
+ "sku": "gb300",
"ep": 8,
- "phase": "prefill",
- "routing": "zipf+eplb",
- "T": 4096,
- "p50_amplification": 1.052,
- "p99_amplification": 1.156
+ "phase": "decode",
+ "routing": "zipf-heavy",
+ "T": 128,
+ "p50_amplification": 0.972,
+ "p99_amplification": 1.487
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf",
+ "routing": "zipf-heavy+eplb",
"T": 1,
- "p50_amplification": 0.963,
- "p99_amplification": 0.992
+ "p50_amplification": 1.036,
+ "p99_amplification": 0.875
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf",
+ "routing": "zipf-heavy+eplb",
"T": 2,
- "p50_amplification": 0.955,
- "p99_amplification": 0.961
+ "p50_amplification": 1.012,
+ "p99_amplification": 1.008
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf",
+ "routing": "zipf-heavy+eplb",
"T": 4,
- "p50_amplification": 0.979,
- "p99_amplification": 0.957
+ "p50_amplification": 1.021,
+ "p99_amplification": 1.099
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf",
+ "routing": "zipf-heavy+eplb",
"T": 8,
- "p50_amplification": 0.986,
- "p99_amplification": 1.042
+ "p50_amplification": 1.037,
+ "p99_amplification": 0.996
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf",
+ "routing": "zipf-heavy+eplb",
"T": 16,
- "p50_amplification": 0.98,
- "p99_amplification": 1.009
- },
- {
- "sku": "mi355x",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 1,
- "p50_amplification": 0.945,
- "p99_amplification": 0.974
- },
- {
- "sku": "mi355x",
- "ep": 8,
- "phase": "decode",
- "routing": "zipf-heavy",
- "T": 2,
- "p50_amplification": 0.951,
- "p99_amplification": 0.976
+ "p50_amplification": 1.06,
+ "p99_amplification": 1.008
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf-heavy",
- "T": 4,
- "p50_amplification": 0.977,
- "p99_amplification": 0.969
+ "routing": "zipf-heavy+eplb",
+ "T": 32,
+ "p50_amplification": 1.055,
+ "p99_amplification": 1.108
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf-heavy",
- "T": 8,
- "p50_amplification": 0.972,
- "p99_amplification": 0.952
+ "routing": "zipf-heavy+eplb",
+ "T": 64,
+ "p50_amplification": 1.016,
+ "p99_amplification": 1.145
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf-heavy",
- "T": 16,
- "p50_amplification": 0.97,
- "p99_amplification": 0.925
+ "routing": "zipf-heavy+eplb",
+ "T": 128,
+ "p50_amplification": 1.02,
+ "p99_amplification": 1.04
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf+eplb",
+ "routing": "zipf-mild",
"T": 1,
- "p50_amplification": 0.969,
- "p99_amplification": 0.978
+ "p50_amplification": 0.947,
+ "p99_amplification": 0.787
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf+eplb",
+ "routing": "zipf-mild",
"T": 2,
- "p50_amplification": 0.98,
- "p99_amplification": 0.984
+ "p50_amplification": 0.937,
+ "p99_amplification": 0.874
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf+eplb",
+ "routing": "zipf-mild",
"T": 4,
- "p50_amplification": 0.993,
- "p99_amplification": 0.968
+ "p50_amplification": 0.946,
+ "p99_amplification": 1.009
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
"phase": "decode",
- "routing": "zipf+eplb",
+ "routing": "zipf-mild",
"T": 8,
- "p50_amplification": 0.984,
- "p99_amplification": 0.99
+ "p50_amplification": 0.966,
+ "p99_amplification": 0.924
},
{
- "sku": "mi355x",
+ "sku": "gb300",
"ep": 8,
- "phase": "decode",
- "routing": "zipf+eplb",
- "T": 16,
- "p50_amplification": 0.992,
- "p99_amplification": 0.99
- }
- ]
- },
- "nccl": [
- {
- "id": "cxn-a8203ce9",
- "identity": "nccl|b300|all_gather|b300-nvlink-island|nvlink|8|nccl-tests-v1",
- "op": "all_gather",
- "sku": "b300",
- "runner": "b300-nv_03",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "e6eafb7204b78dd3",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 186.922,
- "status": "valid",
- "valid": true,
- "colorKey": "b300_a8203ce9",
- "label": "B300 · b300-nvlink-island · nvlink (ws8)",
- "generatedAt": "2026-06-27T11:18:41.342024+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T11:18:41.342024+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 27.36,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 27.36,
- "inPlaceUs": 27.26,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 26.88,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 26.88,
- "inPlaceUs": 26.89,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 27.11,
- "algBandwidthGbps": 0.02,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 27.11,
- "inPlaceUs": 27.07,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 26.64,
- "algBandwidthGbps": 0.04,
- "busBandwidthGbps": 0.03,
- "outOfPlaceUs": 26.64,
- "inPlaceUs": 26.87,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 27.03,
- "algBandwidthGbps": 0.08,
- "busBandwidthGbps": 0.07,
- "outOfPlaceUs": 27.03,
- "inPlaceUs": 26.8,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 26.95,
- "algBandwidthGbps": 0.15,
- "busBandwidthGbps": 0.13,
- "outOfPlaceUs": 26.95,
- "inPlaceUs": 27.51,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 26.84,
- "algBandwidthGbps": 0.31,
- "busBandwidthGbps": 0.27,
- "outOfPlaceUs": 27.05,
- "inPlaceUs": 26.84,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 27.2,
- "algBandwidthGbps": 0.6,
- "busBandwidthGbps": 0.53,
- "outOfPlaceUs": 27.2,
- "inPlaceUs": 26.86,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 26.68,
- "algBandwidthGbps": 1.23,
- "busBandwidthGbps": 1.07,
- "outOfPlaceUs": 26.98,
- "inPlaceUs": 26.68,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 26.75,
- "algBandwidthGbps": 2.45,
- "busBandwidthGbps": 2.14,
- "outOfPlaceUs": 26.89,
- "inPlaceUs": 26.75,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 27.63,
- "algBandwidthGbps": 4.74,
- "busBandwidthGbps": 4.15,
- "outOfPlaceUs": 27.63,
- "inPlaceUs": 27.81,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 28.34,
- "algBandwidthGbps": 9.25,
- "busBandwidthGbps": 8.09,
- "outOfPlaceUs": 28.34,
- "inPlaceUs": 28.46,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 29.45,
- "algBandwidthGbps": 17.8,
- "busBandwidthGbps": 15.58,
- "outOfPlaceUs": 29.49,
- "inPlaceUs": 29.45,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 31.36,
- "algBandwidthGbps": 33.43,
- "busBandwidthGbps": 29.25,
- "outOfPlaceUs": 31.51,
- "inPlaceUs": 31.36,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 35.8,
- "algBandwidthGbps": 58.58,
- "busBandwidthGbps": 51.26,
- "outOfPlaceUs": 35.94,
- "inPlaceUs": 35.8,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 36.17,
- "algBandwidthGbps": 115.95,
- "busBandwidthGbps": 101.45,
- "outOfPlaceUs": 36.29,
- "inPlaceUs": 36.17,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 36.99,
- "algBandwidthGbps": 226.76,
- "busBandwidthGbps": 198.42,
- "outOfPlaceUs": 37.02,
- "inPlaceUs": 36.99,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 47.07,
- "algBandwidthGbps": 356.41,
- "busBandwidthGbps": 311.86,
- "outOfPlaceUs": 47.08,
- "inPlaceUs": 47.07,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 74.95,
- "algBandwidthGbps": 447.68,
- "busBandwidthGbps": 391.72,
- "outOfPlaceUs": 75.78,
- "inPlaceUs": 74.95,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 138.64,
- "algBandwidthGbps": 484.06,
- "busBandwidthGbps": 423.55,
- "outOfPlaceUs": 139.26,
- "inPlaceUs": 138.64,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 211.47,
- "algBandwidthGbps": 634.68,
- "busBandwidthGbps": 555.34,
- "outOfPlaceUs": 211.47,
- "inPlaceUs": 211.53,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 399.32,
- "algBandwidthGbps": 672.24,
- "busBandwidthGbps": 588.21,
- "outOfPlaceUs": 399.32,
- "inPlaceUs": 399.95,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 779.11,
- "algBandwidthGbps": 689.08,
- "busBandwidthGbps": 602.95,
- "outOfPlaceUs": 779.96,
- "inPlaceUs": 779.11,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 1532.87,
- "algBandwidthGbps": 700.48,
- "busBandwidthGbps": 612.92,
- "outOfPlaceUs": 1533.45,
- "inPlaceUs": 1532.87,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 3010.48,
- "algBandwidthGbps": 713.34,
- "busBandwidthGbps": 624.17,
- "outOfPlaceUs": 3010.48,
- "inPlaceUs": 3011.29,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 5911.41,
- "algBandwidthGbps": 726.55,
- "busBandwidthGbps": 635.74,
- "outOfPlaceUs": 5949.57,
- "inPlaceUs": 5911.41,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 11675.3,
- "algBandwidthGbps": 735.74,
- "busBandwidthGbps": 643.77,
- "outOfPlaceUs": 11728.1,
- "inPlaceUs": 11675.3,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-17454439",
- "identity": "nccl|h100|all_gather|h100-nvlink-island|nvlink|8|nccl-tests-v1",
- "op": "all_gather",
- "sku": "h100",
- "runner": "h100-dgxc-slurm_09",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "dacea770825df094",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 110.587,
- "status": "valid",
- "valid": true,
- "colorKey": "h100_17454439",
- "label": "H100 · h100-nvlink-island · nvlink (ws8)",
- "generatedAt": "2026-06-27T11:18:57.699787+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T11:18:57.699787+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 40.4,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 40.4,
- "inPlaceUs": 39.34,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 38.62,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 38.62,
- "inPlaceUs": 38.09,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 38.41,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 38.41,
- "inPlaceUs": 38.32,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 38.68,
- "algBandwidthGbps": 0.03,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 38.68,
- "inPlaceUs": 37.58,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 37.29,
- "algBandwidthGbps": 0.05,
- "busBandwidthGbps": 0.05,
- "outOfPlaceUs": 37.29,
- "inPlaceUs": 37.12,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 37.53,
- "algBandwidthGbps": 0.11,
- "busBandwidthGbps": 0.1,
- "outOfPlaceUs": 37.53,
- "inPlaceUs": 37.17,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 37.52,
- "algBandwidthGbps": 0.22,
- "busBandwidthGbps": 0.19,
- "outOfPlaceUs": 37.52,
- "inPlaceUs": 37.53,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 37.13,
- "algBandwidthGbps": 0.44,
- "busBandwidthGbps": 0.39,
- "outOfPlaceUs": 37.13,
- "inPlaceUs": 37.09,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 37.43,
- "algBandwidthGbps": 0.88,
- "busBandwidthGbps": 0.77,
- "outOfPlaceUs": 37.43,
- "inPlaceUs": 37.42,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 37.64,
- "algBandwidthGbps": 1.74,
- "busBandwidthGbps": 1.52,
- "outOfPlaceUs": 37.64,
- "inPlaceUs": 37.63,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 38.19,
- "algBandwidthGbps": 3.43,
- "busBandwidthGbps": 3,
- "outOfPlaceUs": 38.48,
- "inPlaceUs": 38.19,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 39.66,
- "algBandwidthGbps": 6.61,
- "busBandwidthGbps": 5.78,
- "outOfPlaceUs": 39.66,
- "inPlaceUs": 40.15,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 41.79,
- "algBandwidthGbps": 12.55,
- "busBandwidthGbps": 10.98,
- "outOfPlaceUs": 42.17,
- "inPlaceUs": 41.79,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 43.89,
- "algBandwidthGbps": 23.89,
- "busBandwidthGbps": 20.9,
- "outOfPlaceUs": 45.09,
- "inPlaceUs": 43.89,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 44.32,
- "algBandwidthGbps": 47.31,
- "busBandwidthGbps": 41.4,
- "outOfPlaceUs": 44.55,
- "inPlaceUs": 44.32,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 44.97,
- "algBandwidthGbps": 93.27,
- "busBandwidthGbps": 81.61,
- "outOfPlaceUs": 44.97,
- "inPlaceUs": 45,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 45.6,
- "algBandwidthGbps": 183.98,
- "busBandwidthGbps": 160.98,
- "outOfPlaceUs": 46.08,
- "inPlaceUs": 45.6,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 67.94,
- "algBandwidthGbps": 246.95,
- "busBandwidthGbps": 216.08,
- "outOfPlaceUs": 70.1,
- "inPlaceUs": 67.94,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 122.71,
- "algBandwidthGbps": 273.44,
- "busBandwidthGbps": 239.26,
- "outOfPlaceUs": 125.34,
- "inPlaceUs": 122.71,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 206.56,
- "algBandwidthGbps": 324.88,
- "busBandwidthGbps": 284.27,
- "outOfPlaceUs": 210.98,
- "inPlaceUs": 206.56,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 390.25,
- "algBandwidthGbps": 343.93,
- "busBandwidthGbps": 300.94,
- "outOfPlaceUs": 396.19,
- "inPlaceUs": 390.25,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 728.52,
- "algBandwidthGbps": 368.47,
- "busBandwidthGbps": 322.41,
- "outOfPlaceUs": 733.59,
- "inPlaceUs": 728.52,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 1394.3,
- "algBandwidthGbps": 385.05,
- "busBandwidthGbps": 336.92,
- "outOfPlaceUs": 1397.39,
- "inPlaceUs": 1394.3,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 2705.03,
- "algBandwidthGbps": 396.94,
- "busBandwidthGbps": 347.33,
- "outOfPlaceUs": 2729.3,
- "inPlaceUs": 2705.03,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 5306.37,
- "algBandwidthGbps": 404.7,
- "busBandwidthGbps": 354.11,
- "outOfPlaceUs": 5374.68,
- "inPlaceUs": 5306.37,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 10451.7,
- "algBandwidthGbps": 410.93,
- "busBandwidthGbps": 359.57,
- "outOfPlaceUs": 10616.4,
- "inPlaceUs": 10451.7,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 20734.1,
- "algBandwidthGbps": 414.29,
- "busBandwidthGbps": 362.5,
- "outOfPlaceUs": 21013.2,
- "inPlaceUs": 20734.1,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-cc1fe619",
- "identity": "nccl|h200|all_gather|h200-nvlink-island|nvlink|8|nccl-tests-v1",
- "op": "all_gather",
- "sku": "h200",
- "runner": "h200-dgxc-slurm_2",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "e2f081a269356db7",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 111.028,
- "status": "valid",
- "valid": true,
- "colorKey": "h200_cc1fe619",
- "label": "H200 · h200-nvlink-island · nvlink (ws8)",
- "generatedAt": "2026-06-27T11:19:06.426368+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T11:19:06.426368+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 40.82,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 40.82,
- "inPlaceUs": 37.72,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 39.21,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 39.21,
- "inPlaceUs": 38.7,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 39.32,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 39.32,
- "inPlaceUs": 38.61,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 38.35,
- "algBandwidthGbps": 0.03,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 38.35,
- "inPlaceUs": 37.52,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 38.87,
- "algBandwidthGbps": 0.05,
- "busBandwidthGbps": 0.05,
- "outOfPlaceUs": 38.87,
- "inPlaceUs": 37.95,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 39.96,
- "algBandwidthGbps": 0.1,
- "busBandwidthGbps": 0.09,
- "outOfPlaceUs": 39.96,
- "inPlaceUs": 38.92,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 38.44,
- "algBandwidthGbps": 0.21,
- "busBandwidthGbps": 0.19,
- "outOfPlaceUs": 38.44,
- "inPlaceUs": 38.97,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 39.02,
- "algBandwidthGbps": 0.42,
- "busBandwidthGbps": 0.37,
- "outOfPlaceUs": 39.02,
- "inPlaceUs": 38.61,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 37.96,
- "algBandwidthGbps": 0.86,
- "busBandwidthGbps": 0.76,
- "outOfPlaceUs": 37.96,
- "inPlaceUs": 39.63,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 38.78,
- "algBandwidthGbps": 1.69,
- "busBandwidthGbps": 1.48,
- "outOfPlaceUs": 38.78,
- "inPlaceUs": 38.91,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 39.77,
- "algBandwidthGbps": 3.3,
- "busBandwidthGbps": 2.88,
- "outOfPlaceUs": 39.77,
- "inPlaceUs": 40.11,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 40.4,
- "algBandwidthGbps": 6.49,
- "busBandwidthGbps": 5.68,
- "outOfPlaceUs": 40.56,
- "inPlaceUs": 40.4,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 42.21,
- "algBandwidthGbps": 12.42,
- "busBandwidthGbps": 10.87,
- "outOfPlaceUs": 42.21,
- "inPlaceUs": 48.64,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 44.25,
- "algBandwidthGbps": 23.7,
- "busBandwidthGbps": 20.73,
- "outOfPlaceUs": 46.55,
- "inPlaceUs": 44.25,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 44.67,
- "algBandwidthGbps": 46.95,
- "busBandwidthGbps": 41.08,
- "outOfPlaceUs": 45.93,
- "inPlaceUs": 44.67,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 45.73,
- "algBandwidthGbps": 91.71,
- "busBandwidthGbps": 80.25,
- "outOfPlaceUs": 45.73,
- "inPlaceUs": 50.3,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 49.87,
- "algBandwidthGbps": 168.19,
- "busBandwidthGbps": 147.17,
- "outOfPlaceUs": 49.87,
- "inPlaceUs": 49.89,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 66.01,
- "algBandwidthGbps": 254.16,
- "busBandwidthGbps": 222.39,
- "outOfPlaceUs": 66.91,
- "inPlaceUs": 66.01,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 119.98,
- "algBandwidthGbps": 279.66,
- "busBandwidthGbps": 244.7,
- "outOfPlaceUs": 123.43,
- "inPlaceUs": 119.98,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 203.19,
- "algBandwidthGbps": 330.27,
- "busBandwidthGbps": 288.99,
- "outOfPlaceUs": 207.29,
- "inPlaceUs": 203.19,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 376.8,
- "algBandwidthGbps": 356.2,
- "busBandwidthGbps": 311.68,
- "outOfPlaceUs": 380.65,
- "inPlaceUs": 376.8,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 719.69,
- "algBandwidthGbps": 372.99,
- "busBandwidthGbps": 326.36,
- "outOfPlaceUs": 725.33,
- "inPlaceUs": 719.69,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 1381.87,
- "algBandwidthGbps": 388.51,
- "busBandwidthGbps": 339.95,
- "outOfPlaceUs": 1395.46,
- "inPlaceUs": 1381.87,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 2707.03,
- "algBandwidthGbps": 396.65,
- "busBandwidthGbps": 347.07,
- "outOfPlaceUs": 2726.86,
- "inPlaceUs": 2707.03,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 5309.69,
- "algBandwidthGbps": 404.45,
- "busBandwidthGbps": 353.89,
- "outOfPlaceUs": 5364.37,
- "inPlaceUs": 5309.69,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 10464.7,
- "algBandwidthGbps": 410.42,
- "busBandwidthGbps": 359.12,
- "outOfPlaceUs": 10637.1,
- "inPlaceUs": 10464.7,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 20742.5,
- "algBandwidthGbps": 414.12,
- "busBandwidthGbps": 362.36,
- "outOfPlaceUs": 21038.3,
- "inPlaceUs": 20742.5,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-e1de3b53",
- "identity": "nccl|mi355x|all_gather|mi355x-xgmi|xgmi|8|nccl-tests-v1",
- "op": "all_gather",
- "sku": "mi355x",
- "runner": "mi355x-amds_01",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "8f8417874bf37410",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 114.277,
- "status": "valid",
- "valid": true,
- "colorKey": "mi355x_e1de3b53",
- "label": "MI355X · mi355x-xgmi · xgmi (ws8)",
- "generatedAt": "2026-06-29T02:39:13.078018+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-29T02:39:13.078018+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 187.5,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 187.5,
- "inPlaceUs": 204.9,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 176.3,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 176.3,
- "inPlaceUs": 202.7,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 176.5,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 176.5,
- "inPlaceUs": 204,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 173.8,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 173.8,
- "inPlaceUs": 170.9,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 177.9,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 177.9,
- "inPlaceUs": 171.6,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 175.4,
- "algBandwidthGbps": 0.02,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 175.4,
- "inPlaceUs": 171.3,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 173.1,
- "algBandwidthGbps": 0.05,
- "busBandwidthGbps": 0.04,
- "outOfPlaceUs": 210,
- "inPlaceUs": 173.1,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 172.7,
- "algBandwidthGbps": 0.09,
- "busBandwidthGbps": 0.08,
- "outOfPlaceUs": 210.5,
- "inPlaceUs": 172.7,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 173.2,
- "algBandwidthGbps": 0.19,
- "busBandwidthGbps": 0.17,
- "outOfPlaceUs": 210.5,
- "inPlaceUs": 173.2,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 177.8,
- "algBandwidthGbps": 0.37,
- "busBandwidthGbps": 0.32,
- "outOfPlaceUs": 215.9,
- "inPlaceUs": 177.8,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 126.3,
- "algBandwidthGbps": 1.04,
- "busBandwidthGbps": 0.91,
- "outOfPlaceUs": 223.8,
- "inPlaceUs": 126.3,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 136.7,
- "algBandwidthGbps": 1.92,
- "busBandwidthGbps": 1.68,
- "outOfPlaceUs": 139.9,
- "inPlaceUs": 136.7,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 137.9,
- "algBandwidthGbps": 3.8,
- "busBandwidthGbps": 3.33,
- "outOfPlaceUs": 140.8,
- "inPlaceUs": 137.9,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 124.5,
- "algBandwidthGbps": 8.42,
- "busBandwidthGbps": 7.37,
- "outOfPlaceUs": 124.5,
- "inPlaceUs": 142,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 129.4,
- "algBandwidthGbps": 16.21,
- "busBandwidthGbps": 14.18,
- "outOfPlaceUs": 129.4,
- "inPlaceUs": 148.6,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 140.7,
- "algBandwidthGbps": 29.82,
- "busBandwidthGbps": 26.09,
- "outOfPlaceUs": 140.7,
- "inPlaceUs": 158.2,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 141.3,
- "algBandwidthGbps": 59.35,
- "busBandwidthGbps": 51.93,
- "outOfPlaceUs": 141.3,
- "inPlaceUs": 158.8,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 88.34,
- "algBandwidthGbps": 189.91,
- "busBandwidthGbps": 166.17,
- "outOfPlaceUs": 128.5,
- "inPlaceUs": 88.34,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 104.3,
- "algBandwidthGbps": 321.59,
- "busBandwidthGbps": 281.39,
- "outOfPlaceUs": 142.4,
- "inPlaceUs": 104.3,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 174.1,
- "algBandwidthGbps": 385.36,
- "busBandwidthGbps": 337.19,
- "outOfPlaceUs": 174.1,
- "inPlaceUs": 174.4,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 320.4,
- "algBandwidthGbps": 418.92,
- "busBandwidthGbps": 366.55,
- "outOfPlaceUs": 320.9,
- "inPlaceUs": 320.4,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 616.6,
- "algBandwidthGbps": 435.32,
- "busBandwidthGbps": 380.9,
- "outOfPlaceUs": 625.8,
- "inPlaceUs": 616.6,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 1206.3,
- "algBandwidthGbps": 445.05,
- "busBandwidthGbps": 389.42,
- "outOfPlaceUs": 1207.7,
- "inPlaceUs": 1206.3,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 2396.4,
- "algBandwidthGbps": 448.06,
- "busBandwidthGbps": 392.06,
- "outOfPlaceUs": 2396.4,
- "inPlaceUs": 2399,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 4736.7,
- "algBandwidthGbps": 453.37,
- "busBandwidthGbps": 396.7,
- "outOfPlaceUs": 4750.2,
- "inPlaceUs": 4736.7,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 9395.7,
- "algBandwidthGbps": 457.12,
- "busBandwidthGbps": 399.98,
- "outOfPlaceUs": 9395.7,
- "inPlaceUs": 9416.8,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 18643,
- "algBandwidthGbps": 460.77,
- "busBandwidthGbps": 403.17,
- "outOfPlaceUs": 18643,
- "inPlaceUs": 18899,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-940e3e1c",
- "identity": "nccl|b300|all_reduce|b300-nvlink-island|nvlink|8|nccl-tests-v1",
- "op": "all_reduce",
- "sku": "b300",
- "runner": "b300-nv_03",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "139076c9959b0653",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 218.816,
- "status": "valid",
- "valid": true,
- "colorKey": "b300_940e3e1c",
- "label": "B300 · b300-nvlink-island · nvlink (ws8)",
- "generatedAt": "2026-06-27T11:18:24.142157+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T11:18:24.142157+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 8,
- "dtype": "float",
- "latencyUs": 28.3,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 28.3,
- "inPlaceUs": 27.59,
- "correct": true
- },
- {
- "sizeBytes": 16,
- "dtype": "float",
- "latencyUs": 27.27,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 27.27,
- "inPlaceUs": 27.06,
- "correct": true
- },
- {
- "sizeBytes": 32,
- "dtype": "float",
- "latencyUs": 27.25,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 27.25,
- "inPlaceUs": 27.3,
- "correct": true
- },
- {
- "sizeBytes": 64,
- "dtype": "float",
- "latencyUs": 27.32,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 27.32,
- "inPlaceUs": 27.28,
- "correct": true
- },
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 27.42,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 27.42,
- "inPlaceUs": 27.59,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 27.26,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 27.26,
- "inPlaceUs": 27.32,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 27.16,
- "algBandwidthGbps": 0.02,
- "busBandwidthGbps": 0.03,
- "outOfPlaceUs": 27.16,
- "inPlaceUs": 27.38,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 27.33,
- "algBandwidthGbps": 0.04,
- "busBandwidthGbps": 0.07,
- "outOfPlaceUs": 27.33,
- "inPlaceUs": 27.14,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 27.36,
- "algBandwidthGbps": 0.07,
- "busBandwidthGbps": 0.13,
- "outOfPlaceUs": 27.36,
- "inPlaceUs": 27.33,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 27.3,
- "algBandwidthGbps": 0.15,
- "busBandwidthGbps": 0.26,
- "outOfPlaceUs": 27.3,
- "inPlaceUs": 27.35,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 27.52,
- "algBandwidthGbps": 0.3,
- "busBandwidthGbps": 0.52,
- "outOfPlaceUs": 27.52,
- "inPlaceUs": 27.59,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 27.64,
- "algBandwidthGbps": 0.59,
- "busBandwidthGbps": 1.04,
- "outOfPlaceUs": 27.64,
- "inPlaceUs": 27.61,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 27.76,
- "algBandwidthGbps": 1.18,
- "busBandwidthGbps": 2.07,
- "outOfPlaceUs": 27.76,
- "inPlaceUs": 27.85,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 27.7,
- "algBandwidthGbps": 2.37,
- "busBandwidthGbps": 4.14,
- "outOfPlaceUs": 28.19,
- "inPlaceUs": 27.7,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 28.21,
- "algBandwidthGbps": 4.65,
- "busBandwidthGbps": 8.13,
- "outOfPlaceUs": 28.59,
- "inPlaceUs": 28.21,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 28.56,
- "algBandwidthGbps": 9.18,
- "busBandwidthGbps": 16.06,
- "outOfPlaceUs": 29.16,
- "inPlaceUs": 28.56,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 29.89,
- "algBandwidthGbps": 17.54,
- "busBandwidthGbps": 30.7,
- "outOfPlaceUs": 29.89,
- "inPlaceUs": 29.93,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 32.16,
- "algBandwidthGbps": 32.61,
- "busBandwidthGbps": 57.06,
- "outOfPlaceUs": 32.16,
- "inPlaceUs": 32.67,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 37.47,
- "algBandwidthGbps": 55.97,
- "busBandwidthGbps": 97.94,
- "outOfPlaceUs": 37.47,
- "inPlaceUs": 38.07,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 56.79,
- "algBandwidthGbps": 73.86,
- "busBandwidthGbps": 129.26,
- "outOfPlaceUs": 56.88,
- "inPlaceUs": 56.79,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 77.08,
- "algBandwidthGbps": 108.83,
- "busBandwidthGbps": 190.45,
- "outOfPlaceUs": 78.24,
- "inPlaceUs": 77.08,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 104.77,
- "algBandwidthGbps": 160.14,
- "busBandwidthGbps": 280.24,
- "outOfPlaceUs": 106.93,
- "inPlaceUs": 104.77,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 166.18,
- "algBandwidthGbps": 201.91,
- "busBandwidthGbps": 353.34,
- "outOfPlaceUs": 168.44,
- "inPlaceUs": 166.18,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 274.52,
- "algBandwidthGbps": 244.46,
- "busBandwidthGbps": 427.8,
- "outOfPlaceUs": 274.52,
- "inPlaceUs": 275.23,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 391.34,
- "algBandwidthGbps": 342.97,
- "busBandwidthGbps": 600.19,
- "outOfPlaceUs": 391.34,
- "inPlaceUs": 392.6,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 711.09,
- "algBandwidthGbps": 377.5,
- "busBandwidthGbps": 660.62,
- "outOfPlaceUs": 711.09,
- "inPlaceUs": 712.3,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 1324.96,
- "algBandwidthGbps": 405.2,
- "busBandwidthGbps": 709.1,
- "outOfPlaceUs": 1324.96,
- "inPlaceUs": 1327.33,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 2553.96,
- "algBandwidthGbps": 420.42,
- "busBandwidthGbps": 735.74,
- "outOfPlaceUs": 2558.96,
- "inPlaceUs": 2553.96,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 4571.5,
- "algBandwidthGbps": 469.75,
- "busBandwidthGbps": 822.07,
- "outOfPlaceUs": 4576.46,
- "inPlaceUs": 4571.5,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 9024.56,
- "algBandwidthGbps": 475.92,
- "busBandwidthGbps": 832.86,
- "outOfPlaceUs": 9034.78,
- "inPlaceUs": 9024.56,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 17971.9,
- "algBandwidthGbps": 477.96,
- "busBandwidthGbps": 836.44,
- "outOfPlaceUs": 17991.5,
- "inPlaceUs": 17971.9,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-fd5a787b",
- "identity": "allreduce-fw|b300|flashinfer-oneshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1",
- "op": "all_reduce",
- "sku": "b300",
- "runner": "b300-nv_11",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": null,
- "dtype": "bf16",
- "comparisonClass": null,
- "comparisonKey": "81bfaa10f5beda36",
- "measurementContract": "allreduce-fw-v1",
- "avgBusBandwidthGbps": null,
- "status": "valid",
- "valid": true,
- "colorKey": "b300_fd5a787b",
- "label": "B300 · flashinfer-oneshot (fw-AR · ws8)",
- "generatedAt": "2026-06-28T01:47:48.908164+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T01:47:48.908164+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 4096,
- "dtype": "bf16",
- "latencyUs": 11.661,
- "algBandwidthGbps": 0.351,
- "busBandwidthGbps": 0.615,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "bf16",
- "latencyUs": 11.601,
- "algBandwidthGbps": 1.412,
- "busBandwidthGbps": 2.472,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "bf16",
- "latencyUs": 12.381,
- "algBandwidthGbps": 5.293,
- "busBandwidthGbps": 9.263,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "bf16",
- "latencyUs": 14.274,
- "algBandwidthGbps": 18.365,
- "busBandwidthGbps": 32.139,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "bf16",
- "latencyUs": 23.854,
- "algBandwidthGbps": 43.958,
- "busBandwidthGbps": 76.926,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "bf16",
- "latencyUs": 75.394,
- "algBandwidthGbps": 55.632,
- "busBandwidthGbps": 97.356,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "bf16",
- "latencyUs": 244.644,
- "algBandwidthGbps": 68.578,
- "busBandwidthGbps": 120.011,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "bf16",
- "latencyUs": 956.149,
- "algBandwidthGbps": 70.187,
- "busBandwidthGbps": 122.827,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-087af4ad",
- "identity": "allreduce-fw|b300|flashinfer-twoshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1",
- "op": "all_reduce",
- "sku": "b300",
- "runner": "b300-nv_11",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": null,
- "dtype": "bf16",
- "comparisonClass": null,
- "comparisonKey": "183298dcd11c3e1e",
- "measurementContract": "allreduce-fw-v1",
- "avgBusBandwidthGbps": null,
- "status": "valid",
- "valid": true,
- "colorKey": "b300_087af4ad",
- "label": "B300 · flashinfer-twoshot (fw-AR · ws8)",
- "generatedAt": "2026-06-28T01:47:48.908164+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T01:47:48.908164+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 65536,
- "dtype": "bf16",
- "latencyUs": 385.191,
- "algBandwidthGbps": 0.17,
- "busBandwidthGbps": 0.298,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "bf16",
- "latencyUs": 118.644,
- "algBandwidthGbps": 2.209,
- "busBandwidthGbps": 3.867,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "bf16",
- "latencyUs": 47.46,
- "algBandwidthGbps": 22.094,
- "busBandwidthGbps": 38.664,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "bf16",
- "latencyUs": 43.002,
- "algBandwidthGbps": 97.537,
- "busBandwidthGbps": 170.69,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "bf16",
- "latencyUs": 90.81,
- "algBandwidthGbps": 184.75,
- "busBandwidthGbps": 323.313,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "bf16",
- "latencyUs": 353.165,
- "algBandwidthGbps": 190.021,
- "busBandwidthGbps": 332.537,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-345c72e8",
- "identity": "allreduce-fw|b300|nccl|b300-nvlink-island|nvlink|8|allreduce-fw-v1",
- "op": "all_reduce",
- "sku": "b300",
- "runner": "b300-nv_11",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": null,
- "dtype": "bf16",
- "comparisonClass": null,
- "comparisonKey": "9c254fab92b5fac7",
- "measurementContract": "allreduce-fw-v1",
- "avgBusBandwidthGbps": null,
- "status": "valid",
- "valid": true,
- "colorKey": "b300_345c72e8",
- "label": "B300 · nccl (fw-AR · ws8)",
- "generatedAt": "2026-06-28T01:47:48.908164+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T01:47:48.908164+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 1024,
- "dtype": "bf16",
- "latencyUs": 51,
- "algBandwidthGbps": 0.02,
- "busBandwidthGbps": 0.035,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "bf16",
- "latencyUs": 29.788,
- "algBandwidthGbps": 0.138,
- "busBandwidthGbps": 0.241,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "bf16",
- "latencyUs": 25.746,
- "algBandwidthGbps": 0.636,
- "busBandwidthGbps": 1.114,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "bf16",
- "latencyUs": 43.559,
- "algBandwidthGbps": 1.505,
- "busBandwidthGbps": 2.633,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "bf16",
- "latencyUs": 27.737,
- "algBandwidthGbps": 9.451,
- "busBandwidthGbps": 16.539,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "bf16",
- "latencyUs": 29.05,
- "algBandwidthGbps": 36.096,
- "busBandwidthGbps": 63.168,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "bf16",
- "latencyUs": 52.692,
- "algBandwidthGbps": 79.601,
- "busBandwidthGbps": 139.301,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "bf16",
- "latencyUs": 95.558,
- "algBandwidthGbps": 175.571,
- "busBandwidthGbps": 307.25,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "bf16",
- "latencyUs": 302.87,
- "algBandwidthGbps": 221.577,
- "busBandwidthGbps": 387.759,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-18cb0223",
- "identity": "allreduce-fw|h100|flashinfer-oneshot|h100-nvlink-island|nvlink|8|allreduce-fw-v1",
- "op": "all_reduce",
- "sku": "h100",
- "runner": "h100-dgxc-slurm_17",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": null,
- "dtype": "bf16",
- "comparisonClass": null,
- "comparisonKey": "2876f45736ca183e",
- "measurementContract": "allreduce-fw-v1",
- "avgBusBandwidthGbps": null,
- "status": "valid",
- "valid": true,
- "colorKey": "h100_18cb0223",
- "label": "H100 · flashinfer-oneshot (fw-AR · ws8)",
- "generatedAt": "2026-06-28T01:47:32.393320+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T01:47:32.393320+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 4096,
- "dtype": "bf16",
- "latencyUs": 19.209,
- "algBandwidthGbps": 0.213,
- "busBandwidthGbps": 0.373,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "bf16",
- "latencyUs": 18.247,
- "algBandwidthGbps": 0.898,
- "busBandwidthGbps": 1.571,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "bf16",
- "latencyUs": 18.258,
- "algBandwidthGbps": 3.589,
- "busBandwidthGbps": 6.282,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "bf16",
- "latencyUs": 17.969,
- "algBandwidthGbps": 14.589,
- "busBandwidthGbps": 25.531,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "bf16",
- "latencyUs": 32.62,
- "algBandwidthGbps": 32.145,
- "busBandwidthGbps": 56.254,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "bf16",
- "latencyUs": 119.14,
- "algBandwidthGbps": 35.205,
- "busBandwidthGbps": 61.609,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "bf16",
- "latencyUs": 464.128,
- "algBandwidthGbps": 36.148,
- "busBandwidthGbps": 63.259,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "bf16",
- "latencyUs": 1854.815,
- "algBandwidthGbps": 36.181,
- "busBandwidthGbps": 63.317,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-5a5e256d",
- "identity": "allreduce-fw|h100|flashinfer-twoshot|h100-nvlink-island|nvlink|8|allreduce-fw-v1",
- "op": "all_reduce",
- "sku": "h100",
- "runner": "h100-dgxc-slurm_17",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": null,
- "dtype": "bf16",
- "comparisonClass": null,
- "comparisonKey": "3914980c40380611",
- "measurementContract": "allreduce-fw-v1",
- "avgBusBandwidthGbps": null,
- "status": "valid",
- "valid": true,
- "colorKey": "h100_5a5e256d",
- "label": "H100 · flashinfer-twoshot (fw-AR · ws8)",
- "generatedAt": "2026-06-28T01:47:32.393320+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T01:47:32.393320+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 65536,
- "dtype": "bf16",
- "latencyUs": 261.726,
- "algBandwidthGbps": 0.25,
- "busBandwidthGbps": 0.438,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "bf16",
- "latencyUs": 85.069,
- "algBandwidthGbps": 3.082,
- "busBandwidthGbps": 5.393,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "bf16",
- "latencyUs": 36.265,
- "algBandwidthGbps": 28.914,
- "busBandwidthGbps": 50.6,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "bf16",
- "latencyUs": 53.77,
- "algBandwidthGbps": 78.004,
- "busBandwidthGbps": 136.507,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "bf16",
- "latencyUs": 144.773,
- "algBandwidthGbps": 115.886,
- "busBandwidthGbps": 202.801,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "bf16",
- "latencyUs": 584.195,
- "algBandwidthGbps": 114.874,
- "busBandwidthGbps": 201.03,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-4676ac48",
- "identity": "nccl|h100|all_reduce|h100-nvlink-island|nvlink|8|nccl-tests-v1",
- "op": "all_reduce",
- "sku": "h100",
- "runner": "h100-dgxc-slurm_09",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "059665d8b168a0d7",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 145.585,
- "status": "valid",
- "valid": true,
- "colorKey": "h100_4676ac48",
- "label": "H100 · h100-nvlink-island · nvlink (ws8)",
- "generatedAt": "2026-06-27T11:18:41.017727+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T11:18:41.017727+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 8,
- "dtype": "float",
- "latencyUs": 108.66,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 108.66,
- "inPlaceUs": 38.99,
- "correct": true
- },
- {
- "sizeBytes": 16,
- "dtype": "float",
- "latencyUs": 39.33,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 39.33,
- "inPlaceUs": 38.7,
- "correct": true
- },
- {
- "sizeBytes": 32,
- "dtype": "float",
- "latencyUs": 73.95,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 73.95,
- "inPlaceUs": 38.72,
- "correct": true
- },
- {
- "sizeBytes": 64,
- "dtype": "float",
- "latencyUs": 39.17,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 39.17,
- "inPlaceUs": 38.71,
- "correct": true
- },
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 39.12,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 39.12,
- "inPlaceUs": 38.4,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 38.88,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 38.88,
- "inPlaceUs": 38.41,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 39.08,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 39.08,
- "inPlaceUs": 38.59,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 38.88,
- "algBandwidthGbps": 0.03,
- "busBandwidthGbps": 0.05,
- "outOfPlaceUs": 38.88,
- "inPlaceUs": 38.83,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 39.58,
- "algBandwidthGbps": 0.05,
- "busBandwidthGbps": 0.09,
- "outOfPlaceUs": 39.58,
- "inPlaceUs": 39.25,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 38.69,
- "algBandwidthGbps": 0.11,
- "busBandwidthGbps": 0.19,
- "outOfPlaceUs": 38.94,
- "inPlaceUs": 38.69,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 38.69,
- "algBandwidthGbps": 0.21,
- "busBandwidthGbps": 0.37,
- "outOfPlaceUs": 38.69,
- "inPlaceUs": 39.4,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 39.08,
- "algBandwidthGbps": 0.42,
- "busBandwidthGbps": 0.73,
- "outOfPlaceUs": 39.08,
- "inPlaceUs": 39.06,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 38.26,
- "algBandwidthGbps": 0.86,
- "busBandwidthGbps": 1.5,
- "outOfPlaceUs": 39.3,
- "inPlaceUs": 38.26,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 38.71,
- "algBandwidthGbps": 1.69,
- "busBandwidthGbps": 2.96,
- "outOfPlaceUs": 38.95,
- "inPlaceUs": 38.71,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 39.68,
- "algBandwidthGbps": 3.3,
- "busBandwidthGbps": 5.78,
- "outOfPlaceUs": 40.2,
- "inPlaceUs": 39.68,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 40.97,
- "algBandwidthGbps": 6.4,
- "busBandwidthGbps": 11.2,
- "outOfPlaceUs": 41.31,
- "inPlaceUs": 40.97,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 43.56,
- "algBandwidthGbps": 12.04,
- "busBandwidthGbps": 21.06,
- "outOfPlaceUs": 43.56,
- "inPlaceUs": 43.68,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 46.56,
- "algBandwidthGbps": 22.52,
- "busBandwidthGbps": 39.42,
- "outOfPlaceUs": 46.76,
- "inPlaceUs": 46.56,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 44.32,
- "algBandwidthGbps": 47.32,
- "busBandwidthGbps": 82.81,
- "outOfPlaceUs": 44.44,
- "inPlaceUs": 44.32,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 55.78,
- "algBandwidthGbps": 75.19,
- "busBandwidthGbps": 131.58,
- "outOfPlaceUs": 56.11,
- "inPlaceUs": 55.78,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 83.88,
- "algBandwidthGbps": 100,
- "busBandwidthGbps": 175.01,
- "outOfPlaceUs": 85.22,
- "inPlaceUs": 83.88,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 125.29,
- "algBandwidthGbps": 133.91,
- "busBandwidthGbps": 234.34,
- "outOfPlaceUs": 125.73,
- "inPlaceUs": 125.29,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 200.68,
- "algBandwidthGbps": 167.2,
- "busBandwidthGbps": 292.6,
- "outOfPlaceUs": 200.82,
- "inPlaceUs": 200.68,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 325,
- "algBandwidthGbps": 206.49,
- "busBandwidthGbps": 361.36,
- "outOfPlaceUs": 325,
- "inPlaceUs": 325.69,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 585.92,
- "algBandwidthGbps": 229.07,
- "busBandwidthGbps": 400.87,
- "outOfPlaceUs": 585.97,
- "inPlaceUs": 585.92,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 1110.23,
- "algBandwidthGbps": 241.78,
- "busBandwidthGbps": 423.12,
- "outOfPlaceUs": 1111.7,
- "inPlaceUs": 1110.23,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 2145.48,
- "algBandwidthGbps": 250.23,
- "busBandwidthGbps": 437.91,
- "outOfPlaceUs": 2145.48,
- "inPlaceUs": 2147.26,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 4026.19,
- "algBandwidthGbps": 266.69,
- "busBandwidthGbps": 466.71,
- "outOfPlaceUs": 4026.19,
- "inPlaceUs": 4031.14,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 7957.67,
- "algBandwidthGbps": 269.86,
- "busBandwidthGbps": 472.26,
- "outOfPlaceUs": 7958.73,
- "inPlaceUs": 7957.67,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 15778.7,
- "algBandwidthGbps": 272.2,
- "busBandwidthGbps": 476.35,
- "outOfPlaceUs": 15778.7,
- "inPlaceUs": 15787,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 31394.3,
- "algBandwidthGbps": 273.61,
- "busBandwidthGbps": 478.83,
- "outOfPlaceUs": 31404.3,
- "inPlaceUs": 31394.3,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-ae07ad9c",
- "identity": "allreduce-fw|h100|nccl|h100-nvlink-island|nvlink|8|allreduce-fw-v1",
- "op": "all_reduce",
- "sku": "h100",
- "runner": "h100-dgxc-slurm_17",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": null,
- "dtype": "bf16",
- "comparisonClass": null,
- "comparisonKey": "aa6fba4338779d59",
- "measurementContract": "allreduce-fw-v1",
- "avgBusBandwidthGbps": null,
- "status": "valid",
- "valid": true,
- "colorKey": "h100_ae07ad9c",
- "label": "H100 · nccl (fw-AR · ws8)",
- "generatedAt": "2026-06-28T01:47:32.393320+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T01:47:32.393320+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 1024,
- "dtype": "bf16",
- "latencyUs": 32.458,
- "algBandwidthGbps": 0.032,
- "busBandwidthGbps": 0.055,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "bf16",
- "latencyUs": 30.771,
- "algBandwidthGbps": 0.133,
- "busBandwidthGbps": 0.233,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "bf16",
- "latencyUs": 31.116,
- "algBandwidthGbps": 0.527,
- "busBandwidthGbps": 0.921,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "bf16",
- "latencyUs": 61.512,
- "algBandwidthGbps": 1.065,
- "busBandwidthGbps": 1.864,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "bf16",
- "latencyUs": 30.758,
- "algBandwidthGbps": 8.523,
- "busBandwidthGbps": 14.915,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "bf16",
- "latencyUs": 33.86,
- "algBandwidthGbps": 30.968,
- "busBandwidthGbps": 54.194,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "bf16",
- "latencyUs": 84.309,
- "algBandwidthGbps": 49.749,
- "busBandwidthGbps": 87.061,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "bf16",
- "latencyUs": 118.376,
- "algBandwidthGbps": 141.728,
- "busBandwidthGbps": 248.024,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "bf16",
- "latencyUs": 322.062,
- "algBandwidthGbps": 208.372,
- "busBandwidthGbps": 364.652,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-83a9e484",
- "identity": "nccl|h200|all_reduce|h200-nvlink-island|nvlink|8|nccl-tests-v1",
- "op": "all_reduce",
- "sku": "h200",
- "runner": "h200-dgxc-slurm_2",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "9171bd1206f1d15c",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 147.096,
- "status": "valid",
- "valid": true,
- "colorKey": "h200_83a9e484",
- "label": "H200 · h200-nvlink-island · nvlink (ws8)",
- "generatedAt": "2026-06-27T11:18:51.255960+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T11:18:51.255960+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 8,
- "dtype": "float",
- "latencyUs": 46.19,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 46.19,
- "inPlaceUs": 45.4,
- "correct": true
- },
- {
- "sizeBytes": 16,
- "dtype": "float",
- "latencyUs": 46.3,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 46.3,
- "inPlaceUs": 49.15,
- "correct": true
- },
- {
- "sizeBytes": 32,
- "dtype": "float",
- "latencyUs": 43.48,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 43.48,
- "inPlaceUs": 41.06,
- "correct": true
- },
- {
- "sizeBytes": 64,
- "dtype": "float",
- "latencyUs": 40.84,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 40.84,
- "inPlaceUs": 40.88,
- "correct": true
- },
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 39.89,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 39.89,
- "inPlaceUs": 43.96,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 43.77,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 43.77,
- "inPlaceUs": 44.61,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 40.31,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 40.31,
- "inPlaceUs": 41.46,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 40.55,
- "algBandwidthGbps": 0.03,
- "busBandwidthGbps": 0.04,
- "outOfPlaceUs": 40.55,
- "inPlaceUs": 41.64,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 40.83,
- "algBandwidthGbps": 0.05,
- "busBandwidthGbps": 0.09,
- "outOfPlaceUs": 43.27,
- "inPlaceUs": 40.83,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 41.81,
- "algBandwidthGbps": 0.1,
- "busBandwidthGbps": 0.17,
- "outOfPlaceUs": 41.81,
- "inPlaceUs": 41.67,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 41.55,
- "algBandwidthGbps": 0.2,
- "busBandwidthGbps": 0.35,
- "outOfPlaceUs": 41.69,
- "inPlaceUs": 41.55,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 40.78,
- "algBandwidthGbps": 0.4,
- "busBandwidthGbps": 0.7,
- "outOfPlaceUs": 44.62,
- "inPlaceUs": 40.78,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 40.5,
- "algBandwidthGbps": 0.81,
- "busBandwidthGbps": 1.42,
- "outOfPlaceUs": 41.35,
- "inPlaceUs": 40.5,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 41.2,
- "algBandwidthGbps": 1.59,
- "busBandwidthGbps": 2.78,
- "outOfPlaceUs": 45.65,
- "inPlaceUs": 41.2,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 44.7,
- "algBandwidthGbps": 2.93,
- "busBandwidthGbps": 5.13,
- "outOfPlaceUs": 44.7,
- "inPlaceUs": 45.23,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 42.45,
- "algBandwidthGbps": 6.18,
- "busBandwidthGbps": 10.81,
- "outOfPlaceUs": 43.35,
- "inPlaceUs": 42.45,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 45.27,
- "algBandwidthGbps": 11.58,
- "busBandwidthGbps": 20.27,
- "outOfPlaceUs": 45.27,
- "inPlaceUs": 50.92,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 47.39,
- "algBandwidthGbps": 22.13,
- "busBandwidthGbps": 38.72,
- "outOfPlaceUs": 47.39,
- "inPlaceUs": 48.94,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 45.86,
- "algBandwidthGbps": 45.73,
- "busBandwidthGbps": 80.02,
- "outOfPlaceUs": 48.22,
- "inPlaceUs": 45.86,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 54.76,
- "algBandwidthGbps": 76.6,
- "busBandwidthGbps": 134.05,
- "outOfPlaceUs": 54.92,
- "inPlaceUs": 54.76,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 82.71,
- "algBandwidthGbps": 101.42,
- "busBandwidthGbps": 177.49,
- "outOfPlaceUs": 83.49,
- "inPlaceUs": 82.71,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 124.23,
- "algBandwidthGbps": 135.05,
- "busBandwidthGbps": 236.34,
- "outOfPlaceUs": 125.83,
- "inPlaceUs": 124.23,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 199.57,
- "algBandwidthGbps": 168.13,
- "busBandwidthGbps": 294.23,
- "outOfPlaceUs": 199.57,
- "inPlaceUs": 199.89,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 321.61,
- "algBandwidthGbps": 208.67,
- "busBandwidthGbps": 365.16,
- "outOfPlaceUs": 321.61,
- "inPlaceUs": 322.55,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 571.39,
- "algBandwidthGbps": 234.9,
- "busBandwidthGbps": 411.07,
- "outOfPlaceUs": 573.19,
- "inPlaceUs": 571.39,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 1073.14,
- "algBandwidthGbps": 250.14,
- "busBandwidthGbps": 437.75,
- "outOfPlaceUs": 1073.14,
- "inPlaceUs": 1076.11,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 2090.9,
- "algBandwidthGbps": 256.77,
- "busBandwidthGbps": 449.34,
- "outOfPlaceUs": 2091.74,
- "inPlaceUs": 2090.9,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 4010.65,
- "algBandwidthGbps": 267.72,
- "busBandwidthGbps": 468.51,
- "outOfPlaceUs": 4013.31,
- "inPlaceUs": 4010.65,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 7917.63,
- "algBandwidthGbps": 271.23,
- "busBandwidthGbps": 474.65,
- "outOfPlaceUs": 7920.22,
- "inPlaceUs": 7917.63,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 15691.2,
- "algBandwidthGbps": 273.72,
- "busBandwidthGbps": 479.01,
- "outOfPlaceUs": 15691.2,
- "inPlaceUs": 15701,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 31241,
- "algBandwidthGbps": 274.96,
- "busBandwidthGbps": 481.17,
- "outOfPlaceUs": 31280.5,
- "inPlaceUs": 31241,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-be6147f8",
- "identity": "nccl|mi355x|all_reduce|mi355x-xgmi|xgmi|8|nccl-tests-v1",
- "op": "all_reduce",
- "sku": "mi355x",
- "runner": "mi355x-amds_01",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "643cf957198f1634",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 132.947,
- "status": "valid",
- "valid": true,
- "colorKey": "mi355x_be6147f8",
- "label": "MI355X · mi355x-xgmi · xgmi (ws8)",
- "generatedAt": "2026-06-29T02:37:18.096029+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-29T02:37:18.096029+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 8,
- "dtype": "float",
- "latencyUs": 70.23,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 70.23,
- "inPlaceUs": 55.94,
- "correct": true
- },
- {
- "sizeBytes": 16,
- "dtype": "float",
- "latencyUs": 58.71,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 58.71,
- "inPlaceUs": 59.19,
- "correct": true
- },
- {
- "sizeBytes": 32,
- "dtype": "float",
- "latencyUs": 58.37,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 58.37,
- "inPlaceUs": 58.26,
- "correct": true
- },
- {
- "sizeBytes": 64,
- "dtype": "float",
- "latencyUs": 58.11,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 58.11,
- "inPlaceUs": 67.17,
- "correct": true
- },
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 58.88,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 58.88,
- "inPlaceUs": 68.44,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 57.68,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 57.68,
- "inPlaceUs": 68.1,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 57.2,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 57.2,
- "inPlaceUs": 68.38,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 42.33,
- "algBandwidthGbps": 0.02,
- "busBandwidthGbps": 0.04,
- "outOfPlaceUs": 42.33,
- "inPlaceUs": 56.59,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 56.27,
- "algBandwidthGbps": 0.04,
- "busBandwidthGbps": 0.06,
- "outOfPlaceUs": 56.27,
- "inPlaceUs": 57.42,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 56.73,
- "algBandwidthGbps": 0.07,
- "busBandwidthGbps": 0.13,
- "outOfPlaceUs": 56.73,
- "inPlaceUs": 57.52,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 46.68,
- "algBandwidthGbps": 0.18,
- "busBandwidthGbps": 0.31,
- "outOfPlaceUs": 56.91,
- "inPlaceUs": 46.68,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 56.73,
- "algBandwidthGbps": 0.29,
- "busBandwidthGbps": 0.51,
- "outOfPlaceUs": 56.73,
- "inPlaceUs": 58.19,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 57.45,
- "algBandwidthGbps": 0.57,
- "busBandwidthGbps": 1,
- "outOfPlaceUs": 57.45,
- "inPlaceUs": 58.35,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 48.73,
- "algBandwidthGbps": 1.34,
- "busBandwidthGbps": 2.35,
- "outOfPlaceUs": 57.81,
- "inPlaceUs": 48.73,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 61.94,
- "algBandwidthGbps": 2.12,
- "busBandwidthGbps": 3.7,
- "outOfPlaceUs": 61.94,
- "inPlaceUs": 72.74,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 62.66,
- "algBandwidthGbps": 4.18,
- "busBandwidthGbps": 7.32,
- "outOfPlaceUs": 71.87,
- "inPlaceUs": 62.66,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 72.34,
- "algBandwidthGbps": 7.25,
- "busBandwidthGbps": 12.68,
- "outOfPlaceUs": 73.07,
- "inPlaceUs": 72.34,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 65.42,
- "algBandwidthGbps": 16.03,
- "busBandwidthGbps": 28.05,
- "outOfPlaceUs": 65.42,
- "inPlaceUs": 76.09,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 72.46,
- "algBandwidthGbps": 28.94,
- "busBandwidthGbps": 50.65,
- "outOfPlaceUs": 73.93,
- "inPlaceUs": 72.46,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 62.19,
- "algBandwidthGbps": 67.44,
- "busBandwidthGbps": 118.02,
- "outOfPlaceUs": 63.37,
- "inPlaceUs": 62.19,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 76.29,
- "algBandwidthGbps": 109.96,
- "busBandwidthGbps": 192.43,
- "outOfPlaceUs": 80.46,
- "inPlaceUs": 76.29,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 119,
- "algBandwidthGbps": 141.02,
- "busBandwidthGbps": 246.78,
- "outOfPlaceUs": 119,
- "inPlaceUs": 127.6,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 183,
- "algBandwidthGbps": 183.36,
- "busBandwidthGbps": 320.89,
- "outOfPlaceUs": 184.4,
- "inPlaceUs": 183,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 323.3,
- "algBandwidthGbps": 207.56,
- "busBandwidthGbps": 363.23,
- "outOfPlaceUs": 323.9,
- "inPlaceUs": 323.3,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 613.1,
- "algBandwidthGbps": 218.91,
- "busBandwidthGbps": 383.09,
- "outOfPlaceUs": 623.4,
- "inPlaceUs": 613.1,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 1191.6,
- "algBandwidthGbps": 225.28,
- "busBandwidthGbps": 394.24,
- "outOfPlaceUs": 1191.6,
- "inPlaceUs": 1192.7,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 2349.2,
- "algBandwidthGbps": 228.54,
- "busBandwidthGbps": 399.94,
- "outOfPlaceUs": 2349.2,
- "inPlaceUs": 2353.4,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 4668.6,
- "algBandwidthGbps": 229.99,
- "busBandwidthGbps": 402.49,
- "outOfPlaceUs": 4668.6,
- "inPlaceUs": 4671.6,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 9245.8,
- "algBandwidthGbps": 232.27,
- "busBandwidthGbps": 406.47,
- "outOfPlaceUs": 9245.8,
- "inPlaceUs": 9250.8,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 18524,
- "algBandwidthGbps": 231.86,
- "busBandwidthGbps": 405.76,
- "outOfPlaceUs": 18543,
- "inPlaceUs": 18524,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 37129,
- "algBandwidthGbps": 231.36,
- "busBandwidthGbps": 404.87,
- "outOfPlaceUs": 37129,
- "inPlaceUs": 37136,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-300783f6",
- "identity": "allreduce-fw|mi355x|nccl|mi355x-xgmi|xgmi|8|allreduce-fw-v1",
- "op": "all_reduce",
- "sku": "mi355x",
- "runner": "mi355x-amds_02",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "nodes": null,
- "dtype": "bf16",
- "comparisonClass": null,
- "comparisonKey": "cffcc3132d487de4",
- "measurementContract": "allreduce-fw-v1",
- "avgBusBandwidthGbps": null,
- "status": "valid",
- "valid": true,
- "colorKey": "mi355x_300783f6",
- "label": "MI355X · nccl (fw-AR · ws8)",
- "generatedAt": "2026-06-28T05:14:21.326557+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T05:14:21.326557+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 1024,
- "dtype": "bf16",
- "latencyUs": 43.632,
- "algBandwidthGbps": 0.023,
- "busBandwidthGbps": 0.041,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "bf16",
- "latencyUs": 28.193,
- "algBandwidthGbps": 0.145,
- "busBandwidthGbps": 0.254,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "bf16",
- "latencyUs": 26.58,
- "algBandwidthGbps": 0.616,
- "busBandwidthGbps": 1.079,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "bf16",
- "latencyUs": 26.654,
- "algBandwidthGbps": 2.459,
- "busBandwidthGbps": 4.303,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "bf16",
- "latencyUs": 28.918,
- "algBandwidthGbps": 9.065,
- "busBandwidthGbps": 15.864,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "bf16",
- "latencyUs": 35.083,
- "algBandwidthGbps": 29.889,
- "busBandwidthGbps": 52.305,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "bf16",
- "latencyUs": 56.62,
- "algBandwidthGbps": 74.078,
- "busBandwidthGbps": 129.636,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "bf16",
- "latencyUs": 115.37,
- "algBandwidthGbps": 145.42,
- "busBandwidthGbps": 254.486,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "bf16",
- "latencyUs": 361.633,
- "algBandwidthGbps": 185.572,
- "busBandwidthGbps": 324.75,
- "outOfPlaceUs": null,
- "inPlaceUs": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-35eb6655",
- "identity": "nccl|mi355x|alltoall|mi355x-xgmi|xgmi|8|nccl-tests-v1",
- "op": "alltoall",
- "sku": "mi355x",
- "runner": "mi355x-amds_01",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "67a9b0532a278ee9",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 108.705,
- "status": "valid",
- "valid": true,
- "colorKey": "mi355x_35eb6655",
- "label": "MI355X · mi355x-xgmi · xgmi (ws8)",
- "generatedAt": "2026-06-29T02:42:52.989210+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-29T02:42:52.989210+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 35.84,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 35.84,
- "inPlaceUs": 77.06,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 49.55,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 49.55,
- "inPlaceUs": 72.32,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 49.25,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 49.25,
- "inPlaceUs": 82.36,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 38.87,
- "algBandwidthGbps": 0.03,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 38.87,
- "inPlaceUs": 71.67,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 38.2,
- "algBandwidthGbps": 0.05,
- "busBandwidthGbps": 0.05,
- "outOfPlaceUs": 38.2,
- "inPlaceUs": 81.13,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 38.76,
- "algBandwidthGbps": 0.11,
- "busBandwidthGbps": 0.09,
- "outOfPlaceUs": 38.76,
- "inPlaceUs": 71.12,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 48.95,
- "algBandwidthGbps": 0.17,
- "busBandwidthGbps": 0.15,
- "outOfPlaceUs": 48.95,
- "inPlaceUs": 70.91,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 50.41,
- "algBandwidthGbps": 0.32,
- "busBandwidthGbps": 0.28,
- "outOfPlaceUs": 50.41,
- "inPlaceUs": 81.47,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 50.09,
- "algBandwidthGbps": 0.65,
- "busBandwidthGbps": 0.57,
- "outOfPlaceUs": 50.09,
- "inPlaceUs": 71.15,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 49.62,
- "algBandwidthGbps": 1.32,
- "busBandwidthGbps": 1.16,
- "outOfPlaceUs": 49.62,
- "inPlaceUs": 82.19,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 49.18,
- "algBandwidthGbps": 2.67,
- "busBandwidthGbps": 2.33,
- "outOfPlaceUs": 49.18,
- "inPlaceUs": 75.31,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 62.15,
- "algBandwidthGbps": 4.22,
- "busBandwidthGbps": 3.69,
- "outOfPlaceUs": 62.15,
- "inPlaceUs": 80.96,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 62.11,
- "algBandwidthGbps": 8.44,
- "busBandwidthGbps": 7.39,
- "outOfPlaceUs": 62.11,
- "inPlaceUs": 90.5,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 53.07,
- "algBandwidthGbps": 19.76,
- "busBandwidthGbps": 17.29,
- "outOfPlaceUs": 53.07,
- "inPlaceUs": 72.69,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 63.56,
- "algBandwidthGbps": 32.99,
- "busBandwidthGbps": 28.87,
- "outOfPlaceUs": 63.56,
- "inPlaceUs": 85.67,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 77.11,
- "algBandwidthGbps": 54.39,
- "busBandwidthGbps": 47.59,
- "outOfPlaceUs": 77.11,
- "inPlaceUs": 91.21,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 69.63,
- "algBandwidthGbps": 120.48,
- "busBandwidthGbps": 105.42,
- "outOfPlaceUs": 69.63,
- "inPlaceUs": 83.26,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 71.3,
- "algBandwidthGbps": 235.3,
- "busBandwidthGbps": 205.89,
- "outOfPlaceUs": 71.3,
- "inPlaceUs": 93.33,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 104.3,
- "algBandwidthGbps": 321.79,
- "busBandwidthGbps": 281.57,
- "outOfPlaceUs": 104.3,
- "inPlaceUs": 108.6,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 195.8,
- "algBandwidthGbps": 342.66,
- "busBandwidthGbps": 299.83,
- "outOfPlaceUs": 195.8,
- "inPlaceUs": 196.2,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 365.1,
- "algBandwidthGbps": 367.59,
- "busBandwidthGbps": 321.64,
- "outOfPlaceUs": 365.1,
- "inPlaceUs": 365.9,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 697.9,
- "algBandwidthGbps": 384.61,
- "busBandwidthGbps": 336.54,
- "outOfPlaceUs": 698.5,
- "inPlaceUs": 697.9,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 1353.3,
- "algBandwidthGbps": 396.7,
- "busBandwidthGbps": 347.11,
- "outOfPlaceUs": 1353.3,
- "inPlaceUs": 1355.9,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 2675.6,
- "algBandwidthGbps": 401.32,
- "busBandwidthGbps": 351.15,
- "outOfPlaceUs": 2675.6,
- "inPlaceUs": 2679,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 5296.7,
- "algBandwidthGbps": 405.43,
- "busBandwidthGbps": 354.76,
- "outOfPlaceUs": 5301,
- "inPlaceUs": 5296.7,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 10543,
- "algBandwidthGbps": 407.38,
- "busBandwidthGbps": 356.46,
- "outOfPlaceUs": 10543,
- "inPlaceUs": 10668,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 21021,
- "algBandwidthGbps": 408.63,
- "busBandwidthGbps": 357.55,
- "outOfPlaceUs": 21021,
- "inPlaceUs": 21415,
- "correct": true
- }
- ]
- },
- {
- "id": "cxn-9383336f",
- "identity": "nccl|mi355x|reduce_scatter|mi355x-xgmi|xgmi|8|nccl-tests-v1",
- "op": "reduce_scatter",
- "sku": "mi355x",
- "runner": "mi355x-amds_01",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "worldSize": 8,
- "nodes": 1,
- "dtype": "float",
- "comparisonClass": "standardized",
- "comparisonKey": "fd5d1a361a3ebfa3",
- "measurementContract": "nccl-tests-v1",
- "avgBusBandwidthGbps": 116.588,
- "status": "valid",
- "valid": true,
- "colorKey": "mi355x_9383336f",
- "label": "MI355X · mi355x-xgmi · xgmi (ws8)",
- "generatedAt": "2026-06-29T02:40:54.838353+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-29T02:40:54.838353+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 128,
- "dtype": "float",
- "latencyUs": 63.68,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 63.68,
- "inPlaceUs": 71.14,
- "correct": true
- },
- {
- "sizeBytes": 256,
- "dtype": "float",
- "latencyUs": 58.12,
- "algBandwidthGbps": 0,
- "busBandwidthGbps": 0,
- "outOfPlaceUs": 58.12,
- "inPlaceUs": 69.38,
- "correct": true
- },
- {
- "sizeBytes": 512,
- "dtype": "float",
- "latencyUs": 59.18,
- "algBandwidthGbps": 0.01,
- "busBandwidthGbps": 0.01,
- "outOfPlaceUs": 59.18,
- "inPlaceUs": 70.39,
- "correct": true
- },
- {
- "sizeBytes": 1024,
- "dtype": "float",
- "latencyUs": 58.61,
- "algBandwidthGbps": 0.02,
- "busBandwidthGbps": 0.02,
- "outOfPlaceUs": 58.61,
- "inPlaceUs": 59.64,
- "correct": true
- },
- {
- "sizeBytes": 2048,
- "dtype": "float",
- "latencyUs": 58.93,
- "algBandwidthGbps": 0.03,
- "busBandwidthGbps": 0.03,
- "outOfPlaceUs": 58.93,
- "inPlaceUs": 60.04,
- "correct": true
- },
- {
- "sizeBytes": 4096,
- "dtype": "float",
- "latencyUs": 59.87,
- "algBandwidthGbps": 0.07,
- "busBandwidthGbps": 0.06,
- "outOfPlaceUs": 59.87,
- "inPlaceUs": 59.13,
- "correct": true
- },
- {
- "sizeBytes": 8192,
- "dtype": "float",
- "latencyUs": 59.51,
- "algBandwidthGbps": 0.14,
- "busBandwidthGbps": 0.12,
- "outOfPlaceUs": 68.98,
- "inPlaceUs": 59.51,
- "correct": true
- },
- {
- "sizeBytes": 16384,
- "dtype": "float",
- "latencyUs": 59.64,
- "algBandwidthGbps": 0.27,
- "busBandwidthGbps": 0.24,
- "outOfPlaceUs": 69.54,
- "inPlaceUs": 59.64,
- "correct": true
- },
- {
- "sizeBytes": 32768,
- "dtype": "float",
- "latencyUs": 59.88,
- "algBandwidthGbps": 0.55,
- "busBandwidthGbps": 0.48,
- "outOfPlaceUs": 70.63,
- "inPlaceUs": 59.88,
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "dtype": "float",
- "latencyUs": 61.5,
- "algBandwidthGbps": 1.07,
- "busBandwidthGbps": 0.93,
- "outOfPlaceUs": 72.73,
- "inPlaceUs": 61.5,
- "correct": true
- },
- {
- "sizeBytes": 131072,
- "dtype": "float",
- "latencyUs": 65.15,
- "algBandwidthGbps": 2.01,
- "busBandwidthGbps": 1.76,
- "outOfPlaceUs": 74.45,
- "inPlaceUs": 65.15,
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "dtype": "float",
- "latencyUs": 69.72,
- "algBandwidthGbps": 3.76,
- "busBandwidthGbps": 3.29,
- "outOfPlaceUs": 70.1,
- "inPlaceUs": 69.72,
- "correct": true
- },
- {
- "sizeBytes": 524288,
- "dtype": "float",
- "latencyUs": 68.53,
- "algBandwidthGbps": 7.65,
- "busBandwidthGbps": 6.69,
- "outOfPlaceUs": 68.53,
- "inPlaceUs": 68.85,
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "dtype": "float",
- "latencyUs": 72,
- "algBandwidthGbps": 14.56,
- "busBandwidthGbps": 12.74,
- "outOfPlaceUs": 72,
- "inPlaceUs": 83.69,
- "correct": true
- },
- {
- "sizeBytes": 2097152,
- "dtype": "float",
- "latencyUs": 76.95,
- "algBandwidthGbps": 27.25,
- "busBandwidthGbps": 23.85,
- "outOfPlaceUs": 76.95,
- "inPlaceUs": 86.59,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "dtype": "float",
- "latencyUs": 76.39,
- "algBandwidthGbps": 54.91,
- "busBandwidthGbps": 48.04,
- "outOfPlaceUs": 76.39,
- "inPlaceUs": 87.44,
- "correct": true
- },
- {
- "sizeBytes": 8388608,
- "dtype": "float",
- "latencyUs": 78.25,
- "algBandwidthGbps": 107.21,
- "busBandwidthGbps": 93.8,
- "outOfPlaceUs": 78.25,
- "inPlaceUs": 89.31,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "dtype": "float",
- "latencyUs": 82.07,
- "algBandwidthGbps": 204.42,
- "busBandwidthGbps": 178.87,
- "outOfPlaceUs": 82.07,
- "inPlaceUs": 86.71,
- "correct": true
- },
- {
- "sizeBytes": 33554432,
- "dtype": "float",
- "latencyUs": 115.6,
- "algBandwidthGbps": 290.35,
- "busBandwidthGbps": 254.06,
- "outOfPlaceUs": 117.1,
- "inPlaceUs": 115.6,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "dtype": "float",
- "latencyUs": 187.1,
- "algBandwidthGbps": 358.71,
- "busBandwidthGbps": 313.88,
- "outOfPlaceUs": 192.5,
- "inPlaceUs": 187.1,
- "correct": true
- },
- {
- "sizeBytes": 134217728,
- "dtype": "float",
- "latencyUs": 323.5,
- "algBandwidthGbps": 414.87,
- "busBandwidthGbps": 363.02,
- "outOfPlaceUs": 342.8,
- "inPlaceUs": 323.5,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "dtype": "float",
- "latencyUs": 604.1,
- "algBandwidthGbps": 444.38,
- "busBandwidthGbps": 388.83,
- "outOfPlaceUs": 658.6,
- "inPlaceUs": 604.1,
- "correct": true
- },
- {
- "sizeBytes": 536870912,
- "dtype": "float",
- "latencyUs": 1173.6,
- "algBandwidthGbps": 457.48,
- "busBandwidthGbps": 400.29,
- "outOfPlaceUs": 1259.9,
- "inPlaceUs": 1173.6,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "dtype": "float",
- "latencyUs": 2311.8,
- "algBandwidthGbps": 464.46,
- "busBandwidthGbps": 406.4,
- "outOfPlaceUs": 2481,
- "inPlaceUs": 2311.8,
- "correct": true
- },
- {
- "sizeBytes": 2147483648,
- "dtype": "float",
- "latencyUs": 4567.5,
- "algBandwidthGbps": 470.17,
- "busBandwidthGbps": 411.4,
- "outOfPlaceUs": 4853.2,
- "inPlaceUs": 4567.5,
- "correct": true
- },
- {
- "sizeBytes": 4294967296,
- "dtype": "float",
- "latencyUs": 9220.5,
- "algBandwidthGbps": 465.81,
- "busBandwidthGbps": 407.58,
- "outOfPlaceUs": 9610.1,
- "inPlaceUs": 9220.5,
- "correct": true
- },
- {
- "sizeBytes": 8589934592,
- "dtype": "float",
- "latencyUs": 19037,
- "algBandwidthGbps": 451.22,
- "busBandwidthGbps": 394.82,
- "outOfPlaceUs": 19087,
- "inPlaceUs": 19037,
- "correct": true
- }
- ]
- }
- ],
- "offload": [
- {
- "id": "cxt-2254035a",
- "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pageable|us",
- "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink",
- "family": "offload",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "d2h",
- "subtype": "pageable",
- "valid": true,
- "status": "valid",
- "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)",
- "peakBandwidthGbps": 57.71,
- "latencyUnit": "us",
- "colorKey": "b300_2254035a",
- "label": "B300 · d2h · pageable",
- "generatedAt": "2026-06-27T13:14:13.476946+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:13.476946+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 0.319,
- "latency": 12.8224,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.197,
- "latency": 13.6896,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.07,
- "latency": 16.1008,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 10.171,
- "latency": 25.7744,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 16.232,
- "latency": 64.5984,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 22.845,
- "latency": 183.6016,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 25.057,
- "latency": 669.5584,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 25.741,
- "latency": 2607.0801,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 25.884,
- "latency": 10370.5231,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-ec9c695d",
- "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pinned|us",
- "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink",
- "family": "offload",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "d2h",
- "subtype": "pinned",
- "valid": true,
- "status": "valid",
- "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)",
- "peakBandwidthGbps": 57.71,
- "latencyUnit": "us",
- "colorKey": "b300_ec9c695d",
- "label": "B300 · d2h · pinned",
- "generatedAt": "2026-06-27T13:14:13.476946+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:13.476946+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 1.314,
- "latency": 3.1168,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.911,
- "latency": 3.336,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 16.26,
- "latency": 4.0304,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 35.371,
- "latency": 7.4112,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 49.656,
- "latency": 21.1168,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 55.179,
- "latency": 76.0128,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 56.698,
- "latency": 295.9056,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 57.243,
- "latency": 1172.3568,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 57.376,
- "latency": 4678.5118,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-0325201a",
- "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pageable|us",
- "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink",
- "family": "offload",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "h2d",
- "subtype": "pageable",
- "valid": true,
- "status": "valid",
- "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)",
- "peakBandwidthGbps": 57.71,
- "latencyUnit": "us",
- "colorKey": "b300_0325201a",
- "label": "B300 · h2d · pageable",
- "generatedAt": "2026-06-27T13:14:13.476946+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:13.476946+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 0.48,
- "latency": 8.5408,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.73,
- "latency": 9.4704,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.35,
- "latency": 15.0656,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 11.573,
- "latency": 22.6512,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 19.272,
- "latency": 54.408,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 32.974,
- "latency": 127.2,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 38.009,
- "latency": 441.4016,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 39.678,
- "latency": 1691.3168,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 40.13,
- "latency": 6689.2288,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-6112e71d",
- "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pinned|us",
- "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink",
- "family": "offload",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "h2d",
- "subtype": "pinned",
- "valid": true,
- "status": "valid",
- "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)",
- "peakBandwidthGbps": 57.71,
- "latencyUnit": "us",
- "colorKey": "b300_6112e71d",
- "label": "B300 · h2d · pinned",
- "generatedAt": "2026-06-27T13:14:13.476946+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:13.476946+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 1.204,
- "latency": 3.4032,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.481,
- "latency": 3.656,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 15.087,
- "latency": 4.344,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 32.966,
- "latency": 7.952,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 49.231,
- "latency": 21.2992,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 55.149,
- "latency": 76.0544,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 57.026,
- "latency": 294.2016,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 57.572,
- "latency": 1165.6432,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 57.71,
- "latency": 4651.4656,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-cdb189fe",
- "identity": "offload|h100|h100-nvlink-island|nvlink|d2h|pageable|us",
- "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink",
- "family": "offload",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "d2h",
- "subtype": "pageable",
- "valid": true,
- "status": "valid",
- "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)",
- "peakBandwidthGbps": 55.199,
- "latencyUnit": "us",
- "colorKey": "h100_cdb189fe",
- "label": "H100 · d2h · pageable",
- "generatedAt": "2026-06-27T13:13:55.178101+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:13:55.178101+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 0.291,
- "latency": 14.0992,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.049,
- "latency": 15.6128,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 2.957,
- "latency": 22.1648,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 6.642,
- "latency": 39.4656,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 9.239,
- "latency": 113.4928,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 12.413,
- "latency": 337.9072,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 13.847,
- "latency": 1211.6448,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 14.279,
- "latency": 4699.8737,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 14.534,
- "latency": 18469.5724,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-0606c0a1",
- "identity": "offload|h100|h100-nvlink-island|nvlink|d2h|pinned|us",
- "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink",
- "family": "offload",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "d2h",
- "subtype": "pinned",
- "valid": true,
- "status": "valid",
- "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)",
- "peakBandwidthGbps": 55.199,
- "latencyUnit": "us",
- "colorKey": "h100_0606c0a1",
- "label": "H100 · d2h · pinned",
- "generatedAt": "2026-06-27T13:13:55.178101+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:13:55.178101+00:00",
- "sha": null
- },
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 0.862,
- "latency": 4.7504,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 3.537,
- "latency": 4.632,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 13.999,
- "latency": 4.6816,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 34.756,
- "latency": 7.5424,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 46.25,
- "latency": 22.672,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 52.69,
- "latency": 79.6032,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 54.319,
- "latency": 308.8672,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 54.768,
- "latency": 1225.3216,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 54.899,
- "latency": 4889.6255,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-6119c3de",
- "identity": "offload|h100|h100-nvlink-island|nvlink|h2d|pageable|us",
- "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink",
- "family": "offload",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "h2d",
- "subtype": "pageable",
- "valid": true,
- "status": "valid",
- "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)",
- "peakBandwidthGbps": 55.199,
- "latencyUnit": "us",
- "colorKey": "h100_6119c3de",
- "label": "H100 · h2d · pageable",
- "generatedAt": "2026-06-27T13:13:55.178101+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:13:55.178101+00:00",
- "sha": null
+ "phase": "decode",
+ "routing": "zipf-mild",
+ "T": 16,
+ "p50_amplification": 0.975,
+ "p99_amplification": 0.916
},
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 0.367,
- "latency": 11.168,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.141,
- "latency": 14.3536,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 2.491,
- "latency": 26.3136,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 8.311,
- "latency": 31.5408,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 12.373,
- "latency": 84.7456,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 16.967,
- "latency": 247.208,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 18.73,
- "latency": 895.7264,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 19.219,
- "latency": 3491.8175,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 19.362,
- "latency": 13864.0869,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-60747181",
- "identity": "offload|h100|h100-nvlink-island|nvlink|h2d|pinned|us",
- "cohortIdentity": "offload|h100|h100-nvlink-island|nvlink",
- "family": "offload",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "h2d",
- "subtype": "pinned",
- "valid": true,
- "status": "valid",
- "note": "peak 55 GB/s · copy/compute overlap 55% · 2 NUMA node(s)",
- "peakBandwidthGbps": 55.199,
- "latencyUnit": "us",
- "colorKey": "h100_60747181",
- "label": "H100 · h2d · pinned",
- "generatedAt": "2026-06-27T13:13:55.178101+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:13:55.178101+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild",
+ "T": 32,
+ "p50_amplification": 0.996,
+ "p99_amplification": 1.07
},
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 0.64,
- "latency": 6.3984,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 3.19,
- "latency": 5.136,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 12.693,
- "latency": 5.1632,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 33.464,
- "latency": 7.8336,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 47.39,
- "latency": 22.1264,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 52.967,
- "latency": 79.1872,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 54.546,
- "latency": 307.5808,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 55.077,
- "latency": 1218.4512,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 55.199,
- "latency": 4863.0142,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-5472a2f0",
- "identity": "offload|h200|h200-nvlink-island|nvlink|d2h|pageable|us",
- "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink",
- "family": "offload",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "d2h",
- "subtype": "pageable",
- "valid": true,
- "status": "valid",
- "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)",
- "peakBandwidthGbps": 54.738,
- "latencyUnit": "us",
- "colorKey": "h200_5472a2f0",
- "label": "H200 · d2h · pageable",
- "generatedAt": "2026-06-27T13:14:28.000433+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.000433+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild",
+ "T": 64,
+ "p50_amplification": 0.993,
+ "p99_amplification": 0.994
},
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 0.349,
- "latency": 11.7232,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.05,
- "latency": 15.5984,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 3.61,
- "latency": 18.1552,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 6.584,
- "latency": 39.8176,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 8.775,
- "latency": 119.4976,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 13.542,
- "latency": 309.7312,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 15.692,
- "latency": 1069.1856,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 15.898,
- "latency": 4221.0976,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 16.284,
- "latency": 16484.2148,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-a653b433",
- "identity": "offload|h200|h200-nvlink-island|nvlink|d2h|pinned|us",
- "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink",
- "family": "offload",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "d2h",
- "subtype": "pinned",
- "valid": true,
- "status": "valid",
- "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)",
- "peakBandwidthGbps": 54.738,
- "latencyUnit": "us",
- "colorKey": "h200_a653b433",
- "label": "H200 · d2h · pinned",
- "generatedAt": "2026-06-27T13:14:28.000433+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.000433+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild",
+ "T": 128,
+ "p50_amplification": 1.012,
+ "p99_amplification": 1.005
},
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 1.066,
- "latency": 3.8416,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.38,
- "latency": 3.7408,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 16.019,
- "latency": 4.0912,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 36.28,
- "latency": 7.2256,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 46.925,
- "latency": 22.3456,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 50.673,
- "latency": 82.7712,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 53.181,
- "latency": 315.4752,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 53.519,
- "latency": 1253.9344,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 51.961,
- "latency": 5166.0847,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-88606cb0",
- "identity": "offload|h200|h200-nvlink-island|nvlink|h2d|pageable|us",
- "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink",
- "family": "offload",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "h2d",
- "subtype": "pageable",
- "valid": true,
- "status": "valid",
- "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)",
- "peakBandwidthGbps": 54.738,
- "latencyUnit": "us",
- "colorKey": "h200_88606cb0",
- "label": "H200 · h2d · pageable",
- "generatedAt": "2026-06-27T13:14:28.000433+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.000433+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild+eplb",
+ "T": 1,
+ "p50_amplification": 1.035,
+ "p99_amplification": 0.851
},
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 0.408,
- "latency": 10.048,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.057,
- "latency": 15.5072,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 2.766,
- "latency": 23.6976,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 9.51,
- "latency": 27.5664,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 13.367,
- "latency": 78.4464,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 18.167,
- "latency": 230.8736,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 20.785,
- "latency": 807.1696,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 21.442,
- "latency": 3129.8529,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 21.303,
- "latency": 12600.544,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-543138f3",
- "identity": "offload|h200|h200-nvlink-island|nvlink|h2d|pinned|us",
- "cohortIdentity": "offload|h200|h200-nvlink-island|nvlink",
- "family": "offload",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "h2d",
- "subtype": "pinned",
- "valid": true,
- "status": "valid",
- "note": "peak 55 GB/s · copy/compute overlap 0% · 2 NUMA node(s)",
- "peakBandwidthGbps": 54.738,
- "latencyUnit": "us",
- "colorKey": "h200_543138f3",
- "label": "H200 · h2d · pinned",
- "generatedAt": "2026-06-27T13:14:28.000433+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.000433+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild+eplb",
+ "T": 2,
+ "p50_amplification": 1.04,
+ "p99_amplification": 1.09
},
- "rows": [
- {
- "sizeBytes": 4096,
- "bandwidthGbps": 0.882,
- "latency": 4.6464,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.6,
- "latency": 3.5616,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 16.773,
- "latency": 3.9072,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 35.219,
- "latency": 7.4432,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 47.742,
- "latency": 21.9632,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 52.79,
- "latency": 79.4528,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 54.738,
- "latency": 306.4976,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 53.864,
- "latency": 1245.9056,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 54.328,
- "latency": 4940.9775,
- "sizeClass": null,
- "correct": null
- }
- ]
- }
- ],
- "copyEngine": [
- {
- "id": "cxt-6e3131b7",
- "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|copy-engine|us",
- "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod",
- "subtype": "copy-engine",
- "valid": true,
- "status": "valid",
- "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 33743.395,
- "latencyUnit": "us",
- "colorKey": "b300_6e3131b7",
- "label": "B300 · dtod · copy-engine",
- "generatedAt": "2026-06-27T13:14:14.567612+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:14.567612+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild+eplb",
+ "T": 4,
+ "p50_amplification": 1.022,
+ "p99_amplification": 1.054
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 7.729,
- "latency": 8.4789,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 31.851,
- "latency": 8.2304,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 131.475,
- "latency": 7.9755,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 506.069,
- "latency": 8.288,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 2092.131,
- "latency": 8.0192,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 8232.735,
- "latency": 8.1515,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 33743.395,
- "latency": 7.9552,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-214329f7",
- "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|sm|us",
- "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod",
- "subtype": "sm",
- "valid": true,
- "status": "valid",
- "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 33743.395,
- "latencyUnit": "us",
- "colorKey": "b300_214329f7",
- "label": "B300 · dtod · sm",
- "generatedAt": "2026-06-27T13:14:14.567612+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:14.567612+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild+eplb",
+ "T": 8,
+ "p50_amplification": 1.035,
+ "p99_amplification": 1.032
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 7.772,
- "latency": 8.432,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 31.011,
- "latency": 8.4533,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 127.139,
- "latency": 8.2475,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 515.355,
- "latency": 8.1387,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 2004.925,
- "latency": 8.368,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 8245.683,
- "latency": 8.1387,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 32844.98,
- "latency": 8.1728,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-64e7ea33",
- "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|copy-engine|us",
- "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "copy-engine",
- "valid": true,
- "status": "valid",
- "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 33743.395,
- "latencyUnit": "us",
- "colorKey": "b300_64e7ea33",
- "label": "B300 · htod · copy-engine",
- "generatedAt": "2026-06-27T13:14:14.567612+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:14.567612+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild+eplb",
+ "T": 16,
+ "p50_amplification": 1.041,
+ "p99_amplification": 1.026
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 8.922,
- "latency": 7.3451,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 35.159,
- "latency": 7.456,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 139.617,
- "latency": 7.5104,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 525.479,
- "latency": 7.9819,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 2004.925,
- "latency": 8.368,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 8211.245,
- "latency": 8.1728,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 32556.046,
- "latency": 8.2453,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-4b3f523b",
- "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|sm|us",
- "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "sm",
- "valid": true,
- "status": "valid",
- "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 33743.395,
- "latencyUnit": "us",
- "colorKey": "b300_4b3f523b",
- "label": "B300 · htod · sm",
- "generatedAt": "2026-06-27T13:14:14.567612+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:14.567612+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild+eplb",
+ "T": 32,
+ "p50_amplification": 1.043,
+ "p99_amplification": 1.015
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 7.918,
- "latency": 8.2773,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 31.703,
- "latency": 8.2688,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 127.9,
- "latency": 8.1984,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 509.743,
- "latency": 8.2283,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 2022.716,
- "latency": 8.2944,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 8166.48,
- "latency": 8.2176,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 32413.478,
- "latency": 8.2816,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-ff8a9f33",
- "identity": "copy-engine|h100|h100-nvlink-island|nvlink|dtod|copy-engine|us",
- "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod",
- "subtype": "copy-engine",
- "valid": true,
- "status": "valid",
- "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 18918.827,
- "latencyUnit": "us",
- "colorKey": "h100_ff8a9f33",
- "label": "H100 · dtod · copy-engine",
- "generatedAt": "2026-06-27T13:14:03.281164+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:03.281164+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild+eplb",
+ "T": 64,
+ "p50_amplification": 1.021,
+ "p99_amplification": 1.052
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.208,
- "latency": 15.5744,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 17.496,
- "latency": 14.9835,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 70.967,
- "latency": 14.7755,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 283.154,
- "latency": 14.8128,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1120.673,
- "latency": 14.9707,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 4411.651,
- "latency": 15.2117,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 17557.959,
- "latency": 15.2885,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-b4f7093b",
- "identity": "copy-engine|h100|h100-nvlink-island|nvlink|dtod|sm|us",
- "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod",
- "subtype": "sm",
- "valid": true,
- "status": "valid",
- "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 18918.827,
- "latencyUnit": "us",
- "colorKey": "h100_b4f7093b",
- "label": "H100 · dtod · sm",
- "generatedAt": "2026-06-27T13:14:03.281164+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:03.281164+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-mild+eplb",
+ "T": 128,
+ "p50_amplification": 1.017,
+ "p99_amplification": 1.017
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.26,
- "latency": 15.3856,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 17.366,
- "latency": 15.0955,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 68.961,
- "latency": 15.2053,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 274.803,
- "latency": 15.2629,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1060.954,
- "latency": 15.8133,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 4300.674,
- "latency": 15.6043,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 17342.584,
- "latency": 15.4784,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-08d3e6b7",
- "identity": "copy-engine|h100|h100-nvlink-island|nvlink|htod|copy-engine|us",
- "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "copy-engine",
- "valid": true,
- "status": "valid",
- "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 18918.827,
- "latencyUnit": "us",
- "colorKey": "h100_08d3e6b7",
- "label": "H100 · htod · copy-engine",
- "generatedAt": "2026-06-27T13:14:03.281164+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:03.281164+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate",
+ "T": 1,
+ "p50_amplification": 0.967,
+ "p99_amplification": 0.834
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.543,
- "latency": 14.4267,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 18.364,
- "latency": 14.2752,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 73.192,
- "latency": 14.3264,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 290.776,
- "latency": 14.4245,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1184.475,
- "latency": 14.1643,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 4735.759,
- "latency": 14.1707,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 18918.827,
- "latency": 14.1888,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-8afae0f7",
- "identity": "copy-engine|h100|h100-nvlink-island|nvlink|htod|sm|us",
- "cohortIdentity": "copy-engine|h100|h100-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "sm",
- "valid": true,
- "status": "valid",
- "note": "peak 18919 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 18918.827,
- "latencyUnit": "us",
- "colorKey": "h100_8afae0f7",
- "label": "H100 · htod · sm",
- "generatedAt": "2026-06-27T13:14:03.281164+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:03.281164+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate",
+ "T": 2,
+ "p50_amplification": 0.949,
+ "p99_amplification": 0.94
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.217,
- "latency": 15.5403,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 17.286,
- "latency": 15.1648,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 69.404,
- "latency": 15.1083,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 272.31,
- "latency": 15.4027,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1106.715,
- "latency": 15.1595,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 4303.322,
- "latency": 15.5947,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 17472.627,
- "latency": 15.3632,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-568b3ed1",
- "identity": "copy-engine|h200|h200-nvlink-island|nvlink|dtod|copy-engine|us",
- "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod",
- "subtype": "copy-engine",
- "valid": true,
- "status": "valid",
- "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 21990.41,
- "latencyUnit": "us",
- "colorKey": "h200_568b3ed1",
- "label": "H200 · dtod · copy-engine",
- "generatedAt": "2026-06-27T13:14:32.919518+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:32.919518+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate",
+ "T": 4,
+ "p50_amplification": 0.969,
+ "p99_amplification": 1.009
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 3.796,
- "latency": 17.264,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 16.942,
- "latency": 15.4731,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 68.576,
- "latency": 15.2907,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 252.563,
- "latency": 16.6069,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1049.556,
- "latency": 15.9851,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 3952.168,
- "latency": 16.9803,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 17644.131,
- "latency": 15.2139,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-d2f1fcf5",
- "identity": "copy-engine|h200|h200-nvlink-island|nvlink|dtod|sm|us",
- "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod",
- "subtype": "sm",
- "valid": true,
- "status": "valid",
- "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 21990.41,
- "latencyUnit": "us",
- "colorKey": "h200_d2f1fcf5",
- "label": "H200 · dtod · sm",
- "generatedAt": "2026-06-27T13:14:32.919518+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:32.919518+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate",
+ "T": 8,
+ "p50_amplification": 0.99,
+ "p99_amplification": 0.988
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 3.494,
- "latency": 18.7584,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 15.049,
- "latency": 17.4197,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 63.181,
- "latency": 16.5963,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 241.207,
- "latency": 17.3888,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 959.414,
- "latency": 17.4869,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 4005.001,
- "latency": 16.7563,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 16321.308,
- "latency": 16.4469,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-16dd6dad",
- "identity": "copy-engine|h200|h200-nvlink-island|nvlink|htod|copy-engine|us",
- "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "copy-engine",
- "valid": true,
- "status": "valid",
- "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 21990.41,
- "latencyUnit": "us",
- "colorKey": "h200_16dd6dad",
- "label": "H200 · htod · copy-engine",
- "generatedAt": "2026-06-27T13:14:32.919518+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:32.919518+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate",
+ "T": 16,
+ "p50_amplification": 0.996,
+ "p99_amplification": 1.053
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.478,
- "latency": 14.6357,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 19.382,
- "latency": 13.5253,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 73.252,
- "latency": 14.3147,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 300.417,
- "latency": 13.9616,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1322.624,
- "latency": 12.6848,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 5426.008,
- "latency": 12.368,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 21990.41,
- "latency": 12.2069,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-f87dced9",
- "identity": "copy-engine|h200|h200-nvlink-island|nvlink|htod|sm|us",
- "cohortIdentity": "copy-engine|h200|h200-nvlink-island|nvlink",
- "family": "copy-engine",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "sm",
- "valid": true,
- "status": "valid",
- "note": "peak 21990 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 21990.41,
- "latencyUnit": "us",
- "colorKey": "h200_f87dced9",
- "label": "H200 · htod · sm",
- "generatedAt": "2026-06-27T13:14:32.919518+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:32.919518+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate",
+ "T": 32,
+ "p50_amplification": 0.977,
+ "p99_amplification": 0.965
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.164,
- "latency": 15.7387,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 16.954,
- "latency": 15.4624,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 65.654,
- "latency": 15.9712,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 283.195,
- "latency": 14.8107,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1151.185,
- "latency": 14.5739,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 4718.356,
- "latency": 14.2229,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 18381.29,
- "latency": 14.6037,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-0f7ea2f3",
- "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|dtod|copy-engine|us",
- "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi",
- "family": "copy-engine",
- "sku": "mi355x",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "operation": "dtod",
- "subtype": "copy-engine",
- "valid": true,
- "status": "valid",
- "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 27738.291,
- "latencyUnit": "us",
- "colorKey": "mi355x_0f7ea2f3",
- "label": "MI355X · dtod · copy-engine",
- "generatedAt": "2026-06-29T00:49:25.677922+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-29T00:49:25.677922+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate",
+ "T": 64,
+ "p50_amplification": 0.942,
+ "p99_amplification": 0.978
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 6.29,
- "latency": 10.4187,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 25.583,
- "latency": 10.2468,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 101.083,
- "latency": 10.3734,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 390.576,
- "latency": 10.7388,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1611.113,
- "latency": 10.4134,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 6501.915,
- "latency": 10.3214,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 26296.406,
- "latency": 10.2081,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-a301ee7b",
- "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|dtod|sm|us",
- "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi",
- "family": "copy-engine",
- "sku": "mi355x",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "operation": "dtod",
- "subtype": "sm",
- "valid": true,
- "status": "valid",
- "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 27738.291,
- "latencyUnit": "us",
- "colorKey": "mi355x_a301ee7b",
- "label": "MI355X · dtod · sm",
- "generatedAt": "2026-06-29T00:49:25.677922+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-29T00:49:25.677922+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate",
+ "T": 128,
+ "p50_amplification": 0.988,
+ "p99_amplification": 0.979
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 5.974,
- "latency": 10.9707,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 23.442,
- "latency": 11.1828,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 92.466,
- "latency": 11.3401,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 380.375,
- "latency": 11.0268,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1520.029,
- "latency": 11.0374,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 6138.698,
- "latency": 10.9321,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 23990.086,
- "latency": 11.1894,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-30a30277",
- "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|htod|copy-engine|us",
- "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi",
- "family": "copy-engine",
- "sku": "mi355x",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "operation": "htod",
- "subtype": "copy-engine",
- "valid": true,
- "status": "valid",
- "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 27738.291,
- "latencyUnit": "us",
- "colorKey": "mi355x_30a30277",
- "label": "MI355X · htod · copy-engine",
- "generatedAt": "2026-06-29T00:49:25.677922+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-29T00:49:25.677922+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate+eplb",
+ "T": 1,
+ "p50_amplification": 1.016,
+ "p99_amplification": 1.001
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 7.111,
- "latency": 9.2161,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 28.79,
- "latency": 9.1054,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 118.831,
- "latency": 8.8241,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 492.98,
- "latency": 8.5081,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1921.041,
- "latency": 8.7334,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 7489.773,
- "latency": 8.9601,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 27738.291,
- "latency": 9.6774,
- "sizeClass": null,
- "correct": null
- }
- ]
- },
- {
- "id": "cxt-da4cda37",
- "identity": "copy-engine|mi355x|mi355x-xgmi|xgmi|htod|sm|us",
- "cohortIdentity": "copy-engine|mi355x|mi355x-xgmi|xgmi",
- "family": "copy-engine",
- "sku": "mi355x",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "operation": "htod",
- "subtype": "sm",
- "valid": true,
- "status": "valid",
- "note": "peak 27738 GB/s · copy-engine uses near-zero SMs: no",
- "peakBandwidthGbps": 27738.291,
- "latencyUnit": "us",
- "colorKey": "mi355x_da4cda37",
- "label": "MI355X · htod · sm",
- "generatedAt": "2026-06-29T00:49:25.677922+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-29T00:49:25.677922+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate+eplb",
+ "T": 2,
+ "p50_amplification": 0.962,
+ "p99_amplification": 0.934
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 6.239,
- "latency": 10.5041,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 24.106,
- "latency": 10.8748,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 97.985,
- "latency": 10.7014,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 394.892,
- "latency": 10.6214,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1607.819,
- "latency": 10.4348,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 6392.071,
- "latency": 10.4988,
- "sizeClass": null,
- "correct": null
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 25471.24,
- "latency": 10.5388,
- "sizeClass": null,
- "correct": null
- }
- ]
- }
- ],
- "kvCache": [
- {
- "id": "cxt-72e44191",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-local",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_72e44191",
- "label": "B300 · dtod-local · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate+eplb",
+ "T": 4,
+ "p50_amplification": 0.963,
+ "p99_amplification": 0.966
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.86,
- "latency": 0.00337,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 18.31,
- "latency": 0.00358,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 79.48,
- "latency": 0.0033,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 315.89,
- "latency": 0.00332,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 1140.42,
- "latency": 0.00368,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 2696.03,
- "latency": 0.00622,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 2724.4,
- "latency": 0.02463,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 3189.99,
- "latency": 0.08415,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-0198272e",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-local",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_0198272e",
- "label": "B300 · dtod-local · paged/memcpy",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate+eplb",
+ "T": 8,
+ "p50_amplification": 1.005,
+ "p99_amplification": 0.991
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 3.27,
- "latency": 0.005,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 13.15,
- "latency": 0.00498,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 13.46,
- "latency": 0.01948,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 13.76,
- "latency": 0.07619,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 13.84,
- "latency": 0.30311,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 13.87,
- "latency": 1.20968,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 13.83,
- "latency": 4.85211,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 13.89,
- "latency": 19.32599,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-65e093de",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-remote",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_65e093de",
- "label": "B300 · dtod-remote · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate+eplb",
+ "T": 16,
+ "p50_amplification": 1.01,
+ "p99_amplification": 1.073
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.08,
- "latency": 0.01514,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.52,
- "latency": 0.01451,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 17.43,
- "latency": 0.01504,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 67.07,
- "latency": 0.01563,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 205.84,
- "latency": 0.02038,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 409.12,
- "latency": 0.04101,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 644.24,
- "latency": 0.10417,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 736.42,
- "latency": 0.36451,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-502d7923",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-remote",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_502d7923",
- "label": "B300 · dtod-remote · paged/memcpy",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate+eplb",
+ "T": 32,
+ "p50_amplification": 1.022,
+ "p99_amplification": 0.985
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.11,
- "latency": 0.01473,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.35,
- "latency": 0.01507,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 4.3,
- "latency": 0.06098,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 4.27,
- "latency": 0.24556,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 4.26,
- "latency": 0.98559,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 4.24,
- "latency": 3.9593,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 4.27,
- "latency": 15.72352,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 4.25,
- "latency": 63.14588,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-0560494f",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_0560494f",
- "label": "B300 · dtoh · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate+eplb",
+ "T": 64,
+ "p50_amplification": 1.003,
+ "p99_amplification": 1.254
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.29,
- "latency": 0.01266,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.53,
- "latency": 0.01447,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 10.95,
- "latency": 0.02394,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 17.12,
- "latency": 0.06125,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 23.25,
- "latency": 0.18038,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 25.14,
- "latency": 0.66728,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 25.77,
- "latency": 2.60365,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 26.05,
- "latency": 10.30309,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-ce77da1a",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|contiguous/pinned|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "contiguous/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_ce77da1a",
- "label": "B300 · dtoh · contiguous/pinned",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf-moderate+eplb",
+ "T": 128,
+ "p50_amplification": 0.996,
+ "p99_amplification": 1.004
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.69,
- "latency": 0.00349,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 15.49,
- "latency": 0.00423,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 34.21,
- "latency": 0.00766,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 49.37,
- "latency": 0.02124,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 55.18,
- "latency": 0.07601,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 56.69,
- "latency": 0.29592,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 57.26,
- "latency": 1.17204,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 57.37,
- "latency": 4.67905,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-46a8e034",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_46a8e034",
- "label": "B300 · dtoh · paged/memcpy",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf+eplb",
+ "T": 1,
+ "p50_amplification": 0.932,
+ "p99_amplification": 0.825
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.15,
- "latency": 0.01424,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.09,
- "latency": 0.01604,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 4.13,
- "latency": 0.06348,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 4.08,
- "latency": 0.25721,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 4.12,
- "latency": 1.01899,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 4.13,
- "latency": 4.05933,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 4.12,
- "latency": 16.28391,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 4.08,
- "latency": 65.79932,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-74b14d7d",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtoh|paged/pinned|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "paged/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_74b14d7d",
- "label": "B300 · dtoh · paged/pinned",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf+eplb",
+ "T": 2,
+ "p50_amplification": 0.919,
+ "p99_amplification": 1.162
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 3.97,
- "latency": 0.00413,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 11.6,
- "latency": 0.00565,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 11.71,
- "latency": 0.02239,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 11.85,
- "latency": 0.08852,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 15.07,
- "latency": 0.27834,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 16.21,
- "latency": 1.0351,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 16,
- "latency": 4.19304,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 15.3,
- "latency": 17.54518,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-a39a3977",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_a39a3977",
- "label": "B300 · htod · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf+eplb",
+ "T": 4,
+ "p50_amplification": 0.912,
+ "p99_amplification": 0.912
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.67,
- "latency": 0.00351,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 10.64,
- "latency": 0.00616,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 28.03,
- "latency": 0.00935,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 39.84,
- "latency": 0.02632,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 40.9,
- "latency": 0.10256,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 43.53,
- "latency": 0.38545,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 40.29,
- "latency": 1.66584,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 40.43,
- "latency": 6.6389,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-4dc90462",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|contiguous/pinned|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "contiguous/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_4dc90462",
- "label": "B300 · htod · contiguous/pinned",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf+eplb",
+ "T": 8,
+ "p50_amplification": 0.937,
+ "p99_amplification": 1.21
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.92,
- "latency": 0.00333,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 15.9,
- "latency": 0.00412,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 35.21,
- "latency": 0.00745,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 49.53,
- "latency": 0.02117,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 54.66,
- "latency": 0.07673,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 57.11,
- "latency": 0.29375,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 57.27,
- "latency": 1.1717,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 57.29,
- "latency": 4.68587,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-1baaf76c",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_1baaf76c",
- "label": "B300 · htod · paged/memcpy",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf+eplb",
+ "T": 16,
+ "p50_amplification": 0.967,
+ "p99_amplification": 0.919
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 3.07,
- "latency": 0.00534,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 12.18,
- "latency": 0.00538,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 13.09,
- "latency": 0.02003,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 13.07,
- "latency": 0.08021,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 12.88,
- "latency": 0.32552,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 12.74,
- "latency": 1.31673,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 13.44,
- "latency": 4.99481,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 13.46,
- "latency": 19.93861,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-95e0eff5",
- "identity": "kv-cache|b300|b300-nvlink-island|nvlink|htod|paged/pinned|ms",
- "cohortIdentity": "kv-cache|b300|nvlink",
- "family": "kv-cache",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "paged/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "b300_95e0eff5",
- "label": "B300 · htod · paged/pinned",
- "generatedAt": "2026-06-27T13:14:28.674652+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:28.674652+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf+eplb",
+ "T": 32,
+ "p50_amplification": 0.987,
+ "p99_amplification": 1.244
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 3.85,
- "latency": 0.00425,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 11.28,
- "latency": 0.00581,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 11.35,
- "latency": 0.0231,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 11.41,
- "latency": 0.0919,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 14.68,
- "latency": 0.28572,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 15.48,
- "latency": 1.08353,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 15.4,
- "latency": 4.35678,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 15.59,
- "latency": 17.21665,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-ac86e5b5",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-local",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_ac86e5b5",
- "label": "H100 · dtod-local · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf+eplb",
+ "T": 64,
+ "p50_amplification": 0.979,
+ "p99_amplification": 0.986
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 3.08,
- "latency": 0.00532,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 12.22,
- "latency": 0.00536,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 51.29,
- "latency": 0.00511,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 203.7,
- "latency": 0.00515,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 830.27,
- "latency": 0.00505,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 1848.69,
- "latency": 0.00908,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 1404.25,
- "latency": 0.04779,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 1496.83,
- "latency": 0.17934,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-5cd440fa",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-local|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-local",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_5cd440fa",
- "label": "H100 · dtod-local · paged/memcpy",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "decode",
+ "routing": "zipf+eplb",
+ "T": 128,
+ "p50_amplification": 0.969,
+ "p99_amplification": 1.191
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.87,
- "latency": 0.00875,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 7.19,
- "latency": 0.00912,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 7.74,
- "latency": 0.03385,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 8.12,
- "latency": 0.12917,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 8.12,
- "latency": 0.51673,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 8.12,
- "latency": 2.06732,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 8.14,
- "latency": 8.24075,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 8.16,
- "latency": 32.8879,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-ea4a3eaa",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-remote",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_ea4a3eaa",
- "label": "H100 · dtod-remote · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf",
+ "T": 128,
+ "p50_amplification": 1.021,
+ "p99_amplification": 1.03
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.05,
- "latency": 0.01566,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.14,
- "latency": 0.01583,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 16.78,
- "latency": 0.01562,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 60.4,
- "latency": 0.01736,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 135.27,
- "latency": 0.03101,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 287.64,
- "latency": 0.05833,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 355.4,
- "latency": 0.18883,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 386.28,
- "latency": 0.69492,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-0ce612f7",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-remote",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_0ce612f7",
- "label": "H100 · dtod-remote · paged/memcpy",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf",
+ "T": 256,
+ "p50_amplification": 1.004,
+ "p99_amplification": 1.182
+ },
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf",
+ "T": 512,
+ "p50_amplification": 1.012,
+ "p99_amplification": 1.077
+ },
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf",
+ "T": 1024,
+ "p50_amplification": 1.047,
+ "p99_amplification": 1.128
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 0.81,
- "latency": 0.02017,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 3.39,
- "latency": 0.01935,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 3.46,
- "latency": 0.07571,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 3.5,
- "latency": 0.29964,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 3.52,
- "latency": 1.19198,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 3.51,
- "latency": 4.78335,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 3.51,
- "latency": 19.11805,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 3.51,
- "latency": 76.49081,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-9514aa3b",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_9514aa3b",
- "label": "H100 · dtoh · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf",
+ "T": 2048,
+ "p50_amplification": 1.129,
+ "p99_amplification": 1.165
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.14,
- "latency": 0.01435,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 3.21,
- "latency": 0.02041,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 6.95,
- "latency": 0.03775,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 9.46,
- "latency": 0.11089,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 12.75,
- "latency": 0.32908,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 14.17,
- "latency": 1.18418,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 14.68,
- "latency": 4.57034,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 14.78,
- "latency": 18.15827,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-51d4ebbe",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|contiguous/pinned|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "contiguous/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_51d4ebbe",
- "label": "H100 · dtoh · contiguous/pinned",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf",
+ "T": 4096,
+ "p50_amplification": 1.187,
+ "p99_amplification": 1.194
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 3.63,
- "latency": 0.00452,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 14.61,
- "latency": 0.00449,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 34.97,
- "latency": 0.0075,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 45.56,
- "latency": 0.02301,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 52.87,
- "latency": 0.07934,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 54.46,
- "latency": 0.30805,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 54.81,
- "latency": 1.22436,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 54.92,
- "latency": 4.88742,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-53eb5188",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_53eb5188",
- "label": "H100 · dtoh · paged/memcpy",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf",
+ "T": 128,
+ "p50_amplification": 1.039,
+ "p99_amplification": 1.018
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 0.95,
- "latency": 0.01729,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 2.79,
- "latency": 0.02345,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 2.8,
- "latency": 0.0936,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 2.83,
- "latency": 0.37049,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 2.84,
- "latency": 1.47709,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 2.84,
- "latency": 5.91534,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 2.8,
- "latency": 23.94517,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 2.81,
- "latency": 95.42213,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-58b1ef69",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|dtoh|paged/pinned|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "paged/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_58b1ef69",
- "label": "H100 · dtoh · paged/pinned",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf",
+ "T": 512,
+ "p50_amplification": 1.016,
+ "p99_amplification": 1.013
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 2.02,
- "latency": 0.00812,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 8.26,
- "latency": 0.00793,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 8.59,
- "latency": 0.03052,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 8.8,
- "latency": 0.11912,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 8.89,
- "latency": 0.47188,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 8.94,
- "latency": 1.87628,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 8.99,
- "latency": 7.46602,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 8.97,
- "latency": 29.91576,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-f0ce2a63",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_f0ce2a63",
- "label": "H100 · htod · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf",
+ "T": 2048,
+ "p50_amplification": 1.143,
+ "p99_amplification": 1.163
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 2.21,
- "latency": 0.00742,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 3.56,
- "latency": 0.0184,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 12.81,
- "latency": 0.02046,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 16.18,
- "latency": 0.06483,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 14.77,
- "latency": 0.28404,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 15.24,
- "latency": 1.10071,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 19.35,
- "latency": 3.46895,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 19.37,
- "latency": 13.85634,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-90f1ea66",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|contiguous/pinned|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "contiguous/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_90f1ea66",
- "label": "H100 · htod · contiguous/pinned",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-heavy",
+ "T": 128,
+ "p50_amplification": 1.01,
+ "p99_amplification": 0.998
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 3.54,
- "latency": 0.00463,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 10.95,
- "latency": 0.00598,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 20.37,
- "latency": 0.01287,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 24.67,
- "latency": 0.0425,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 45.61,
- "latency": 0.09197,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 19,
- "latency": 0.88291,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 15.38,
- "latency": 4.36425,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 26.93,
- "latency": 9.96701,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-e2eccf00",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_e2eccf00",
- "label": "H100 · htod · paged/memcpy",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-heavy",
+ "T": 512,
+ "p50_amplification": 0.927,
+ "p99_amplification": 0.91
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.48,
- "latency": 0.01107,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.89,
- "latency": 0.01341,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 5.26,
- "latency": 0.04985,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 5.25,
- "latency": 0.19989,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 4.87,
- "latency": 0.86178,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 4.88,
- "latency": 3.43634,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 5.29,
- "latency": 12.69012,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 5.3,
- "latency": 50.67481,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-9d427921",
- "identity": "kv-cache|h100|h100-nvlink-island|nvlink|htod|paged/pinned|ms",
- "cohortIdentity": "kv-cache|h100|nvlink",
- "family": "kv-cache",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "paged/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h100_9d427921",
- "label": "H100 · htod · paged/pinned",
- "generatedAt": "2026-06-27T13:14:31.575969+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:14:31.575969+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-heavy",
+ "T": 2048,
+ "p50_amplification": 1.01,
+ "p99_amplification": 1.028
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.62,
- "latency": 0.01012,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 7.91,
- "latency": 0.00828,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 8.25,
- "latency": 0.03177,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 8.56,
- "latency": 0.12251,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 8.72,
- "latency": 0.48117,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 8.69,
- "latency": 1.93067,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 8.76,
- "latency": 7.66475,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 8.8,
- "latency": 30.51378,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-da427647",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-local",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_da427647",
- "label": "H200 · dtod-local · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-heavy+eplb",
+ "T": 128,
+ "p50_amplification": 1.042,
+ "p99_amplification": 1.14
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 2.92,
- "latency": 0.00561,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 11.4,
- "latency": 0.00575,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 50.9,
- "latency": 0.00515,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 203.07,
- "latency": 0.00516,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 808.09,
- "latency": 0.00519,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 2577.62,
- "latency": 0.00651,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 1942.65,
- "latency": 0.03455,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 2094.13,
- "latency": 0.12818,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-e86f4c3c",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-local|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-local",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_e86f4c3c",
- "label": "H200 · dtod-local · paged/memcpy",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-heavy+eplb",
+ "T": 256,
+ "p50_amplification": 1.036,
+ "p99_amplification": 1.076
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.88,
- "latency": 0.0087,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 7.13,
- "latency": 0.00919,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 7.76,
- "latency": 0.03377,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 8.12,
- "latency": 0.12919,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 8.12,
- "latency": 0.51648,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 8.12,
- "latency": 2.06665,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 7.82,
- "latency": 8.58219,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 8.37,
- "latency": 32.07343,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-a92baae0",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-remote",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_a92baae0",
- "label": "H200 · dtod-remote · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-heavy+eplb",
+ "T": 512,
+ "p50_amplification": 1.022,
+ "p99_amplification": 1.105
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.32,
- "latency": 0.01245,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.88,
- "latency": 0.01342,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 18.4,
- "latency": 0.01425,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 50.51,
- "latency": 0.02076,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 157.92,
- "latency": 0.02656,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 283.17,
- "latency": 0.05925,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 357.67,
- "latency": 0.18763,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 386.41,
- "latency": 0.69468,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-a09960ed",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtod-remote",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_a09960ed",
- "label": "H200 · dtod-remote · paged/memcpy",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-heavy+eplb",
+ "T": 1024,
+ "p50_amplification": 1.026,
+ "p99_amplification": 1.114
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.02,
- "latency": 0.01611,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.09,
- "latency": 0.01602,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 4.28,
- "latency": 0.06122,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 4.15,
- "latency": 0.25284,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 4.16,
- "latency": 1.0074,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 4.38,
- "latency": 3.83027,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 3.98,
- "latency": 16.86224,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 4.35,
- "latency": 61.70685,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-5a06e0c5",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_5a06e0c5",
- "label": "H200 · dtoh · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-heavy+eplb",
+ "T": 2048,
+ "p50_amplification": 1.025,
+ "p99_amplification": 1.068
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.33,
- "latency": 0.01232,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 3.39,
- "latency": 0.01935,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 7.01,
- "latency": 0.03738,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 9.35,
- "latency": 0.11209,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 12.26,
- "latency": 0.34211,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 13.63,
- "latency": 1.2306,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 14,
- "latency": 4.79503,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 14.17,
- "latency": 18.94882,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-196034c4",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|contiguous/pinned|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "contiguous/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_196034c4",
- "label": "H200 · dtoh · contiguous/pinned",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-heavy+eplb",
+ "T": 4096,
+ "p50_amplification": 1.025,
+ "p99_amplification": 1.045
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.66,
- "latency": 0.00352,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 11.72,
- "latency": 0.00559,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 36.36,
- "latency": 0.00721,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 47.93,
- "latency": 0.02188,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 52.92,
- "latency": 0.07926,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 54.32,
- "latency": 0.30887,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 54.42,
- "latency": 1.2332,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 54.68,
- "latency": 4.90889,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-1edeeeca",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_1edeeeca",
- "label": "H200 · dtoh · paged/memcpy",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild",
+ "T": 128,
+ "p50_amplification": 1.032,
+ "p99_amplification": 1.244
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.12,
- "latency": 0.01465,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 3.05,
- "latency": 0.02151,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 3.1,
- "latency": 0.08467,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 3.2,
- "latency": 0.32818,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 3.07,
- "latency": 1.3646,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 3.14,
- "latency": 5.3446,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 3.12,
- "latency": 21.51246,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 3.1,
- "latency": 86.61224,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-19277faf",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|dtoh|paged/pinned|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "dtoh",
- "subtype": "paged/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_19277faf",
- "label": "H200 · dtoh · paged/pinned",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild",
+ "T": 256,
+ "p50_amplification": 1.03,
+ "p99_amplification": 1.015
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 2.57,
- "latency": 0.00638,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 6.21,
- "latency": 0.01056,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 6.34,
- "latency": 0.04137,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 6.39,
- "latency": 0.16406,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 9.68,
- "latency": 0.4333,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 11.16,
- "latency": 1.50278,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 10.75,
- "latency": 6.24109,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 11.08,
- "latency": 24.21999,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-0cec247d",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|contiguous/memcpy|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "contiguous/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_0cec247d",
- "label": "H200 · htod · contiguous/memcpy",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild",
+ "T": 512,
+ "p50_amplification": 1.049,
+ "p99_amplification": 1.02
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 2.07,
- "latency": 0.00793,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.33,
- "latency": 0.01513,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 13.97,
- "latency": 0.01876,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 17.88,
- "latency": 0.05865,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 14.91,
- "latency": 0.28129,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 15.65,
- "latency": 1.07179,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 20.77,
- "latency": 3.23166,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 20.84,
- "latency": 12.88331,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-541fa51c",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|contiguous/pinned|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "contiguous/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_541fa51c",
- "label": "H200 · htod · contiguous/pinned",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild",
+ "T": 1024,
+ "p50_amplification": 1.095,
+ "p99_amplification": 1.099
+ },
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild",
+ "T": 2048,
+ "p50_amplification": 1.139,
+ "p99_amplification": 1.152
+ },
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild",
+ "T": 4096,
+ "p50_amplification": 1.201,
+ "p99_amplification": 1.205
+ },
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild+eplb",
+ "T": 128,
+ "p50_amplification": 1.004,
+ "p99_amplification": 1.275
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 4.25,
- "latency": 0.00386,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 10.91,
- "latency": 0.00601,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 34.78,
- "latency": 0.00754,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 47.66,
- "latency": 0.022,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 53.14,
- "latency": 0.07893,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 54.49,
- "latency": 0.30792,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 54.39,
- "latency": 1.23395,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 55.4,
- "latency": 4.84562,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-59482272",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|paged/memcpy|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "paged/memcpy",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_59482272",
- "label": "H200 · htod · paged/memcpy",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild+eplb",
+ "T": 256,
+ "p50_amplification": 1.004,
+ "p99_amplification": 1.011
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 1.23,
- "latency": 0.01335,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 4.57,
- "latency": 0.01434,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 5.08,
- "latency": 0.05156,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 4.95,
- "latency": 0.21203,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 4.73,
- "latency": 0.8865,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 4.77,
- "latency": 3.51835,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 5.3,
- "latency": 12.65221,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 5.32,
- "latency": 50.43789,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-a5a8f197",
- "identity": "kv-cache|h200|h200-nvlink-island|nvlink|htod|paged/pinned|ms",
- "cohortIdentity": "kv-cache|h200|nvlink",
- "family": "kv-cache",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "htod",
- "subtype": "paged/pinned",
- "valid": true,
- "status": "valid",
- "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "h200_a5a8f197",
- "label": "H200 · htod · paged/pinned",
- "generatedAt": "2026-06-27T13:15:06.269124+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:15:06.269124+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild+eplb",
+ "T": 512,
+ "p50_amplification": 1.003,
+ "p99_amplification": 1.002
},
- "rows": [
- {
- "sizeBytes": 16384,
- "bandwidthGbps": 2.01,
- "latency": 0.00814,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 5.86,
- "latency": 0.01117,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 6.21,
- "latency": 0.04221,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 6.39,
- "latency": 0.16417,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 9.51,
- "latency": 0.44121,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 10.54,
- "latency": 1.59134,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 10.79,
- "latency": 6.22042,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 10.65,
- "latency": 25.1967,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-3fe4f8ad",
- "identity": "kv-cache|mi355x|mi355x-xgmi|xgmi|dtod-remote|contiguous/rccl|ms",
- "cohortIdentity": "kv-cache|mi355x|xgmi",
- "family": "kv-cache",
- "sku": "mi355x",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "operation": "dtod-remote",
- "subtype": "contiguous/rccl",
- "valid": true,
- "status": "valid",
- "note": "wired: rccl",
- "peakBandwidthGbps": null,
- "latencyUnit": "ms",
- "colorKey": "mi355x_3fe4f8ad",
- "label": "MI355X · dtod-remote · contiguous/rccl",
- "generatedAt": "2026-06-29T00:48:56.689585+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-29T00:48:56.689585+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild+eplb",
+ "T": 1024,
+ "p50_amplification": 1.007,
+ "p99_amplification": 1.014
},
- "rows": [
- {
- "sizeBytes": 65536,
- "bandwidthGbps": 0.93,
- "latency": 0.07018,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 262144,
- "bandwidthGbps": 4.14,
- "latency": 0.06326,
- "sizeClass": "decode",
- "correct": true
- },
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 14.43,
- "latency": 0.07267,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 46.03,
- "latency": 0.09112,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 63.43,
- "latency": 0.26449,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 69.8,
- "latency": 0.96147,
- "sizeClass": "prefill",
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 71.72,
- "latency": 3.74303,
- "sizeClass": "prefill",
- "correct": true
- }
- ]
- }
- ],
- "rlMesh": [
- {
- "id": "cxt-e28663d4",
- "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|paired|ms",
- "cohortIdentity": "rl-mesh|b300|nvlink",
- "family": "rl-mesh",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "generator_to_trainer",
- "subtype": "paired",
- "valid": true,
- "status": "valid",
- "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 681.89,
- "latencyUnit": "ms",
- "colorKey": "b300_e28663d4",
- "label": "B300 · gen->trn · paired",
- "generatedAt": "2026-06-27T13:38:50.291192+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:38:50.291192+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild+eplb",
+ "T": 2048,
+ "p50_amplification": 1.009,
+ "p99_amplification": 1.028
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 22.43,
- "latency": 0.04675,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 210.03,
- "latency": 0.01997,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 444.24,
- "latency": 0.03777,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 613.35,
- "latency": 0.10941,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 672.64,
- "latency": 0.39908,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 681.89,
- "latency": 1.57465,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-abc63f3d",
- "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|redistribute|ms",
- "cohortIdentity": "rl-mesh|b300|nvlink",
- "family": "rl-mesh",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "generator_to_trainer",
- "subtype": "redistribute",
- "valid": true,
- "status": "valid",
- "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 681.89,
- "latencyUnit": "ms",
- "colorKey": "b300_abc63f3d",
- "label": "B300 · gen->trn · redistribute",
- "generatedAt": "2026-06-27T13:38:50.291192+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:38:50.291192+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-mild+eplb",
+ "T": 4096,
+ "p50_amplification": 1.005,
+ "p99_amplification": 1.007
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 0.02,
- "latency": 44.24712,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 56.86,
- "latency": 0.07377,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 168.78,
- "latency": 0.0994,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 466.61,
- "latency": 0.14382,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 565.6,
- "latency": 0.4746,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 656.22,
- "latency": 1.63626,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-08ab0854",
- "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|paired|ms",
- "cohortIdentity": "rl-mesh|b300|nvlink",
- "family": "rl-mesh",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "trainer_to_generator",
- "subtype": "paired",
- "valid": true,
- "status": "valid",
- "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 681.89,
- "latencyUnit": "ms",
- "colorKey": "b300_08ab0854",
- "label": "B300 · trn->gen · paired",
- "generatedAt": "2026-06-27T13:38:50.291192+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:38:50.291192+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate",
+ "T": 128,
+ "p50_amplification": 0.994,
+ "p99_amplification": 0.982
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 8.13,
- "latency": 0.12892,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 161.07,
- "latency": 0.02604,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 455.8,
- "latency": 0.03681,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 613.96,
- "latency": 0.10931,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 670.34,
- "latency": 0.40045,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 681.46,
- "latency": 1.57564,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-bea1bfbd",
- "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|redistribute|ms",
- "cohortIdentity": "rl-mesh|b300|nvlink",
- "family": "rl-mesh",
- "sku": "b300",
- "topologyClass": "b300-nvlink-island",
- "transport": "nvlink",
- "operation": "trainer_to_generator",
- "subtype": "redistribute",
- "valid": true,
- "status": "valid",
- "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 681.89,
- "latencyUnit": "ms",
- "colorKey": "b300_bea1bfbd",
- "label": "B300 · trn->gen · redistribute",
- "generatedAt": "2026-06-27T13:38:50.291192+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:38:50.291192+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate",
+ "T": 256,
+ "p50_amplification": 0.969,
+ "p99_amplification": 1.212
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 0.01,
- "latency": 74.91642,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 66.21,
- "latency": 0.06334,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 295.56,
- "latency": 0.05676,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 581.82,
- "latency": 0.11534,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 543.6,
- "latency": 0.49381,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 659.57,
- "latency": 1.62794,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-3e3f24d0",
- "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|generator_to_trainer|paired|ms",
- "cohortIdentity": "rl-mesh|h100|nvlink",
- "family": "rl-mesh",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "generator_to_trainer",
- "subtype": "paired",
- "valid": true,
- "status": "valid",
- "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 372.53,
- "latencyUnit": "ms",
- "colorKey": "h100_3e3f24d0",
- "label": "H100 · gen->trn · paired",
- "generatedAt": "2026-06-27T13:36:14.593136+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:36:14.593136+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate",
+ "T": 512,
+ "p50_amplification": 0.991,
+ "p99_amplification": 0.985
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 28.68,
- "latency": 0.03656,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 119.47,
- "latency": 0.03511,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 179.16,
- "latency": 0.09364,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 333.15,
- "latency": 0.20144,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 360.41,
- "latency": 0.7448,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 372.22,
- "latency": 2.88468,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-02dece19",
- "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|generator_to_trainer|redistribute|ms",
- "cohortIdentity": "rl-mesh|h100|nvlink",
- "family": "rl-mesh",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "generator_to_trainer",
- "subtype": "redistribute",
- "valid": true,
- "status": "valid",
- "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 372.53,
- "latencyUnit": "ms",
- "colorKey": "h100_02dece19",
- "label": "H100 · gen->trn · redistribute",
- "generatedAt": "2026-06-27T13:36:14.593136+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:36:14.593136+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate",
+ "T": 1024,
+ "p50_amplification": 1.034,
+ "p99_amplification": 1.121
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 0.33,
- "latency": 3.20924,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 26.07,
- "latency": 0.16087,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 105.25,
- "latency": 0.1594,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 306.68,
- "latency": 0.21882,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 332.52,
- "latency": 0.80728,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 324.51,
- "latency": 3.30884,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-40b74430",
- "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|trainer_to_generator|paired|ms",
- "cohortIdentity": "rl-mesh|h100|nvlink",
- "family": "rl-mesh",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "trainer_to_generator",
- "subtype": "paired",
- "valid": true,
- "status": "valid",
- "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 372.53,
- "latencyUnit": "ms",
- "colorKey": "h100_40b74430",
- "label": "H100 · trn->gen · paired",
- "generatedAt": "2026-06-27T13:36:14.593136+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:36:14.593136+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate",
+ "T": 2048,
+ "p50_amplification": 1.124,
+ "p99_amplification": 1.132
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 19.34,
- "latency": 0.05421,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 102.79,
- "latency": 0.04081,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 282.95,
- "latency": 0.05929,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 331.36,
- "latency": 0.20252,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 359.85,
- "latency": 0.74597,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 372.53,
- "latency": 2.88228,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-3f787c79",
- "identity": "rl-mesh|h100|h100-nvlink-island|nvlink|trainer_to_generator|redistribute|ms",
- "cohortIdentity": "rl-mesh|h100|nvlink",
- "family": "rl-mesh",
- "sku": "h100",
- "topologyClass": "h100-nvlink-island",
- "transport": "nvlink",
- "operation": "trainer_to_generator",
- "subtype": "redistribute",
- "valid": true,
- "status": "valid",
- "note": "peak 373 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 372.53,
- "latencyUnit": "ms",
- "colorKey": "h100_3f787c79",
- "label": "H100 · trn->gen · redistribute",
- "generatedAt": "2026-06-27T13:36:14.593136+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:36:14.593136+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate",
+ "T": 4096,
+ "p50_amplification": 1.194,
+ "p99_amplification": 1.197
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 0.02,
- "latency": 42.89165,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 26.19,
- "latency": 0.16012,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 107.67,
- "latency": 0.15583,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 313.63,
- "latency": 0.21398,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 324.4,
- "latency": 0.82748,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 336.39,
- "latency": 3.19197,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-3051cd1a",
- "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|generator_to_trainer|paired|ms",
- "cohortIdentity": "rl-mesh|h200|nvlink",
- "family": "rl-mesh",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "generator_to_trainer",
- "subtype": "paired",
- "valid": true,
- "status": "valid",
- "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 369.31,
- "latencyUnit": "ms",
- "colorKey": "h200_3051cd1a",
- "label": "H200 · gen->trn · paired",
- "generatedAt": "2026-06-27T13:38:51.710797+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:38:51.710797+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate+eplb",
+ "T": 128,
+ "p50_amplification": 1.007,
+ "p99_amplification": 0.993
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 23.89,
- "latency": 0.0439,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 156.49,
- "latency": 0.0268,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 293.07,
- "latency": 0.05725,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 335.17,
- "latency": 0.20023,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 358.96,
- "latency": 0.74782,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 369.31,
- "latency": 2.90744,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-71059d57",
- "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|generator_to_trainer|redistribute|ms",
- "cohortIdentity": "rl-mesh|h200|nvlink",
- "family": "rl-mesh",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "generator_to_trainer",
- "subtype": "redistribute",
- "valid": true,
- "status": "valid",
- "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 369.31,
- "latencyUnit": "ms",
- "colorKey": "h200_71059d57",
- "label": "H200 · gen->trn · redistribute",
- "generatedAt": "2026-06-27T13:38:51.710797+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:38:51.710797+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate+eplb",
+ "T": 256,
+ "p50_amplification": 1.008,
+ "p99_amplification": 0.989
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 0.04,
- "latency": 25.02575,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 39.38,
- "latency": 0.10651,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 155.89,
- "latency": 0.10762,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 245.8,
- "latency": 0.27303,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 330.18,
- "latency": 0.81301,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 342.63,
- "latency": 3.1338,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-c6f0b6b2",
- "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|trainer_to_generator|paired|ms",
- "cohortIdentity": "rl-mesh|h200|nvlink",
- "family": "rl-mesh",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "trainer_to_generator",
- "subtype": "paired",
- "valid": true,
- "status": "valid",
- "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 369.31,
- "latencyUnit": "ms",
- "colorKey": "h200_c6f0b6b2",
- "label": "H200 · trn->gen · paired",
- "generatedAt": "2026-06-27T13:38:51.710797+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:38:51.710797+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate+eplb",
+ "T": 512,
+ "p50_amplification": 1.014,
+ "p99_amplification": 1.148
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 4.34,
- "latency": 0.24155,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 61.01,
- "latency": 0.06874,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 288.23,
- "latency": 0.05821,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 333.03,
- "latency": 0.20151,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 358.08,
- "latency": 0.74964,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 369.21,
- "latency": 2.90821,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-494c6e3f",
- "identity": "rl-mesh|h200|h200-nvlink-island|nvlink|trainer_to_generator|redistribute|ms",
- "cohortIdentity": "rl-mesh|h200|nvlink",
- "family": "rl-mesh",
- "sku": "h200",
- "topologyClass": "h200-nvlink-island",
- "transport": "nvlink",
- "operation": "trainer_to_generator",
- "subtype": "redistribute",
- "valid": true,
- "status": "valid",
- "note": "peak 369 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 369.31,
- "latencyUnit": "ms",
- "colorKey": "h200_494c6e3f",
- "label": "H200 · trn->gen · redistribute",
- "generatedAt": "2026-06-27T13:38:51.710797+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-27T13:38:51.710797+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate+eplb",
+ "T": 1024,
+ "p50_amplification": 0.994,
+ "p99_amplification": 1.003
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 0.02,
- "latency": 56.31775,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 33.42,
- "latency": 0.12549,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 99.65,
- "latency": 0.16836,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 180.83,
- "latency": 0.37112,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 260.28,
- "latency": 1.03132,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 340.94,
- "latency": 3.14936,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-2963cf1c",
- "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|generator_to_trainer|paired|ms",
- "cohortIdentity": "rl-mesh|mi355x|xgmi",
- "family": "rl-mesh",
- "sku": "mi355x",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "operation": "generator_to_trainer",
- "subtype": "paired",
- "valid": true,
- "status": "valid",
- "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 71.79,
- "latencyUnit": "ms",
- "colorKey": "mi355x_2963cf1c",
- "label": "MI355X · gen->trn · paired",
- "generatedAt": "2026-06-28T05:12:36.633047+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T05:12:36.633047+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate+eplb",
+ "T": 2048,
+ "p50_amplification": 1.009,
+ "p99_amplification": 1.078
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 14.01,
- "latency": 0.07485,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 45.33,
- "latency": 0.09253,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 62.8,
- "latency": 0.26717,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 69.38,
- "latency": 0.9672,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 71.12,
- "latency": 3.77445,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 71.62,
- "latency": 14.99269,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-687aa675",
- "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|generator_to_trainer|redistribute|ms",
- "cohortIdentity": "rl-mesh|mi355x|xgmi",
- "family": "rl-mesh",
- "sku": "mi355x",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "operation": "generator_to_trainer",
- "subtype": "redistribute",
- "valid": true,
- "status": "valid",
- "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 71.79,
- "latencyUnit": "ms",
- "colorKey": "mi355x_687aa675",
- "label": "MI355X · gen->trn · redistribute",
- "generatedAt": "2026-06-28T05:12:36.633047+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T05:12:36.633047+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf-moderate+eplb",
+ "T": 4096,
+ "p50_amplification": 1.004,
+ "p99_amplification": 1.004
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 0.09,
- "latency": 12.00838,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 13.34,
- "latency": 0.3144,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 48.16,
- "latency": 0.34836,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 64.08,
- "latency": 1.04724,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 69.45,
- "latency": 3.8654,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 71.41,
- "latency": 15.03625,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-0700747c",
- "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|trainer_to_generator|paired|ms",
- "cohortIdentity": "rl-mesh|mi355x|xgmi",
- "family": "rl-mesh",
- "sku": "mi355x",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "operation": "trainer_to_generator",
- "subtype": "paired",
- "valid": true,
- "status": "valid",
- "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 71.79,
- "latencyUnit": "ms",
- "colorKey": "mi355x_0700747c",
- "label": "MI355X · trn->gen · paired",
- "generatedAt": "2026-06-28T05:12:36.633047+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T05:12:36.633047+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf+eplb",
+ "T": 128,
+ "p50_amplification": 1.014,
+ "p99_amplification": 1.007
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 10.53,
- "latency": 0.0996,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 44.33,
- "latency": 0.09462,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 62.58,
- "latency": 0.2681,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 69.37,
- "latency": 0.96746,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 71.32,
- "latency": 3.76377,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 71.79,
- "latency": 14.95774,
- "sizeClass": null,
- "correct": true
- }
- ]
- },
- {
- "id": "cxt-a10511d5",
- "identity": "rl-mesh|mi355x|mi355x-xgmi|xgmi|trainer_to_generator|redistribute|ms",
- "cohortIdentity": "rl-mesh|mi355x|xgmi",
- "family": "rl-mesh",
- "sku": "mi355x",
- "topologyClass": "mi355x-xgmi",
- "transport": "xgmi",
- "operation": "trainer_to_generator",
- "subtype": "redistribute",
- "valid": true,
- "status": "valid",
- "note": "peak 72 GB/s · world=8: trainer 4 <-> generator 4",
- "peakBandwidthGbps": 71.79,
- "latencyUnit": "ms",
- "colorKey": "mi355x_a10511d5",
- "label": "MI355X · trn->gen · redistribute",
- "generatedAt": "2026-06-28T05:12:36.633047+00:00",
- "run": {
- "id": null,
- "url": null,
- "createdAt": "2026-06-28T05:12:36.633047+00:00",
- "sha": null
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf+eplb",
+ "T": 256,
+ "p50_amplification": 1.01,
+ "p99_amplification": 1.013
},
- "rows": [
- {
- "sizeBytes": 1048576,
- "bandwidthGbps": 0.01,
- "latency": 97.26006,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 4194304,
- "bandwidthGbps": 14.75,
- "latency": 0.28435,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 16777216,
- "bandwidthGbps": 50.28,
- "latency": 0.33368,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 67108864,
- "bandwidthGbps": 65.3,
- "latency": 1.02763,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 268435456,
- "bandwidthGbps": 70.05,
- "latency": 3.83224,
- "sizeClass": null,
- "correct": true
- },
- {
- "sizeBytes": 1073741824,
- "bandwidthGbps": 71.74,
- "latency": 14.96724,
- "sizeClass": null,
- "correct": true
- }
- ]
- }
- ],
- "scannedRuns": 313,
- "scannedArtifacts": 891,
- "contributingRuns": 313,
- "generatedAt": "2026-06-29T02:42:52.989Z"
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf+eplb",
+ "T": 512,
+ "p50_amplification": 1.003,
+ "p99_amplification": 0.994
+ },
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf+eplb",
+ "T": 1024,
+ "p50_amplification": 0.992,
+ "p99_amplification": 0.999
+ },
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf+eplb",
+ "T": 2048,
+ "p50_amplification": 1.006,
+ "p99_amplification": 1.016
+ },
+ {
+ "sku": "gb300",
+ "ep": 8,
+ "phase": "prefill",
+ "routing": "zipf+eplb",
+ "T": 4096,
+ "p50_amplification": 1.004,
+ "p99_amplification": 1.003
+ }
+ ]
+ },
+ "nccl": [],
+ "offload": [],
+ "copyEngine": [],
+ "kvCache": [],
+ "rlMesh": [],
+ "scannedRuns": 4,
+ "scannedArtifacts": 4,
+ "contributingRuns": 4,
+ "generatedAt": "2026-06-29T14:13:22.285Z"
}
diff --git a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx
index 9c90e219..caa42258 100644
--- a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx
+++ b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx
@@ -177,7 +177,7 @@ function displaySeriesLabel(item: CollectiveXSeries): string {
}
function backendFilterValue(item: CollectiveXSeries): string {
- return collectiveXBackendLabel(item.backend, item.backendVersion);
+ return collectiveXBackendLabel(item.backend, item.backendVersion, item.shape.kernelGeneration);
}
function backendSortRank(value: string): number {
@@ -274,7 +274,7 @@ export default function CollectiveXDisplay() {
const [percentile, setPercentile] = useState('p99');
const [suite, setSuite] = useState('resource-constrained');
const [routing, setRouting] = useState('uniform');
- const [publication, setPublication] = useState('official-headline');
+ const [publication, setPublication] = useState('all');
const [xAxis, setXAxis] = useState('tokens-per-rank');
const [yAxis, setYAxis] = useState('latency');
const [xScaleType, setXScaleType] = useState